| diskio_meat.hpp | | diskio_meat.hpp | |
| | | | |
| skipping to change at line 871 | | skipping to change at line 871 | |
| { | | { | |
| arma_extra_debug_sigprint(); | | arma_extra_debug_sigprint(); | |
| | | | |
| uword cell_width; | | uword cell_width; | |
| | | | |
| // TODO: need sane values for complex numbers | | // TODO: need sane values for complex numbers | |
| | | | |
| if( (is_float<eT>::value == true) || (is_double<eT>::value == true) ) | | if( (is_float<eT>::value == true) || (is_double<eT>::value == true) ) | |
| { | | { | |
| f.setf(ios::scientific); | | f.setf(ios::scientific); | |
|
| f.precision(10); | | f.precision(12); | |
| cell_width = 18; | | cell_width = 20; | |
| } | | } | |
| | | | |
| for(uword row=0; row < x.n_rows; ++row) | | for(uword row=0; row < x.n_rows; ++row) | |
| { | | { | |
| for(uword col=0; col < x.n_cols; ++col) | | for(uword col=0; col < x.n_cols; ++col) | |
| { | | { | |
| f.put(' '); | | f.put(' '); | |
| | | | |
| if( (is_float<eT>::value == true) || (is_double<eT>::value == true) ) | | if( (is_float<eT>::value == true) || (is_double<eT>::value == true) ) | |
| { | | { | |
| | | | |
| skipping to change at line 989 | | skipping to change at line 989 | |
| f << diskio::gen_txt_header(x) << '\n'; | | f << diskio::gen_txt_header(x) << '\n'; | |
| f << x.n_rows << ' ' << x.n_cols << '\n'; | | f << x.n_rows << ' ' << x.n_cols << '\n'; | |
| | | | |
| uword cell_width; | | uword cell_width; | |
| | | | |
| // TODO: need sane values for complex numbers | | // TODO: need sane values for complex numbers | |
| | | | |
| if( (is_float<eT>::value == true) || (is_double<eT>::value == true) ) | | if( (is_float<eT>::value == true) || (is_double<eT>::value == true) ) | |
| { | | { | |
| f.setf(ios::scientific); | | f.setf(ios::scientific); | |
|
| f.precision(10); | | f.precision(12); | |
| cell_width = 18; | | cell_width = 20; | |
| } | | } | |
| | | | |
| for(uword row=0; row < x.n_rows; ++row) | | for(uword row=0; row < x.n_rows; ++row) | |
| { | | { | |
| for(uword col=0; col < x.n_cols; ++col) | | for(uword col=0; col < x.n_cols; ++col) | |
| { | | { | |
| f.put(' '); | | f.put(' '); | |
| | | | |
| if( (is_float<eT>::value == true) || (is_double<eT>::value == true) ) | | if( (is_float<eT>::value == true) || (is_double<eT>::value == true) ) | |
| { | | { | |
| | | | |
| skipping to change at line 1062 | | skipping to change at line 1062 | |
| { | | { | |
| arma_extra_debug_sigprint(); | | arma_extra_debug_sigprint(); | |
| | | | |
| const ios::fmtflags orig_flags = f.flags(); | | const ios::fmtflags orig_flags = f.flags(); | |
| | | | |
| // TODO: need sane values for complex numbers | | // TODO: need sane values for complex numbers | |
| | | | |
| if( (is_float<eT>::value == true) || (is_double<eT>::value == true) ) | | if( (is_float<eT>::value == true) || (is_double<eT>::value == true) ) | |
| { | | { | |
| f.setf(ios::scientific); | | f.setf(ios::scientific); | |
|
| f.precision(10); | | f.precision(12); | |
| } | | } | |
| | | | |
| uword x_n_rows = x.n_rows; | | uword x_n_rows = x.n_rows; | |
| uword x_n_cols = x.n_cols; | | uword x_n_cols = x.n_cols; | |
| | | | |
| for(uword row=0; row < x_n_rows; ++row) | | for(uword row=0; row < x_n_rows; ++row) | |
| { | | { | |
| for(uword col=0; col < x_n_cols; ++col) | | for(uword col=0; col < x_n_cols; ++col) | |
| { | | { | |
| f << x.at(row,col); | | f << x.at(row,col); | |
| | | | |
| skipping to change at line 2206 | | skipping to change at line 2206 | |
| | | | |
| for(; iter != iter_end; ++iter) | | for(; iter != iter_end; ++iter) | |
| { | | { | |
| f.setf(ios::fixed); | | f.setf(ios::fixed); | |
| | | | |
| f << iter.row() << ' ' << iter.col() << ' '; | | f << iter.row() << ' ' << iter.col() << ' '; | |
| | | | |
| if( (is_float<eT>::value == true) || (is_double<eT>::value == true) ) | | if( (is_float<eT>::value == true) || (is_double<eT>::value == true) ) | |
| { | | { | |
| f.setf(ios::scientific); | | f.setf(ios::scientific); | |
|
| f.precision(10); | | f.precision(12); | |
| } | | } | |
| | | | |
| f << (*iter) << '\n'; | | f << (*iter) << '\n'; | |
| } | | } | |
| | | | |
| // make sure it's possible to figure out the matrix size later | | // make sure it's possible to figure out the matrix size later | |
| if( (x.n_rows > 0) && (x.n_cols > 0) ) | | if( (x.n_rows > 0) && (x.n_cols > 0) ) | |
| { | | { | |
| const uword max_row = (x.n_rows > 0) ? x.n_rows-1 : 0; | | const uword max_row = (x.n_rows > 0) ? x.n_rows-1 : 0; | |
| const uword max_col = (x.n_cols > 0) ? x.n_cols-1 : 0; | | const uword max_col = (x.n_cols > 0) ? x.n_cols-1 : 0; | |
| | | | |
| skipping to change at line 2257 | | skipping to change at line 2257 | |
| | | | |
| for(; iter != iter_end; ++iter) | | for(; iter != iter_end; ++iter) | |
| { | | { | |
| f.setf(ios::fixed); | | f.setf(ios::fixed); | |
| | | | |
| f << iter.row() << ' ' << iter.col() << ' '; | | f << iter.row() << ' ' << iter.col() << ' '; | |
| | | | |
| if( (is_float<T>::value == true) || (is_double<T>::value == true) ) | | if( (is_float<T>::value == true) || (is_double<T>::value == true) ) | |
| { | | { | |
| f.setf(ios::scientific); | | f.setf(ios::scientific); | |
|
| f.precision(10); | | f.precision(12); | |
| } | | } | |
| | | | |
| const eT val = (*iter); | | const eT val = (*iter); | |
| | | | |
| f << val.real() << ' ' << val.imag() << '\n'; | | f << val.real() << ' ' << val.imag() << '\n'; | |
| } | | } | |
| | | | |
| // make sure it's possible to figure out the matrix size later | | // make sure it's possible to figure out the matrix size later | |
| if( (x.n_rows > 0) && (x.n_cols > 0) ) | | if( (x.n_rows > 0) && (x.n_cols > 0) ) | |
| { | | { | |
| | | | |
| skipping to change at line 2961 | | skipping to change at line 2961 | |
| { | | { | |
| arma_extra_debug_sigprint(); | | arma_extra_debug_sigprint(); | |
| | | | |
| uword cell_width; | | uword cell_width; | |
| | | | |
| // TODO: need sane values for complex numbers | | // TODO: need sane values for complex numbers | |
| | | | |
| if( (is_float<eT>::value == true) || (is_double<eT>::value == true) ) | | if( (is_float<eT>::value == true) || (is_double<eT>::value == true) ) | |
| { | | { | |
| f.setf(ios::scientific); | | f.setf(ios::scientific); | |
|
| f.precision(10); | | f.precision(12); | |
| cell_width = 18; | | cell_width = 20; | |
| } | | } | |
| | | | |
| for(uword slice=0; slice < x.n_slices; ++slice) | | for(uword slice=0; slice < x.n_slices; ++slice) | |
| { | | { | |
| for(uword row=0; row < x.n_rows; ++row) | | for(uword row=0; row < x.n_rows; ++row) | |
| { | | { | |
| for(uword col=0; col < x.n_cols; ++col) | | for(uword col=0; col < x.n_cols; ++col) | |
| { | | { | |
| f.put(' '); | | f.put(' '); | |
| | | | |
| | | | |
| skipping to change at line 3082 | | skipping to change at line 3082 | |
| f << diskio::gen_txt_header(x) << '\n'; | | f << diskio::gen_txt_header(x) << '\n'; | |
| f << x.n_rows << ' ' << x.n_cols << ' ' << x.n_slices << '\n'; | | f << x.n_rows << ' ' << x.n_cols << ' ' << x.n_slices << '\n'; | |
| | | | |
| uword cell_width; | | uword cell_width; | |
| | | | |
| // TODO: need sane values for complex numbers | | // TODO: need sane values for complex numbers | |
| | | | |
| if( (is_float<eT>::value == true) || (is_double<eT>::value == true) ) | | if( (is_float<eT>::value == true) || (is_double<eT>::value == true) ) | |
| { | | { | |
| f.setf(ios::scientific); | | f.setf(ios::scientific); | |
|
| f.precision(10); | | f.precision(12); | |
| cell_width = 18; | | cell_width = 20; | |
| } | | } | |
| | | | |
| for(uword slice=0; slice < x.n_slices; ++slice) | | for(uword slice=0; slice < x.n_slices; ++slice) | |
| { | | { | |
| for(uword row=0; row < x.n_rows; ++row) | | for(uword row=0; row < x.n_rows; ++row) | |
| { | | { | |
| for(uword col=0; col < x.n_cols; ++col) | | for(uword col=0; col < x.n_cols; ++col) | |
| { | | { | |
| f.put(' '); | | f.put(' '); | |
| | | | |
| | | | |
End of changes. 7 change blocks. |
| 11 lines changed or deleted | | 11 lines changed or added | |
|
| gemv.hpp | | gemv.hpp | |
|
| // Copyright (C) 2008-2012 NICTA (www.nicta.com.au) | | // Copyright (C) 2008-2013 NICTA (www.nicta.com.au) | |
| // Copyright (C) 2008-2012 Conrad Sanderson | | // Copyright (C) 2008-2013 Conrad Sanderson | |
| // | | // | |
| // This Source Code Form is subject to the terms of the Mozilla Public | | // This Source Code Form is subject to the terms of the Mozilla Public | |
| // License, v. 2.0. If a copy of the MPL was not distributed with this | | // License, v. 2.0. If a copy of the MPL was not distributed with this | |
| // file, You can obtain one at http://mozilla.org/MPL/2.0/. | | // file, You can obtain one at http://mozilla.org/MPL/2.0/. | |
| | | | |
| //! \addtogroup gemv | | //! \addtogroup gemv | |
| //! @{ | | //! @{ | |
| | | | |
| //! for tiny square matrices, size <= 4x4 | | //! for tiny square matrices, size <= 4x4 | |
| template<const bool do_trans_A=false, const bool use_alpha=false, const bool use_beta=false> | | template<const bool do_trans_A=false, const bool use_alpha=false, const bool use_beta=false> | |
| | | | |
| skipping to change at line 120 | | skipping to change at line 120 | |
| } | | } | |
| break; | | break; | |
| | | | |
| default: | | default: | |
| ; | | ; | |
| } | | } | |
| } | | } | |
| | | | |
| }; | | }; | |
| | | | |
|
| | | class gemv_emul_large_helper | |
| | | { | |
| | | public: | |
| | | | |
| | | template<typename eT> | |
| | | arma_hot | |
| | | inline | |
| | | static | |
| | | typename arma_not_cx<eT>::result | |
| | | dot_row_col( const Mat<eT>& A, const eT* x, const uword row, const uword N) | |
| | | { | |
| | | eT acc1 = eT(0); | |
| | | eT acc2 = eT(0); | |
| | | | |
| | | uword i,j; | |
| | | for(i=0, j=1; j < N; i+=2, j+=2) | |
| | | { | |
| | | const eT xi = x[i]; | |
| | | const eT xj = x[j]; | |
| | | | |
| | | acc1 += A.at(row,i) * xi; | |
| | | acc2 += A.at(row,j) * xj; | |
| | | } | |
| | | | |
| | | if(i < N) | |
| | | { | |
| | | acc1 += A.at(row,i) * x[i]; | |
| | | } | |
| | | | |
| | | return (acc1 + acc2); | |
| | | } | |
| | | | |
| | | template<typename eT> | |
| | | arma_hot | |
| | | inline | |
| | | static | |
| | | typename arma_cx_only<eT>::result | |
| | | dot_row_col( const Mat<eT>& A, const eT* x, const uword row, const uword N) | |
| | | { | |
| | | typedef typename get_pod_type<eT>::result T; | |
| | | | |
| | | T val_real = T(0); | |
| | | T val_imag = T(0); | |
| | | | |
| | | for(uword i=0; i<N; ++i) | |
| | | { | |
| | | const std::complex<T>& Ai = A.at(row,i); | |
| | | const std::complex<T>& xi = x[i]; | |
| | | | |
| | | const T a = Ai.real(); | |
| | | const T b = Ai.imag(); | |
| | | | |
| | | const T c = xi.real(); | |
| | | const T d = xi.imag(); | |
| | | | |
| | | val_real += (a*c) - (b*d); | |
| | | val_imag += (a*d) + (b*c); | |
| | | } | |
| | | | |
| | | return std::complex<T>(val_real, val_imag); | |
| | | } | |
| | | | |
| | | }; | |
| | | | |
| //! \brief | | //! \brief | |
| //! Partial emulation of ATLAS/BLAS gemv(). | | //! Partial emulation of ATLAS/BLAS gemv(). | |
| //! 'y' is assumed to have been set to the correct size (i.e. taking into account the transpose) | | //! 'y' is assumed to have been set to the correct size (i.e. taking into account the transpose) | |
| | | | |
| template<const bool do_trans_A=false, const bool use_alpha=false, const bool use_beta=false> | | template<const bool do_trans_A=false, const bool use_alpha=false, const bool use_beta=false> | |
| class gemv_emul_large | | class gemv_emul_large | |
| { | | { | |
| public: | | public: | |
| | | | |
| template<typename eT> | | template<typename eT> | |
| | | | |
| skipping to change at line 170 | | skipping to change at line 234 | |
| } | | } | |
| else | | else | |
| if( (use_alpha == true) && (use_beta == true) ) | | if( (use_alpha == true) && (use_beta == true) ) | |
| { | | { | |
| y[0] = alpha*acc + beta*y[0]; | | y[0] = alpha*acc + beta*y[0]; | |
| } | | } | |
| } | | } | |
| else | | else | |
| for(uword row=0; row < A_n_rows; ++row) | | for(uword row=0; row < A_n_rows; ++row) | |
| { | | { | |
|
| eT acc = eT(0); | | const eT acc = gemv_emul_large_helper::dot_row_col(A, x, row, A_n_c | |
| | | ols); | |
| for(uword i=0; i < A_n_cols; ++i) | | | |
| { | | | |
| acc += A.at(row,i) * x[i]; | | | |
| } | | | |
| | | | |
| if( (use_alpha == false) && (use_beta == false) ) | | if( (use_alpha == false) && (use_beta == false) ) | |
| { | | { | |
| y[row] = acc; | | y[row] = acc; | |
| } | | } | |
| else | | else | |
| if( (use_alpha == true) && (use_beta == false) ) | | if( (use_alpha == true) && (use_beta == false) ) | |
| { | | { | |
| y[row] = alpha * acc; | | y[row] = alpha * acc; | |
| } | | } | |
| | | | |
| skipping to change at line 319 | | skipping to change at line 378 | |
| public: | | public: | |
| | | | |
| template<typename eT> | | template<typename eT> | |
| inline | | inline | |
| static | | static | |
| void | | void | |
| apply_blas_type( eT* y, const Mat<eT>& A, const eT* x, const eT alpha = eT(1), const eT beta = eT(0) ) | | apply_blas_type( eT* y, const Mat<eT>& A, const eT* x, const eT alpha = eT(1), const eT beta = eT(0) ) | |
| { | | { | |
| arma_extra_debug_sigprint(); | | arma_extra_debug_sigprint(); | |
| | | | |
|
| const uword threshold = (is_complex<eT>::value == true) ? 16u : 64u; | | //const uword threshold = (is_complex<eT>::value == true) ? 16u : 64u; | |
| | | const uword threshold = (is_complex<eT>::value == true) ? 64u : 100u; | |
| | | | |
| if(A.n_elem <= threshold) | | if(A.n_elem <= threshold) | |
| { | | { | |
| gemv_emul<do_trans_A, use_alpha, use_beta>::apply(y,A,x,alpha,beta); | | gemv_emul<do_trans_A, use_alpha, use_beta>::apply(y,A,x,alpha,beta); | |
| } | | } | |
| else | | else | |
| { | | { | |
| #if defined(ARMA_USE_ATLAS) | | #if defined(ARMA_USE_ATLAS) | |
| { | | { | |
|
| arma_extra_debug_print("atlas::cblas_gemv()"); | | if(is_complex<eT>::value == false) | |
| | | { | |
| | | // use gemm() instead of gemv() to work around a speed issue in Atlas 3.8.4 | |
| | | | |
| | | arma_extra_debug_print("atlas::cblas_gemm()"); | |
| | | | |
|
| atlas::cblas_gemv<eT> | | atlas::cblas_gemm<eT> | |
| ( | | ( | |
| atlas::CblasColMajor, | | atlas::CblasColMajor, | |
| (do_trans_A) ? ( is_complex<eT>::value ? CblasConjTrans : atlas::CblasTrans ) : atlas::CblasNoTrans, | | (do_trans_A) ? ( is_complex<eT>::value ? CblasConjTrans : atlas::CblasTrans ) : atlas::CblasNoTrans, | |
| A.n_rows, | | atlas::CblasNoTrans, | |
| A.n_cols, | | (do_trans_A) ? A.n_cols : A.n_rows, | |
| (use_alpha) ? alpha : eT(1), | | 1, | |
| A.mem, | | (do_trans_A) ? A.n_rows : A.n_cols, | |
| A.n_rows, | | (use_alpha) ? alpha : eT(1), | |
| x, | | A.mem, | |
| 1, | | A.n_rows, | |
| (use_beta) ? beta : eT(0), | | x, | |
| y, | | (do_trans_A) ? A.n_rows : A.n_cols, | |
| 1 | | (use_beta) ? beta : eT(0), | |
| ); | | y, | |
| | | (do_trans_A) ? A.n_cols : A.n_rows | |
| | | ); | |
| | | } | |
| | | else | |
| | | { | |
| | | arma_extra_debug_print("atlas::cblas_gemv()"); | |
| | | | |
| | | atlas::cblas_gemv<eT> | |
| | | ( | |
| | | atlas::CblasColMajor, | |
| | | (do_trans_A) ? ( is_complex<eT>::value ? CblasConjTrans : atlas::CblasTrans ) : atlas::CblasNoTrans, | |
| | | A.n_rows, | |
| | | A.n_cols, | |
| | | (use_alpha) ? alpha : eT(1), | |
| | | A.mem, | |
| | | A.n_rows, | |
| | | x, | |
| | | 1, | |
| | | (use_beta) ? beta : eT(0), | |
| | | y, | |
| | | 1 | |
| | | ); | |
| | | } | |
| } | | } | |
| #elif defined(ARMA_USE_BLAS) | | #elif defined(ARMA_USE_BLAS) | |
| { | | { | |
| arma_extra_debug_print("blas::gemv()"); | | arma_extra_debug_print("blas::gemv()"); | |
| | | | |
| const char trans_A = (do_trans_A) ? ( is_complex<eT>::value ? 'C' : 'T' ) : 'N'; | | const char trans_A = (do_trans_A) ? ( is_complex<eT>::value ? 'C' : 'T' ) : 'N'; | |
| const blas_int m = A.n_rows; | | const blas_int m = A.n_rows; | |
| const blas_int n = A.n_cols; | | const blas_int n = A.n_cols; | |
| const eT local_alpha = (use_alpha) ? alpha : eT(1); | | const eT local_alpha = (use_alpha) ? alpha : eT(1); | |
| //const blas_int lda = A.n_rows; | | //const blas_int lda = A.n_rows; | |
| | | | |
End of changes. 6 change blocks. |
| 26 lines changed or deleted | | 118 lines changed or added | |
|