| auxlib_bones.hpp | | auxlib_bones.hpp | |
| | | | |
| skipping to change at line 167 | | skipping to change at line 167 | |
| | | | |
| template<typename eT, typename T1> | | template<typename eT, typename T1> | |
| inline static bool svd_econ(Mat<eT>& U, Col<eT>& S, Mat<eT>& V, const Bas
e<eT,T1>& X, const char mode); | | inline static bool svd_econ(Mat<eT>& U, Col<eT>& S, Mat<eT>& V, const Bas
e<eT,T1>& X, const char mode); | |
| | | | |
| template<typename T, typename T1> | | template<typename T, typename T1> | |
| inline static bool svd_econ(Mat< std::complex<T> >& U, Col<T>& S, Mat< st
d::complex<T> >& V, const Base< std::complex<T>, T1>& X, const char mode); | | inline static bool svd_econ(Mat< std::complex<T> >& U, Col<T>& S, Mat< st
d::complex<T> >& V, const Base< std::complex<T>, T1>& X, const char mode); | |
| | | | |
| // | | // | |
| // solve | | // solve | |
| | | | |
|
| template<typename eT> | | | |
| inline static bool solve (Mat<eT>& out, Mat<eT>& A, const Mat<eT>& B, c | | | |
| onst bool slow = false); | | | |
| | | | |
| template<typename eT> | | | |
| inline static bool solve_od(Mat<eT>& out, Mat<eT>& A, const Mat<eT>& B); | | | |
| | | | |
| template<typename eT> | | | |
| inline static bool solve_ud(Mat<eT>& out, Mat<eT>& A, const Mat<eT>& B); | | | |
| | | | |
| // | | | |
| // solve_new | | | |
| | | | |
| template<typename eT, typename T1> | | template<typename eT, typename T1> | |
|
| inline static bool solve_new (Mat<eT>& out, Mat<eT>& A, const Base<eT,T
1>& X, const bool slow = false); | | inline static bool solve (Mat<eT>& out, Mat<eT>& A, const Base<eT,T1>&
X, const bool slow = false); | |
| | | | |
| template<typename eT, typename T1> | | template<typename eT, typename T1> | |
|
| inline static bool solve_new_od(Mat<eT>& out, Mat<eT>& A, const Base<eT,T
1>& X); | | inline static bool solve_od(Mat<eT>& out, Mat<eT>& A, const Base<eT,T1>&
X); | |
| | | | |
| template<typename eT, typename T1> | | template<typename eT, typename T1> | |
|
| inline static bool solve_new_ud(Mat<eT>& out, Mat<eT>& A, const Base<eT,T
1>& X); | | inline static bool solve_ud(Mat<eT>& out, Mat<eT>& A, const Base<eT,T1>&
X); | |
| | | | |
| // | | // | |
| // solve_tr | | // solve_tr | |
| | | | |
| template<typename eT> | | template<typename eT> | |
| inline static bool solve_tr(Mat<eT>& out, const Mat<eT>& A, const Mat<eT>
& B, const uword layout); | | inline static bool solve_tr(Mat<eT>& out, const Mat<eT>& A, const Mat<eT>
& B, const uword layout); | |
| | | | |
| // | | // | |
| // Schur decomposition | | // Schur decomposition | |
| | | | |
| | | | |
End of changes. 4 change blocks. |
| 16 lines changed or deleted | | 3 lines changed or added | |
|
| auxlib_meat.hpp | | auxlib_meat.hpp | |
| | | | |
| skipping to change at line 2418 | | skipping to change at line 2418 | |
| arma_ignore(X); | | arma_ignore(X); | |
| arma_ignore(mode); | | arma_ignore(mode); | |
| arma_stop("svd(): use of LAPACK needs to be enabled"); | | arma_stop("svd(): use of LAPACK needs to be enabled"); | |
| return false; | | return false; | |
| } | | } | |
| #endif | | #endif | |
| } | | } | |
| | | | |
| //! Solve a system of linear equations. | | //! Solve a system of linear equations. | |
| //! Assumes that A.n_rows = A.n_cols and B.n_rows = A.n_rows | | //! Assumes that A.n_rows = A.n_cols and B.n_rows = A.n_rows | |
|
| template<typename eT> | | | |
| inline | | | |
| bool | | | |
| auxlib::solve(Mat<eT>& out, Mat<eT>& A, const Mat<eT>& B, const bool slow) | | | |
| { | | | |
| arma_extra_debug_sigprint(); | | | |
| | | | |
| const uword A_n_rows = A.n_rows; | | | |
| const uword B_n_cols = B.n_cols; | | | |
| | | | |
| if(A.is_empty() || B.is_empty()) | | | |
| { | | | |
| out.zeros(A.n_cols, B_n_cols); | | | |
| return true; | | | |
| } | | | |
| else | | | |
| { | | | |
| bool status = false; | | | |
| | | | |
| if( (A_n_rows <= 4) && (slow == false) ) | | | |
| { | | | |
| Mat<eT> A_inv; | | | |
| | | | |
| status = auxlib::inv_noalias_tinymat(A_inv, A, A_n_rows); | | | |
| | | | |
| if(status == true) | | | |
| { | | | |
| out.set_size(A_n_rows, B_n_cols); | | | |
| | | | |
| gemm_emul<false,false,false,false>::apply(out, A_inv, B); | | | |
| | | | |
| return true; | | | |
| } | | | |
| } | | | |
| | | | |
| if( (A_n_rows > 4) || (status == false) ) | | | |
| { | | | |
| #if defined(ARMA_USE_ATLAS) | | | |
| { | | | |
| out = B; | | | |
| | | | |
| podarray<int> ipiv(A_n_rows + 2); // +2 for paranoia: old versions | | | |
| of Atlas might be trashing memory | | | |
| | | | |
| int info = atlas::clapack_gesv<eT>(atlas::CblasColMajor, A_n_rows, | | | |
| B_n_cols, A.memptr(), A_n_rows, ipiv.memptr(), out.memptr(), A_n_rows); | | | |
| | | | |
| return (info == 0); | | | |
| } | | | |
| #elif defined(ARMA_USE_LAPACK) | | | |
| { | | | |
| out = B; | | | |
| | | | |
| blas_int n = blas_int(A_n_rows); // assuming A is square | | | |
| blas_int lda = blas_int(A_n_rows); | | | |
| blas_int ldb = blas_int(A_n_rows); | | | |
| blas_int nrhs = blas_int(B_n_cols); | | | |
| blas_int info = 0; | | | |
| | | | |
| podarray<blas_int> ipiv(A_n_rows + 2); // +2 for paranoia: some ve | | | |
| rsions of Lapack might be trashing memory | | | |
| | | | |
| arma_extra_debug_print("lapack::gesv()"); | | | |
| lapack::gesv<eT>(&n, &nrhs, A.memptr(), &lda, ipiv.memptr(), out.me | | | |
| mptr(), &ldb, &info); | | | |
| | | | |
| arma_extra_debug_print("lapack::gesv() -- finished"); | | | |
| | | | |
| return (info == 0); | | | |
| } | | | |
| #else | | | |
| { | | | |
| arma_stop("solve(): use of ATLAS or LAPACK needs to be enabled"); | | | |
| return false; | | | |
| } | | | |
| #endif | | | |
| } | | | |
| } | | | |
| | | | |
| return true; | | | |
| } | | | |
| | | | |
| //! Solve an over-determined system. | | | |
| //! Assumes that A.n_rows > A.n_cols and B.n_rows = A.n_rows | | | |
| template<typename eT> | | | |
| inline | | | |
| bool | | | |
| auxlib::solve_od(Mat<eT>& out, Mat<eT>& A, const Mat<eT>& B) | | | |
| { | | | |
| arma_extra_debug_sigprint(); | | | |
| | | | |
| #if defined(ARMA_USE_LAPACK) | | | |
| { | | | |
| const uword A_n_rows = A.n_rows; | | | |
| const uword A_n_cols = A.n_cols; | | | |
| | | | |
| const uword B_n_rows = B.n_rows; | | | |
| const uword B_n_cols = B.n_cols; | | | |
| | | | |
| out.set_size(A_n_cols, B_n_cols); | | | |
| | | | |
| if(A.is_empty() || B.is_empty()) | | | |
| { | | | |
| out.zeros(); | | | |
| return true; | | | |
| } | | | |
| | | | |
| char trans = 'N'; | | | |
| | | | |
| blas_int m = blas_int(A_n_rows); | | | |
| blas_int n = blas_int(A_n_cols); | | | |
| blas_int lda = blas_int(A_n_rows); | | | |
| blas_int ldb = blas_int(A_n_rows); | | | |
| blas_int nrhs = blas_int(B_n_cols); | | | |
| blas_int lwork = 2*((std::max)(blas_int(1), n + (std::max)(n, nrhs))); | | | |
| blas_int info = 0; | | | |
| | | | |
| Mat<eT> tmp = B; | | | |
| | | | |
| podarray<eT> work( static_cast<uword>(lwork) ); | | | |
| | | | |
| // NOTE: the dgels() function in the lapack library supplied by ATLAS 3 | | | |
| .6 seems to have problems | | | |
| arma_extra_debug_print("lapack::gels()"); | | | |
| lapack::gels<eT>( &trans, &m, &n, &nrhs, A.memptr(), &lda, tmp.memptr() | | | |
| , &ldb, work.memptr(), &lwork, &info ); | | | |
| | | | |
| arma_extra_debug_print("lapack::gels() -- finished"); | | | |
| | | | |
| for(uword col=0; col<B_n_cols; ++col) | | | |
| { | | | |
| arrayops::copy( out.colptr(col), tmp.colptr(col), A_n_cols ); | | | |
| } | | | |
| | | | |
| return (info == 0); | | | |
| } | | | |
| #else | | | |
| { | | | |
| arma_ignore(out); | | | |
| arma_ignore(A); | | | |
| arma_ignore(B); | | | |
| arma_stop("solve(): use of LAPACK needs to be enabled"); | | | |
| return false; | | | |
| } | | | |
| #endif | | | |
| } | | | |
| | | | |
| //! Solve an under-determined system. | | | |
| //! Assumes that A.n_rows < A.n_cols and B.n_rows = A.n_rows | | | |
| template<typename eT> | | | |
| inline | | | |
| bool | | | |
| auxlib::solve_ud(Mat<eT>& out, Mat<eT>& A, const Mat<eT>& B) | | | |
| { | | | |
| arma_extra_debug_sigprint(); | | | |
| | | | |
| #if defined(ARMA_USE_LAPACK) | | | |
| { | | | |
| const uword A_n_rows = A.n_rows; | | | |
| const uword A_n_cols = A.n_cols; | | | |
| | | | |
| const uword B_n_rows = B.n_rows; | | | |
| const uword B_n_cols = B.n_cols; | | | |
| | | | |
| out.set_size(A_n_cols, B_n_cols); | | | |
| | | | |
| if(A.is_empty() || B.is_empty()) | | | |
| { | | | |
| out.zeros(); | | | |
| return true; | | | |
| } | | | |
| | | | |
| char trans = 'N'; | | | |
| | | | |
| blas_int m = blas_int(A_n_rows); | | | |
| blas_int n = blas_int(A_n_cols); | | | |
| blas_int lda = blas_int(A_n_rows); | | | |
| blas_int ldb = blas_int(A_n_cols); | | | |
| blas_int nrhs = blas_int(B_n_cols); | | | |
| blas_int lwork = 2*((std::max)(blas_int(1), m + (std::max)(m,nrhs))); | | | |
| blas_int info = 0; | | | |
| | | | |
| Mat<eT> tmp(A_n_cols, B_n_cols); | | | |
| tmp.zeros(); | | | |
| | | | |
| for(uword col=0; col<B_n_cols; ++col) | | | |
| { | | | |
| eT* tmp_colmem = tmp.colptr(col); | | | |
| | | | |
| arrayops::copy( tmp_colmem, B.colptr(col), B_n_rows ); | | | |
| | | | |
| for(uword row=B_n_rows; row<A_n_cols; ++row) | | | |
| { | | | |
| tmp_colmem[row] = eT(0); | | | |
| } | | | |
| } | | | |
| | | | |
| podarray<eT> work( static_cast<uword>(lwork) ); | | | |
| | | | |
| // NOTE: the dgels() function in the lapack library supplied by ATLAS 3 | | | |
| .6 seems to have problems | | | |
| arma_extra_debug_print("lapack::gels()"); | | | |
| lapack::gels<eT>( &trans, &m, &n, &nrhs, A.memptr(), &lda, tmp.memptr() | | | |
| , &ldb, work.memptr(), &lwork, &info ); | | | |
| | | | |
| arma_extra_debug_print("lapack::gels() -- finished"); | | | |
| | | | |
| for(uword col=0; col<B_n_cols; ++col) | | | |
| { | | | |
| arrayops::copy( out.colptr(col), tmp.colptr(col), A_n_cols ); | | | |
| } | | | |
| | | | |
| return (info == 0); | | | |
| } | | | |
| #else | | | |
| { | | | |
| arma_ignore(out); | | | |
| arma_ignore(A); | | | |
| arma_ignore(B); | | | |
| arma_stop("solve(): use of LAPACK needs to be enabled"); | | | |
| return false; | | | |
| } | | | |
| #endif | | | |
| } | | | |
| | | | |
| //! Solve a system of linear equations. | | | |
| //! Assumes that A.n_rows = A.n_cols and B.n_rows = A.n_rows | | | |
| template<typename eT, typename T1> | | template<typename eT, typename T1> | |
| inline | | inline | |
| bool | | bool | |
|
| auxlib::solve_new(Mat<eT>& out, Mat<eT>& A, const Base<eT,T1>& X, const boo
l slow) | | auxlib::solve(Mat<eT>& out, Mat<eT>& A, const Base<eT,T1>& X, const bool sl
ow) | |
| { | | { | |
| arma_extra_debug_sigprint(); | | arma_extra_debug_sigprint(); | |
| | | | |
| bool status = false; | | bool status = false; | |
| | | | |
| const uword A_n_rows = A.n_rows; | | const uword A_n_rows = A.n_rows; | |
| | | | |
| if( (A_n_rows <= 4) && (slow == false) ) | | if( (A_n_rows <= 4) && (slow == false) ) | |
| { | | { | |
| Mat<eT> A_inv; | | Mat<eT> A_inv; | |
| | | | |
| skipping to change at line 2734 | | skipping to change at line 2515 | |
| } | | } | |
| | | | |
| return true; | | return true; | |
| } | | } | |
| | | | |
| //! Solve an over-determined system. | | //! Solve an over-determined system. | |
| //! Assumes that A.n_rows > A.n_cols and B.n_rows = A.n_rows | | //! Assumes that A.n_rows > A.n_cols and B.n_rows = A.n_rows | |
| template<typename eT, typename T1> | | template<typename eT, typename T1> | |
| inline | | inline | |
| bool | | bool | |
|
| auxlib::solve_new_od(Mat<eT>& out, Mat<eT>& A, const Base<eT,T1>& X) | | auxlib::solve_od(Mat<eT>& out, Mat<eT>& A, const Base<eT,T1>& X) | |
| { | | { | |
| arma_extra_debug_sigprint(); | | arma_extra_debug_sigprint(); | |
| | | | |
| #if defined(ARMA_USE_LAPACK) | | #if defined(ARMA_USE_LAPACK) | |
| { | | { | |
| Mat<eT> tmp = X.get_ref(); | | Mat<eT> tmp = X.get_ref(); | |
| | | | |
| const uword A_n_rows = A.n_rows; | | const uword A_n_rows = A.n_rows; | |
| const uword A_n_cols = A.n_cols; | | const uword A_n_cols = A.n_cols; | |
| | | | |
| | | | |
| skipping to change at line 2799 | | skipping to change at line 2580 | |
| return false; | | return false; | |
| } | | } | |
| #endif | | #endif | |
| } | | } | |
| | | | |
| //! Solve an under-determined system. | | //! Solve an under-determined system. | |
| //! Assumes that A.n_rows < A.n_cols and B.n_rows = A.n_rows | | //! Assumes that A.n_rows < A.n_cols and B.n_rows = A.n_rows | |
| template<typename eT, typename T1> | | template<typename eT, typename T1> | |
| inline | | inline | |
| bool | | bool | |
|
| auxlib::solve_new_ud(Mat<eT>& out, Mat<eT>& A, const Base<eT,T1>& X) | | auxlib::solve_ud(Mat<eT>& out, Mat<eT>& A, const Base<eT,T1>& X) | |
| { | | { | |
| arma_extra_debug_sigprint(); | | arma_extra_debug_sigprint(); | |
| | | | |
|
| | | // TODO: this function provides the same results as Octave 3.4.2. | |
| | | // TODO: however, these results are different than Matlab 7.12.0.635. | |
| | | // TODO: figure out whether both Octave and Matlab are correct, or only o | |
| | | ne of them | |
| | | | |
| #if defined(ARMA_USE_LAPACK) | | #if defined(ARMA_USE_LAPACK) | |
| { | | { | |
| const unwrap<T1> Y( X.get_ref() ); | | const unwrap<T1> Y( X.get_ref() ); | |
| const Mat<eT>& B = Y.M; | | const Mat<eT>& B = Y.M; | |
| | | | |
| const uword A_n_rows = A.n_rows; | | const uword A_n_rows = A.n_rows; | |
| const uword A_n_cols = A.n_cols; | | const uword A_n_cols = A.n_cols; | |
| | | | |
| const uword B_n_rows = B.n_rows; | | const uword B_n_rows = B.n_rows; | |
| const uword B_n_cols = B.n_cols; | | const uword B_n_cols = B.n_cols; | |
| | | | |
End of changes. 5 change blocks. |
| 230 lines changed or deleted | | 8 lines changed or added | |
|
| config.hpp | | config.hpp | |
| | | | |
| skipping to change at line 13 | | skipping to change at line 13 | |
| // | | // | |
| // This file is part of the Armadillo C++ library. | | // This file is part of the Armadillo C++ library. | |
| // It is provided without any warranty of fitness | | // It is provided without any warranty of fitness | |
| // for any purpose. You can redistribute this file | | // for any purpose. You can redistribute this file | |
| // and/or modify it under the terms of the GNU | | // and/or modify it under the terms of the GNU | |
| // Lesser General Public License (LGPL) as published | | // Lesser General Public License (LGPL) as published | |
| // by the Free Software Foundation, either version 3 | | // by the Free Software Foundation, either version 3 | |
| // of the License or (at your option) any later version. | | // of the License or (at your option) any later version. | |
| // (see http://www.opensource.org/licenses for more info) | | // (see http://www.opensource.org/licenses for more info) | |
| | | | |
|
| // #define ARMA_64BIT_WORD | | | |
| //// Uncomment the above line if you require matrices/vectors capable of ho | | | |
| lding more than 4 billion elements. | | | |
| //// Your machine and compiler must have support for 64 bit integers (eg. v | | | |
| ia "long" or "long long") | | | |
| | | | |
| // #define ARMA_USE_CXX11 | | | |
| //// Uncomment the above line if you have a C++ compiler that supports the | | | |
| C++11 standard | | | |
| //// This will enable additional features, such as use of initialiser lists | | | |
| | | | |
| #if !defined(ARMA_USE_LAPACK) | | #if !defined(ARMA_USE_LAPACK) | |
| #define ARMA_USE_LAPACK | | #define ARMA_USE_LAPACK | |
|
| //// Uncomment the above line if you have LAPACK or a fast replacement for
LAPACK, | | //// Uncomment the above line if you have LAPACK or a high-speed replacemen
t for LAPACK, | |
| //// such as Intel's MKL, AMD's ACML, or the Accelerate framework. | | //// such as Intel's MKL, AMD's ACML, or the Accelerate framework. | |
| //// LAPACK is required for matrix decompositions (eg. SVD) and matrix inve
rse. | | //// LAPACK is required for matrix decompositions (eg. SVD) and matrix inve
rse. | |
| #endif | | #endif | |
| | | | |
| #if !defined(ARMA_USE_BLAS) | | #if !defined(ARMA_USE_BLAS) | |
| #define ARMA_USE_BLAS | | #define ARMA_USE_BLAS | |
|
| //// Uncomment the above line if you have BLAS or a fast replacement for BL
AS, | | //// Uncomment the above line if you have BLAS or a high-speed replacement
for BLAS, | |
| //// such as GotoBLAS, Intel's MKL, AMD's ACML, or the Accelerate framework
. | | //// such as GotoBLAS, Intel's MKL, AMD's ACML, or the Accelerate framework
. | |
| //// BLAS is used for matrix multiplication. | | //// BLAS is used for matrix multiplication. | |
| //// Without BLAS, matrix multiplication will still work, but might be slow
er. | | //// Without BLAS, matrix multiplication will still work, but might be slow
er. | |
| #endif | | #endif | |
| | | | |
|
| // #define ARMA_BLAS_LONG | | // #define ARMA_BLAS_CAPITALS | |
| //// Uncomment the above line if your BLAS and LAPACK libraries use "long" | | //// Uncomment the above line if your BLAS and LAPACK libraries have capita | |
| instead of "int" | | lised function names (eg. ACML on 64-bit Windows) | |
| | | | |
| // #define ARMA_BLAS_LONG_LONG | | | |
| //// Uncomment the above line if your BLAS and LAPACK libraries use "long l | | | |
| ong" instead of "int" | | | |
| | | | |
| #define ARMA_BLAS_UNDERSCORE | | #define ARMA_BLAS_UNDERSCORE | |
| //// Uncomment the above line if your BLAS and LAPACK libraries have functi
on names with a trailing underscore. | | //// Uncomment the above line if your BLAS and LAPACK libraries have functi
on names with a trailing underscore. | |
| //// Conversely, comment it out if the function names don't have a trailing
underscore. | | //// Conversely, comment it out if the function names don't have a trailing
underscore. | |
| | | | |
|
| // #define ARMA_BLAS_CAPITALS | | // #define ARMA_BLAS_LONG | |
| //// Uncomment the above line if your BLAS and LAPACK libraries have capita | | //// Uncomment the above line if your BLAS and LAPACK libraries use "long" | |
| lised function names (eg. ACML on 64-bit Windows) | | instead of "int" | |
| | | | |
|
| #if !defined(ARMA_MAT_PREALLOC) | | // #define ARMA_BLAS_LONG_LONG | |
| #define ARMA_MAT_PREALLOC 16 | | //// Uncomment the above line if your BLAS and LAPACK libraries use "long l | |
| #endif | | ong" instead of "int" | |
| //// This is the number of preallocated elements used by matrices and vecto | | | |
| rs; | | | |
| //// it must be an integer that is at least 1. | | | |
| //// If you mainly use lots of very small vectors (eg. <= 4 elements), | | | |
| //// change the number to the size of your vectors. | | | |
| | | | |
| // #define ARMA_USE_TBB_ALLOC | | // #define ARMA_USE_TBB_ALLOC | |
| //// Uncomment the above line if you want to use Intel TBB scalable_malloc(
) and scalable_free() instead of standard new[] and delete[] | | //// Uncomment the above line if you want to use Intel TBB scalable_malloc(
) and scalable_free() instead of standard new[] and delete[] | |
| | | | |
| // #define ARMA_USE_MKL_ALLOC | | // #define ARMA_USE_MKL_ALLOC | |
| //// Uncomment the above line if you want to use Intel MKL mkl_malloc() and
mkl_free() instead of standard new[] and delete[] | | //// Uncomment the above line if you want to use Intel MKL mkl_malloc() and
mkl_free() instead of standard new[] and delete[] | |
| | | | |
| #define ARMA_USE_ATLAS | | #define ARMA_USE_ATLAS | |
| #define ARMA_ATLAS_INCLUDE_DIR /usr/include/ | | #define ARMA_ATLAS_INCLUDE_DIR /usr/include/ | |
| //// If you're using ATLAS and the compiler can't find cblas.h and/or clapa
ck.h | | //// If you're using ATLAS and the compiler can't find cblas.h and/or clapa
ck.h | |
| //// uncomment the above define and specify the appropriate include directo
ry. | | //// uncomment the above define and specify the appropriate include directo
ry. | |
| //// Make sure the directory has a trailing / | | //// Make sure the directory has a trailing / | |
| | | | |
|
| | | // #define ARMA_64BIT_WORD | |
| | | //// Uncomment the above line if you require matrices/vectors capable of ho | |
| | | lding more than 4 billion elements. | |
| | | //// Your machine and compiler must have support for 64 bit integers (eg. v | |
| | | ia "long" or "long long") | |
| | | | |
| | | // #define ARMA_USE_CXX11 | |
| | | //// Uncomment the above line if you have a C++ compiler that supports the | |
| | | C++11 standard | |
| | | //// This will enable additional features, such as use of initialiser lists | |
| | | | |
| | | #if !defined(ARMA_MAT_PREALLOC) | |
| | | #define ARMA_MAT_PREALLOC 16 | |
| | | #endif | |
| | | //// This is the number of preallocated elements used by matrices and vecto | |
| | | rs; | |
| | | //// it must be an integer that is at least 1. | |
| | | //// If you mainly use lots of very small vectors (eg. <= 4 elements), | |
| | | //// change the number to the size of your vectors. | |
| | | | |
| | | // #define ARMA_NO_DEBUG | |
| | | //// Uncomment the above line if you want to disable all run-time checks. | |
| | | //// This will result in faster code, but you first need to make sure that | |
| | | your code runs correctly! | |
| | | //// We strongly recommend to have the run-time checks enabled during devel | |
| | | opment, | |
| | | //// as this greatly aids in finding mistakes in your code, and hence speed | |
| | | s up development. | |
| | | //// We recommend that run-time checks be disabled _only_ for the shipped v | |
| | | ersion of your program. | |
| | | | |
| | | // #define ARMA_EXTRA_DEBUG | |
| | | //// Uncomment the above line if you want to see the function traces of how | |
| | | Armadillo evaluates expressions. | |
| | | //// This is mainly useful for debugging of the library. | |
| | | | |
| #define ARMA_USE_BOOST | | #define ARMA_USE_BOOST | |
| #define ARMA_USE_BOOST_DATE | | #define ARMA_USE_BOOST_DATE | |
| #define ARMA_USE_WRAPPER | | #define ARMA_USE_WRAPPER | |
| /* #undef ARMA_USE_HDF5 */ | | /* #undef ARMA_USE_HDF5 */ | |
| | | | |
| #if !defined(ARMA_DEFAULT_OSTREAM) | | #if !defined(ARMA_DEFAULT_OSTREAM) | |
| #define ARMA_DEFAULT_OSTREAM std::cout | | #define ARMA_DEFAULT_OSTREAM std::cout | |
| #endif | | #endif | |
| | | | |
| #define ARMA_PRINT_LOGIC_ERRORS | | #define ARMA_PRINT_LOGIC_ERRORS | |
| #define ARMA_PRINT_RUNTIME_ERRORS | | #define ARMA_PRINT_RUNTIME_ERRORS | |
| | | | |
| #define ARMA_HAVE_STD_ISFINITE | | #define ARMA_HAVE_STD_ISFINITE | |
| #define ARMA_HAVE_STD_ISINF | | #define ARMA_HAVE_STD_ISINF | |
| #define ARMA_HAVE_STD_ISNAN | | #define ARMA_HAVE_STD_ISNAN | |
| #define ARMA_HAVE_STD_SNPRINTF | | #define ARMA_HAVE_STD_SNPRINTF | |
| | | | |
| #define ARMA_HAVE_LOG1P | | #define ARMA_HAVE_LOG1P | |
| #define ARMA_HAVE_GETTIMEOFDAY | | #define ARMA_HAVE_GETTIMEOFDAY | |
| | | | |
|
| // #define ARMA_EXTRA_DEBUG | | | |
| // #define ARMA_NO_DEBUG | | | |
| | | | |
| #if defined(ARMA_DONT_USE_ATLAS) | | | |
| #undef ARMA_USE_ATLAS | | | |
| #undef ARMA_ATLAS_INCLUDE_DIR | | | |
| #endif | | | |
| | | | |
| #if defined(ARMA_DONT_USE_LAPACK) | | #if defined(ARMA_DONT_USE_LAPACK) | |
| #undef ARMA_USE_LAPACK | | #undef ARMA_USE_LAPACK | |
| #endif | | #endif | |
| | | | |
| #if defined(ARMA_DONT_USE_BLAS) | | #if defined(ARMA_DONT_USE_BLAS) | |
| #undef ARMA_USE_BLAS | | #undef ARMA_USE_BLAS | |
| #endif | | #endif | |
| | | | |
|
| | | #if defined(ARMA_DONT_USE_ATLAS) | |
| | | #undef ARMA_USE_ATLAS | |
| | | #undef ARMA_ATLAS_INCLUDE_DIR | |
| | | #endif | |
| | | | |
| #if defined(ARMA_DONT_PRINT_LOGIC_ERRORS) | | #if defined(ARMA_DONT_PRINT_LOGIC_ERRORS) | |
| #undef ARMA_PRINT_LOGIC_ERRORS | | #undef ARMA_PRINT_LOGIC_ERRORS | |
| #endif | | #endif | |
| | | | |
| #if defined(ARMA_DONT_PRINT_RUNTIME_ERRORS) | | #if defined(ARMA_DONT_PRINT_RUNTIME_ERRORS) | |
| #undef ARMA_PRINT_RUNTIME_ERRORS | | #undef ARMA_PRINT_RUNTIME_ERRORS | |
| #endif | | #endif | |
| | | | |
End of changes. 9 change blocks. |
| 39 lines changed or deleted | | 52 lines changed or added | |
|
| eglue_core_meat.hpp | | eglue_core_meat.hpp | |
| | | | |
| skipping to change at line 149 | | skipping to change at line 149 | |
| | | | |
| const bool prefer_at_accessor = (Proxy<T1>::prefer_at_accessor || Proxy<T
2>::prefer_at_accessor); | | const bool prefer_at_accessor = (Proxy<T1>::prefer_at_accessor || Proxy<T
2>::prefer_at_accessor); | |
| | | | |
| // NOTE: we're assuming that the matrix has already been set to the corre
ct size and there is no aliasing; | | // NOTE: we're assuming that the matrix has already been set to the corre
ct size and there is no aliasing; | |
| // size setting and alias checking is done by either the Mat contructor o
r operator=() | | // size setting and alias checking is done by either the Mat contructor o
r operator=() | |
| | | | |
| eT* out_mem = out.memptr(); | | eT* out_mem = out.memptr(); | |
| | | | |
| if(prefer_at_accessor == false) | | if(prefer_at_accessor == false) | |
| { | | { | |
|
| const uword n_elem = out.n_elem; | | // for fixed-sized vectors with n_elem >= 6, using x.get_n_elem() direc | |
| //const uword n_elem = x.get_n_elem(); // for fixed-sized matrices this | | tly causes a mis-optimisation (slowdown) of the loop under GCC 4.4 | |
| causes a mis-optimisation (slowdown) of the loop under GCC 4.4 | | const uword n_elem = (Proxy<T1>::is_fixed || Proxy<T2>::is_fixed) ? ( ( | |
| | | x.get_n_elem() <= 4) ? x.get_n_elem() : out.n_elem ) : out.n_elem; | |
| | | | |
| typename Proxy<T1>::ea_type P1 = x.P1.get_ea(); | | typename Proxy<T1>::ea_type P1 = x.P1.get_ea(); | |
| typename Proxy<T2>::ea_type P2 = x.P2.get_ea(); | | typename Proxy<T2>::ea_type P2 = x.P2.get_ea(); | |
| | | | |
| if(is_same_type<eglue_type, eglue_plus >::value == true) { arma_ap
plier_1(=, +); } | | if(is_same_type<eglue_type, eglue_plus >::value == true) { arma_ap
plier_1(=, +); } | |
| else if(is_same_type<eglue_type, eglue_minus>::value == true) { arma_ap
plier_1(=, -); } | | else if(is_same_type<eglue_type, eglue_minus>::value == true) { arma_ap
plier_1(=, -); } | |
| else if(is_same_type<eglue_type, eglue_div >::value == true) { arma_ap
plier_1(=, /); } | | else if(is_same_type<eglue_type, eglue_div >::value == true) { arma_ap
plier_1(=, /); } | |
| else if(is_same_type<eglue_type, eglue_schur>::value == true) { arma_ap
plier_1(=, *); } | | else if(is_same_type<eglue_type, eglue_schur>::value == true) { arma_ap
plier_1(=, *); } | |
| } | | } | |
| else | | else | |
| | | | |
| skipping to change at line 197 | | skipping to change at line 197 | |
| arma_debug_assert_same_size(out.n_rows, out.n_cols, n_rows, n_cols, "addi
tion"); | | arma_debug_assert_same_size(out.n_rows, out.n_cols, n_rows, n_cols, "addi
tion"); | |
| | | | |
| typedef typename T1::elem_type eT; | | typedef typename T1::elem_type eT; | |
| | | | |
| eT* out_mem = out.memptr(); | | eT* out_mem = out.memptr(); | |
| | | | |
| const bool prefer_at_accessor = (Proxy<T1>::prefer_at_accessor || Proxy<T
2>::prefer_at_accessor); | | const bool prefer_at_accessor = (Proxy<T1>::prefer_at_accessor || Proxy<T
2>::prefer_at_accessor); | |
| | | | |
| if(prefer_at_accessor == false) | | if(prefer_at_accessor == false) | |
| { | | { | |
|
| const uword n_elem = out.n_elem; | | // for fixed-sized vectors with n_elem >= 6, using x.get_n_elem() direc | |
| //const uword n_elem = x.get_n_elem(); // for fixed-sized matrices this | | tly causes a mis-optimisation (slowdown) of the loop under GCC 4.4 | |
| causes a mis-optimisation (slowdown) of the loop under GCC 4.4 | | const uword n_elem = (Proxy<T1>::is_fixed || Proxy<T2>::is_fixed) ? ( ( | |
| | | x.get_n_elem() <= 4) ? x.get_n_elem() : out.n_elem ) : out.n_elem; | |
| | | | |
| typename Proxy<T1>::ea_type P1 = x.P1.get_ea(); | | typename Proxy<T1>::ea_type P1 = x.P1.get_ea(); | |
| typename Proxy<T2>::ea_type P2 = x.P2.get_ea(); | | typename Proxy<T2>::ea_type P2 = x.P2.get_ea(); | |
| | | | |
| if(is_same_type<eglue_type, eglue_plus >::value == true) { arma_ap
plier_1(+=, +); } | | if(is_same_type<eglue_type, eglue_plus >::value == true) { arma_ap
plier_1(+=, +); } | |
| else if(is_same_type<eglue_type, eglue_minus>::value == true) { arma_ap
plier_1(+=, -); } | | else if(is_same_type<eglue_type, eglue_minus>::value == true) { arma_ap
plier_1(+=, -); } | |
| else if(is_same_type<eglue_type, eglue_div >::value == true) { arma_ap
plier_1(+=, /); } | | else if(is_same_type<eglue_type, eglue_div >::value == true) { arma_ap
plier_1(+=, /); } | |
| else if(is_same_type<eglue_type, eglue_schur>::value == true) { arma_ap
plier_1(+=, *); } | | else if(is_same_type<eglue_type, eglue_schur>::value == true) { arma_ap
plier_1(+=, *); } | |
| } | | } | |
| else | | else | |
| | | | |
| skipping to change at line 242 | | skipping to change at line 242 | |
| arma_debug_assert_same_size(out.n_rows, out.n_cols, n_rows, n_cols, "subt
raction"); | | arma_debug_assert_same_size(out.n_rows, out.n_cols, n_rows, n_cols, "subt
raction"); | |
| | | | |
| typedef typename T1::elem_type eT; | | typedef typename T1::elem_type eT; | |
| | | | |
| eT* out_mem = out.memptr(); | | eT* out_mem = out.memptr(); | |
| | | | |
| const bool prefer_at_accessor = (Proxy<T1>::prefer_at_accessor || Proxy<T
2>::prefer_at_accessor); | | const bool prefer_at_accessor = (Proxy<T1>::prefer_at_accessor || Proxy<T
2>::prefer_at_accessor); | |
| | | | |
| if(prefer_at_accessor == false) | | if(prefer_at_accessor == false) | |
| { | | { | |
|
| const uword n_elem = out.n_elem; | | // for fixed-sized vectors with n_elem >= 6, using x.get_n_elem() direc | |
| //const uword n_elem = x.get_n_elem(); // for fixed-sized matrices this | | tly causes a mis-optimisation (slowdown) of the loop under GCC 4.4 | |
| causes a mis-optimisation (slowdown) of the loop under GCC 4.4 | | const uword n_elem = (Proxy<T1>::is_fixed || Proxy<T2>::is_fixed) ? ( ( | |
| | | x.get_n_elem() <= 4) ? x.get_n_elem() : out.n_elem ) : out.n_elem; | |
| | | | |
| typename Proxy<T1>::ea_type P1 = x.P1.get_ea(); | | typename Proxy<T1>::ea_type P1 = x.P1.get_ea(); | |
| typename Proxy<T2>::ea_type P2 = x.P2.get_ea(); | | typename Proxy<T2>::ea_type P2 = x.P2.get_ea(); | |
| | | | |
| if(is_same_type<eglue_type, eglue_plus >::value == true) { arma_ap
plier_1(-=, +); } | | if(is_same_type<eglue_type, eglue_plus >::value == true) { arma_ap
plier_1(-=, +); } | |
| else if(is_same_type<eglue_type, eglue_minus>::value == true) { arma_ap
plier_1(-=, -); } | | else if(is_same_type<eglue_type, eglue_minus>::value == true) { arma_ap
plier_1(-=, -); } | |
| else if(is_same_type<eglue_type, eglue_div >::value == true) { arma_ap
plier_1(-=, /); } | | else if(is_same_type<eglue_type, eglue_div >::value == true) { arma_ap
plier_1(-=, /); } | |
| else if(is_same_type<eglue_type, eglue_schur>::value == true) { arma_ap
plier_1(-=, *); } | | else if(is_same_type<eglue_type, eglue_schur>::value == true) { arma_ap
plier_1(-=, *); } | |
| } | | } | |
| else | | else | |
| | | | |
| skipping to change at line 287 | | skipping to change at line 287 | |
| arma_debug_assert_same_size(out.n_rows, out.n_cols, n_rows, n_cols, "elem
ent-wise multiplication"); | | arma_debug_assert_same_size(out.n_rows, out.n_cols, n_rows, n_cols, "elem
ent-wise multiplication"); | |
| | | | |
| typedef typename T1::elem_type eT; | | typedef typename T1::elem_type eT; | |
| | | | |
| eT* out_mem = out.memptr(); | | eT* out_mem = out.memptr(); | |
| | | | |
| const bool prefer_at_accessor = (Proxy<T1>::prefer_at_accessor || Proxy<T
2>::prefer_at_accessor); | | const bool prefer_at_accessor = (Proxy<T1>::prefer_at_accessor || Proxy<T
2>::prefer_at_accessor); | |
| | | | |
| if(prefer_at_accessor == false) | | if(prefer_at_accessor == false) | |
| { | | { | |
|
| const uword n_elem = out.n_elem; | | // for fixed-sized vectors with n_elem >= 6, using x.get_n_elem() direc | |
| //const uword n_elem = x.get_n_elem(); // for fixed-sized matrices this | | tly causes a mis-optimisation (slowdown) of the loop under GCC 4.4 | |
| causes a mis-optimisation (slowdown) of the loop under GCC 4.4 | | const uword n_elem = (Proxy<T1>::is_fixed || Proxy<T2>::is_fixed) ? ( ( | |
| | | x.get_n_elem() <= 4) ? x.get_n_elem() : out.n_elem ) : out.n_elem; | |
| | | | |
| typename Proxy<T1>::ea_type P1 = x.P1.get_ea(); | | typename Proxy<T1>::ea_type P1 = x.P1.get_ea(); | |
| typename Proxy<T2>::ea_type P2 = x.P2.get_ea(); | | typename Proxy<T2>::ea_type P2 = x.P2.get_ea(); | |
| | | | |
| if(is_same_type<eglue_type, eglue_plus >::value == true) { arma_ap
plier_1(*=, +); } | | if(is_same_type<eglue_type, eglue_plus >::value == true) { arma_ap
plier_1(*=, +); } | |
| else if(is_same_type<eglue_type, eglue_minus>::value == true) { arma_ap
plier_1(*=, -); } | | else if(is_same_type<eglue_type, eglue_minus>::value == true) { arma_ap
plier_1(*=, -); } | |
| else if(is_same_type<eglue_type, eglue_div >::value == true) { arma_ap
plier_1(*=, /); } | | else if(is_same_type<eglue_type, eglue_div >::value == true) { arma_ap
plier_1(*=, /); } | |
| else if(is_same_type<eglue_type, eglue_schur>::value == true) { arma_ap
plier_1(*=, *); } | | else if(is_same_type<eglue_type, eglue_schur>::value == true) { arma_ap
plier_1(*=, *); } | |
| } | | } | |
| else | | else | |
| | | | |
| skipping to change at line 332 | | skipping to change at line 332 | |
| arma_debug_assert_same_size(out.n_rows, out.n_cols, n_rows, n_cols, "elem
ent-wise division"); | | arma_debug_assert_same_size(out.n_rows, out.n_cols, n_rows, n_cols, "elem
ent-wise division"); | |
| | | | |
| typedef typename T1::elem_type eT; | | typedef typename T1::elem_type eT; | |
| | | | |
| eT* out_mem = out.memptr(); | | eT* out_mem = out.memptr(); | |
| | | | |
| const bool prefer_at_accessor = (Proxy<T1>::prefer_at_accessor || Proxy<T
2>::prefer_at_accessor); | | const bool prefer_at_accessor = (Proxy<T1>::prefer_at_accessor || Proxy<T
2>::prefer_at_accessor); | |
| | | | |
| if(prefer_at_accessor == false) | | if(prefer_at_accessor == false) | |
| { | | { | |
|
| const uword n_elem = out.n_elem; | | // for fixed-sized vectors with n_elem >= 6, using x.get_n_elem() direc | |
| //const uword n_elem = x.get_n_elem(); // for fixed-sized matrices this | | tly causes a mis-optimisation (slowdown) of the loop under GCC 4.4 | |
| causes a mis-optimisation (slowdown) of the loop under GCC 4.4 | | const uword n_elem = (Proxy<T1>::is_fixed || Proxy<T2>::is_fixed) ? ( ( | |
| | | x.get_n_elem() <= 4) ? x.get_n_elem() : out.n_elem ) : out.n_elem; | |
| | | | |
| typename Proxy<T1>::ea_type P1 = x.P1.get_ea(); | | typename Proxy<T1>::ea_type P1 = x.P1.get_ea(); | |
| typename Proxy<T2>::ea_type P2 = x.P2.get_ea(); | | typename Proxy<T2>::ea_type P2 = x.P2.get_ea(); | |
| | | | |
| if(is_same_type<eglue_type, eglue_plus >::value == true) { arma_ap
plier_1(/=, +); } | | if(is_same_type<eglue_type, eglue_plus >::value == true) { arma_ap
plier_1(/=, +); } | |
| else if(is_same_type<eglue_type, eglue_minus>::value == true) { arma_ap
plier_1(/=, -); } | | else if(is_same_type<eglue_type, eglue_minus>::value == true) { arma_ap
plier_1(/=, -); } | |
| else if(is_same_type<eglue_type, eglue_div >::value == true) { arma_ap
plier_1(/=, /); } | | else if(is_same_type<eglue_type, eglue_div >::value == true) { arma_ap
plier_1(/=, /); } | |
| else if(is_same_type<eglue_type, eglue_schur>::value == true) { arma_ap
plier_1(/=, *); } | | else if(is_same_type<eglue_type, eglue_schur>::value == true) { arma_ap
plier_1(/=, *); } | |
| } | | } | |
| else | | else | |
| | | | |
End of changes. 5 change blocks. |
| 15 lines changed or deleted | | 20 lines changed or added | |
|
| eop_core_meat.hpp | | eop_core_meat.hpp | |
| | | | |
| skipping to change at line 134 | | skipping to change at line 134 | |
| typedef typename T1::elem_type eT; | | typedef typename T1::elem_type eT; | |
| | | | |
| // NOTE: we're assuming that the matrix has already been set to the corre
ct size and there is no aliasing; | | // NOTE: we're assuming that the matrix has already been set to the corre
ct size and there is no aliasing; | |
| // size setting and alias checking is done by either the Mat constructor o
r operator=() | | // size setting and alias checking is done by either the Mat constructor o
r operator=() | |
| | | | |
| const eT k = x.aux; | | const eT k = x.aux; | |
| eT* out_mem = out.memptr(); | | eT* out_mem = out.memptr(); | |
| | | | |
| if(Proxy<T1>::prefer_at_accessor == false) | | if(Proxy<T1>::prefer_at_accessor == false) | |
| { | | { | |
|
| const uword n_elem = out.n_elem; | | // for fixed-sized vectors with n_elem >= 6, using x.get_n_elem() direc | |
| //const uword n_elem = x.get_n_elem(); // for fixed-sized matrices this | | tly can cause a mis-optimisation (slowdown) of the loop under GCC 4.4 | |
| causes a mis-optimisation (slowdown) of the loop under GCC 4.4 | | const uword n_elem = (Proxy<T1>::is_fixed) ? ( (x.get_n_elem() <= 4) ? | |
| | | x.get_n_elem() : out.n_elem ) : out.n_elem; | |
| | | | |
| typename Proxy<T1>::ea_type P = x.P.get_ea(); | | typename Proxy<T1>::ea_type P = x.P.get_ea(); | |
| | | | |
| arma_applier_1(=); | | arma_applier_1(=); | |
| } | | } | |
| else | | else | |
| { | | { | |
| const uword n_rows = x.get_n_rows(); | | const uword n_rows = x.get_n_rows(); | |
| const uword n_cols = x.get_n_cols(); | | const uword n_cols = x.get_n_cols(); | |
| | | | |
| | | | |
| skipping to change at line 173 | | skipping to change at line 173 | |
| const uword n_rows = x.get_n_rows(); | | const uword n_rows = x.get_n_rows(); | |
| const uword n_cols = x.get_n_cols(); | | const uword n_cols = x.get_n_cols(); | |
| | | | |
| arma_debug_assert_same_size(out.n_rows, out.n_cols, n_rows, n_cols, "addi
tion"); | | arma_debug_assert_same_size(out.n_rows, out.n_cols, n_rows, n_cols, "addi
tion"); | |
| | | | |
| const eT k = x.aux; | | const eT k = x.aux; | |
| eT* out_mem = out.memptr(); | | eT* out_mem = out.memptr(); | |
| | | | |
| if(Proxy<T1>::prefer_at_accessor == false) | | if(Proxy<T1>::prefer_at_accessor == false) | |
| { | | { | |
|
| const uword n_elem = out.n_elem; | | // for fixed-sized vectors with n_elem >= 6, using x.get_n_elem() direc | |
| //const uword n_elem = x.get_n_elem(); // for fixed-sized matrices this | | tly can cause a mis-optimisation (slowdown) of the loop under GCC 4.4 | |
| causes a mis-optimisation (slowdown) of the loop under GCC 4.4 | | const uword n_elem = (Proxy<T1>::is_fixed) ? ( (x.get_n_elem() <= 4) ? | |
| | | x.get_n_elem() : out.n_elem ) : out.n_elem; | |
| | | | |
| typename Proxy<T1>::ea_type P = x.P.get_ea(); | | typename Proxy<T1>::ea_type P = x.P.get_ea(); | |
| | | | |
| arma_applier_1(+=); | | arma_applier_1(+=); | |
| } | | } | |
| else | | else | |
| { | | { | |
| const Proxy<T1>& P = x.P; | | const Proxy<T1>& P = x.P; | |
| | | | |
| arma_applier_2(+=); | | arma_applier_2(+=); | |
| | | | |
| skipping to change at line 209 | | skipping to change at line 209 | |
| const uword n_rows = x.get_n_rows(); | | const uword n_rows = x.get_n_rows(); | |
| const uword n_cols = x.get_n_cols(); | | const uword n_cols = x.get_n_cols(); | |
| | | | |
| arma_debug_assert_same_size(out.n_rows, out.n_cols, n_rows, n_cols, "subt
raction"); | | arma_debug_assert_same_size(out.n_rows, out.n_cols, n_rows, n_cols, "subt
raction"); | |
| | | | |
| const eT k = x.aux; | | const eT k = x.aux; | |
| eT* out_mem = out.memptr(); | | eT* out_mem = out.memptr(); | |
| | | | |
| if(Proxy<T1>::prefer_at_accessor == false) | | if(Proxy<T1>::prefer_at_accessor == false) | |
| { | | { | |
|
| const uword n_elem = out.n_elem; | | // for fixed-sized vectors with n_elem >= 6, using x.get_n_elem() direc | |
| //const uword n_elem = x.get_n_elem(); // for fixed-sized matrices this | | tly can cause a mis-optimisation (slowdown) of the loop under GCC 4.4 | |
| causes a mis-optimisation (slowdown) of the loop under GCC 4.4 | | const uword n_elem = (Proxy<T1>::is_fixed) ? ( (x.get_n_elem() <= 4) ? | |
| | | x.get_n_elem() : out.n_elem ) : out.n_elem; | |
| | | | |
| typename Proxy<T1>::ea_type P = x.P.get_ea(); | | typename Proxy<T1>::ea_type P = x.P.get_ea(); | |
| | | | |
| arma_applier_1(-=); | | arma_applier_1(-=); | |
| } | | } | |
| else | | else | |
| { | | { | |
| const Proxy<T1>& P = x.P; | | const Proxy<T1>& P = x.P; | |
| | | | |
| arma_applier_2(-=); | | arma_applier_2(-=); | |
| | | | |
| skipping to change at line 245 | | skipping to change at line 245 | |
| const uword n_rows = x.get_n_rows(); | | const uword n_rows = x.get_n_rows(); | |
| const uword n_cols = x.get_n_cols(); | | const uword n_cols = x.get_n_cols(); | |
| | | | |
| arma_debug_assert_same_size(out.n_rows, out.n_cols, n_rows, n_cols, "elem
ent-wise multiplication"); | | arma_debug_assert_same_size(out.n_rows, out.n_cols, n_rows, n_cols, "elem
ent-wise multiplication"); | |
| | | | |
| const eT k = x.aux; | | const eT k = x.aux; | |
| eT* out_mem = out.memptr(); | | eT* out_mem = out.memptr(); | |
| | | | |
| if(Proxy<T1>::prefer_at_accessor == false) | | if(Proxy<T1>::prefer_at_accessor == false) | |
| { | | { | |
|
| const uword n_elem = out.n_elem; | | // for fixed-sized vectors with n_elem >= 6, using x.get_n_elem() direc | |
| //const uword n_elem = x.get_n_elem(); // for fixed-sized matrices this | | tly can cause a mis-optimisation (slowdown) of the loop under GCC 4.4 | |
| causes a mis-optimisation (slowdown) of the loop under GCC 4.4 | | const uword n_elem = (Proxy<T1>::is_fixed) ? ( (x.get_n_elem() <= 4) ? | |
| | | x.get_n_elem() : out.n_elem ) : out.n_elem; | |
| | | | |
| typename Proxy<T1>::ea_type P = x.P.get_ea(); | | typename Proxy<T1>::ea_type P = x.P.get_ea(); | |
| | | | |
| arma_applier_1(*=); | | arma_applier_1(*=); | |
| } | | } | |
| else | | else | |
| { | | { | |
| const Proxy<T1>& P = x.P; | | const Proxy<T1>& P = x.P; | |
| | | | |
| arma_applier_2(*=); | | arma_applier_2(*=); | |
| | | | |
| skipping to change at line 281 | | skipping to change at line 281 | |
| const uword n_rows = x.get_n_rows(); | | const uword n_rows = x.get_n_rows(); | |
| const uword n_cols = x.get_n_cols(); | | const uword n_cols = x.get_n_cols(); | |
| | | | |
| arma_debug_assert_same_size(out.n_rows, out.n_cols, n_rows, n_cols, "elem
ent-wise division"); | | arma_debug_assert_same_size(out.n_rows, out.n_cols, n_rows, n_cols, "elem
ent-wise division"); | |
| | | | |
| const eT k = x.aux; | | const eT k = x.aux; | |
| eT* out_mem = out.memptr(); | | eT* out_mem = out.memptr(); | |
| | | | |
| if(Proxy<T1>::prefer_at_accessor == false) | | if(Proxy<T1>::prefer_at_accessor == false) | |
| { | | { | |
|
| const uword n_elem = out.n_elem; | | // for fixed-sized vectors with n_elem >= 6, using x.get_n_elem() direc | |
| //const uword n_elem = x.get_n_elem(); // for fixed-sized matrices this | | tly can cause a mis-optimisation (slowdown) of the loop under GCC 4.4 | |
| causes a mis-optimisation (slowdown) of the loop under GCC 4.4 | | const uword n_elem = (Proxy<T1>::is_fixed) ? ( (x.get_n_elem() <= 4) ? | |
| | | x.get_n_elem() : out.n_elem ) : out.n_elem; | |
| | | | |
| typename Proxy<T1>::ea_type P = x.P.get_ea(); | | typename Proxy<T1>::ea_type P = x.P.get_ea(); | |
| | | | |
| arma_applier_1(/=); | | arma_applier_1(/=); | |
| } | | } | |
| else | | else | |
| { | | { | |
| const Proxy<T1>& P = x.P; | | const Proxy<T1>& P = x.P; | |
| | | | |
| arma_applier_2(/=); | | arma_applier_2(/=); | |
| | | | |
End of changes. 5 change blocks. |
| 15 lines changed or deleted | | 20 lines changed or added | |
|