Base_bones.hpp | Base_bones.hpp | |||
---|---|---|---|---|
skipping to change at line 45 | skipping to change at line 45 | |||
template<typename elem_type, typename derived> | template<typename elem_type, typename derived> | |||
struct Base_eval_Mat | struct Base_eval_Mat | |||
{ | { | |||
const derived& eval() const; | const derived& eval() const; | |||
}; | }; | |||
template<typename elem_type, typename derived> | template<typename elem_type, typename derived> | |||
struct Base_eval_expr | struct Base_eval_expr | |||
{ | { | |||
Mat<elem_type> eval() const; | Mat<elem_type> eval() const; //!< force the immediate evaluation of a delayed expression | |||
}; | }; | |||
template<typename elem_type, typename derived, bool condition> | template<typename elem_type, typename derived, bool condition> | |||
struct Base_eval {}; | struct Base_eval {}; | |||
template<typename elem_type, typename derived> | template<typename elem_type, typename derived> | |||
struct Base_eval<elem_type, derived, true> { typedef Base_eval_Mat<elem_ty pe, derived> result; }; | struct Base_eval<elem_type, derived, true> { typedef Base_eval_Mat<elem_ty pe, derived> result; }; | |||
template<typename elem_type, typename derived> | template<typename elem_type, typename derived> | |||
struct Base_eval<elem_type, derived, false> { typedef Base_eval_expr<elem_t ype, derived> result; }; | struct Base_eval<elem_type, derived, false> { typedef Base_eval_expr<elem_t ype, derived> result; }; | |||
End of changes. 1 change blocks. | ||||
1 lines changed or deleted | 1 lines changed or added | |||
arma_version.hpp | arma_version.hpp | |||
---|---|---|---|---|
skipping to change at line 18 | skipping to change at line 18 | |||
// Lesser General Public License (LGPL) as published | // Lesser General Public License (LGPL) as published | |||
// by the Free Software Foundation, either version 3 | // by the Free Software Foundation, either version 3 | |||
// of the License or (at your option) any later version. | // of the License or (at your option) any later version. | |||
// (see http://www.opensource.org/licenses for more info) | // (see http://www.opensource.org/licenses for more info) | |||
//! \addtogroup arma_version | //! \addtogroup arma_version | |||
//! @{ | //! @{ | |||
#define ARMA_VERSION_MAJOR 3 | #define ARMA_VERSION_MAJOR 3 | |||
#define ARMA_VERSION_MINOR 1 | #define ARMA_VERSION_MINOR 1 | |||
#define ARMA_VERSION_PATCH 92 | #define ARMA_VERSION_PATCH 93 | |||
#define ARMA_VERSION_NAME "v3.2 beta 2" | #define ARMA_VERSION_NAME "v3.2 beta 3" | |||
struct arma_version | struct arma_version | |||
{ | { | |||
static const unsigned int major = ARMA_VERSION_MAJOR; | static const unsigned int major = ARMA_VERSION_MAJOR; | |||
static const unsigned int minor = ARMA_VERSION_MINOR; | static const unsigned int minor = ARMA_VERSION_MINOR; | |||
static const unsigned int patch = ARMA_VERSION_PATCH; | static const unsigned int patch = ARMA_VERSION_PATCH; | |||
static | static | |||
inline | inline | |||
std::string | std::string | |||
End of changes. 1 change blocks. | ||||
2 lines changed or deleted | 2 lines changed or added | |||
auxlib_bones.hpp | auxlib_bones.hpp | |||
---|---|---|---|---|
skipping to change at line 167 | skipping to change at line 167 | |||
template<typename eT, typename T1> | template<typename eT, typename T1> | |||
inline static bool svd_econ(Mat<eT>& U, Col<eT>& S, Mat<eT>& V, const Bas e<eT,T1>& X, const char mode); | inline static bool svd_econ(Mat<eT>& U, Col<eT>& S, Mat<eT>& V, const Bas e<eT,T1>& X, const char mode); | |||
template<typename T, typename T1> | template<typename T, typename T1> | |||
inline static bool svd_econ(Mat< std::complex<T> >& U, Col<T>& S, Mat< st d::complex<T> >& V, const Base< std::complex<T>, T1>& X, const char mode); | inline static bool svd_econ(Mat< std::complex<T> >& U, Col<T>& S, Mat< st d::complex<T> >& V, const Base< std::complex<T>, T1>& X, const char mode); | |||
// | // | |||
// solve | // solve | |||
template<typename eT> | ||||
inline static bool solve (Mat<eT>& out, Mat<eT>& A, const Mat<eT>& B, c | ||||
onst bool slow = false); | ||||
template<typename eT> | ||||
inline static bool solve_od(Mat<eT>& out, Mat<eT>& A, const Mat<eT>& B); | ||||
template<typename eT> | ||||
inline static bool solve_ud(Mat<eT>& out, Mat<eT>& A, const Mat<eT>& B); | ||||
// | ||||
// solve_new | ||||
template<typename eT, typename T1> | template<typename eT, typename T1> | |||
inline static bool solve_new (Mat<eT>& out, Mat<eT>& A, const Base<eT,T 1>& X, const bool slow = false); | inline static bool solve (Mat<eT>& out, Mat<eT>& A, const Base<eT,T1>& X, const bool slow = false); | |||
template<typename eT, typename T1> | template<typename eT, typename T1> | |||
inline static bool solve_new_od(Mat<eT>& out, Mat<eT>& A, const Base<eT,T 1>& X); | inline static bool solve_od(Mat<eT>& out, Mat<eT>& A, const Base<eT,T1>& X); | |||
template<typename eT, typename T1> | template<typename eT, typename T1> | |||
inline static bool solve_new_ud(Mat<eT>& out, Mat<eT>& A, const Base<eT,T 1>& X); | inline static bool solve_ud(Mat<eT>& out, Mat<eT>& A, const Base<eT,T1>& X); | |||
// | // | |||
// solve_tr | // solve_tr | |||
template<typename eT> | template<typename eT> | |||
inline static bool solve_tr(Mat<eT>& out, const Mat<eT>& A, const Mat<eT> & B, const uword layout); | inline static bool solve_tr(Mat<eT>& out, const Mat<eT>& A, const Mat<eT> & B, const uword layout); | |||
// | // | |||
// Schur decomposition | // Schur decomposition | |||
End of changes. 4 change blocks. | ||||
16 lines changed or deleted | 3 lines changed or added | |||
auxlib_meat.hpp | auxlib_meat.hpp | |||
---|---|---|---|---|
skipping to change at line 2418 | skipping to change at line 2418 | |||
arma_ignore(X); | arma_ignore(X); | |||
arma_ignore(mode); | arma_ignore(mode); | |||
arma_stop("svd(): use of LAPACK needs to be enabled"); | arma_stop("svd(): use of LAPACK needs to be enabled"); | |||
return false; | return false; | |||
} | } | |||
#endif | #endif | |||
} | } | |||
//! Solve a system of linear equations. | //! Solve a system of linear equations. | |||
//! Assumes that A.n_rows = A.n_cols and B.n_rows = A.n_rows | //! Assumes that A.n_rows = A.n_cols and B.n_rows = A.n_rows | |||
template<typename eT> | ||||
inline | ||||
bool | ||||
auxlib::solve(Mat<eT>& out, Mat<eT>& A, const Mat<eT>& B, const bool slow) | ||||
{ | ||||
arma_extra_debug_sigprint(); | ||||
const uword A_n_rows = A.n_rows; | ||||
const uword B_n_cols = B.n_cols; | ||||
if(A.is_empty() || B.is_empty()) | ||||
{ | ||||
out.zeros(A.n_cols, B_n_cols); | ||||
return true; | ||||
} | ||||
else | ||||
{ | ||||
bool status = false; | ||||
if( (A_n_rows <= 4) && (slow == false) ) | ||||
{ | ||||
Mat<eT> A_inv; | ||||
status = auxlib::inv_noalias_tinymat(A_inv, A, A_n_rows); | ||||
if(status == true) | ||||
{ | ||||
out.set_size(A_n_rows, B_n_cols); | ||||
gemm_emul<false,false,false,false>::apply(out, A_inv, B); | ||||
return true; | ||||
} | ||||
} | ||||
if( (A_n_rows > 4) || (status == false) ) | ||||
{ | ||||
#if defined(ARMA_USE_ATLAS) | ||||
{ | ||||
out = B; | ||||
podarray<int> ipiv(A_n_rows + 2); // +2 for paranoia: old versions | ||||
of Atlas might be trashing memory | ||||
int info = atlas::clapack_gesv<eT>(atlas::CblasColMajor, A_n_rows, | ||||
B_n_cols, A.memptr(), A_n_rows, ipiv.memptr(), out.memptr(), A_n_rows); | ||||
return (info == 0); | ||||
} | ||||
#elif defined(ARMA_USE_LAPACK) | ||||
{ | ||||
out = B; | ||||
blas_int n = blas_int(A_n_rows); // assuming A is square | ||||
blas_int lda = blas_int(A_n_rows); | ||||
blas_int ldb = blas_int(A_n_rows); | ||||
blas_int nrhs = blas_int(B_n_cols); | ||||
blas_int info = 0; | ||||
podarray<blas_int> ipiv(A_n_rows + 2); // +2 for paranoia: some ve | ||||
rsions of Lapack might be trashing memory | ||||
arma_extra_debug_print("lapack::gesv()"); | ||||
lapack::gesv<eT>(&n, &nrhs, A.memptr(), &lda, ipiv.memptr(), out.me | ||||
mptr(), &ldb, &info); | ||||
arma_extra_debug_print("lapack::gesv() -- finished"); | ||||
return (info == 0); | ||||
} | ||||
#else | ||||
{ | ||||
arma_stop("solve(): use of ATLAS or LAPACK needs to be enabled"); | ||||
return false; | ||||
} | ||||
#endif | ||||
} | ||||
} | ||||
return true; | ||||
} | ||||
//! Solve an over-determined system. | ||||
//! Assumes that A.n_rows > A.n_cols and B.n_rows = A.n_rows | ||||
template<typename eT> | ||||
inline | ||||
bool | ||||
auxlib::solve_od(Mat<eT>& out, Mat<eT>& A, const Mat<eT>& B) | ||||
{ | ||||
arma_extra_debug_sigprint(); | ||||
#if defined(ARMA_USE_LAPACK) | ||||
{ | ||||
const uword A_n_rows = A.n_rows; | ||||
const uword A_n_cols = A.n_cols; | ||||
const uword B_n_rows = B.n_rows; | ||||
const uword B_n_cols = B.n_cols; | ||||
out.set_size(A_n_cols, B_n_cols); | ||||
if(A.is_empty() || B.is_empty()) | ||||
{ | ||||
out.zeros(); | ||||
return true; | ||||
} | ||||
char trans = 'N'; | ||||
blas_int m = blas_int(A_n_rows); | ||||
blas_int n = blas_int(A_n_cols); | ||||
blas_int lda = blas_int(A_n_rows); | ||||
blas_int ldb = blas_int(A_n_rows); | ||||
blas_int nrhs = blas_int(B_n_cols); | ||||
blas_int lwork = 2*((std::max)(blas_int(1), n + (std::max)(n, nrhs))); | ||||
blas_int info = 0; | ||||
Mat<eT> tmp = B; | ||||
podarray<eT> work( static_cast<uword>(lwork) ); | ||||
// NOTE: the dgels() function in the lapack library supplied by ATLAS 3 | ||||
.6 seems to have problems | ||||
arma_extra_debug_print("lapack::gels()"); | ||||
lapack::gels<eT>( &trans, &m, &n, &nrhs, A.memptr(), &lda, tmp.memptr() | ||||
, &ldb, work.memptr(), &lwork, &info ); | ||||
arma_extra_debug_print("lapack::gels() -- finished"); | ||||
for(uword col=0; col<B_n_cols; ++col) | ||||
{ | ||||
arrayops::copy( out.colptr(col), tmp.colptr(col), A_n_cols ); | ||||
} | ||||
return (info == 0); | ||||
} | ||||
#else | ||||
{ | ||||
arma_ignore(out); | ||||
arma_ignore(A); | ||||
arma_ignore(B); | ||||
arma_stop("solve(): use of LAPACK needs to be enabled"); | ||||
return false; | ||||
} | ||||
#endif | ||||
} | ||||
//! Solve an under-determined system. | ||||
//! Assumes that A.n_rows < A.n_cols and B.n_rows = A.n_rows | ||||
template<typename eT> | ||||
inline | ||||
bool | ||||
auxlib::solve_ud(Mat<eT>& out, Mat<eT>& A, const Mat<eT>& B) | ||||
{ | ||||
arma_extra_debug_sigprint(); | ||||
#if defined(ARMA_USE_LAPACK) | ||||
{ | ||||
const uword A_n_rows = A.n_rows; | ||||
const uword A_n_cols = A.n_cols; | ||||
const uword B_n_rows = B.n_rows; | ||||
const uword B_n_cols = B.n_cols; | ||||
out.set_size(A_n_cols, B_n_cols); | ||||
if(A.is_empty() || B.is_empty()) | ||||
{ | ||||
out.zeros(); | ||||
return true; | ||||
} | ||||
char trans = 'N'; | ||||
blas_int m = blas_int(A_n_rows); | ||||
blas_int n = blas_int(A_n_cols); | ||||
blas_int lda = blas_int(A_n_rows); | ||||
blas_int ldb = blas_int(A_n_cols); | ||||
blas_int nrhs = blas_int(B_n_cols); | ||||
blas_int lwork = 2*((std::max)(blas_int(1), m + (std::max)(m,nrhs))); | ||||
blas_int info = 0; | ||||
Mat<eT> tmp(A_n_cols, B_n_cols); | ||||
tmp.zeros(); | ||||
for(uword col=0; col<B_n_cols; ++col) | ||||
{ | ||||
eT* tmp_colmem = tmp.colptr(col); | ||||
arrayops::copy( tmp_colmem, B.colptr(col), B_n_rows ); | ||||
for(uword row=B_n_rows; row<A_n_cols; ++row) | ||||
{ | ||||
tmp_colmem[row] = eT(0); | ||||
} | ||||
} | ||||
podarray<eT> work( static_cast<uword>(lwork) ); | ||||
// NOTE: the dgels() function in the lapack library supplied by ATLAS 3 | ||||
.6 seems to have problems | ||||
arma_extra_debug_print("lapack::gels()"); | ||||
lapack::gels<eT>( &trans, &m, &n, &nrhs, A.memptr(), &lda, tmp.memptr() | ||||
, &ldb, work.memptr(), &lwork, &info ); | ||||
arma_extra_debug_print("lapack::gels() -- finished"); | ||||
for(uword col=0; col<B_n_cols; ++col) | ||||
{ | ||||
arrayops::copy( out.colptr(col), tmp.colptr(col), A_n_cols ); | ||||
} | ||||
return (info == 0); | ||||
} | ||||
#else | ||||
{ | ||||
arma_ignore(out); | ||||
arma_ignore(A); | ||||
arma_ignore(B); | ||||
arma_stop("solve(): use of LAPACK needs to be enabled"); | ||||
return false; | ||||
} | ||||
#endif | ||||
} | ||||
//! Solve a system of linear equations. | ||||
//! Assumes that A.n_rows = A.n_cols and B.n_rows = A.n_rows | ||||
template<typename eT, typename T1> | template<typename eT, typename T1> | |||
inline | inline | |||
bool | bool | |||
auxlib::solve_new(Mat<eT>& out, Mat<eT>& A, const Base<eT,T1>& X, const boo l slow) | auxlib::solve(Mat<eT>& out, Mat<eT>& A, const Base<eT,T1>& X, const bool sl ow) | |||
{ | { | |||
arma_extra_debug_sigprint(); | arma_extra_debug_sigprint(); | |||
bool status = false; | bool status = false; | |||
const uword A_n_rows = A.n_rows; | const uword A_n_rows = A.n_rows; | |||
if( (A_n_rows <= 4) && (slow == false) ) | if( (A_n_rows <= 4) && (slow == false) ) | |||
{ | { | |||
Mat<eT> A_inv; | Mat<eT> A_inv; | |||
skipping to change at line 2734 | skipping to change at line 2515 | |||
} | } | |||
return true; | return true; | |||
} | } | |||
//! Solve an over-determined system. | //! Solve an over-determined system. | |||
//! Assumes that A.n_rows > A.n_cols and B.n_rows = A.n_rows | //! Assumes that A.n_rows > A.n_cols and B.n_rows = A.n_rows | |||
template<typename eT, typename T1> | template<typename eT, typename T1> | |||
inline | inline | |||
bool | bool | |||
auxlib::solve_new_od(Mat<eT>& out, Mat<eT>& A, const Base<eT,T1>& X) | auxlib::solve_od(Mat<eT>& out, Mat<eT>& A, const Base<eT,T1>& X) | |||
{ | { | |||
arma_extra_debug_sigprint(); | arma_extra_debug_sigprint(); | |||
#if defined(ARMA_USE_LAPACK) | #if defined(ARMA_USE_LAPACK) | |||
{ | { | |||
Mat<eT> tmp = X.get_ref(); | Mat<eT> tmp = X.get_ref(); | |||
const uword A_n_rows = A.n_rows; | const uword A_n_rows = A.n_rows; | |||
const uword A_n_cols = A.n_cols; | const uword A_n_cols = A.n_cols; | |||
skipping to change at line 2799 | skipping to change at line 2580 | |||
return false; | return false; | |||
} | } | |||
#endif | #endif | |||
} | } | |||
//! Solve an under-determined system. | //! Solve an under-determined system. | |||
//! Assumes that A.n_rows < A.n_cols and B.n_rows = A.n_rows | //! Assumes that A.n_rows < A.n_cols and B.n_rows = A.n_rows | |||
template<typename eT, typename T1> | template<typename eT, typename T1> | |||
inline | inline | |||
bool | bool | |||
auxlib::solve_new_ud(Mat<eT>& out, Mat<eT>& A, const Base<eT,T1>& X) | auxlib::solve_ud(Mat<eT>& out, Mat<eT>& A, const Base<eT,T1>& X) | |||
{ | { | |||
arma_extra_debug_sigprint(); | arma_extra_debug_sigprint(); | |||
// TODO: this function provides the same results as Octave 3.4.2. | ||||
// TODO: however, these results are different than Matlab 7.12.0.635. | ||||
// TODO: figure out whether both Octave and Matlab are correct, or only o | ||||
ne of them | ||||
#if defined(ARMA_USE_LAPACK) | #if defined(ARMA_USE_LAPACK) | |||
{ | { | |||
const unwrap<T1> Y( X.get_ref() ); | const unwrap<T1> Y( X.get_ref() ); | |||
const Mat<eT>& B = Y.M; | const Mat<eT>& B = Y.M; | |||
const uword A_n_rows = A.n_rows; | const uword A_n_rows = A.n_rows; | |||
const uword A_n_cols = A.n_cols; | const uword A_n_cols = A.n_cols; | |||
const uword B_n_rows = B.n_rows; | const uword B_n_rows = B.n_rows; | |||
const uword B_n_cols = B.n_cols; | const uword B_n_cols = B.n_cols; | |||
End of changes. 5 change blocks. | ||||
230 lines changed or deleted | 8 lines changed or added | |||
config.hpp | config.hpp | |||
---|---|---|---|---|
skipping to change at line 13 | skipping to change at line 13 | |||
// | // | |||
// This file is part of the Armadillo C++ library. | // This file is part of the Armadillo C++ library. | |||
// It is provided without any warranty of fitness | // It is provided without any warranty of fitness | |||
// for any purpose. You can redistribute this file | // for any purpose. You can redistribute this file | |||
// and/or modify it under the terms of the GNU | // and/or modify it under the terms of the GNU | |||
// Lesser General Public License (LGPL) as published | // Lesser General Public License (LGPL) as published | |||
// by the Free Software Foundation, either version 3 | // by the Free Software Foundation, either version 3 | |||
// of the License or (at your option) any later version. | // of the License or (at your option) any later version. | |||
// (see http://www.opensource.org/licenses for more info) | // (see http://www.opensource.org/licenses for more info) | |||
// #define ARMA_64BIT_WORD | ||||
//// Uncomment the above line if you require matrices/vectors capable of ho | ||||
lding more than 4 billion elements. | ||||
//// Your machine and compiler must have support for 64 bit integers (eg. v | ||||
ia "long" or "long long") | ||||
// #define ARMA_USE_CXX11 | ||||
//// Uncomment the above line if you have a C++ compiler that supports the | ||||
C++11 standard | ||||
//// This will enable additional features, such as use of initialiser lists | ||||
#if !defined(ARMA_USE_LAPACK) | #if !defined(ARMA_USE_LAPACK) | |||
#define ARMA_USE_LAPACK | #define ARMA_USE_LAPACK | |||
//// Uncomment the above line if you have LAPACK or a fast replacement for LAPACK, | //// Uncomment the above line if you have LAPACK or a high-speed replacement for LAPACK, | |||
//// such as Intel's MKL, AMD's ACML, or the Accelerate framework. | //// such as Intel's MKL, AMD's ACML, or the Accelerate framework. | |||
//// LAPACK is required for matrix decompositions (eg. SVD) and matrix inverse. | //// LAPACK is required for matrix decompositions (eg. SVD) and matrix inverse. | |||
#endif | #endif | |||
#if !defined(ARMA_USE_BLAS) | #if !defined(ARMA_USE_BLAS) | |||
#define ARMA_USE_BLAS | #define ARMA_USE_BLAS | |||
//// Uncomment the above line if you have BLAS or a fast replacement for BLAS, | //// Uncomment the above line if you have BLAS or a high-speed replacement for BLAS, | |||
//// such as GotoBLAS, Intel's MKL, AMD's ACML, or the Accelerate framework. | //// such as GotoBLAS, Intel's MKL, AMD's ACML, or the Accelerate framework. | |||
//// BLAS is used for matrix multiplication. | //// BLAS is used for matrix multiplication. | |||
//// Without BLAS, matrix multiplication will still work, but might be slower. | //// Without BLAS, matrix multiplication will still work, but might be slower. | |||
#endif | #endif | |||
// #define ARMA_BLAS_LONG | // #define ARMA_BLAS_CAPITALS | |||
//// Uncomment the above line if your BLAS and LAPACK libraries use "long" | //// Uncomment the above line if your BLAS and LAPACK libraries have capita | |||
instead of "int" | lised function names (eg. ACML on 64-bit Windows) | |||
// #define ARMA_BLAS_LONG_LONG | ||||
//// Uncomment the above line if your BLAS and LAPACK libraries use "long l | ||||
ong" instead of "int" | ||||
#define ARMA_BLAS_UNDERSCORE | #define ARMA_BLAS_UNDERSCORE | |||
//// Uncomment the above line if your BLAS and LAPACK libraries have function names with a trailing underscore. | //// Uncomment the above line if your BLAS and LAPACK libraries have function names with a trailing underscore. | |||
//// Conversely, comment it out if the function names don't have a trailing underscore. | //// Conversely, comment it out if the function names don't have a trailing underscore. | |||
// #define ARMA_BLAS_CAPITALS | // #define ARMA_BLAS_LONG | |||
//// Uncomment the above line if your BLAS and LAPACK libraries have capita | //// Uncomment the above line if your BLAS and LAPACK libraries use "long" | |||
lised function names (eg. ACML on 64-bit Windows) | instead of "int" | |||
#if !defined(ARMA_MAT_PREALLOC) | // #define ARMA_BLAS_LONG_LONG | |||
#define ARMA_MAT_PREALLOC 16 | //// Uncomment the above line if your BLAS and LAPACK libraries use "long l | |||
#endif | ong" instead of "int" | |||
//// This is the number of preallocated elements used by matrices and vecto | ||||
rs; | ||||
//// it must be an integer that is at least 1. | ||||
//// If you mainly use lots of very small vectors (eg. <= 4 elements), | ||||
//// change the number to the size of your vectors. | ||||
// #define ARMA_USE_TBB_ALLOC | // #define ARMA_USE_TBB_ALLOC | |||
//// Uncomment the above line if you want to use Intel TBB scalable_malloc() and scalable_free() instead of standard new[] and delete[] | //// Uncomment the above line if you want to use Intel TBB scalable_malloc() and scalable_free() instead of standard new[] and delete[] | |||
// #define ARMA_USE_MKL_ALLOC | // #define ARMA_USE_MKL_ALLOC | |||
//// Uncomment the above line if you want to use Intel MKL mkl_malloc() and mkl_free() instead of standard new[] and delete[] | //// Uncomment the above line if you want to use Intel MKL mkl_malloc() and mkl_free() instead of standard new[] and delete[] | |||
#define ARMA_USE_ATLAS | #define ARMA_USE_ATLAS | |||
#define ARMA_ATLAS_INCLUDE_DIR /usr/include/ | #define ARMA_ATLAS_INCLUDE_DIR /usr/include/ | |||
//// If you're using ATLAS and the compiler can't find cblas.h and/or clapack.h | //// If you're using ATLAS and the compiler can't find cblas.h and/or clapack.h | |||
//// uncomment the above define and specify the appropriate include directory. | //// uncomment the above define and specify the appropriate include directory. | |||
//// Make sure the directory has a trailing / | //// Make sure the directory has a trailing / | |||
// #define ARMA_64BIT_WORD | ||||
//// Uncomment the above line if you require matrices/vectors capable of ho | ||||
lding more than 4 billion elements. | ||||
//// Your machine and compiler must have support for 64 bit integers (eg. v | ||||
ia "long" or "long long") | ||||
// #define ARMA_USE_CXX11 | ||||
//// Uncomment the above line if you have a C++ compiler that supports the | ||||
C++11 standard | ||||
//// This will enable additional features, such as use of initialiser lists | ||||
#if !defined(ARMA_MAT_PREALLOC) | ||||
#define ARMA_MAT_PREALLOC 16 | ||||
#endif | ||||
//// This is the number of preallocated elements used by matrices and vecto | ||||
rs; | ||||
//// it must be an integer that is at least 1. | ||||
//// If you mainly use lots of very small vectors (eg. <= 4 elements), | ||||
//// change the number to the size of your vectors. | ||||
// #define ARMA_NO_DEBUG | ||||
//// Uncomment the above line if you want to disable all run-time checks. | ||||
//// This will result in faster code, but you first need to make sure that | ||||
your code runs correctly! | ||||
//// We strongly recommend to have the run-time checks enabled during devel | ||||
opment, | ||||
//// as this greatly aids in finding mistakes in your code, and hence speed | ||||
s up development. | ||||
//// We recommend that run-time checks be disabled _only_ for the shipped v | ||||
ersion of your program. | ||||
// #define ARMA_EXTRA_DEBUG | ||||
//// Uncomment the above line if you want to see the function traces of how | ||||
Armadillo evaluates expressions. | ||||
//// This is mainly useful for debugging of the library. | ||||
#define ARMA_USE_BOOST | #define ARMA_USE_BOOST | |||
#define ARMA_USE_BOOST_DATE | #define ARMA_USE_BOOST_DATE | |||
#define ARMA_USE_WRAPPER | #define ARMA_USE_WRAPPER | |||
/* #undef ARMA_USE_HDF5 */ | /* #undef ARMA_USE_HDF5 */ | |||
#if !defined(ARMA_DEFAULT_OSTREAM) | #if !defined(ARMA_DEFAULT_OSTREAM) | |||
#define ARMA_DEFAULT_OSTREAM std::cout | #define ARMA_DEFAULT_OSTREAM std::cout | |||
#endif | #endif | |||
#define ARMA_PRINT_LOGIC_ERRORS | #define ARMA_PRINT_LOGIC_ERRORS | |||
#define ARMA_PRINT_RUNTIME_ERRORS | #define ARMA_PRINT_RUNTIME_ERRORS | |||
#define ARMA_HAVE_STD_ISFINITE | #define ARMA_HAVE_STD_ISFINITE | |||
#define ARMA_HAVE_STD_ISINF | #define ARMA_HAVE_STD_ISINF | |||
#define ARMA_HAVE_STD_ISNAN | #define ARMA_HAVE_STD_ISNAN | |||
#define ARMA_HAVE_STD_SNPRINTF | #define ARMA_HAVE_STD_SNPRINTF | |||
#define ARMA_HAVE_LOG1P | #define ARMA_HAVE_LOG1P | |||
#define ARMA_HAVE_GETTIMEOFDAY | #define ARMA_HAVE_GETTIMEOFDAY | |||
// #define ARMA_EXTRA_DEBUG | ||||
// #define ARMA_NO_DEBUG | ||||
#if defined(ARMA_DONT_USE_ATLAS) | ||||
#undef ARMA_USE_ATLAS | ||||
#undef ARMA_ATLAS_INCLUDE_DIR | ||||
#endif | ||||
#if defined(ARMA_DONT_USE_LAPACK) | #if defined(ARMA_DONT_USE_LAPACK) | |||
#undef ARMA_USE_LAPACK | #undef ARMA_USE_LAPACK | |||
#endif | #endif | |||
#if defined(ARMA_DONT_USE_BLAS) | #if defined(ARMA_DONT_USE_BLAS) | |||
#undef ARMA_USE_BLAS | #undef ARMA_USE_BLAS | |||
#endif | #endif | |||
#if defined(ARMA_DONT_USE_ATLAS) | ||||
#undef ARMA_USE_ATLAS | ||||
#undef ARMA_ATLAS_INCLUDE_DIR | ||||
#endif | ||||
#if defined(ARMA_DONT_PRINT_LOGIC_ERRORS) | #if defined(ARMA_DONT_PRINT_LOGIC_ERRORS) | |||
#undef ARMA_PRINT_LOGIC_ERRORS | #undef ARMA_PRINT_LOGIC_ERRORS | |||
#endif | #endif | |||
#if defined(ARMA_DONT_PRINT_RUNTIME_ERRORS) | #if defined(ARMA_DONT_PRINT_RUNTIME_ERRORS) | |||
#undef ARMA_PRINT_RUNTIME_ERRORS | #undef ARMA_PRINT_RUNTIME_ERRORS | |||
#endif | #endif | |||
End of changes. 9 change blocks. | ||||
39 lines changed or deleted | 52 lines changed or added | |||
eglue_core_meat.hpp | eglue_core_meat.hpp | |||
---|---|---|---|---|
skipping to change at line 149 | skipping to change at line 149 | |||
const bool prefer_at_accessor = (Proxy<T1>::prefer_at_accessor || Proxy<T 2>::prefer_at_accessor); | const bool prefer_at_accessor = (Proxy<T1>::prefer_at_accessor || Proxy<T 2>::prefer_at_accessor); | |||
// NOTE: we're assuming that the matrix has already been set to the correct size and there is no aliasing; | // NOTE: we're assuming that the matrix has already been set to the correct size and there is no aliasing; | |||
// size setting and alias checking is done by either the Mat constructor or operator=() | // size setting and alias checking is done by either the Mat constructor or operator=() | |||
eT* out_mem = out.memptr(); | eT* out_mem = out.memptr(); | |||
if(prefer_at_accessor == false) | if(prefer_at_accessor == false) | |||
{ | { | |||
const uword n_elem = out.n_elem; | // for fixed-sized vectors with n_elem >= 6, using x.get_n_elem() direc | |||
//const uword n_elem = x.get_n_elem(); // for fixed-sized matrices this | tly causes a mis-optimisation (slowdown) of the loop under GCC 4.4 | |||
causes a mis-optimisation (slowdown) of the loop under GCC 4.4 | const uword n_elem = (Proxy<T1>::is_fixed || Proxy<T2>::is_fixed) ? ( ( | |||
x.get_n_elem() <= 4) ? x.get_n_elem() : out.n_elem ) : out.n_elem; | ||||
typename Proxy<T1>::ea_type P1 = x.P1.get_ea(); | typename Proxy<T1>::ea_type P1 = x.P1.get_ea(); | |||
typename Proxy<T2>::ea_type P2 = x.P2.get_ea(); | typename Proxy<T2>::ea_type P2 = x.P2.get_ea(); | |||
if(is_same_type<eglue_type, eglue_plus >::value == true) { arma_ap plier_1(=, +); } | if(is_same_type<eglue_type, eglue_plus >::value == true) { arma_ap plier_1(=, +); } | |||
else if(is_same_type<eglue_type, eglue_minus>::value == true) { arma_ap plier_1(=, -); } | else if(is_same_type<eglue_type, eglue_minus>::value == true) { arma_ap plier_1(=, -); } | |||
else if(is_same_type<eglue_type, eglue_div >::value == true) { arma_ap plier_1(=, /); } | else if(is_same_type<eglue_type, eglue_div >::value == true) { arma_ap plier_1(=, /); } | |||
else if(is_same_type<eglue_type, eglue_schur>::value == true) { arma_ap plier_1(=, *); } | else if(is_same_type<eglue_type, eglue_schur>::value == true) { arma_ap plier_1(=, *); } | |||
} | } | |||
else | else | |||
skipping to change at line 197 | skipping to change at line 197 | |||
arma_debug_assert_same_size(out.n_rows, out.n_cols, n_rows, n_cols, "addi tion"); | arma_debug_assert_same_size(out.n_rows, out.n_cols, n_rows, n_cols, "addi tion"); | |||
typedef typename T1::elem_type eT; | typedef typename T1::elem_type eT; | |||
eT* out_mem = out.memptr(); | eT* out_mem = out.memptr(); | |||
const bool prefer_at_accessor = (Proxy<T1>::prefer_at_accessor || Proxy<T 2>::prefer_at_accessor); | const bool prefer_at_accessor = (Proxy<T1>::prefer_at_accessor || Proxy<T 2>::prefer_at_accessor); | |||
if(prefer_at_accessor == false) | if(prefer_at_accessor == false) | |||
{ | { | |||
const uword n_elem = out.n_elem; | // for fixed-sized vectors with n_elem >= 6, using x.get_n_elem() direc | |||
//const uword n_elem = x.get_n_elem(); // for fixed-sized matrices this | tly causes a mis-optimisation (slowdown) of the loop under GCC 4.4 | |||
causes a mis-optimisation (slowdown) of the loop under GCC 4.4 | const uword n_elem = (Proxy<T1>::is_fixed || Proxy<T2>::is_fixed) ? ( ( | |||
x.get_n_elem() <= 4) ? x.get_n_elem() : out.n_elem ) : out.n_elem; | ||||
typename Proxy<T1>::ea_type P1 = x.P1.get_ea(); | typename Proxy<T1>::ea_type P1 = x.P1.get_ea(); | |||
typename Proxy<T2>::ea_type P2 = x.P2.get_ea(); | typename Proxy<T2>::ea_type P2 = x.P2.get_ea(); | |||
if(is_same_type<eglue_type, eglue_plus >::value == true) { arma_ap plier_1(+=, +); } | if(is_same_type<eglue_type, eglue_plus >::value == true) { arma_ap plier_1(+=, +); } | |||
else if(is_same_type<eglue_type, eglue_minus>::value == true) { arma_ap plier_1(+=, -); } | else if(is_same_type<eglue_type, eglue_minus>::value == true) { arma_ap plier_1(+=, -); } | |||
else if(is_same_type<eglue_type, eglue_div >::value == true) { arma_ap plier_1(+=, /); } | else if(is_same_type<eglue_type, eglue_div >::value == true) { arma_ap plier_1(+=, /); } | |||
else if(is_same_type<eglue_type, eglue_schur>::value == true) { arma_ap plier_1(+=, *); } | else if(is_same_type<eglue_type, eglue_schur>::value == true) { arma_ap plier_1(+=, *); } | |||
} | } | |||
else | else | |||
skipping to change at line 242 | skipping to change at line 242 | |||
arma_debug_assert_same_size(out.n_rows, out.n_cols, n_rows, n_cols, "subt raction"); | arma_debug_assert_same_size(out.n_rows, out.n_cols, n_rows, n_cols, "subt raction"); | |||
typedef typename T1::elem_type eT; | typedef typename T1::elem_type eT; | |||
eT* out_mem = out.memptr(); | eT* out_mem = out.memptr(); | |||
const bool prefer_at_accessor = (Proxy<T1>::prefer_at_accessor || Proxy<T 2>::prefer_at_accessor); | const bool prefer_at_accessor = (Proxy<T1>::prefer_at_accessor || Proxy<T 2>::prefer_at_accessor); | |||
if(prefer_at_accessor == false) | if(prefer_at_accessor == false) | |||
{ | { | |||
const uword n_elem = out.n_elem; | // for fixed-sized vectors with n_elem >= 6, using x.get_n_elem() direc | |||
//const uword n_elem = x.get_n_elem(); // for fixed-sized matrices this | tly causes a mis-optimisation (slowdown) of the loop under GCC 4.4 | |||
causes a mis-optimisation (slowdown) of the loop under GCC 4.4 | const uword n_elem = (Proxy<T1>::is_fixed || Proxy<T2>::is_fixed) ? ( ( | |||
x.get_n_elem() <= 4) ? x.get_n_elem() : out.n_elem ) : out.n_elem; | ||||
typename Proxy<T1>::ea_type P1 = x.P1.get_ea(); | typename Proxy<T1>::ea_type P1 = x.P1.get_ea(); | |||
typename Proxy<T2>::ea_type P2 = x.P2.get_ea(); | typename Proxy<T2>::ea_type P2 = x.P2.get_ea(); | |||
if(is_same_type<eglue_type, eglue_plus >::value == true) { arma_ap plier_1(-=, +); } | if(is_same_type<eglue_type, eglue_plus >::value == true) { arma_ap plier_1(-=, +); } | |||
else if(is_same_type<eglue_type, eglue_minus>::value == true) { arma_ap plier_1(-=, -); } | else if(is_same_type<eglue_type, eglue_minus>::value == true) { arma_ap plier_1(-=, -); } | |||
else if(is_same_type<eglue_type, eglue_div >::value == true) { arma_ap plier_1(-=, /); } | else if(is_same_type<eglue_type, eglue_div >::value == true) { arma_ap plier_1(-=, /); } | |||
else if(is_same_type<eglue_type, eglue_schur>::value == true) { arma_ap plier_1(-=, *); } | else if(is_same_type<eglue_type, eglue_schur>::value == true) { arma_ap plier_1(-=, *); } | |||
} | } | |||
else | else | |||
skipping to change at line 287 | skipping to change at line 287 | |||
arma_debug_assert_same_size(out.n_rows, out.n_cols, n_rows, n_cols, "elem ent-wise multiplication"); | arma_debug_assert_same_size(out.n_rows, out.n_cols, n_rows, n_cols, "elem ent-wise multiplication"); | |||
typedef typename T1::elem_type eT; | typedef typename T1::elem_type eT; | |||
eT* out_mem = out.memptr(); | eT* out_mem = out.memptr(); | |||
const bool prefer_at_accessor = (Proxy<T1>::prefer_at_accessor || Proxy<T 2>::prefer_at_accessor); | const bool prefer_at_accessor = (Proxy<T1>::prefer_at_accessor || Proxy<T 2>::prefer_at_accessor); | |||
if(prefer_at_accessor == false) | if(prefer_at_accessor == false) | |||
{ | { | |||
const uword n_elem = out.n_elem; | // for fixed-sized vectors with n_elem >= 6, using x.get_n_elem() direc | |||
//const uword n_elem = x.get_n_elem(); // for fixed-sized matrices this | tly causes a mis-optimisation (slowdown) of the loop under GCC 4.4 | |||
causes a mis-optimisation (slowdown) of the loop under GCC 4.4 | const uword n_elem = (Proxy<T1>::is_fixed || Proxy<T2>::is_fixed) ? ( ( | |||
x.get_n_elem() <= 4) ? x.get_n_elem() : out.n_elem ) : out.n_elem; | ||||
typename Proxy<T1>::ea_type P1 = x.P1.get_ea(); | typename Proxy<T1>::ea_type P1 = x.P1.get_ea(); | |||
typename Proxy<T2>::ea_type P2 = x.P2.get_ea(); | typename Proxy<T2>::ea_type P2 = x.P2.get_ea(); | |||
if(is_same_type<eglue_type, eglue_plus >::value == true) { arma_ap plier_1(*=, +); } | if(is_same_type<eglue_type, eglue_plus >::value == true) { arma_ap plier_1(*=, +); } | |||
else if(is_same_type<eglue_type, eglue_minus>::value == true) { arma_ap plier_1(*=, -); } | else if(is_same_type<eglue_type, eglue_minus>::value == true) { arma_ap plier_1(*=, -); } | |||
else if(is_same_type<eglue_type, eglue_div >::value == true) { arma_ap plier_1(*=, /); } | else if(is_same_type<eglue_type, eglue_div >::value == true) { arma_ap plier_1(*=, /); } | |||
else if(is_same_type<eglue_type, eglue_schur>::value == true) { arma_ap plier_1(*=, *); } | else if(is_same_type<eglue_type, eglue_schur>::value == true) { arma_ap plier_1(*=, *); } | |||
} | } | |||
else | else | |||
skipping to change at line 332 | skipping to change at line 332 | |||
arma_debug_assert_same_size(out.n_rows, out.n_cols, n_rows, n_cols, "elem ent-wise division"); | arma_debug_assert_same_size(out.n_rows, out.n_cols, n_rows, n_cols, "elem ent-wise division"); | |||
typedef typename T1::elem_type eT; | typedef typename T1::elem_type eT; | |||
eT* out_mem = out.memptr(); | eT* out_mem = out.memptr(); | |||
const bool prefer_at_accessor = (Proxy<T1>::prefer_at_accessor || Proxy<T 2>::prefer_at_accessor); | const bool prefer_at_accessor = (Proxy<T1>::prefer_at_accessor || Proxy<T 2>::prefer_at_accessor); | |||
if(prefer_at_accessor == false) | if(prefer_at_accessor == false) | |||
{ | { | |||
const uword n_elem = out.n_elem; | // for fixed-sized vectors with n_elem >= 6, using x.get_n_elem() direc | |||
//const uword n_elem = x.get_n_elem(); // for fixed-sized matrices this | tly causes a mis-optimisation (slowdown) of the loop under GCC 4.4 | |||
causes a mis-optimisation (slowdown) of the loop under GCC 4.4 | const uword n_elem = (Proxy<T1>::is_fixed || Proxy<T2>::is_fixed) ? ( ( | |||
x.get_n_elem() <= 4) ? x.get_n_elem() : out.n_elem ) : out.n_elem; | ||||
typename Proxy<T1>::ea_type P1 = x.P1.get_ea(); | typename Proxy<T1>::ea_type P1 = x.P1.get_ea(); | |||
typename Proxy<T2>::ea_type P2 = x.P2.get_ea(); | typename Proxy<T2>::ea_type P2 = x.P2.get_ea(); | |||
if(is_same_type<eglue_type, eglue_plus >::value == true) { arma_ap plier_1(/=, +); } | if(is_same_type<eglue_type, eglue_plus >::value == true) { arma_ap plier_1(/=, +); } | |||
else if(is_same_type<eglue_type, eglue_minus>::value == true) { arma_ap plier_1(/=, -); } | else if(is_same_type<eglue_type, eglue_minus>::value == true) { arma_ap plier_1(/=, -); } | |||
else if(is_same_type<eglue_type, eglue_div >::value == true) { arma_ap plier_1(/=, /); } | else if(is_same_type<eglue_type, eglue_div >::value == true) { arma_ap plier_1(/=, /); } | |||
else if(is_same_type<eglue_type, eglue_schur>::value == true) { arma_ap plier_1(/=, *); } | else if(is_same_type<eglue_type, eglue_schur>::value == true) { arma_ap plier_1(/=, *); } | |||
} | } | |||
else | else | |||
End of changes. 5 change blocks. | ||||
15 lines changed or deleted | 20 lines changed or added | |||
eop_core_meat.hpp | eop_core_meat.hpp | |||
---|---|---|---|---|
skipping to change at line 134 | skipping to change at line 134 | |||
typedef typename T1::elem_type eT; | typedef typename T1::elem_type eT; | |||
// NOTE: we're assuming that the matrix has already been set to the corre ct size and there is no aliasing; | // NOTE: we're assuming that the matrix has already been set to the corre ct size and there is no aliasing; | |||
// size setting and alias checking is done by either the Mat contructor o r operator=() | // size setting and alias checking is done by either the Mat contructor o r operator=() | |||
const eT k = x.aux; | const eT k = x.aux; | |||
eT* out_mem = out.memptr(); | eT* out_mem = out.memptr(); | |||
if(Proxy<T1>::prefer_at_accessor == false) | if(Proxy<T1>::prefer_at_accessor == false) | |||
{ | { | |||
const uword n_elem = out.n_elem; | // for fixed-sized vectors with n_elem >= 6, using x.get_n_elem() direc | |||
//const uword n_elem = x.get_n_elem(); // for fixed-sized matrices this | tly can cause a mis-optimisation (slowdown) of the loop under GCC 4.4 | |||
causes a mis-optimisation (slowdown) of the loop under GCC 4.4 | const uword n_elem = (Proxy<T1>::is_fixed) ? ( (x.get_n_elem() <= 4) ? | |||
x.get_n_elem() : out.n_elem ) : out.n_elem; | ||||
typename Proxy<T1>::ea_type P = x.P.get_ea(); | typename Proxy<T1>::ea_type P = x.P.get_ea(); | |||
arma_applier_1(=); | arma_applier_1(=); | |||
} | } | |||
else | else | |||
{ | { | |||
const uword n_rows = x.get_n_rows(); | const uword n_rows = x.get_n_rows(); | |||
const uword n_cols = x.get_n_cols(); | const uword n_cols = x.get_n_cols(); | |||
skipping to change at line 173 | skipping to change at line 173 | |||
const uword n_rows = x.get_n_rows(); | const uword n_rows = x.get_n_rows(); | |||
const uword n_cols = x.get_n_cols(); | const uword n_cols = x.get_n_cols(); | |||
arma_debug_assert_same_size(out.n_rows, out.n_cols, n_rows, n_cols, "addi tion"); | arma_debug_assert_same_size(out.n_rows, out.n_cols, n_rows, n_cols, "addi tion"); | |||
const eT k = x.aux; | const eT k = x.aux; | |||
eT* out_mem = out.memptr(); | eT* out_mem = out.memptr(); | |||
if(Proxy<T1>::prefer_at_accessor == false) | if(Proxy<T1>::prefer_at_accessor == false) | |||
{ | { | |||
const uword n_elem = out.n_elem; | // for fixed-sized vectors with n_elem >= 6, using x.get_n_elem() direc | |||
//const uword n_elem = x.get_n_elem(); // for fixed-sized matrices this | tly can cause a mis-optimisation (slowdown) of the loop under GCC 4.4 | |||
causes a mis-optimisation (slowdown) of the loop under GCC 4.4 | const uword n_elem = (Proxy<T1>::is_fixed) ? ( (x.get_n_elem() <= 4) ? | |||
x.get_n_elem() : out.n_elem ) : out.n_elem; | ||||
typename Proxy<T1>::ea_type P = x.P.get_ea(); | typename Proxy<T1>::ea_type P = x.P.get_ea(); | |||
arma_applier_1(+=); | arma_applier_1(+=); | |||
} | } | |||
else | else | |||
{ | { | |||
const Proxy<T1>& P = x.P; | const Proxy<T1>& P = x.P; | |||
arma_applier_2(+=); | arma_applier_2(+=); | |||
skipping to change at line 209 | skipping to change at line 209 | |||
const uword n_rows = x.get_n_rows(); | const uword n_rows = x.get_n_rows(); | |||
const uword n_cols = x.get_n_cols(); | const uword n_cols = x.get_n_cols(); | |||
arma_debug_assert_same_size(out.n_rows, out.n_cols, n_rows, n_cols, "subt raction"); | arma_debug_assert_same_size(out.n_rows, out.n_cols, n_rows, n_cols, "subt raction"); | |||
const eT k = x.aux; | const eT k = x.aux; | |||
eT* out_mem = out.memptr(); | eT* out_mem = out.memptr(); | |||
if(Proxy<T1>::prefer_at_accessor == false) | if(Proxy<T1>::prefer_at_accessor == false) | |||
{ | { | |||
const uword n_elem = out.n_elem; | // for fixed-sized vectors with n_elem >= 6, using x.get_n_elem() direc | |||
//const uword n_elem = x.get_n_elem(); // for fixed-sized matrices this | tly can cause a mis-optimisation (slowdown) of the loop under GCC 4.4 | |||
causes a mis-optimisation (slowdown) of the loop under GCC 4.4 | const uword n_elem = (Proxy<T1>::is_fixed) ? ( (x.get_n_elem() <= 4) ? | |||
x.get_n_elem() : out.n_elem ) : out.n_elem; | ||||
typename Proxy<T1>::ea_type P = x.P.get_ea(); | typename Proxy<T1>::ea_type P = x.P.get_ea(); | |||
arma_applier_1(-=); | arma_applier_1(-=); | |||
} | } | |||
else | else | |||
{ | { | |||
const Proxy<T1>& P = x.P; | const Proxy<T1>& P = x.P; | |||
arma_applier_2(-=); | arma_applier_2(-=); | |||
skipping to change at line 245 | skipping to change at line 245 | |||
const uword n_rows = x.get_n_rows(); | const uword n_rows = x.get_n_rows(); | |||
const uword n_cols = x.get_n_cols(); | const uword n_cols = x.get_n_cols(); | |||
arma_debug_assert_same_size(out.n_rows, out.n_cols, n_rows, n_cols, "elem ent-wise multiplication"); | arma_debug_assert_same_size(out.n_rows, out.n_cols, n_rows, n_cols, "elem ent-wise multiplication"); | |||
const eT k = x.aux; | const eT k = x.aux; | |||
eT* out_mem = out.memptr(); | eT* out_mem = out.memptr(); | |||
if(Proxy<T1>::prefer_at_accessor == false) | if(Proxy<T1>::prefer_at_accessor == false) | |||
{ | { | |||
const uword n_elem = out.n_elem; | // for fixed-sized vectors with n_elem >= 6, using x.get_n_elem() direc | |||
//const uword n_elem = x.get_n_elem(); // for fixed-sized matrices this | tly can cause a mis-optimisation (slowdown) of the loop under GCC 4.4 | |||
causes a mis-optimisation (slowdown) of the loop under GCC 4.4 | const uword n_elem = (Proxy<T1>::is_fixed) ? ( (x.get_n_elem() <= 4) ? | |||
x.get_n_elem() : out.n_elem ) : out.n_elem; | ||||
typename Proxy<T1>::ea_type P = x.P.get_ea(); | typename Proxy<T1>::ea_type P = x.P.get_ea(); | |||
arma_applier_1(*=); | arma_applier_1(*=); | |||
} | } | |||
else | else | |||
{ | { | |||
const Proxy<T1>& P = x.P; | const Proxy<T1>& P = x.P; | |||
arma_applier_2(*=); | arma_applier_2(*=); | |||
skipping to change at line 281 | skipping to change at line 281 | |||
const uword n_rows = x.get_n_rows(); | const uword n_rows = x.get_n_rows(); | |||
const uword n_cols = x.get_n_cols(); | const uword n_cols = x.get_n_cols(); | |||
arma_debug_assert_same_size(out.n_rows, out.n_cols, n_rows, n_cols, "elem ent-wise division"); | arma_debug_assert_same_size(out.n_rows, out.n_cols, n_rows, n_cols, "elem ent-wise division"); | |||
const eT k = x.aux; | const eT k = x.aux; | |||
eT* out_mem = out.memptr(); | eT* out_mem = out.memptr(); | |||
if(Proxy<T1>::prefer_at_accessor == false) | if(Proxy<T1>::prefer_at_accessor == false) | |||
{ | { | |||
const uword n_elem = out.n_elem; | // for fixed-sized vectors with n_elem >= 6, using x.get_n_elem() direc | |||
//const uword n_elem = x.get_n_elem(); // for fixed-sized matrices this | tly can cause a mis-optimisation (slowdown) of the loop under GCC 4.4 | |||
causes a mis-optimisation (slowdown) of the loop under GCC 4.4 | const uword n_elem = (Proxy<T1>::is_fixed) ? ( (x.get_n_elem() <= 4) ? | |||
x.get_n_elem() : out.n_elem ) : out.n_elem; | ||||
typename Proxy<T1>::ea_type P = x.P.get_ea(); | typename Proxy<T1>::ea_type P = x.P.get_ea(); | |||
arma_applier_1(/=); | arma_applier_1(/=); | |||
} | } | |||
else | else | |||
{ | { | |||
const Proxy<T1>& P = x.P; | const Proxy<T1>& P = x.P; | |||
arma_applier_2(/=); | arma_applier_2(/=); | |||
End of changes. 5 change blocks. | ||||
15 lines changed or deleted | 20 lines changed or added | |||
fn_norm.hpp | fn_norm.hpp | |||
---|---|---|---|---|
skipping to change at line 424 | skipping to change at line 424 | |||
return min_val; | return min_val; | |||
} | } | |||
template<typename T1> | template<typename T1> | |||
inline | inline | |||
typename T1::pod_type | typename T1::pod_type | |||
arma_mat_norm_1(const Proxy<T1>& P) | arma_mat_norm_1(const Proxy<T1>& P) | |||
{ | { | |||
arma_extra_debug_sigprint(); | arma_extra_debug_sigprint(); | |||
typedef typename T1::elem_type eT; | ||||
typedef typename T1::pod_type T; | ||||
const unwrap<typename Proxy<T1>::stored_type> tmp(P.Q); | ||||
const Mat<eT>& X = tmp.M; | ||||
// TODO: this can be sped up with a dedicated implementation | // TODO: this can be sped up with a dedicated implementation | |||
return as_scalar( max( sum(abs(X)), 1) ); | return as_scalar( max( sum(abs(P.Q), 0), 1) ); | |||
} | } | |||
template<typename T1> | template<typename T1> | |||
inline | inline | |||
typename T1::pod_type | typename T1::pod_type | |||
arma_mat_norm_2(const Proxy<T1>& P) | arma_mat_norm_2(const Proxy<T1>& P) | |||
{ | { | |||
arma_extra_debug_sigprint(); | arma_extra_debug_sigprint(); | |||
typedef typename T1::elem_type eT; | ||||
typedef typename T1::pod_type T; | typedef typename T1::pod_type T; | |||
const unwrap<typename Proxy<T1>::stored_type> tmp(P.Q); | // TODO: is the SVD based approach only valid for square matrices? | |||
const Mat<eT>& X = tmp.M; | ||||
Col<T> S; | Col<T> S; | |||
svd(S, X); | svd(S, P.Q); | |||
return (S.n_elem > 0) ? max(S) : T(0); | return (S.n_elem > 0) ? max(S) : T(0); | |||
} | } | |||
template<typename T1> | template<typename T1> | |||
inline | inline | |||
typename T1::pod_type | typename T1::pod_type | |||
arma_mat_norm_inf(const Proxy<T1>& P) | arma_mat_norm_inf(const Proxy<T1>& P) | |||
{ | { | |||
arma_extra_debug_sigprint(); | arma_extra_debug_sigprint(); | |||
typedef typename T1::elem_type eT; | ||||
typedef typename T1::pod_type T; | ||||
const unwrap<typename Proxy<T1>::stored_type> tmp(P.Q); | ||||
const Mat<eT>& X = tmp.M; | ||||
// TODO: this can be sped up with a dedicated implementation | // TODO: this can be sped up with a dedicated implementation | |||
return as_scalar( max( sum(abs(X),1) ) ); | return as_scalar( max( sum(abs(P.Q), 1), 0) ); | |||
} | } | |||
template<typename T1> | template<typename T1> | |||
inline | inline | |||
arma_warn_unused | arma_warn_unused | |||
typename T1::pod_type | typename T1::pod_type | |||
norm | norm | |||
( | ( | |||
const Base<typename T1::elem_type,T1>& X, | const Base<typename T1::elem_type,T1>& X, | |||
const uword k, | const uword k, | |||
End of changes. 7 change blocks. | ||||
18 lines changed or deleted | 4 lines changed or added | |||
glue_solve_meat.hpp | glue_solve_meat.hpp | |||
---|---|---|---|---|
skipping to change at line 30 | skipping to change at line 30 | |||
{ | { | |||
arma_extra_debug_sigprint(); | arma_extra_debug_sigprint(); | |||
const uword A_n_rows = A.n_rows; | const uword A_n_rows = A.n_rows; | |||
const uword A_n_cols = A.n_cols; | const uword A_n_cols = A.n_cols; | |||
bool status = false; | bool status = false; | |||
if(A_n_rows == A_n_cols) | if(A_n_rows == A_n_cols) | |||
{ | { | |||
status = auxlib::solve_new(out, A, X, slow); | status = auxlib::solve(out, A, X, slow); | |||
} | } | |||
else | else | |||
if(A_n_rows > A_n_cols) | if(A_n_rows > A_n_cols) | |||
{ | { | |||
arma_extra_debug_print("solve(): detected over-determined system"); | arma_extra_debug_print("solve(): detected over-determined system"); | |||
status = auxlib::solve_new_od(out, A, X); | status = auxlib::solve_od(out, A, X); | |||
} | } | |||
else | else | |||
{ | { | |||
arma_extra_debug_print("solve(): detected under-determined system"); | arma_extra_debug_print("solve(): detected under-determined system"); | |||
status = auxlib::solve_new_ud(out, A, X); | status = auxlib::solve_ud(out, A, X); | |||
} | } | |||
if(status == false) | if(status == false) | |||
{ | { | |||
out.reset(); | out.reset(); | |||
arma_bad("solve(): solution not found"); | arma_bad("solve(): solution not found"); | |||
} | } | |||
} | } | |||
template<typename T1, typename T2> | template<typename T1, typename T2> | |||
End of changes. 3 change blocks. | ||||
3 lines changed or deleted | 3 lines changed or added | |||
op_htrans_meat.hpp | op_htrans_meat.hpp | |||
---|---|---|---|---|
skipping to change at line 367 | skipping to change at line 367 | |||
inline | inline | |||
void | void | |||
op_htrans2::apply_proxy(Mat<typename T1::elem_type>& out, const T1& X, cons t typename T1::elem_type val) | op_htrans2::apply_proxy(Mat<typename T1::elem_type>& out, const T1& X, cons t typename T1::elem_type val) | |||
{ | { | |||
arma_extra_debug_sigprint(); | arma_extra_debug_sigprint(); | |||
typedef typename T1::elem_type eT; | typedef typename T1::elem_type eT; | |||
const Proxy<T1> P(X); | const Proxy<T1> P(X); | |||
if( | // allow detection of in-place transpose | |||
(is_Mat<typename Proxy<T1>::stored_type>::value || is_Mat_fixed<typenam | if( (is_Mat<typename Proxy<T1>::stored_type>::value == true) && (Proxy<T1 | |||
e Proxy<T1>::stored_type>::value) | >::fake_mat == false) ) | |||
&& | ||||
(Proxy<T1>::fake_mat == false) // can't rely on simple alias checking | ||||
for matrices constructed out of auxiliary memory | ||||
) | ||||
{ | { | |||
const unwrap<typename Proxy<T1>::stored_type> tmp(P.Q); // need this u nwrap to keep stupid compilers happy | const unwrap<typename Proxy<T1>::stored_type> tmp(P.Q); | |||
op_htrans2::apply(out, tmp.M, val); | op_htrans2::apply(out, tmp.M, val); | |||
} | } | |||
else | else | |||
{ | { | |||
const uword n_rows = P.get_n_rows(); | const uword n_rows = P.get_n_rows(); | |||
const uword n_cols = P.get_n_cols(); | const uword n_cols = P.get_n_cols(); | |||
const bool is_alias = P.is_alias(out); | const bool is_alias = P.is_alias(out); | |||
End of changes. 2 change blocks. | ||||
8 lines changed or deleted | 4 lines changed or added | |||
op_strans_meat.hpp | op_strans_meat.hpp | |||
---|---|---|---|---|
skipping to change at line 203 | skipping to change at line 203 | |||
inline | inline | |||
void | void | |||
op_strans::apply_proxy(Mat<typename T1::elem_type>& out, const T1& X) | op_strans::apply_proxy(Mat<typename T1::elem_type>& out, const T1& X) | |||
{ | { | |||
arma_extra_debug_sigprint(); | arma_extra_debug_sigprint(); | |||
typedef typename T1::elem_type eT; | typedef typename T1::elem_type eT; | |||
const Proxy<T1> P(X); | const Proxy<T1> P(X); | |||
if( | // allow detection of in-place transpose | |||
(is_Mat<typename Proxy<T1>::stored_type>::value || is_Mat_fixed<typenam | if( (is_Mat<typename Proxy<T1>::stored_type>::value == true) && (Proxy<T1 | |||
e Proxy<T1>::stored_type>::value) | >::fake_mat == false) ) | |||
&& | ||||
(Proxy<T1>::fake_mat == false) // can't rely on simple alias checking | ||||
for matrices constructed out of auxiliary memory | ||||
) | ||||
{ | { | |||
const unwrap<typename Proxy<T1>::stored_type> tmp(P.Q); // need this u nwrap to keep stupid compilers happy | const unwrap<typename Proxy<T1>::stored_type> tmp(P.Q); | |||
op_strans::apply(out, tmp.M); | op_strans::apply(out, tmp.M); | |||
} | } | |||
else | else | |||
{ | { | |||
const uword n_rows = P.get_n_rows(); | const uword n_rows = P.get_n_rows(); | |||
const uword n_cols = P.get_n_cols(); | const uword n_cols = P.get_n_cols(); | |||
const bool is_alias = P.is_alias(out); | const bool is_alias = P.is_alias(out); | |||
End of changes. 2 change blocks. | ||||
8 lines changed or deleted | 4 lines changed or added | |||