| arma_cmath.hpp | | arma_cmath.hpp | |
| | | | |
| skipping to change at line 45 | | skipping to change at line 45 | |
| return std::tr1::isfinite(x); | | return std::tr1::isfinite(x); | |
| } | | } | |
| #elif defined(ARMA_HAVE_ISFINITE) | | #elif defined(ARMA_HAVE_ISFINITE) | |
| { | | { | |
| return (std::isfinite(x) != 0); | | return (std::isfinite(x) != 0); | |
| } | | } | |
| #else | | #else | |
| { | | { | |
| const float y = (std::numeric_limits<float>::max)(); | | const float y = (std::numeric_limits<float>::max)(); | |
| | | | |
|
| return (x == x) && (x >= -y) && (x <= y); | | const volatile float xx = x; | |
| | | | |
| | | return (xx == xx) && (x >= -y) && (x <= y); | |
| } | | } | |
| #endif | | #endif | |
| } | | } | |
| | | | |
| template<> | | template<> | |
| arma_inline | | arma_inline | |
| bool | | bool | |
| arma_isfinite(double x) | | arma_isfinite(double x) | |
| { | | { | |
| #if defined(ARMA_USE_CXX11) | | #if defined(ARMA_USE_CXX11) | |
| | | | |
| skipping to change at line 71 | | skipping to change at line 73 | |
| return std::tr1::isfinite(x); | | return std::tr1::isfinite(x); | |
| } | | } | |
| #elif defined(ARMA_HAVE_ISFINITE) | | #elif defined(ARMA_HAVE_ISFINITE) | |
| { | | { | |
| return (std::isfinite(x) != 0); | | return (std::isfinite(x) != 0); | |
| } | | } | |
| #else | | #else | |
| { | | { | |
| const double y = (std::numeric_limits<double>::max)(); | | const double y = (std::numeric_limits<double>::max)(); | |
| | | | |
|
| return (x == x) && (x >= -y) && (x <= y); | | const volatile double xx = x; | |
| | | | |
| | | return (xx == xx) && (x >= -y) && (x <= y); | |
| } | | } | |
| #endif | | #endif | |
| } | | } | |
| | | | |
| template<typename T> | | template<typename T> | |
| arma_inline | | arma_inline | |
| bool | | bool | |
| arma_isfinite(const std::complex<T>& x) | | arma_isfinite(const std::complex<T>& x) | |
| { | | { | |
| if( (arma_isfinite(x.real()) == false) || (arma_isfinite(x.imag()) == fal
se) ) | | if( (arma_isfinite(x.real()) == false) || (arma_isfinite(x.imag()) == fal
se) ) | |
| { | | { | |
| return false; | | return false; | |
| } | | } | |
| else | | else | |
| { | | { | |
| return true; | | return true; | |
| } | | } | |
| } | | } | |
| | | | |
|
| | | // | |
| | | // wrappers for isinf | |
| | | | |
| | | template<typename eT> | |
| | | arma_inline | |
| | | bool | |
| | | arma_isinf(eT val) | |
| | | { | |
| | | arma_ignore(val); | |
| | | | |
| | | return false; | |
| | | } | |
| | | | |
| | | template<> | |
| | | arma_inline | |
| | | bool | |
| | | arma_isinf(float x) | |
| | | { | |
| | | #if defined(ARMA_USE_CXX11) | |
| | | { | |
| | | return std::isinf(x); | |
| | | } | |
| | | #elif defined(ARMA_HAVE_ISINF) | |
| | | { | |
| | | return (std::isinf(x) != 0); | |
| | | } | |
| | | #else | |
| | | { | |
| | | const float y = (std::numeric_limits<float>::max)(); | |
| | | | |
| | | const volatile float xx = x; | |
| | | | |
| | | return (xx == xx) && ((x < -y) || (x > y)); | |
| | | } | |
| | | #endif | |
| | | } | |
| | | | |
| | | template<> | |
| | | arma_inline | |
| | | bool | |
| | | arma_isinf(double x) | |
| | | { | |
| | | #if defined(ARMA_USE_CXX11) | |
| | | { | |
| | | return std::isinf(x); | |
| | | } | |
| | | #elif defined(ARMA_HAVE_ISINF) | |
| | | { | |
| | | return (std::isinf(x) != 0); | |
| | | } | |
| | | #else | |
| | | { | |
| | | const double y = (std::numeric_limits<double>::max)(); | |
| | | | |
| | | const volatile double xx = x; | |
| | | | |
| | | return (xx == xx) && ((x < -y) || (x > y)); | |
| | | } | |
| | | #endif | |
| | | } | |
| | | | |
| | | template<typename T> | |
| | | arma_inline | |
| | | bool | |
| | | arma_isinf(const std::complex<T>& x) | |
| | | { | |
| | | return ( arma_isinf(x.real()) || arma_isinf(x.imag()) ); | |
| | | } | |
| | | | |
| | | // | |
| | | // wrappers for isnan | |
| | | | |
| | | template<typename eT> | |
| | | arma_inline | |
| | | bool | |
| | | arma_isnan(eT val) | |
| | | { | |
| | | arma_ignore(val); | |
| | | | |
| | | return false; | |
| | | } | |
| | | | |
| | | template<> | |
| | | arma_inline | |
| | | bool | |
| | | arma_isnan(float x) | |
| | | { | |
| | | #if defined(ARMA_USE_CXX11) | |
| | | { | |
| | | return std::isnan(x); | |
| | | } | |
| | | #elif defined(ARMA_HAVE_ISNAN) | |
| | | { | |
| | | return (std::isnan(x) != 0); | |
| | | } | |
| | | #else | |
| | | { | |
| | | const volatile float xx = x; | |
| | | | |
| | | return (xx != xx); | |
| | | } | |
| | | #endif | |
| | | } | |
| | | | |
| | | template<> | |
| | | arma_inline | |
| | | bool | |
| | | arma_isnan(double x) | |
| | | { | |
| | | #if defined(ARMA_USE_CXX11) | |
| | | { | |
| | | return std::isnan(x); | |
| | | } | |
| | | #elif defined(ARMA_HAVE_ISNAN) | |
| | | { | |
| | | return (std::isnan(x) != 0); | |
| | | } | |
| | | #else | |
| | | { | |
| | | const volatile double xx = x; | |
| | | | |
| | | return (xx != xx); | |
| | | } | |
| | | #endif | |
| | | } | |
| | | | |
| | | template<typename T> | |
| | | arma_inline | |
| | | bool | |
| | | arma_isnan(const std::complex<T>& x) | |
| | | { | |
| | | return ( arma_isnan(x.real()) || arma_isnan(x.imag()) ); | |
| | | } | |
| | | | |
| // rudimentary wrappers for log1p() | | // rudimentary wrappers for log1p() | |
| | | | |
| arma_inline | | arma_inline | |
| float | | float | |
| arma_log1p(const float x) | | arma_log1p(const float x) | |
| { | | { | |
| #if defined(ARMA_USE_CXX11) | | #if defined(ARMA_USE_CXX11) | |
| { | | { | |
| return std::log1p(x); | | return std::log1p(x); | |
| } | | } | |
| | | | |
End of changes. 3 change blocks. |
| 2 lines changed or deleted | | 140 lines changed or added | |
|
| auxlib_bones.hpp | | auxlib_bones.hpp | |
| | | | |
| skipping to change at line 133 | | skipping to change at line 133 | |
| template<typename T, typename T1, typename T2> | | template<typename T, typename T1, typename T2> | |
| inline static bool eig_pair(Col< std::complex<T> >& eigval, Mat<T>& l_eig
vec, Mat<T>& r_eigvec, const Base<T,T1>& X, const Base<T,T2>& Y, const char
side); | | inline static bool eig_pair(Col< std::complex<T> >& eigval, Mat<T>& l_eig
vec, Mat<T>& r_eigvec, const Base<T,T1>& X, const Base<T,T2>& Y, const char
side); | |
| | | | |
| template<typename T, typename T1, typename T2> | | template<typename T, typename T1, typename T2> | |
| inline static bool eig_pair(Col< std::complex<T> >& eigval, Mat< std::com
plex<T> >& l_eigvec, Mat< std::complex<T> >& r_eigvec, const Base< std::com
plex<T>, T1 >& X, const Base< std::complex<T>, T2 >& Y, const char side); | | inline static bool eig_pair(Col< std::complex<T> >& eigval, Mat< std::com
plex<T> >& l_eigvec, Mat< std::complex<T> >& r_eigvec, const Base< std::com
plex<T>, T1 >& X, const Base< std::complex<T>, T2 >& Y, const char side); | |
| | | | |
| // | | // | |
| // chol | | // chol | |
| | | | |
| template<typename eT, typename T1> | | template<typename eT, typename T1> | |
|
| inline static bool chol(Mat<eT>& out, const Base<eT,T1>& X); | | inline static bool chol(Mat<eT>& out, const Base<eT,T1>& X, const uword l
ayout); | |
| | | | |
| // | | // | |
| // qr | | // qr | |
| | | | |
| template<typename eT, typename T1> | | template<typename eT, typename T1> | |
| inline static bool qr(Mat<eT>& Q, Mat<eT>& R, const Base<eT,T1>& X); | | inline static bool qr(Mat<eT>& Q, Mat<eT>& R, const Base<eT,T1>& X); | |
| | | | |
| template<typename eT, typename T1> | | template<typename eT, typename T1> | |
| inline static bool qr_econ(Mat<eT>& Q, Mat<eT>& R, const Base<eT,T1>& X); | | inline static bool qr_econ(Mat<eT>& Q, Mat<eT>& R, const Base<eT,T1>& X); | |
| | | | |
| | | | |
End of changes. 1 change blocks. |
| 1 lines changed or deleted | | 1 lines changed or added | |
|
| auxlib_meat.hpp | | auxlib_meat.hpp | |
| | | | |
| skipping to change at line 1764 | | skipping to change at line 1764 | |
| arma_ignore(side); | | arma_ignore(side); | |
| arma_stop("eig_pair(): use of LAPACK needs to be enabled"); | | arma_stop("eig_pair(): use of LAPACK needs to be enabled"); | |
| return false; | | return false; | |
| } | | } | |
| #endif | | #endif | |
| } | | } | |
| | | | |
| template<typename eT, typename T1> | | template<typename eT, typename T1> | |
| inline | | inline | |
| bool | | bool | |
|
| auxlib::chol(Mat<eT>& out, const Base<eT,T1>& X) | | auxlib::chol(Mat<eT>& out, const Base<eT,T1>& X, const uword layout) | |
| { | | { | |
| arma_extra_debug_sigprint(); | | arma_extra_debug_sigprint(); | |
| | | | |
| #if defined(ARMA_USE_LAPACK) | | #if defined(ARMA_USE_LAPACK) | |
| { | | { | |
| out = X.get_ref(); | | out = X.get_ref(); | |
| | | | |
| arma_debug_check( (out.is_square() == false), "chol(): given matrix is
not square" ); | | arma_debug_check( (out.is_square() == false), "chol(): given matrix is
not square" ); | |
| | | | |
|
| if(out.is_empty()) | | if(out.is_empty()) { return true; } | |
| { | | | |
| return true; | | | |
| } | | | |
| | | | |
| const uword out_n_rows = out.n_rows; | | const uword out_n_rows = out.n_rows; | |
| | | | |
|
| char uplo = 'U'; | | char uplo = (layout == 0) ? 'U' : 'L'; | |
| blas_int n = out_n_rows; | | blas_int n = out_n_rows; | |
| blas_int info = 0; | | blas_int info = 0; | |
| | | | |
| lapack::potrf(&uplo, &n, out.memptr(), &n, &info); | | lapack::potrf(&uplo, &n, out.memptr(), &n, &info); | |
| | | | |
|
| for(uword col=0; col<out_n_rows; ++col) | | if(layout == 0) | |
| { | | { | |
|
| eT* colptr = out.colptr(col); | | for(uword col=0; col < out_n_rows; ++col) | |
| | | { | |
| | | eT* colptr = out.colptr(col); | |
| | | | |
|
| for(uword row=(col+1); row < out_n_rows; ++row) | | for(uword row=(col+1); row < out_n_rows; ++row) { colptr[row] = eT | |
| | | (0); } | |
| | | } | |
| | | } | |
| | | else | |
| | | { | |
| | | for(uword col=1; col < out_n_rows; ++col) | |
| { | | { | |
|
| colptr[row] = eT(0); | | eT* colptr = out.colptr(col); | |
| | | | |
| | | for(uword row=0; row < col; ++row) { colptr[row] = eT(0); } | |
| } | | } | |
| } | | } | |
| | | | |
| return (info == 0); | | return (info == 0); | |
| } | | } | |
| #else | | #else | |
| { | | { | |
| arma_ignore(out); | | arma_ignore(out); | |
| arma_ignore(X); | | arma_ignore(X); | |
|
| | | arma_ignore(layout); | |
| | | | |
| arma_stop("chol(): use of LAPACK needs to be enabled"); | | arma_stop("chol(): use of LAPACK needs to be enabled"); | |
| return false; | | return false; | |
| } | | } | |
| #endif | | #endif | |
| } | | } | |
| | | | |
| template<typename eT, typename T1> | | template<typename eT, typename T1> | |
| inline | | inline | |
| bool | | bool | |
| | | | |
End of changes. 8 change blocks. |
| 10 lines changed or deleted | | 18 lines changed or added | |
|
| config.hpp | | config.hpp | |
| // Copyright (C) 2008-2014 Conrad Sanderson | | // Copyright (C) 2008-2014 Conrad Sanderson | |
| // Copyright (C) 2013 Ryan Curtin | | // Copyright (C) 2013 Ryan Curtin | |
| // Copyright (C) 2008-2014 NICTA (www.nicta.com.au) | | // Copyright (C) 2008-2014 NICTA (www.nicta.com.au) | |
| // | | // | |
| // This Source Code Form is subject to the terms of the Mozilla Public | | // This Source Code Form is subject to the terms of the Mozilla Public | |
| // License, v. 2.0. If a copy of the MPL was not distributed with this | | // License, v. 2.0. If a copy of the MPL was not distributed with this | |
| // file, You can obtain one at http://mozilla.org/MPL/2.0/. | | // file, You can obtain one at http://mozilla.org/MPL/2.0/. | |
| | | | |
| #if !defined(ARMA_USE_LAPACK) | | #if !defined(ARMA_USE_LAPACK) | |
| #define ARMA_USE_LAPACK | | #define ARMA_USE_LAPACK | |
|
| //// Uncomment the above line if you have LAPACK or a high-speed replacemen
t for LAPACK, | | //// Comment out the above line if you don't have LAPACK or a high-speed re
placement for LAPACK, | |
| //// such as Intel MKL, AMD ACML, or the Accelerate framework. | | //// such as Intel MKL, AMD ACML, or the Accelerate framework. | |
| //// LAPACK is required for matrix decompositions (eg. SVD) and matrix inve
rse. | | //// LAPACK is required for matrix decompositions (eg. SVD) and matrix inve
rse. | |
| #endif | | #endif | |
| | | | |
| #if !defined(ARMA_USE_BLAS) | | #if !defined(ARMA_USE_BLAS) | |
| #define ARMA_USE_BLAS | | #define ARMA_USE_BLAS | |
|
| //// Uncomment the above line if you have BLAS or a high-speed replacement
for BLAS, | | //// Comment out the above line if you don't have BLAS or a high-speed repl
acement for BLAS, | |
| //// such as OpenBLAS, GotoBLAS, Intel MKL, AMD ACML, or the Accelerate fra
mework. | | //// such as OpenBLAS, GotoBLAS, Intel MKL, AMD ACML, or the Accelerate fra
mework. | |
| //// BLAS is used for matrix multiplication. | | //// BLAS is used for matrix multiplication. | |
| //// Without BLAS, matrix multiplication will still work, but might be slow
er. | | //// Without BLAS, matrix multiplication will still work, but might be slow
er. | |
| #endif | | #endif | |
| | | | |
| #if !defined(ARMA_USE_ARPACK) | | #if !defined(ARMA_USE_ARPACK) | |
| /* #undef ARMA_USE_ARPACK */ | | /* #undef ARMA_USE_ARPACK */ | |
| //// Uncomment the above line if you have ARPACK or a high-speed replacemen
t for ARPACK. | | //// Uncomment the above line if you have ARPACK or a high-speed replacemen
t for ARPACK. | |
| //// ARPACK is required for eigendecompositions of sparse matrices, eg. eig
s_sym() | | //// ARPACK is required for eigendecompositions of sparse matrices, eg. eig
s_sym() | |
| #endif | | #endif | |
| | | | |
| skipping to change at line 95 | | skipping to change at line 95 | |
| /* #undef ARMA_USE_HDF5 */ | | /* #undef ARMA_USE_HDF5 */ | |
| //// Uncomment the above line to allow the ability to save and load matrice
s stored in HDF5 format; | | //// Uncomment the above line to allow the ability to save and load matrice
s stored in HDF5 format; | |
| //// the hdf5.h header file must be available on your system, | | //// the hdf5.h header file must be available on your system, | |
| //// and you will need to link with the hdf5 library (eg. -lhdf5) | | //// and you will need to link with the hdf5 library (eg. -lhdf5) | |
| #endif | | #endif | |
| | | | |
| /* #undef ARMA_USE_HDF5_ALT */ | | /* #undef ARMA_USE_HDF5_ALT */ | |
| #if defined(ARMA_USE_HDF5_ALT) && defined(ARMA_USE_WRAPPER) | | #if defined(ARMA_USE_HDF5_ALT) && defined(ARMA_USE_WRAPPER) | |
| #undef ARMA_USE_HDF5 | | #undef ARMA_USE_HDF5 | |
| #define ARMA_USE_HDF5 | | #define ARMA_USE_HDF5 | |
|
| | | | |
| | | #define ARMA_HDF5_INCLUDE_DIR / | |
| #endif | | #endif | |
| | | | |
| #if !defined(ARMA_MAT_PREALLOC) | | #if !defined(ARMA_MAT_PREALLOC) | |
| #define ARMA_MAT_PREALLOC 16 | | #define ARMA_MAT_PREALLOC 16 | |
| #endif | | #endif | |
| //// This is the number of preallocated elements used by matrices and vecto
rs; | | //// This is the number of preallocated elements used by matrices and vecto
rs; | |
| //// it must be an integer that is at least 1. | | //// it must be an integer that is at least 1. | |
| //// If you mainly use lots of very small vectors (eg. <= 4 elements), | | //// If you mainly use lots of very small vectors (eg. <= 4 elements), | |
| //// change the number to the size of your vectors. | | //// change the number to the size of your vectors. | |
| | | | |
| | | | |
End of changes. 3 change blocks. |
| 2 lines changed or deleted | | 4 lines changed or added | |
|
| debug.hpp | | debug.hpp | |
|
| // Copyright (C) 2008-2013 Conrad Sanderson | | // Copyright (C) 2008-2014 Conrad Sanderson | |
| // Copyright (C) 2008-2013 NICTA (www.nicta.com.au) | | // Copyright (C) 2008-2014 NICTA (www.nicta.com.au) | |
| // Copyright (C) 2011 Stanislav Funiak | | // Copyright (C) 2011 Stanislav Funiak | |
| // | | // | |
| // This Source Code Form is subject to the terms of the Mozilla Public | | // This Source Code Form is subject to the terms of the Mozilla Public | |
| // License, v. 2.0. If a copy of the MPL was not distributed with this | | // License, v. 2.0. If a copy of the MPL was not distributed with this | |
| // file, You can obtain one at http://mozilla.org/MPL/2.0/. | | // file, You can obtain one at http://mozilla.org/MPL/2.0/. | |
| | | | |
| //! \addtogroup debug | | //! \addtogroup debug | |
| //! @{ | | //! @{ | |
| | | | |
| template<typename T> | | template<typename T> | |
| | | | |
| skipping to change at line 85 | | skipping to change at line 85 | |
| arma_cold | | arma_cold | |
| arma_noinline | | arma_noinline | |
| static | | static | |
| void | | void | |
| arma_stop(const T1& x) | | arma_stop(const T1& x) | |
| { | | { | |
| #if defined(ARMA_PRINT_ERRORS) | | #if defined(ARMA_PRINT_ERRORS) | |
| { | | { | |
| std::ostream& out = get_stream_err1(); | | std::ostream& out = get_stream_err1(); | |
| | | | |
|
| out.flush(); | | | |
| | | | |
| out << '\n'; | | out << '\n'; | |
| out << "error: " << x << '\n'; | | out << "error: " << x << '\n'; | |
| out << '\n'; | | out << '\n'; | |
| out.flush(); | | out.flush(); | |
| } | | } | |
| #else | | #else | |
| { | | { | |
| arma_ignore(x); | | arma_ignore(x); | |
| } | | } | |
| #endif | | #endif | |
| | | | |
| skipping to change at line 112 | | skipping to change at line 110 | |
| arma_cold | | arma_cold | |
| arma_noinline | | arma_noinline | |
| static | | static | |
| void | | void | |
| arma_stop_bad_alloc(const T1& x) | | arma_stop_bad_alloc(const T1& x) | |
| { | | { | |
| #if defined(ARMA_PRINT_ERRORS) | | #if defined(ARMA_PRINT_ERRORS) | |
| { | | { | |
| std::ostream& out = get_stream_err2(); | | std::ostream& out = get_stream_err2(); | |
| | | | |
|
| out.flush(); | | | |
| | | | |
| out << '\n'; | | out << '\n'; | |
| out << "error: " << x << '\n'; | | out << "error: " << x << '\n'; | |
| out << '\n'; | | out << '\n'; | |
| out.flush(); | | out.flush(); | |
| } | | } | |
| #else | | #else | |
| { | | { | |
| arma_ignore(x); | | arma_ignore(x); | |
| } | | } | |
| #endif | | #endif | |
| | | | |
| skipping to change at line 143 | | skipping to change at line 139 | |
| arma_cold | | arma_cold | |
| arma_noinline | | arma_noinline | |
| static | | static | |
| void | | void | |
| arma_bad(const T1& x, const bool hurl = true) | | arma_bad(const T1& x, const bool hurl = true) | |
| { | | { | |
| #if defined(ARMA_PRINT_ERRORS) | | #if defined(ARMA_PRINT_ERRORS) | |
| { | | { | |
| std::ostream& out = get_stream_err2(); | | std::ostream& out = get_stream_err2(); | |
| | | | |
|
| out.flush(); | | | |
| | | | |
| out << '\n'; | | out << '\n'; | |
| out << "error: " << x << '\n'; | | out << "error: " << x << '\n'; | |
| out << '\n'; | | out << '\n'; | |
| out.flush(); | | out.flush(); | |
| } | | } | |
| #else | | #else | |
| { | | { | |
| arma_ignore(x); | | arma_ignore(x); | |
| } | | } | |
| #endif | | #endif | |
| | | | |
| skipping to change at line 1047 | | skipping to change at line 1041 | |
| | | | |
| std::ostream& out = get_stream_err1(); | | std::ostream& out = get_stream_err1(); | |
| | | | |
| out << "@ ---" << '\n'; | | out << "@ ---" << '\n'; | |
| out << "@ Armadillo " | | out << "@ Armadillo " | |
| << arma_version::major << '.' << arma_version::minor << '.' <<
arma_version::patch | | << arma_version::major << '.' << arma_version::minor << '.' <<
arma_version::patch | |
| << " (" << nickname << ")\n"; | | << " (" << nickname << ")\n"; | |
| | | | |
| out << "@ arma_config::use_wrapper = " << arma_config::use_wrapper
<< '\n'; | | out << "@ arma_config::use_wrapper = " << arma_config::use_wrapper
<< '\n'; | |
| out << "@ arma_config::use_cxx11 = " << arma_config::use_cxx11
<< '\n'; | | out << "@ arma_config::use_cxx11 = " << arma_config::use_cxx11
<< '\n'; | |
|
| | | out << "@ arma_config::openmp = " << arma_config::openmp
<< '\n'; | |
| out << "@ arma_config::lapack = " << arma_config::lapack
<< '\n'; | | out << "@ arma_config::lapack = " << arma_config::lapack
<< '\n'; | |
| out << "@ arma_config::blas = " << arma_config::blas
<< '\n'; | | out << "@ arma_config::blas = " << arma_config::blas
<< '\n'; | |
| out << "@ arma_config::arpack = " << arma_config::arpack
<< '\n'; | | out << "@ arma_config::arpack = " << arma_config::arpack
<< '\n'; | |
| out << "@ arma_config::atlas = " << arma_config::atlas
<< '\n'; | | out << "@ arma_config::atlas = " << arma_config::atlas
<< '\n'; | |
| out << "@ arma_config::hdf5 = " << arma_config::hdf5
<< '\n'; | | out << "@ arma_config::hdf5 = " << arma_config::hdf5
<< '\n'; | |
| out << "@ arma_config::good_comp = " << arma_config::good_comp
<< '\n'; | | out << "@ arma_config::good_comp = " << arma_config::good_comp
<< '\n'; | |
| out << "@ arma_config::extra_code = " << arma_config::extra_code
<< '\n'; | | out << "@ arma_config::extra_code = " << arma_config::extra_code
<< '\n'; | |
| out << "@ arma_config::mat_prealloc = " << arma_config::mat_preallo
c << '\n'; | | out << "@ arma_config::mat_prealloc = " << arma_config::mat_preallo
c << '\n'; | |
| out << "@ sizeof(void*) = " << sizeof(void*) << '\n'; | | out << "@ sizeof(void*) = " << sizeof(void*) << '\n'; | |
| out << "@ sizeof(uword) = " << sizeof(uword) << '\n'; | | out << "@ sizeof(uword) = " << sizeof(uword) << '\n'; | |
| | | | |
End of changes. 5 change blocks. |
| 8 lines changed or deleted | | 3 lines changed or added | |
|
| diskio_meat.hpp | | diskio_meat.hpp | |
| | | | |
| skipping to change at line 887 | | skipping to change at line 887 | |
| { | | { | |
| for(uword col=0; col < x.n_cols; ++col) | | for(uword col=0; col < x.n_cols; ++col) | |
| { | | { | |
| f.put(' '); | | f.put(' '); | |
| | | | |
| if( (is_float<eT>::value == true) || (is_double<eT>::value == true) ) | | if( (is_float<eT>::value == true) || (is_double<eT>::value == true) ) | |
| { | | { | |
| f.width(cell_width); | | f.width(cell_width); | |
| } | | } | |
| | | | |
|
| f << x.at(row,col); | | arma_ostream::print_elem(f, x.at(row,col), false); | |
| } | | } | |
| | | | |
| f.put('\n'); | | f.put('\n'); | |
| } | | } | |
| | | | |
| return f.good(); | | return f.good(); | |
| } | | } | |
| | | | |
| //! Save a matrix as raw binary (no header) | | //! Save a matrix as raw binary (no header) | |
| template<typename eT> | | template<typename eT> | |
| | | | |
| skipping to change at line 1005 | | skipping to change at line 1005 | |
| { | | { | |
| for(uword col=0; col < x.n_cols; ++col) | | for(uword col=0; col < x.n_cols; ++col) | |
| { | | { | |
| f.put(' '); | | f.put(' '); | |
| | | | |
| if( (is_float<eT>::value == true) || (is_double<eT>::value == true) ) | | if( (is_float<eT>::value == true) || (is_double<eT>::value == true) ) | |
| { | | { | |
| f.width(cell_width); | | f.width(cell_width); | |
| } | | } | |
| | | | |
|
| f << x.at(row,col); | | arma_ostream::print_elem(f, x.at(row,col), false); | |
| } | | } | |
| | | | |
| f.put('\n'); | | f.put('\n'); | |
| } | | } | |
| | | | |
| const bool save_okay = f.good(); | | const bool save_okay = f.good(); | |
| | | | |
| f.flags(orig_flags); | | f.flags(orig_flags); | |
| | | | |
| return save_okay; | | return save_okay; | |
| | | | |
| skipping to change at line 1073 | | skipping to change at line 1073 | |
| f.precision(12); | | f.precision(12); | |
| } | | } | |
| | | | |
| uword x_n_rows = x.n_rows; | | uword x_n_rows = x.n_rows; | |
| uword x_n_cols = x.n_cols; | | uword x_n_cols = x.n_cols; | |
| | | | |
| for(uword row=0; row < x_n_rows; ++row) | | for(uword row=0; row < x_n_rows; ++row) | |
| { | | { | |
| for(uword col=0; col < x_n_cols; ++col) | | for(uword col=0; col < x_n_cols; ++col) | |
| { | | { | |
|
| f << x.at(row,col); | | arma_ostream::print_elem(f, x.at(row,col), false); | |
| | | | |
| if( col < (x_n_cols-1) ) | | if( col < (x_n_cols-1) ) | |
| { | | { | |
| f.put(','); | | f.put(','); | |
| } | | } | |
| } | | } | |
| | | | |
| f.put('\n'); | | f.put('\n'); | |
| } | | } | |
| | | | |
| | | | |
| skipping to change at line 2980 | | skipping to change at line 2980 | |
| { | | { | |
| for(uword col=0; col < x.n_cols; ++col) | | for(uword col=0; col < x.n_cols; ++col) | |
| { | | { | |
| f.put(' '); | | f.put(' '); | |
| | | | |
| if( (is_float<eT>::value == true) || (is_double<eT>::value == true)
) | | if( (is_float<eT>::value == true) || (is_double<eT>::value == true)
) | |
| { | | { | |
| f.width(cell_width); | | f.width(cell_width); | |
| } | | } | |
| | | | |
|
| f << x.at(row,col,slice); | | arma_ostream::print_elem(f, x.at(row,col,slice), false); | |
| } | | } | |
| | | | |
| f.put('\n'); | | f.put('\n'); | |
| } | | } | |
| } | | } | |
| | | | |
| return f.good(); | | return f.good(); | |
| } | | } | |
| | | | |
| //! Save a cube as raw binary (no header) | | //! Save a cube as raw binary (no header) | |
| | | | |
| skipping to change at line 3101 | | skipping to change at line 3101 | |
| { | | { | |
| for(uword col=0; col < x.n_cols; ++col) | | for(uword col=0; col < x.n_cols; ++col) | |
| { | | { | |
| f.put(' '); | | f.put(' '); | |
| | | | |
| if( (is_float<eT>::value == true) || (is_double<eT>::value == true)
) | | if( (is_float<eT>::value == true) || (is_double<eT>::value == true)
) | |
| { | | { | |
| f.width(cell_width); | | f.width(cell_width); | |
| } | | } | |
| | | | |
|
| f << x.at(row,col,slice); | | arma_ostream::print_elem(f, x.at(row,col,slice), false); | |
| } | | } | |
| | | | |
| f.put('\n'); | | f.put('\n'); | |
| } | | } | |
| } | | } | |
| | | | |
| const bool save_okay = f.good(); | | const bool save_okay = f.good(); | |
| | | | |
| f.flags(orig_flags); | | f.flags(orig_flags); | |
| | | | |
| | | | |
End of changes. 5 change blocks. |
| 5 lines changed or deleted | | 5 lines changed or added | |
|
| fn_chol.hpp | | fn_chol.hpp | |
|
| // Copyright (C) 2009-2011 Conrad Sanderson | | // Copyright (C) 2009-2014 Conrad Sanderson | |
| // Copyright (C) 2009-2011 NICTA (www.nicta.com.au) | | // Copyright (C) 2009-2014 NICTA (www.nicta.com.au) | |
| // | | // | |
| // This Source Code Form is subject to the terms of the Mozilla Public | | // This Source Code Form is subject to the terms of the Mozilla Public | |
| // License, v. 2.0. If a copy of the MPL was not distributed with this | | // License, v. 2.0. If a copy of the MPL was not distributed with this | |
| // file, You can obtain one at http://mozilla.org/MPL/2.0/. | | // file, You can obtain one at http://mozilla.org/MPL/2.0/. | |
| | | | |
| //! \addtogroup fn_chol | | //! \addtogroup fn_chol | |
| //! @{ | | //! @{ | |
| | | | |
| template<typename T1> | | template<typename T1> | |
| inline | | inline | |
| const Op<T1, op_chol> | | const Op<T1, op_chol> | |
| chol | | chol | |
| ( | | ( | |
| const Base<typename T1::elem_type,T1>& X, | | const Base<typename T1::elem_type,T1>& X, | |
|
| | | const char* layout = "upper", | |
| const typename arma_blas_type_only<typename T1::elem_type>::result* junk
= 0 | | const typename arma_blas_type_only<typename T1::elem_type>::result* junk
= 0 | |
| ) | | ) | |
| { | | { | |
| arma_extra_debug_sigprint(); | | arma_extra_debug_sigprint(); | |
| arma_ignore(junk); | | arma_ignore(junk); | |
| | | | |
|
| return Op<T1, op_chol>(X.get_ref()); | | const char sig = (layout != NULL) ? layout[0] : char(0); | |
| | | | |
| | | arma_debug_check( ((sig != 'u') && (sig != 'l')), "chol(): layout must be | |
| | | \"upper\" or \"lower\"" ); | |
| | | | |
| | | return Op<T1, op_chol>(X.get_ref(), ((sig == 'u') ? 0 : 1), 0 ); | |
| } | | } | |
| | | | |
| template<typename T1> | | template<typename T1> | |
| inline | | inline | |
| bool | | bool | |
| chol | | chol | |
| ( | | ( | |
| Mat<typename T1::elem_type>& out, | | Mat<typename T1::elem_type>& out, | |
| const Base<typename T1::elem_type,T1>& X, | | const Base<typename T1::elem_type,T1>& X, | |
|
| | | const char* layout = "upper", | |
| const typename arma_blas_type_only<typename T1::elem_type>::result* junk
= 0 | | const typename arma_blas_type_only<typename T1::elem_type>::result* junk
= 0 | |
| ) | | ) | |
| { | | { | |
| arma_extra_debug_sigprint(); | | arma_extra_debug_sigprint(); | |
| arma_ignore(junk); | | arma_ignore(junk); | |
| | | | |
| try | | try | |
| { | | { | |
|
| out = chol(X); | | out = chol(X, layout); | |
| } | | } | |
| catch(std::runtime_error&) | | catch(std::runtime_error&) | |
| { | | { | |
| return false; | | return false; | |
| } | | } | |
| | | | |
| return true; | | return true; | |
| } | | } | |
| | | | |
| //! @} | | //! @} | |
| | | | |
End of changes. 5 change blocks. |
| 4 lines changed or deleted | | 11 lines changed or added | |
|
| mul_gemm.hpp | | mul_gemm.hpp | |
|
| // Copyright (C) 2008-2013 Conrad Sanderson | | // Copyright (C) 2008-2014 Conrad Sanderson | |
| // Copyright (C) 2008-2013 NICTA (www.nicta.com.au) | | // Copyright (C) 2008-2014 NICTA (www.nicta.com.au) | |
| // | | // | |
| // This Source Code Form is subject to the terms of the Mozilla Public | | // This Source Code Form is subject to the terms of the Mozilla Public | |
| // License, v. 2.0. If a copy of the MPL was not distributed with this | | // License, v. 2.0. If a copy of the MPL was not distributed with this | |
| // file, You can obtain one at http://mozilla.org/MPL/2.0/. | | // file, You can obtain one at http://mozilla.org/MPL/2.0/. | |
| | | | |
| //! \addtogroup gemm | | //! \addtogroup gemm | |
| //! @{ | | //! @{ | |
| | | | |
| //! for tiny square matrices, size <= 4x4 | | //! for tiny square matrices, size <= 4x4 | |
| template<const bool do_trans_A=false, const bool use_alpha=false, const boo
l use_beta=false> | | template<const bool do_trans_A=false, const bool use_alpha=false, const boo
l use_beta=false> | |
| | | | |
| skipping to change at line 81 | | skipping to change at line 81 | |
| const uword B_n_cols = B.n_cols; | | const uword B_n_cols = B.n_cols; | |
| | | | |
| if( (do_trans_A == false) && (do_trans_B == false) ) | | if( (do_trans_A == false) && (do_trans_B == false) ) | |
| { | | { | |
| arma_aligned podarray<eT> tmp(A_n_cols); | | arma_aligned podarray<eT> tmp(A_n_cols); | |
| | | | |
| eT* A_rowdata = tmp.memptr(); | | eT* A_rowdata = tmp.memptr(); | |
| | | | |
| for(uword row_A=0; row_A < A_n_rows; ++row_A) | | for(uword row_A=0; row_A < A_n_rows; ++row_A) | |
| { | | { | |
|
| //tmp.copy_row(A, row_A); | | tmp.copy_row(A, row_A); | |
| const eT acc0 = op_dot::dot_and_copy_row(A_rowdata, A, row_A, B.col | | | |
| ptr(0), A_n_cols); | | | |
| | | | |
|
| if( (use_alpha == false) && (use_beta == false) ) { C.at(row_ | | for(uword col_B=0; col_B < B_n_cols; ++col_B) | |
| A,0) = acc0; } | | | |
| else if( (use_alpha == true ) && (use_beta == false) ) { C.at(row_ | | | |
| A,0) = alpha*acc0; } | | | |
| else if( (use_alpha == false) && (use_beta == true ) ) { C.at(row_ | | | |
| A,0) = acc0 + beta*C.at(row_A,0); } | | | |
| else if( (use_alpha == true ) && (use_beta == true ) ) { C.at(row_ | | | |
| A,0) = alpha*acc0 + beta*C.at(row_A,0); } | | | |
| | | | |
| //for(uword col_B=0; col_B < B_n_cols; ++col_B) | | | |
| for(uword col_B=1; col_B < B_n_cols; ++col_B) | | | |
| { | | { | |
| const eT acc = op_dot::direct_dot_arma(B_n_rows, A_rowdata, B.col
ptr(col_B)); | | const eT acc = op_dot::direct_dot_arma(B_n_rows, A_rowdata, B.col
ptr(col_B)); | |
| | | | |
| if( (use_alpha == false) && (use_beta == false) ) { C.at(ro
w_A,col_B) = acc; } | | if( (use_alpha == false) && (use_beta == false) ) { C.at(ro
w_A,col_B) = acc; } | |
| else if( (use_alpha == true ) && (use_beta == false) ) { C.at(ro
w_A,col_B) = alpha*acc; } | | else if( (use_alpha == true ) && (use_beta == false) ) { C.at(ro
w_A,col_B) = alpha*acc; } | |
| else if( (use_alpha == false) && (use_beta == true ) ) { C.at(ro
w_A,col_B) = acc + beta*C.at(row_A,col_B); } | | else if( (use_alpha == false) && (use_beta == true ) ) { C.at(ro
w_A,col_B) = acc + beta*C.at(row_A,col_B); } | |
| else if( (use_alpha == true ) && (use_beta == true ) ) { C.at(ro
w_A,col_B) = alpha*acc + beta*C.at(row_A,col_B); } | | else if( (use_alpha == true ) && (use_beta == true ) ) { C.at(ro
w_A,col_B) = alpha*acc + beta*C.at(row_A,col_B); } | |
| } | | } | |
| } | | } | |
| } | | } | |
| | | | |
| skipping to change at line 183 | | skipping to change at line 176 | |
| const TA& A, | | const TA& A, | |
| const TB& B, | | const TB& B, | |
| const eT alpha = eT(1), | | const eT alpha = eT(1), | |
| const eT beta = eT(0), | | const eT beta = eT(0), | |
| const typename arma_not_cx<eT>::result* junk = 0 | | const typename arma_not_cx<eT>::result* junk = 0 | |
| ) | | ) | |
| { | | { | |
| arma_extra_debug_sigprint(); | | arma_extra_debug_sigprint(); | |
| arma_ignore(junk); | | arma_ignore(junk); | |
| | | | |
|
| const uword A_n_rows = A.n_rows; | | gemm_emul_large<do_trans_A, do_trans_B, use_alpha, use_beta>::apply(C, | |
| const uword A_n_cols = A.n_cols; | | A, B, alpha, beta); | |
| | | | |
| const uword B_n_rows = B.n_rows; | | | |
| const uword B_n_cols = B.n_cols; | | | |
| | | | |
| if( (A_n_rows <= 4) && (A_n_rows == A_n_cols) && (A_n_rows == B_n_rows) | | | |
| && (B_n_rows == B_n_cols) ) | | | |
| { | | | |
| if(do_trans_B == false) | | | |
| { | | | |
| gemm_emul_tinysq<do_trans_A, use_alpha, use_beta>::apply(C, A, B, a | | | |
| lpha, beta); | | | |
| } | | | |
| else | | | |
| { | | | |
| Mat<eT> BB(A_n_rows, A_n_rows); | | | |
| op_strans::apply_mat_noalias_tinysq(BB, B); | | | |
| | | | |
| gemm_emul_tinysq<do_trans_A, use_alpha, use_beta>::apply(C, A, BB, | | | |
| alpha, beta); | | | |
| } | | | |
| } | | | |
| else | | | |
| { | | | |
| gemm_emul_large<do_trans_A, do_trans_B, use_alpha, use_beta>::apply(C | | | |
| , A, B, alpha, beta); | | | |
| } | | | |
| } | | } | |
| | | | |
| template<typename eT> | | template<typename eT> | |
| arma_hot | | arma_hot | |
| inline | | inline | |
| static | | static | |
| void | | void | |
| apply | | apply | |
| ( | | ( | |
| Mat<eT>& C, | | Mat<eT>& C, | |
| | | | |
| skipping to change at line 238 | | skipping to change at line 208 | |
| | | | |
| Mat<eT> tmp_A; | | Mat<eT> tmp_A; | |
| Mat<eT> tmp_B; | | Mat<eT> tmp_B; | |
| | | | |
| if(do_trans_A) { op_htrans::apply_mat_noalias(tmp_A, A); } | | if(do_trans_A) { op_htrans::apply_mat_noalias(tmp_A, A); } | |
| if(do_trans_B) { op_htrans::apply_mat_noalias(tmp_B, B); } | | if(do_trans_B) { op_htrans::apply_mat_noalias(tmp_B, B); } | |
| | | | |
| const Mat<eT>& AA = (do_trans_A == false) ? A : tmp_A; | | const Mat<eT>& AA = (do_trans_A == false) ? A : tmp_A; | |
| const Mat<eT>& BB = (do_trans_B == false) ? B : tmp_B; | | const Mat<eT>& BB = (do_trans_B == false) ? B : tmp_B; | |
| | | | |
|
| const uword A_n_rows = AA.n_rows; | | gemm_emul_large<false, false, use_alpha, use_beta>::apply(C, AA, BB, al | |
| const uword A_n_cols = AA.n_cols; | | pha, beta); | |
| | | | |
| const uword B_n_rows = BB.n_rows; | | | |
| const uword B_n_cols = BB.n_cols; | | | |
| | | | |
| if( (A_n_rows <= 4) && (A_n_rows == A_n_cols) && (A_n_rows == B_n_rows) | | | |
| && (B_n_rows == B_n_cols) ) | | | |
| { | | | |
| gemm_emul_tinysq<false, use_alpha, use_beta>::apply(C, AA, BB, alpha, | | | |
| beta); | | | |
| } | | | |
| else | | | |
| { | | | |
| gemm_emul_large<false, false, use_alpha, use_beta>::apply(C, AA, BB, | | | |
| alpha, beta); | | | |
| } | | | |
| } | | } | |
| | | | |
| }; | | }; | |
| | | | |
| //! \brief | | //! \brief | |
| //! Wrapper for ATLAS/BLAS dgemm function, using template arguments to cont
rol the arguments passed to dgemm. | | //! Wrapper for ATLAS/BLAS dgemm function, using template arguments to cont
rol the arguments passed to dgemm. | |
| //! Matrix 'C' is assumed to have been set to the correct size (i.e. taking
into account transposes) | | //! Matrix 'C' is assumed to have been set to the correct size (i.e. taking
into account transposes) | |
| | | | |
| template<const bool do_trans_A=false, const bool do_trans_B=false, const bo
ol use_alpha=false, const bool use_beta=false> | | template<const bool do_trans_A=false, const bool do_trans_B=false, const bo
ol use_alpha=false, const bool use_beta=false> | |
| class gemm | | class gemm | |
| | | | |
| skipping to change at line 273 | | skipping to change at line 230 | |
| public: | | public: | |
| | | | |
| template<typename eT, typename TA, typename TB> | | template<typename eT, typename TA, typename TB> | |
| inline | | inline | |
| static | | static | |
| void | | void | |
| apply_blas_type( Mat<eT>& C, const TA& A, const TB& B, const eT alpha = e
T(1), const eT beta = eT(0) ) | | apply_blas_type( Mat<eT>& C, const TA& A, const TB& B, const eT alpha = e
T(1), const eT beta = eT(0) ) | |
| { | | { | |
| arma_extra_debug_sigprint(); | | arma_extra_debug_sigprint(); | |
| | | | |
|
| const uword threshold = (is_Mat_fixed<TA>::value && is_Mat_fixed<TB>::v | | if( (A.n_rows <= 4) && (A.n_rows == A.n_cols) && (A.n_rows == B.n_rows) | |
| alue) | | && (B.n_rows == B.n_cols) && (is_cx<eT>::no) ) | |
| ? (is_cx<eT>::yes ? 16u : 64u) | | | |
| : (is_cx<eT>::yes ? 16u : 48u); | | | |
| | | | |
| if( (A.n_elem <= threshold) && (B.n_elem <= threshold) ) | | | |
| { | | { | |
|
| gemm_emul<do_trans_A, do_trans_B, use_alpha, use_beta>::apply(C,A,B,a | | if(do_trans_B == false) | |
| lpha,beta); | | { | |
| | | gemm_emul_tinysq<do_trans_A, use_alpha, use_beta>::apply(C, A, B, a | |
| | | lpha, beta); | |
| | | } | |
| | | else | |
| | | { | |
| | | Mat<eT> BB(B.n_rows, B.n_rows); | |
| | | | |
| | | op_strans::apply_mat_noalias_tinysq(BB, B); | |
| | | | |
| | | gemm_emul_tinysq<do_trans_A, use_alpha, use_beta>::apply(C, A, BB, | |
| | | alpha, beta); | |
| | | } | |
| } | | } | |
| else | | else | |
| { | | { | |
| #if defined(ARMA_USE_ATLAS) | | #if defined(ARMA_USE_ATLAS) | |
| { | | { | |
| arma_extra_debug_print("atlas::cblas_gemm()"); | | arma_extra_debug_print("atlas::cblas_gemm()"); | |
| | | | |
| atlas::cblas_gemm<eT> | | atlas::cblas_gemm<eT> | |
| ( | | ( | |
| atlas::CblasColMajor, | | atlas::CblasColMajor, | |
| | | | |
End of changes. 7 change blocks. |
| 69 lines changed or deleted | | 24 lines changed or added | |
|
| mul_gemv.hpp | | mul_gemv.hpp | |
|
| // Copyright (C) 2008-2013 Conrad Sanderson | | // Copyright (C) 2008-2014 Conrad Sanderson | |
| // Copyright (C) 2008-2013 NICTA (www.nicta.com.au) | | // Copyright (C) 2008-2014 NICTA (www.nicta.com.au) | |
| // | | // | |
| // This Source Code Form is subject to the terms of the Mozilla Public | | // This Source Code Form is subject to the terms of the Mozilla Public | |
| // License, v. 2.0. If a copy of the MPL was not distributed with this | | // License, v. 2.0. If a copy of the MPL was not distributed with this | |
| // file, You can obtain one at http://mozilla.org/MPL/2.0/. | | // file, You can obtain one at http://mozilla.org/MPL/2.0/. | |
| | | | |
| //! \addtogroup gemv | | //! \addtogroup gemv | |
| //! @{ | | //! @{ | |
| | | | |
| //! for tiny square matrices, size <= 4x4 | | //! for tiny square matrices, size <= 4x4 | |
| template<const bool do_trans_A=false, const bool use_alpha=false, const boo
l use_beta=false> | | template<const bool do_trans_A=false, const bool use_alpha=false, const boo
l use_beta=false> | |
| | | | |
| skipping to change at line 120 | | skipping to change at line 120 | |
| } | | } | |
| break; | | break; | |
| | | | |
| default: | | default: | |
| ; | | ; | |
| } | | } | |
| } | | } | |
| | | | |
| }; | | }; | |
| | | | |
|
| class gemv_emul_large_helper | | class gemv_emul_helper | |
| { | | { | |
| public: | | public: | |
| | | | |
| template<typename eT, typename TA> | | template<typename eT, typename TA> | |
| arma_hot | | arma_hot | |
| inline | | inline | |
| static | | static | |
| typename arma_not_cx<eT>::result | | typename arma_not_cx<eT>::result | |
| dot_row_col( const TA& A, const eT* x, const uword row, const uword N ) | | dot_row_col( const TA& A, const eT* x, const uword row, const uword N ) | |
| { | | { | |
| | | | |
| skipping to change at line 189 | | skipping to change at line 189 | |
| return std::complex<T>(val_real, val_imag); | | return std::complex<T>(val_real, val_imag); | |
| } | | } | |
| | | | |
| }; | | }; | |
| | | | |
| //! \brief | | //! \brief | |
| //! Partial emulation of ATLAS/BLAS gemv(). | | //! Partial emulation of ATLAS/BLAS gemv(). | |
| //! 'y' is assumed to have been set to the correct size (i.e. taking into a
ccount the transpose) | | //! 'y' is assumed to have been set to the correct size (i.e. taking into a
ccount the transpose) | |
| | | | |
| template<const bool do_trans_A=false, const bool use_alpha=false, const boo
l use_beta=false> | | template<const bool do_trans_A=false, const bool use_alpha=false, const boo
l use_beta=false> | |
|
| class gemv_emul_large | | class gemv_emul | |
| { | | { | |
| public: | | public: | |
| | | | |
| template<typename eT, typename TA> | | template<typename eT, typename TA> | |
| arma_hot | | arma_hot | |
| inline | | inline | |
| static | | static | |
| void | | void | |
| apply( eT* y, const TA& A, const eT* x, const eT alpha = eT(1), const eT
beta = eT(0) ) | | apply( eT* y, const TA& A, const eT* x, const eT alpha = eT(1), const eT
beta = eT(0) ) | |
| { | | { | |
| | | | |
| skipping to change at line 219 | | skipping to change at line 219 | |
| const eT acc = op_dot::direct_dot_arma(A_n_cols, A.memptr(), x); | | const eT acc = op_dot::direct_dot_arma(A_n_cols, A.memptr(), x); | |
| | | | |
| if( (use_alpha == false) && (use_beta == false) ) { y[0] =
acc; } | | if( (use_alpha == false) && (use_beta == false) ) { y[0] =
acc; } | |
| else if( (use_alpha == true ) && (use_beta == false) ) { y[0] = al
pha*acc; } | | else if( (use_alpha == true ) && (use_beta == false) ) { y[0] = al
pha*acc; } | |
| else if( (use_alpha == false) && (use_beta == true ) ) { y[0] =
acc + beta*y[0]; } | | else if( (use_alpha == false) && (use_beta == true ) ) { y[0] =
acc + beta*y[0]; } | |
| else if( (use_alpha == true ) && (use_beta == true ) ) { y[0] = al
pha*acc + beta*y[0]; } | | else if( (use_alpha == true ) && (use_beta == true ) ) { y[0] = al
pha*acc + beta*y[0]; } | |
| } | | } | |
| else | | else | |
| for(uword row=0; row < A_n_rows; ++row) | | for(uword row=0; row < A_n_rows; ++row) | |
| { | | { | |
|
| const eT acc = gemv_emul_large_helper::dot_row_col(A, x, row, A_n_c
ols); | | const eT acc = gemv_emul_helper::dot_row_col(A, x, row, A_n_cols); | |
| | | | |
| if( (use_alpha == false) && (use_beta == false) ) { y[row] =
acc; } | | if( (use_alpha == false) && (use_beta == false) ) { y[row] =
acc; } | |
| else if( (use_alpha == true ) && (use_beta == false) ) { y[row] =
alpha*acc; } | | else if( (use_alpha == true ) && (use_beta == false) ) { y[row] =
alpha*acc; } | |
| else if( (use_alpha == false) && (use_beta == true ) ) { y[row] =
acc + beta*y[row]; } | | else if( (use_alpha == false) && (use_beta == true ) ) { y[row] =
acc + beta*y[row]; } | |
| else if( (use_alpha == true ) && (use_beta == true ) ) { y[row] =
alpha*acc + beta*y[row]; } | | else if( (use_alpha == true ) && (use_beta == true ) ) { y[row] =
alpha*acc + beta*y[row]; } | |
| } | | } | |
| } | | } | |
| else | | else | |
| if(do_trans_A == true) | | if(do_trans_A == true) | |
| { | | { | |
|
| for(uword col=0; col < A_n_cols; ++col) | | if(is_cx<eT>::no) | |
| { | | { | |
|
| // col is interpreted as row when storing the results in 'y' | | for(uword col=0; col < A_n_cols; ++col) | |
| | | { | |
| // const eT* A_coldata = A.colptr(col); | | // col is interpreted as row when storing the results in 'y' | |
| // | | | |
| // eT acc = eT(0); | | | |
| // for(uword row=0; row < A_n_rows; ++row) | | | |
| // { | | | |
| // acc += A_coldata[row] * x[row]; | | | |
| // } | | | |
| | | | |
| const eT acc = op_dot::direct_dot_arma(A_n_rows, A.colptr(col), x); | | | |
| | | | |
| if( (use_alpha == false) && (use_beta == false) ) { y[col] = | | | |
| acc; } | | | |
| else if( (use_alpha == true ) && (use_beta == false) ) { y[col] = | | | |
| alpha*acc; } | | | |
| else if( (use_alpha == false) && (use_beta == true ) ) { y[col] = | | | |
| acc + beta*y[col]; } | | | |
| else if( (use_alpha == true ) && (use_beta == true ) ) { y[col] = | | | |
| alpha*acc + beta*y[col]; } | | | |
| | | | |
|
| | | // const eT* A_coldata = A.colptr(col); | |
| | | // | |
| | | // eT acc = eT(0); | |
| | | // for(uword row=0; row < A_n_rows; ++row) | |
| | | // { | |
| | | // acc += A_coldata[row] * x[row]; | |
| | | // } | |
| | | | |
| | | const eT acc = op_dot::direct_dot_arma(A_n_rows, A.colptr(col), x | |
| | | ); | |
| | | | |
| | | if( (use_alpha == false) && (use_beta == false) ) { y[col] | |
| | | = acc; } | |
| | | else if( (use_alpha == true ) && (use_beta == false) ) { y[col] | |
| | | = alpha*acc; } | |
| | | else if( (use_alpha == false) && (use_beta == true ) ) { y[col] | |
| | | = acc + beta*y[col]; } | |
| | | else if( (use_alpha == true ) && (use_beta == true ) ) { y[col] | |
| | | = alpha*acc + beta*y[col]; } | |
| | | } | |
| } | | } | |
|
| } | | else | |
| } | | { | |
| | | Mat<eT> AA; | |
| }; | | | |
| | | | |
| template<const bool do_trans_A=false, const bool use_alpha=false, const boo | | | |
| l use_beta=false> | | | |
| class gemv_emul | | | |
| { | | | |
| public: | | | |
| | | | |
| template<typename eT, typename TA> | | | |
| arma_hot | | | |
| inline | | | |
| static | | | |
| void | | | |
| apply( eT* y, const TA& A, const eT* x, const eT alpha = eT(1), const eT | | | |
| beta = eT(0), const typename arma_not_cx<eT>::result* junk = 0 ) | | | |
| { | | | |
| arma_extra_debug_sigprint(); | | | |
| arma_ignore(junk); | | | |
| | | | |
|
| const uword A_n_rows = A.n_rows; | | op_htrans::apply_mat_noalias(AA, A); | |
| const uword A_n_cols = A.n_cols; | | | |
| | | | |
|
| if( (A_n_rows <= 4) && (A_n_rows == A_n_cols) ) | | gemv_emul<false, use_alpha, use_beta>::apply(y, AA, x, alpha, beta) | |
| { | | ; | |
| gemv_emul_tinysq<do_trans_A, use_alpha, use_beta>::apply(y, A, x, alp | | } | |
| ha, beta); | | | |
| } | | | |
| else | | | |
| { | | | |
| gemv_emul_large<do_trans_A, use_alpha, use_beta>::apply(y, A, x, alph | | | |
| a, beta); | | | |
| } | | } | |
| } | | } | |
| | | | |
|
| template<typename eT> | | | |
| arma_hot | | | |
| inline | | | |
| static | | | |
| void | | | |
| apply( eT* y, const Mat<eT>& A, const eT* x, const eT alpha = eT(1), cons | | | |
| t eT beta = eT(0), const typename arma_cx_only<eT>::result* junk = 0 ) | | | |
| { | | | |
| arma_extra_debug_sigprint(); | | | |
| arma_ignore(junk); | | | |
| | | | |
| Mat<eT> tmp_A; | | | |
| | | | |
| if(do_trans_A) | | | |
| { | | | |
| op_htrans::apply_mat_noalias(tmp_A, A); | | | |
| } | | | |
| | | | |
| const Mat<eT>& AA = (do_trans_A == false) ? A : tmp_A; | | | |
| | | | |
| const uword AA_n_rows = AA.n_rows; | | | |
| const uword AA_n_cols = AA.n_cols; | | | |
| | | | |
| if( (AA_n_rows <= 4) && (AA_n_rows == AA_n_cols) ) | | | |
| { | | | |
| gemv_emul_tinysq<false, use_alpha, use_beta>::apply(y, AA, x, alpha, | | | |
| beta); | | | |
| } | | | |
| else | | | |
| { | | | |
| gemv_emul_large<false, use_alpha, use_beta>::apply(y, AA, x, alpha, b | | | |
| eta); | | | |
| } | | | |
| } | | | |
| }; | | }; | |
| | | | |
| //! \brief | | //! \brief | |
| //! Wrapper for ATLAS/BLAS gemv function, using template arguments to contr
ol the arguments passed to gemv. | | //! Wrapper for ATLAS/BLAS gemv function, using template arguments to contr
ol the arguments passed to gemv. | |
| //! 'y' is assumed to have been set to the correct size (i.e. taking into a
ccount the transpose) | | //! 'y' is assumed to have been set to the correct size (i.e. taking into a
ccount the transpose) | |
| | | | |
| template<const bool do_trans_A=false, const bool use_alpha=false, const boo
l use_beta=false> | | template<const bool do_trans_A=false, const bool use_alpha=false, const boo
l use_beta=false> | |
| class gemv | | class gemv | |
| { | | { | |
| public: | | public: | |
| | | | |
| template<typename eT, typename TA> | | template<typename eT, typename TA> | |
| inline | | inline | |
| static | | static | |
| void | | void | |
| apply_blas_type( eT* y, const TA& A, const eT* x, const eT alpha = eT(1),
const eT beta = eT(0) ) | | apply_blas_type( eT* y, const TA& A, const eT* x, const eT alpha = eT(1),
const eT beta = eT(0) ) | |
| { | | { | |
| arma_extra_debug_sigprint(); | | arma_extra_debug_sigprint(); | |
| | | | |
|
| //const uword threshold = (is_cx<eT>::yes) ? 16u : 64u; | | if( (A.n_rows <= 4) && (A.n_rows == A.n_cols) && (is_cx<eT>::no) ) | |
| const uword threshold = (is_cx<eT>::yes) ? 64u : 100u; | | | |
| | | | |
| if(A.n_elem <= threshold) | | | |
| { | | { | |
|
| gemv_emul<do_trans_A, use_alpha, use_beta>::apply(y,A,x,alpha,beta); | | gemv_emul_tinysq<do_trans_A, use_alpha, use_beta>::apply(y, A, x, alp
ha, beta); | |
| } | | } | |
| else | | else | |
| { | | { | |
| #if defined(ARMA_USE_ATLAS) | | #if defined(ARMA_USE_ATLAS) | |
| { | | { | |
| if(is_cx<eT>::no) | | if(is_cx<eT>::no) | |
| { | | { | |
| // use gemm() instead of gemv() to work around a speed issue in A
tlas 3.8.4 | | // use gemm() instead of gemv() to work around a speed issue in A
tlas 3.8.4 | |
| | | | |
| arma_extra_debug_print("atlas::cblas_gemm()"); | | arma_extra_debug_print("atlas::cblas_gemm()"); | |
| | | | |
End of changes. 13 change blocks. |
| 97 lines changed or deleted | | 38 lines changed or added | |
|