| cublas.h | | cublas.h | |
| /* | | /* | |
|
| * Copyright 1993-2009 NVIDIA Corporation. All rights reserved. | | * Copyright 1993-2010 NVIDIA Corporation. All rights reserved. | |
| * | | * | |
| * NOTICE TO USER: | | * NOTICE TO USER: | |
| * | | * | |
| * This source code is subject to NVIDIA ownership rights under U.S. and | | * This source code is subject to NVIDIA ownership rights under U.S. and | |
| * international Copyright laws. Users and possessors of this source code | | * international Copyright laws. Users and possessors of this source code | |
| * are hereby granted a nonexclusive, royalty-free license to use this code | | * are hereby granted a nonexclusive, royalty-free license to use this code | |
| * in individual and commercial software. | | * in individual and commercial software. | |
| * | | * | |
| * NVIDIA MAKES NO REPRESENTATION ABOUT THE SUITABILITY OF THIS SOURCE | | * NVIDIA MAKES NO REPRESENTATION ABOUT THE SUITABILITY OF THIS SOURCE | |
| * CODE FOR ANY PURPOSE. IT IS PROVIDED "AS IS" WITHOUT EXPRESS OR | | * CODE FOR ANY PURPOSE. IT IS PROVIDED "AS IS" WITHOUT EXPRESS OR | |
| | | | |
| skipping to change at line 554 | | skipping to change at line 554 | |
| * Error Status | | * Error Status | |
| * ------------ | | * ------------ | |
| * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialize
d | | * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialize
d | |
| * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU | | * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU | |
| */ | | */ | |
| void CUBLASAPI cublasSrot (int n, float *x, int incx, float *y, int incy, | | void CUBLASAPI cublasSrot (int n, float *x, int incx, float *y, int incy, | |
| float sc, float ss); | | float sc, float ss); | |
| | | | |
| /* | | /* | |
| * void | | * void | |
|
| * cublasSrotg (float *sa, float *sb, float *sc, float *ss) | | * cublasSrotg (float *host_sa, float *host_sb, float *host_sc, float *host
_ss) | |
| * | | * | |
| * constructs the Givens transformation | | * constructs the Givens transformation | |
| * | | * | |
| * ( sc ss ) | | * ( sc ss ) | |
| * G = ( ) , sc^2 + ss^2 = 1, | | * G = ( ) , sc^2 + ss^2 = 1, | |
| * (-ss sc ) | | * (-ss sc ) | |
| * | | * | |
| * which zeros the second entry of the 2-vector transpose(sa, sb). | | * which zeros the second entry of the 2-vector transpose(sa, sb). | |
| * | | * | |
| * The quantity r = (+/-) sqrt (sa^2 + sb^2) overwrites sa in storage. The | | * The quantity r = (+/-) sqrt (sa^2 + sb^2) overwrites sa in storage. The | |
| * value of sb is overwritten by a value z which allows sc and ss to be | | * value of sb is overwritten by a value z which allows sc and ss to be | |
| * recovered by the following algorithm: | | * recovered by the following algorithm: | |
| * | | * | |
| * if z=1 set sc = 0.0 and ss = 1.0 | | * if z=1 set sc = 0.0 and ss = 1.0 | |
| * if abs(z) < 1 set sc = sqrt(1-z^2) and ss = z | | * if abs(z) < 1 set sc = sqrt(1-z^2) and ss = z | |
| * if abs(z) > 1 set sc = 1/z and ss = sqrt(1-sc^2) | | * if abs(z) > 1 set sc = 1/z and ss = sqrt(1-sc^2) | |
| * | | * | |
| * The function srot (n, x, incx, y, incy, sc, ss) normally is called next | | * The function srot (n, x, incx, y, incy, sc, ss) normally is called next | |
| * to apply the transformation to a 2 x n matrix. | | * to apply the transformation to a 2 x n matrix. | |
|
| | | * Note that this function is provided for completeness and runs exclusively | |
| | | * on the Host. | |
| * | | * | |
| * Input | | * Input | |
| * ----- | | * ----- | |
| * sa single precision scalar | | * sa single precision scalar | |
| * sb single precision scalar | | * sb single precision scalar | |
| * | | * | |
| * Output | | * Output | |
| * ------ | | * ------ | |
| * sa single precision r | | * sa single precision r | |
| * sb single precision z | | * sb single precision z | |
| * sc single precision result | | * sc single precision result | |
| * ss single precision result | | * ss single precision result | |
| * | | * | |
| * Reference: http://www.netlib.org/blas/srotg.f | | * Reference: http://www.netlib.org/blas/srotg.f | |
| * | | * | |
| * This function does not set any error status. | | * This function does not set any error status. | |
| */ | | */ | |
|
| void CUBLASAPI cublasSrotg (float *sa, float *sb, float *sc, float *ss); | | void CUBLASAPI cublasSrotg (float *host_sa, float *host_sb, float *host_sc,
float *host_ss); | |
| | | | |
| /* | | /* | |
| * void | | * void | |
| * cublasSrotm (int n, float *x, int incx, float *y, int incy, | | * cublasSrotm (int n, float *x, int incx, float *y, int incy, | |
| * const float* sparam) | | * const float* sparam) | |
| * | | * | |
| * applies the modified Givens transformation, h, to the 2 x n matrix | | * applies the modified Givens transformation, h, to the 2 x n matrix | |
| * | | * | |
| * ( transpose(x) ) | | * ( transpose(x) ) | |
| * ( transpose(y) ) | | * ( transpose(y) ) | |
| | | | |
| skipping to change at line 644 | | skipping to change at line 646 | |
| * Error Status | | * Error Status | |
| * ------------ | | * ------------ | |
| * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialize
d | | * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialize
d | |
| * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU | | * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU | |
| */ | | */ | |
| void CUBLASAPI cublasSrotm(int n, float *x, int incx, float *y, int incy, | | void CUBLASAPI cublasSrotm(int n, float *x, int incx, float *y, int incy, | |
| const float* sparam); | | const float* sparam); | |
| | | | |
| /* | | /* | |
| * void | | * void | |
|
| * cublasSrotmg (float *psd1, float *psd2, float *psx1, const float *psy1, | | * cublasSrotmg (float *host_psd1, float *host_psd2, float *host_psx1, cons | |
| * float *sparam) | | t float *host_psy1, | |
| | | * float *host_sparam) | |
| * | | * | |
| * constructs the modified Givens transformation matrix h which zeros | | * constructs the modified Givens transformation matrix h which zeros | |
| * the second component of the 2-vector transpose(sqrt(sd1)*sx1,sqrt(sd2)*s
y1). | | * the second component of the 2-vector transpose(sqrt(sd1)*sx1,sqrt(sd2)*s
y1). | |
| * With sparam[0] = sflag, h has one of the following forms: | | * With sparam[0] = sflag, h has one of the following forms: | |
| * | | * | |
| * sflag = -1.0f sflag = 0.0f sflag = 1.0f sflag = -2.0f | | * sflag = -1.0f sflag = 0.0f sflag = 1.0f sflag = -2.0f | |
| * | | * | |
| * (sh00 sh01) (1.0f sh01) (sh00 1.0f) (1.0f 0.0f) | | * (sh00 sh01) (1.0f sh01) (sh00 1.0f) (1.0f 0.0f) | |
| * h = ( ) ( ) ( ) ( ) | | * h = ( ) ( ) ( ) ( ) | |
| * (sh10 sh11) (sh10 1.0f) (-1.0f sh11) (0.0f 1.0f) | | * (sh10 sh11) (sh10 1.0f) (-1.0f sh11) (0.0f 1.0f) | |
| * | | * | |
| * sparam[1] through sparam[4] contain sh00, sh10, sh01, sh11, | | * sparam[1] through sparam[4] contain sh00, sh10, sh01, sh11, | |
| * respectively. Values of 1.0f, -1.0f, or 0.0f implied by the value | | * respectively. Values of 1.0f, -1.0f, or 0.0f implied by the value | |
| * of sflag are not stored in sparam. | | * of sflag are not stored in sparam. | |
|
| | | * Note that this function is provided for completeness and runs exclusively | |
| | | * on the Host. | |
| * | | * | |
| * Input | | * Input | |
| * ----- | | * ----- | |
| * sd1 single precision scalar | | * sd1 single precision scalar | |
| * sd2 single precision scalar | | * sd2 single precision scalar | |
| * sx1 single precision scalar | | * sx1 single precision scalar | |
| * sy1 single precision scalar | | * sy1 single precision scalar | |
| * | | * | |
| * Output | | * Output | |
| * ------ | | * ------ | |
| | | | |
| skipping to change at line 682 | | skipping to change at line 686 | |
| * sx1 changed to represent the effect of the transformation | | * sx1 changed to represent the effect of the transformation | |
| * sparam 5-element vector. sparam[0] is sflag described above. sparam[1] | | * sparam 5-element vector. sparam[0] is sflag described above. sparam[1] | |
| * through sparam[4] contain the 2x2 rotation matrix h: sparam[1] | | * through sparam[4] contain the 2x2 rotation matrix h: sparam[1] | |
| * contains sh00, sparam[2] contains sh10, sparam[3] contains sh01, | | * contains sh00, sparam[2] contains sh10, sparam[3] contains sh01, | |
| * and sparam[4] contains sh11. | | * and sparam[4] contains sh11. | |
| * | | * | |
| * Reference: http://www.netlib.org/blas/srotmg.f | | * Reference: http://www.netlib.org/blas/srotmg.f | |
| * | | * | |
| * This function does not set any error status. | | * This function does not set any error status. | |
| */ | | */ | |
|
| void CUBLASAPI cublasSrotmg (float *sd1, float *sd2, float *sx1, | | void CUBLASAPI cublasSrotmg (float *host_sd1, float *host_sd2, float *host_ | |
| const float *sy1, float* sparam); | | sx1, | |
| | | const float *host_sy1, float* host_sparam); | |
| | | | |
| /* | | /* | |
| * void | | * void | |
| * sscal (int n, float alpha, float *x, int incx) | | * sscal (int n, float alpha, float *x, int incx) | |
| * | | * | |
| * replaces single precision vector x with single precision alpha * x. For
i | | * replaces single precision vector x with single precision alpha * x. For
i | |
| * = 0 to n - 1, it replaces x[ix + i * incx] with alpha * x[ix + i * incx]
, | | * = 0 to n - 1, it replaces x[ix + i * incx] with alpha * x[ix + i * incx]
, | |
| * where ix = 1 if incx >= 0, else ix = 1 + (1 - n) * incx. | | * where ix = 1 if incx >= 0, else ix = 1 + (1 - n) * incx. | |
| * | | * | |
| * Input | | * Input | |
| | | | |
| skipping to change at line 819 | | skipping to change at line 823 | |
| * Error Status | | * Error Status | |
| * ------------ | | * ------------ | |
| * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialize
d | | * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialize
d | |
| * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU | | * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU | |
| */ | | */ | |
| void CUBLASAPI cublasCcopy (int n, const cuComplex *x, int incx, cuComplex
*y, | | void CUBLASAPI cublasCcopy (int n, const cuComplex *x, int incx, cuComplex
*y, | |
| int incy); | | int incy); | |
| | | | |
| /* | | /* | |
| * void | | * void | |
|
| | | * cublasZcopy (int n, const cuDoubleComplex *x, int incx, cuDoubleComplex | |
| | | *y, int incy) | |
| | | * | |
| | | * copies the double-complex vector x to the double-complex vector y. For | |
| | | * i = 0 to n-1, copies x[lx + i * incx] to y[ly + i * incy], where lx = 1 | |
| | | if | |
| | | * incx >= 0, else lx = 1 + (1 - n) * incx, and ly is defined in a similar | |
| | | * way using incy. | |
| | | * | |
| | | * Input | |
| | | * ----- | |
| | | * n number of elements in input vectors | |
| | | * x double-complex vector with n elements | |
| | | * incx storage spacing between elements of x | |
| | | * y double-complex vector with n elements | |
| | | * incy storage spacing between elements of y | |
| | | * | |
| | | * Output | |
| | | * ------ | |
| | | * y contains double complex vector x | |
| | | * | |
| | | * Reference: http://www.netlib.org/blas/zcopy.f | |
| | | * | |
| | | * Error status for this function can be retrieved via cublasGetError(). | |
| | | * | |
| | | * Error Status | |
| | | * ------------ | |
| | | * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialize | |
| | | d | |
| | | * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU | |
| | | */ | |
| | | void CUBLASAPI cublasZcopy (int n, const cuDoubleComplex *x, int incx, cuDo | |
| | | ubleComplex *y, | |
| | | int incy); | |
| | | | |
| | | /* | |
| | | * void | |
| * cublasCscal (int n, cuComplex alpha, cuComplex *x, int incx) | | * cublasCscal (int n, cuComplex alpha, cuComplex *x, int incx) | |
| * | | * | |
| * replaces single-complex vector x with single-complex alpha * x. For i | | * replaces single-complex vector x with single-complex alpha * x. For i | |
| * = 0 to n - 1, it replaces x[ix + i * incx] with alpha * x[ix + i * incx]
, | | * = 0 to n - 1, it replaces x[ix + i * incx] with alpha * x[ix + i * incx]
, | |
| * where ix = 1 if incx >= 0, else ix = 1 + (1 - n) * incx. | | * where ix = 1 if incx >= 0, else ix = 1 + (1 - n) * incx. | |
| * | | * | |
| * Input | | * Input | |
| * ----- | | * ----- | |
| * n number of elements in input vectors | | * n number of elements in input vectors | |
| * alpha single-complex scalar multiplier | | * alpha single-complex scalar multiplier | |
| | | | |
| skipping to change at line 849 | | skipping to change at line 886 | |
| * | | * | |
| * Error Status | | * Error Status | |
| * ------------ | | * ------------ | |
| * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialize
d | | * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialize
d | |
| * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU | | * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU | |
| */ | | */ | |
| void CUBLASAPI cublasCscal (int n, cuComplex alpha, cuComplex *x, int incx)
; | | void CUBLASAPI cublasCscal (int n, cuComplex alpha, cuComplex *x, int incx)
; | |
| | | | |
| /* | | /* | |
| * void | | * void | |
|
| * cublasCrotg (cuComplex *ca, cuComplex cb, float *sc, cuComplex *cs) | | * cublasCrotg (cuComplex *host_ca, cuComplex cb, float *host_sc, cuComplex
*host_cs) | |
| * | | * | |
| * constructs the complex Givens transformation | | * constructs the complex Givens transformation | |
| * | | * | |
| * ( sc cs ) | | * ( sc cs ) | |
| * G = ( ) , sc^2 + cabs(cs)^2 = 1, | | * G = ( ) , sc^2 + cabs(cs)^2 = 1, | |
| * (-cs sc ) | | * (-cs sc ) | |
| * | | * | |
| * which zeros the second entry of the complex 2-vector transpose(ca, cb). | | * which zeros the second entry of the complex 2-vector transpose(ca, cb). | |
| * | | * | |
| * The quantity ca/cabs(ca)*norm(ca,cb) overwrites ca in storage. The | | * The quantity ca/cabs(ca)*norm(ca,cb) overwrites ca in storage. The | |
| * function crot (n, x, incx, y, incy, sc, cs) is normally called next | | * function crot (n, x, incx, y, incy, sc, cs) is normally called next | |
| * to apply the transformation to a 2 x n matrix. | | * to apply the transformation to a 2 x n matrix. | |
|
| | | * Note that this function is provided for completeness and runs exclusively | |
| | | * on the Host. | |
| * | | * | |
| * Input | | * Input | |
| * ----- | | * ----- | |
| * ca single-precision complex scalar | | * ca single-precision complex scalar | |
| * cb single-precision complex scalar | | * cb single-precision complex scalar | |
| * | | * | |
| * Output | | * Output | |
| * ------ | | * ------ | |
| * ca single-precision complex ca/cabs(ca)*norm(ca,cb) | | * ca single-precision complex ca/cabs(ca)*norm(ca,cb) | |
| * sc single-precision cosine component of rotation matrix | | * sc single-precision cosine component of rotation matrix | |
| * cs single-precision complex sine component of rotation matrix | | * cs single-precision complex sine component of rotation matrix | |
| * | | * | |
| * Reference: http://www.netlib.org/blas/crotg.f | | * Reference: http://www.netlib.org/blas/crotg.f | |
| * | | * | |
| * This function does not set any error status. | | * This function does not set any error status. | |
| */ | | */ | |
|
| __host__ void CUBLASAPI cublasCrotg (cuComplex *pca, cuComplex cb, float *p | | __host__ void CUBLASAPI cublasCrotg (cuComplex *host_ca, cuComplex cb, floa | |
| sc, | | t *host_sc, | |
| cuComplex *pcs); | | cuComplex *host_cs); | |
| | | | |
| /* | | /* | |
| * void | | * void | |
| * cublasCrot (int n, cuComplex *x, int incx, cuComplex *y, int incy, float
sc, | | * cublasCrot (int n, cuComplex *x, int incx, cuComplex *y, int incy, float
sc, | |
| * cuComplex cs) | | * cuComplex cs) | |
| * | | * | |
| * multiplies a 2x2 matrix ( sc cs) with the 2xn matrix ( transpose(x
) ) | | * multiplies a 2x2 matrix ( sc cs) with the 2xn matrix ( transpose(x
) ) | |
| * (-conj(cs) sc) ( transpose(y
) ) | | * (-conj(cs) sc) ( transpose(y
) ) | |
| * | | * | |
| * The elements of x are in x[lx + i * incx], i = 0 ... n - 1, where lx = 1
if | | * The elements of x are in x[lx + i * incx], i = 0 ... n - 1, where lx = 1
if | |
| | | | |
| skipping to change at line 1024 | | skipping to change at line 1063 | |
| * | | * | |
| * Error Status | | * Error Status | |
| * ------------ | | * ------------ | |
| * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialize
d | | * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialize
d | |
| * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU | | * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU | |
| */ | | */ | |
| void CUBLASAPI cublasCswap (int n, cuComplex *x, int incx, cuComplex *y, | | void CUBLASAPI cublasCswap (int n, cuComplex *x, int incx, cuComplex *y, | |
| int incy); | | int incy); | |
| | | | |
| /* | | /* | |
|
| | | * void | |
| | | * cublasZswap (int n, cuDoubleComplex *x, int incx, cuDoubleComplex | |
| | | *y, int incy) | |
| | | * | |
| | | * interchanges the double-complex vector x with the double-complex vector | |
| | | y. | |
| | | * For i = 0 to n-1, interchanges x[lx + i * incx] with y[ly + i * incy], w | |
| | | here | |
| | | * lx = 1 if incx >= 0, else lx = 1 + (1 - n) * incx, and ly is defined in | |
| | | a | |
| | | * similar way using incy. | |
| | | * | |
| | | * Input | |
| | | * ----- | |
| | | * n number of elements in input vectors | |
| | | * x double-complex vector with n elements | |
| | | * incx storage spacing between elements of x | |
| | | * y double-complex vector with n elements | |
| | | * incy storage spacing between elements of y | |
| | | * | |
| | | * Output | |
| | | * ------ | |
| | | * x contains double-complex vector y | |
| | | * y contains double-complex vector x | |
| | | * | |
| | | * Reference: http://www.netlib.org/blas/zswap.f | |
| | | * | |
| | | * Error status for this function can be retrieved via cublasGetError(). | |
| | | * | |
| | | * Error Status | |
| | | * ------------ | |
| | | * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialize | |
| | | d | |
| | | * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU | |
| | | */ | |
| | | void CUBLASAPI cublasZswap (int n, cuDoubleComplex *x, int incx, cuDoubleCo | |
| | | mplex *y, | |
| | | int incy); | |
| | | | |
| | | /* | |
| * cuComplex | | * cuComplex | |
| * cdotu (int n, const cuComplex *x, int incx, const cuComplex *y, int incy
) | | * cdotu (int n, const cuComplex *x, int incx, const cuComplex *y, int incy
) | |
| * | | * | |
| * computes the dot product of two single-complex vectors. It returns the | | * computes the dot product of two single-complex vectors. It returns the | |
| * dot product of the single-complex vectors x and y if successful, and com
plex | | * dot product of the single-complex vectors x and y if successful, and com
plex | |
| * zero otherwise. It computes the sum for i = 0 to n - 1 of x[lx + i * inc
x] * | | * zero otherwise. It computes the sum for i = 0 to n - 1 of x[lx + i * inc
x] * | |
| * y[ly + i * incy], where lx = 1 if incx >= 0, else lx = 1 + (1 - n) * inc
x; | | * y[ly + i * incy], where lx = 1 if incx >= 0, else lx = 1 + (1 - n) * inc
x; | |
| * ly is defined in a similar way using incy. | | * ly is defined in a similar way using incy. | |
| * | | * | |
| * Input | | * Input | |
| | | | |
| skipping to change at line 1212 | | skipping to change at line 1285 | |
| * Error Status | | * Error Status | |
| * ------------ | | * ------------ | |
| * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialize
d | | * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialize
d | |
| * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU | | * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU | |
| */ | | */ | |
| float CUBLASAPI cublasScnrm2 (int n, const cuComplex *x, int incx); | | float CUBLASAPI cublasScnrm2 (int n, const cuComplex *x, int incx); | |
| | | | |
| /* ----------------- CUBLAS double-complex BLAS1 functions ----------------
- */ | | /* ----------------- CUBLAS double-complex BLAS1 functions ----------------
- */ | |
| | | | |
| /* | | /* | |
|
| | | * void | |
| | | * cublasZaxpy (int n, cuDoubleComplex alpha, const cuDoubleComplex *x, int | |
| | | incx, | |
| | | * cuDoubleComplex *y, int incy) | |
| | | * | |
| | | * multiplies double-complex vector x by double-complex scalar alpha and ad | |
| | | ds | |
| | | * the result to double-complex vector y; that is, it overwrites double-com | |
| | | plex | |
| | | * y with double-complex alpha * x + y. For i = 0 to n - 1, it replaces | |
| | | * y[ly + i * incy] with alpha * x[lx + i * incx] + y[ly + i * incy], where | |
| | | * lx = 1 if incx >= 0, else lx = 1 + (1 - n) * incx, and ly is defined in | |
| | | a | |
| | | * similar way using incy. | |
| | | * | |
| | | * Input | |
| | | * ----- | |
| | | * n number of elements in input vectors | |
| | | * alpha double-complex scalar multiplier | |
| | | * x double-complex vector with n elements | |
| | | * incx storage spacing between elements of x | |
| | | * y double-complex vector with n elements | |
| | | * incy storage spacing between elements of y | |
| | | * | |
| | | * Output | |
| | | * ------ | |
| | | * y double-complex result (unchanged if n <= 0) | |
| | | * | |
| | | * Reference: http://www.netlib.org/blas/zaxpy.f | |
| | | * | |
| | | * Error status for this function can be retrieved via cublasGetError(). | |
| | | * | |
| | | * Error Status | |
| | | * ------------ | |
| | | * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialize | |
| | | d | |
| | | * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU | |
| | | */ | |
| | | void CUBLASAPI cublasZaxpy (int n, cuDoubleComplex alpha, const cuDoubleCom | |
| | | plex *x, | |
| | | int incx, cuDoubleComplex *y, int incy); | |
| | | | |
| | | /* | |
| * cuDoubleComplex | | * cuDoubleComplex | |
| * zdotu (int n, const cuDoubleComplex *x, int incx, const cuDoubleComplex
*y, int incy) | | * zdotu (int n, const cuDoubleComplex *x, int incx, const cuDoubleComplex
*y, int incy) | |
| * | | * | |
| * computes the dot product of two double-complex vectors. It returns the | | * computes the dot product of two double-complex vectors. It returns the | |
| * dot product of the double-complex vectors x and y if successful, and dou
ble-complex | | * dot product of the double-complex vectors x and y if successful, and dou
ble-complex | |
| * zero otherwise. It computes the sum for i = 0 to n - 1 of x[lx + i * inc
x] * | | * zero otherwise. It computes the sum for i = 0 to n - 1 of x[lx + i * inc
x] * | |
| * y[ly + i * incy], where lx = 1 if incx >= 0, else lx = 1 + (1 - n) * inc
x; | | * y[ly + i * incy], where lx = 1 if incx >= 0, else lx = 1 + (1 - n) * inc
x; | |
| * ly is defined in a similar way using incy. | | * ly is defined in a similar way using incy. | |
| * | | * | |
| * Input | | * Input | |
| | | | |
| skipping to change at line 1240 | | skipping to change at line 1350 | |
| * ------ | | * ------ | |
| * returns double-complex dot product (zero if n <= 0) | | * returns double-complex dot product (zero if n <= 0) | |
| * | | * | |
| * Reference: http://www.netlib.org/blas/zdotu.f | | * Reference: http://www.netlib.org/blas/zdotu.f | |
| * | | * | |
| * Error status for this function can be retrieved via cublasGetError(). | | * Error status for this function can be retrieved via cublasGetError(). | |
| * | | * | |
| * Error Status | | * Error Status | |
| * ------------ | | * ------------ | |
| * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialize
d | | * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialize
d | |
|
| | | * CUBLAS_STATUS_ARCH_MISMATCH if invoked on device without DP support | |
| * CUBLAS_STATUS_EXECUTION_FAILED if function failed to execute on GPU | | * CUBLAS_STATUS_EXECUTION_FAILED if function failed to execute on GPU | |
| */ | | */ | |
| cuDoubleComplex CUBLASAPI cublasZdotu (int n, const cuDoubleComplex *x, int
incx, | | cuDoubleComplex CUBLASAPI cublasZdotu (int n, const cuDoubleComplex *x, int
incx, | |
| const cuDoubleComplex *y, int incy); | | const cuDoubleComplex *y, int incy); | |
| | | | |
| /* | | /* | |
|
| | | * cuDoubleComplex | |
| | | * cublasZdotc (int n, const cuDoubleComplex *x, int incx, const cuDoubleCo | |
| | | mplex *y, int incy) | |
| | | * | |
| | | * computes the dot product of two double-precision complex vectors. It ret | |
| | | urns the | |
| | | * dot product of the double-precision complex vectors conjugate(x) and y i | |
| | | f successful, | |
| | | * and double-precision complex zero otherwise. It computes the | |
| | | * sum for i = 0 to n - 1 of conjugate(x[lx + i * incx]) * y[ly + i * incy | |
| | | ], | |
| | | * where lx = 1 if incx >= 0, else lx = 1 + (1 - n) * incx; | |
| | | * ly is defined in a similar way using incy. | |
| | | * | |
| | | * Input | |
| | | * ----- | |
| | | * n number of elements in input vectors | |
| | | * x double-precision complex vector with n elements | |
| | | * incx storage spacing between elements of x | |
| | | * y double-precision complex vector with n elements | |
| | | * incy storage spacing between elements of y | |
| | | * | |
| | | * Output | |
| | | * ------ | |
| | | * returns double-complex dot product (zero if n <= 0) | |
| | | * | |
| | | * Reference: http://www.netlib.org/blas/zdotc.f | |
| | | * | |
| | | * Error status for this function can be retrieved via cublasGetError(). | |
| | | * | |
| | | * Error Status | |
| | | * ------------ | |
| | | * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialize | |
| | | d | |
| | | * CUBLAS_STATUS_ARCH_MISMATCH if invoked on device without DP support | |
| | | * CUBLAS_STATUS_EXECUTION_FAILED if function failed to execute on GPU | |
| | | */ | |
| | | cuDoubleComplex CUBLASAPI cublasZdotc( int n, const cuDoubleComplex *x, int | |
| | | incx, | |
| | | const cuDoubleComplex *y, int incy ) | |
| | | ; | |
| | | | |
| | | /* | |
| * void | | * void | |
| * cublasZscal (int n, cuDoubleComplex alpha, cuDoubleComplex *x, int incx) | | * cublasZscal (int n, cuDoubleComplex alpha, cuDoubleComplex *x, int incx) | |
| * | | * | |
| * replaces double-complex vector x with double-complex alpha * x. For i | | * replaces double-complex vector x with double-complex alpha * x. For i | |
| * = 0 to n - 1, it replaces x[ix + i * incx] with alpha * x[ix + i * incx]
, | | * = 0 to n - 1, it replaces x[ix + i * incx] with alpha * x[ix + i * incx]
, | |
| * where ix = 1 if incx >= 0, else ix = 1 + (1 - n) * incx. | | * where ix = 1 if incx >= 0, else ix = 1 + (1 - n) * incx. | |
| * | | * | |
| * Input | | * Input | |
| * ----- | | * ----- | |
| * n number of elements in input vectors | | * n number of elements in input vectors | |
| | | | |
| skipping to change at line 1275 | | skipping to change at line 1422 | |
| * | | * | |
| * Error status for this function can be retrieved via cublasGetError(). | | * Error status for this function can be retrieved via cublasGetError(). | |
| * | | * | |
| * Error Status | | * Error Status | |
| * ------------ | | * ------------ | |
| * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialize
d | | * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialize
d | |
| * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU | | * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU | |
| */ | | */ | |
| void CUBLASAPI cublasZscal (int n, cuDoubleComplex alpha, cuDoubleComplex *
x, int incx); | | void CUBLASAPI cublasZscal (int n, cuDoubleComplex alpha, cuDoubleComplex *
x, int incx); | |
| | | | |
|
| | | /* | |
| | | * void | |
| | | * cublasZdscal (int n, double alpha, cuDoubleComplex *x, int incx) | |
| | | * | |
| | | * replaces double-complex vector x with double-complex alpha * x. For i | |
| | | * = 0 to n - 1, it replaces x[ix + i * incx] with alpha * x[ix + i * incx] | |
| | | , | |
| | | * where ix = 1 if incx >= 0, else ix = 1 + (1 - n) * incx. | |
| | | * | |
| | | * Input | |
| | | * ----- | |
| | | * n number of elements in input vectors | |
| | | * alpha double precision scalar multiplier | |
| | | * x double-complex vector with n elements | |
| | | * incx storage spacing between elements of x | |
| | | * | |
| | | * Output | |
| | | * ------ | |
| | | * x double-complex result (unchanged if n <= 0 or incx <= 0) | |
| | | * | |
| | | * Reference: http://www.netlib.org/blas/zdscal.f | |
| | | * | |
| | | * Error status for this function can be retrieved via cublasGetError(). | |
| | | * | |
| | | * Error Status | |
| | | * ------------ | |
| | | * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialize | |
| | | d | |
| | | * CUBLAS_STATUS_ARCH_MISMATCH if invoked on device without DP support | |
| | | * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU | |
| | | */ | |
| | | void CUBLASAPI cublasZdscal (int n, double alpha, cuDoubleComplex *x, | |
| | | int incx); | |
| | | | |
| | | /* | |
| | | * double | |
| | | * cublasDznrm2 (int n, const cuDoubleComplex *x, int incx) | |
| | | * | |
| | | * computes the Euclidean norm of the double precision complex n-vector x. | |
| | | This code | |
| | | * uses simple scaling to avoid intermediate underflow and overflow. | |
| | | * | |
| | | * Input | |
| | | * ----- | |
| | | * n number of elements in input vector | |
| | | * x double-complex vector with n elements | |
| | | * incx storage spacing between elements of x | |
| | | * | |
| | | * Output | |
| | | * ------ | |
| | | * returns Euclidean norm (0 if n <= 0 or incx <= 0, or if an error occurs) | |
| | | * | |
| | | * Reference: http://www.netlib.org/blas/dznrm2.f | |
| | | * | |
| | | * Error status for this function can be retrieved via cublasGetError(). | |
| | | * | |
| | | * Error Status | |
| | | * ------------ | |
| | | * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialize | |
| | | d | |
| | | * CUBLAS_STATUS_ARCH_MISMATCH if invoked on device without DP support | |
| | | * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU | |
| | | */ | |
| | | double CUBLASAPI cublasDznrm2 (int n, const cuDoubleComplex *x, int incx); | |
| | | | |
| | | /* | |
| | | * void | |
| | | * cublasZrotg (cuDoubleComplex *host_ca, cuDoubleComplex cb, double *host_ | |
| | | sc, cuDoubleComplex *host_cs) | |
| | | * | |
| | | * constructs the complex Givens transformation | |
| | | * | |
| | | * ( sc cs ) | |
| | | * G = ( ) , sc^2 + cabs(cs)^2 = 1, | |
| | | * (-cs sc ) | |
| | | * | |
| | | * which zeros the second entry of the complex 2-vector transpose(ca, cb). | |
| | | * | |
| | | * The quantity ca/cabs(ca)*norm(ca,cb) overwrites ca in storage. The | |
| | | * function zrot (n, x, incx, y, incy, sc, cs) is normally called next | |
| | | * to apply the transformation to a 2 x n matrix. | |
| | | * Note that this function is provided for completeness and runs exclusively | |
| | | * on the Host. | |
| | | * | |
| | | * Input | |
| | | * ----- | |
| | | * ca double-precision complex scalar | |
| | | * cb double-precision complex scalar | |
| | | * | |
| | | * Output | |
| | | * ------ | |
| | | * ca double-precision complex ca/cabs(ca)*norm(ca,cb) | |
| | | * sc double-precision cosine component of rotation matrix | |
| | | * cs double-precision complex sine component of rotation matrix | |
| | | * | |
| | | * Reference: http://www.netlib.org/blas/zrotg.f | |
| | | * | |
| | | * This function does not set any error status. | |
| | | */ | |
| | | void CUBLASAPI cublasZrotg (cuDoubleComplex *host_ca, cuDoubleComplex cb, d | |
| | | ouble *host_sc, | |
| | | cuDoubleComplex *host_cs); | |
| | | | |
| | | /* | |
| | | * cublasZrot (int n, cuDoubleComplex *x, int incx, cuDoubleComplex *y, int | |
| | | incy, double sc, | |
| | | * cuDoubleComplex cs) | |
| | | * | |
| | | * multiplies a 2x2 matrix ( sc cs) with the 2xn matrix ( transpose(x | |
| | | ) ) | |
| | | * (-conj(cs) sc) ( transpose(y | |
| | | ) ) | |
| | | * | |
| | | * The elements of x are in x[lx + i * incx], i = 0 ... n - 1, where lx = 1 | |
| | | if | |
| | | * incx >= 0, else lx = 1 + (1 - n) * incx, and similarly for y using ly an | |
| | | d | |
| | | * incy. | |
| | | * | |
| | | * Input | |
| | | * ----- | |
| | | * n number of elements in input vectors | |
| | | * x double-precision complex vector with n elements | |
| | | * incx storage spacing between elements of x | |
| | | * y double-precision complex vector with n elements | |
| | | * incy storage spacing between elements of y | |
| | | * sc double-precision cosine component of rotation matrix | |
| | | * cs double-precision complex sine component of rotation matrix | |
| | | * | |
| | | * Output | |
| | | * ------ | |
| | | * x rotated double-precision complex vector x (unchanged if n <= 0) | |
| | | * y rotated double-precision complex vector y (unchanged if n <= 0) | |
| | | * | |
| | | * Reference: http://netlib.org/lapack/explore-html/zrot.f.html | |
| | | * | |
| | | * Error status for this function can be retrieved via cublasGetError(). | |
| | | * | |
| | | * Error Status | |
| | | * ------------ | |
| | | * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialize | |
| | | d | |
| | | * CUBLAS_STATUS_ARCH_MISMATCH if invoked on device without DP support | |
| | | * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU | |
| | | */ | |
| | | void CUBLASAPI cublasZrot (int n, cuDoubleComplex *x, int incx, | |
| | | cuDoubleComplex *y, int incy, double sc, | |
| | | cuDoubleComplex cs); | |
| | | | |
| | | /* | |
| | | * void | |
| | | * cublasZdrot (int n, cuDoubleComplex *x, int incx, cuDoubleComplex *y, int incy, doub | |
| | | le c, | |
| | | * double s) | |
| | | * | |
| | | * multiplies a 2x2 matrix ( c s) with the 2xn matrix ( transpose(x) ) | |
| | | * (-s c) ( transpose(y) ) | |
| | | * | |
| | | * The elements of x are in x[lx + i * incx], i = 0 ... n - 1, where lx = 1 | |
| | | if | |
| | | * incx >= 0, else lx = 1 + (1 - n) * incx, and similarly for y using ly an | |
| | | d | |
| | | * incy. | |
| | | * | |
| | | * Input | |
| | | * ----- | |
| | | * n number of elements in input vectors | |
| | | * x double-precision complex vector with n elements | |
| | | * incx storage spacing between elements of x | |
| | | * y double-precision complex vector with n elements | |
| | | * incy storage spacing between elements of y | |
| | | * c cosine component of rotation matrix | |
| | | * s sine component of rotation matrix | |
| | | * | |
| | | * Output | |
| | | * ------ | |
| | | * x rotated vector x (unchanged if n <= 0) | |
| | | * y rotated vector y (unchanged if n <= 0) | |
| | | * | |
| | | * Reference: http://www.netlib.org/blas/zdrot.f | |
| | | * | |
| | | * Error status for this function can be retrieved via cublasGetError(). | |
| | | * | |
| | | * Error Status | |
| | | * ------------ | |
| | | * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialize | |
| | | d | |
| | | * CUBLAS_STATUS_ARCH_MISMATCH if invoked on device without DP support | |
| | | * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU | |
| | | */ | |
| | | void CUBLASAPI cublasZdrot (int n, cuDoubleComplex *x, int incx, | |
| | | cuDoubleComplex *y, int incy, double c, double | |
| | | s); | |
| | | | |
| | | /* | |
| | | * int | |
| | | * cublasIzamax (int n, const cuDoubleComplex *x, int incx) | |
| | | * | |
| | | * finds the smallest index of the element having maximum absolute value | |
| | | * in double-complex vector x; that is, the result is the first i, i = 0 | |
| | | * to n - 1 that maximizes abs(real(x[1+i*incx]))+abs(imag(x[1 + i * incx]) | |
| | | ). | |
| | | * | |
| | | * Input | |
| | | * ----- | |
| | | * n number of elements in input vector | |
| | | * x double-complex vector with n elements | |
| | | * incx storage spacing between elements of x | |
| | | * | |
| | | * Output | |
| | | * ------ | |
| | | * returns the smallest index (0 if n <= 0 or incx <= 0) | |
| | | * | |
| | | * Reference: http://www.netlib.org/blas/izamax.f | |
| | | * | |
| | | * Error status for this function can be retrieved via cublasGetError(). | |
| | | * | |
| | | * Error Status | |
| | | * ------------ | |
| | | * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialize | |
| | | d | |
| | | * CUBLAS_STATUS_ARCH_MISMATCH if invoked on device without DP support | |
| | | * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU | |
| | | */ | |
| | | int CUBLASAPI cublasIzamax (int n, const cuDoubleComplex *x, int incx); | |
| | | | |
| | | /* | |
| | | * int | |
| | | * cublasIzamin (int n, const cuDoubleComplex *x, int incx) | |
| | | * | |
| | | * finds the smallest index of the element having minimum absolute value | |
| | | * in double-complex vector x; that is, the result is the first i, i = 0 | |
| | | * to n - 1 that minimizes abs(real(x[1+i*incx]))+abs(imag(x[1 + i * incx]) | |
| | | ). | |
| | | * | |
| | | * Input | |
| | | * ----- | |
| | | * n number of elements in input vector | |
| | | * x double-complex vector with n elements | |
| | | * incx storage spacing between elements of x | |
| | | * | |
| | | * Output | |
| | | * ------ | |
| | | * returns the smallest index (0 if n <= 0 or incx <= 0) | |
| | | * | |
| | | * Reference: Analogous to IZAMAX, see there. | |
| | | * | |
| | | * Error status for this function can be retrieved via cublasGetError(). | |
| | | * | |
| | | * Error Status | |
| | | * ------------ | |
| | | * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialize | |
| | | d | |
| | | * CUBLAS_STATUS_ARCH_MISMATCH if invoked on device without DP support | |
| | | * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU | |
| | | */ | |
| | | int CUBLASAPI cublasIzamin (int n, const cuDoubleComplex *x, int incx); | |
| | | | |
| | | /* | |
| | | * double | |
| | | * cublasDzasum (int n, const cuDoubleComplex *x, int incx) | |
| | | * | |
| | | * takes the sum of the absolute values of a complex vector and returns a | |
| | | * double precision result. Note that this is not the L1 norm of the vector | |
| | | . | |
| | | * The result is the sum from 0 to n-1 of abs(real(x[ix+i*incx])) + | |
| | | * abs(imag(x[ix+i*incx])), where ix = 1 if incx >= 0, else ix = 1+(1-n)*in | |
| | | cx. | |
| | | * | |
| | | * Input | |
| | | * ----- | |
| | | * n number of elements in input vector | |
| | | * x double-complex vector with n elements | |
| | | * incx storage spacing between elements of x | |
| | | * | |
| | | * Output | |
| | | * ------ | |
| | | * returns the double precision sum of absolute values of real and imaginar | |
| | | y | |
| | | * parts (0 if n <= 0 or incx <= 0, or if an error occurs) | |
| | | * | |
| | | * Reference: http://www.netlib.org/blas/dzasum.f | |
| | | * | |
| | | * Error status for this function can be retrieved via cublasGetError(). | |
| | | * | |
| | | * Error Status | |
| | | * ------------ | |
| | | * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialize | |
| | | d | |
| | | * CUBLAS_STATUS_ARCH_MISMATCH if invoked on device without DP support | |
| | | * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU | |
| | | */ | |
| | | double CUBLASAPI cublasDzasum (int n, const cuDoubleComplex *x, int incx); | |
| | | | |
| /* --------------- CUBLAS single precision BLAS2 functions ---------------
- */ | | /* --------------- CUBLAS single precision BLAS2 functions ---------------
- */ | |
| | | | |
| /* | | /* | |
| * void | | * void | |
| * cublasSgbmv (char trans, int m, int n, int kl, int ku, float alpha, | | * cublasSgbmv (char trans, int m, int n, int kl, int ku, float alpha, | |
| * const float *A, int lda, const float *x, int incx, float be
ta, | | * const float *A, int lda, const float *x, int incx, float be
ta, | |
| * float *y, int incy) | | * float *y, int incy) | |
| * | | * | |
| * performs one of the matrix-vector operations | | * performs one of the matrix-vector operations | |
| * | | * | |
| | | | |
| skipping to change at line 1893 | | skipping to change at line 2309 | |
| * ------ | | * ------ | |
| * x updated according to x = op(A) * x | | * x updated according to x = op(A) * x | |
| * | | * | |
| * Reference: http://www.netlib.org/blas/stbmv.f | | * Reference: http://www.netlib.org/blas/stbmv.f | |
| * | | * | |
| * Error status for this function can be retrieved via cublasGetError(). | | * Error status for this function can be retrieved via cublasGetError(). | |
| * | | * | |
| * Error Status | | * Error Status | |
| * ------------ | | * ------------ | |
| * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialize
d | | * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialize
d | |
|
| * CUBLAS_STATUS_INVALID_VALUE if n < 0, n > 4070, k < 0, or incx == 0 | | * CUBLAS_STATUS_INVALID_VALUE if n < 0, k < 0, or incx == 0 | |
| | | * CUBLAS_STATUS_ALLOC_FAILED if function cannot allocate enough intern | |
| | | al scratch vector memory | |
| * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU | | * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU | |
| */ | | */ | |
| void CUBLASAPI cublasStbmv (char uplo, char trans, char diag, int n, int k, | | void CUBLASAPI cublasStbmv (char uplo, char trans, char diag, int n, int k, | |
| const float *A, int lda, float *x, int incx); | | const float *A, int lda, float *x, int incx); | |
| | | | |
| /* | | /* | |
| * void cublasStbsv (char uplo, char trans, char diag, int n, int k, | | * void cublasStbsv (char uplo, char trans, char diag, int n, int k, | |
| * const float *A, int lda, float *X, int incx) | | * const float *A, int lda, float *X, int incx) | |
| * | | * | |
| * solves one of the systems of equations op(A)*x = b, where op(A) is eithe
r | | * solves one of the systems of equations op(A)*x = b, where op(A) is eithe
r | |
| | | | |
| skipping to change at line 1954 | | skipping to change at line 2371 | |
| * ------ | | * ------ | |
| * x updated to contain the solution vector x that solves op(A) * x =
b. | | * x updated to contain the solution vector x that solves op(A) * x =
b. | |
| * | | * | |
| * Reference: http://www.netlib.org/blas/stbsv.f | | * Reference: http://www.netlib.org/blas/stbsv.f | |
| * | | * | |
| * Error status for this function can be retrieved via cublasGetError(). | | * Error status for this function can be retrieved via cublasGetError(). | |
| * | | * | |
| * Error Status | | * Error Status | |
| * ------------ | | * ------------ | |
| * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialize
d | | * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialize
d | |
|
| * CUBLAS_STATUS_INVALID_VALUE if incx == 0, n < 0, or n > 4070 | | * CUBLAS_STATUS_INVALID_VALUE if incx == 0, n < 0 or n > 4070 | |
| * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU | | * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU | |
| */ | | */ | |
| void CUBLASAPI cublasStbsv (char uplo, char trans, char diag, int n, int k, | | void CUBLASAPI cublasStbsv (char uplo, char trans, char diag, int n, int k, | |
| const float *A, int lda, float *x, int incx); | | const float *A, int lda, float *x, int incx); | |
| | | | |
| /* | | /* | |
| * void | | * void | |
| * cublasStpmv (char uplo, char trans, char diag, int n, const float *AP, | | * cublasStpmv (char uplo, char trans, char diag, int n, const float *AP, | |
| * float *x, int incx); | | * float *x, int incx); | |
| * | | * | |
| | | | |
| skipping to change at line 2007 | | skipping to change at line 2424 | |
| * x updated according to x = op(A) * x, | | * x updated according to x = op(A) * x, | |
| * | | * | |
| * Reference: http://www.netlib.org/blas/stpmv.f | | * Reference: http://www.netlib.org/blas/stpmv.f | |
| * | | * | |
| * Error status for this function can be retrieved via cublasGetError(). | | * Error status for this function can be retrieved via cublasGetError(). | |
| * | | * | |
| * Error Status | | * Error Status | |
| * ------------ | | * ------------ | |
| * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialize
d | | * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialize
d | |
| * CUBLAS_STATUS_INVALID_VALUE if incx == 0 or if n < 0 | | * CUBLAS_STATUS_INVALID_VALUE if incx == 0 or if n < 0 | |
|
| | | * CUBLAS_STATUS_ALLOC_FAILED if function cannot allocate enough intern
al scratch vector memory | |
| * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU | | * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU | |
| */ | | */ | |
| void CUBLASAPI cublasStpmv (char uplo, char trans, char diag, int n, | | void CUBLASAPI cublasStpmv (char uplo, char trans, char diag, int n, | |
| const float *AP, float *x, int incx); | | const float *AP, float *x, int incx); | |
| | | | |
| /* | | /* | |
| * void | | * void | |
| * cublasStpsv (char uplo, char trans, char diag, int n, const float *AP, | | * cublasStpsv (char uplo, char trans, char diag, int n, const float *AP, | |
| * float *X, int incx) | | * float *X, int incx) | |
| * | | * | |
| | | | |
| skipping to change at line 2129 | | skipping to change at line 2547 | |
| | | | |
| /* | | /* | |
| * void | | * void | |
| * cublasStrsv (char uplo, char trans, char diag, int n, const float *A, | | * cublasStrsv (char uplo, char trans, char diag, int n, const float *A, | |
| * int lda, float *x, int incx) | | * int lda, float *x, int incx) | |
| * | | * | |
| * solves a system of equations op(A) * x = b, where op(A) is either A or | | * solves a system of equations op(A) * x = b, where op(A) is either A or | |
| * transpose(A). b and x are single precision vectors consisting of n | | * transpose(A). b and x are single precision vectors consisting of n | |
| * elements, and A is an n x n matrix composed of a unit or non-unit, upper | | * elements, and A is an n x n matrix composed of a unit or non-unit, upper | |
| * or lower triangular matrix. Matrix A is stored in column major format, | | * or lower triangular matrix. Matrix A is stored in column major format, | |
|
| * and lda is the leading dimension of the two-diemnsional array containing | | * and lda is the leading dimension of the two-dimensional array containing | |
| * A. | | * A. | |
| * | | * | |
| * No test for singularity or near-singularity is included in this function
. | | * No test for singularity or near-singularity is included in this function
. | |
| * Such tests must be performed before calling this function. | | * Such tests must be performed before calling this function. | |
| * | | * | |
| * Input | | * Input | |
| * ----- | | * ----- | |
| * uplo specifies whether the matrix data is stored in the upper or the | | * uplo specifies whether the matrix data is stored in the upper or the | |
| * lower triangular part of array A. If uplo = 'U' or 'u', then only | | * lower triangular part of array A. If uplo = 'U' or 'u', then only | |
| * the upper triangular part of A may be referenced. If uplo = 'L' o
r | | * the upper triangular part of A may be referenced. If uplo = 'L' o
r | |
| * 'l', then only the lower triangular part of A may be referenced. | | * 'l', then only the lower triangular part of A may be referenced. | |
| * trans specifies op(A). If transa = 'n' or 'N', op(A) = A. If transa = '
t', | | * trans specifies op(A). If transa = 'n' or 'N', op(A) = A. If transa = '
t', | |
| * 'T', 'c', or 'C', op(A) = transpose(A) | | * 'T', 'c', or 'C', op(A) = transpose(A) | |
| * diag specifies whether or not A is a unit triangular matrix like so: | | * diag specifies whether or not A is a unit triangular matrix like so: | |
| * if diag = 'U' or 'u', A is assumed to be unit triangular. If | | * if diag = 'U' or 'u', A is assumed to be unit triangular. If | |
| * diag = 'N' or 'n', then A is not assumed to be unit triangular. | | * diag = 'N' or 'n', then A is not assumed to be unit triangular. | |
| * n specifies the number of rows and columns of the matrix A. It | | * n specifies the number of rows and columns of the matrix A. It | |
|
| * must be at least 0. In the current implementation n must be <= | | * must be at least 0. | |
| * 4070. | | | |
| * A is a single precision array of dimensions (lda, n). If uplo = 'U' | | * A is a single precision array of dimensions (lda, n). If uplo = 'U' | |
| * or 'u', then A must contains the upper triangular part of a symme
tric | | * or 'u', then A must contains the upper triangular part of a symme
tric | |
| * matrix, and the strictly lower triangular parts is not referenced
. | | * matrix, and the strictly lower triangular parts is not referenced
. | |
| * If uplo = 'L' or 'l', then A contains the lower triangular part o
f | | * If uplo = 'L' or 'l', then A contains the lower triangular part o
f | |
| * a symmetric matrix, and the strictly upper triangular part is not | | * a symmetric matrix, and the strictly upper triangular part is not | |
| * referenced. | | * referenced. | |
| * lda is the leading dimension of the two-dimensional array containing
A. | | * lda is the leading dimension of the two-dimensional array containing
A. | |
| * lda must be at least max(1, n). | | * lda must be at least max(1, n). | |
| * x single precision array of length at least (1 + (n - 1) * abs(incx
)). | | * x single precision array of length at least (1 + (n - 1) * abs(incx
)). | |
| * On entry, x contains the n element right-hand side vector b. On e
xit, | | * On entry, x contains the n element right-hand side vector b. On e
xit, | |
| | | | |
| skipping to change at line 2174 | | skipping to change at line 2591 | |
| * ------ | | * ------ | |
| * x updated to contain the solution vector x that solves op(A) * x =
b. | | * x updated to contain the solution vector x that solves op(A) * x =
b. | |
| * | | * | |
| * Reference: http://www.netlib.org/blas/strsv.f | | * Reference: http://www.netlib.org/blas/strsv.f | |
| * | | * | |
| * Error status for this function can be retrieved via cublasGetError(). | | * Error status for this function can be retrieved via cublasGetError(). | |
| * | | * | |
| * Error Status | | * Error Status | |
| * ------------ | | * ------------ | |
| * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialize
d | | * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialize
d | |
|
| * CUBLAS_STATUS_INVALID_VALUE if incx == 0 or if n < 0 or n > 4070 | | * CUBLAS_STATUS_INVALID_VALUE if incx == 0 or if n < 0 | |
| * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU | | * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU | |
| */ | | */ | |
| void CUBLASAPI cublasStrsv (char uplo, char trans, char diag, int n, | | void CUBLASAPI cublasStrsv (char uplo, char trans, char diag, int n, | |
| const float *A, int lda, float *x, int incx); | | const float *A, int lda, float *x, int incx); | |
| | | | |
|
| | | /* ----------------- CUBLAS double complex BLAS2 functions ---------------- | |
| | | - */ | |
| | | | |
| | | /* | |
| | | * void | |
| | | * cublasZtrmv (char uplo, char trans, char diag, int n, const cuDoubleComp | |
| | | lex *A, | |
| | | * int lda, cuDoubleComplex *x, int incx); | |
| | | * | |
| | | * performs one of the matrix-vector operations x = op(A) * x, | |
| | | * where op(A) = A, or op(A) = transpose(A) or op(A) = conjugate(transpose( | |
| | | A)). | |
| | | * x is an n-element double precision complex vector, and | |
| | | * A is an n x n, unit or non-unit, upper or lower, triangular matrix compo | |
| | | sed | |
| | | * of double precision complex elements. | |
| | | * | |
| | | * Input | |
| | | * ----- | |
| | | * uplo specifies whether the matrix A is an upper or lower triangular | |
| | | * matrix. If uplo = 'U' or 'u', then A is an upper triangular matri | |
| | | x. | |
| | | * If uplo = 'L' or 'l', then A is a lower triangular matrix. | |
| | | * trans specifies op(A). If trans = 'n' or 'N', op(A) = A. If trans = 't' | |
| | | or | |
| | | * 'T', op(A) = transpose(A). If trans = 'c' or 'C', op(A) = | |
| | | * conjugate(transpose(A)). | |
| | | * diag specifies whether or not matrix A is unit triangular. If diag = ' | |
| | | U' | |
| | | * or 'u', A is assumed to be unit triangular. If diag = 'N' or 'n', | |
| | | A | |
| | | * is not assumed to be unit triangular. | |
| | | * n specifies the number of rows and columns of the matrix A. n must | |
| | | be | |
| | | * at least zero. | |
| | | * A double precision complex array of dimension (lda, n). If uplo = 'U' or 'u | |
| | | ', | |
| | | * the leading n x n upper triangular part of the array A must conta | |
| | | in | |
| | | * the upper triangular matrix and the strictly lower triangular par | |
| | | t | |
| | | * of A is not referenced. If uplo = 'L' or 'l', the leading n x n l | |
| | | ower | |
| | | * triangular part of the array A must contain the lower triangular | |
| | | * matrix and the strictly upper triangular part of A is not referen | |
| | | ced. | |
| | | * When diag = 'U' or 'u', the diagonal elements of A are not refere | |
| | | nced | |
| | | * either, but are assumed to be unity. | |
| | | * lda is the leading dimension of A. It must be at least max (1, n). | |
| | | * x double precision complex array of length at least (1 + (n - 1) * abs(incx | |
| | | ) ). | |
| | | * On entry, x contains the source vector. On exit, x is overwritten | |
| | | * with the result vector. | |
| | | * incx specifies the storage spacing for elements of x. incx must not be | |
| | | * zero. | |
| | | * | |
| | | * Output | |
| | | * ------ | |
| | | * x updated according to x = op(A) * x, | |
| | | * | |
| | | * Reference: http://www.netlib.org/blas/ztrmv.f | |
| | | * | |
| | | * Error status for this function can be retrieved via cublasGetError(). | |
| | | * | |
| | | * Error Status | |
| | | * ------------ | |
| | | * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialize | |
| | | d | |
| | | * CUBLAS_STATUS_INVALID_VALUE if incx == 0 or if n < 0 | |
| | | * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU | |
| | | */ | |
| | | void CUBLASAPI cublasZtrmv (char uplo, char trans, char diag, int n, | |
| | | const cuDoubleComplex *A, int lda, cuDoubleComp | |
| | | lex *x, | |
| | | int incx); | |
| | | | |
| | | /* | |
| | | * void | |
| | | * cublasZgbmv (char trans, int m, int n, int kl, int ku, cuDoubleComplex a | |
| | | lpha, | |
| | | * const cuDoubleComplex *A, int lda, const cuDoubleComplex *x | |
| | | , int incx, cuDoubleComplex beta, | |
| | | * cuDoubleComplex *y, int incy); | |
| | | * | |
| | | * performs one of the matrix-vector operations | |
| | | * | |
| | | * y = alpha*op(A)*x + beta*y, op(A) = A, op(A) = transpose(A) or op(A) = conjugate(transpose(A)) | |
| | | * | |
| | | * alpha and beta are double precision complex scalars. x and y are double | |
| | | precision | |
| | | * complex vectors. A is an m by n band matrix consisting of double precisi | |
| | | on complex elements | |
| | | * with kl sub-diagonals and ku super-diagonals. | |
| | | * | |
| | | * Input | |
| | | * ----- | |
| | | * trans specifies op(A). If trans == 'N' or 'n', op(A) = A. If trans == ' | |
| | | T', | |
| | | * or 't', op(A) = transpose(A). If trans == 'C' or 'c', | |
| | | * op(A) = conjugate(transpose(A)). | |
| | | * m specifies the number of rows of the matrix A. m must be at least | |
| | | * zero. | |
| | | * n specifies the number of columns of the matrix A. n must be at lea | |
| | | st | |
| | | * zero. | |
| | | * kl specifies the number of sub-diagonals of matrix A. It must be at | |
| | | * least zero. | |
| | | * ku specifies the number of super-diagonals of matrix A. It must be a | |
| | | t | |
| | | * least zero. | |
| | | * alpha double precision complex scalar multiplier applied to op(A). | |
| | | * A double precision complex array of dimensions (lda, n). The leadin | |
| | | g | |
| | | * (kl + ku + 1) x n part of the array A must contain the band matri | |
| | | x A, | |
| | | * supplied column by column, with the leading diagonal of the matri | |
| | | x | |
| | | * in row (ku + 1) of the array, the first super-diagonal starting a | |
| | | t | |
| | | * position 2 in row ku, the first sub-diagonal starting at position | |
| | | 1 | |
| | | * in row (ku + 2), and so on. Elements in the array A that do not | |
| | | * correspond to elements in the band matrix (such as the top left | |
| | | * ku x ku triangle) are not referenced. | |
| | | * lda leading dimension of A. lda must be at least (kl + ku + 1). | |
| | | * x double precision complex array of length at least (1+(n-1)*abs(in | |
| | | cx)) when | |
| | | * trans == 'N' or 'n' and at least (1+(m-1)*abs(incx)) otherwise. | |
| | | * incx specifies the increment for the elements of x. incx must not be z | |
| | | ero. | |
| | | * beta double precision complex scalar multiplier applied to vector y. I | |
| | | f beta is | |
| | | * zero, y is not read. | |
| | | * y double precision complex array of length at least (1+(m-1)*abs(in | |
| | | cy)) when | |
| | | * trans == 'N' or 'n' and at least (1+(n-1)*abs(incy)) otherwise. I | |
| | | f | |
| | | * beta is zero, y is not read. | |
| | | * incy On entry, incy specifies the increment for the elements of y. inc | |
| | | y | |
| | | * must not be zero. | |
| | | * | |
| | | * Output | |
| | | * ------ | |
| | | * y updated according to y = alpha*op(A)*x + beta*y | |
| | | * | |
| | | * Reference: http://www.netlib.org/blas/zgbmv.f | |
| | | * | |
| | | * Error status for this function can be retrieved via cublasGetError(). | |
| | | * | |
| | | * Error Status | |
| | | * ------------ | |
| | | * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialize | |
| | | d | |
| | | * CUBLAS_STATUS_INVALID_VALUE if n < 0, or if incx or incy == 0 | |
| | | * CUBLAS_STATUS_ARCH_MISMATCH if invoked on device without DP support | |
| | | * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU | |
| | | */ | |
| | | void CUBLASAPI cublasZgbmv (char trans, int m, int n, int kl, int ku, | |
| | | cuDoubleComplex alpha, const cuDoubleComplex *A | |
| | | , int lda, | |
| | | const cuDoubleComplex *x, int incx, cuDoubleCom | |
| | | plex beta, | |
| | | cuDoubleComplex *y, int incy); | |
| | | | |
| | | /* | |
| | | * void | |
| | | * cublasZtbmv (char uplo, char trans, char diag, int n, int k, const cuDou | |
| | | bleComplex *A, | |
| | | * int lda, cuDoubleComplex *x, int incx) | |
| | | * | |
| | | * performs one of the matrix-vector operations x = op(A) * x, where op(A) | |
| | | = A, | |
| | | * op(A) = transpose(A) or op(A) = conjugate(transpose(A)). x is an n-eleme | |
| | | nt | |
| | | * double precision complex vector, and A is an n x n, unit or non-unit, up | |
| | | per | |
| | | * or lower triangular band matrix composed of double precision complex ele | |
| | | ments. | |
| | | * | |
| | | * Input | |
| | | * ----- | |
| | | * uplo specifies whether the matrix A is an upper or lower triangular ba | |
| | | nd | |
| | | * matrix. If uplo == 'U' or 'u', A is an upper triangular band matr | |
| | | ix. | |
| | | * If uplo == 'L' or 'l', A is a lower triangular band matrix. | |
| | | * trans specifies op(A). If trans == 'N' or 'n', op(A) = A. If trans == | |
| | | 'T', | |
| | | * or 't', op(A) = transpose(A). If trans == 'C' or 'c', | |
| | | * op(A) = conjugate(transpose(A)). | |
| | | * diag specifies whether or not matrix A is unit triangular. If diag == | |
| | | 'U' | |
| | | * or 'u', A is assumed to be unit triangular. If diag == 'N' or 'n' | |
| | | , A | |
| | | * is not assumed to be unit triangular. | |
| | | * n specifies the number of rows and columns of the matrix A. n must | |
| | | be | |
| | | * at least zero. | |
| | | * k specifies the number of super- or sub-diagonals. If uplo == 'U' o | |
| | | r | |
| | | * 'u', k specifies the number of super-diagonals. If uplo == 'L' or | |
| | | * 'l', k specifies the number of sub-diagonals. k must at least be | |
| | | * zero. | |
| | | * A double precision complex array of dimension (lda, n). If uplo == | |
| | | 'U' or 'u', | |
| | | * the leading (k + 1) x n part of the array A must contain the uppe | |
| | | r | |
| | | * triangular band matrix, supplied column by column, with the leadi | |
| | | ng | |
| | | * diagonal of the matrix in row (k + 1) of the array, the first | |
| | | * super-diagonal starting at position 2 in row k, and so on. The to | |
| | | p | |
| | | * left k x k triangle of the array A is not referenced. If uplo == | |
| | | 'L' | |
| | | * or 'l', the leading (k + 1) x n part of the array A must contain | |
| | | the | |
| | | * lower triangular band matrix, supplied column by column, with the | |
| | | * leading diagonal of the matrix in row 1 of the array, the first | |
| | | * sub-diagonal starting at position 1 in row 2, and so on. The botto | |
| | | m | |
| | | * right k x k triangle of the array is not referenced. | |
| | | * lda is the leading dimension of A. It must be at least (k + 1). | |
| | | * x double precision complex array of length at least (1 + (n - 1) * | |
| | | abs(incx)). | |
| | | * On entry, x contains the source vector. On exit, x is overwritten | |
| | | * with the result vector. | |
| | | * incx specifies the storage spacing for elements of x. incx must not be | |
| | | * zero. | |
| | | * | |
| | | * Output | |
| | | * ------ | |
| | | * x updated according to x = op(A) * x | |
| | | * | |
| | | * Reference: http://www.netlib.org/blas/ztbmv.f | |
| | | * | |
| | | * Error status for this function can be retrieved via cublasGetError(). | |
| | | * | |
| | | * Error Status | |
| | | * ------------ | |
| | | * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialize | |
| | | d | |
| | | * CUBLAS_STATUS_INVALID_VALUE if n or k < 0, or if incx == 0 | |
| | | * CUBLAS_STATUS_ARCH_MISMATCH if invoked on device without DP support | |
| | | * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU | |
| | | */ | |
| | | void CUBLASAPI cublasZtbmv (char uplo, char trans, char diag, int n, | |
| | | int k, const cuDoubleComplex *A, int lda, cuDoub | |
| | | leComplex *x, | |
| | | int incx); | |
| | | | |
| | | /* | |
| | | * void cublasZtbsv (char uplo, char trans, char diag, int n, int k, | |
| | | * const cuDoubleComplex *A, int lda, cuDoubleComplex *X, | |
| | | int incx) | |
| | | * | |
| | | * solves one of the systems of equations op(A)*x = b, where op(A) is eithe | |
| | | r | |
| | | * op(A) = A , op(A) = transpose(A) or op(A) = conjugate(transpose(A)). | |
| | | * b and x are n element vectors, and A is an n x n unit or non-unit, | |
| | | * upper or lower triangular band matrix with k + 1 diagonals. No test | |
| | | * for singularity or near-singularity is included in this function. | |
| | | * Such tests must be performed before calling this function. | |
| | | * | |
| | | * Input | |
| | | * ----- | |
| | | * uplo specifies whether the matrix is an upper or lower triangular band | |
| | | * matrix as follows: If uplo == 'U' or 'u', A is an upper triangula | |
| | | r | |
| | | * band matrix. If uplo == 'L' or 'l', A is a lower triangular band | |
| | | * matrix. | |
| | | * trans specifies op(A). If trans == 'N' or 'n', op(A) = A. If trans == ' | |
| | | T', | |
| | | * or 't', op(A) = transpose(A). If trans == 'C' or 'c', | |
| | | * op(A) = conjugate(transpose(A)). | |
| | | * diag specifies whether A is unit triangular. If diag == 'U' or 'u', A | |
| | | is | |
| | | * assumed to be unit triangular; that is, diagonal elements are not | |
| | | * read and are assumed to be unity. If diag == 'N' or 'n', A is not | |
| | | * assumed to be unit triangular. | |
| | | * n specifies the number of rows and columns of the matrix A. n must | |
| | | be | |
| | | * at least zero. | |
| | | * k specifies the number of super- or sub-diagonals. If uplo == 'U' o | |
| | | r | |
| | | * 'u', k specifies the number of super-diagonals. If uplo == 'L' or | |
| | | * 'l', k specifies the number of sub-diagonals. k must be at least | |
| | | * zero. | |
| | | * A double precision complex array of dimension (lda, n). If uplo == | |
| | | 'U' or 'u', | |
| | | * the leading (k + 1) x n part of the array A must contain the uppe | |
| | | r | |
| | | * triangular band matrix, supplied column by column, with the leadi | |
| | | ng | |
| | | * diagonal of the matrix in row (k + 1) of the array, the first sup | |
| | | er- | |
| | | * diagonal starting at position 2 in row k, and so on. The top left | |
| | | * k x k triangle of the array A is not referenced. If uplo == 'L' o | |
| | | r | |
| | | * 'l', the leading (k + 1) x n part of the array A must contain th | |
| | | e | |
| | | * lower triangular band matrix, supplied column by column, with the | |
| | | * leading diagonal of the matrix in row 1 of the array, the first | |
| | | * sub-diagonal starting at position 1 in row 2, and so on. The bott | |
| | | om | |
| | | * right k x k triangle of the array is not referenced. | |
| | | * x double precision complex array of length at least (1+(n-1)*abs(in | |
| | | cx)). | |
| | | * incx storage spacing between elements of x. It must not be zero. | |
| | | * | |
| | | * Output | |
| | | * ------ | |
| | | * x updated to contain the solution vector x that solves op(A) * x = | |
| | | b. | |
| | | * | |
| | | * Reference: http://www.netlib.org/blas/ztbsv.f | |
| | | * | |
| | | * Error status for this function can be retrieved via cublasGetError(). | |
| | | * | |
| | | * Error Status | |
| | | * ------------ | |
| | | * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialize | |
| | | d | |
| | | * CUBLAS_STATUS_INVALID_VALUE if incx == 0, n < 0 or n > 1016 | |
| | | * CUBLAS_STATUS_ARCH_MISMATCH if invoked on device without DP support | |
| | | * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU | |
| | | */ | |
| | | void CUBLASAPI cublasZtbsv (char uplo, char trans, char diag, int n, | |
| | | int k, const cuDoubleComplex *A, int lda, cuDou | |
| | | bleComplex *x, | |
| | | int incx); | |
| | | | |
| | | /* | |
| | | * void | |
| | | * cublasZhemv (char uplo, int n, cuDoubleComplex alpha, const cuDoubleComp | |
| | | lex *A, int lda, | |
| | | * const cuDoubleComplex *x, int incx, cuDoubleComplex beta, c | |
| | | uDoubleComplex *y, int incy) | |
| | | * | |
| | | * performs the matrix-vector operation | |
| | | * | |
| | | * y = alpha*A*x + beta*y | |
| | | * | |
| | | * Alpha and beta are double precision complex scalars, and x and y are dou | |
| | | ble | |
| | | * precision complex vectors, each with n elements. A is a hermitian n x n | |
| | | matrix | |
| | | * consisting of double precision complex elements that is stored in either | |
| | | upper or | |
| | | * lower storage mode. | |
| | | * | |
| | | * Input | |
| | | * ----- | |
| | | * uplo specifies whether the upper or lower triangular part of the array | |
| | | A | |
| | | * is to be referenced. If uplo == 'U' or 'u', the hermitian matrix | |
| | | A | |
| | | * is stored in upper storage mode, i.e. only the upper triangular p | |
| | | art | |
| | | * of A is to be referenced while the lower triangular part of A is | |
| | | to | |
| | | * be inferred. If uplo == 'L' or 'l', the hermitian matrix A is sto | |
| | | red | |
| | | * in lower storage mode, i.e. only the lower triangular part of A i | |
| | | s | |
| | | * to be referenced while the upper triangular part of A is to be | |
| | | * inferred. | |
| | | * n specifies the number of rows and the number of columns of the | |
| | | * hermitian matrix A. n must be at least zero. | |
| | | * alpha double precision complex scalar multiplier applied to A*x. | |
| | | * A double precision complex array of dimensions (lda, n). If uplo == | |
| | | 'U' or 'u', | |
| | | * the leading n x n upper triangular part of the array A must conta | |
| | | in | |
| | | * the upper triangular part of the hermitian matrix and the strictl | |
| | | y | |
| | | * lower triangular part of A is not referenced. If uplo == 'L' or ' | |
| | | l', | |
| | | * the leading n x n lower triangular part of the array A must conta | |
| | | in | |
| | | * the lower triangular part of the hermitian matrix and the strictl | |
| | | y | |
| | | * upper triangular part of A is not referenced. The imaginary parts | |
| | | * of the diagonal elements need not be set, they are assumed to be | |
| | | zero. | |
| | | * lda leading dimension of A. It must be at least max (1, n). | |
| | | * x double precision complex array of length at least (1 + (n - 1) * | |
| | | abs(incx)). | |
| | | * incx storage spacing between elements of x. incx must not be zero. | |
| | | * beta double precision complex scalar multiplier applied to vector y. | |
| | | * y double precision complex array of length at least (1 + (n - 1) * | |
| | | abs(incy)). | |
| | | * If beta is zero, y is not read. | |
| | | * incy storage spacing between elements of y. incy must not be zero. | |
| | | * | |
| | | * Output | |
| | | * ------ | |
| | | * y updated according to y = alpha*A*x + beta*y | |
| | | * | |
| | | * Reference: http://www.netlib.org/blas/zhemv.f | |
| | | * | |
| | | * Error status for this function can be retrieved via cublasGetError(). | |
| | | * | |
| | | * Error Status | |
| | | * ------------ | |
| | | * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialize | |
| | | d | |
| | | * CUBLAS_STATUS_INVALID_VALUE if n < 0, or if incx or incy == 0 | |
| | | * CUBLAS_STATUS_ARCH_MISMATCH if invoked on device without DP support | |
| | | * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU | |
| | | */ | |
| | | void CUBLASAPI cublasZhemv (char uplo, int n, cuDoubleComplex alpha, | |
| | | const cuDoubleComplex *A, int lda, const cuDoub | |
| | | leComplex *x, | |
| | | int incx, cuDoubleComplex beta, cuDoubleComplex | |
| | | *y, int incy); | |
| | | | |
| | | /* | |
| | | * void | |
| | | * cublasZhpmv (char uplo, int n, cuDoubleComplex alpha, const cuDoubleComp | |
| | | lex *AP, const cuDoubleComplex *x, | |
| | | * int incx, cuDoubleComplex beta, cuDoubleComplex *y, int inc | |
| | | y) | |
| | | * | |
| | | * performs the matrix-vector operation | |
| | | * | |
| | | * y = alpha * A * x + beta * y | |
| | | * | |
| | | * Alpha and beta are double precision complex scalars, and x and y are dou | |
| | | ble | |
| | | * precision complex vectors with n elements. A is an hermitian n x n matri | |
| | | x | |
| | | * consisting of double precision complex elements that is supplied in pack | |
| | | ed form. | |
| | | * | |
| | | * Input | |
| | | * ----- | |
| | | * uplo specifies whether the matrix data is stored in the upper or the l | |
| | | ower | |
| | | * triangular part of array AP. If uplo == 'U' or 'u', then the uppe | |
| | | r | |
| | | * triangular part of A is supplied in AP. If uplo == 'L' or 'l', th | |
| | | en | |
| | | * the lower triangular part of A is supplied in AP. | |
| | | * n specifies the number of rows and columns of the matrix A. It must | |
| | | be | |
| | | * at least zero. | |
| | | * alpha double precision complex scalar multiplier applied to A*x. | |
| | | * AP double precision complex array with at least ((n * (n + 1)) / 2) | |
| | | elements. If | |
| | | * uplo == 'U' or 'u', the array AP contains the upper triangular pa | |
| | | rt | |
| | | * of the hermitian matrix A, packed sequentially, column by column; | |
| | | * that is, if i <= j, then A[i,j] is stored in AP[i+(j*(j+1)/2)]. I | |
| | | f | |
| | | * uplo == 'L' or 'l', the array AP contains the lower triangular pa | |
| | | rt | |
| | | * of the hermitian matrix A, packed sequentially, column by column; | |
| | | * that is, if i >= j, then A[i,j] is stored in AP[i+((2*n-j+1)*j)/2 | |
| | | ]. | |
| | | * The imaginary parts of the diagonal elements need not be set, the | |
| | | y | |
| | | * are assumed to be zero. | |
| | | * x double precision complex array of length at least (1 + (n - 1) * | |
| | | abs(incx)). | |
| | | * incx storage spacing between elements of x. incx must not be zero. | |
| | | * beta double precision complex scalar multiplier applied to vector y. | |
| | | * y double precision complex array of length at least (1 + (n - 1) * abs(incy | |
| | | )). | |
| | | * If beta is zero, y is not read. | |
| | | * incy storage spacing between elements of y. incy must not be zero. | |
| | | * | |
| | | * Output | |
| | | * ------ | |
| | | * y updated according to y = alpha*A*x + beta*y | |
| | | * | |
| | | * Reference: http://www.netlib.org/blas/zhpmv.f | |
| | | * | |
| | | * Error status for this function can be retrieved via cublasGetError(). | |
| | | * | |
| | | * Error Status | |
| | | * ------------ | |
| | | * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialize | |
| | | d | |
| | | * CUBLAS_STATUS_INVALID_VALUE if n < 0, or if incx or incy == 0 | |
| | | * CUBLAS_STATUS_ARCH_MISMATCH if invoked on device without DP support | |
| | | * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU | |
| | | */ | |
| | | void CUBLASAPI cublasZhpmv (char uplo, int n, cuDoubleComplex alpha, | |
| | | const cuDoubleComplex *AP, const cuDoubleComple | |
| | | x *x, | |
| | | int incx, cuDoubleComplex beta, cuDoubleComplex | |
| | | *y, int incy); | |
| | | | |
| /* ----------------- CUBLAS double complex BLAS3 functions ----------------
- */ | | /* ----------------- CUBLAS double complex BLAS3 functions ----------------
- */ | |
| | | | |
| /* | | /* | |
| * cublasZgemv (char trans, int m, int n, cuDoubleComplex alpha, const cuDo
ubleComplex *A, int lda, | | * cublasZgemv (char trans, int m, int n, cuDoubleComplex alpha, const cuDo
ubleComplex *A, int lda, | |
| * const cuDoubleComplex *x, int incx, cuDoubleComplex beta, c
uDoubleComplex *y, int incy) | | * const cuDoubleComplex *x, int incx, cuDoubleComplex beta, c
uDoubleComplex *y, int incy) | |
| * | | * | |
| * performs one of the matrix-vector operations | | * performs one of the matrix-vector operations | |
| * | | * | |
| * y = alpha * op(A) * x + beta * y, | | * y = alpha * op(A) * x + beta * y, | |
| * | | * | |
| | | | |
| skipping to change at line 2243 | | skipping to change at line 3033 | |
| * Error Status | | * Error Status | |
| * ------------ | | * ------------ | |
| * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialize
d | | * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialize
d | |
| * CUBLAS_STATUS_INVALID_VALUE if m or n are < 0, or if incx or incy ==
0 | | * CUBLAS_STATUS_INVALID_VALUE if m or n are < 0, or if incx or incy ==
0 | |
| * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU | | * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU | |
| */ | | */ | |
| void CUBLASAPI cublasZgemv (char trans, int m, int n, cuDoubleComplex alpha
, | | void CUBLASAPI cublasZgemv (char trans, int m, int n, cuDoubleComplex alpha
, | |
| const cuDoubleComplex *A, int lda, const cuDoub
leComplex *x, int incx, | | const cuDoubleComplex *A, int lda, const cuDoub
leComplex *x, int incx, | |
| cuDoubleComplex beta, cuDoubleComplex *y, int i
ncy); | | cuDoubleComplex beta, cuDoubleComplex *y, int i
ncy); | |
| | | | |
|
| | | /* | |
| | | * void | |
| | | * cublasZtpmv (char uplo, char trans, char diag, int n, const cuDoubleComp | |
| | | lex *AP, | |
| | | * cuDoubleComplex *x, int incx); | |
| | | * | |
| | | * performs one of the matrix-vector operations x = op(A) * x, where op(A) | |
| | | = A, | |
| | | * op(A) = transpose(A) or op(A) = conjugate(transpose(A)) . x is an n elem | |
| | | ent | |
| | | * double precision complex vector, and A is an n x n, unit or non-unit, up | |
| | | per | |
| | | * or lower triangular matrix composed of double precision complex elements | |
| | | . | |
| | | * | |
| | | * Input | |
| | | * ----- | |
| | | * uplo specifies whether the matrix A is an upper or lower triangular | |
| | | * matrix. If uplo == 'U' or 'u', then A is an upper triangular matr | |
| | | ix. | |
| | | * If uplo == 'L' or 'l', then A is a lower triangular matrix. | |
| | | * trans specifies op(A). If trans == 'N' or 'n', op(A) = A. If trans == | |
| | | 'T', | |
| | | * or 't', op(A) = transpose(A). If trans == 'C' or 'c', | |
| | | * op(A) = conjugate(transpose(A)). | |
| | | * | |
| | | * diag specifies whether or not matrix A is unit triangular. If diag == | |
| | | 'U' | |
| | | * or 'u', A is assumed to be unit triangular. If diag == 'N' or 'n' | |
| | | , A | |
| | | * is not assumed to be unit triangular. | |
| | | * n specifies the number of rows and columns of the matrix A. n must | |
| | | be | |
| | | * at least zero. In the current implementation n must not exceed 40 | |
| | | 70. | |
| | | * AP double precision complex array with at least ((n * (n + 1)) / 2) | |
| | | elements. If | |
| | | * uplo == 'U' or 'u', the array AP contains the upper triangular pa | |
| | | rt | |
| | | * of the triangular matrix A, packed sequentially, column by column; | |
| | | * that is, if i <= j, then A[i,j] is stored in AP[i+(j*(j+1)/2)]. I | |
| | | f | |
| | | * uplo == 'L' or 'l', the array AP contains the lower triangular pa | |
| | | rt | |
| | | * of the triangular matrix A, packed sequentially, column by column; | |
| | | * that is, if i >= j, then A[i,j] is stored in AP[i+((2*n-j+1)*j)/2 | |
| | | ]. | |
| | | * x double precision complex array of length at least (1 + (n - 1) * | |
| | | abs(incx)). | |
| | | * On entry, x contains the source vector. On exit, x is overwritten | |
| | | * with the result vector. | |
| | | * incx specifies the storage spacing for elements of x. incx must not be | |
| | | * zero. | |
| | | * | |
| | | * Output | |
| | | * ------ | |
| | | * x updated according to x = op(A) * x, | |
| | | * | |
| | | * Reference: http://www.netlib.org/blas/ztpmv.f | |
| | | * | |
| | | * Error status for this function can be retrieved via cublasGetError(). | |
| | | * | |
| | | * Error Status | |
| | | * ------------ | |
| | | * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialize | |
| | | d | |
| | | * CUBLAS_STATUS_INVALID_VALUE if incx == 0 or n < 0 | |
| | | * CUBLAS_STATUS_ALLOC_FAILED if function cannot allocate enough intern | |
| | | al scratch vector memory | |
| | | * CUBLAS_STATUS_ARCH_MISMATCH if invoked on device without DP support | |
| | | * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU | |
| | | */ | |
| | | void CUBLASAPI cublasZtpmv (char uplo, char trans, char diag, int n, | |
| | | const cuDoubleComplex *AP, cuDoubleComplex *x, | |
| | | int incx); | |
| | | | |
| | | /* | |
| | | * void | |
| | | * cublasZtpsv (char uplo, char trans, char diag, int n, const cuDoubleComp | |
| | | lex *AP, | |
| | | * cuDoubleComplex *X, int incx) | |
| | | * | |
| | | * solves one of the systems of equations op(A)*x = b, where op(A) is eithe | |
| | | r | |
| | | * op(A) = A , op(A) = transpose(A) or op(A) = conjugate(transpose(A)). b and | |
| | | * x are n element complex vectors, and A is an n x n unit or non-unit, | |
| | | * upper or lower triangular matrix. No test for singularity or near-singul | |
| | | arity | |
| | | * is included in this routine. Such tests must be performed before calling | |
| | | this routine. | |
| | | * | |
| | | * Input | |
| | | * ----- | |
| | | * uplo specifies whether the matrix is an upper or lower triangular matr | |
| | | ix | |
| | | * as follows: If uplo == 'U' or 'u', A is an upper triangular matri | |
| | | x. | |
| | | * If uplo == 'L' or 'l', A is a lower triangular matrix. | |
| | | * trans specifies op(A). If trans == 'N' or 'n', op(A) = A. If trans == ' | |
| | | T' | |
| | | * or 't', op(A) = transpose(A). If trans == 'C' or 'c', op(A) = | |
| | | * conjugate(transpose(A)). | |
| | | * diag specifies whether A is unit triangular. If diag == 'U' or 'u', A | |
| | | is | |
| | | * assumed to be unit triangular; that is, diagonal elements are not | |
| | | * read and are assumed to be unity. If diag == 'N' or 'n', A is not | |
| | | * assumed to be unit triangular. | |
| | | * n specifies the number of rows and columns of the matrix A. n must | |
| | | be | |
| | | * at least zero. | |
| | | * AP double precision complex array with at least ((n*(n+1))/2) elemen | |
| | | ts. | |
| | | * If uplo == 'U' or 'u', the array AP contains the upper triangular | |
| | | * matrix A, packed sequentially, column by column; that is, if i <= | |
| | | j, then | |
| | | * A[i,j] is stored in AP[i+(j*(j+1)/2)]. If uplo == 'L' or 'l', the | |
| | | * array AP contains the lower triangular matrix A, packed sequentia | |
| | | lly, | |
| | | * column by column; that is, if i >= j, then A[i,j] is stored in | |
| | | * AP[i+((2*n-j+1)*j)/2]. When diag = 'U' or 'u', the diagonal eleme | |
| | | nts | |
| | | * of A are not referenced and are assumed to be unity. | |
| | | * x double precision complex array of length at least (1+(n-1)*abs(in | |
| | | cx)). | |
| | | * incx storage spacing between elements of x. It must not be zero. | |
| | | * | |
| | | * Output | |
| | | * ------ | |
| | | * x updated to contain the solution vector x that solves op(A) * x = | |
| | | b. | |
| | | * | |
| | | * Reference: http://www.netlib.org/blas/ztpsv.f | |
| | | * | |
| | | * Error status for this function can be retrieved via cublasGetError(). | |
| | | * | |
| | | * Error Status | |
| | | * ------------ | |
| | | * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialize | |
| | | d | |
| | | * CUBLAS_STATUS_INVALID_VALUE if incx == 0 or if n < 0 or n > 2035 | |
| | | * CUBLAS_STATUS_ARCH_MISMATCH if invoked on device without DP support | |
| | | * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU | |
| | | */ | |
| | | void CUBLASAPI cublasZtpsv (char uplo, char trans, char diag, int n, | |
| | | const cuDoubleComplex *AP, cuDoubleComplex *x, | |
| | | int incx); | |
| | | | |
| /* ----------------- CUBLAS single complex BLAS2 functions ----------------
- */ | | /* ----------------- CUBLAS single complex BLAS2 functions ----------------
- */ | |
|
| | | /* | |
| | | * cublasCgemv (char trans, int m, int n, cuComplex alpha, const cuComplex | |
| | | *A, | |
| | | * int lda, const cuComplex *x, int incx, cuComplex beta, cuCo | |
| | | mplex *y, | |
| | | * int incy) | |
| | | * | |
| | | * performs one of the matrix-vector operations | |
| | | * | |
| | | * y = alpha * op(A) * x + beta * y, | |
| | | * | |
| | | * where op(A) is one of | |
| | | * | |
| | | * op(A) = A or op(A) = transpose(A) or op(A) = conjugate(transpose( | |
| | | A)) | |
| | | * | |
| | | * where alpha and beta are single precision scalars, x and y are single | |
| | | * precision vectors, and A is an m x n matrix consisting of single precisi | |
| | | on | |
| | | * elements. Matrix A is stored in column major format, and lda is the lead | |
| | | ing | |
| | | * dimension of the two-dimensional array in which A is stored. | |
| | | * | |
| | | * Input | |
| | | * ----- | |
| | | * trans specifies op(A). If trans = 'n' or 'N', op(A) = A. If | |
| | | * trans = 't' or 'T', op(A) = transpose(A). If trans = 'c' or 'C', | |
| | | * op(A) = conjugate(transpose(A)) | |
| | | * m specifies the number of rows of the matrix A. m must be at least | |
| | | * zero. | |
| | | * n specifies the number of columns of the matrix A. n must be at lea | |
| | | st | |
| | | * zero. | |
| | | * alpha single precision scalar multiplier applied to op(A). | |
| | | * A single precision array of dimensions (lda, n) if trans = 'n' or | |
| | | * 'N', and of dimensions (lda, m) otherwise. lda must be at least | |
| | | * max(1, m) and at least max(1, n) otherwise. | |
| | | * lda leading dimension of two-dimensional array used to store matrix A | |
| | | * x single precision array of length at least (1 + (n - 1) * abs(incx | |
| | | )) | |
| | | * when trans = 'N' or 'n' and at least (1 + (m - 1) * abs(incx)) | |
| | | * otherwise. | |
| | | * incx specifies the storage spacing between elements of x. incx must no | |
| | | t | |
| | | * be zero. | |
| | | * beta single precision scalar multiplier applied to vector y. If beta | |
| | | * is zero, y is not read. | |
| | | * y single precision array of length at least (1 + (m - 1) * abs(incy | |
| | | )) | |
| | | * when trans = 'N' or 'n' and at least (1 + (n - 1) * abs(incy)) | |
| | | * otherwise. | |
| | | * incy specifies the storage spacing between elements of y. incy must no | |
| | | t | |
| | | * be zero. | |
| | | * | |
| | | * Output | |
| | | * ------ | |
| | | * y updated according to alpha * op(A) * x + beta * y | |
| | | * | |
| | | * Reference: http://www.netlib.org/blas/cgemv.f | |
| | | * | |
| | | * Error status for this function can be retrieved via cublasGetError(). | |
| | | * | |
| | | * Error Status | |
| | | * ------------ | |
| | | * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialize | |
| | | d | |
| | | * CUBLAS_STATUS_INVALID_VALUE if m or n are < 0, or if incx or incy == | |
| | | 0 | |
| | | * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU | |
| | | */ | |
| void CUBLASAPI cublasCgemv (char trans, int m, int n, cuComplex alpha, | | void CUBLASAPI cublasCgemv (char trans, int m, int n, cuComplex alpha, | |
| const cuComplex *A, int lda, const cuComplex *x
, | | const cuComplex *A, int lda, const cuComplex *x
, | |
| int incx, cuComplex beta, cuComplex *y, int inc
y); | | int incx, cuComplex beta, cuComplex *y, int inc
y); | |
|
| | | /* | |
| | | * void | |
| | | * cublasCgbmv (char trans, int m, int n, int kl, int ku, cuComplex alpha, | |
| | | * const cuComplex *A, int lda, const cuComplex *x, int incx, | |
| | | cuComplex beta, | |
| | | * cuComplex *y, int incy); | |
| | | * | |
| | | * performs one of the matrix-vector operations | |
| | | * | |
| | | * y = alpha*op(A)*x + beta*y, op(A)=A or op(A) = transpose(A) | |
| | | * | |
| | | * alpha and beta are single precision complex scalars. x and y are single | |
| | | precision | |
| | | * complex vectors. A is an m by n band matrix consisting of single precisi | |
| | | on complex elements | |
| | | * with kl sub-diagonals and ku super-diagonals. | |
| | | * | |
| | | * Input | |
| | | * ----- | |
| | | * trans specifies op(A). If trans == 'N' or 'n', op(A) = A. If trans == ' | |
| | | T', | |
| | | * or 't', op(A) = transpose(A). If trans == 'C' or 'c', | |
| | | * op(A) = conjugate(transpose(A)). | |
| | | * m specifies the number of rows of the matrix A. m must be at least | |
| | | * zero. | |
| | | * n specifies the number of columns of the matrix A. n must be at lea | |
| | | st | |
| | | * zero. | |
| | | * kl specifies the number of sub-diagonals of matrix A. It must be at | |
| | | * least zero. | |
| | | * ku specifies the number of super-diagonals of matrix A. It must be a | |
| | | t | |
| | | * least zero. | |
| | | * alpha single precision complex scalar multiplier applied to op(A). | |
| | | * A single precision complex array of dimensions (lda, n). The leadin | |
| | | g | |
| | | * (kl + ku + 1) x n part of the array A must contain the band matri | |
| | | x A, | |
| | | * supplied column by column, with the leading diagonal of the matri | |
| | | x | |
| | | * in row (ku + 1) of the array, the first super-diagonal starting a | |
| | | t | |
| | | * position 2 in row ku, the first sub-diagonal starting at position | |
| | | 1 | |
| | | * in row (ku + 2), and so on. Elements in the array A that do not | |
| | | * correspond to elements in the band matrix (such as the top left | |
| | | * ku x ku triangle) are not referenced. | |
| | | * lda leading dimension of A. lda must be at least (kl + ku + 1). | |
| | | * x single precision complex array of length at least (1+(n-1)*abs(in | |
| | | cx)) when | |
| | | * trans == 'N' or 'n' and at least (1+(m-1)*abs(incx)) otherwise. | |
| | | * incx specifies the increment for the elements of x. incx must not be z | |
| | | ero. | |
| | | * beta single precision complex scalar multiplier applied to vector y. I | |
| | | f beta is | |
| | | * zero, y is not read. | |
| | | * y single precision complex array of length at least (1+(m-1)*abs(in | |
| | | cy)) when | |
| | | * trans == 'N' or 'n' and at least (1+(n-1)*abs(incy)) otherwise. I | |
| | | f | |
| | | * beta is zero, y is not read. | |
| | | * incy On entry, incy specifies the increment for the elements of y. inc | |
| | | y | |
| | | * must not be zero. | |
| | | * | |
| | | * Output | |
| | | * ------ | |
| | | * y updated according to y = alpha*op(A)*x + beta*y | |
| | | * | |
| | | * Reference: http://www.netlib.org/blas/cgbmv.f | |
| | | * | |
| | | * Error status for this function can be retrieved via cublasGetError(). | |
| | | * | |
| | | * Error Status | |
| | | * ------------ | |
| | | * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialize | |
| | | d | |
| | | * CUBLAS_STATUS_INVALID_VALUE if n < 0, or if incx or incy == 0 | |
| | | * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU | |
| | | */ | |
| void CUBLASAPI cublasCgbmv (char trans, int m, int n, int kl, int ku, | | void CUBLASAPI cublasCgbmv (char trans, int m, int n, int kl, int ku, | |
| cuComplex alpha, const cuComplex *A, int lda, | | cuComplex alpha, const cuComplex *A, int lda, | |
| const cuComplex *x, int incx, cuComplex beta, | | const cuComplex *x, int incx, cuComplex beta, | |
| cuComplex *y, int incy); | | cuComplex *y, int incy); | |
|
| | | /* | |
| | | * void | |
| | | * cublasChemv (char uplo, int n, cuComplex alpha, const cuComplex *A, int | |
| | | lda, | |
| | | * const cuComplex *x, int incx, cuComplex beta, cuComplex *y, | |
| | | int incy) | |
| | | * | |
| | | * performs the matrix-vector operation | |
| | | * | |
| | | * y = alpha*A*x + beta*y | |
| | | * | |
| | | * Alpha and beta are single precision complex scalars, and x and y are sin | |
| | | gle | |
| | | * precision complex vectors, each with n elements. A is a hermitian n x n | |
| | | matrix | |
| | | * consisting of single precision complex elements that is stored in either | |
| | | upper or | |
| | | * lower storage mode. | |
| | | * | |
| | | * Input | |
| | | * ----- | |
| | | * uplo specifies whether the upper or lower triangular part of the array | |
| | | A | |
| | | * is to be referenced. If uplo == 'U' or 'u', the hermitian matrix | |
| | | A | |
| | | * is stored in upper storage mode, i.e. only the upper triangular p | |
| | | art | |
| | | * of A is to be referenced while the lower triangular part of A is | |
| | | to | |
| | | * be inferred. If uplo == 'L' or 'l', the hermitian matrix A is sto | |
| | | red | |
| | | * in lower storage mode, i.e. only the lower triangular part of A i | |
| | | s | |
| | | * to be referenced while the upper triangular part of A is to be | |
| | | * inferred. | |
| | | * n specifies the number of rows and the number of columns of the | |
| | | * hermitian matrix A. n must be at least zero. | |
| | | * alpha single precision complex scalar multiplier applied to A*x. | |
| | | * A single precision complex array of dimensions (lda, n). If uplo == | |
| | | 'U' or 'u', | |
| | | * the leading n x n upper triangular part of the array A must conta | |
| | | in | |
| | | * the upper triangular part of the hermitian matrix and the strictl | |
| | | y | |
| | | * lower triangular part of A is not referenced. If uplo == 'L' or ' | |
| | | l', | |
| | | * the leading n x n lower triangular part of the array A must conta | |
| | | in | |
| | | * the lower triangular part of the hermitian matrix and the strictl | |
| | | y | |
| | | * upper triangular part of A is not referenced. The imaginary parts | |
| | | * of the diagonal elements need not be set, they are assumed to be | |
| | | zero. | |
| | | * lda leading dimension of A. It must be at least max (1, n). | |
| | | * x single precision complex array of length at least (1 + (n - 1) * | |
| | | abs(incx)). | |
| | | * incx storage spacing between elements of x. incx must not be zero. | |
| | | * beta single precision complex scalar multiplier applied to vector y. | |
| | | * y single precision complex array of length at least (1 + (n - 1) * | |
| | | abs(incy)). | |
| | | * If beta is zero, y is not read. | |
| | | * incy storage spacing between elements of y. incy must not be zero. | |
| | | * | |
| | | * Output | |
| | | * ------ | |
| | | * y updated according to y = alpha*A*x + beta*y | |
| | | * | |
| | | * Reference: http://www.netlib.org/blas/chemv.f | |
| | | * | |
| | | * Error status for this function can be retrieved via cublasGetError(). | |
| | | * | |
| | | * Error Status | |
| | | * ------------ | |
| | | * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialize | |
| | | d | |
| | | * CUBLAS_STATUS_INVALID_VALUE if n < 0, or if incx or incy == 0 | |
| | | * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU | |
| | | */ | |
| | | | |
| void CUBLASAPI cublasChemv (char uplo, int n, cuComplex alpha, | | void CUBLASAPI cublasChemv (char uplo, int n, cuComplex alpha, | |
| const cuComplex *A, int lda, const cuComplex *x
, | | const cuComplex *A, int lda, const cuComplex *x
, | |
| int incx, cuComplex beta, cuComplex *y, int inc
y); | | int incx, cuComplex beta, cuComplex *y, int inc
y); | |
|
| | | /* | |
| | | * void | |
| | | * cublasChbmv (char uplo, int n, int k, cuComplex alpha, const cuComplex * | |
| | | A, int lda, | |
| | | * const cuComplex *x, int incx, cuComplex beta, cuComplex *y, | |
| | | int incy) | |
| | | * | |
| | | * performs the matrix-vector operation | |
| | | * | |
| | | * y := alpha*A*x + beta*y | |
| | | * | |
| | | * alpha and beta are single precision complex scalars. x and y are single | |
| | | precision | |
| | | * complex vectors with n elements. A is an n by n hermitian band matrix co | |
| | | nsisting | |
| | | * of single precision complex elements, with k super-diagonals and the sam | |
| | | e number | |
| | | * of subdiagonals. | |
| | | * | |
| | | * Input | |
| | | * ----- | |
| | | * uplo specifies whether the upper or lower triangular part of the hermi | |
| | | tian | |
| | | * band matrix A is being supplied. If uplo == 'U' or 'u', the upper | |
| | | * triangular part is being supplied. If uplo == 'L' or 'l', the low | |
| | | er | |
| | | * triangular part is being supplied. | |
| | | * n specifies the number of rows and the number of columns of the | |
| | | * hermitian matrix A. n must be at least zero. | |
| | | * k specifies the number of super-diagonals of matrix A. Since the ma | |
| | | trix | |
| | | * is hermitian, this is also the number of sub-diagonals. k must be | |
| | | at | |
| | | * least zero. | |
| | | * alpha single precision complex scalar multiplier applied to A*x. | |
| | | * A single precision complex array of dimensions (lda, n). When uplo | |
| | | == 'U' or | |
| | | * 'u', the leading (k + 1) x n part of array A must contain the upp | |
| | | er | |
| | | * triangular band of the hermitian matrix, supplied column by colum | |
| | | n, | |
| | | * with the leading diagonal of the matrix in row (k+1) of the array | |
| | | , | |
| | | * the first super-diagonal starting at position 2 in row k, and so | |
| | | on. | |
| | | * The top left k x k triangle of the array A is not referenced. Whe | |
| | | n | |
| | | * uplo == 'L' or 'l', the leading (k + 1) x n part of the array A m | |
| | | ust | |
| | | * contain the lower triangular band part of the hermitian matrix, | |
| | | * supplied column by column, with the leading diagonal of the matri | |
| | | x in | |
| | | * row 1 of the array, the first sub-diagonal starting at position 1 | |
| | | in | |
| | | * row 2, and so on. The bottom right k x k triangle of the array A | |
| | | is | |
| | | * not referenced. The imaginary parts of the diagonal elements need | |
| | | * not be set, they are assumed to be zero. | |
| | | * lda leading dimension of A. lda must be at least (k + 1). | |
| | | * x single precision complex array of length at least (1 + (n - 1) * | |
| | | abs(incx)). | |
| | | * incx storage spacing between elements of x. incx must not be zero. | |
| | | * beta single precision complex scalar multiplier applied to vector y. I | |
| | | f beta is | |
| | | * zero, y is not read. | |
| | | * y single precision complex array of length at least (1 + (n - 1) * | |
| | | abs(incy)). | |
| | | * If beta is zero, y is not read. | |
| | | * incy storage spacing between elements of y. incy must not be zero. | |
| | | * | |
| | | * Output | |
| | | * ------ | |
| | | * y updated according to alpha*A*x + beta*y | |
| | | * | |
| | | * Reference: http://www.netlib.org/blas/chbmv.f | |
| | | * | |
| | | * Error status for this function can be retrieved via cublasGetError(). | |
| | | * | |
| | | * Error Status | |
| | | * ------------ | |
| | | * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialize | |
| | | d | |
| | | * CUBLAS_STATUS_INVALID_VALUE if k or n < 0, or if incx or incy == 0 | |
| | | * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU | |
| | | */ | |
| void CUBLASAPI cublasChbmv (char uplo, int n, int k, cuComplex alpha, | | void CUBLASAPI cublasChbmv (char uplo, int n, int k, cuComplex alpha, | |
| const cuComplex *A, int lda, const cuComplex *x
, | | const cuComplex *A, int lda, const cuComplex *x
, | |
| int incx, cuComplex beta, cuComplex *y, int inc
y); | | int incx, cuComplex beta, cuComplex *y, int inc
y); | |
| void CUBLASAPI cublasChpmv (char uplo, int n, cuComplex alpha, | | void CUBLASAPI cublasChpmv (char uplo, int n, cuComplex alpha, | |
| const cuComplex *AP, const cuComplex *x, int in
cx, | | const cuComplex *AP, const cuComplex *x, int in
cx, | |
| cuComplex beta, cuComplex *y, int incy); | | cuComplex beta, cuComplex *y, int incy); | |
|
| | | | |
| | | /* | |
| | | * | |
| | | * cublasCtrmv (char uplo, char trans, char diag, int n, const cuComplex *A | |
| | | , | |
| | | * int lda, cuComplex *x, int incx); | |
| | | * | |
| | | * performs one of the matrix-vector operations x = op(A) * x, | |
| | | * where op(A) = A, or op(A) = transpose(A) or op(A) = conjugate(transpose( | |
| | | A)). | |
| | | * x is an n-element single precision complex vector, and | |
| | | * A is an n x n, unit or non-unit, upper or lower, triangular matrix compo | |
| | | sed | |
| | | * of single precision complex elements. | |
| | | * | |
| | | * Input | |
| | | * ----- | |
| | | * uplo specifies whether the matrix A is an upper or lower triangular | |
| | | * matrix. If uplo = 'U' or 'u', then A is an upper triangular matri | |
| | | x. | |
| | | * If uplo = 'L' or 'l', then A is a lower triangular matrix. | |
| | | * trans specifies op(A). If trans = 'n' or 'N', op(A) = A. If trans = 't' | |
| | | or | |
| | | * 'T', op(A) = transpose(A). If trans = 'c' or 'C', op(A) = | |
| | | * conjugate(transpose(A)). | |
| | | * diag specifies whether or not matrix A is unit triangular. If diag = ' | |
| | | U' | |
| | | * or 'u', A is assumed to be unit triangular. If diag = 'N' or 'n', | |
| | | A | |
| | | * is not assumed to be unit triangular. | |
| | | * n specifies the number of rows and columns of the matrix A. n must | |
| | | be | |
| | | * at least zero. | |
| | | * A single precision complex array of dimension (lda, n). If uplo = 'U' or 'u | |
| | | ', | |
| | | * the leading n x n upper triangular part of the array A must conta | |
| | | in | |
| | | * the upper triangular matrix and the strictly lower triangular par | |
| | | t | |
| | | * of A is not referenced. If uplo = 'L' or 'l', the leading n x n l | |
| | | ower | |
| | | * triangular part of the array A must contain the lower triangular | |
| | | * matrix and the strictly upper triangular part of A is not referen | |
| | | ced. | |
| | | * When diag = 'U' or 'u', the diagonal elements of A are not refere | |
| | | nced | |
| | | * either, but are assumed to be unity. | |
| | | * lda is the leading dimension of A. It must be at least max (1, n). | |
| | | * x single precision complex array of length at least (1 + (n - 1) * abs(incx | |
| | | ) ). | |
| | | * On entry, x contains the source vector. On exit, x is overwritten | |
| | | * with the result vector. | |
| | | * incx specifies the storage spacing for elements of x. incx must not be | |
| | | * zero. | |
| | | * | |
| | | * Output | |
| | | * ------ | |
| | | * x updated according to x = op(A) * x, | |
| | | * | |
| | | * Reference: http://www.netlib.org/blas/ctrmv.f | |
| | | * | |
| | | * Error status for this function can be retrieved via cublasGetError(). | |
| | | * | |
| | | * Error Status | |
| | | * ------------ | |
| | | * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialize | |
| | | d | |
| | | * CUBLAS_STATUS_INVALID_VALUE if incx == 0 or if n < 0 | |
| | | * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU | |
| | | */ | |
| void CUBLASAPI cublasCtrmv (char uplo, char trans, char diag, int n, | | void CUBLASAPI cublasCtrmv (char uplo, char trans, char diag, int n, | |
| const cuComplex *A, int lda, cuComplex *x, | | const cuComplex *A, int lda, cuComplex *x, | |
| int incx); | | int incx); | |
|
| | | | |
| | | /* | |
| | | * void | |
| | | * cublasCtbmv (char uplo, char trans, char diag, int n, int k, const cuCom | |
| | | plex *A, | |
| | | * int lda, cuComplex *x, int incx) | |
| | | * | |
| | | * performs one of the matrix-vector operations x = op(A) * x, where op(A) | |
| | | = A, | |
| | | * op(A) = transpose(A) or op(A) = conjugate(transpose(A)). x is an n-eleme | |
| | | nt | |
| | | * single precision complex vector, and A is an n x n, unit or non-unit, up | |
| | | per | |
| | | * or lower triangular band matrix composed of single precision complex ele | |
| | | ments. | |
| | | * | |
| | | * Input | |
| | | * ----- | |
| | | * uplo specifies whether the matrix A is an upper or lower triangular ba | |
| | | nd | |
| | | * matrix. If uplo == 'U' or 'u', A is an upper triangular band matr | |
| | | ix. | |
| | | * If uplo == 'L' or 'l', A is a lower triangular band matrix. | |
| | | * trans specifies op(A). If trans == 'N' or 'n', op(A) = A. If trans == | |
| | | 'T', | |
| | | * or 't', op(A) = transpose(A). If trans == 'C' or 'c', | |
| | | * op(A) = conjugate(transpose(A)). | |
| | | * diag specifies whether or not matrix A is unit triangular. If diag == | |
| | | 'U' | |
| | | * or 'u', A is assumed to be unit triangular. If diag == 'N' or 'n' | |
| | | , A | |
| | | * is not assumed to be unit triangular. | |
| | | * n specifies the number of rows and columns of the matrix A. n must | |
| | | be | |
| | | * at least zero. | |
| | | * k specifies the number of super- or sub-diagonals. If uplo == 'U' o | |
| | | r | |
| | | * 'u', k specifies the number of super-diagonals. If uplo == 'L' or | |
| | | * 'l', k specifies the number of sub-diagonals. k must at least be | |
| | | * zero. | |
| | | * A single precision complex array of dimension (lda, n). If uplo == | |
| | | 'U' or 'u', | |
| | | * the leading (k + 1) x n part of the array A must contain the uppe | |
| | | r | |
| | | * triangular band matrix, supplied column by column, with the leadi | |
| | | ng | |
| | | * diagonal of the matrix in row (k + 1) of the array, the first | |
| | | * super-diagonal starting at position 2 in row k, and so on. The to | |
| | | p | |
| | | * left k x k triangle of the array A is not referenced. If uplo == | |
| | | 'L' | |
| | | * or 'l', the leading (k + 1) x n part of the array A must contain | |
| | | the | |
| | | * lower triangular band matrix, supplied column by column, with the | |
| | | * leading diagonal of the matrix in row 1 of the array, the first | |
| | | * sub-diagonal starting at position 1 in row 2, and so on. The botto | |
| | | m | |
| | | * right k x k triangle of the array is not referenced. | |
| | | * lda is the leading dimension of A. It must be at least (k + 1). | |
| | | * x single precision complex array of length at least (1 + (n - 1) * | |
| | | abs(incx)). | |
| | | * On entry, x contains the source vector. On exit, x is overwritten | |
| | | * with the result vector. | |
| | | * incx specifies the storage spacing for elements of x. incx must not be | |
| | | * zero. | |
| | | * | |
| | | * Output | |
| | | * ------ | |
| | | * x updated according to x = op(A) * x | |
| | | * | |
| | | * Reference: http://www.netlib.org/blas/ctbmv.f | |
| | | * | |
| | | * Error status for this function can be retrieved via cublasGetError(). | |
| | | * | |
| | | * Error Status | |
| | | * ------------ | |
| | | * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialize | |
| | | d | |
| | | * CUBLAS_STATUS_INVALID_VALUE if n or k < 0, or if incx == 0 | |
| | | * CUBLAS_STATUS_ALLOC_FAILED if function cannot allocate enough intern | |
| | | al scratch vector memory | |
| | | * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU | |
| | | */ | |
| void CUBLASAPI cublasCtbmv (char uplo, char trans, char diag, int n, int k, | | void CUBLASAPI cublasCtbmv (char uplo, char trans, char diag, int n, int k, | |
| const cuComplex *A, int lda, cuComplex *x, | | const cuComplex *A, int lda, cuComplex *x, | |
| int incx); | | int incx); | |
|
| | | /* | |
| | | * void | |
| | | * cublasCtpmv (char uplo, char trans, char diag, int n, const cuComplex *A | |
| | | P, | |
| | | * cuComplex *x, int incx); | |
| | | * | |
| | | * performs one of the matrix-vector operations x = op(A) * x, where op(A) | |
| | | = A, | |
| | | * op(A) = transpose(A) or op(A) = conjugate(transpose(A)) . x is an n elem | |
| | | ent | |
| | | * single precision complex vector, and A is an n x n, unit or non-unit, up | |
| | | per | |
| | | * or lower triangular matrix composed of single precision complex elements | |
| | | . | |
| | | * | |
| | | * Input | |
| | | * ----- | |
| | | * uplo specifies whether the matrix A is an upper or lower triangular | |
| | | * matrix. If uplo == 'U' or 'u', then A is an upper triangular matr | |
| | | ix. | |
| | | * If uplo == 'L' or 'l', then A is a lower triangular matrix. | |
| | | * trans specifies op(A). If trans == 'N' or 'n', op(A) = A. If trans == | |
| | | 'T', | |
| | | * or 't', op(A) = transpose(A). If trans == 'C' or 'c', | |
| | | * op(A) = conjugate(transpose(A)). | |
| | | * | |
| | | * diag specifies whether or not matrix A is unit triangular. If diag == | |
| | | 'U' | |
| | | * or 'u', A is assumed to be unit triangular. If diag == 'N' or 'n' | |
| | | , A | |
| | | * is not assumed to be unit triangular. | |
| | | * n specifies the number of rows and columns of the matrix A. n must | |
| | | be | |
| | | * at least zero. In the current implementation n must not exceed 40 | |
| | | 70. | |
| | | * AP single precision complex array with at least ((n * (n + 1)) / 2) | |
| | | elements. If | |
| | | * uplo == 'U' or 'u', the array AP contains the upper triangular pa | |
| | | rt | |
| | | * of the symmetric matrix A, packed sequentially, column by column; | |
| | | * that is, if i <= j, then A[i,j] is stored in AP[i+(j*(j+1)/2)]. I | |
| | | f | |
| | | * uplo == 'L' or 'l', the array AP contains the lower triangular pa | |
| | | rt | |
| | | * of the symmetric matrix A, packed sequentially, column by column; | |
| | | * that is, if i >= j, then A[i,j] is stored in AP[i+((2*n-j+1)*j)/2 | |
| | | ]. | |
| | | * x single precision complex array of length at least (1 + (n - 1) * | |
| | | abs(incx)). | |
| | | * On entry, x contains the source vector. On exit, x is overwritten | |
| | | * with the result vector. | |
| | | * incx specifies the storage spacing for elements of x. incx must not be | |
| | | * zero. | |
| | | * | |
| | | * Output | |
| | | * ------ | |
| | | * x updated according to x = op(A) * x, | |
| | | * | |
| | | * Reference: http://www.netlib.org/blas/ctpmv.f | |
| | | * | |
| | | * Error status for this function can be retrieved via cublasGetError(). | |
| | | * | |
| | | * Error Status | |
| | | * ------------ | |
| | | * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialize | |
| | | d | |
| | | * CUBLAS_STATUS_INVALID_VALUE if incx == 0 or n < 0 | |
| | | * CUBLAS_STATUS_ALLOC_FAILED if function cannot allocate enough intern | |
| | | al scratch vector memory | |
| | | * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU | |
| | | */ | |
| void CUBLASAPI cublasCtpmv (char uplo, char trans, char diag, int n, | | void CUBLASAPI cublasCtpmv (char uplo, char trans, char diag, int n, | |
| const cuComplex *AP, cuComplex *x, int incx); | | const cuComplex *AP, cuComplex *x, int incx); | |
|
| | | /* | |
| | | * void | |
| | | * cublasCtrsv (char uplo, char trans, char diag, int n, const cuComplex *A | |
| | | , | |
| | | * int lda, cuComplex *x, int incx) | |
| | | * | |
| | | * solves a system of equations op(A) * x = b, where op(A) is either A, | |
| | | * transpose(A) or conjugate(transpose(A)). b and x are single precision | |
| | | * complex vectors consisting of n elements, and A is an n x n matrix | |
| | | * composed of a unit or non-unit, upper or lower triangular matrix. | |
| | | * Matrix A is stored in column major format, and lda is the leading | |
| | | * dimension of the two-dimensional array containing A. | |
| | | * | |
| | | * No test for singularity or near-singularity is included in this function | |
| | | . | |
| | | * Such tests must be performed before calling this function. | |
| | | * | |
| | | * Input | |
| | | * ----- | |
| | | * uplo specifies whether the matrix data is stored in the upper or the | |
| | | * lower triangular part of array A. If uplo = 'U' or 'u', then only | |
| | | * the upper triangular part of A may be referenced. If uplo = 'L' o | |
| | | r | |
| | | * 'l', then only the lower triangular part of A may be referenced. | |
| | | * trans specifies op(A). If trans = 'n' or 'N', op(A) = A. If trans = 't' | |
| | | * or 'T', op(A) = transpose(A). If trans = 'c' or 'C', op(A) = | |
| | | * conjugate(transpose(A)). | |
| | | * diag specifies whether or not A is a unit triangular matrix like so: | |
| | | * if diag = 'U' or 'u', A is assumed to be unit triangular. If | |
| | | * diag = 'N' or 'n', then A is not assumed to be unit triangular. | |
| | | * n specifies the number of rows and columns of the matrix A. It | |
| | | * must be at least 0. | |
| | | * A is a single precision complex array of dimensions (lda, n). If up | |
| | | lo = 'U' | |
| | | * or 'u', then A must contain the upper triangular part of a symme | |
| | | tric | |
| | | * matrix, and the strictly lower triangular part is not referenced | |
| | | . | |
| | | * If uplo = 'L' or 'l', then A contains the lower triangular part o | |
| | | f | |
| | | * a symmetric matrix, and the strictly upper triangular part is not | |
| | | * referenced. | |
| | | * lda is the leading dimension of the two-dimensional array containing | |
| | | A. | |
| | | * lda must be at least max(1, n). | |
| | | * x single precision complex array of length at least (1 + (n - 1) * | |
| | | abs(incx)). | |
| | | * On entry, x contains the n element right-hand side vector b. On e | |
| | | xit, | |
| | | * it is overwritten with the solution vector x. | |
| | | * incx specifies the storage spacing between elements of x. incx must no | |
| | | t | |
| | | * be zero. | |
| | | * | |
| | | * Output | |
| | | * ------ | |
| | | * x updated to contain the solution vector x that solves op(A) * x = | |
| | | b. | |
| | | * | |
| | | * Reference: http://www.netlib.org/blas/ctrsv.f | |
| | | * | |
| | | * Error status for this function can be retrieved via cublasGetError(). | |
| | | * | |
| | | * Error Status | |
| | | * ------------ | |
| | | * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialize | |
| | | d | |
| | | * CUBLAS_STATUS_INVALID_VALUE if incx == 0 or if n < 0 | |
| | | * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU | |
| | | */ | |
| void CUBLASAPI cublasCtrsv (char uplo, char trans, char diag, int n, | | void CUBLASAPI cublasCtrsv (char uplo, char trans, char diag, int n, | |
| const cuComplex *A, int lda, cuComplex *x, | | const cuComplex *A, int lda, cuComplex *x, | |
| int incx); | | int incx); | |
|
| | | /* | |
| | | * void cublasCtbsv (char uplo, char trans, char diag, int n, int k, | |
| | | * const cuComplex *A, int lda, cuComplex *X, int incx) | |
| | | * | |
| | | * solves one of the systems of equations op(A)*x = b, where op(A) is eithe | |
| | | r | |
| | | * op(A) = A , op(A) = transpose(A) or op(A) = conjugate(transpose(A)). | |
| | | * b and x are n element vectors, and A is an n x n unit or non-unit, | |
| | | * upper or lower triangular band matrix with k + 1 diagonals. No test | |
| | | * for singularity or near-singularity is included in this function. | |
| | | * Such tests must be performed before calling this function. | |
| | | * | |
| | | * Input | |
| | | * ----- | |
| | | * uplo specifies whether the matrix is an upper or lower triangular band | |
| | | * matrix as follows: If uplo == 'U' or 'u', A is an upper triangula | |
| | | r | |
| | | * band matrix. If uplo == 'L' or 'l', A is a lower triangular band | |
| | | * matrix. | |
| | | * trans specifies op(A). If trans == 'N' or 'n', op(A) = A. If trans == ' | |
| | | T', | |
| | | * or 't', op(A) = transpose(A). If trans == 'C' or 'c', | |
| | | * op(A) = conjugate(transpose(A)). | |
| | | * diag specifies whether A is unit triangular. If diag == 'U' or 'u', A | |
| | | is | |
| | | * assumed to be unit triangular; that is, diagonal elements are not | |
| | | * read and are assumed to be unity. If diag == 'N' or 'n', A is not | |
| | | * assumed to be unit triangular. | |
| | | * n specifies the number of rows and columns of the matrix A. n must | |
| | | be | |
| | | * at least zero. | |
| | | * k specifies the number of super- or sub-diagonals. If uplo == 'U' o | |
| | | r | |
| | | * 'u', k specifies the number of super-diagonals. If uplo == 'L' or | |
| | | * 'l', k specifies the number of sub-diagonals. k must at least be | |
| | | * zero. | |
| | | * A single precision complex array of dimension (lda, n). If uplo == | |
| | | 'U' or 'u', | |
| | | * the leading (k + 1) x n part of the array A must contain the uppe | |
| | | r | |
| | | * triangular band matrix, supplied column by column, with the leadi | |
| | | ng | |
| | | * diagonal of the matrix in row (k + 1) of the array, the first sup | |
| | | er- | |
| | | * diagonal starting at position 2 in row k, and so on. The top left | |
| | | * k x k triangle of the array A is not referenced. If uplo == 'L' o | |
| | | r | |
| | | * 'l', the leading (k + 1) x n part of the array A must contain th | |
| | | e | |
| | | * lower triangular band matrix, supplied column by column, with the | |
| | | * leading diagonal of the matrix in row 1 of the array, the first | |
| | | * sub-diagonal starting at position 1 in row 2, and so on. The bott | |
| | | om | |
| | | * right k x k triangle of the array is not referenced. | |
| | | * x single precision complex array of length at least (1+(n-1)*abs(in | |
| | | cx)). | |
| | | * incx storage spacing between elements of x. It must not be zero. | |
| | | * | |
| | | * Output | |
| | | * ------ | |
| | | * x updated to contain the solution vector x that solves op(A) * x = | |
| | | b. | |
| | | * | |
| | | * Reference: http://www.netlib.org/blas/ctbsv.f | |
| | | * | |
| | | * Error status for this function can be retrieved via cublasGetError(). | |
| | | * | |
| | | * Error Status | |
| | | * ------------ | |
| | | * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialize | |
| | | d | |
| | | * CUBLAS_STATUS_INVALID_VALUE if incx == 0, n < 0 or n > 2035 | |
| | | * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU | |
| | | */ | |
| void CUBLASAPI cublasCtbsv (char uplo, char trans, char diag, int n, int k, | | void CUBLASAPI cublasCtbsv (char uplo, char trans, char diag, int n, int k, | |
| const cuComplex *A, int lda, cuComplex *x, | | const cuComplex *A, int lda, cuComplex *x, | |
| int incx); | | int incx); | |
|
| | | /* | |
| | | * void | |
| | | * cublasCtpsv (char uplo, char trans, char diag, int n, const cuComplex *A | |
| | | P, | |
| | | * cuComplex *X, int incx) | |
| | | * | |
| | | * solves one of the systems of equations op(A)*x = b, where op(A) is eithe | |
| | | r | |
| | | * op(A) = A , op(A) = transpose(A) or op(A) = conjugate(transpose(A)). b and | |
| | | * x are n element complex vectors, and A is an n x n unit or non-unit, | |
| | | * upper or lower triangular matrix. No test for singularity or near-singul | |
| | | arity | |
| | | * is included in this routine. Such tests must be performed before calling | |
| | | this routine. | |
| | | * | |
| | | * Input | |
| | | * ----- | |
| | | * uplo specifies whether the matrix is an upper or lower triangular matr | |
| | | ix | |
| | | * as follows: If uplo == 'U' or 'u', A is an upper triangular matri | |
| | | x. | |
| | | * If uplo == 'L' or 'l', A is a lower triangular matrix. | |
| | | * trans specifies op(A). If trans == 'N' or 'n', op(A) = A. If trans == ' | |
| | | T' | |
| | | * or 't', op(A) = transpose(A). If trans == 'C' or 'c', op(A) = | |
| | | * conjugate(transpose(A)). | |
| | | * diag specifies whether A is unit triangular. If diag == 'U' or 'u', A | |
| | | is | |
| | | * assumed to be unit triangular; that is, diagonal elements are not | |
| | | * read and are assumed to be unity. If diag == 'N' or 'n', A is not | |
| | | * assumed to be unit triangular. | |
| | | * n specifies the number of rows and columns of the matrix A. n must | |
| | | be | |
| | | * at least zero. | |
| | | * AP single precision complex array with at least ((n*(n+1))/2) elemen | |
| | | ts. | |
| | | * If uplo == 'U' or 'u', the array AP contains the upper triangular | |
| | | * matrix A, packed sequentially, column by column; that is, if i <= | |
| | | j, then | |
| | | * A[i,j] is stored in AP[i+(j*(j+1)/2)]. If uplo == 'L' or 'l', the | |
| | | * array AP contains the lower triangular matrix A, packed sequentia | |
| | | lly, | |
| | | * column by column; that is, if i >= j, then A[i,j] is stored in | |
| | | * AP[i+((2*n-j+1)*j)/2]. When diag = 'U' or 'u', the diagonal eleme | |
| | | nts | |
| | | * of A are not referenced and are assumed to be unity. | |
| | | * x single precision complex array of length at least (1+(n-1)*abs(in | |
| | | cx)). | |
| | | * incx storage spacing between elements of x. It must not be zero. | |
| | | * | |
| | | * Output | |
| | | * ------ | |
| | | * x updated to contain the solution vector x that solves op(A) * x = | |
| | | b. | |
| | | * | |
| | | * Reference: http://www.netlib.org/blas/ctpsv.f | |
| | | * | |
| | | * Error status for this function can be retrieved via cublasGetError(). | |
| | | * | |
| | | * Error Status | |
| | | * ------------ | |
| | | * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialize | |
| | | d | |
| | | * CUBLAS_STATUS_INVALID_VALUE if incx == 0 or if n < 0 or n > 2035 | |
| | | * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU | |
| | | */ | |
| void CUBLASAPI cublasCtpsv (char uplo, char trans, char diag, int n, | | void CUBLASAPI cublasCtpsv (char uplo, char trans, char diag, int n, | |
| const cuComplex *AP, cuComplex *x, int incx); | | const cuComplex *AP, cuComplex *x, int incx); | |
|
| | | | |
| | | /* | |
| | | * cublasCgeru (int m, int n, cuComplex alpha, const cuComplex *x, int incx | |
| | | , | |
| | | * const cuComplex *y, int incy, cuComplex *A, int lda) | |
| | | * | |
| | | * performs the rank 1 operation | |
| | | * | |
| | | * A = alpha * x * transpose(y) + A, | |
| | | * | |
| | | * where alpha is a single precision complex scalar, x is an m element sing | |
| | | le | |
| | | * precision complex vector, y is an n element single precision complex vec | |
| | | tor, and A | |
| | | * is an m by n matrix consisting of single precision complex elements. Mat | |
| | | rix A | |
| | | * is stored in column major format, and lda is the leading dimension of | |
| | | * the two-dimensional array used to store A. | |
| | | * | |
| | | * Input | |
| | | * ----- | |
| | | * m specifies the number of rows of the matrix A. It must be at least | |
| | | * zero. | |
| | | * n specifies the number of columns of the matrix A. It must be at | |
| | | * least zero. | |
| | | * alpha single precision complex scalar multiplier applied to x * transpo | |
| | | se(y) | |
| | | * x single precision complex array of length at least (1 + (m - 1) * | |
| | | abs(incx)) | |
| | | * incx specifies the storage spacing between elements of x. incx must no | |
| | | t | |
| | | * be zero. | |
| | | * y single precision complex array of length at least (1 + (n - 1) * | |
| | | abs(incy)) | |
| | | * incy specifies the storage spacing between elements of y. incy must no | |
| | | t | |
| | | * be zero. | |
| | | * A single precision complex array of dimensions (lda, n). | |
| | | * lda leading dimension of two-dimensional array used to store matrix A | |
| | | * | |
| | | * Output | |
| | | * ------ | |
| | | * A updated according to A = alpha * x * transpose(y) + A | |
| | | * | |
| | | * Reference: http://www.netlib.org/blas/cgeru.f | |
| | | * | |
| | | * Error status for this function can be retrieved via cublasGetError(). | |
| | | * | |
| | | * Error Status | |
| | | * ------------ | |
| | | * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialize | |
| | | d | |
| | | * CUBLAS_STATUS_INVALID_VALUE if m <0, n < 0, incx == 0, incy == 0 | |
| | | * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU | |
| | | */ | |
| void CUBLASAPI cublasCgeru (int m, int n, cuComplex alpha, const cuComplex
*x, | | void CUBLASAPI cublasCgeru (int m, int n, cuComplex alpha, const cuComplex
*x, | |
| int incx, const cuComplex *y, int incy, | | int incx, const cuComplex *y, int incy, | |
| cuComplex *A, int lda); | | cuComplex *A, int lda); | |
|
| | | /* | |
| | | * cublasCgerc (int m, int n, cuComplex alpha, const cuComplex *x, int incx | |
| | | , | |
| | | * const cuComplex *y, int incy, cuComplex *A, int lda) | |
| | | * | |
| | | * performs the rank 1 operation | |
| | | * | |
| | | * A = alpha * x * conjugate(transpose(y)) + A, | |
| | | * | |
| | | * where alpha is a single precision complex scalar, x is an m element sing | |
| | | le | |
| | | * precision complex vector, y is an n element single precision complex vec | |
| | | tor, and A | |
| | | * is an m by n matrix consisting of single precision complex elements. Mat | |
| | | rix A | |
| | | * is stored in column major format, and lda is the leading dimension of | |
| | | * the two-dimensional array used to store A. | |
| | | * | |
| | | * Input | |
| | | * ----- | |
| | | * m specifies the number of rows of the matrix A. It must be at least | |
| | | * zero. | |
| | | * n specifies the number of columns of the matrix A. It must be at | |
| | | * least zero. | |
| | | * alpha single precision complex scalar multiplier applied to | |
| | | * x * conjugate(transpose(y)) | |
| | | * x single precision complex array of length at least (1 + (m - 1) * | |
| | | abs(incx)) | |
| | | * incx specifies the storage spacing between elements of x. incx must no | |
| | | t | |
| | | * be zero. | |
| | | * y single precision complex array of length at least (1 + (n - 1) * | |
| | | abs(incy)) | |
| | | * incy specifies the storage spacing between elements of y. incy must no | |
| | | t | |
| | | * be zero. | |
| | | * A single precision complex array of dimensions (lda, n). | |
| | | * lda leading dimension of two-dimensional array used to store matrix A | |
| | | * | |
| | | * Output | |
| | | * ------ | |
| | | * A updated according to A = alpha * x * conjugate(transpose(y)) + A | |
| | | * | |
| | | * Reference: http://www.netlib.org/blas/cgerc.f | |
| | | * | |
| | | * Error status for this function can be retrieved via cublasGetError(). | |
| | | * | |
| | | * Error Status | |
| | | * ------------ | |
| | | * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialize | |
| | | d | |
| | | * CUBLAS_STATUS_INVALID_VALUE if m <0, n < 0, incx == 0, incy == 0 | |
| | | * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU | |
| | | */ | |
| void CUBLASAPI cublasCgerc (int m, int n, cuComplex alpha, const cuComplex
*x, | | void CUBLASAPI cublasCgerc (int m, int n, cuComplex alpha, const cuComplex
*x, | |
| int incx, const cuComplex *y, int incy, | | int incx, const cuComplex *y, int incy, | |
| cuComplex *A, int lda); | | cuComplex *A, int lda); | |
|
| void CUBLASAPI cublasCher (char uplo, int n, cuComplex alpha, | | /* | |
| | | * void | |
| | | * cublasCher (char uplo, int n, float alpha, const cuComplex *x, int incx, | |
| | | * cuComplex *A, int lda) | |
| | | * | |
| | | * performs the hermitian rank 1 operation | |
| | | * | |
| | | * A = alpha * x * conjugate(transpose(x)) + A, | |
| | | * | |
| | | * where alpha is a single precision real scalar, x is an n element single | |
| | | * precision complex vector and A is an n x n hermitian matrix consisting o | |
| | | f | |
| | | * single precision complex elements. Matrix A is stored in column major fo | |
| | | rmat, | |
| | | * and lda is the leading dimension of the two-dimensional array | |
| | | * containing A. | |
| | | * | |
| | | * Input | |
| | | * ----- | |
| | | * uplo specifies whether the matrix data is stored in the upper or | |
| | | * the lower triangular part of array A. If uplo = 'U' or 'u', | |
| | | * then only the upper triangular part of A may be referenced. | |
| | | * If uplo = 'L' or 'l', then only the lower triangular part of | |
| | | * A may be referenced. | |
| | | * n specifies the number of rows and columns of the matrix A. It | |
| | | * must be at least 0. | |
| | | * alpha single precision real scalar multiplier applied to | |
| | | * x * conjugate(transpose(x)) | |
| | | * x single precision complex array of length at least (1 + (n - 1) * | |
| | | abs(incx)) | |
| | | * incx specifies the storage spacing between elements of x. incx must | |
| | | * not be zero. | |
| | | * A single precision complex array of dimensions (lda, n). If uplo = | |
| | | 'U' or | |
| | | * 'u', then A must contain the upper triangular part of a hermitian | |
| | | * matrix, and the strictly lower triangular part is not referenced. | |
| | | * If uplo = 'L' or 'l', then A contains the lower triangular part | |
| | | * of a hermitian matrix, and the strictly upper triangular part is | |
| | | * not referenced. The imaginary parts of the diagonal elements need | |
| | | * not be set, they are assumed to be zero, and on exit they | |
| | | * are set to zero. | |
| | | * lda leading dimension of the two-dimensional array containing A. lda | |
| | | * must be at least max(1, n). | |
| | | * | |
| | | * Output | |
| | | * ------ | |
| | | * A updated according to A = alpha * x * conjugate(transpose(x)) + A | |
| | | * | |
| | | * Reference: http://www.netlib.org/blas/cher.f | |
| | | * | |
| | | * Error status for this function can be retrieved via cublasGetError(). | |
| | | * | |
| | | * Error Status | |
| | | * ------------ | |
| | | * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialize | |
| | | d | |
| | | * CUBLAS_STATUS_INVALID_VALUE if n < 0, or incx == 0 | |
| | | * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU | |
| | | */ | |
| | | void CUBLASAPI cublasCher (char uplo, int n, float alpha, | |
| const cuComplex *x, int incx, cuComplex *A, | | const cuComplex *x, int incx, cuComplex *A, | |
| int lda); | | int lda); | |
|
| void CUBLASAPI cublasChpr (char uplo, int n, cuComplex alpha, | | /* | |
| | | * void | |
| | | * cublasChpr (char uplo, int n, float alpha, const cuComplex *x, int incx, | |
| | | * cuComplex *AP) | |
| | | * | |
| | | * performs the hermitian rank 1 operation | |
| | | * | |
| | | * A = alpha * x * conjugate(transpose(x)) + A, | |
| | | * | |
| | | * where alpha is a single precision real scalar and x is an n element sing | |
| | | le | |
| | | * precision complex vector. A is a hermitian n x n matrix consisting of si | |
| | | ngle | |
| | | * precision complex elements that is supplied in packed form. | |
| | | * | |
| | | * Input | |
| | | * ----- | |
| | | * uplo specifies whether the matrix data is stored in the upper or the l | |
| | | ower | |
| | | * triangular part of array AP. If uplo == 'U' or 'u', then the uppe | |
| | | r | |
| | | * triangular part of A is supplied in AP. If uplo == 'L' or 'l', th | |
| | | en | |
| | | * the lower triangular part of A is supplied in AP. | |
| | | * n specifies the number of rows and columns of the matrix A. It must | |
| | | be | |
| | | * at least zero. | |
| | | * alpha single precision real scalar multiplier applied to x * conjugate( | |
| | | transpose(x)). | |
| | | * x single precision complex array of length at least (1 + (n - 1) * | |
| | | abs(incx)). | |
| | | * incx storage spacing between elements of x. incx must not be zero. | |
| | | * AP single precision complex array with at least ((n * (n + 1)) / 2) | |
| | | elements. If | |
| | | * uplo == 'U' or 'u', the array AP contains the upper triangular pa | |
| | | rt | |
| | | * of the hermitian matrix A, packed sequentially, column by column; | |
| | | * that is, if i <= j, then A[i,j] is stored in AP[i+(j*(j+1)/2)]. I | |
| | | f | |
| | | * uplo == 'L' or 'l', the array AP contains the lower triangular pa | |
| | | rt | |
| | | * of the hermitian matrix A, packed sequentially, column by column; | |
| | | * that is, if i >= j, then A[i,j] is stored in AP[i+((2*n-j+1)*j)/2 | |
| | | ]. | |
| | | * The imaginary parts of the diagonal elements need not be set, the | |
| | | y | |
| | | * are assumed to be zero, and on exit they are set to zero. | |
| | | * | |
| | | * Output | |
| | | * ------ | |
| | | * A updated according to A = alpha * x * conjugate(transpose(x)) + A | |
| | | * | |
| | | * Reference: http://www.netlib.org/blas/chpr.f | |
| | | * | |
| | | * Error status for this function can be retrieved via cublasGetError(). | |
| | | * | |
| | | * Error Status | |
| | | * ------------ | |
| | | * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialize | |
| | | d | |
| | | * CUBLAS_STATUS_INVALID_VALUE if n < 0, or incx == 0 | |
| | | * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU | |
| | | */ | |
| | | void CUBLASAPI cublasChpr (char uplo, int n, float alpha, | |
| const cuComplex *x, int incx, cuComplex *AP); | | const cuComplex *x, int incx, cuComplex *AP); | |
|
| | | | |
| | | /* | |
| | | * void | |
| | | * cublasChpr2 (char uplo, int n, cuComplex alpha, const cuComplex *x, int | |
| | | incx, | |
| | | * const cuComplex *y, int incy, cuComplex *AP) | |
| | | * | |
| | | * performs the hermitian rank 2 operation | |
| | | * | |
| | | * A = alpha*x*conjugate(transpose(y)) + conjugate(alpha)*y*conjugate(tr | |
| | | anspose(x)) + A, | |
| | | * | |
| | | * where alpha is a single precision complex scalar, and x and y are n elem | |
| | | ent single | |
| | | * precision complex vectors. A is a hermitian n x n matrix consisting of s | |
| | | ingle | |
| | | * precision complex elements that is supplied in packed form. | |
| | | * | |
| | | * Input | |
| | | * ----- | |
| | | * uplo specifies whether the matrix data is stored in the upper or the l | |
| | | ower | |
| | | * triangular part of array A. If uplo == 'U' or 'u', then only the | |
| | | * upper triangular part of A may be referenced and the lower triang | |
| | | ular | |
| | | * part of A is inferred. If uplo == 'L' or 'l', then only the lower | |
| | | * triangular part of A may be referenced and the upper triangular p | |
| | | art | |
| | | * of A is inferred. | |
| | | * n specifies the number of rows and columns of the matrix A. It must | |
| | | be | |
| | | * at least zero. | |
| | | * alpha single precision complex scalar multiplier applied to x * conjuga | |
| | | te(transpose(y)) + | |
| | | * y * conjugate(transpose(x)). | |
| | | * x single precision complex array of length at least (1 + (n - 1) * | |
| | | abs (incx)). | |
| | | * incx storage spacing between elements of x. incx must not be zero. | |
| | | * y single precision complex array of length at least (1 + (n - 1) * | |
| | | abs (incy)). | |
| | | * incy storage spacing between elements of y. incy must not be zero. | |
| | | * AP single precision complex array with at least ((n * (n + 1)) / 2) | |
| | | elements. If | |
| | | * uplo == 'U' or 'u', the array AP contains the upper triangular pa | |
| | | rt | |
| | | * of the hermitian matrix A, packed sequentially, column by column; | |
| | | * that is, if i <= j, then A[i,j] is stored in AP[i+(j*(j+1)/2)]. I | |
| | | f | |
| | | * uplo == 'L' or 'l', the array AP contains the lower triangular pa | |
| | | rt | |
| | | * of the hermitian matrix A, packed sequentially, column by column; | |
| | | * that is, if i >= j, then A[i,j] is stored in AP[i+((2*n-j+1)*j)/2 | |
| | | ]. | |
| | | * The imaginary parts of the diagonal elements need not be set, the | |
| | | y | |
| | | * are assumed to be zero, and on exit they are set to zero. | |
| | | * | |
| | | * Output | |
| | | * ------ | |
| | | * A updated according to A = alpha*x*conjugate(transpose(y)) | |
| | | * + conjugate(alpha)*y*conjugate(transpose(x | |
| | | ))+A | |
| | | * | |
| | | * Reference: http://www.netlib.org/blas/chpr2.f | |
| | | * | |
| | | * Error status for this function can be retrieved via cublasGetError(). | |
| | | * | |
| | | * Error Status | |
| | | * ------------ | |
| | | * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialize | |
| | | d | |
| | | * CUBLAS_STATUS_INVALID_VALUE if n < 0, incx == 0, incy == 0 | |
| | | * CUBLAS_STATUS_ARCH_MISMATCH if invoked on device without DP support | |
| | | * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU | |
| | | */ | |
| | | void CUBLASAPI cublasChpr2 (char uplo, int n, cuComplex alpha, | |
| | | const cuComplex *x, int incx, const cuComplex * | |
| | | y, | |
| | | int incy, cuComplex *AP); | |
| | | | |
| | | /* | |
| | | * void cublasCher2 (char uplo, int n, cuComplex alpha, const cuComplex *x, | |
| | | int incx, | |
| | | * const cuComplex *y, int incy, cuComplex *A, int lda) | |
| | | * | |
| | | * performs the hermitian rank 2 operation | |
| | | * | |
| | | * A = alpha*x*conjugate(transpose(y)) + conjugate(alpha)*y*conjugate(tr | |
| | | anspose(x)) + A, | |
| | | * | |
| | | * where alpha is a single precision complex scalar, x and y are n element | |
| | | single | |
| | | * precision complex vector and A is an n by n hermitian matrix consisting | |
| | | of single | |
| | | * precision complex elements. | |
| | | * | |
| | | * Input | |
| | | * ----- | |
| | | * uplo specifies whether the matrix data is stored in the upper or the l | |
| | | ower | |
| | | * triangular part of array A. If uplo == 'U' or 'u', then only the | |
| | | * upper triangular part of A may be referenced and the lower triang | |
| | | ular | |
| | | * part of A is inferred. If uplo == 'L' or 'l', then only the lower | |
| | | * triangular part of A may be referenced and the upper triangular p | |
| | | art | |
| | | * of A is inferred. | |
| | | * n specifies the number of rows and columns of the matrix A. It must | |
| | | be | |
| | | * at least zero. | |
| | | * alpha single precision complex scalar multiplier applied to x * conjuga | |
| | | te(transpose(y)) + | |
| | | * y * conjugate(transpose(x)). | |
| | | * x single precision complex array of length at least (1 + (n - 1) * | |
| | | abs (incx)). | |
| | | * incx storage spacing between elements of x. incx must not be zero. | |
| | | * y single precision complex array of length at least (1 + (n - 1) * | |
| | | abs (incy)). | |
| | | * incy storage spacing between elements of y. incy must not be zero. | |
| | | * A single precision complex array of dimensions (lda, n). If uplo == | |
| | | 'U' or 'u', | |
| | | * then A must contain the upper triangular part of a hermitian mat | |
| | | rix, | |
| | | * and the strictly lower triangular part is not referenced. If upl | |
| | | o == | |
| | | * 'L' or 'l', then A contains the lower triangular part of a hermit | |
| | | ian | |
| | | * matrix, and the strictly upper triangular part is not referenced. | |
| | | * The imaginary parts of the diagonal elements need not be set, | |
| | | * they are assumed to be zero, and on exit they are set to zero. | |
| | | * | |
| | | * lda leading dimension of A. It must be at least max(1, n). | |
| | | * | |
| | | * Output | |
| | | * ------ | |
| | | * A updated according to A = alpha*x*conjugate(transpose(y)) | |
| | | * + conjugate(alpha)*y*conjugate(transpose(x | |
| | | ))+A | |
| | | * | |
| | | * Reference: http://www.netlib.org/blas/cher2.f | |
| | | * | |
| | | * Error status for this function can be retrieved via cublasGetError(). | |
| | | * | |
| | | * Error Status | |
| | | * ------------ | |
| | | * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialize | |
| | | d | |
| | | * CUBLAS_STATUS_INVALID_VALUE if n < 0, incx == 0, incy == 0 | |
| | | * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU | |
| | | */ | |
| void CUBLASAPI cublasCher2 (char uplo, int n, cuComplex alpha, | | void CUBLASAPI cublasCher2 (char uplo, int n, cuComplex alpha, | |
| const cuComplex *x, int incx, const cuComplex *
y, | | const cuComplex *x, int incx, const cuComplex *
y, | |
| int incy, cuComplex *A, int lda); | | int incy, cuComplex *A, int lda); | |
| void CUBLASAPI cublasChpr2 (char uplo, int n, cuComplex alpha, | | void CUBLASAPI cublasChpr2 (char uplo, int n, cuComplex alpha, | |
| const cuComplex *x, int incx, const cuComplex *
y, | | const cuComplex *x, int incx, const cuComplex *
y, | |
| int incy, cuComplex *AP); | | int incy, cuComplex *AP); | |
| | | | |
| /* ---------------- CUBLAS single precision BLAS3 functions ---------------
- */ | | /* ---------------- CUBLAS single precision BLAS3 functions ---------------
- */ | |
| | | | |
| /* | | /* | |
| | | | |
| skipping to change at line 2495 | | skipping to change at line 4271 | |
| * otherwise the leading k x n part of the array must contains the | | * otherwise the leading k x n part of the array must contains the | |
| * matrix A. | | * matrix A. | |
| * lda leading dimension of A. When trans == 'N' or 'n' then lda must be
at | | * lda leading dimension of A. When trans == 'N' or 'n' then lda must be
at | |
| * least max(1, n). Otherwise lda must be at least max(1, k). | | * least max(1, n). Otherwise lda must be at least max(1, k). | |
| * beta single precision scalar multiplier applied to C. If beta izs zero
, C | | * beta single precision scalar multiplier applied to C. If beta izs zero
, C | |
| * does not have to be a valid input | | * does not have to be a valid input | |
| * C single precision array of dimensions (ldc, n). If uplo == 'U' or
'u', | | * C single precision array of dimensions (ldc, n). If uplo == 'U' or
'u', | |
| * the leading n x n triangular part of the array C must contain the | | * the leading n x n triangular part of the array C must contain the | |
| * upper triangular part of the symmetric matrix C and the strictly | | * upper triangular part of the symmetric matrix C and the strictly | |
| * lower triangular part of C is not referenced. On exit, the upper | | * lower triangular part of C is not referenced. On exit, the upper | |
|
| * triangular part of C is overwritten by the upper trinagular part
of | | * triangular part of C is overwritten by the upper triangular part
of | |
| * the updated matrix. If uplo == 'L' or 'l', the leading n x n | | * the updated matrix. If uplo == 'L' or 'l', the leading n x n | |
| * triangular part of the array C must contain the lower triangular
part | | * triangular part of the array C must contain the lower triangular
part | |
| * of the symmetric matrix C and the strictly upper triangular part
of C | | * of the symmetric matrix C and the strictly upper triangular part
of C | |
| * is not referenced. On exit, the lower triangular part of C is | | * is not referenced. On exit, the lower triangular part of C is | |
|
| * overwritten by the lower trinagular part of the updated matrix. | | * overwritten by the lower triangular part of the updated matrix. | |
| * ldc leading dimension of C. It must be at least max(1, n). | | * ldc leading dimension of C. It must be at least max(1, n). | |
| * | | * | |
| * Output | | * Output | |
| * ------ | | * ------ | |
| * C updated according to C = alpha * A * transpose(A) + beta * C, or
C = | | * C updated according to C = alpha * A * transpose(A) + beta * C, or
C = | |
| * alpha * transpose(A) * A + beta * C | | * alpha * transpose(A) * A + beta * C | |
| * | | * | |
| * Reference: http://www.netlib.org/blas/ssyrk.f | | * Reference: http://www.netlib.org/blas/ssyrk.f | |
| * | | * | |
| * Error status for this function can be retrieved via cublasGetError(). | | * Error status for this function can be retrieved via cublasGetError(). | |
| | | | |
| skipping to change at line 2580 | | skipping to change at line 4356 | |
| * otherwise the leading k x n part of the array must contain the ma
trix | | * otherwise the leading k x n part of the array must contain the ma
trix | |
| * B. | | * B. | |
| * ldb leading dimension of N. When trans == 'N' or 'n' then ldb must be
at | | * ldb leading dimension of N. When trans == 'N' or 'n' then ldb must be
at | |
| * least max(1, n). Otherwise ldb must be at least max(1, k). | | * least max(1, n). Otherwise ldb must be at least max(1, k). | |
| * beta single precision scalar multiplier applied to C. If beta is zero,
C | | * beta single precision scalar multiplier applied to C. If beta is zero,
C | |
| * does not have to be a valid input. | | * does not have to be a valid input. | |
| * C single precision array of dimensions (ldc, n). If uplo == 'U' or
'u', | | * C single precision array of dimensions (ldc, n). If uplo == 'U' or
'u', | |
| * the leading n x n triangular part of the array C must contain the | | * the leading n x n triangular part of the array C must contain the | |
| * upper triangular part of the symmetric matrix C and the strictly | | * upper triangular part of the symmetric matrix C and the strictly | |
| * lower triangular part of C is not referenced. On exit, the upper | | * lower triangular part of C is not referenced. On exit, the upper | |
|
| * triangular part of C is overwritten by the upper trinagular part
of | | * triangular part of C is overwritten by the upper triangular part
of | |
| * the updated matrix. If uplo == 'L' or 'l', the leading n x n | | * the updated matrix. If uplo == 'L' or 'l', the leading n x n | |
| * triangular part of the array C must contain the lower triangular
part | | * triangular part of the array C must contain the lower triangular
part | |
| * of the symmetric matrix C and the strictly upper triangular part
of C | | * of the symmetric matrix C and the strictly upper triangular part
of C | |
| * is not referenced. On exit, the lower triangular part of C is | | * is not referenced. On exit, the lower triangular part of C is | |
|
| * overwritten by the lower trinagular part of the updated matrix. | | * overwritten by the lower triangular part of the updated matrix. | |
| * ldc leading dimension of C. Must be at least max(1, n). | | * ldc leading dimension of C. Must be at least max(1, n). | |
| * | | * | |
| * Output | | * Output | |
| * ------ | | * ------ | |
| * C updated according to alpha*A*transpose(B) + alpha*B*transpose(A)
+ | | * C updated according to alpha*A*transpose(B) + alpha*B*transpose(A)
+ | |
| * beta*C or alpha*transpose(A)*B + alpha*transpose(B)*A + beta*C | | * beta*C or alpha*transpose(A)*B + alpha*transpose(B)*A + beta*C | |
| * | | * | |
| * Reference: http://www.netlib.org/blas/ssyr2k.f | | * Reference: http://www.netlib.org/blas/ssyr2k.f | |
| * | | * | |
| * Error status for this function can be retrieved via cublasGetError(). | | * Error status for this function can be retrieved via cublasGetError(). | |
| | | | |
| skipping to change at line 2824 | | skipping to change at line 4600 | |
| * Error Status | | * Error Status | |
| * ------------ | | * ------------ | |
| * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialize
d | | * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialize
d | |
| * CUBLAS_STATUS_INVALID_VALUE if any of m, n, or k are < 0 | | * CUBLAS_STATUS_INVALID_VALUE if any of m, n, or k are < 0 | |
| * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU | | * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU | |
| */ | | */ | |
| void CUBLASAPI cublasCgemm (char transa, char transb, int m, int n, int k, | | void CUBLASAPI cublasCgemm (char transa, char transb, int m, int n, int k, | |
| cuComplex alpha, const cuComplex *A, int lda, | | cuComplex alpha, const cuComplex *A, int lda, | |
| const cuComplex *B, int ldb, cuComplex beta, | | const cuComplex *B, int ldb, cuComplex beta, | |
| cuComplex *C, int ldc); | | cuComplex *C, int ldc); | |
|
| | | /* | |
| | | * void | |
| | | * cublasCsymm (char side, char uplo, int m, int n, cuComplex alpha, | |
| | | * const cuComplex *A, int lda, const cuComplex *B, int ldb, | |
| | | * cuComplex beta, cuComplex *C, int ldc); | |
| | | * | |
| | | * performs one of the matrix-matrix operations | |
| | | * | |
| | | * C = alpha * A * B + beta * C, or | |
| | | * C = alpha * B * A + beta * C, | |
| | | * | |
| | | * where alpha and beta are single precision complex scalars, A is a symmet | |
| | | ric matrix | |
| | | * consisting of single precision complex elements and stored in either low | |
| | | er or upper | |
| | | * storage mode, and B and C are m x n matrices consisting of single precis | |
| | | ion | |
| | | * complex elements. | |
| | | * | |
| | | * Input | |
| | | * ----- | |
| | | * side specifies whether the symmetric matrix A appears on the left | |
| | | * hand side or right hand side of matrix B, as follows. If side == | |
| | | 'L' | |
| | | * or 'l', then C = alpha * A * B + beta * C. If side = 'R' or 'r', | |
| | | * then C = alpha * B * A + beta * C. | |
| | | * uplo specifies whether the symmetric matrix A is stored in upper or lo | |
| | | wer | |
| | | * storage mode, as follows. If uplo == 'U' or 'u', only the upper | |
| | | * triangular part of the symmetric matrix is to be referenced, and | |
| | | the | |
| | | * elements of the strictly lower triangular part are to be inferred | |
| | | from | |
| | | * those in the upper triangular part. If uplo == 'L' or 'l', only t | |
| | | he | |
| | | * lower triangular part of the symmetric matrix is to be referenced | |
| | | , | |
| | | * and the elements of the strictly upper triangular part are to be | |
| | | * inferred from those in the lower triangular part. | |
| | | * m specifies the number of rows of the matrix C, and the number of r | |
| | | ows | |
| | | * of matrix B. It also specifies the dimensions of symmetric matrix | |
| | | A | |
| | | * when side == 'L' or 'l'. m must be at least zero. | |
| | | * n specifies the number of columns of the matrix C, and the number o | |
| | | f | |
| | | * columns of matrix B. It also specifies the dimensions of symmetri | |
| | | c | |
| | | * matrix A when side == 'R' or 'r'. n must be at least zero. | |
| | | * alpha single precision complex scalar multiplier applied to A * B, or | |
| | | * B * A | |
| | | * A single precision complex array of dimensions (lda, ka), where ka | |
| | | is m when | |
| | | * side == 'L' or 'l' and is n otherwise. If side == 'L' or 'l' the | |
| | | * leading m x m part of array A must contain the symmetric matrix, | |
| | | * such that when uplo == 'U' or 'u', the leading m x m part stores | |
| | | the | |
| | | * upper triangular part of the symmetric matrix, and the strictly l | |
| | | ower | |
| | | * triangular part of A is not referenced, and when uplo == 'L' or ' | |
| | | l', | |
| | | * the leading m x m part stores the lower triangular part of the | |
| | | * symmetric matrix and the strictly upper triangular part is not | |
| | | * referenced. If side == 'R' or 'r' the leading n x n part of array | |
| | | A | |
| | | * must contain the symmetric matrix, such that when uplo == 'U' or | |
| | | 'u', | |
| | | * the leading n x n part stores the upper triangular part of the | |
| | | * symmetric matrix and the strictly lower triangular part of A is n | |
| | | ot | |
| | | * referenced, and when uplo == 'L' or 'l', the leading n x n part | |
| | | * stores the lower triangular part of the symmetric matrix and the | |
| | | * strictly upper triangular part is not referenced. | |
| | | * lda leading dimension of A. When side == 'L' or 'l', it must be at le | |
| | | ast | |
| | | * max(1, m) and at least max(1, n) otherwise. | |
| | | * B single precision complex array of dimensions (ldb, n). On entry, | |
| | | the leading | |
| | | * m x n part of the array contains the matrix B. | |
| | | * ldb leading dimension of B. It must be at least max (1, m). | |
| | | * beta single precision complex scalar multiplier applied to C. If beta | |
| | | is zero, C | |
| | | * does not have to be a valid input | |
| | | * C single precision complex array of dimensions (ldc, n) | |
| | | * ldc leading dimension of C. Must be at least max(1, m) | |
| | | * | |
| | | * Output | |
| | | * ------ | |
| | | * C updated according to C = alpha * A * B + beta * C, or C = alpha * | |
| | | * B * A + beta * C | |
| | | * | |
| | | * Reference: http://www.netlib.org/blas/csymm.f | |
| | | * | |
| | | * Error status for this function can be retrieved via cublasGetError(). | |
| | | * | |
| | | * Error Status | |
| | | * ------------ | |
| | | * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialize | |
| | | d | |
| | | * CUBLAS_STATUS_INVALID_VALUE if m or n are < 0 | |
| | | * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU | |
| | | */ | |
| void CUBLASAPI cublasCsymm (char side, char uplo, int m, int n, | | void CUBLASAPI cublasCsymm (char side, char uplo, int m, int n, | |
| cuComplex alpha, const cuComplex *A, int lda, | | cuComplex alpha, const cuComplex *A, int lda, | |
| const cuComplex *B, int ldb, cuComplex beta, | | const cuComplex *B, int ldb, cuComplex beta, | |
| cuComplex *C, int ldc); | | cuComplex *C, int ldc); | |
|
| | | /* | |
| | | * void | |
| | | * cublasChemm (char side, char uplo, int m, int n, cuComplex alpha, | |
| | | * const cuComplex *A, int lda, const cuComplex *B, int ldb, | |
| | | * cuComplex beta, cuComplex *C, int ldc); | |
| | | * | |
| | | * performs one of the matrix-matrix operations | |
| | | * | |
| | | * C = alpha * A * B + beta * C, or | |
| | | * C = alpha * B * A + beta * C, | |
| | | * | |
| | | * where alpha and beta are single precision complex scalars, A is a hermit | |
| | | ian matrix | |
| | | * consisting of single precision complex elements and stored in either low | |
| | | er or upper | |
| | | * storage mode, and B and C are m x n matrices consisting of single precis | |
| | | ion | |
| | | * complex elements. | |
| | | * | |
| | | * Input | |
| | | * ----- | |
| | | * side specifies whether the hermitian matrix A appears on the left | |
| | | * hand side or right hand side of matrix B, as follows. If side == | |
| | | 'L' | |
| | | * or 'l', then C = alpha * A * B + beta * C. If side = 'R' or 'r', | |
| | | * then C = alpha * B * A + beta * C. | |
| | | * uplo specifies whether the hermitian matrix A is stored in upper or lo | |
| | | wer | |
| | | * storage mode, as follows. If uplo == 'U' or 'u', only the upper | |
| | | * triangular part of the hermitian matrix is to be referenced, and | |
| | | the | |
| | | * elements of the strictly lower triangular part are to be inferred | |
| | | from | |
| | | * those in the upper triangular part. If uplo == 'L' or 'l', only t | |
| | | he | |
| | | * lower triangular part of the hermitian matrix is to be referenced | |
| | | , | |
| | | * and the elements of the strictly upper triangular part are to be | |
| | | * inferred from those in the lower triangular part. | |
| | | * m specifies the number of rows of the matrix C, and the number of r | |
| | | ows | |
| | | * of matrix B. It also specifies the dimensions of hermitian matrix | |
| | | A | |
| | | * when side == 'L' or 'l'. m must be at least zero. | |
| | | * n specifies the number of columns of the matrix C, and the number o | |
| | | f | |
| | | * columns of matrix B. It also specifies the dimensions of hermitia | |
| | | n | |
| | | * matrix A when side == 'R' or 'r'. n must be at least zero. | |
| | | * alpha single precision complex scalar multiplier applied to A * B, or B | |
| | | * A | |
| | | * A single precision complex array of dimensions (lda, ka), where ka | |
| | | is m when | |
| | | * side == 'L' or 'l' and is n otherwise. If side == 'L' or 'l' the | |
| | | * leading m x m part of array A must contain the hermitian matrix, | |
| | | * such that when uplo == 'U' or 'u', the leading m x m part stores | |
| | | the | |
| | | * upper triangular part of the hermitian matrix, and the strictly l | |
| | | ower | |
| | | * triangular part of A is not referenced, and when uplo == 'L' or ' | |
| | | l', | |
| | | * the leading m x m part stores the lower triangular part of the | |
| | | * hermitian matrix and the strictly upper triangular part is not | |
| | | * referenced. If side == 'R' or 'r' the leading n x n part of array | |
| | | A | |
| | | * must contain the hermitian matrix, such that when uplo == 'U' or | |
| | | 'u', | |
| | | * the leading n x n part stores the upper triangular part of the | |
| | | * hermitian matrix and the strictly lower triangular part of A is n | |
| | | ot | |
| | | * referenced, and when uplo == 'L' or 'l', the leading n x n part | |
| | | * stores the lower triangular part of the hermitian matrix and the | |
| | | * strictly upper triangular part is not referenced. The imaginary p | |
| | | arts | |
| | | * of the diagonal elements need not be set, they are assumed to be | |
| | | zero. | |
| | | * lda leading dimension of A. When side == 'L' or 'l', it must be at le | |
| | | ast | |
| | | * max(1, m) and at least max(1, n) otherwise. | |
| | | * B single precision complex array of dimensions (ldb, n). On entry, | |
| | | the leading | |
| | | * m x n part of the array contains the matrix B. | |
| | | * ldb leading dimension of B. It must be at least max (1, m). | |
| | | * beta single precision complex scalar multiplier applied to C. If beta | |
| | | is zero, C | |
| | | * does not have to be a valid input | |
| | | * C single precision complex array of dimensions (ldc, n) | |
| | | * ldc leading dimension of C. Must be at least max(1, m) | |
| | | * | |
| | | * Output | |
| | | * ------ | |
| | | * C updated according to C = alpha * A * B + beta * C, or C = alpha * | |
| | | * B * A + beta * C | |
| | | * | |
| | | * Reference: http://www.netlib.org/blas/chemm.f | |
| | | * | |
| | | * Error status for this function can be retrieved via cublasGetError(). | |
| | | * | |
| | | * Error Status | |
| | | * ------------ | |
| | | * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialize | |
| | | d | |
| | | * CUBLAS_STATUS_INVALID_VALUE if m or n are < 0 | |
| | | * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU | |
| | | */ | |
| void CUBLASAPI cublasChemm (char side, char uplo, int m, int n, | | void CUBLASAPI cublasChemm (char side, char uplo, int m, int n, | |
| cuComplex alpha, const cuComplex *A, int lda, | | cuComplex alpha, const cuComplex *A, int lda, | |
| const cuComplex *B, int ldb, cuComplex beta, | | const cuComplex *B, int ldb, cuComplex beta, | |
| cuComplex *C, int ldc); | | cuComplex *C, int ldc); | |
|
| | | /* | |
| | | * void | |
| | | * cublasCsyrk (char uplo, char trans, int n, int k, cuComplex alpha, | |
| | | * const cuComplex *A, int lda, cuComplex beta, cuComplex *C, | |
| | | int ldc) | |
| | | * | |
| | | * performs one of the symmetric rank k operations | |
| | | * | |
| | | * C = alpha * A * transpose(A) + beta * C, or | |
| | | * C = alpha * transpose(A) * A + beta * C. | |
| | | * | |
| | | * Alpha and beta are single precision complex scalars. C is an n x n symme | |
| | | tric matrix | |
| | | * consisting of single precision complex elements and stored in either low | |
| | | er or | |
| | | * upper storage mode. A is a matrix consisting of single precision complex | |
| | | elements | |
| | | * with dimension of n x k in the first case, and k x n in the second case. | |
| | | * | |
| | | * Input | |
| | | * ----- | |
| | | * uplo specifies whether the symmetric matrix C is stored in upper or lo | |
| | | wer | |
| | | * storage mode as follows. If uplo == 'U' or 'u', only the upper | |
| | | * triangular part of the symmetric matrix is to be referenced, and | |
| | | the | |
| | | * elements of the strictly lower triangular part are to be inferred | |
| | | from | |
| | | * those in the upper triangular part. If uplo == 'L' or 'l', only t | |
| | | he | |
| | | * lower triangular part of the symmetric matrix is to be referenced | |
| | | , | |
| | | * and the elements of the strictly upper triangular part are to be | |
| | | * inferred from those in the lower triangular part. | |
| | | * trans specifies the operation to be performed. If trans == 'N' or 'n', | |
| | | C = | |
| | | * alpha * A * transpose(A) + beta * C. If trans == 'T', 't', 'C', or | |
| | | 'c', | |
| | | * C = alpha * transpose(A) * A + beta * C. | |
| | | * n specifies the number of rows and the number of columns of matrix C. | |
| | | If | |
| | | * trans == 'N' or 'n', n specifies the number of rows of matrix A. | |
| | | If | |
| | | * trans == 'T', 't', 'C', or 'c', n specifies the columns of matrix | |
| | | A. | |
| | | * n must be at least zero. | |
| | | * k If trans == 'N' or 'n', k specifies the number of columns of matrix | |
| | | A. | |
| | | * If trans == 'T', 't', 'C', or 'c', k specifies the number of rows | |
| | | of | |
| | | * matrix A. k must be at least zero. | |
| | | * alpha single precision complex scalar multiplier applied to A * transpo | |
| | | se(A) or | |
| | | * transpose(A) * A. | |
| | | * A single precision complex array of dimensions (lda, ka), where ka | |
| | | is k when | |
| | | * trans == 'N' or 'n', and is n otherwise. When trans == 'N' or 'n' | |
| | | , | |
| | | * the leading n x k part of array A must contain the matrix A, | |
| | | * otherwise the leading k x n part of the array must contain the | |
| | | * matrix A. | |
| | | * lda leading dimension of A. When trans == 'N' or 'n' then lda must be | |
| | | at | |
| | | * least max(1, n). Otherwise lda must be at least max(1, k). | |
| | | * beta single precision complex scalar multiplier applied to C. If beta | |
| | | is zero, C | |
| | | * does not have to be a valid input | |
| | | * C single precision complex array of dimensions (ldc, n). If uplo = | |
| | | 'U' or 'u', | |
| | | * the leading n x n triangular part of the array C must contain the | |
| | | * upper triangular part of the symmetric matrix C and the strictly | |
| | | * lower triangular part of C is not referenced. On exit, the upper | |
| | | * triangular part of C is overwritten by the upper triangular part | |
| | | of | |
| | | * the updated matrix. If uplo = 'L' or 'l', the leading n x n | |
| | | * triangular part of the array C must contain the lower triangular | |
| | | part | |
| | | * of the symmetric matrix C and the strictly upper triangular part | |
| | | of C | |
| | | * is not referenced. On exit, the lower triangular part of C is | |
| | | * overwritten by the lower triangular part of the updated matrix. | |
| | | * ldc leading dimension of C. It must be at least max(1, n). | |
| | | * | |
| | | * Output | |
| | | * ------ | |
| | | * C updated according to C = alpha * A * transpose(A) + beta * C, or | |
| | | C = | |
| | | * alpha * transpose(A) * A + beta * C | |
| | | * | |
| | | * Reference: http://www.netlib.org/blas/csyrk.f | |
| | | * | |
| | | * Error status for this function can be retrieved via cublasGetError(). | |
| | | * | |
| | | * Error Status | |
| | | * ------------ | |
| | | * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialize | |
| | | d | |
| | | * CUBLAS_STATUS_INVALID_VALUE if n < 0 or k < 0 | |
| | | * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU | |
| | | */ | |
| | | | |
| void CUBLASAPI cublasCsyrk (char uplo, char trans, int n, int k, | | void CUBLASAPI cublasCsyrk (char uplo, char trans, int n, int k, | |
| cuComplex alpha, const cuComplex *A, int lda, | | cuComplex alpha, const cuComplex *A, int lda, | |
| cuComplex beta, cuComplex *C, int ldc); | | cuComplex beta, cuComplex *C, int ldc); | |
|
| | | /* | |
| | | * void | |
| | | * cublasCherk (char uplo, char trans, int n, int k, float alpha, | |
| | | * const cuComplex *A, int lda, float beta, cuComplex *C, int | |
| | | ldc) | |
| | | * | |
| | | * performs one of the hermitian rank k operations | |
| | | * | |
| | | * C = alpha * A * conjugate(transpose(A)) + beta * C, or | |
| | | * C = alpha * conjugate(transpose(A)) * A + beta * C. | |
| | | * | |
| | | * Alpha and beta are single precision real scalars. C is an n x n hermitia | |
| | | n matrix | |
| | | * consisting of single precision complex elements and stored in either low | |
| | | er or | |
| | | * upper storage mode. A is a matrix consisting of single precision complex | |
| | | elements | |
| | | * with dimension of n x k in the first case, and k x n in the second case. | |
| | | * | |
| | | * Input | |
| | | * ----- | |
| | | * uplo specifies whether the hermitian matrix C is stored in upper or lo | |
| | | wer | |
| | | * storage mode as follows. If uplo == 'U' or 'u', only the upper | |
| | | * triangular part of the hermitian matrix is to be referenced, and | |
| | | the | |
| | | * elements of the strictly lower triangular part are to be inferred | |
| | | from | |
| | | * those in the upper triangular part. If uplo == 'L' or 'l', only t | |
| | | he | |
| | | * lower triangular part of the hermitian matrix is to be referenced | |
| | | , | |
| | | * and the elements of the strictly upper triangular part are to be | |
| | | * inferred from those in the lower triangular part. | |
| | | * trans specifies the operation to be performed. If trans == 'N' or 'n', | |
| | | C = | |
| | | * alpha * A * conjugate(transpose(A)) + beta * C. If trans == 'T', | |
| | | 't', 'C', or 'c', | |
| | | * C = alpha * conjugate(transpose(A)) * A + beta * C. | |
| | | * n specifies the number of rows and the number of columns of matrix C. | |
| | | If | |
| | | * trans == 'N' or 'n', n specifies the number of rows of matrix A. | |
| | | If | |
| | | * trans == 'T', 't', 'C', or 'c', n specifies the columns of matrix | |
| | | A. | |
| | | * n must be at least zero. | |
| | | * k If trans == 'N' or 'n', k specifies the number of columns of matr | |
| | | ix A. | |
| | | * If trans == 'T', 't', 'C', or 'c', k specifies the number of rows | |
| | | of | |
| | | * matrix A. k must be at least zero. | |
| | | * alpha single precision scalar multiplier applied to A * conjugate(trans | |
| | | pose(A)) or | |
| | | * conjugate(transpose(A)) * A. | |
| | | * A single precision complex array of dimensions (lda, ka), where ka | |
| | | is k when | |
| | | * trans == 'N' or 'n', and is n otherwise. When trans == 'N' or 'n' | |
| | | , | |
| | | * the leading n x k part of array A must contain the matrix A, | |
| | | * otherwise the leading k x n part of the array must contain the | |
| | | * matrix A. | |
| | | * lda leading dimension of A. When trans == 'N' or 'n' then lda must be | |
| | | at | |
| | | * least max(1, n). Otherwise lda must be at least max(1, k). | |
| | | * beta single precision scalar multiplier applied to C. If beta is zero, | |
| | | C | |
| | | * does not have to be a valid input. | |
| | | * C single precision complex array of dimensions (ldc, n). If uplo = | |
| | | 'U' or 'u', | |
| | | * the leading n x n triangular part of the array C must contain the | |
| | | * upper triangular part of the hermitian matrix C and the strictly | |
| | | * lower triangular part of C is not referenced. On exit, the upper | |
| | | * triangular part of C is overwritten by the upper triangular part | |
| | | of | |
| | | * the updated matrix. If uplo = 'L' or 'l', the leading n x n | |
| | | * triangular part of the array C must contain the lower triangular | |
| | | part | |
| | | * of the hermitian matrix C and the strictly upper triangular part | |
| | | of C | |
| | | * is not referenced. On exit, the lower triangular part of C is | |
| | | * overwritten by the lower triangular part of the updated matrix. | |
| | | * The imaginary parts of the diagonal elements need | |
| | | * not be set, they are assumed to be zero, and on exit they | |
| | | * are set to zero. | |
| | | * ldc leading dimension of C. It must be at least max(1, n). | |
| | | * | |
| | | * Output | |
| | | * ------ | |
| | | * C updated according to C = alpha * A * conjugate(transpose(A)) + be | |
| | | ta * C, or C = | |
| | | * alpha * conjugate(transpose(A)) * A + beta * C | |
| | | * | |
| | | * Reference: http://www.netlib.org/blas/cherk.f | |
| | | * | |
| | | * Error status for this function can be retrieved via cublasGetError(). | |
| | | * | |
| | | * Error Status | |
| | | * ------------ | |
| | | * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialize | |
| | | d | |
| | | * CUBLAS_STATUS_INVALID_VALUE if n < 0 or k < 0 | |
| | | * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU | |
| | | */ | |
| void CUBLASAPI cublasCherk (char uplo, char trans, int n, int k, | | void CUBLASAPI cublasCherk (char uplo, char trans, int n, int k, | |
|
| cuComplex alpha, const cuComplex *A, int lda, | | float alpha, const cuComplex *A, int lda, | |
| cuComplex beta, cuComplex *C, int ldc); | | float beta, cuComplex *C, int ldc); | |
| | | | |
| | | /* | |
| | | * void | |
| | | * cublasCsyr2k (char uplo, char trans, int n, int k, cuComplex alpha, | |
| | | * const cuComplex *A, int lda, const cuComplex *B, int ldb, | |
| | | * cuComplex beta, cuComplex *C, int ldc) | |
| | | * | |
| | | * performs one of the symmetric rank 2k operations | |
| | | * | |
| | | * C = alpha * A * transpose(B) + alpha * B * transpose(A) + beta * C, o | |
| | | r | |
| | | * C = alpha * transpose(A) * B + alpha * transpose(B) * A + beta * C. | |
| | | * | |
| | | * Alpha and beta are single precision complex scalars. C is an n x n symme | |
| | | tric matrix | |
| | | * consisting of single precision complex elements and stored in either low | |
| | | er or upper | |
| | | * storage mode. A and B are matrices consisting of single precision comple | |
| | | x elements | |
| | | * with dimension of n x k in the first case, and k x n in the second case. | |
| | | * | |
| | | * Input | |
| | | * ----- | |
| | | * uplo specifies whether the symmetric matrix C is stored in upper or lo | |
| | | wer | |
| | | * storage mode, as follows. If uplo == 'U' or 'u', only the upper | |
| | | * triangular part of the symmetric matrix is to be referenced, and | |
| | | the | |
| | | * elements of the strictly lower triangular part are to be inferred | |
| | | from | |
| | | * those in the upper triangular part. If uplo == 'L' or 'l', only t | |
| | | he | |
| | | * lower triangular part of the symmetric matrix is to be referenced | |
| | | , | |
| | | * and the elements of the strictly upper triangular part are to be | |
| | | * inferred from those in the lower triangular part. | |
| | | * trans specifies the operation to be performed. If trans == 'N' or 'n', | |
| | | * C = alpha * A * transpose(B) + alpha * B * transpose(A) + beta * | |
| | | C, | |
| | | * If trans == 'T', 't', 'C', or 'c', C = alpha * transpose(A) * B + | |
| | | * alpha * transpose(B) * A + beta * C. | |
| | | * n specifies the number of rows and the number of columns of matrix C. | |
| | | If | |
| | | * trans == 'N' or 'n', n specifies the number of rows of matrix A. | |
| | | If | |
| | | * trans == 'T', 't', 'C', or 'c', n specifies the columns of matrix | |
| | | A. | |
| | | * n must be at least zero. | |
| | | * k If trans == 'N' or 'n', k specifies the number of columns of matrix | |
| | | A. | |
| | | * If trans == 'T', 't', 'C', or 'c', k specifies the number of rows | |
| | | of | |
| | | * matrix A. k must be at least zero. | |
| | | * alpha single precision complex scalar multiplier. | |
| | | * A single precision complex array of dimensions (lda, ka), where ka | |
| | | is k when | |
| | | * trans == 'N' or 'n', and is n otherwise. When trans == 'N' or 'n' | |
| | | , | |
| | | * the leading n x k part of array A must contain the matrix A, | |
| | | * otherwise the leading k x n part of the array must contain the ma | |
| | | trix | |
| | | * A. | |
| | | * lda leading dimension of A. When trans == 'N' or 'n' then lda must be | |
| | | at | |
| | | * least max(1, n). Otherwise lda must be at least max(1,k). | |
| | | * B single precision complex array of dimensions (ldb, kb), where kb | |
| | | is k when | |
| | | * trans == 'N' or 'n', and is n otherwise. When trans == 'N' or 'n' | |
| | | , | |
| | | * the leading n x k part of array B must contain the matrix B, | |
| | | * otherwise the leading k x n part of the array must contain the ma | |
| | | trix | |
| | | * B. | |
| | | * ldb leading dimension of B. When trans == 'N' or 'n' then ldb must be | |
| | | at | |
| | | * least max(1, n). Otherwise ldb must be at least max(1, k). | |
| | | * beta single precision complex scalar multiplier applied to C. If beta | |
| | | is zero, C | |
| | | * does not have to be a valid input. | |
| | | * C single precision complex array of dimensions (ldc, n). If uplo == | |
| | | 'U' or 'u', | |
| | | * the leading n x n triangular part of the array C must contain the | |
| | | * upper triangular part of the symmetric matrix C and the strictly | |
| | | * lower triangular part of C is not referenced. On exit, the upper | |
| | | * triangular part of C is overwritten by the upper triangular part | |
| | | of | |
| | | * the updated matrix. If uplo == 'L' or 'l', the leading n x n | |
| | | * triangular part of the array C must contain the lower triangular | |
| | | part | |
| | | * of the symmetric matrix C and the strictly upper triangular part | |
| | | of C | |
| | | * is not referenced. On exit, the lower triangular part of C is | |
| | | * overwritten by the lower triangular part of the updated matrix. | |
| | | * ldc leading dimension of C. Must be at least max(1, n). | |
| | | * | |
| | | * Output | |
| | | * ------ | |
| | | * C updated according to alpha*A*transpose(B) + alpha*B*transpose(A) | |
| | | + | |
| | | * beta*C or alpha*transpose(A)*B + alpha*transpose(B)*A + beta*C | |
| | | * | |
| | | * Reference: http://www.netlib.org/blas/csyr2k.f | |
| | | * | |
| | | * Error status for this function can be retrieved via cublasGetError(). | |
| | | * | |
| | | * Error Status | |
| | | * ------------ | |
| | | * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialize | |
| | | d | |
| | | * CUBLAS_STATUS_INVALID_VALUE if n < 0 or k < 0 | |
| | | * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU | |
| | | */ | |
| void CUBLASAPI cublasCsyr2k (char uplo, char trans, int n, int k, | | void CUBLASAPI cublasCsyr2k (char uplo, char trans, int n, int k, | |
| cuComplex alpha, const cuComplex *A, int lda, | | cuComplex alpha, const cuComplex *A, int lda, | |
| const cuComplex *B, int ldb, cuComplex beta, | | const cuComplex *B, int ldb, cuComplex beta, | |
| cuComplex *C, int ldc); | | cuComplex *C, int ldc); | |
|
| | | | |
| | | /* | |
| | | * void | |
| | | * cublasCher2k (char uplo, char trans, int n, int k, cuComplex alpha, | |
| | | * const cuComplex *A, int lda, const cuComplex *B, int ldb, | |
| | | * float beta, cuComplex *C, int ldc) | |
| | | * | |
| | | * performs one of the hermitian rank 2k operations | |
| | | * | |
| | | * C = alpha * A * conjugate(transpose(B)) | |
| | | * + conjugate(alpha) * B * conjugate(transpose(A)) | |
| | | * + beta * C , | |
| | | * or | |
| | | * C = alpha * conjugate(transpose(A)) * B | |
| | | * + conjugate(alpha) * conjugate(transpose(B)) * A | |
| | | * + beta * C. | |
| | | * | |
| | | * Alpha is a single precision complex scalar whereas Beta is a single preci | |
| | | sion real scalar. | |
| | | * C is an n x n hermitian matrix consisting of single precision complex el | |
| | | ements | |
| | | * and stored in either lower or upper storage mode. A and B are matrices c | |
| | | onsisting | |
| | | * of single precision complex elements with dimension of n x k in the firs | |
| | | t case, | |
| | | * and k x n in the second case. | |
| | | * | |
| | | * Input | |
| | | * ----- | |
| | | * uplo specifies whether the hermitian matrix C is stored in upper or lo | |
| | | wer | |
| | | * storage mode, as follows. If uplo == 'U' or 'u', only the upper | |
| | | * triangular part of the hermitian matrix is to be referenced, and | |
| | | the | |
| | | * elements of the strictly lower triangular part are to be inferred | |
| | | from | |
| | | * those in the upper triangular part. If uplo == 'L' or 'l', only t | |
| | | he | |
| | | * lower triangular part of the hermitian matrix is to be referenced | |
| | | , | |
| | | * and the elements of the strictly upper triangular part are to be | |
| | | * inferred from those in the lower triangular part. | |
| | | * trans specifies the operation to be performed. If trans == 'N' or 'n', | |
| | | * C = alpha * A * conjugate(transpose(B)) | |
| | | * + conjugate(alpha) * B * conjugate(transpose(A)) | |
| | | * + beta * C . | |
| | | * If trans == 'T', 't', 'C', or 'c', | |
| | | * C = alpha * conjugate(transpose(A)) * B | |
| | | * + conjugate(alpha) * conjugate(transpose(B)) * A | |
| | | * + beta * C. | |
| | | * n specifies the number of rows and the number of columns of matrix C. | |
| | | If | |
| | | * trans == 'N' or 'n', n specifies the number of rows of matrix A. | |
| | | If | |
| | | * trans == 'T', 't', 'C', or 'c', n specifies the columns of matrix | |
| | | A. | |
| | | * n must be at least zero. | |
| | | * k If trans == 'N' or 'n', k specifies the number of columns of matrix | |
| | | A. | |
| | | * If trans == 'T', 't', 'C', or 'c', k specifies the number of rows | |
| | | of | |
| | | * matrix A. k must be at least zero. | |
| | | * alpha single precision complex scalar multiplier. | |
| | | * A single precision complex array of dimensions (lda, ka), where ka | |
| | | is k when | |
| | | * trans == 'N' or 'n', and is n otherwise. When trans == 'N' or 'n' | |
| | | , | |
| | | * the leading n x k part of array A must contain the matrix A, | |
| | | * otherwise the leading k x n part of the array must contain the ma | |
| | | trix | |
| | | * A. | |
| | | * lda leading dimension of A. When trans == 'N' or 'n' then lda must be | |
| | | at | |
| | | * least max(1, n). Otherwise lda must be at least max(1,k). | |
| | | * B single precision complex array of dimensions (ldb, kb), where kb | |
| | | is k when | |
| | | * trans == 'N' or 'n', and is n otherwise. When trans == 'N' or 'n' | |
| | | , | |
| | | * the leading n x k part of array B must contain the matrix B, | |
| | | * otherwise the leading k x n part of the array must contain the ma | |
| | | trix | |
| | | * B. | |
| | | * ldb leading dimension of B. When trans == 'N' or 'n' then ldb must be | |
| | | at | |
| | | * least max(1, n). Otherwise ldb must be at least max(1, k). | |
| | | * beta single precision scalar multiplier applied to C. If beta is zero, | |
| | | C | |
| | | * does not have to be a valid input. | |
| | | * C single precision complex array of dimensions (ldc, n). If uplo == | |
| | | 'U' or 'u', | |
| | | * the leading n x n triangular part of the array C must contain the | |
| | | * upper triangular part of the hermitian matrix C and the strictly | |
| | | * lower triangular part of C is not referenced. On exit, the upper | |
| | | * triangular part of C is overwritten by the upper triangular part | |
| | | of | |
| | | * the updated matrix. If uplo == 'L' or 'l', the leading n x n | |
| | | * triangular part of the array C must contain the lower triangular | |
| | | part | |
| | | * of the hermitian matrix C and the strictly upper triangular part | |
| | | of C | |
| | | * is not referenced. On exit, the lower triangular part of C is | |
| | | * overwritten by the lower triangular part of the updated matrix. | |
| | | * The imaginary parts of the diagonal elements need | |
| | | * not be set, they are assumed to be zero, and on exit they | |
| | | * are set to zero. | |
| | | * ldc leading dimension of C. Must be at least max(1, n). | |
| | | * | |
| | | * Output | |
| | | * ------ | |
| | | * C updated according to alpha*A*conjugate(transpose(B)) | |
| | | * + conjugate(alpha)*B*conjugate(transpose(A)) + beta*C or | |
| | | * alpha*conjugate(transpose(A))*B + conjugate(alpha)*conjugate(tran | |
| | | spose(B))*A | |
| | | * + beta*C. | |
| | | * | |
| | | * Reference: http://www.netlib.org/blas/cher2k.f | |
| | | * | |
| | | * Error status for this function can be retrieved via cublasGetError(). | |
| | | * | |
| | | * Error Status | |
| | | * ------------ | |
| | | * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialize | |
| | | d | |
| | | * CUBLAS_STATUS_INVALID_VALUE if n < 0 or k < 0 | |
| | | * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU | |
| | | */ | |
| void CUBLASAPI cublasCher2k (char uplo, char trans, int n, int k, | | void CUBLASAPI cublasCher2k (char uplo, char trans, int n, int k, | |
| cuComplex alpha, const cuComplex *A, int lda, | | cuComplex alpha, const cuComplex *A, int lda, | |
|
| const cuComplex *B, int ldb, cuComplex beta, | | const cuComplex *B, int ldb, float beta, | |
| cuComplex *C, int ldc); | | cuComplex *C, int ldc); | |
|
| | | /* | |
| | | * void | |
| | | * cublasCtrmm (char side, char uplo, char transa, char diag, int m, int n, | |
| | | * cuComplex alpha, const cuComplex *A, int lda, const cuCompl | |
| | | ex *B, | |
| | | * int ldb) | |
| | | * | |
| | | * performs one of the matrix-matrix operations | |
| | | * | |
| | | * B = alpha * op(A) * B, or B = alpha * B * op(A) | |
| | | * | |
| | | * where alpha is a single-precision complex scalar, B is an m x n matrix c | |
| | | omposed | |
| | | * of single precision complex elements, and A is a unit or non-unit, upper | |
| | | or lower, | |
| | | * triangular matrix composed of single precision complex elements. op(A) i | |
| | | s one of | |
| | | * | |
| | | * op(A) = A , op(A) = transpose(A) or op(A) = conjugate(transpose(A)) | |
| | | * | |
| | | * Matrices A and B are stored in column major format, and lda and ldb are | |
| | | * the leading dimensions of the two-dimensional arrays that contain A and | |
| | | * B, respectively. | |
| | | * | |
| | | * Input | |
| | | * ----- | |
| | | * side specifies whether op(A) multiplies B from the left or right. | |
| | | * If side = 'L' or 'l', then B = alpha * op(A) * B. If side = | |
| | | * 'R' or 'r', then B = alpha * B * op(A). | |
| | | * uplo specifies whether the matrix A is an upper or lower triangular | |
| | | * matrix. If uplo = 'U' or 'u', A is an upper triangular matrix. | |
| | | * If uplo = 'L' or 'l', A is a lower triangular matrix. | |
| | | * transa specifies the form of op(A) to be used in the matrix | |
| | | * multiplication. If transa = 'N' or 'n', then op(A) = A. If | |
| | | * transa = 'T' or 't', then op(A) = transpose(A). | |
| | | * If transa = 'C' or 'c', then op(A) = conjugate(transpose(A)). | |
| | | * diag specifies whether or not A is unit triangular. If diag = 'U' | |
| | | * or 'u', A is assumed to be unit triangular. If diag = 'N' or | |
| | | * 'n', A is not assumed to be unit triangular. | |
| | | * m the number of rows of matrix B. m must be at least zero. | |
| | | * n the number of columns of matrix B. n must be at least zero. | |
| | | * alpha single precision complex scalar multiplier applied to op(A)*B, or | |
| | | * B*op(A), respectively. If alpha is zero no accesses are made | |
| | | * to matrix A, and no read accesses are made to matrix B. | |
| | | * A single precision complex array of dimensions (lda, k). k = m if s | |
| | | ide = | |
| | | * 'L' or 'l', k = n if side = 'R' or 'r'. If uplo = 'U' or 'u' | |
| | | * the leading k x k upper triangular part of the array A must | |
| | | * contain the upper triangular matrix, and the strictly lower | |
| | | * triangular part of A is not referenced. If uplo = 'L' or 'l' | |
| | | * the leading k x k lower triangular part of the array A must | |
| | | * contain the lower triangular matrix, and the strictly upper | |
| | | * triangular part of A is not referenced. When diag = 'U' or 'u' | |
| | | * the diagonal elements of A are not referenced and are assumed | |
| | | * to be unity. | |
| | | * lda leading dimension of A. When side = 'L' or 'l', it must be at | |
| | | * least max(1,m) and at least max(1,n) otherwise | |
| | | * B single precision complex array of dimensions (ldb, n). On entry, | |
| | | the | |
| | | * leading m x n part of the array contains the matrix B. It is | |
| | | * overwritten with the transformed matrix on exit. | |
| | | * ldb leading dimension of B. It must be at least max (1, m). | |
| | | * | |
| | | * Output | |
| | | * ------ | |
| | | * B updated according to B = alpha * op(A) * B or B = alpha * B * op | |
| | | (A) | |
| | | * | |
| | | * Reference: http://www.netlib.org/blas/ctrmm.f | |
| | | * | |
| | | * Error status for this function can be retrieved via cublasGetError(). | |
| | | * | |
| | | * Error Status | |
| | | * ------------ | |
| | | * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialize | |
| | | d | |
| | | * CUBLAS_STATUS_INVALID_VALUE if m or n < 0 | |
| | | * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU | |
| | | */ | |
| void CUBLASAPI cublasCtrmm (char side, char uplo, char transa, char diag, | | void CUBLASAPI cublasCtrmm (char side, char uplo, char transa, char diag, | |
| int m, int n, cuComplex alpha, const cuComplex
*A, | | int m, int n, cuComplex alpha, const cuComplex
*A, | |
| int lda, cuComplex *B, int ldb); | | int lda, cuComplex *B, int ldb); | |
|
| | | /* | |
| | | * void | |
| | | * cublasCtrsm (char side, char uplo, char transa, char diag, int m, int n, | |
| | | * cuComplex alpha, const cuComplex *A, int lda, | |
| | | * cuComplex *B, int ldb) | |
| | | * | |
| | | * solves one of the matrix equations | |
| | | * | |
| | | * op(A) * X = alpha * B, or X * op(A) = alpha * B, | |
| | | * | |
| | | * where alpha is a single precision complex scalar, and X and B are m x n | |
| | | matrices | |
| | | * that are composed of single precision complex elements. A is a unit or n | |
| | | on-unit, | |
| | | * upper or lower triangular matrix, and op(A) is one of | |
| | | * | |
| | | * op(A) = A or op(A) = transpose(A) or op( A ) = conj( A' ). | |
| | | * | |
| | | * The result matrix X overwrites input matrix B; that is, on exit the resu | |
| | | lt | |
| | | * is stored in B. Matrices A and B are stored in column major format, and | |
| | | * lda and ldb are the leading dimensions of the two-dimensional arrays th | |
| | | at | |
| | | * contain A and B, respectively. | |
| | | * | |
| | | * Input | |
| | | * ----- | |
| | | * side specifies whether op(A) appears on the left or right of X as | |
| | | * follows: side = 'L' or 'l' indicates solve op(A) * X = alpha * B. | |
| | | * side = 'R' or 'r' indicates solve X * op(A) = alpha * B. | |
| | | * uplo specifies whether the matrix A is an upper or lower triangular | |
| | | * matrix as follows: uplo = 'U' or 'u' indicates A is an upper | |
| | | * triangular matrix. uplo = 'L' or 'l' indicates A is a lower | |
| | | * triangular matrix. | |
| | | * transa specifies the form of op(A) to be used in matrix multiplication | |
| | | * as follows: If transa = 'N' or 'n', then op(A) = A. If transa = | |
| | | * 'T' or 't', then op(A) = transpose(A). If transa = 'C' or 'c', | |
| | | * then op(A) = conjugate(transpose(A)). | |
| | | * diag specifies whether or not A is a unit triangular matrix like so: | |
| | | * if diag = 'U' or 'u', A is assumed to be unit triangular. If | |
| | | * diag = 'N' or 'n', then A is not assumed to be unit triangular. | |
| | | * m specifies the number of rows of B. m must be at least zero. | |
| | | * n specifies the number of columns of B. n must be at least zero. | |
| | | * alpha is a single precision complex scalar to be multiplied with B. Whe | |
| | | n alpha is | |
| | | * zero, then A is not referenced and B need not be set before entry | |
| | | . | |
| | | * A is a single precision complex array of dimensions (lda, k), where | |
| | | k is | |
| | | * m when side = 'L' or 'l', and is n when side = 'R' or 'r'. If | |
| | | * uplo = 'U' or 'u', the leading k x k upper triangular part of | |
| | | * the array A must contain the upper triangular matrix and the | |
| | | * strictly lower triangular matrix of A is not referenced. When | |
| | | * uplo = 'L' or 'l', the leading k x k lower triangular part of | |
| | | * the array A must contain the lower triangular matrix and the | |
| | | * strictly upper triangular part of A is not referenced. Note that | |
| | | * when diag = 'U' or 'u', the diagonal elements of A are not | |
| | | * referenced, and are assumed to be unity. | |
| | | * lda is the leading dimension of the two dimensional array containing | |
| | | A. | |
| | | * When side = 'L' or 'l' then lda must be at least max(1, m), when | |
| | | * side = 'R' or 'r' then lda must be at least max(1, n). | |
| | | * B is a single precision complex array of dimensions (ldb, n). ldb m | |
| | | ust be | |
| | | * at least max (1,m). The leading m x n part of the array B must | |
| | | * contain the right-hand side matrix B. On exit B is overwritten | |
| | | * by the solution matrix X. | |
| | | * ldb is the leading dimension of the two dimensional array containing | |
| | | B. | |
| | | * ldb must be at least max(1, m). | |
| | | * | |
| | | * Output | |
| | | * ------ | |
| | | * B contains the solution matrix X satisfying op(A) * X = alpha * B, | |
| | | * or X * op(A) = alpha * B | |
| | | * | |
| | | * Reference: http://www.netlib.org/blas/ctrsm.f | |
| | | * | |
| | | * Error status for this function can be retrieved via cublasGetError(). | |
| | | * | |
| | | * Error Status | |
| | | * ------------ | |
| | | * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialize | |
| | | d | |
| | | * CUBLAS_STATUS_INVALID_VALUE if m or n < 0 | |
| | | * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU | |
| | | */ | |
| void CUBLASAPI cublasCtrsm (char side, char uplo, char transa, char diag, | | void CUBLASAPI cublasCtrsm (char side, char uplo, char transa, char diag, | |
| int m, int n, cuComplex alpha, const cuComplex
*A, | | int m, int n, cuComplex alpha, const cuComplex
*A, | |
| int lda, cuComplex *B, int ldb); | | int lda, cuComplex *B, int ldb); | |
|
| | | | |
| void CUBLASAPI cublasXerbla (const char *srName, int info); | | void CUBLASAPI cublasXerbla (const char *srName, int info); | |
| | | | |
| /* ---------------- CUBLAS double-precision BLAS1 functions ---------------
- */ | | /* ---------------- CUBLAS double-precision BLAS1 functions ---------------
- */ | |
| | | | |
| /* | | /* | |
| * double | | * double | |
| * cublasDasum (int n, const double *x, int incx) | | * cublasDasum (int n, const double *x, int incx) | |
| * | | * | |
| * computes the sum of the absolute values of the elements of double | | * computes the sum of the absolute values of the elements of double | |
| * precision vector x; that is, the result is the sum from i = 0 to n - 1 o
f | | * precision vector x; that is, the result is the sum from i = 0 to n - 1 o
f | |
| | | | |
| skipping to change at line 3068 | | skipping to change at line 5474 | |
| * ------------ | | * ------------ | |
| * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialize
d | | * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialize
d | |
| * CUBLAS_STATUS_ARCH_MISMATCH if invoked on device without DP support | | * CUBLAS_STATUS_ARCH_MISMATCH if invoked on device without DP support | |
| * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU | | * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU | |
| */ | | */ | |
| void CUBLASAPI cublasDrot (int n, double *x, int incx, double *y, int incy, | | void CUBLASAPI cublasDrot (int n, double *x, int incx, double *y, int incy, | |
| double sc, double ss); | | double sc, double ss); | |
| | | | |
| /* | | /* | |
| * void | | * void | |
|
| * cublasDrotg (double *sa, double *sb, double *sc, double *ss) | | * cublasDrotg (double *host_sa, double *host_sb, double *host_sc, double *
host_ss) | |
| * | | * | |
| * constructs the Givens transformation | | * constructs the Givens transformation | |
| * | | * | |
| * ( sc ss ) | | * ( sc ss ) | |
| * G = ( ) , sc^2 + ss^2 = 1, | | * G = ( ) , sc^2 + ss^2 = 1, | |
| * (-ss sc ) | | * (-ss sc ) | |
| * | | * | |
| * which zeros the second entry of the 2-vector transpose(sa, sb). | | * which zeros the second entry of the 2-vector transpose(sa, sb). | |
| * | | * | |
| * The quantity r = (+/-) sqrt (sa^2 + sb^2) overwrites sa in storage. The | | * The quantity r = (+/-) sqrt (sa^2 + sb^2) overwrites sa in storage. The | |
| * value of sb is overwritten by a value z which allows sc and ss to be | | * value of sb is overwritten by a value z which allows sc and ss to be | |
| * recovered by the following algorithm: | | * recovered by the following algorithm: | |
| * | | * | |
| * if z=1 set sc = 0.0 and ss = 1.0 | | * if z=1 set sc = 0.0 and ss = 1.0 | |
| * if abs(z) < 1 set sc = sqrt(1-z^2) and ss = z | | * if abs(z) < 1 set sc = sqrt(1-z^2) and ss = z | |
| * if abs(z) > 1 set sc = 1/z and ss = sqrt(1-sc^2) | | * if abs(z) > 1 set sc = 1/z and ss = sqrt(1-sc^2) | |
| * | | * | |
| * The function drot (n, x, incx, y, incy, sc, ss) normally is called next | | * The function drot (n, x, incx, y, incy, sc, ss) normally is called next | |
| * to apply the transformation to a 2 x n matrix. | | * to apply the transformation to a 2 x n matrix. | |
|
| | | * Note that this function is provided for completeness and runs exclusively | |
| | | * on the Host. | |
| * | | * | |
| * Input | | * Input | |
| * ----- | | * ----- | |
| * sa double-precision scalar | | * sa double-precision scalar | |
| * sb double-precision scalar | | * sb double-precision scalar | |
| * | | * | |
| * Output | | * Output | |
| * ------ | | * ------ | |
| * sa double-precision r | | * sa double-precision r | |
| * sb double-precision z | | * sb double-precision z | |
| * sc double-precision result | | * sc double-precision result | |
| * ss double-precision result | | * ss double-precision result | |
| * | | * | |
| * Reference: http://www.netlib.org/blas/drotg.f | | * Reference: http://www.netlib.org/blas/drotg.f | |
| * | | * | |
| * This function does not set any error status. | | * This function does not set any error status. | |
| */ | | */ | |
|
| void CUBLASAPI cublasDrotg (double *sa, double *sb, double *sc, double *ss)
; | | void CUBLASAPI cublasDrotg (double *host_sa, double *host_sb, double *host_
sc, double *host_ss); | |
| | | | |
| /* | | /* | |
| * void | | * void | |
| * cublasDrotm (int n, double *x, int incx, double *y, int incy, | | * cublasDrotm (int n, double *x, int incx, double *y, int incy, | |
| * const double* sparam) | | * const double* sparam) | |
| * | | * | |
| * applies the modified Givens transformation, h, to the 2 x n matrix | | * applies the modified Givens transformation, h, to the 2 x n matrix | |
| * | | * | |
| * ( transpose(x) ) | | * ( transpose(x) ) | |
| * ( transpose(y) ) | | * ( transpose(y) ) | |
| | | | |
| skipping to change at line 3127 | | skipping to change at line 5535 | |
| * The elements of x are in x[lx + i * incx], i = 0 to n-1, where lx = 1 if | | * The elements of x are in x[lx + i * incx], i = 0 to n-1, where lx = 1 if | |
| * incx >= 0, else lx = 1 + (1 - n) * incx, and similarly for y using ly an
d | | * incx >= 0, else lx = 1 + (1 - n) * incx, and similarly for y using ly an
d | |
| * incy. With sparam[0] = sflag, h has one of the following forms: | | * incy. With sparam[0] = sflag, h has one of the following forms: | |
| * | | * | |
| * sflag = -1.0 sflag = 0.0 sflag = 1.0 sflag = -2.0 | | * sflag = -1.0 sflag = 0.0 sflag = 1.0 sflag = -2.0 | |
| * | | * | |
| * (sh00 sh01) (1.0 sh01) (sh00 1.0) (1.0 0.0) | | * (sh00 sh01) (1.0 sh01) (sh00 1.0) (1.0 0.0) | |
| * h = ( ) ( ) ( ) ( ) | | * h = ( ) ( ) ( ) ( ) | |
| * (sh10 sh11) (sh10 1.0) (-1.0 sh11) (0.0 1.0) | | * (sh10 sh11) (sh10 1.0) (-1.0 sh11) (0.0 1.0) | |
| * | | * | |
|
| | | * Note that this function is provided for completeness and runs exclusively | |
| | | * on the Host. | |
| | | * | |
| * Input | | * Input | |
| * ----- | | * ----- | |
| * n number of elements in input vectors | | * n number of elements in input vectors | |
| * x double-precision vector with n elements | | * x double-precision vector with n elements | |
| * incx storage spacing between elements of x | | * incx storage spacing between elements of x | |
| * y double-precision vector with n elements | | * y double-precision vector with n elements | |
| * incy storage spacing between elements of y | | * incy storage spacing between elements of y | |
| * sparam 5-element vector. sparam[0] is sflag described above. sparam[1] | | * sparam 5-element vector. sparam[0] is sflag described above. sparam[1] | |
| * through sparam[4] contain the 2x2 rotation matrix h: sparam[1] | | * through sparam[4] contain the 2x2 rotation matrix h: sparam[1] | |
| * contains sh00, sparam[2] contains sh10, sparam[3] contains sh01, | | * contains sh00, sparam[2] contains sh10, sparam[3] contains sh01, | |
| | | | |
| skipping to change at line 3159 | | skipping to change at line 5570 | |
| * ------------ | | * ------------ | |
| * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialize
d | | * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialize
d | |
| * CUBLAS_STATUS_ARCH_MISMATCH if invoked on device without DP support | | * CUBLAS_STATUS_ARCH_MISMATCH if invoked on device without DP support | |
| * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU | | * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU | |
| */ | | */ | |
| void CUBLASAPI cublasDrotm(int n, double *x, int incx, double *y, int incy, | | void CUBLASAPI cublasDrotm(int n, double *x, int incx, double *y, int incy, | |
| const double* sparam); | | const double* sparam); | |
| | | | |
| /* | | /* | |
| * void | | * void | |
|
| * cublasDrotmg (double *psd1, double *psd2, double *psx1, const double *ps | | * cublasDrotmg (double *host_sd1, double *host_sd2, double *host_sx1, cons | |
| y1, | | t double *host_sy1, | |
| * double *sparam) | | * double *host_sparam) | |
| * | | * | |
| * constructs the modified Givens transformation matrix h which zeros | | * constructs the modified Givens transformation matrix h which zeros | |
| * the second component of the 2-vector transpose(sqrt(sd1)*sx1,sqrt(sd2)*s
y1). | | * the second component of the 2-vector transpose(sqrt(sd1)*sx1,sqrt(sd2)*s
y1). | |
| * With sparam[0] = sflag, h has one of the following forms: | | * With sparam[0] = sflag, h has one of the following forms: | |
| * | | * | |
| * sflag = -1.0 sflag = 0.0 sflag = 1.0 sflag = -2.0 | | * sflag = -1.0 sflag = 0.0 sflag = 1.0 sflag = -2.0 | |
| * | | * | |
| * (sh00 sh01) (1.0 sh01) (sh00 1.0) (1.0 0.0) | | * (sh00 sh01) (1.0 sh01) (sh00 1.0) (1.0 0.0) | |
| * h = ( ) ( ) ( ) ( ) | | * h = ( ) ( ) ( ) ( ) | |
| * (sh10 sh11) (sh10 1.0) (-1.0 sh11) (0.0 1.0) | | * (sh10 sh11) (sh10 1.0) (-1.0 sh11) (0.0 1.0) | |
| * | | * | |
| * sparam[1] through sparam[4] contain sh00, sh10, sh01, sh11, | | * sparam[1] through sparam[4] contain sh00, sh10, sh01, sh11, | |
| * respectively. Values of 1.0, -1.0, or 0.0 implied by the value | | * respectively. Values of 1.0, -1.0, or 0.0 implied by the value | |
| * of sflag are not stored in sparam. | | * of sflag are not stored in sparam. | |
|
| | | * Note that this function is provided for completeness and runs exclusively | |
| | | * on the Host. | |
| * | | * | |
| * Input | | * Input | |
| * ----- | | * ----- | |
| * sd1 double precision scalar | | * sd1 double precision scalar | |
| * sd2 double precision scalar | | * sd2 double precision scalar | |
| * sx1 double precision scalar | | * sx1 double precision scalar | |
| * sy1 double precision scalar | | * sy1 double precision scalar | |
| * | | * | |
| * Output | | * Output | |
| * ------ | | * ------ | |
| | | | |
| skipping to change at line 3197 | | skipping to change at line 5610 | |
| * sx1 changed to represent the effect of the transformation | | * sx1 changed to represent the effect of the transformation | |
| * sparam 5-element vector. sparam[0] is sflag described above. sparam[1] | | * sparam 5-element vector. sparam[0] is sflag described above. sparam[1] | |
| * through sparam[4] contain the 2x2 rotation matrix h: sparam[1] | | * through sparam[4] contain the 2x2 rotation matrix h: sparam[1] | |
| * contains sh00, sparam[2] contains sh10, sparam[3] contains sh01, | | * contains sh00, sparam[2] contains sh10, sparam[3] contains sh01, | |
| * and sparam[4] contains sh11. | | * and sparam[4] contains sh11. | |
| * | | * | |
| * Reference: http://www.netlib.org/blas/drotmg.f | | * Reference: http://www.netlib.org/blas/drotmg.f | |
| * | | * | |
| * This function does not set any error status. | | * This function does not set any error status. | |
| */ | | */ | |
|
| void CUBLASAPI cublasDrotmg (double *sd1, double *sd2, double *sx1, | | void CUBLASAPI cublasDrotmg (double *host_sd1, double *host_sd2, double *ho | |
| const double *sy1, double* sparam); | | st_sx1, | |
| | | const double *host_sy1, double* host_sparam); | |
| | | | |
| /* | | /* | |
| * void | | * void | |
| * cublasDscal (int n, double alpha, double *x, int incx) | | * cublasDscal (int n, double alpha, double *x, int incx) | |
| * | | * | |
| * replaces double-precision vector x with double-precision alpha * x. For | | * replaces double-precision vector x with double-precision alpha * x. For | |
| * i = 0 to n-1, it replaces x[lx + i * incx] with alpha * x[lx + i * incx]
, | | * i = 0 to n-1, it replaces x[lx + i * incx] with alpha * x[lx + i * incx]
, | |
| * where lx = 1 if incx >= 0, else lx = 1 + (1 - n) * incx. | | * where lx = 1 if incx >= 0, else lx = 1 + (1 - n) * incx. | |
| * | | * | |
| * Input | | * Input | |
| | | | |
| skipping to change at line 3493 | | skipping to change at line 5906 | |
| * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialize
d | | * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialize
d | |
| * CUBLAS_STATUS_INVALID_VALUE if n < 0, or incx == 0 | | * CUBLAS_STATUS_INVALID_VALUE if n < 0, or incx == 0 | |
| * CUBLAS_STATUS_ARCH_MISMATCH if invoked on device without DP support | | * CUBLAS_STATUS_ARCH_MISMATCH if invoked on device without DP support | |
| * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU | | * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU | |
| */ | | */ | |
| void CUBLASAPI cublasDsyr (char uplo, int n, double alpha, | | void CUBLASAPI cublasDsyr (char uplo, int n, double alpha, | |
| const double *x, int incx, double *A, | | const double *x, int incx, double *A, | |
| int lda); | | int lda); | |
| | | | |
| /* | | /* | |
|
| | | * void cublasDsyr2 (char uplo, int n, double alpha, const double *x, int i | |
| | | ncx, | |
| | | * const double *y, int incy, double *A, int lda) | |
| | | * | |
| | | * performs the symmetric rank 2 operation | |
| | | * | |
| | | * A = alpha*x*transpose(y) + alpha*y*transpose(x) + A, | |
| | | * | |
| | | * where alpha is a double precision scalar, x and y are n element double | |
| | | * precision vector and A is an n by n symmetric matrix consisting of doubl | |
| | | e | |
| | | * precision elements. | |
| | | * | |
| | | * Input | |
| | | * ----- | |
| | | * uplo specifies whether the matrix data is stored in the upper or the l | |
| | | ower | |
| | | * triangular part of array A. If uplo == 'U' or 'u', then only the | |
| | | * upper triangular part of A may be referenced and the lower triang | |
| | | ular | |
| | | * part of A is inferred. If uplo == 'L' or 'l', then only the lower | |
| | | * triangular part of A may be referenced and the upper triangular p | |
| | | art | |
| | | * of A is inferred. | |
| | | * n specifies the number of rows and columns of the matrix A. It must | |
| | | be | |
| | | * at least zero. | |
| | | * alpha double precision scalar multiplier applied to x * transpose(y) + | |
| | | * y * transpose(x). | |
| | | * x double precision array of length at least (1 + (n - 1) * abs (inc | |
| | | x)). | |
| | | * incx storage spacing between elements of x. incx must not be zero. | |
| | | * y double precision array of length at least (1 + (n - 1) * abs (inc | |
| | | y)). | |
| | | * incy storage spacing between elements of y. incy must not be zero. | |
| | | * A double precision array of dimensions (lda, n). If uplo == 'U' or | |
| | | 'u', | |
| | | * then A must contain the upper triangular part of a symmetric mat | |
| | | rix, | |
| | | * and the strictly lower triangular part is not referenced. If upl | |
| | | o == | |
| | | * 'L' or 'l', then A contains the lower triangular part of a symmet | |
| | | ric | |
| | | * matrix, and the strictly upper triangular part is not referenced. | |
| | | * lda leading dimension of A. It must be at least max(1, n). | |
| | | * | |
| | | * Output | |
| | | * ------ | |
| | | * A updated according to A = alpha*x*transpose(y)+alpha*y*transpose(x | |
| | | )+A | |
| | | * | |
| | | * Reference: http://www.netlib.org/blas/dsyr2.f | |
| | | * | |
| | | * Error status for this function can be retrieved via cublasGetError(). | |
| | | * | |
| | | * Error Status | |
| | | * ------------ | |
| | | * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialize | |
| | | d | |
| | | * CUBLAS_STATUS_INVALID_VALUE if n < 0, incx == 0, incy == 0 | |
| | | * CUBLAS_STATUS_ARCH_MISMATCH if invoked on device without DP support | |
| | | * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU | |
| | | */ | |
| | | void CUBLASAPI cublasDsyr2 (char uplo, int n, double alpha, | |
| | | const double *x, int incx, const double *y, | |
| | | int incy, double *A, int lda); | |
| | | | |
| | | /* | |
| | | * void | |
| | | * cublasDspr (char uplo, int n, double alpha, const double *x, int incx, | |
| | | * double *AP) | |
| | | * | |
| | | * performs the symmetric rank 1 operation | |
| | | * | |
| | | * A = alpha * x * transpose(x) + A, | |
| | | * | |
| | | * where alpha is a double precision scalar and x is an n element double | |
| | | * precision vector. A is a symmetric n x n matrix consisting of double | |
| | | * precision elements that is supplied in packed form. | |
| | | * | |
| | | * Input | |
| | | * ----- | |
| | | * uplo specifies whether the matrix data is stored in the upper or the l | |
| | | ower | |
| | | * triangular part of array AP. If uplo == 'U' or 'u', then the uppe | |
| | | r | |
| | | * triangular part of A is supplied in AP. If uplo == 'L' or 'l', th | |
| | | en | |
| | | * the lower triangular part of A is supplied in AP. | |
| | | * n specifies the number of rows and columns of the matrix A. It must | |
| | | be | |
| | | * at least zero. | |
| | | * alpha double precision scalar multiplier applied to x * transpose(x). | |
| | | * x double precision array of length at least (1 + (n - 1) * abs(incx | |
| | | )). | |
| | | * incx storage spacing between elements of x. incx must not be zero. | |
| | | * AP double precision array with at least ((n * (n + 1)) / 2) elements | |
| | | . If | |
| | | * uplo == 'U' or 'u', the array AP contains the upper triangular pa | |
| | | rt | |
| | | * of the symmetric matrix A, packed sequentially, column by column; | |
| | | * that is, if i <= j, then A[i,j] is stored in AP[i+(j*(j+1)/2)]. I | |
| | | f | |
| | | * uplo == 'L' or 'l', the array AP contains the lower triangular pa | |
| | | rt | |
| | | * of the symmetric matrix A, packed sequentially, column by column; | |
| | | * that is, if i >= j, then A[i,j] is stored in AP[i+((2*n-j+1)*j)/2 | |
| | | ]. | |
| | | * | |
| | | * Output | |
| | | * ------ | |
| | | * A updated according to A = alpha * x * transpose(x) + A | |
| | | * | |
| | | * Reference: http://www.netlib.org/blas/dspr.f | |
| | | * | |
| | | * Error status for this function can be retrieved via cublasGetError(). | |
| | | * | |
| | | * Error Status | |
| | | * ------------ | |
| | | * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialize | |
| | | d | |
| | | * CUBLAS_STATUS_INVALID_VALUE if n < 0, or incx == 0 | |
| | | * CUBLAS_STATUS_ARCH_MISMATCH if invoked on device without DP support | |
| | | * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU | |
| | | */ | |
| | | void CUBLASAPI cublasDspr (char uplo, int n, double alpha, | |
| | | const double *x, int incx, double *AP); | |
| | | /* | |
| | | * void | |
| | | * cublasDspr2 (char uplo, int n, double alpha, const double *x, int incx, | |
| | | * const double *y, int incy, double *AP) | |
| | | * | |
| | | * performs the symmetric rank 2 operation | |
| | | * | |
| | | * A = alpha*x*transpose(y) + alpha*y*transpose(x) + A, | |
| | | * | |
| | | * where alpha is a double precision scalar, and x and y are n element doub | |
| | | le | |
| | | * precision vectors. A is a symmetric n x n matrix consisting of double | |
| | | * precision elements that is supplied in packed form. | |
| | | * | |
| | | * Input | |
| | | * ----- | |
| | | * uplo specifies whether the matrix data is stored in the upper or the l | |
| | | ower | |
| | | * triangular part of array A. If uplo == 'U' or 'u', then only the | |
| | | * upper triangular part of A may be referenced and the lower triang | |
| | | ular | |
| | | * part of A is inferred. If uplo == 'L' or 'l', then only the lower | |
| | | * triangular part of A may be referenced and the upper triangular p | |
| | | art | |
| | | * of A is inferred. | |
| | | * n specifies the number of rows and columns of the matrix A. It must | |
| | | be | |
| | | * at least zero. | |
| | | * alpha double precision scalar multiplier applied to x * transpose(y) + | |
| | | * y * transpose(x). | |
| | | * x double precision array of length at least (1 + (n - 1) * abs (inc | |
| | | x)). | |
| | | * incx storage spacing between elements of x. incx must not be zero. | |
| | | * y double precision array of length at least (1 + (n - 1) * abs (inc | |
| | | y)). | |
| | | * incy storage spacing between elements of y. incy must not be zero. | |
| | | * AP double precision array with at least ((n * (n + 1)) / 2) elements | |
| | | . If | |
| | | * uplo == 'U' or 'u', the array AP contains the upper triangular pa | |
| | | rt | |
| | | * of the symmetric matrix A, packed sequentially, column by column; | |
| | | * that is, if i <= j, then A[i,j] is stored in AP[i+(j*(j+1)/2)]. I | |
| | | f | |
| | | * uplo == 'L' or 'l', the array AP contains the lower triangular pa | |
| | | rt | |
| | | * of the symmetric matrix A, packed sequentially, column by column; | |
| | | * that is, if i >= j, then A[i,j] is stored in AP[i+((2*n-j+1)*j)/2 | |
| | | ]. | |
| | | * | |
| | | * Output | |
| | | * ------ | |
| | | * A updated according to A = alpha*x*transpose(y)+alpha*y*transpose(x | |
| | | )+A | |
| | | * | |
| | | * Reference: http://www.netlib.org/blas/dspr2.f | |
| | | * | |
| | | * Error status for this function can be retrieved via cublasGetError(). | |
| | | * | |
| | | * Error Status | |
| | | * ------------ | |
| | | * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialize | |
| | | d | |
| | | * CUBLAS_STATUS_INVALID_VALUE if n < 0, incx == 0, incy == 0 | |
| | | * CUBLAS_STATUS_ARCH_MISMATCH if invoked on device without DP support | |
| | | * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU | |
| | | */ | |
| | | void CUBLASAPI cublasDspr2 (char uplo, int n, double alpha, | |
| | | const double *x, int incx, const double *y, | |
| | | int incy, double *AP); | |
| | | | |
| | | /* | |
| * void | | * void | |
| * cublasDtrsv (char uplo, char trans, char diag, int n, const double *A, | | * cublasDtrsv (char uplo, char trans, char diag, int n, const double *A, | |
| * int lda, double *x, int incx) | | * int lda, double *x, int incx) | |
| * | | * | |
| * solves a system of equations op(A) * x = b, where op(A) is either A or | | * solves a system of equations op(A) * x = b, where op(A) is either A or | |
| * transpose(A). b and x are double precision vectors consisting of n | | * transpose(A). b and x are double precision vectors consisting of n | |
| * elements, and A is an n x n matrix composed of a unit or non-unit, upper | | * elements, and A is an n x n matrix composed of a unit or non-unit, upper | |
| * or lower triangular matrix. Matrix A is stored in column major format, | | * or lower triangular matrix. Matrix A is stored in column major format, | |
|
| * and lda is the leading dimension of the two-diemnsional array containing | | * and lda is the leading dimension of the two-dimensional array containing | |
| * A. | | * A. | |
| * | | * | |
| * No test for singularity or near-singularity is included in this function
. | | * No test for singularity or near-singularity is included in this function
. | |
| * Such tests must be performed before calling this function. | | * Such tests must be performed before calling this function. | |
| * | | * | |
| * Input | | * Input | |
| * ----- | | * ----- | |
| * uplo specifies whether the matrix data is stored in the upper or the | | * uplo specifies whether the matrix data is stored in the upper or the | |
| * lower triangular part of array A. If uplo = 'U' or 'u', then only | | * lower triangular part of array A. If uplo = 'U' or 'u', then only | |
| * the upper triangular part of A may be referenced. If uplo = 'L' o
r | | * the upper triangular part of A may be referenced. If uplo = 'L' o
r | |
| * 'l', then only the lower triangular part of A may be referenced. | | * 'l', then only the lower triangular part of A may be referenced. | |
| * trans specifies op(A). If trans = 'n' or 'N', op(A) = A. If trans = '
t', | | * trans specifies op(A). If trans = 'n' or 'N', op(A) = A. If trans = '
t', | |
| * 'T', 'c', or 'C', op(A) = transpose(A) | | * 'T', 'c', or 'C', op(A) = transpose(A) | |
| * diag specifies whether or not A is a unit triangular matrix like so: | | * diag specifies whether or not A is a unit triangular matrix like so: | |
| * if diag = 'U' or 'u', A is assumed to be unit triangular. If | | * if diag = 'U' or 'u', A is assumed to be unit triangular. If | |
| * diag = 'N' or 'n', then A is not assumed to be unit triangular. | | * diag = 'N' or 'n', then A is not assumed to be unit triangular. | |
| * n specifies the number of rows and columns of the matrix A. It | | * n specifies the number of rows and columns of the matrix A. It | |
|
| * must be at least 0. In the current implementation n must be <= | | * must be at least 0. | |
| * 2040. | | | |
| * A is a double precision array of dimensions (lda, n). If uplo = 'U' | | * A is a double precision array of dimensions (lda, n). If uplo = 'U' | |
| * or 'u', then A must contains the upper triangular part of a symme
tric | | * or 'u', then A must contains the upper triangular part of a symme
tric | |
| * matrix, and the strictly lower triangular parts is not referenced
. | | * matrix, and the strictly lower triangular parts is not referenced
. | |
| * If uplo = 'L' or 'l', then A contains the lower triangular part o
f | | * If uplo = 'L' or 'l', then A contains the lower triangular part o
f | |
| * a symmetric matrix, and the strictly upper triangular part is not | | * a symmetric matrix, and the strictly upper triangular part is not | |
| * referenced. | | * referenced. | |
| * lda is the leading dimension of the two-dimensional array containing
A. | | * lda is the leading dimension of the two-dimensional array containing
A. | |
| * lda must be at least max(1, n). | | * lda must be at least max(1, n). | |
| * x double precision array of length at least (1 + (n - 1) * abs(incx
)). | | * x double precision array of length at least (1 + (n - 1) * abs(incx
)). | |
| * On entry, x contains the n element right-hand side vector b. On e
xit, | | * On entry, x contains the n element right-hand side vector b. On e
xit, | |
| | | | |
| skipping to change at line 3546 | | skipping to change at line 6117 | |
| * ------ | | * ------ | |
| * x updated to contain the solution vector x that solves op(A) * x =
b. | | * x updated to contain the solution vector x that solves op(A) * x =
b. | |
| * | | * | |
| * Reference: http://www.netlib.org/blas/dtrsv.f | | * Reference: http://www.netlib.org/blas/dtrsv.f | |
| * | | * | |
| * Error status for this function can be retrieved via cublasGetError(). | | * Error status for this function can be retrieved via cublasGetError(). | |
| * | | * | |
| * Error Status | | * Error Status | |
| * ------------ | | * ------------ | |
| * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialize
d | | * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialize
d | |
|
| * CUBLAS_STATUS_INVALID_VALUE if incx == 0 or if n < 0 or n > 2040 | | * CUBLAS_STATUS_INVALID_VALUE if incx == 0 or if n < 0 | |
| * CUBLAS_STATUS_ARCH_MISMATCH if invoked on device without DP support | | * CUBLAS_STATUS_ARCH_MISMATCH if invoked on device without DP support | |
| * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU | | * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU | |
| */ | | */ | |
| void CUBLASAPI cublasDtrsv (char uplo, char trans, char diag, int n, | | void CUBLASAPI cublasDtrsv (char uplo, char trans, char diag, int n, | |
| const double *A, int lda, double *x, | | const double *A, int lda, double *x, | |
| int incx); | | int incx); | |
| | | | |
|
| | | /* | |
| | | * void | |
| | | * cublasDtrmv (char uplo, char trans, char diag, int n, const double *A, | |
| | | * int lda, double *x, int incx); | |
| | | * | |
| | | * performs one of the matrix-vector operations x = op(A) * x, where op(A) | |
| | | = | |
| | | * A, or op(A) = transpose(A). x is an n-element double precision vector, a | |
| | | nd | |
| | | * A is an n x n, unit or non-unit, upper or lower, triangular matrix compo | |
| | | sed | |
| | | * of double precision elements. | |
| | | * | |
| | | * Input | |
| | | * ----- | |
| | | * uplo specifies whether the matrix A is an upper or lower triangular | |
| | | * matrix. If uplo = 'U' or 'u', then A is an upper triangular matri | |
| | | x. | |
| | | * If uplo = 'L' or 'l', then A is a lower triangular matrix. | |
| | | * trans specifies op(A). If trans = 'N' or 'n', op(A) = A. If trans = 'T | |
| | | ', | |
| | | * 't', 'C', or 'c', op(A) = transpose(A) | |
| | | * diag specifies whether or not matrix A is unit triangular. If diag = ' | |
| | | U' | |
| | | * or 'u', A is assumed to be unit triangular. If diag = 'N' or 'n', | |
| | | A | |
| | | * is not assumed to be unit triangular. | |
| | | * n specifies the number of rows and columns of the matrix A. n must | |
| | | be | |
| | | * at least zero. | |
| | | * A double precision array of dimension (lda, n). If uplo = 'U' or 'u | |
| | | ', | |
| | | * the leading n x n upper triangular part of the array A must conta | |
| | | in | |
| | | * the upper triangular matrix and the strictly lower triangular par | |
| | | t | |
| | | * of A is not referenced. If uplo = 'L' or 'l', the leading n x n l | |
| | | ower | |
| | | * triangular part of the array A must contain the lower triangular | |
| | | * matrix and the strictly upper triangular part of A is not referen | |
| | | ced. | |
| | | * When diag = 'U' or 'u', the diagonal elements of A are not refere | |
| | | nced | |
| | | * either, but are assumed to be unity. | |
| | | * lda is the leading dimension of A. It must be at least max (1, n). | |
| | | * x double precision array of length at least (1 + (n - 1) * abs(incx | |
| | | ) ). | |
| | | * On entry, x contains the source vector. On exit, x is overwritten | |
| | | * with the result vector. | |
| | | * incx specifies the storage spacing for elements of x. incx must not be | |
| | | * zero. | |
| | | * | |
| | | * Output | |
| | | * ------ | |
| | | * x updated according to x = op(A) * x, | |
| | | * | |
| | | * Reference: http://www.netlib.org/blas/dtrmv.f | |
| | | * | |
| | | * Error status for this function can be retrieved via cublasGetError(). | |
| | | * | |
| | | * Error Status | |
| | | * ------------ | |
| | | * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialize | |
| | | d | |
| | | * CUBLAS_STATUS_INVALID_VALUE if incx == 0 or if n < 0 | |
| | | * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU | |
| | | */ | |
| | | void CUBLASAPI cublasDtrmv (char uplo, char trans, char diag, int n, | |
| | | const double *A, int lda, double *x, int incx); | |
| | | | |
| | | /* | |
| | | * void | |
| | | * cublasDgbmv (char trans, int m, int n, int kl, int ku, double alpha, | |
| | | * const double *A, int lda, const double *x, int incx, double | |
| | | beta, | |
| | | * double *y, int incy); | |
| | | * | |
| | | * performs one of the matrix-vector operations | |
| | | * | |
| | | * y = alpha*op(A)*x + beta*y, op(A)=A or op(A) = transpose(A) | |
| | | * | |
| | | * alpha and beta are double precision scalars. x and y are double precisio | |
| | | n | |
| | | * vectors. A is an m by n band matrix consisting of double precision eleme | |
| | | nts | |
| | | * with kl sub-diagonals and ku super-diagonals. | |
| | | * | |
| | | * Input | |
| | | * ----- | |
| | | * trans specifies op(A). If trans == 'N' or 'n', op(A) = A. If trans == ' | |
| | | T', | |
| | | * 't', 'C', or 'c', op(A) = transpose(A) | |
| | | * m specifies the number of rows of the matrix A. m must be at least | |
| | | * zero. | |
| | | * n specifies the number of columns of the matrix A. n must be at lea | |
| | | st | |
| | | * zero. | |
| | | * kl specifies the number of sub-diagonals of matrix A. It must be at | |
| | | * least zero. | |
| | | * ku specifies the number of super-diagonals of matrix A. It must be a | |
| | | t | |
| | | * least zero. | |
| | | * alpha double precision scalar multiplier applied to op(A). | |
| | | * A double precision array of dimensions (lda, n). The leading | |
| | | * (kl + ku + 1) x n part of the array A must contain the band matri | |
| | | x A, | |
| | | * supplied column by column, with the leading diagonal of the matri | |
| | | x | |
| | | * in row (ku + 1) of the array, the first super-diagonal starting a | |
| | | t | |
| | | * position 2 in row ku, the first sub-diagonal starting at position | |
| | | 1 | |
| | | * in row (ku + 2), and so on. Elements in the array A that do not | |
| | | * correspond to elements in the band matrix (such as the top left | |
| | | * ku x ku triangle) are not referenced. | |
| | | * lda leading dimension of A. lda must be at least (kl + ku + 1). | |
| | | * x double precision array of length at least (1+(n-1)*abs(incx)) whe | |
| | | n | |
| | | * trans == 'N' or 'n' and at least (1+(m-1)*abs(incx)) otherwise. | |
| | | * incx specifies the increment for the elements of x. incx must not be z | |
| | | ero. | |
| | | * beta double precision scalar multiplier applied to vector y. If beta i | |
| | | s | |
| | | * zero, y is not read. | |
| | | * y double precision array of length at least (1+(m-1)*abs(incy)) whe | |
| | | n | |
| | | * trans == 'N' or 'n' and at least (1+(n-1)*abs(incy)) otherwise. I | |
| | | f | |
| | | * beta is zero, y is not read. | |
| | | * incy On entry, incy specifies the increment for the elements of y. inc | |
| | | y | |
| | | * must not be zero. | |
| | | * | |
| | | * Output | |
| | | * ------ | |
| | | * y updated according to y = alpha*op(A)*x + beta*y | |
| | | * | |
| | | * Reference: http://www.netlib.org/blas/dgbmv.f | |
| | | * | |
| | | * Error status for this function can be retrieved via cublasGetError(). | |
| | | * | |
| | | * Error Status | |
| | | * ------------ | |
| | | * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialize | |
| | | d | |
| | | * CUBLAS_STATUS_INVALID_VALUE if n < 0, or if incx or incy == 0 | |
| | | * CUBLAS_STATUS_ARCH_MISMATCH if invoked on device without DP support | |
| | | * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU | |
| | | */ | |
| | | void CUBLASAPI cublasDgbmv (char trans, int m, int n, int kl, int ku, | |
| | | double alpha, const double *A, int lda, | |
| | | const double *x, int incx, double beta, | |
| | | double *y, int incy); | |
| | | | |
| | | /* | |
| | | * void | |
| | | * cublasDtbmv (char uplo, char trans, char diag, int n, int k, const doubl | |
| | | e *A, | |
| | | * int lda, double *x, int incx) | |
| | | * | |
| | | * performs one of the matrix-vector operations x = op(A) * x, where op(A) | |
| | | = A, | |
| | | * or op(A) = transpose(A). x is an n-element double precision vector, and | |
| | | A is | |
| | | * an n x n, unit or non-unit, upper or lower triangular band matrix compos | |
| | | ed | |
| | | * of double precision elements. | |
| | | * | |
| | | * Input | |
| | | * ----- | |
| | | * uplo specifies whether the matrix A is an upper or lower triangular ba | |
| | | nd | |
| | | * matrix. If uplo == 'U' or 'u', A is an upper triangular band matr | |
| | | ix. | |
| | | * If uplo == 'L' or 'l', A is a lower triangular band matrix. | |
| | | * trans specifies op(A). If trans == 'N' or 'n', op(A) = A. If trans == | |
| | | 'T', | |
| | | * 't', 'C', or 'c', op(A) = transpose(A) | |
| | | * diag specifies whether or not matrix A is unit triangular. If diag == | |
| | | 'U' | |
| | | * or 'u', A is assumed to be unit triangular. If diag == 'N' or 'n' | |
| | | , A | |
| | | * is not assumed to be unit triangular. | |
| | | * n specifies the number of rows and columns of the matrix A. n must | |
| | | be | |
| | | * at least zero. | |
| | | * k specifies the number of super- or sub-diagonals. If uplo == 'U' o | |
| | | r | |
| | | * 'u', k specifies the number of super-diagonals. If uplo == 'L' or | |
| | | * 'l', k specifies the number of sub-diagonals. k must be at least | |
| | | * zero. | |
| | | * A double precision array of dimension (lda, n). If uplo == 'U' or ' | |
| | | u', | |
| | | * the leading (k + 1) x n part of the array A must contain the uppe | |
| | | r | |
| | | * triangular band matrix, supplied column by column, with the leadi | |
| | | ng | |
| | | * diagonal of the matrix in row (k + 1) of the array, the first | |
| | | * super-diagonal starting at position 2 in row k, and so on. The to | |
| | | p | |
| | | * left k x k triangle of the array A is not referenced. If uplo == | |
| | | 'L' | |
| | | * or 'l', the leading (k + 1) x n part of the array A must contain | |
| | | the | |
| | | * lower triangular band matrix, supplied column by column, with the | |
| | | * leading diagonal of the matrix in row 1 of the array, the first | |
| | | * sub-diagonal starting at position 1 in row 2, and so on. The botto | |
| | | m | |
| | | * right k x k triangle of the array is not referenced. | |
| | | * lda is the leading dimension of A. It must be at least (k + 1). | |
| | | * x double precision array of length at least (1 + (n - 1) * abs(incx | |
| | | )). | |
| | | * On entry, x contains the source vector. On exit, x is overwritten | |
| | | * with the result vector. | |
| | | * incx specifies the storage spacing for elements of x. incx must not be | |
| | | * zero. | |
| | | * | |
| | | * Output | |
| | | * ------ | |
| | | * x updated according to x = op(A) * x | |
| | | * | |
| | | * Reference: http://www.netlib.org/blas/dtbmv.f | |
| | | * | |
| | | * Error status for this function can be retrieved via cublasGetError(). | |
| | | * | |
| | | * Error Status | |
| | | * ------------ | |
| | | * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialize | |
| | | d | |
| | | * CUBLAS_STATUS_INVALID_VALUE if n or k < 0, or if incx == 0 | |
| | | * CUBLAS_STATUS_ALLOC_FAILED if function cannot allocate enough intern | |
| | | al scratch vector memory | |
| | | * CUBLAS_STATUS_ARCH_MISMATCH if invoked on device without DP support | |
| | | * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU | |
| | | */ | |
| | | void CUBLASAPI cublasDtbmv (char uplo, char trans, char diag, int n, | |
| | | int k, const double *A, int lda, double *x, | |
| | | int incx); | |
| | | | |
| | | /* | |
| | | * void | |
| | | * cublasDtpmv (char uplo, char trans, char diag, int n, const double *AP, | |
| | | * double *x, int incx); | |
| | | * | |
| | | * performs one of the matrix-vector operations x = op(A) * x, where op(A) | |
| | | = A, | |
| | | * or op(A) = transpose(A). x is an n element double precision vector, and | |
| | | A | |
| | | * is an n x n, unit or non-unit, upper or lower triangular matrix composed | |
| | | * of double precision elements. | |
| | | * | |
| | | * Input | |
| | | * ----- | |
| | | * uplo specifies whether the matrix A is an upper or lower triangular | |
| | | * matrix. If uplo == 'U' or 'u', then A is an upper triangular matr | |
| | | ix. | |
| | | * If uplo == 'L' or 'l', then A is a lower triangular matrix. | |
| | | * trans specifies op(A). If trans == 'N' or 'n', op(A) = A. If trans == | |
| | | 'T', | |
| | | * 't', 'C', or 'c', op(A) = transpose(A) | |
| | | * diag specifies whether or not matrix A is unit triangular. If diag == | |
| | | 'U' | |
| | | * or 'u', A is assumed to be unit triangular. If diag == 'N' or 'n' | |
| | | , A | |
| | | * is not assumed to be unit triangular. | |
| | | * n specifies the number of rows and columns of the matrix A. n must | |
| | | be | |
| | | * at least zero. In the current implementation n must not exceed 40 | |
| | | 70. | |
| | | * AP double precision array with at least ((n * (n + 1)) / 2) elements | |
| | | . If | |
| | | * uplo == 'U' or 'u', the array AP contains the upper triangular | |
| | | * matrix A, packed sequentially, column by column; | |
| | | * that is, if i <= j, then A[i,j] is stored in AP[i+(j*(j+1)/2)]. I | |
| | | f | |
| | | * uplo == 'L' or 'l', the array AP contains the lower triangular | |
| | | * matrix A, packed sequentially, column by column; | |
| | | * that is, if i >= j, then A[i,j] is stored in AP[i+((2*n-j+1)*j)/2 | |
| | | ]. | |
| | | * x double precision array of length at least (1 + (n - 1) * abs(incx | |
| | | )). | |
| | | * On entry, x contains the source vector. On exit, x is overwritten | |
| | | * with the result vector. | |
| | | * incx specifies the storage spacing for elements of x. incx must not be | |
| | | * zero. | |
| | | * | |
| | | * Output | |
| | | * ------ | |
| | | * x updated according to x = op(A) * x, | |
| | | * | |
| | | * Reference: http://www.netlib.org/blas/dtpmv.f | |
| | | * | |
| | | * Error status for this function can be retrieved via cublasGetError(). | |
| | | * | |
| | | * Error Status | |
| | | * ------------ | |
| | | * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialize | |
| | | d | |
| | | * CUBLAS_STATUS_INVALID_VALUE if incx == 0 or n < 0 | |
| | | * CUBLAS_STATUS_ALLOC_FAILED if function cannot allocate enough intern | |
| | | al scratch vector memory | |
| | | * CUBLAS_STATUS_ARCH_MISMATCH if invoked on device without DP support | |
| | | * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU | |
| | | */ | |
| | | void CUBLASAPI cublasDtpmv (char uplo, char trans, char diag, int n, | |
| | | const double *AP, double *x, int incx); | |
| | | | |
| | | /* | |
| | | * void | |
| | | * cublasDtpsv (char uplo, char trans, char diag, int n, const double *AP, | |
| | | * double *x, int incx) | |
| | | * | |
| | | * solves one of the systems of equations op(A)*x = b, where op(A) is eithe | |
| | | r | |
| | | * op(A) = A or op(A) = transpose(A). b and x are n element vectors, and A | |
| | | is | |
| | | * an n x n unit or non-unit, upper or lower triangular matrix. No test for | |
| | | * singularity or near-singularity is included in this routine. Such tests | |
| | | * must be performed before calling this routine. | |
| | | * | |
| | | * Input | |
| | | * ----- | |
| | | * uplo specifies whether the matrix is an upper or lower triangular matr | |
| | | ix | |
| | | * as follows: If uplo == 'U' or 'u', A is an upper triangular matri | |
| | | x. | |
| | | * If uplo == 'L' or 'l', A is a lower triangular matrix. | |
| | | * trans specifies op(A). If trans == 'N' or 'n', op(A) = A. If trans == ' | |
| | | T', | |
| | | * 't', 'C', or 'c', op(A) = transpose(A). | |
| | | * diag specifies whether A is unit triangular. If diag == 'U' or 'u', A | |
| | | is | |
| | | * assumed to be unit triangular; that is, diagonal elements are not | |
| | | * read and are assumed to be unity. If diag == 'N' or 'n', A is not | |
| | | * assumed to be unit triangular. | |
| | | * n specifies the number of rows and columns of the matrix A. n must | |
| | | be | |
| | | * at least zero. | |
| | | * AP double precision array with at least ((n*(n+1))/2) elements. If u | |
| | | plo | |
| | | * == 'U' or 'u', the array AP contains the upper triangular matrix | |
| | | A, | |
| | | * packed sequentially, column by column; that is, if i <= j, then | |
| | | * A[i,j] is stored in AP[i+(j*(j+1)/2)]. If uplo == 'L' or 'l', the | |
| | | * array AP contains the lower triangular matrix A, packed sequentia | |
| | | lly, | |
| | | * column by column; that is, if i >= j, then A[i,j] is stored in | |
| | | * AP[i+((2*n-j+1)*j)/2]. When diag = 'U' or 'u', the diagonal eleme | |
| | | nts | |
| | | * of A are not referenced and are assumed to be unity. | |
| | | * x double precision array of length at least (1+(n-1)*abs(incx)). | |
| | | * incx storage spacing between elements of x. It must not be zero. | |
| | | * | |
| | | * Output | |
| | | * ------ | |
| | | * x updated to contain the solution vector x that solves op(A) * x = | |
| | | b. | |
| | | * | |
| | | * Reference: http://www.netlib.org/blas/dtpsv.f | |
| | | * | |
| | | * Error status for this function can be retrieved via cublasGetError(). | |
| | | * | |
| | | * Error Status | |
| | | * ------------ | |
| | | * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialize | |
| | | d | |
| | | * CUBLAS_STATUS_INVALID_VALUE if incx == 0 or if n < 0 or n > 2035 | |
| | | * CUBLAS_STATUS_ARCH_MISMATCH if invoked on device without DP support | |
| | | * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU | |
| | | */ | |
| | | void CUBLASAPI cublasDtpsv (char uplo, char trans, char diag, int n, | |
| | | const double *AP, double *x, int incx); | |
| | | | |
| | | /* | |
| | | * void cublasDtbsv (char uplo, char trans, char diag, int n, int k, | |
| | | * const double *A, int lda, double *x, int incx) | |
| | | * | |
| | | * solves one of the systems of equations op(A)*x = b, where op(A) is eithe | |
| | | r | |
| | | * op(A) = A or op(A) = transpose(A). b and x are n element vectors, and A | |
| | | is | |
| | | * an n x n unit or non-unit, upper or lower triangular band matrix with k | |
| | | + 1 | |
| | | * diagonals. No test for singularity or near-singularity is included in th | |
| | | is | |
| | | * function. Such tests must be performed before calling this function. | |
| | | * | |
| | | * Input | |
| | | * ----- | |
| | | * uplo specifies whether the matrix is an upper or lower triangular band | |
| | | * matrix as follows: If uplo == 'U' or 'u', A is an upper triangula | |
| | | r | |
| | | * band matrix. If uplo == 'L' or 'l', A is a lower triangular band | |
| | | * matrix. | |
| | | * trans specifies op(A). If trans == 'N' or 'n', op(A) = A. If trans == ' | |
| | | T', | |
| | | * 't', 'C', or 'c', op(A) = transpose(A). | |
| | | * diag specifies whether A is unit triangular. If diag == 'U' or 'u', A | |
| | | is | |
| | | * assumed to be unit triangular; that is, diagonal elements are not | |
| | | * read and are assumed to be unity. If diag == 'N' or 'n', A is not | |
| | | * assumed to be unit triangular. | |
| | | * n specifies the number of rows and columns of the matrix A. n must | |
| | | be | |
| | | * at least zero. | |
| | | * k specifies the number of super- or sub-diagonals. If uplo == 'U' o | |
| | | r | |
| | | * 'u', k specifies the number of super-diagonals. If uplo == 'L' or | |
| | | * 'l', k specifies the number of sub-diagonals. k must at least be | |
| | | * zero. | |
| | | * A double precision array of dimension (lda, n). If uplo == 'U' or ' | |
| | | u', | |
| | | * the leading (k + 1) x n part of the array A must contain the uppe | |
| | | r | |
| | | * triangular band matrix, supplied column by column, with the leadi | |
| | | ng | |
| | | * diagonal of the matrix in row (k + 1) of the array, the first sup | |
| | | er- | |
| | | * diagonal starting at position 2 in row k, and so on. The top left | |
| | | * k x k triangle of the array A is not referenced. If uplo == 'L' o | |
| | | r | |
| | | * 'l', the leading (k + 1) x n part of the array A must contain th | |
| | | e | |
| | | * lower triangular band matrix, supplied column by column, with the | |
| | | * leading diagonal of the matrix in row 1 of the array, the first | |
| | | * sub-diagonal starting at position 1 in row 2, and so on. The bott | |
| | | om | |
| | | * right k x k triangle of the array is not referenced. | |
| | | * lda is the leading dimension of A. It must be at least (k + 1). | |
| | | * x double precision array of length at least (1+(n-1)*abs(incx)). | |
| | | * incx storage spacing between elements of x. It must not be zero. | |
| | | * | |
| | | * Output | |
| | | * ------ | |
| | | * x updated to contain the solution vector x that solves op(A) * x = | |
| | | b. | |
| | | * | |
| | | * Reference: http://www.netlib.org/blas/dtbsv.f | |
| | | * | |
| | | * Error status for this function can be retrieved via cublasGetError(). | |
| | | * | |
| | | * Error Status | |
| | | * ------------ | |
| | | * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialize | |
| | | d | |
| | | * CUBLAS_STATUS_INVALID_VALUE if incx == 0, n < 0 or n > 2035 | |
| | | * CUBLAS_STATUS_ARCH_MISMATCH if invoked on device without DP support | |
| | | * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU | |
| | | */ | |
| | | void CUBLASAPI cublasDtbsv (char uplo, char trans, char diag, int n, | |
| | | int k, const double *A, int lda, double *x, | |
| | | int incx); | |
| | | | |
| | | /* | |
| | | * void | |
| | | * cublasDsymv (char uplo, int n, double alpha, const double *A, int lda, | |
| | | * const double *x, int incx, double beta, double *y, int incy | |
| | | ) | |
| | | * | |
| | | * performs the matrix-vector operation | |
| | | * | |
| | | * y = alpha*A*x + beta*y | |
| | | * | |
| | | * Alpha and beta are double precision scalars, and x and y are double | |
| | | * precision vectors, each with n elements. A is a symmetric n x n matrix | |
| | | * consisting of double precision elements that is stored in either upper o | |
| | | r | |
| | | * lower storage mode. | |
| | | * | |
| | | * Input | |
| | | * ----- | |
| | | * uplo specifies whether the upper or lower triangular part of the array | |
| | | A | |
| | | * is to be referenced. If uplo == 'U' or 'u', the symmetric matrix | |
| | | A | |
| | | * is stored in upper storage mode, i.e. only the upper triangular p | |
| | | art | |
| | | * of A is to be referenced while the lower triangular part of A is | |
| | | to | |
| | | * be inferred. If uplo == 'L' or 'l', the symmetric matrix A is sto | |
| | | red | |
| | | * in lower storage mode, i.e. only the lower triangular part of A i | |
| | | s | |
| | | * to be referenced while the upper triangular part of A is to be | |
| | | * inferred. | |
| | | * n specifies the number of rows and the number of columns of the | |
| | | * symmetric matrix A. n must be at least zero. | |
| | | * alpha double precision scalar multiplier applied to A*x. | |
| | | * A double precision array of dimensions (lda, n). If uplo == 'U' or | |
| | | 'u', | |
| | | * the leading n x n upper triangular part of the array A must conta | |
| | | in | |
| | | * the upper triangular part of the symmetric matrix and the strictl | |
| | | y | |
| | | * lower triangular part of A is not referenced. If uplo == 'L' or ' | |
| | | l', | |
| | | * the leading n x n lower triangular part of the array A must conta | |
| | | in | |
| | | * the lower triangular part of the symmetric matrix and the strictl | |
| | | y | |
| | | * upper triangular part of A is not referenced. | |
| | | * lda leading dimension of A. It must be at least max (1, n). | |
| | | * x double precision array of length at least (1 + (n - 1) * abs(incx | |
| | | )). | |
| | | * incx storage spacing between elements of x. incx must not be zero. | |
| | | * beta double precision scalar multiplier applied to vector y. | |
| | | * y double precision array of length at least (1 + (n - 1) * abs(incy | |
| | | )). | |
| | | * If beta is zero, y is not read. | |
| | | * incy storage spacing between elements of y. incy must not be zero. | |
| | | * | |
| | | * Output | |
| | | * ------ | |
| | | * y updated according to y = alpha*A*x + beta*y | |
| | | * | |
| | | * Reference: http://www.netlib.org/blas/dsymv.f | |
| | | * | |
| | | * Error status for this function can be retrieved via cublasGetError(). | |
| | | * | |
| | | * Error Status | |
| | | * ------------ | |
| | | * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialize | |
| | | d | |
| | | * CUBLAS_STATUS_INVALID_VALUE if n < 0, or if incx or incy == 0 | |
| | | * CUBLAS_STATUS_ARCH_MISMATCH if invoked on device without DP support | |
| | | * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU | |
| | | */ | |
| | | void CUBLASAPI cublasDsymv (char uplo, int n, double alpha, | |
| | | const double *A, int lda, const double *x, | |
| | | int incx, double beta, double *y, int incy); | |
| | | | |
| | | /* | |
| | | * void | |
| | | * cublasDsbmv (char uplo, int n, int k, double alpha, const double *A, int | |
| | | lda, | |
| | | * const double *x, int incx, double beta, double *y, int incy | |
| | | ) | |
| | | * | |
| | | * performs the matrix-vector operation | |
| | | * | |
| | | * y := alpha*A*x + beta*y | |
| | | * | |
| | | * alpha and beta are double precision scalars. x and y are double precisio | |
| | | n | |
| | | * vectors with n elements. A is an n by n symmetric band matrix consisting | |
| | | * of double precision elements, with k super-diagonals and the same number | |
| | | * of subdiagonals. | |
| | | * | |
| | | * Input | |
| | | * ----- | |
| | | * uplo specifies whether the upper or lower triangular part of the symme | |
| | | tric | |
| | | * band matrix A is being supplied. If uplo == 'U' or 'u', the upper | |
| | | * triangular part is being supplied. If uplo == 'L' or 'l', the low | |
| | | er | |
| | | * triangular part is being supplied. | |
| | | * n specifies the number of rows and the number of columns of the | |
| | | * symmetric matrix A. n must be at least zero. | |
| | | * k specifies the number of super-diagonals of matrix A. Since the ma | |
| | | trix | |
| | | * is symmetric, this is also the number of sub-diagonals. k must be | |
| | | at | |
| | | * least zero. | |
| | | * alpha double precision scalar multiplier applied to A*x. | |
| | | * A double precision array of dimensions (lda, n). When uplo == 'U' o | |
| | | r | |
| | | * 'u', the leading (k + 1) x n part of array A must contain the upp | |
| | | er | |
| | | * triangular band of the symmetric matrix, supplied column by colum | |
| | | n, | |
| | | * with the leading diagonal of the matrix in row (k+1) of the array | |
| | | , | |
| | | * the first super-diagonal starting at position 2 in row k, and so | |
| | | on. | |
| | | * The top left k x k triangle of the array A is not referenced. Whe | |
| | | n | |
| | | * uplo == 'L' or 'l', the leading (k + 1) x n part of the array A m | |
| | | ust | |
| | | * contain the lower triangular band part of the symmetric matrix, | |
| | | * supplied column by column, with the leading diagonal of the matri | |
| | | x in | |
| | | * row 1 of the array, the first sub-diagonal starting at position 1 | |
| | | in | |
| | | * row 2, and so on. The bottom right k x k triangle of the array A | |
| | | is | |
| | | * not referenced. | |
| | | * lda leading dimension of A. lda must be at least (k + 1). | |
| | | * x double precision array of length at least (1 + (n - 1) * abs(incx | |
| | | )). | |
| | | * incx storage spacing between elements of x. incx must not be zero. | |
| | | * beta double precision scalar multiplier applied to vector y. If beta i | |
| | | s | |
| | | * zero, y is not read. | |
| | | * y double precision array of length at least (1 + (n - 1) * abs(incy | |
| | | )). | |
| | | * If beta is zero, y is not read. | |
| | | * incy storage spacing between elements of y. incy must not be zero. | |
| | | * | |
| | | * Output | |
| | | * ------ | |
| | | * y updated according to y = alpha*A*x + beta*y | |
| | | * | |
| | | * Reference: http://www.netlib.org/blas/dsbmv.f | |
| | | * | |
| | | * Error status for this function can be retrieved via cublasGetError(). | |
| | | * | |
| | | * Error Status | |
| | | * ------------ | |
| | | * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialize | |
| | | d | |
| | | * CUBLAS_STATUS_INVALID_VALUE if k or n < 0, or if incx or incy == 0 | |
| | | * CUBLAS_STATUS_ARCH_MISMATCH if invoked on device without DP support | |
| | | * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU | |
| | | */ | |
| | | void CUBLASAPI cublasDsbmv (char uplo, int n, int k, double alpha, | |
| | | const double *A, int lda, const double *x, | |
| | | int incx, double beta, double *y, int incy); | |
| | | | |
| | | /* | |
| | | * void | |
| | | * cublasDspmv (char uplo, int n, double alpha, const double *AP, const dou | |
| | | ble *x, | |
| | | * int incx, double beta, double *y, int incy) | |
| | | * | |
| | | * performs the matrix-vector operation | |
| | | * | |
| | | * y = alpha * A * x + beta * y | |
| | | * | |
| | | * Alpha and beta are double precision scalars, and x and y are double | |
| | | * precision vectors with n elements. A is a symmetric n x n matrix | |
| | | * consisting of double precision elements that is supplied in packed form. | |
| | | * | |
| | | * Input | |
| | | * ----- | |
| | | * uplo specifies whether the matrix data is stored in the upper or the l | |
| | | ower | |
| | | * triangular part of array AP. If uplo == 'U' or 'u', then the uppe | |
| | | r | |
| | | * triangular part of A is supplied in AP. If uplo == 'L' or 'l', th | |
| | | en | |
| | | * the lower triangular part of A is supplied in AP. | |
| | | * n specifies the number of rows and columns of the matrix A. It must | |
| | | be | |
| | | * at least zero. | |
| | | * alpha double precision scalar multiplier applied to A*x. | |
| | | * AP double precision array with at least ((n * (n + 1)) / 2) elements | |
| | | . If | |
| | | * uplo == 'U' or 'u', the array AP contains the upper triangular pa | |
| | | rt | |
| | | * of the symmetric matrix A, packed sequentially, column by column; | |
| | | * that is, if i <= j, then A[i,j] is stored in AP[i+(j*(j+1)/2)]. I | |
| | | f | |
| | | * uplo == 'L' or 'l', the array AP contains the lower triangular pa | |
| | | rt | |
| | | * of the symmetric matrix A, packed sequentially, column by column; | |
| | | * that is, if i >= j, then A[i,j] is stored in AP[i+((2*n-j+1)*j)/2 | |
| | | ]. | |
| | | * x double precision array of length at least (1 + (n - 1) * abs(incx | |
| | | )). | |
| | | * incx storage spacing between elements of x. incx must not be zero. | |
| | | * beta double precision scalar multiplier applied to vector y; | |
| | | * y double precision array of length at least (1 + (n - 1) * abs(incy | |
| | | )). | |
| | | * If beta is zero, y is not read. | |
| | | * incy storage spacing between elements of y. incy must not be zero. | |
| | | * | |
| | | * Output | |
| | | * ------ | |
| | | * y updated according to y = alpha*A*x + beta*y | |
| | | * | |
| | | * Reference: http://www.netlib.org/blas/dspmv.f | |
| | | * | |
| | | * Error status for this function can be retrieved via cublasGetError(). | |
| | | * | |
| | | * Error Status | |
| | | * ------------ | |
| | | * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialize | |
| | | d | |
| | | * CUBLAS_STATUS_INVALID_VALUE if n < 0, or if incx or incy == 0 | |
| | | * CUBLAS_STATUS_ARCH_MISMATCH if invoked on device without DP support | |
| | | * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU | |
| | | */ | |
| | | void CUBLASAPI cublasDspmv (char uplo, int n, double alpha, | |
| | | const double *AP, const double *x, | |
| | | int incx, double beta, double *y, int incy); | |
| | | | |
| /* ---------------- CUBLAS double precision BLAS3 functions ---------------
- */ | | /* ---------------- CUBLAS double precision BLAS3 functions ---------------
- */ | |
| | | | |
| /* | | /* | |
| * void | | * void | |
| * cublasDgemm (char transa, char transb, int m, int n, int k, double alpha
, | | * cublasDgemm (char transa, char transb, int m, int n, int k, double alpha
, | |
| * const double *A, int lda, const double *B, int ldb, | | * const double *A, int lda, const double *B, int ldb, | |
| * double beta, double *C, int ldc) | | * double beta, double *C, int ldc) | |
| * | | * | |
| * computes the product of matrix A and matrix B, multiplies the result | | * computes the product of matrix A and matrix B, multiplies the result | |
| * by scalar alpha, and adds the sum to the product of matrix C and | | * by scalar alpha, and adds the sum to the product of matrix C and | |
| | | | |
| skipping to change at line 3945 | | skipping to change at line 7051 | |
| * CUBLAS_STATUS_ARCH_MISMATCH if invoked on device without DP support | | * CUBLAS_STATUS_ARCH_MISMATCH if invoked on device without DP support | |
| * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU | | * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU | |
| */ | | */ | |
| void CUBLASAPI cublasDsymm (char side, char uplo, int m, int n, | | void CUBLASAPI cublasDsymm (char side, char uplo, int m, int n, | |
| double alpha, const double *A, int lda, | | double alpha, const double *A, int lda, | |
| const double *B, int ldb, double beta, | | const double *B, int ldb, double beta, | |
| double *C, int ldc); | | double *C, int ldc); | |
| | | | |
| /* | | /* | |
| * void | | * void | |
|
| | | * cublasZsymm (char side, char uplo, int m, int n, cuDoubleComplex alpha, | |
| | | * const cuDoubleComplex *A, int lda, const cuDoubleComplex *B | |
| | | , int ldb, | |
| | | * cuDoubleComplex beta, cuDoubleComplex *C, int ldc); | |
| | | * | |
| | | * performs one of the matrix-matrix operations | |
| | | * | |
| | | * C = alpha * A * B + beta * C, or | |
| | | * C = alpha * B * A + beta * C, | |
| | | * | |
| | | * where alpha and beta are double precision complex scalars, A is a symmet | |
| | | ric matrix | |
| | | * consisting of double precision complex elements and stored in either low | |
| | | er or upper | |
| | | * storage mode, and B and C are m x n matrices consisting of double precis | |
| | | ion | |
| | | * complex elements. | |
| | | * | |
| | | * Input | |
| | | * ----- | |
| | | * side specifies whether the symmetric matrix A appears on the left | |
| | | * hand side or right hand side of matrix B, as follows. If side == | |
| | | 'L' | |
| | | * or 'l', then C = alpha * A * B + beta * C. If side = 'R' or 'r', | |
| | | * then C = alpha * B * A + beta * C. | |
| | | * uplo specifies whether the symmetric matrix A is stored in upper or lo | |
| | | wer | |
| | | * storage mode, as follows. If uplo == 'U' or 'u', only the upper | |
| | | * triangular part of the symmetric matrix is to be referenced, and | |
| | | the | |
| | | * elements of the strictly lower triangular part are to be inferred | |
| | | from | |
| | | * those in the upper triangular part. If uplo == 'L' or 'l', only t | |
| | | he | |
| | | * lower triangular part of the symmetric matrix is to be referenced | |
| | | , | |
| | | * and the elements of the strictly upper triangular part are to be | |
| | | * inferred from those in the lower triangular part. | |
| | | * m specifies the number of rows of the matrix C, and the number of r | |
| | | ows | |
| | | * of matrix B. It also specifies the dimensions of symmetric matrix | |
| | | A | |
| | | * when side == 'L' or 'l'. m must be at least zero. | |
| | | * n specifies the number of columns of the matrix C, and the number o | |
| | | f | |
| | | * columns of matrix B. It also specifies the dimensions of symmetri | |
| | | c | |
| | | * matrix A when side == 'R' or 'r'. n must be at least zero. | |
| | | * alpha double precision complex scalar multiplier applied to A * B, or B * A | |
| | | * A double precision array of dimensions (lda, ka), where ka is m whe | |
| | | n | |
| | | * side == 'L' or 'l' and is n otherwise. If side == 'L' or 'l' the | |
| | | * leading m x m part of array A must contain the symmetric matrix, | |
| | | * such that when uplo == 'U' or 'u', the leading m x m part stores | |
| | | the | |
| | | * upper triangular part of the symmetric matrix, and the strictly l | |
| | | ower | |
| | | * triangular part of A is not referenced, and when uplo == 'L' or ' | |
| | | l', | |
| | | * the leading m x m part stores the lower triangular part of the | |
| | | * symmetric matrix and the strictly upper triangular part is not | |
| | | * referenced. If side == 'R' or 'r' the leading n x n part of array | |
| | | A | |
| | | * must contain the symmetric matrix, such that when uplo == 'U' or | |
| | | 'u', | |
| | | * the leading n x n part stores the upper triangular part of the | |
| | | * symmetric matrix and the strictly lower triangular part of A is n | |
| | | ot | |
| | | * referenced, and when uplo == 'L' or 'l', the leading n x n part | |
| | | * stores the lower triangular part of the symmetric matrix and the | |
| | | * strictly upper triangular part is not referenced. | |
| | | * lda leading dimension of A. When side == 'L' or 'l', it must be at le | |
| | | ast | |
| | | * max(1, m) and at least max(1, n) otherwise. | |
| | | * B double precision array of dimensions (ldb, n). On entry, the lead | |
| | | ing | |
| | | * m x n part of the array contains the matrix B. | |
| | | * ldb leading dimension of B. It must be at least max (1, m). | |
| | | * beta double precision scalar multiplier applied to C. If beta is zero, | |
| | | C | |
| | | * does not have to be a valid input | |
| | | * C double precision array of dimensions (ldc, n) | |
| | | * ldc leading dimension of C. Must be at least max(1, m) | |
| | | * | |
| | | * Output | |
| | | * ------ | |
| | | * C updated according to C = alpha * A * B + beta * C, or C = alpha * | |
| | | * B * A + beta * C | |
| | | * | |
| | | * Reference: http://www.netlib.org/blas/zsymm.f | |
| | | * | |
| | | * Error status for this function can be retrieved via cublasGetError(). | |
| | | * | |
| | | * Error Status | |
| | | * ------------ | |
| | | * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialize | |
| | | d | |
| | | * CUBLAS_STATUS_INVALID_VALUE if m or n are < 0 | |
| | | * CUBLAS_STATUS_ARCH_MISMATCH if invoked on device without DP support | |
| | | * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU | |
| | | */ | |
| | | void CUBLASAPI cublasZsymm (char side, char uplo, int m, int n, | |
| | | cuDoubleComplex alpha, const cuDoubleComplex *A | |
| | | , int lda, | |
| | | const cuDoubleComplex *B, int ldb, cuDoubleComp | |
| | | lex beta, | |
| | | cuDoubleComplex *C, int ldc); | |
| | | | |
| | | /* | |
| | | * void | |
| * cublasDsyrk (char uplo, char trans, int n, int k, double alpha, | | * cublasDsyrk (char uplo, char trans, int n, int k, double alpha, | |
| * const double *A, int lda, double beta, double *C, int ldc) | | * const double *A, int lda, double beta, double *C, int ldc) | |
| * | | * | |
| * performs one of the symmetric rank k operations | | * performs one of the symmetric rank k operations | |
| * | | * | |
| * C = alpha * A * transpose(A) + beta * C, or | | * C = alpha * A * transpose(A) + beta * C, or | |
| * C = alpha * transpose(A) * A + beta * C. | | * C = alpha * transpose(A) * A + beta * C. | |
| * | | * | |
| * Alpha and beta are double precision scalars. C is an n x n symmetric mat
rix | | * Alpha and beta are double precision scalars. C is an n x n symmetric mat
rix | |
| * consisting of double precision elements and stored in either lower or | | * consisting of double precision elements and stored in either lower or | |
| | | | |
| skipping to change at line 3993 | | skipping to change at line 7182 | |
| * otherwise the leading k x n part of the array must contains the | | * otherwise the leading k x n part of the array must contains the | |
| * matrix A. | | * matrix A. | |
| * lda leading dimension of A. When trans == 'N' or 'n' then lda must be
at | | * lda leading dimension of A. When trans == 'N' or 'n' then lda must be
at | |
| * least max(1, n). Otherwise lda must be at least max(1, k). | | * least max(1, n). Otherwise lda must be at least max(1, k). | |
| * beta double precision scalar multiplier applied to C. If beta is zero
, C | | * beta double precision scalar multiplier applied to C. If beta is zero
, C | |
| * does not have to be a valid input | | * does not have to be a valid input | |
| * C double precision array of dimensions (ldc, n). If uplo = 'U' or '
u', | | * C double precision array of dimensions (ldc, n). If uplo = 'U' or '
u', | |
| * the leading n x n triangular part of the array C must contain the | | * the leading n x n triangular part of the array C must contain the | |
| * upper triangular part of the symmetric matrix C and the strictly | | * upper triangular part of the symmetric matrix C and the strictly | |
| * lower triangular part of C is not referenced. On exit, the upper | | * lower triangular part of C is not referenced. On exit, the upper | |
|
| * triangular part of C is overwritten by the upper trinagular part
of | | * triangular part of C is overwritten by the upper triangular part
of | |
| * the updated matrix. If uplo = 'L' or 'l', the leading n x n | | * the updated matrix. If uplo = 'L' or 'l', the leading n x n | |
| * triangular part of the array C must contain the lower triangular
part | | * triangular part of the array C must contain the lower triangular
part | |
| * of the symmetric matrix C and the strictly upper triangular part
of C | | * of the symmetric matrix C and the strictly upper triangular part
of C | |
| * is not referenced. On exit, the lower triangular part of C is | | * is not referenced. On exit, the lower triangular part of C is | |
|
| * overwritten by the lower trinagular part of the updated matrix. | | * overwritten by the lower triangular part of the updated matrix. | |
| * ldc leading dimension of C. It must be at least max(1, n). | | * ldc leading dimension of C. It must be at least max(1, n). | |
| * | | * | |
| * Output | | * Output | |
| * ------ | | * ------ | |
| * C updated according to C = alpha * A * transpose(A) + beta * C, or
C = | | * C updated according to C = alpha * A * transpose(A) + beta * C, or
C = | |
| * alpha * transpose(A) * A + beta * C | | * alpha * transpose(A) * A + beta * C | |
| * | | * | |
| * Reference: http://www.netlib.org/blas/dsyrk.f | | * Reference: http://www.netlib.org/blas/dsyrk.f | |
| * | | * | |
| * Error status for this function can be retrieved via cublasGetError(). | | * Error status for this function can be retrieved via cublasGetError(). | |
| | | | |
| skipping to change at line 4071 | | skipping to change at line 7260 | |
| * otherwise the leading k x n part of the array must contain the | | * otherwise the leading k x n part of the array must contain the | |
| * matrix A. | | * matrix A. | |
| * lda leading dimension of A. When trans == 'N' or 'n' then lda must be
at | | * lda leading dimension of A. When trans == 'N' or 'n' then lda must be
at | |
| * least max(1, n). Otherwise lda must be at least max(1, k). | | * least max(1, n). Otherwise lda must be at least max(1, k). | |
| * beta double precision complex scalar multiplier applied to C. If beta
is zero, C | | * beta double precision complex scalar multiplier applied to C. If beta
is zero, C | |
| * does not have to be a valid input | | * does not have to be a valid input | |
| * C double precision complex array of dimensions (ldc, n). If uplo =
'U' or 'u', | | * C double precision complex array of dimensions (ldc, n). If uplo =
'U' or 'u', | |
| * the leading n x n triangular part of the array C must contain the | | * the leading n x n triangular part of the array C must contain the | |
| * upper triangular part of the symmetric matrix C and the strictly | | * upper triangular part of the symmetric matrix C and the strictly | |
| * lower triangular part of C is not referenced. On exit, the upper | | * lower triangular part of C is not referenced. On exit, the upper | |
|
| * triangular part of C is overwritten by the upper trinagular part
of | | * triangular part of C is overwritten by the upper triangular part
of | |
| * the updated matrix. If uplo = 'L' or 'l', the leading n x n | | * the updated matrix. If uplo = 'L' or 'l', the leading n x n | |
| * triangular part of the array C must contain the lower triangular
part | | * triangular part of the array C must contain the lower triangular
part | |
| * of the symmetric matrix C and the strictly upper triangular part
of C | | * of the symmetric matrix C and the strictly upper triangular part
of C | |
| * is not referenced. On exit, the lower triangular part of C is | | * is not referenced. On exit, the lower triangular part of C is | |
|
| * overwritten by the lower trinagular part of the updated matrix. | | * overwritten by the lower triangular part of the updated matrix. | |
| * ldc leading dimension of C. It must be at least max(1, n). | | * ldc leading dimension of C. It must be at least max(1, n). | |
| * | | * | |
| * Output | | * Output | |
| * ------ | | * ------ | |
| * C updated according to C = alpha * A * transpose(A) + beta * C, or
C = | | * C updated according to C = alpha * A * transpose(A) + beta * C, or
C = | |
| * alpha * transpose(A) * A + beta * C | | * alpha * transpose(A) * A + beta * C | |
| * | | * | |
| * Reference: http://www.netlib.org/blas/zsyrk.f | | * Reference: http://www.netlib.org/blas/zsyrk.f | |
| * | | * | |
| * Error status for this function can be retrieved via cublasGetError(). | | * Error status for this function can be retrieved via cublasGetError(). | |
| | | | |
| skipping to change at line 4100 | | skipping to change at line 7289 | |
| * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialize
d | | * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialize
d | |
| * CUBLAS_STATUS_INVALID_VALUE if n < 0 or k < 0 | | * CUBLAS_STATUS_INVALID_VALUE if n < 0 or k < 0 | |
| * CUBLAS_STATUS_ARCH_MISMATCH if invoked on device without DP support | | * CUBLAS_STATUS_ARCH_MISMATCH if invoked on device without DP support | |
| * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU | | * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU | |
| */ | | */ | |
| void CUBLASAPI cublasZsyrk (char uplo, char trans, int n, int k, | | void CUBLASAPI cublasZsyrk (char uplo, char trans, int n, int k, | |
| cuDoubleComplex alpha, | | cuDoubleComplex alpha, | |
| const cuDoubleComplex *A, int lda, | | const cuDoubleComplex *A, int lda, | |
| cuDoubleComplex beta, | | cuDoubleComplex beta, | |
| cuDoubleComplex *C, int ldc); | | cuDoubleComplex *C, int ldc); | |
|
| | | /* | |
| | | * void | |
| | | * cublasZsyr2k (char uplo, char trans, int n, int k, cuDoubleComplex alpha | |
| | | , | |
| | | * const cuDoubleComplex *A, int lda, const cuDoubleComplex * | |
| | | B, int ldb, | |
| | | * cuDoubleComplex beta, cuDoubleComplex *C, int ldc) | |
| | | * | |
| | | * performs one of the symmetric rank 2k operations | |
| | | * | |
| | | * C = alpha * A * transpose(B) + alpha * B * transpose(A) + beta * C, o | |
| | | r | |
| | | * C = alpha * transpose(A) * B + alpha * transpose(B) * A + beta * C. | |
| | | * | |
| | | * Alpha and beta are double precision complex scalars. C is an n x n symme | |
| | | tric matrix | |
| | | * consisting of double precision complex elements and stored in either low | |
| | | er or upper | |
| | | * storage mode. A and B are matrices consisting of double precision comple | |
| | | x elements | |
| | | * with dimension of n x k in the first case, and k x n in the second case. | |
| | | * | |
| | | * Input | |
| | | * ----- | |
| | | * uplo specifies whether the symmetric matrix C is stored in upper or lo | |
| | | wer | |
| | | * storage mode, as follows. If uplo == 'U' or 'u', only the upper | |
| | | * triangular part of the symmetric matrix is to be referenced, and | |
| | | the | |
| | | * elements of the strictly lower triangular part are to be inferred | |
| | | from | |
| | | * those in the upper triangular part. If uplo == 'L' or 'l', only t | |
| | | he | |
| | | * lower triangular part of the symmetric matrix is to be referenced | |
| | | , | |
| | | * and the elements of the strictly upper triangular part are to be | |
| | | * inferred from those in the lower triangular part. | |
| | | * trans specifies the operation to be performed. If trans == 'N' or 'n', | |
| | | * C = alpha * A * transpose(B) + alpha * B * transpose(A) + beta * | |
| | | C, | |
| | | * If trans == 'T', 't', 'C', or 'c', C = alpha * transpose(A) * B + | |
| | | * alpha * transpose(B) * A + beta * C. | |
| | | * n specifies the number of rows and the number of columns of matrix C. | |
| | | If | |
| | | * trans == 'N' or 'n', n specifies the number of rows of matrix A. | |
| | | If | |
| | | * trans == 'T', 't', 'C', or 'c', n specifies the number of columns of matrix | |
| | | A. | |
| | | * n must be at least zero. | |
| | | * k If trans == 'N' or 'n', k specifies the number of columns of matrix | |
| | | A. | |
| | | * If trans == 'T', 't', 'C', or 'c', k specifies the number of rows | |
| | | of | |
| | | * matrix A. k must be at least zero. | |
| | | * alpha double precision complex scalar multiplier. | |
| | | * A double precision complex array of dimensions (lda, ka), where ka is k whe | |
| | | n | |
| | | * trans == 'N' or 'n', and is n otherwise. When trans == 'N' or 'n' | |
| | | , | |
| | | * the leading n x k part of array A must contain the matrix A, | |
| | | * otherwise the leading k x n part of the array must contain the ma | |
| | | trix | |
| | | * A. | |
| | | * lda leading dimension of A. When trans == 'N' or 'n' then lda must be | |
| | | at | |
| | | * least max(1, n). Otherwise lda must be at least max(1,k). | |
| | | * B double precision complex array of dimensions (ldb, kb), where kb is k whe | |
| | | n | |
| | | * trans == 'N' or 'n', and is n otherwise. When trans == 'N' or 'n' | |
| | | , | |
| | | * the leading n x k part of array B must contain the matrix B, | |
| | | * otherwise the leading k x n part of the array must contain the ma | |
| | | trix | |
| | | * B. | |
| | | * ldb leading dimension of B. When trans == 'N' or 'n' then ldb must be | |
| | | at | |
| | | * least max(1, n). Otherwise ldb must be at least max(1, k). | |
| | | * beta double precision complex scalar multiplier applied to C. If beta is zero, | |
| | | C | |
| | | * does not have to be a valid input. | |
| | | * C double precision complex array of dimensions (ldc, n). If uplo == 'U' or | |
| | | 'u', | |
| | | * the leading n x n triangular part of the array C must contain the | |
| | | * upper triangular part of the symmetric matrix C and the strictly | |
| | | * lower triangular part of C is not referenced. On exit, the upper | |
| | | * triangular part of C is overwritten by the upper triangular part | |
| | | of | |
| | | * the updated matrix. If uplo == 'L' or 'l', the leading n x n | |
| | | * triangular part of the array C must contain the lower triangular | |
| | | part | |
| | | * of the symmetric matrix C and the strictly upper triangular part | |
| | | of C | |
| | | * is not referenced. On exit, the lower triangular part of C is | |
| | | * overwritten by the lower triangular part of the updated matrix. | |
| | | * ldc leading dimension of C. Must be at least max(1, n). | |
| | | * | |
| | | * Output | |
| | | * ------ | |
| | | * C updated according to alpha*A*transpose(B) + alpha*B*transpose(A) | |
| | | + | |
| | | * beta*C or alpha*transpose(A)*B + alpha*transpose(B)*A + beta*C | |
| | | * | |
| | | * Reference: http://www.netlib.org/blas/zsyr2k.f | |
| | | * | |
| | | * Error status for this function can be retrieved via cublasGetError(). | |
| | | * | |
| | | * Error Status | |
| | | * ------------ | |
| | | * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialize | |
| | | d | |
| | | * CUBLAS_STATUS_INVALID_VALUE if n < 0 or k < 0 | |
| | | * CUBLAS_STATUS_ARCH_MISMATCH if invoked on device without DP support | |
| | | * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU | |
| | | */ | |
| | | void CUBLASAPI cublasZsyr2k (char uplo, char trans, int n, int k, | |
| | | cuDoubleComplex alpha, const cuDoubleComplex * | |
| | | A, int lda, | |
| | | const cuDoubleComplex *B, int ldb, cuDoubleCom | |
| | | plex beta, | |
| | | cuDoubleComplex *C, int ldc); | |
| | | /* | |
| | | * void | |
| | | * cublasZher2k (char uplo, char trans, int n, int k, cuDoubleComplex alpha | |
| | | , | |
| | | * const cuDoubleComplex *A, int lda, const cuDoubleComplex * | |
| | | B, int ldb, | |
| | | * double beta, cuDoubleComplex *C, int ldc) | |
| | | * | |
| | | * performs one of the hermitian rank 2k operations | |
| | | * | |
| | | * C = alpha * A * conjugate(transpose(B)) | |
| | | * + conjugate(alpha) * B * conjugate(transpose(A)) | |
| | | * + beta * C , | |
| | | * or | |
| | | * C = alpha * conjugate(transpose(A)) * B | |
| | | * + conjugate(alpha) * conjugate(transpose(B)) * A | |
| | | * + beta * C. | |
| | | * | |
| | | * Alpha is double precision complex scalar whereas Beta is a double precis | |
| | | ion real scalar. | |
| | | * C is an n x n hermitian matrix consisting of double precision complex el | |
| | | ements and | |
| | | * stored in either lower or upper storage mode. A and B are matrices consi | |
| | | sting of | |
| | | * double precision complex elements with dimension of n x k in the first c | |
| | | ase, | |
| | | * and k x n in the second case. | |
| | | * | |
| | | * Input | |
| | | * ----- | |
| | | * uplo specifies whether the hermitian matrix C is stored in upper or lo | |
| | | wer | |
| | | * storage mode, as follows. If uplo == 'U' or 'u', only the upper | |
| | | * triangular part of the hermitian matrix is to be referenced, and | |
| | | the | |
| | | * elements of the strictly lower triangular part are to be inferred | |
| | | from | |
| | | * those in the upper triangular part. If uplo == 'L' or 'l', only t | |
| | | he | |
| | | * lower triangular part of the hermitian matrix is to be referenced | |
| | | , | |
| | | * and the elements of the strictly upper triangular part are to be | |
| | | * inferred from those in the lower triangular part. | |
| | | * trans specifies the operation to be performed. If trans == 'N' or 'n', | |
| | | * C = alpha * A * conjugate(transpose(B)) | |
| | | * + conjugate(alpha) * B * conjugate(transpose(A)) | |
| | | * + beta * C . | |
| | | * If trans == 'T', 't', 'C', or 'c', | |
| | | * C = alpha * conjugate(transpose(A)) * B | |
| | | * + conjugate(alpha) * conjugate(transpose(B)) * A | |
| | | * + beta * C. | |
| | | * n specifies the number of rows and the number of columns of matrix C. | |
| | | If | |
| | | * trans == 'N' or 'n', n specifies the number of rows of matrix A. | |
| | | If | |
| | | * trans == 'T', 't', 'C', or 'c', n specifies the number of columns of matrix | |
| | | A. | |
| | | * n must be at least zero. | |
| | | * k If trans == 'N' or 'n', k specifies the number of columns of matrix | |
| | | A. | |
| | | * If trans == 'T', 't', 'C', or 'c', k specifies the number of rows | |
| | | of | |
| | | * matrix A. k must be at least zero. | |
| | | * alpha double precision complex scalar multiplier. | |
| | | * A double precision complex array of dimensions (lda, ka), where ka is k whe | |
| | | n | |
| | | * trans == 'N' or 'n', and is n otherwise. When trans == 'N' or 'n' | |
| | | , | |
| | | * the leading n x k part of array A must contain the matrix A, | |
| | | * otherwise the leading k x n part of the array must contain the ma | |
| | | trix | |
| | | * A. | |
| | | * lda leading dimension of A. When trans == 'N' or 'n' then lda must be | |
| | | at | |
| | | * least max(1, n). Otherwise lda must be at least max(1,k). | |
| | | * B double precision complex array of dimensions (ldb, kb), where kb is k whe | |
| | | n | |
| | | * trans == 'N' or 'n', and is n otherwise. When trans == 'N' or 'n' | |
| | | , | |
| | | * the leading n x k part of array B must contain the matrix B, | |
| | | * otherwise the leading k x n part of the array must contain the ma | |
| | | trix | |
| | | * B. | |
| | | * ldb leading dimension of B. When trans == 'N' or 'n' then ldb must be | |
| | | at | |
| | | * least max(1, n). Otherwise ldb must be at least max(1, k). | |
| | | * beta double precision real scalar multiplier applied to C. If beta is zero, | |
| | | C | |
| | | * does not have to be a valid input. | |
| | | * C double precision complex array of dimensions (ldc, n). If uplo == 'U' or | |
| | | 'u', | |
| | | * the leading n x n triangular part of the array C must contain the | |
| | | * upper triangular part of the hermitian matrix C and the strictly | |
| | | * lower triangular part of C is not referenced. On exit, the upper | |
| | | * triangular part of C is overwritten by the upper triangular part | |
| | | of | |
| | | * the updated matrix. If uplo == 'L' or 'l', the leading n x n | |
| | | * triangular part of the array C must contain the lower triangular | |
| | | part | |
| | | * of the hermitian matrix C and the strictly upper triangular part | |
| | | of C | |
| | | * is not referenced. On exit, the lower triangular part of C is | |
| | | * overwritten by the lower triangular part of the updated matrix. | |
| | | * The imaginary parts of the diagonal elements need | |
| | | * not be set, they are assumed to be zero, and on exit they | |
| | | * are set to zero. | |
| | | * ldc leading dimension of C. Must be at least max(1, n). | |
| | | * | |
| | | * Output | |
| | | * ------ | |
| | | * C updated according to alpha*A*conjugate(transpose(B)) | |
| | | * + conjugate(alpha)*B*conjugate(transpose(A)) + beta*C or | |
| | | * alpha*conjugate(transpose(A))*B + conjugate(alpha)*conjugate(tran | |
| | | spose(B))*A | |
| | | * + beta*C. | |
| | | * | |
| | | * Reference: http://www.netlib.org/blas/zher2k.f | |
| | | * | |
| | | * Error status for this function can be retrieved via cublasGetError(). | |
| | | * | |
| | | * Error Status | |
| | | * ------------ | |
| | | * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialize | |
| | | d | |
| | | * CUBLAS_STATUS_INVALID_VALUE if n < 0 or k < 0 | |
| | | * CUBLAS_STATUS_ARCH_MISMATCH if invoked on device without DP support | |
| | | * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU | |
| | | */ | |
| | | void CUBLASAPI cublasZher2k (char uplo, char trans, int n, int k, | |
| | | cuDoubleComplex alpha, const cuDoubleComplex * | |
| | | A, int lda, | |
| | | const cuDoubleComplex *B, int ldb, double beta | |
| | | , | |
| | | cuDoubleComplex *C, int ldc); | |
| | | | |
| | | /* | |
| | | * void | |
| | | * cublasZher (char uplo, int n, double alpha, const cuDoubleComplex *x, in | |
| | | t incx, | |
| | | * cuDoubleComplex *A, int lda) | |
| | | * | |
| | | * performs the hermitian rank 1 operation | |
| | | * | |
| | | * A = alpha * x * conjugate(transpose(x)) + A, | |
| | | * | |
| | | * where alpha is a double precision real scalar, x is an n element double | |
| | | * precision complex vector and A is an n x n hermitian matrix consisting o | |
| | | f | |
| | | * double precision complex elements. Matrix A is stored in column major fo | |
| | | rmat, | |
| | | * and lda is the leading dimension of the two-dimensional array | |
| | | * containing A. | |
| | | * | |
| | | * Input | |
| | | * ----- | |
| | | * uplo specifies whether the matrix data is stored in the upper or | |
| | | * the lower triangular part of array A. If uplo = 'U' or 'u', | |
| | | * then only the upper triangular part of A may be referenced. | |
| | | * If uplo = 'L' or 'l', then only the lower triangular part of | |
| | | * A may be referenced. | |
| | | * n specifies the number of rows and columns of the matrix A. It | |
| | | * must be at least 0. | |
| | | * alpha double precision real scalar multiplier applied to | |
| | | * x * conjugate(transpose(x)) | |
| | | * x double precision complex array of length at least (1 + (n - 1) * | |
| | | abs(incx)) | |
| | | * incx specifies the storage spacing between elements of x. incx must | |
| | | * not be zero. | |
| | | * A double precision complex array of dimensions (lda, n). If uplo = | |
| | | 'U' or | |
| | | * 'u', then A must contain the upper triangular part of a hermitian | |
| | | * matrix, and the strictly lower triangular part is not referenced. | |
| | | * If uplo = 'L' or 'l', then A contains the lower triangular part | |
| | | * of a hermitian matrix, and the strictly upper triangular part is | |
| | | * not referenced. The imaginary parts of the diagonal elements need | |
| | | * not be set, they are assumed to be zero, and on exit they | |
| | | * are set to zero. | |
| | | * lda leading dimension of the two-dimensional array containing A. lda | |
| | | * must be at least max(1, n). | |
| | | * | |
| | | * Output | |
| | | * ------ | |
| | | * A updated according to A = alpha * x * conjugate(transpose(x)) + A | |
| | | * | |
| | | * Reference: http://www.netlib.org/blas/zher.f | |
| | | * | |
| | | * Error status for this function can be retrieved via cublasGetError(). | |
| | | * | |
| | | * Error Status | |
| | | * ------------ | |
| | | * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialize | |
| | | d | |
| | | * CUBLAS_STATUS_INVALID_VALUE if n < 0, or incx == 0 | |
| | | * CUBLAS_STATUS_ARCH_MISMATCH if invoked on device without DP support | |
| | | * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU | |
| | | */ | |
| | | void CUBLASAPI cublasZher (char uplo, int n, double alpha, | |
| | | const cuDoubleComplex *x, int incx, cuDoubleComp | |
| | | lex *A, | |
| | | int lda); | |
| | | | |
| | | /* | |
| | | * void | |
| | | * cublasZher (char uplo, int n, double alpha, const cuDoubleComplex *x, in | |
| | | t incx, | |
| | | * cuDoubleComplex *A, int lda) | |
| | | * | |
| | | * performs the hermitian rank 1 operation | |
| | | * | |
| | | * A = alpha * x * conjugate(transpose(x)) + A, | |
| | | * | |
| | | * where alpha is a double precision real scalar, x is an n element double | |
| | | * precision complex vector and A is an n x n hermitian matrix consisting o | |
| | | f | |
| | | * double precision complex elements. Matrix A is stored in column major fo | |
| | | rmat, | |
| | | * and lda is the leading dimension of the two-dimensional array | |
| | | * containing A. | |
| | | * | |
| | | * Input | |
| | | * ----- | |
| | | * uplo specifies whether the matrix data is stored in the upper or | |
| | | * the lower triangular part of array A. If uplo = 'U' or 'u', | |
| | | * then only the upper triangular part of A may be referenced. | |
| | | * If uplo = 'L' or 'l', then only the lower triangular part of | |
| | | * A may be referenced. | |
| | | * n specifies the number of rows and columns of the matrix A. It | |
| | | * must be at least 0. | |
| | | * alpha double precision real scalar multiplier applied to | |
| | | * x * conjugate(transpose(x)) | |
| | | * x double precision complex array of length at least (1 + (n - 1) * | |
| | | abs(incx)) | |
| | | * incx specifies the storage spacing between elements of x. incx must | |
| | | * not be zero. | |
| | | * A double precision complex array of dimensions (lda, n). If uplo = | |
| | | 'U' or | |
| | | * 'u', then A must contain the upper triangular part of a hermitian | |
| | | * matrix, and the strictly lower triangular part is not referenced. | |
| | | * If uplo = 'L' or 'l', then A contains the lower triangular part | |
| | | * of a hermitian matrix, and the strictly upper triangular part is | |
| | | * not referenced. The imaginary parts of the diagonal elements need | |
| | | * not be set, they are assumed to be zero, and on exit they | |
| | | * are set to zero. | |
| | | * lda leading dimension of the two-dimensional array containing A. lda | |
| | | * must be at least max(1, n). | |
| | | * | |
| | | * Output | |
| | | * ------ | |
| | | * A updated according to A = alpha * x * conjugate(transpose(x)) + A | |
| | | * | |
| | | * Reference: http://www.netlib.org/blas/zher.f | |
| | | * | |
| | | * Error status for this function can be retrieved via cublasGetError(). | |
| | | * | |
| | | * Error Status | |
| | | * ------------ | |
| | | * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialize | |
| | | d | |
| | | * CUBLAS_STATUS_INVALID_VALUE if n < 0, or incx == 0 | |
| | | * CUBLAS_STATUS_ARCH_MISMATCH if invoked on device without DP support | |
| | | * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU | |
| | | */ | |
| | | void CUBLASAPI cublasZher (char uplo, int n, double alpha, const cuDoubleCo | |
| | | mplex *x, | |
| | | int incx, cuDoubleComplex *A, int lda); | |
| | | | |
| | | /* | |
| | | * void | |
| | | * cublasZhpr (char uplo, int n, double alpha, const cuDoubleComplex *x, in | |
| | | t incx, | |
| | | * cuDoubleComplex *AP) | |
| | | * | |
| | | * performs the hermitian rank 1 operation | |
| | | * | |
| | | * A = alpha * x * conjugate(transpose(x)) + A, | |
| | | * | |
| | | * where alpha is a double precision real scalar and x is an n element doub | |
| | | le | |
| | | * precision complex vector. A is a hermitian n x n matrix consisting of do | |
| | | uble | |
| | | * precision complex elements that is supplied in packed form. | |
| | | * | |
| | | * Input | |
| | | * ----- | |
| | | * uplo specifies whether the matrix data is stored in the upper or the l | |
| | | ower | |
| | | * triangular part of array AP. If uplo == 'U' or 'u', then the uppe | |
| | | r | |
| | | * triangular part of A is supplied in AP. If uplo == 'L' or 'l', th | |
| | | en | |
| | | * the lower triangular part of A is supplied in AP. | |
| | | * n specifies the number of rows and columns of the matrix A. It must | |
| | | be | |
| | | * at least zero. | |
| | | * alpha double precision real scalar multiplier applied to x * conjugate( | |
| | | transpose(x)). | |
| | | * x double precision complex array of length at least (1 + (n - 1) * abs(incx | |
| | | )). | |
| | | * incx storage spacing between elements of x. incx must not be zero. | |
| | | * AP double precision complex array with at least ((n * (n + 1)) / 2) | |
| | | elements. If | |
| | | * uplo == 'U' or 'u', the array AP contains the upper triangular pa | |
| | | rt | |
| | | * of the hermitian matrix A, packed sequentially, column by column; | |
| | | * that is, if i <= j, then A[i,j] is stored in AP[i+(j*(j+1)/2)]. I | |
| | | f | |
| | | * uplo == 'L' or 'l', the array AP contains the lower triangular pa | |
| | | rt | |
| | | * of the hermitian matrix A, packed sequentially, column by column; | |
| | | * that is, if i >= j, then A[i,j] is stored in AP[i+((2*n-j+1)*j)/2 | |
| | | ]. | |
| | | * The imaginary parts of the diagonal elements need not be set, the | |
| | | y | |
| | | * are assumed to be zero, and on exit they are set to zero. | |
| | | * | |
| | | * Output | |
| | | * ------ | |
| | | * A updated according to A = alpha * x * conjugate(transpose(x)) + A | |
| | | * | |
| | | * Reference: http://www.netlib.org/blas/zhpr.f | |
| | | * | |
| | | * Error status for this function can be retrieved via cublasGetError(). | |
| | | * | |
| | | * Error Status | |
| | | * ------------ | |
| | | * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialize | |
| | | d | |
| | | * CUBLAS_STATUS_INVALID_VALUE if n < 0, or incx == 0 | |
| | | * CUBLAS_STATUS_ARCH_MISMATCH if invoked on device without DP support | |
| | | * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU | |
| | | */ | |
| | | void CUBLASAPI cublasZhpr (char uplo, int n, double alpha, | |
| | | const cuDoubleComplex *x, int incx, cuDoubleComp | |
| | | lex *AP); | |
| | | | |
| | | /* | |
| | | * void | |
| | | * cublasZhpr2 (char uplo, int n, cuDoubleComplex alpha, const cuDoubleComp | |
| | | lex *x, int incx, | |
| | | * const cuDoubleComplex *y, int incy, cuDoubleComplex *AP) | |
| | | * | |
| | | * performs the hermitian rank 2 operation | |
| | | * | |
| | | * A = alpha*x*conjugate(transpose(y)) + conjugate(alpha)*y*conjugate(tr | |
| | | anspose(x)) + A, | |
| | | * | |
| | | * where alpha is a double precision complex scalar, and x and y are n elem | |
| | | ent double | |
| | | * precision complex vectors. A is a hermitian n x n matrix consisting of d | |
| | | ouble | |
| | | * precision complex elements that is supplied in packed form. | |
| | | * | |
| | | * Input | |
| | | * ----- | |
| | | * uplo specifies whether the matrix data is stored in the upper or the l | |
| | | ower | |
| | | * triangular part of array A. If uplo == 'U' or 'u', then only the | |
| | | * upper triangular part of A may be referenced and the lower triang | |
| | | ular | |
| | | * part of A is inferred. If uplo == 'L' or 'l', then only the lower | |
| | | * triangular part of A may be referenced and the upper triangular p | |
| | | art | |
| | | * of A is inferred. | |
| | | * n specifies the number of rows and columns of the matrix A. It must | |
| | | be | |
| | | * at least zero. | |
| | | * alpha double precision complex scalar multiplier applied to x * conjuga | |
| | | te(transpose(y)) + | |
| | | * y * conjugate(transpose(x)). | |
| | | * x double precision complex array of length at least (1 + (n - 1) * | |
| | | abs (incx)). | |
| | | * incx storage spacing between elements of x. incx must not be zero. | |
| | | * y double precision complex array of length at least (1 + (n - 1) * | |
| | | abs (incy)). | |
| | | * incy storage spacing between elements of y. incy must not be zero. | |
| | | * AP double precision complex array with at least ((n * (n + 1)) / 2) | |
| | | elements. If | |
| | | * uplo == 'U' or 'u', the array AP contains the upper triangular pa | |
| | | rt | |
| | | * of the hermitian matrix A, packed sequentially, column by column; | |
| | | * that is, if i <= j, then A[i,j] is stored in AP[i+(j*(j+1)/2)]. I | |
| | | f | |
| | | * uplo == 'L' or 'l', the array AP contains the lower triangular pa | |
| | | rt | |
| | | * of the hermitian matrix A, packed sequentially, column by column; | |
| | | * that is, if i >= j, then A[i,j] is stored in AP[i+((2*n-j+1)*j)/2 | |
| | | ]. | |
| | | * The imaginary parts of the diagonal elements need not be set, the | |
| | | y | |
| | | * are assumed to be zero, and on exit they are set to zero. | |
| | | * | |
| | | * Output | |
| | | * ------ | |
| | | * A updated according to A = alpha*x*conjugate(transpose(y)) | |
| | | * + conjugate(alpha)*y*conjugate(transpose(x | |
| | | ))+A | |
| | | * | |
| | | * Reference: http://www.netlib.org/blas/zhpr2.f | |
| | | * | |
| | | * Error status for this function can be retrieved via cublasGetError(). | |
| | | * | |
| | | * Error Status | |
| | | * ------------ | |
| | | * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialize | |
| | | d | |
| | | * CUBLAS_STATUS_INVALID_VALUE if n < 0, incx == 0, incy == 0 | |
| | | * CUBLAS_STATUS_ARCH_MISMATCH if invoked on device without DP support | |
| | | * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU | |
| | | */ | |
| | | void CUBLASAPI cublasZhpr2 (char uplo, int n, cuDoubleComplex alpha, | |
| | | const cuDoubleComplex *x, int incx, const cuDou | |
| | | bleComplex *y, | |
| | | int incy, cuDoubleComplex *AP); | |
| | | | |
| | | /* | |
| | | * void cublasZher2 (char uplo, int n, cuDoubleComplex alpha, const cuDoubl | |
| | | eComplex *x, int incx, | |
| | | * const cuDoubleComplex *y, int incy, cuDoubleComplex *A | |
| | | , int lda) | |
| | | * | |
| | | * performs the hermitian rank 2 operation | |
| | | * | |
| | | * A = alpha*x*conjugate(transpose(y)) + conjugate(alpha)*y*conjugate(tr | |
| | | anspose(x)) + A, | |
| | | * | |
| | | * where alpha is a double precision complex scalar, x and y are n element | |
| | | double | |
| | | * precision complex vector and A is an n by n hermitian matrix consisting | |
| | | of double | |
| | | * precision complex elements. | |
| | | * | |
| | | * Input | |
| | | * ----- | |
| | | * uplo specifies whether the matrix data is stored in the upper or the l | |
| | | ower | |
| | | * triangular part of array A. If uplo == 'U' or 'u', then only the | |
| | | * upper triangular part of A may be referenced and the lower triang | |
| | | ular | |
| | | * part of A is inferred. If uplo == 'L' or 'l', then only the lower | |
| | | * triangular part of A may be referenced and the upper triangular p | |
| | | art | |
| | | * of A is inferred. | |
| | | * n specifies the number of rows and columns of the matrix A. It must | |
| | | be | |
| | | * at least zero. | |
| | | * alpha double precision complex scalar multiplier applied to x * conjuga | |
| | | te(transpose(y)) + | |
| | | * y * conjugate(transpose(x)). | |
| | | * x double precision complex array of length at least (1 + (n - 1) * abs (inc | |
| | | x)). | |
| | | * incx storage spacing between elements of x. incx must not be zero. | |
| | | * y double precision complex array of length at least (1 + (n - 1) * abs (inc | |
| | | y)). | |
| | | * incy storage spacing between elements of y. incy must not be zero. | |
| | | * A double precision complex array of dimensions (lda, n). If uplo == | |
| | | 'U' or 'u', | |
| | | * then A must contain the upper triangular part of a hermitian mat | |
| | | rix, | |
| | | * and the strictly lower triangular part is not referenced. If upl | |
| | | o == | |
| | | * 'L' or 'l', then A contains the lower triangular part of a hermit | |
| | | ian | |
| | | * matrix, and the strictly upper triangular part is not referenced. | |
| | | * The imaginary parts of the diagonal elements need not be set, | |
| | | * they are assumed to be zero, and on exit they are set to zero. | |
| | | * | |
| | | * lda leading dimension of A. It must be at least max(1, n). | |
| | | * | |
| | | * Output | |
| | | * ------ | |
| | | * A updated according to A = alpha*x*conjugate(transpose(y)) | |
| | | * + conjugate(alpha)*y*conjugate(transpose(x | |
| | | ))+A | |
| | | * | |
| | | * Reference: http://www.netlib.org/blas/zher2.f | |
| | | * | |
| | | * Error status for this function can be retrieved via cublasGetError(). | |
| | | * | |
| | | * Error Status | |
| | | * ------------ | |
| | | * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialize | |
| | | d | |
| | | * CUBLAS_STATUS_INVALID_VALUE if n < 0, incx == 0, incy == 0 | |
| | | * CUBLAS_STATUS_ARCH_MISMATCH if invoked on device without DP support | |
| | | * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU | |
| | | */ | |
| | | void CUBLASAPI cublasZher2 (char uplo, int n, cuDoubleComplex alpha, | |
| | | const cuDoubleComplex *x, int incx, const cuDo | |
| | | ubleComplex *y, | |
| | | int incy, cuDoubleComplex *A, int lda); | |
| | | | |
| /* | | /* | |
| * void | | * void | |
| * cublasDsyr2k (char uplo, char trans, int n, int k, double alpha, | | * cublasDsyr2k (char uplo, char trans, int n, int k, double alpha, | |
| * const double *A, int lda, const double *B, int ldb, | | * const double *A, int lda, const double *B, int ldb, | |
| * double beta, double *C, int ldc) | | * double beta, double *C, int ldc) | |
| * | | * | |
| * performs one of the symmetric rank 2k operations | | * performs one of the symmetric rank 2k operations | |
| * | | * | |
| * C = alpha * A * transpose(B) + alpha * B * transpose(A) + beta * C, o
r | | * C = alpha * A * transpose(B) + alpha * B * transpose(A) + beta * C, o
r | |
| | | | |
| skipping to change at line 4159 | | skipping to change at line 7821 | |
| * otherwise the leading k x n part of the array must contain the ma
trix | | * otherwise the leading k x n part of the array must contain the ma
trix | |
| * B. | | * B. | |
| * ldb leading dimension of B. When trans == 'N' or 'n' then ldb must be
at | | * ldb leading dimension of B. When trans == 'N' or 'n' then ldb must be
at | |
| * least max(1, n). Otherwise ldb must be at least max(1, k). | | * least max(1, n). Otherwise ldb must be at least max(1, k). | |
| * beta double precision scalar multiplier applied to C. If beta is zero,
C | | * beta double precision scalar multiplier applied to C. If beta is zero,
C | |
| * does not have to be a valid input. | | * does not have to be a valid input. | |
| * C double precision array of dimensions (ldc, n). If uplo == 'U' or
'u', | | * C double precision array of dimensions (ldc, n). If uplo == 'U' or
'u', | |
| * the leading n x n triangular part of the array C must contain the | | * the leading n x n triangular part of the array C must contain the | |
| * upper triangular part of the symmetric matrix C and the strictly | | * upper triangular part of the symmetric matrix C and the strictly | |
| * lower triangular part of C is not referenced. On exit, the upper | | * lower triangular part of C is not referenced. On exit, the upper | |
|
| * triangular part of C is overwritten by the upper trinagular part
of | | * triangular part of C is overwritten by the upper triangular part
of | |
| * the updated matrix. If uplo == 'L' or 'l', the leading n x n | | * the updated matrix. If uplo == 'L' or 'l', the leading n x n | |
| * triangular part of the array C must contain the lower triangular
part | | * triangular part of the array C must contain the lower triangular
part | |
| * of the symmetric matrix C and the strictly upper triangular part
of C | | * of the symmetric matrix C and the strictly upper triangular part
of C | |
| * is not referenced. On exit, the lower triangular part of C is | | * is not referenced. On exit, the lower triangular part of C is | |
|
| * overwritten by the lower trinagular part of the updated matrix. | | * overwritten by the lower triangular part of the updated matrix. | |
| * ldc leading dimension of C. Must be at least max(1, n). | | * ldc leading dimension of C. Must be at least max(1, n). | |
| * | | * | |
| * Output | | * Output | |
| * ------ | | * ------ | |
| * C updated according to alpha*A*transpose(B) + alpha*B*transpose(A)
+ | | * C updated according to alpha*A*transpose(B) + alpha*B*transpose(A)
+ | |
| * beta*C or alpha*transpose(A)*B + alpha*transpose(B)*A + beta*C | | * beta*C or alpha*transpose(A)*B + alpha*transpose(B)*A + beta*C | |
| * | | * | |
| * Reference: http://www.netlib.org/blas/dsyr2k.f | | * Reference: http://www.netlib.org/blas/dsyr2k.f | |
| * | | * | |
| * Error status for this function can be retrieved via cublasGetError(). | | * Error status for this function can be retrieved via cublasGetError(). | |
| | | | |
| skipping to change at line 4256 | | skipping to change at line 7918 | |
| * CUBLAS_STATUS_ARCH_MISMATCH if invoked on device without DP support | | * CUBLAS_STATUS_ARCH_MISMATCH if invoked on device without DP support | |
| * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU | | * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU | |
| */ | | */ | |
| void CUBLASAPI cublasZgemm (char transa, char transb, int m, int n, | | void CUBLASAPI cublasZgemm (char transa, char transb, int m, int n, | |
| int k, cuDoubleComplex alpha, | | int k, cuDoubleComplex alpha, | |
| const cuDoubleComplex *A, int lda, | | const cuDoubleComplex *A, int lda, | |
| const cuDoubleComplex *B, int ldb, | | const cuDoubleComplex *B, int ldb, | |
| cuDoubleComplex beta, cuDoubleComplex *C, | | cuDoubleComplex beta, cuDoubleComplex *C, | |
| int ldc); | | int ldc); | |
| | | | |
|
| | | /* | |
| | | * void | |
| | | * cublasZtrmm (char side, char uplo, char transa, char diag, int m, int n, | |
| | | * cuDoubleComplex alpha, const cuDoubleComplex *A, int lda, | |
| | | cuDoubleComplex *B, | |
| | | * int ldb) | |
| | | * | |
| | | * performs one of the matrix-matrix operations | |
| | | * | |
| | | * B = alpha * op(A) * B, or B = alpha * B * op(A) | |
| | | * | |
| | | * where alpha is a double-precision complex scalar, B is an m x n matrix c | |
| | | omposed | |
| | | * of double precision complex elements, and A is a unit or non-unit, upper | |
| | | or lower, | |
| | | * triangular matrix composed of double precision complex elements. op(A) i | |
| | | s one of | |
| | | * | |
| | | * op(A) = A , op(A) = transpose(A) or op(A) = conjugate(transpose(A)) | |
| | | * | |
| | | * Matrices A and B are stored in column major format, and lda and ldb are | |
| | | * the leading dimensions of the two-dimensional arrays that contain A and | |
| | | * B, respectively. | |
| | | * | |
| | | * Input | |
| | | * ----- | |
| | | * side specifies whether op(A) multiplies B from the left or right. | |
| | | * If side = 'L' or 'l', then B = alpha * op(A) * B. If side = | |
| | | * 'R' or 'r', then B = alpha * B * op(A). | |
| | | * uplo specifies whether the matrix A is an upper or lower triangular | |
| | | * matrix. If uplo = 'U' or 'u', A is an upper triangular matrix. | |
| | | * If uplo = 'L' or 'l', A is a lower triangular matrix. | |
| | | * transa specifies the form of op(A) to be used in the matrix | |
| | | * multiplication. If transa = 'N' or 'n', then op(A) = A. If | |
| | | * transa = 'T' or 't', then op(A) = transpose(A). | |
| | | * If transa = 'C' or 'c', then op(A) = conjugate(transpose(A)). | |
| | | * diag specifies whether or not A is unit triangular. If diag = 'U' | |
| | | * or 'u', A is assumed to be unit triangular. If diag = 'N' or | |
| | | * 'n', A is not assumed to be unit triangular. | |
| | | * m the number of rows of matrix B. m must be at least zero. | |
| | | * n the number of columns of matrix B. n must be at least zero. | |
| | | * alpha double precision complex scalar multiplier applied to op(A)*B, or | |
| | | * B*op(A), respectively. If alpha is zero no accesses are made | |
| | | * to matrix A, and no read accesses are made to matrix B. | |
| | | * A double precision complex array of dimensions (lda, k). k = m if s | |
| | | ide = | |
| | | * 'L' or 'l', k = n if side = 'R' or 'r'. If uplo = 'U' or 'u' | |
| | | * the leading k x k upper triangular part of the array A must | |
| | | * contain the upper triangular matrix, and the strictly lower | |
| | | * triangular part of A is not referenced. If uplo = 'L' or 'l' | |
| | | * the leading k x k lower triangular part of the array A must | |
| | | * contain the lower triangular matrix, and the strictly upper | |
| | | * triangular part of A is not referenced. When diag = 'U' or 'u' | |
| | | * the diagonal elements of A are not referenced and are assumed | |
| | | * to be unity. | |
| | | * lda leading dimension of A. When side = 'L' or 'l', it must be at | |
| | | * least max(1,m) and at least max(1,n) otherwise | |
| | | * B double precision complex array of dimensions (ldb, n). On entry, | |
| | | the | |
| | | * leading m x n part of the array contains the matrix B. It is | |
| | | * overwritten with the transformed matrix on exit. | |
| | | * ldb leading dimension of B. It must be at least max (1, m). | |
| | | * | |
| | | * Output | |
| | | * ------ | |
| | | * B updated according to B = alpha * op(A) * B or B = alpha * B * op | |
| | | (A) | |
| | | * | |
| | | * Reference: http://www.netlib.org/blas/ztrmm.f | |
| | | * | |
| | | * Error status for this function can be retrieved via cublasGetError(). | |
| | | * | |
| | | * Error Status | |
| | | * ------------ | |
| | | * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialize | |
| | | d | |
| | | * CUBLAS_STATUS_INVALID_VALUE if m or n < 0 | |
| | | * CUBLAS_STATUS_ARCH_MISMATCH if invoked on device without DP support | |
| | | * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU | |
| | | */ | |
| | | void CUBLASAPI cublasZtrmm (char side, char uplo, char transa, | |
| | | char diag, int m, int n, cuDoubleComplex alpha, | |
| | | const cuDoubleComplex *A, int lda, cuDoubleComp | |
| | | lex *B, | |
| | | int ldb); | |
| | | | |
| | | /* | |
| | | * cublasZgeru (int m, int n, cuDoubleComplex alpha, const cuDoubleComplex | |
| | | *x, int incx, | |
| | | * const cuDoubleComplex *y, int incy, cuDoubleComplex *A, int | |
| | | lda) | |
| | | * | |
| | | * performs the rank 1 operation | |
| | | * | |
| | | * A = alpha * x * transpose(y) + A, | |
| | | * | |
| | | * where alpha is a double precision complex scalar, x is an m element doub | |
| | | le | |
| | | * precision complex vector, y is an n element double precision complex vec | |
| | | tor, and A | |
| | | * is an m by n matrix consisting of double precision complex elements. Mat | |
| | | rix A | |
| | | * is stored in column major format, and lda is the leading dimension of | |
| | | * the two-dimensional array used to store A. | |
| | | * | |
| | | * Input | |
| | | * ----- | |
| | | * m specifies the number of rows of the matrix A. It must be at least | |
| | | * zero. | |
| | | * n specifies the number of columns of the matrix A. It must be at | |
| | | * least zero. | |
| | | * alpha double precision complex scalar multiplier applied to x * transpo | |
| | | se(y) | |
| | | * x double precision complex array of length at least (1 + (m - 1) * | |
| | | abs(incx)) | |
| | | * incx specifies the storage spacing between elements of x. incx must no | |
| | | t | |
| | | * be zero. | |
| | | * y double precision complex array of length at least (1 + (n - 1) * | |
| | | abs(incy)) | |
| | | * incy specifies the storage spacing between elements of y. incy must no | |
| | | t | |
| | | * be zero. | |
| | | * A double precision complex array of dimensions (lda, n). | |
| | | * lda leading dimension of two-dimensional array used to store matrix A | |
| | | * | |
| | | * Output | |
| | | * ------ | |
| | | * A updated according to A = alpha * x * transpose(y) + A | |
| | | * | |
| | | * Reference: http://www.netlib.org/blas/zgeru.f | |
| | | * | |
| | | * Error status for this function can be retrieved via cublasGetError(). | |
| | | * | |
| | | * Error Status | |
| | | * ------------ | |
| | | * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialize | |
| | | d | |
| | | * CUBLAS_STATUS_INVALID_VALUE if m < 0, n < 0, incx == 0, incy == 0 | |
| | | * CUBLAS_STATUS_ARCH_MISMATCH if invoked on device without DP support | |
| | | * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU | |
| | | */ | |
| | | void CUBLASAPI cublasZgeru (int m, int n, cuDoubleComplex alpha, | |
| | | const cuDoubleComplex *x, int incx, const cuDou | |
| | | bleComplex *y, | |
| | | int incy, cuDoubleComplex *A, int lda); | |
| | | | |
| | | /* | |
| | | * cublasZgerc (int m, int n, cuDoubleComplex alpha, const cuDoubleComplex | |
| | | *x, int incx, | |
| | | * const cuDoubleComplex *y, int incy, cuDoubleComplex *A, int | |
| | | lda) | |
| | | * | |
| | | * performs the rank 1 operation | |
| | | * | |
| | | * A = alpha * x * conjugate(transpose(y)) + A, | |
| | | * | |
| | | * where alpha is a double precision complex scalar, x is an m element doub | |
| | | le | |
| | | * precision complex vector, y is an n element double precision complex vec | |
| | | tor, and A | |
| | | * is an m by n matrix consisting of double precision complex elements. Mat | |
| | | rix A | |
| | | * is stored in column major format, and lda is the leading dimension of | |
| | | * the two-dimensional array used to store A. | |
| | | * | |
| | | * Input | |
| | | * ----- | |
| | | * m specifies the number of rows of the matrix A. It must be at least | |
| | | * zero. | |
| | | * n specifies the number of columns of the matrix A. It must be at | |
| | | * least zero. | |
| | | * alpha double precision complex scalar multiplier applied to x * conjuga | |
| | | te(transpose(y)) | |
| | | * x double precision complex array of length at least (1 + (m - 1) * abs(incx | |
| | | )) | |
| | | * incx specifies the storage spacing between elements of x. incx must no | |
| | | t | |
| | | * be zero. | |
| | | * y double precision complex array of length at least (1 + (n - 1) * | |
| | | abs(incy)) | |
| | | * incy specifies the storage spacing between elements of y. incy must no | |
| | | t | |
| | | * be zero. | |
| | | * A double precision complex array of dimensions (lda, n). | |
| | | * lda leading dimension of two-dimensional array used to store matrix A | |
| | | * | |
| | | * Output | |
| | | * ------ | |
| | | * A updated according to A = alpha * x * conjugate(transpose(y)) + A | |
| | | * | |
| | | * Reference: http://www.netlib.org/blas/zgerc.f | |
| | | * | |
| | | * Error status for this function can be retrieved via cublasGetError(). | |
| | | * | |
| | | * Error Status | |
| | | * ------------ | |
| | | * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialize | |
| | | d | |
| | | * CUBLAS_STATUS_INVALID_VALUE if m < 0, n < 0, incx == 0, incy == 0 | |
| | | * CUBLAS_STATUS_ARCH_MISMATCH if invoked on device without DP support | |
| | | * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU | |
| | | */ | |
| | | void CUBLASAPI cublasZgerc (int m, int n, cuDoubleComplex alpha, | |
| | | const cuDoubleComplex *x, int incx, const cuDou | |
| | | bleComplex *y, | |
| | | int incy, cuDoubleComplex *A, int lda); | |
| | | | |
| | | /* | |
| | | * void | |
| | | * cublasZherk (char uplo, char trans, int n, int k, double alpha, | |
| | | * const cuDoubleComplex *A, int lda, double beta, cuDoubleCom | |
| | | plex *C, int ldc) | |
| | | * | |
| | | * performs one of the hermitian rank k operations | |
| | | * | |
| | | * C = alpha * A * conjugate(transpose(A)) + beta * C, or | |
| | | * C = alpha * conjugate(transpose(A)) * A + beta * C. | |
| | | * | |
| | | * Alpha and beta are double precision scalars. C is an n x n hermitian mat | |
| | | rix | |
| | | * consisting of double precision complex elements and stored in either low | |
| | | er or | |
| | | * upper storage mode. A is a matrix consisting of double precision complex | |
| | | elements | |
| | | * with dimension of n x k in the first case, and k x n in the second case. | |
| | | * | |
| | | * Input | |
| | | * ----- | |
| | | * uplo specifies whether the hermitian matrix C is stored in upper or lo | |
| | | wer | |
| | | * storage mode as follows. If uplo == 'U' or 'u', only the upper | |
| | | * triangular part of the hermitian matrix is to be referenced, and | |
| | | the | |
| | | * elements of the strictly lower triangular part are to be inferred | |
| | | from | |
| | | * those in the upper triangular part. If uplo == 'L' or 'l', only t | |
| | | he | |
| | | * lower triangular part of the hermitian matrix is to be referenced | |
| | | , | |
| | | * and the elements of the strictly upper triangular part are to be | |
| | | * inferred from those in the lower triangular part. | |
| | | * trans specifies the operation to be performed. If trans == 'N' or 'n', | |
| | | C = | |
| | | * alpha * A * conjugate(transpose(A)) + beta * C. If trans == 'T', | |
| | | 't', 'C', or 'c', | |
| | | * C = alpha * conjugate(transpose(A)) * A + beta * C. | |
| | | * n specifies the number of rows and the number of columns of matrix C. | |
| | | If | |
| | | * trans == 'N' or 'n', n specifies the number of rows of matrix A. | |
| | | If | |
| | | * trans == 'T', 't', 'C', or 'c', n specifies the columns of matrix | |
| | | A. | |
| | | * n must be at least zero. | |
| | | * k If trans == 'N' or 'n', k specifies the number of columns of matr | |
| | | ix A. | |
| | | * If trans == 'T', 't', 'C', or 'c', k specifies the number of rows | |
| | | of | |
| | | * matrix A. k must be at least zero. | |
| | | * alpha double precision scalar multiplier applied to A * conjugate(trans | |
| | | pose(A)) or | |
| | | * conjugate(transpose(A)) * A. | |
| | | * A double precision complex array of dimensions (lda, ka), where ka | |
| | | is k when | |
| | | * trans == 'N' or 'n', and is n otherwise. When trans == 'N' or 'n' | |
| | | , | |
| | | * the leading n x k part of array A must contain the matrix A, | |
| | | * otherwise the leading k x n part of the array must contain the | |
| | | * matrix A. | |
| | | * lda leading dimension of A. When trans == 'N' or 'n' then lda must be | |
| | | at | |
| | | * least max(1, n). Otherwise lda must be at least max(1, k). | |
| | | * beta double precision scalar multiplier applied to C. If beta is zero, | |
| | | C | |
| | | * does not have to be a valid input | |
| | | * C double precision complex array of dimensions (ldc, n). If uplo = | |
| | | 'U' or 'u', | |
| | | * the leading n x n triangular part of the array C must contain the | |
| | | * upper triangular part of the hermitian matrix C and the strictly | |
| | | * lower triangular part of C is not referenced. On exit, the upper | |
| | | * triangular part of C is overwritten by the upper triangular part | |
| | | of | |
| | | * the updated matrix. If uplo = 'L' or 'l', the leading n x n | |
| | | * triangular part of the array C must contain the lower triangular | |
| | | part | |
| | | * of the hermitian matrix C and the strictly upper triangular part | |
| | | of C | |
| | | * is not referenced. On exit, the lower triangular part of C is | |
| | | * overwritten by the lower triangular part of the updated matrix. | |
| | | * The imaginary parts of the diagonal elements need | |
| | | * not be set, they are assumed to be zero, and on exit they | |
| | | * are set to zero. | |
| | | * ldc leading dimension of C. It must be at least max(1, n). | |
| | | * | |
| | | * Output | |
| | | * ------ | |
| | | * C updated according to C = alpha * A * conjugate(transpose(A)) + be | |
| | | ta * C, or C = | |
| | | * alpha * conjugate(transpose(A)) * A + beta * C | |
| | | * | |
| | | * Reference: http://www.netlib.org/blas/zherk.f | |
| | | * | |
| | | * Error status for this function can be retrieved via cublasGetError(). | |
| | | * | |
| | | * Error Status | |
| | | * ------------ | |
| | | * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialize | |
| | | d | |
| | | * CUBLAS_STATUS_INVALID_VALUE if n < 0 or k < 0 | |
| | | * CUBLAS_STATUS_ARCH_MISMATCH if invoked on device without DP support | |
| | | * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU | |
| | | */ | |
| | | void CUBLASAPI cublasZherk (char uplo, char trans, int n, int k, | |
| | | double alpha, | |
| | | const cuDoubleComplex *A, int lda, | |
| | | double beta, | |
| | | cuDoubleComplex *C, int ldc); | |
| | | | |
| | | /* | |
| | | * void | |
| | | * cublasZhemm (char side, char uplo, int m, int n, cuDoubleComplex alpha, | |
| | | * const cuDoubleComplex *A, int lda, const cuDoubleComplex *B | |
| | | , int ldb, | |
| | | * cuDoubleComplex beta, cuDoubleComplex *C, int ldc); | |
| | | * | |
| | | * performs one of the matrix-matrix operations | |
| | | * | |
| | | * C = alpha * A * B + beta * C, or | |
| | | * C = alpha * B * A + beta * C, | |
| | | * | |
| | | * where alpha and beta are double precision complex scalars, A is a hermit | |
| | | ian matrix | |
| | | * consisting of double precision complex elements and stored in either low | |
| | | er or upper | |
| | | * storage mode, and B and C are m x n matrices consisting of double precis | |
| | | ion | |
| | | * complex elements. | |
| | | * | |
| | | * Input | |
| | | * ----- | |
| | | * side specifies whether the hermitian matrix A appears on the left | |
| | | * hand side or right hand side of matrix B, as follows. If side == | |
| | | 'L' | |
| | | * or 'l', then C = alpha * A * B + beta * C. If side = 'R' or 'r', | |
| | | * then C = alpha * B * A + beta * C. | |
| | | * uplo specifies whether the hermitian matrix A is stored in upper or lo | |
| | | wer | |
| | | * storage mode, as follows. If uplo == 'U' or 'u', only the upper | |
| | | * triangular part of the hermitian matrix is to be referenced, and | |
| | | the | |
| | | * elements of the strictly lower triangular part are to be inferred | |
| | | from | |
| | | * those in the upper triangular part. If uplo == 'L' or 'l', only t | |
| | | he | |
| | | * lower triangular part of the hermitian matrix is to be referenced | |
| | | , | |
| | | * and the elements of the strictly upper triangular part are to be | |
| | | * inferred from those in the lower triangular part. | |
| | | * m specifies the number of rows of the matrix C, and the number of r | |
| | | ows | |
| | | * of matrix B. It also specifies the dimensions of hermitian matrix | |
| | | A | |
| | | * when side == 'L' or 'l'. m must be at least zero. | |
| | | * n specifies the number of columns of the matrix C, and the number o | |
| | | f | |
| | | * columns of matrix B. It also specifies the dimensions of hermitia | |
| | | n | |
| | | * matrix A when side == 'R' or 'r'. n must be at least zero. | |
| | | * alpha double precision complex scalar multiplier applied to A * B, or B * A | |
| | | * A double precision complex array of dimensions (lda, ka), where ka | |
| | | is m when | |
| | | * side == 'L' or 'l' and is n otherwise. If side == 'L' or 'l' the | |
| | | * leading m x m part of array A must contain the hermitian matrix, | |
| | | * such that when uplo == 'U' or 'u', the leading m x m part stores | |
| | | the | |
| | | * upper triangular part of the hermitian matrix, and the strictly l | |
| | | ower | |
| | | * triangular part of A is not referenced, and when uplo == 'L' or ' | |
| | | l', | |
| | | * the leading m x m part stores the lower triangular part of the | |
| | | * hermitian matrix and the strictly upper triangular part is not | |
| | | * referenced. If side == 'R' or 'r' the leading n x n part of array | |
| | | A | |
| | | * must contain the hermitian matrix, such that when uplo == 'U' or | |
| | | 'u', | |
| | | * the leading n x n part stores the upper triangular part of the | |
| | | * hermitian matrix and the strictly lower triangular part of A is n | |
| | | ot | |
| | | * referenced, and when uplo == 'L' or 'l', the leading n x n part | |
| | | * stores the lower triangular part of the hermitian matrix and the | |
| | | * strictly upper triangular part is not referenced. The imaginary p | |
| | | arts | |
| | | * of the diagonal elements need not be set, they are assumed to be | |
| | | zero. | |
| | | * | |
| | | * lda leading dimension of A. When side == 'L' or 'l', it must be at le | |
| | | ast | |
| | | * max(1, m) and at least max(1, n) otherwise. | |
| | | * B double precision complex array of dimensions (ldb, n). On entry, | |
| | | the leading | |
| | | * m x n part of the array contains the matrix B. | |
| | | * ldb leading dimension of B. It must be at least max (1, m). | |
| | | * beta double precision complex scalar multiplier applied to C. If beta | |
| | | is zero, C | |
| | | * does not have to be a valid input | |
| | | * C double precision complex array of dimensions (ldc, n) | |
| | | * ldc leading dimension of C. Must be at least max(1, m) | |
| | | * | |
| | | * Output | |
| | | * ------ | |
| | | * C updated according to C = alpha * A * B + beta * C, or C = alpha * | |
| | | * B * A + beta * C | |
| | | * | |
| | | * Reference: http://www.netlib.org/blas/zhemm.f | |
| | | * | |
| | | * Error status for this function can be retrieved via cublasGetError(). | |
| | | * | |
| | | * Error Status | |
| | | * ------------ | |
| | | * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialize | |
| | | d | |
| | | * CUBLAS_STATUS_INVALID_VALUE if m or n are < 0 | |
| | | * CUBLAS_STATUS_ARCH_MISMATCH if invoked on device without DP support | |
| | | * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU | |
| | | */ | |
| | | void CUBLASAPI cublasZhemm (char side, char uplo, int m, int n, | |
| | | cuDoubleComplex alpha, const cuDoubleComplex *A | |
| | | , int lda, | |
| | | const cuDoubleComplex *B, int ldb, cuDoubleComp | |
| | | lex beta, | |
| | | cuDoubleComplex *C, int ldc); | |
| | | | |
| | | /* | |
| | | * void | |
| | | * cublasZtrsv (char uplo, char trans, char diag, int n, const cuDoubleComp | |
| | | lex *A, | |
| | | * int lda, cuDoubleComplex *x, int incx) | |
| | | * | |
| | | * solves a system of equations op(A) * x = b, where op(A) is either A, | |
| | | * transpose(A) or conjugate(transpose(A)). b and x are double precision | |
| | | * complex vectors consisting of n elements, and A is an n x n matrix | |
| | | * composed of a unit or non-unit, upper or lower triangular matrix. | |
| | | * Matrix A is stored in column major format, and lda is the leading | |
| | | * dimension of the two-dimensional array containing A. | |
| | | * | |
| | | * No test for singularity or near-singularity is included in this function | |
| | | . | |
| | | * Such tests must be performed before calling this function. | |
| | | * | |
| | | * Input | |
| | | * ----- | |
| | | * uplo specifies whether the matrix data is stored in the upper or the | |
| | | * lower triangular part of array A. If uplo = 'U' or 'u', then only | |
| | | * the upper triangular part of A may be referenced. If uplo = 'L' o | |
| | | r | |
| | | * 'l', then only the lower triangular part of A may be referenced. | |
| | | * trans specifies op(A). If trans = 'n' or 'N', op(A) = A. If trans = 't' | |
| | | * or 'T', op(A) = transpose(A). If trans = 'c' or 'C', | |
| | | * op(A) = conjugate(transpose(A)). | |
| | | * diag specifies whether or not A is a unit triangular matrix like so: | |
| | | * if diag = 'U' or 'u', A is assumed to be unit triangular. If | |
| | | * diag = 'N' or 'n', then A is not assumed to be unit triangular. | |
| | | * n specifies the number of rows and columns of the matrix A. It | |
| | | * must be at least 0. | |
| | | * A is a double precision complex array of dimensions (lda, n). If up | |
| | | lo = 'U' | |
| | | * or 'u', then A must contain the upper triangular matrix, | |
| | | * and the strictly lower triangular part is not referenced. | |
| | | * If uplo = 'L' or 'l', then A contains the lower triangular | |
| | | * matrix, and the strictly upper triangular part is not | |
| | | * referenced. | |
| | | * lda is the leading dimension of the two-dimensional array containing | |
| | | A. | |
| | | * lda must be at least max(1, n). | |
| | | * x double precision complex array of length at least (1 + (n - 1) * | |
| | | abs(incx)). | |
| | | * On entry, x contains the n element right-hand side vector b. On e | |
| | | xit, | |
| | | * it is overwritten with the solution vector x. | |
| | | * incx specifies the storage spacing between elements of x. incx must no | |
| | | t | |
| | | * be zero. | |
| | | * | |
| | | * Output | |
| | | * ------ | |
| | | * x updated to contain the solution vector x that solves op(A) * x = | |
| | | b. | |
| | | * | |
| | | * Reference: http://www.netlib.org/blas/ztrsv.f | |
| | | * | |
| | | * Error status for this function can be retrieved via cublasGetError(). | |
| | | * | |
| | | * Error Status | |
| | | * ------------ | |
| | | * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialize | |
| | | d | |
| | | * CUBLAS_STATUS_INVALID_VALUE if incx == 0 or if n < 0 | |
| | | * CUBLAS_STATUS_ARCH_MISMATCH if invoked on device without DP support | |
| | | * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU | |
| | | */ | |
| | | void CUBLASAPI cublasZtrsv (char uplo, char trans, char diag, int n, | |
| | | const cuDoubleComplex *A, int lda, cuDoubleComp | |
| | | lex *x, | |
| | | int incx); | |
| | | | |
| | | /* | |
| | | * void | |
| | | * cublasZhbmv (char uplo, int n, int k, cuDoubleComplex alpha, const cuDou | |
| | | bleComplex *A, int lda, | |
| | | * const cuDoubleComplex *x, int incx, cuDoubleComplex beta, c | |
| | | uDoubleComplex *y, int incy) | |
| | | * | |
| | | * performs the matrix-vector operation | |
| | | * | |
| | | * y := alpha*A*x + beta*y | |
| | | * | |
| | | * alpha and beta are double precision complex scalars. x and y are double | |
| | | precision | |
| | | * complex vectors with n elements. A is an n by n hermitian band matrix co | |
| | | nsisting | |
| | | * of double precision complex elements, with k super-diagonals and the sam | |
| | | e number | |
| | | * of subdiagonals. | |
| | | * | |
| | | * Input | |
| | | * ----- | |
| | | * uplo specifies whether the upper or lower triangular part of the hermi | |
| | | tian | |
| | | * band matrix A is being supplied. If uplo == 'U' or 'u', the upper | |
| | | * triangular part is being supplied. If uplo == 'L' or 'l', the low | |
| | | er | |
| | | * triangular part is being supplied. | |
| | | * n specifies the number of rows and the number of columns of the | |
| | | * hermitian matrix A. n must be at least zero. | |
| | | * k specifies the number of super-diagonals of matrix A. Since the ma | |
| | | trix | |
| | | * is hermitian, this is also the number of sub-diagonals. k must be | |
| | | at | |
| | | * least zero. | |
| | | * alpha double precision complex scalar multiplier applied to A*x. | |
| | | * A double precision complex array of dimensions (lda, n). When uplo | |
| | | == 'U' or | |
| | | * 'u', the leading (k + 1) x n part of array A must contain the upp | |
| | | er | |
| | | * triangular band of the hermitian matrix, supplied column by colum | |
| | | n, | |
| | | * with the leading diagonal of the matrix in row (k+1) of the array | |
| | | , | |
| | | * the first super-diagonal starting at position 2 in row k, and so | |
| | | on. | |
| | | * The top left k x k triangle of the array A is not referenced. Whe | |
| | | n | |
| | | * uplo == 'L' or 'l', the leading (k + 1) x n part of the array A m | |
| | | ust | |
| | | * contain the lower triangular band part of the hermitian matrix, | |
| | | * supplied column by column, with the leading diagonal of the matri | |
| | | x in | |
| | | * row 1 of the array, the first sub-diagonal starting at position 1 | |
| | | in | |
| | | * row 2, and so on. The bottom right k x k triangle of the array A | |
| | | is | |
| | | * not referenced. The imaginary parts of the diagonal elements need | |
| | | * not be set; they are assumed to be zero. | |
| | | * lda leading dimension of A. lda must be at least (k + 1). | |
| | | * x double precision complex array of length at least (1 + (n - 1) * | |
| | | abs(incx)). | |
| | | * incx storage spacing between elements of x. incx must not be zero. | |
| | | * beta double precision complex scalar multiplier applied to vector y. I | |
| | | f beta is | |
| | | * zero, y is not read. | |
| | | * y double precision complex array of length at least (1 + (n - 1) * | |
| | | abs(incy)). | |
| | | * If beta is zero, y is not read. | |
| | | * incy storage spacing between elements of y. incy must not be zero. | |
| | | * | |
| | | * Output | |
| | | * ------ | |
| | | * y updated according to alpha*A*x + beta*y | |
| | | * | |
| | | * Reference: http://www.netlib.org/blas/zhbmv.f | |
| | | * | |
| | | * Error status for this function can be retrieved via cublasGetError(). | |
| | | * | |
| | | * Error Status | |
| | | * ------------ | |
| | | * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been | |
| | | initialized | |
| | | * CUBLAS_STATUS_INVALID_VALUE if k or n < 0, or if incx or incy == 0 | |
| | | * CUBLAS_STATUS_ARCH_MISMATCH if invoked on device without DP support | |
| | | * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU | |
| | | */ | |
| | | void CUBLASAPI cublasZhbmv (char uplo, int n, int k, cuDoubleComplex alpha, | |
| | | const cuDoubleComplex *A, int lda, const cuDoub | |
| | | leComplex *x, | |
| | | int incx, cuDoubleComplex beta, cuDoubleComplex | |
| | | *y, int incy); | |
| #if defined(__cplusplus) | | #if defined(__cplusplus) | |
| } | | } | |
| #endif /* __cplusplus */ | | #endif /* __cplusplus */ | |
| | | | |
| #endif /* !defined(CUBLAS_H_) */ | | #endif /* !defined(CUBLAS_H_) */ | |
| | | | |
End of changes. 74 change blocks. |
| 44 lines changed or deleted | | 5308 lines changed or added | |
|