cuda.h

skipping to change at line 42
 * include, in the user documentation and internal comments to the code,
 * the above Disclaimer and U.S. Government End Users Notice.
 */

#ifndef __cuda_cuda_h__
#define __cuda_cuda_h__

#include <stdlib.h>

/**
 * CUDA API versioning support
 */
#if defined(CUDA_FORCE_API_VERSION)
    #if (CUDA_FORCE_API_VERSION == 3010)
        #define __CUDA_API_VERSION 3010
    #else
        #error "Unsupported value of CUDA_FORCE_API_VERSION"
    #endif
#else
    #define __CUDA_API_VERSION 3020
#endif /* CUDA_FORCE_API_VERSION */

#if defined(__CUDA_API_VERSION_INTERNAL) || __CUDA_API_VERSION >= 3020
    #define cuDeviceTotalMem                    cuDeviceTotalMem_v2
    #define cuCtxCreate                         cuCtxCreate_v2
    #define cuModuleGetGlobal                   cuModuleGetGlobal_v2
    #define cuMemGetInfo                        cuMemGetInfo_v2
    #define cuMemAlloc                          cuMemAlloc_v2
    #define cuMemAllocPitch                     cuMemAllocPitch_v2
    #define cuMemFree                           cuMemFree_v2
    #define cuMemGetAddressRange                cuMemGetAddressRange_v2
    #define cuMemAllocHost                      cuMemAllocHost_v2
    #define cuMemHostGetDevicePointer           cuMemHostGetDevicePointer_v2
    #define cuMemcpyHtoD                        cuMemcpyHtoD_v2
    #define cuMemcpyDtoH                        cuMemcpyDtoH_v2
    #define cuMemcpyDtoD                        cuMemcpyDtoD_v2
    #define cuMemcpyDtoA                        cuMemcpyDtoA_v2
    #define cuMemcpyAtoD                        cuMemcpyAtoD_v2
    #define cuMemcpyHtoA                        cuMemcpyHtoA_v2
    #define cuMemcpyAtoH                        cuMemcpyAtoH_v2
    #define cuMemcpyAtoA                        cuMemcpyAtoA_v2
    #define cuMemcpyHtoAAsync                   cuMemcpyHtoAAsync_v2
    #define cuMemcpyAtoHAsync                   cuMemcpyAtoHAsync_v2
    #define cuMemcpy2D                          cuMemcpy2D_v2
    #define cuMemcpy2DUnaligned                 cuMemcpy2DUnaligned_v2
    #define cuMemcpy3D                          cuMemcpy3D_v2
    #define cuMemcpyHtoDAsync                   cuMemcpyHtoDAsync_v2
    #define cuMemcpyDtoHAsync                   cuMemcpyDtoHAsync_v2
    #define cuMemcpyDtoDAsync                   cuMemcpyDtoDAsync_v2
    #define cuMemcpy2DAsync                     cuMemcpy2DAsync_v2
    #define cuMemcpy3DAsync                     cuMemcpy3DAsync_v2
    #define cuMemsetD8                          cuMemsetD8_v2
    #define cuMemsetD16                         cuMemsetD16_v2
    #define cuMemsetD32                         cuMemsetD32_v2
    #define cuMemsetD2D8                        cuMemsetD2D8_v2
    #define cuMemsetD2D16                       cuMemsetD2D16_v2
    #define cuMemsetD2D32                       cuMemsetD2D32_v2
    #define cuArrayCreate                       cuArrayCreate_v2
    #define cuArrayGetDescriptor                cuArrayGetDescriptor_v2
    #define cuArray3DCreate                     cuArray3DCreate_v2
    #define cuArray3DGetDescriptor              cuArray3DGetDescriptor_v2
    #define cuTexRefSetAddress                  cuTexRefSetAddress_v2
    #define cuTexRefSetAddress2D                cuTexRefSetAddress2D_v2
    #define cuTexRefGetAddress                  cuTexRefGetAddress_v2
    #define cuGraphicsResourceGetMappedPointer  cuGraphicsResourceGetMappedPointer_v2
#endif /* __CUDA_API_VERSION_INTERNAL || __CUDA_API_VERSION >= 3020 */
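/*
 * Illustrative example (not part of the original header): a client that needs
 * to keep building against the CUDA 3.1 entry points can pin the API version
 * before including cuda.h; otherwise __CUDA_API_VERSION defaults to 3020 and
 * the *_v2 names above are selected.
 *
 * \code
 * #define CUDA_FORCE_API_VERSION 3010   // must appear before the include
 * #include <cuda.h>
 * // cuMemAlloc, cuCtxCreate, etc. now resolve to the 3.1 entry points
 * // instead of cuMemAlloc_v2, cuCtxCreate_v2, ...
 * \endcode
 */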

/**
 * \defgroup CUDA_DRIVER CUDA Driver API
 *
 * This section describes the low-level CUDA driver application programming
 * interface.
 *
 * @{
 */

/**
 * \defgroup CUDA_TYPES Data types used by CUDA driver
 * @{
 */

/**
 * CUDA API version number
 */
#define CUDA_VERSION 3020 /* 3.2 */

#ifdef __cplusplus
extern "C" {
#endif

/**
 * CUDA device pointer
 */
#if __CUDA_API_VERSION >= 3020

#if defined(__x86_64) || defined(AMD64) || defined(_M_AMD64)
typedef unsigned long long CUdeviceptr;
#else
typedef unsigned int CUdeviceptr;
#endif

#endif /* __CUDA_API_VERSION >= 3020 */

typedef int CUdevice;                                     /**< CUDA device */
typedef struct CUctx_st *CUcontext;                       /**< CUDA context */
typedef struct CUmod_st *CUmodule;                        /**< CUDA module */
typedef struct CUfunc_st *CUfunction;                     /**< CUDA function */
typedef struct CUarray_st *CUarray;                       /**< CUDA array */
typedef struct CUtexref_st *CUtexref;                     /**< CUDA texture reference */
typedef struct CUsurfref_st *CUsurfref;                   /**< CUDA surface reference */
typedef struct CUevent_st *CUevent;                       /**< CUDA event */
typedef struct CUstream_st *CUstream;                     /**< CUDA stream */
typedef struct CUgraphicsResource_st *CUgraphicsResource; /**< CUDA graphics interop resource */

typedef struct CUuuid_st {                                /**< CUDA definition of UUID */
    char bytes[16];
} CUuuid;

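/*
 * Illustrative example (not from the original header): with the 3.2 API,
 * CUdeviceptr is 64 bits wide on x86_64 builds, so device addresses should
 * no longer be stored in a plain unsigned int.
 *
 * \code
 * CUdeviceptr dptr  = 0;
 * size_t      bytes = 1 << 20;
 * // cuMemAlloc resolves to cuMemAlloc_v2 here and takes a size_t byte count.
 * if (cuMemAlloc(&dptr, bytes) == CUDA_SUCCESS) {
 *     // ... use dptr ...
 *     cuMemFree(dptr);
 * }
 * \endcode
 */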
/**
 * Context creation flags
 */
typedef enum CUctx_flags_enum {
    CU_CTX_SCHED_AUTO         = 0,    /**< Automatic scheduling */
    CU_CTX_SCHED_SPIN         = 1,    /**< Set spin as default scheduling */
    CU_CTX_SCHED_YIELD        = 2,    /**< Set yield as default scheduling */
    CU_CTX_SCHED_MASK         = 0x3,
    CU_CTX_BLOCKING_SYNC      = 4,    /**< Use blocking synchronization */
    CU_CTX_MAP_HOST           = 8,    /**< Support mapped pinned allocations */
    CU_CTX_LMEM_RESIZE_TO_MAX = 16,   /**< Keep local memory allocation after launch */
    CU_CTX_FLAGS_MASK         = 0x1f
} CUctx_flags;
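/*
 * Illustrative example (not from the original header): creating a context
 * with explicit scheduling and mapped-pinned-memory flags.
 *
 * \code
 * CUcontext ctx;
 * CUdevice  dev;
 * cuDeviceGet(&dev, 0);
 * // Yield the CPU while waiting on the GPU and allow cuMemHostAlloc
 * // allocations to be mapped into the device address space.
 * cuCtxCreate(&ctx, CU_CTX_SCHED_YIELD | CU_CTX_MAP_HOST, dev);
 * \endcode
 */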

/**
 * Event creation flags
 */
typedef enum CUevent_flags_enum {
    CU_EVENT_DEFAULT        = 0, /**< Default event flag */
    CU_EVENT_BLOCKING_SYNC  = 1, /**< Event uses blocking synchronization */
    CU_EVENT_DISABLE_TIMING = 2  /**< Event will not record timing data */
} CUevent_flags;
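/*
 * Illustrative example (not from the original header): timing a stretch of
 * work with a pair of default (timing-enabled) events.
 *
 * \code
 * CUevent start, stop;
 * float   ms = 0.0f;
 * cuEventCreate(&start, CU_EVENT_DEFAULT);
 * cuEventCreate(&stop,  CU_EVENT_DEFAULT);
 * cuEventRecord(start, 0);
 * // ... launch work on stream 0 ...
 * cuEventRecord(stop, 0);
 * cuEventSynchronize(stop);
 * cuEventElapsedTime(&ms, start, stop);
 * cuEventDestroy(start);
 * cuEventDestroy(stop);
 * \endcode
 */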

/**
 * Array formats
 */
typedef enum CUarray_format_enum {
    CU_AD_FORMAT_UNSIGNED_INT8  = 0x01, /**< Unsigned 8-bit integers */
    CU_AD_FORMAT_UNSIGNED_INT16 = 0x02, /**< Unsigned 16-bit integers */
    CU_AD_FORMAT_UNSIGNED_INT32 = 0x03, /**< Unsigned 32-bit integers */
    CU_AD_FORMAT_SIGNED_INT8    = 0x08, /**< Signed 8-bit integers */
    CU_AD_FORMAT_SIGNED_INT16   = 0x09, /**< Signed 16-bit integers */
    CU_AD_FORMAT_SIGNED_INT32   = 0x0a, /**< Signed 32-bit integers */
    CU_AD_FORMAT_HALF           = 0x10, /**< 16-bit floating point */
    CU_AD_FORMAT_FLOAT          = 0x20  /**< 32-bit floating point */
} CUarray_format;

/**
 * Texture reference addressing modes
 */
typedef enum CUaddress_mode_enum {
    CU_TR_ADDRESS_MODE_WRAP   = 0, /**< Wrapping address mode */
    CU_TR_ADDRESS_MODE_CLAMP  = 1, /**< Clamp to edge address mode */
    CU_TR_ADDRESS_MODE_MIRROR = 2, /**< Mirror address mode */
    CU_TR_ADDRESS_MODE_BORDER = 3  /**< Border address mode */
} CUaddress_mode;

/**
 * Texture reference filtering modes
 */
typedef enum CUfilter_mode_enum {
    CU_TR_FILTER_MODE_POINT  = 0, /**< Point filter mode */
    CU_TR_FILTER_MODE_LINEAR = 1  /**< Linear filter mode */
} CUfilter_mode;
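/*
 * Illustrative example (not from the original header): configuring a texture
 * reference fetched from a module with clamped, linearly filtered, normalized
 * coordinates.
 *
 * \code
 * CUtexref texref;
 * cuModuleGetTexRef(&texref, module, "tex");   // "tex" is a hypothetical name
 * cuTexRefSetAddressMode(texref, 0, CU_TR_ADDRESS_MODE_CLAMP);
 * cuTexRefSetAddressMode(texref, 1, CU_TR_ADDRESS_MODE_CLAMP);
 * cuTexRefSetFilterMode(texref, CU_TR_FILTER_MODE_LINEAR);
 * cuTexRefSetFlags(texref, CU_TRSF_NORMALIZED_COORDINATES);
 * \endcode
 */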

/**
 * Device properties
 */
typedef enum CUdevice_attribute_enum {
    CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_BLOCK = 1,              /**< Maximum number of threads per block */
    CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_X = 2,                    /**< Maximum block dimension X */
    CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Y = 3,                    /**< Maximum block dimension Y */
    CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Z = 4,                    /**< Maximum block dimension Z */
    CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_X = 5,                     /**< Maximum grid dimension X */
    CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_Y = 6,                     /**< Maximum grid dimension Y */
    CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_Z = 7,                     /**< Maximum grid dimension Z */
    CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK = 8,        /**< Maximum shared memory available per block in bytes */
    CU_DEVICE_ATTRIBUTE_SHARED_MEMORY_PER_BLOCK = 8,            /**< Deprecated, use CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK */
    CU_DEVICE_ATTRIBUTE_TOTAL_CONSTANT_MEMORY = 9,              /**< Memory available on device for __constant__ variables in a CUDA C kernel in bytes */
    CU_DEVICE_ATTRIBUTE_WARP_SIZE = 10,                         /**< Warp size in threads */
    CU_DEVICE_ATTRIBUTE_MAX_PITCH = 11,                         /**< Maximum pitch in bytes allowed by memory copies */
    CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_BLOCK = 12,           /**< Maximum number of 32-bit registers available per block */
    CU_DEVICE_ATTRIBUTE_REGISTERS_PER_BLOCK = 12,               /**< Deprecated, use CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_BLOCK */
    CU_DEVICE_ATTRIBUTE_CLOCK_RATE = 13,                        /**< Peak clock frequency in kilohertz */
    CU_DEVICE_ATTRIBUTE_TEXTURE_ALIGNMENT = 14,                 /**< Alignment requirement for textures */
    CU_DEVICE_ATTRIBUTE_GPU_OVERLAP = 15,                       /**< Device can possibly copy memory and execute a kernel concurrently */
    CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT = 16,              /**< Number of multiprocessors on device */
    CU_DEVICE_ATTRIBUTE_KERNEL_EXEC_TIMEOUT = 17,               /**< Specifies whether there is a run time limit on kernels */
    CU_DEVICE_ATTRIBUTE_INTEGRATED = 18,                        /**< Device is integrated with host memory */
    CU_DEVICE_ATTRIBUTE_CAN_MAP_HOST_MEMORY = 19,               /**< Device can map host memory into CUDA address space */
    CU_DEVICE_ATTRIBUTE_COMPUTE_MODE = 20,                      /**< Compute mode (See ::CUcomputemode for details) */
    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_WIDTH = 21,           /**< Maximum 1D texture width */
    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_WIDTH = 22,           /**< Maximum 2D texture width */
    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_HEIGHT = 23,          /**< Maximum 2D texture height */
    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_WIDTH = 24,           /**< Maximum 3D texture width */
    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_HEIGHT = 25,          /**< Maximum 3D texture height */
    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_DEPTH = 26,           /**< Maximum 3D texture depth */
    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_ARRAY_WIDTH = 27,     /**< Maximum texture array width */
    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_ARRAY_HEIGHT = 28,    /**< Maximum texture array height */
    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_ARRAY_NUMSLICES = 29, /**< Maximum slices in a texture array */
    CU_DEVICE_ATTRIBUTE_SURFACE_ALIGNMENT = 30,                 /**< Alignment requirement for surfaces */
    CU_DEVICE_ATTRIBUTE_CONCURRENT_KERNELS = 31,                /**< Device can possibly execute multiple kernels concurrently */
    CU_DEVICE_ATTRIBUTE_ECC_ENABLED = 32,                       /**< Device has ECC support enabled */
    CU_DEVICE_ATTRIBUTE_PCI_BUS_ID = 33,                        /**< PCI bus ID of the device */
    CU_DEVICE_ATTRIBUTE_PCI_DEVICE_ID = 34,                     /**< PCI device ID of the device */
    CU_DEVICE_ATTRIBUTE_TCC_DRIVER = 35                         /**< Device is using TCC driver model */
} CUdevice_attribute;
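/*
 * Illustrative example (not from the original header): querying a few device
 * attributes for device 0.
 *
 * \code
 * CUdevice dev;
 * int threadsPerBlock, canMapHost;
 * cuDeviceGet(&dev, 0);
 * cuDeviceGetAttribute(&threadsPerBlock, CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_BLOCK, dev);
 * cuDeviceGetAttribute(&canMapHost,      CU_DEVICE_ATTRIBUTE_CAN_MAP_HOST_MEMORY,   dev);
 * \endcode
 */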

/**
 * Legacy device properties
 */
typedef struct CUdevprop_st {
    int maxThreadsPerBlock;  /**< Maximum number of threads per block */
    int maxThreadsDim[3];    /**< Maximum size of each dimension of a block */
    int maxGridSize[3];      /**< Maximum size of each dimension of a grid */
    int sharedMemPerBlock;   /**< Shared memory available per block in bytes */
    int totalConstantMemory; /**< Constant memory available on device in bytes */
    int SIMDWidth;           /**< Warp size in threads */
    int memPitch;            /**< Maximum pitch in bytes allowed by memory copies */
    int regsPerBlock;        /**< 32-bit registers available per block */
    int clockRate;           /**< Clock frequency in kilohertz */
    int textureAlign;        /**< Alignment requirement for textures */
} CUdevprop;

/**
 * Function properties
 */
typedef enum CUfunction_attribute_enum {
    /**
     * The maximum number of threads per block, beyond which a launch of the
     * function would fail. This number depends on both the function and the
     * device on which the function is currently loaded.
     */
    CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK = 0,

    /**
     * The size in bytes of statically-allocated shared memory required by
     * this function. This does not include dynamically-allocated shared
     * memory requested by the user at runtime.
     */
    CU_FUNC_ATTRIBUTE_SHARED_SIZE_BYTES = 1,

    /**
     * The size in bytes of user-allocated constant memory required by this
     * function.
     */
    CU_FUNC_ATTRIBUTE_CONST_SIZE_BYTES = 2,

    /**
     * The size in bytes of local memory used by each thread of this function.
     */
    CU_FUNC_ATTRIBUTE_LOCAL_SIZE_BYTES = 3,

    /**
     * The number of registers used by each thread of this function.
     */
    CU_FUNC_ATTRIBUTE_NUM_REGS = 4,

    /**
     * The PTX virtual architecture version for which the function was
     * compiled. This value is the major PTX version * 10 + the minor PTX
     * version, so a PTX version 1.3 function would return the value 13.
     * Note that this may return the undefined value of 0 for cubins
     * compiled prior to CUDA 3.0.
     */
    CU_FUNC_ATTRIBUTE_PTX_VERSION = 5,

    /**
     * The binary architecture version for which the function was compiled.
     * This value is the major binary version * 10 + the minor binary version,
     * so a binary version 1.3 function would return the value 13. Note that
     * this will return a value of 10 for legacy cubins that do not have a
     * properly-encoded binary architecture version.
     */
    CU_FUNC_ATTRIBUTE_BINARY_VERSION = 6,

    CU_FUNC_ATTRIBUTE_MAX
} CUfunction_attribute;
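/*
 * Illustrative example (not from the original header): inspecting a kernel's
 * resource usage after it has been loaded from a module.
 *
 * \code
 * CUfunction func;
 * int maxThreads, numRegs;
 * cuModuleGetFunction(&func, module, "myKernel");  // hypothetical kernel name
 * cuFuncGetAttribute(&maxThreads, CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK, func);
 * cuFuncGetAttribute(&numRegs,    CU_FUNC_ATTRIBUTE_NUM_REGS,              func);
 * \endcode
 */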

/**
 * Function cache configurations
 */
typedef enum CUfunc_cache_enum {
    CU_FUNC_CACHE_PREFER_NONE   = 0x00, /**< no preference for shared memory or L1 (default) */
    CU_FUNC_CACHE_PREFER_SHARED = 0x01, /**< prefer larger shared memory and smaller L1 cache */
    CU_FUNC_CACHE_PREFER_L1     = 0x02  /**< prefer larger L1 cache and smaller shared memory */
} CUfunc_cache;
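/*
 * Illustrative example (not from the original header): a kernel that uses
 * little shared memory can ask for the larger L1 cache on devices that
 * support a configurable split.
 *
 * \code
 * cuFuncSetCacheConfig(func, CU_FUNC_CACHE_PREFER_L1);
 * \endcode
 */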

/**
 * Memory types
 */
typedef enum CUmemorytype_enum {
    CU_MEMORYTYPE_HOST   = 0x01, /**< Host memory */
    CU_MEMORYTYPE_DEVICE = 0x02, /**< Device memory */
    CU_MEMORYTYPE_ARRAY  = 0x03  /**< Array memory */
} CUmemorytype;

/**
 * Compute Modes
 */
typedef enum CUcomputemode_enum {
    CU_COMPUTEMODE_DEFAULT    = 0, /**< Default compute mode (Multiple contexts allowed per device) */
    CU_COMPUTEMODE_EXCLUSIVE  = 1, /**< Compute-exclusive mode (Only one context can be present on this device at a time) */
    CU_COMPUTEMODE_PROHIBITED = 2  /**< Compute-prohibited mode (No contexts can be created on this device at this time) */
} CUcomputemode;

/**
 * Online compiler options
 */
typedef enum CUjit_option_enum
{
    /**
     * Max number of registers that a thread may use.\n
     * Option type: unsigned int
     */
    CU_JIT_MAX_REGISTERS = 0,

    /**
     * IN: Specifies minimum number of threads per block to target compilation
     * for\n
     * OUT: Returns the number of threads the compiler actually targeted.
     * This restricts the resource utilization of the compiler (e.g. max
     * registers) such that a block with the given number of threads should be
     * able to launch based on register limitations. Note, this option does not
     * currently take into account any other resource limitations, such as
     * shared memory utilization.\n

skipping to change at line 443

     */
    CU_JIT_FALLBACK_STRATEGY

} CUjit_option;
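/*
 * Illustrative example (not from the original header): passing JIT options to
 * cuModuleLoadDataEx when loading PTX at run time.
 *
 * \code
 * CUmodule     module;
 * CUjit_option options[2] = { CU_JIT_MAX_REGISTERS, CU_JIT_FALLBACK_STRATEGY };
 * void        *values[2]  = { (void *)(size_t)32,
 *                             (void *)(size_t)CU_PREFER_PTX };
 * // ptxImage is assumed to hold a NUL-terminated PTX string.
 * cuModuleLoadDataEx(&module, ptxImage, 2, options, values);
 * \endcode
 */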

/**
 * Online compilation targets
 */
typedef enum CUjit_target_enum
{
    CU_TARGET_COMPUTE_10 = 0,   /**< Compute device class 1.0 */
    CU_TARGET_COMPUTE_11,       /**< Compute device class 1.1 */
    CU_TARGET_COMPUTE_12,       /**< Compute device class 1.2 */
    CU_TARGET_COMPUTE_13,       /**< Compute device class 1.3 */
    CU_TARGET_COMPUTE_20,       /**< Compute device class 2.0 */
    CU_TARGET_COMPUTE_21        /**< Compute device class 2.1 */
} CUjit_target;

/**
 * Cubin matching fallback strategies
 */
typedef enum CUjit_fallback_enum
{
    CU_PREFER_PTX = 0,  /**< Prefer to compile ptx */

    CU_PREFER_BINARY    /**< Prefer to fall back to compatible binary code */

} CUjit_fallback;

/**
 * Flags to register a graphics resource
 */
typedef enum CUgraphicsRegisterFlags_enum {
    CU_GRAPHICS_REGISTER_FLAGS_NONE = 0x00
} CUgraphicsRegisterFlags;

skipping to change at line 482

typedef enum CUgraphicsMapResourceFlags_enum {
    CU_GRAPHICS_MAP_RESOURCE_FLAGS_NONE          = 0x00,
    CU_GRAPHICS_MAP_RESOURCE_FLAGS_READ_ONLY     = 0x01,
    CU_GRAPHICS_MAP_RESOURCE_FLAGS_WRITE_DISCARD = 0x02
} CUgraphicsMapResourceFlags;

/**
 * Array indices for cube faces
 */
typedef enum CUarray_cubemap_face_enum {
    CU_CUBEMAP_FACE_POSITIVE_X = 0x00, /**< Positive X face of cubemap */
    CU_CUBEMAP_FACE_NEGATIVE_X = 0x01, /**< Negative X face of cubemap */
    CU_CUBEMAP_FACE_POSITIVE_Y = 0x02, /**< Positive Y face of cubemap */
    CU_CUBEMAP_FACE_NEGATIVE_Y = 0x03, /**< Negative Y face of cubemap */
    CU_CUBEMAP_FACE_POSITIVE_Z = 0x04, /**< Positive Z face of cubemap */
    CU_CUBEMAP_FACE_NEGATIVE_Z = 0x05  /**< Negative Z face of cubemap */
} CUarray_cubemap_face;

/**
 * Limits
 */
typedef enum CUlimit_enum {
    CU_LIMIT_STACK_SIZE       = 0x00, /**< GPU thread stack size */
    CU_LIMIT_PRINTF_FIFO_SIZE = 0x01, /**< GPU printf FIFO size */
    CU_LIMIT_MALLOC_HEAP_SIZE = 0x02  /**< GPU malloc heap size */
} CUlimit;
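/*
 * Illustrative example (not from the original header): growing the in-kernel
 * printf FIFO for the current context.
 *
 * \code
 * size_t fifo = 0;
 * cuCtxSetLimit(CU_LIMIT_PRINTF_FIFO_SIZE, 8 * 1024 * 1024);
 * cuCtxGetLimit(&fifo, CU_LIMIT_PRINTF_FIFO_SIZE);
 * \endcode
 */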

/**
 * Error codes
 */
typedef enum cudaError_enum {
    /**
     * The API call returned with no errors. In the case of query calls, this
     * can also mean that the operation being queried is complete (see
     * ::cuEventQuery() and ::cuStreamQuery()).
     */
    CUDA_SUCCESS                              = 0,

    /**
     * This indicates that one or more of the parameters passed to the API call
     * is not within an acceptable range of values.
     */
    CUDA_ERROR_INVALID_VALUE                  = 1,

    /**
     * The API call failed because it was unable to allocate enough memory to
     * perform the requested operation.
     */
    CUDA_ERROR_OUT_OF_MEMORY                  = 2,

    /**
     * This indicates that the CUDA driver has not been initialized with
     * ::cuInit() or that initialization has failed.
     */
    CUDA_ERROR_NOT_INITIALIZED                = 3,

    /**
     * This indicates that the CUDA driver is in the process of shutting down.
     */
    CUDA_ERROR_DEINITIALIZED                  = 4,

    /**
     * This indicates that no CUDA-capable devices were detected by the
     * installed CUDA driver.
     */
    CUDA_ERROR_NO_DEVICE                      = 100,

    /**
     * This indicates that the device ordinal supplied by the user does not
     * correspond to a valid CUDA device.
     */
    CUDA_ERROR_INVALID_DEVICE                 = 101,

    /**
     * This indicates that the device kernel image is invalid. This can also
     * indicate an invalid CUDA module.
     */
    CUDA_ERROR_INVALID_IMAGE                  = 200,

    /**
     * This most frequently indicates that there is no context bound to the
     * current thread. This can also be returned if the context passed to an
     * API call is not a valid handle (such as a context that has had
     * ::cuCtxDestroy() invoked on it). This can also be returned if a user
     * mixes different API versions (i.e. 3010 context with 3020 API calls).
     * See ::cuCtxGetApiVersion() for more details.
     */
    CUDA_ERROR_INVALID_CONTEXT                = 201,

    /**
     * This indicates that the context being supplied as a parameter to the
     * API call was already the active context.
     * \deprecated
     * This error return is deprecated as of CUDA 3.2. It is no longer an
     * error to attempt to push the active context via ::cuCtxPushCurrent().
     */
    CUDA_ERROR_CONTEXT_ALREADY_CURRENT        = 202,

    /**
     * This indicates that a map or register operation has failed.
     */
    CUDA_ERROR_MAP_FAILED                     = 205,

    /**
     * This indicates that an unmap or unregister operation has failed.
     */
    CUDA_ERROR_UNMAP_FAILED                   = 206,

    /**
     * This indicates that the specified array is currently mapped and thus
     * cannot be destroyed.
     */
    CUDA_ERROR_ARRAY_IS_MAPPED                = 207,

    /**
     * This indicates that the resource is already mapped.
     */
    CUDA_ERROR_ALREADY_MAPPED                 = 208,

    /**
     * This indicates that there is no kernel image available that is suitable
     * for the device. This can occur when a user specifies code generation
     * options for a particular CUDA source file that do not include the
     * corresponding device configuration.
     */
    CUDA_ERROR_NO_BINARY_FOR_GPU              = 209,

    /**
     * This indicates that a resource has already been acquired.
     */
    CUDA_ERROR_ALREADY_ACQUIRED               = 210,

    /**
     * This indicates that a resource is not mapped.
     */
    CUDA_ERROR_NOT_MAPPED                     = 211,

    /**
     * This indicates that a mapped resource is not available for access as an
     * array.
     */
    CUDA_ERROR_NOT_MAPPED_AS_ARRAY            = 212,

    /**
     * This indicates that a mapped resource is not available for access as a
     * pointer.
     */
    CUDA_ERROR_NOT_MAPPED_AS_POINTER          = 213,

    /**
     * This indicates that an uncorrectable ECC error was detected during
     * execution.
     */
    CUDA_ERROR_ECC_UNCORRECTABLE              = 214,

    /**
     * This indicates that the ::CUlimit passed to the API call is not
     * supported by the active device.
     */
    CUDA_ERROR_UNSUPPORTED_LIMIT              = 215,

    /**
     * This indicates that the device kernel source is invalid.
     */
    CUDA_ERROR_INVALID_SOURCE                 = 300,

    /**
     * This indicates that the file specified was not found.
     */
    CUDA_ERROR_FILE_NOT_FOUND                 = 301,

    /**
     * This indicates that a link to a shared object failed to resolve.
     */
    CUDA_ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND = 302,

    /**
     * This indicates that initialization of a shared object failed.
     */
    CUDA_ERROR_SHARED_OBJECT_INIT_FAILED      = 303,

    /**
     * This indicates that an OS call failed.
     */
    CUDA_ERROR_OPERATING_SYSTEM               = 304,

    /**
     * This indicates that a resource handle passed to the API call was not
     * valid. Resource handles are opaque types like ::CUstream and ::CUevent.
     */
    CUDA_ERROR_INVALID_HANDLE                 = 400,

    /**
     * This indicates that a named symbol was not found. Examples of symbols
     * are global/constant variable names, texture names, and surface names.
     */
    CUDA_ERROR_NOT_FOUND                      = 500,

    /**
     * This indicates that asynchronous operations issued previously have not
     * completed yet. This result is not actually an error, but must be
     * indicated differently than ::CUDA_SUCCESS (which indicates completion).
     * Calls that may return this value include ::cuEventQuery() and
     * ::cuStreamQuery().
     */
    CUDA_ERROR_NOT_READY                      = 600,

    /**
     * An exception occurred on the device while executing a kernel. Common
     * causes include dereferencing an invalid device pointer and accessing
     * out of bounds shared memory. The context cannot be used, so it must
     * be destroyed (and a new one should be created). All existing device
     * memory allocations from this context are invalid and must be
     * reconstructed if the program is to continue using CUDA.
     */
    CUDA_ERROR_LAUNCH_FAILED                  = 700,

    /**
     * This indicates that a launch did not occur because it did not have
     * appropriate resources. This error usually indicates that the user has
     * attempted to pass too many arguments to the device kernel, or the
     * kernel launch specifies too many threads for the kernel's register
     * count. Passing arguments of the wrong size (i.e. a 64-bit pointer
     * when a 32-bit int is expected) is equivalent to passing too many
     * arguments and can also result in this error.
     */
    CUDA_ERROR_LAUNCH_OUT_OF_RESOURCES        = 701,

    /**
     * This indicates that the device kernel took too long to execute. This can
     * only occur if timeouts are enabled - see the device attribute
     * ::CU_DEVICE_ATTRIBUTE_KERNEL_EXEC_TIMEOUT for more information. The
     * context cannot be used (and must be destroyed similar to
     * ::CUDA_ERROR_LAUNCH_FAILED). All existing device memory allocations from
     * this context are invalid and must be reconstructed if the program is to
     * continue using CUDA.
     */
    CUDA_ERROR_LAUNCH_TIMEOUT                 = 702,

    /**
     * This error indicates a kernel launch that uses an incompatible texturing
     * mode.
     */
    CUDA_ERROR_LAUNCH_INCOMPATIBLE_TEXTURING  = 703,

    /**
     * This indicates that an unknown internal error has occurred.
     */
    CUDA_ERROR_UNKNOWN                        = 999
} CUresult;
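/*
 * Illustrative example (not from the original header): a simple checking
 * wrapper that turns any non-CUDA_SUCCESS result into a fatal error.
 *
 * \code
 * #include <stdio.h>
 * #include <stdlib.h>
 *
 * #define CU_CHECK(call)                                          \
 *     do {                                                        \
 *         CUresult _err = (call);                                 \
 *         if (_err != CUDA_SUCCESS) {                             \
 *             fprintf(stderr, "%s failed with CUresult %d\n",     \
 *                     #call, (int)_err);                          \
 *             exit(EXIT_FAILURE);                                 \
 *         }                                                       \
 *     } while (0)
 *
 * // Usage: CU_CHECK(cuInit(0));
 * \endcode
 */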

/**
 * If set, host memory is portable between CUDA contexts.
 * Flag for ::cuMemHostAlloc()
 */
#define CU_MEMHOSTALLOC_PORTABLE        0x01

/**
 * If set, host memory is mapped into CUDA address space and

skipping to change at line 743

#define CU_MEMHOSTALLOC_DEVICEMAP       0x02

/**
 * If set, host memory is allocated as write-combined - fast to write,
 * faster to DMA, slow to read except via SSE4 streaming load instruction
 * (MOVNTDQA).
 * Flag for ::cuMemHostAlloc()
 */
#define CU_MEMHOSTALLOC_WRITECOMBINED   0x04
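/*
 * Illustrative example (not from the original header): allocating pinned host
 * memory that is mapped into the device address space (the context must have
 * been created with CU_CTX_MAP_HOST).
 *
 * \code
 * void       *hostPtr = NULL;
 * CUdeviceptr devPtr  = 0;
 * cuMemHostAlloc(&hostPtr, 4096, CU_MEMHOSTALLOC_DEVICEMAP | CU_MEMHOSTALLOC_PORTABLE);
 * cuMemHostGetDevicePointer(&devPtr, hostPtr, 0);
 * // ... kernels may now access the allocation through devPtr ...
 * cuMemFreeHost(hostPtr);
 * \endcode
 */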

#if __CUDA_API_VERSION >= 3020

/**
 * 2D memory copy parameters
 */
typedef struct CUDA_MEMCPY2D_st {
    size_t srcXInBytes;         /**< Source X in bytes */
    size_t srcY;                /**< Source Y */

    CUmemorytype srcMemoryType; /**< Source memory type (host, device, array) */
    const void *srcHost;        /**< Source host pointer */
    CUdeviceptr srcDevice;      /**< Source device pointer */
    CUarray srcArray;           /**< Source array reference */
    size_t srcPitch;            /**< Source pitch (ignored when src is array) */

    size_t dstXInBytes;         /**< Destination X in bytes */
    size_t dstY;                /**< Destination Y */

    CUmemorytype dstMemoryType; /**< Destination memory type (host, device, array) */
    void *dstHost;              /**< Destination host pointer */
    CUdeviceptr dstDevice;      /**< Destination device pointer */
    CUarray dstArray;           /**< Destination array reference */
    size_t dstPitch;            /**< Destination pitch (ignored when dst is array) */

    size_t WidthInBytes;        /**< Width of 2D memory copy in bytes */
    size_t Height;              /**< Height of 2D memory copy */
} CUDA_MEMCPY2D;
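/*
 * Illustrative example (not from the original header): copying a pitched host
 * image into a pitch-allocated device buffer with cuMemcpy2D. The buffer and
 * pitch variables are hypothetical.
 *
 * \code
 * CUDA_MEMCPY2D cpy;
 * memset(&cpy, 0, sizeof(cpy));          // requires <string.h>
 * cpy.srcMemoryType = CU_MEMORYTYPE_HOST;
 * cpy.srcHost       = hostImage;
 * cpy.srcPitch      = hostPitchBytes;
 * cpy.dstMemoryType = CU_MEMORYTYPE_DEVICE;
 * cpy.dstDevice     = devImage;          // e.g. from cuMemAllocPitch()
 * cpy.dstPitch      = devPitchBytes;
 * cpy.WidthInBytes  = widthBytes;
 * cpy.Height        = height;
 * cuMemcpy2D(&cpy);
 * \endcode
 */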

/**
 * 3D memory copy parameters
 */
typedef struct CUDA_MEMCPY3D_st {
    size_t srcXInBytes;         /**< Source X in bytes */
    size_t srcY;                /**< Source Y */
    size_t srcZ;                /**< Source Z */
    size_t srcLOD;              /**< Source LOD */
    CUmemorytype srcMemoryType; /**< Source memory type (host, device, array) */
    const void *srcHost;        /**< Source host pointer */
    CUdeviceptr srcDevice;      /**< Source device pointer */
    CUarray srcArray;           /**< Source array reference */
    void *reserved0;            /**< Must be NULL */
    size_t srcPitch;            /**< Source pitch (ignored when src is array) */
    size_t srcHeight;           /**< Source height (ignored when src is array; may be 0 if Depth==1) */

    size_t dstXInBytes;         /**< Destination X in bytes */
    size_t dstY;                /**< Destination Y */
    size_t dstZ;                /**< Destination Z */
    size_t dstLOD;              /**< Destination LOD */
    CUmemorytype dstMemoryType; /**< Destination memory type (host, device, array) */
    void *dstHost;              /**< Destination host pointer */
    CUdeviceptr dstDevice;      /**< Destination device pointer */
    CUarray dstArray;           /**< Destination array reference */
    void *reserved1;            /**< Must be NULL */
    size_t dstPitch;            /**< Destination pitch (ignored when dst is array) */
    size_t dstHeight;           /**< Destination height (ignored when dst is array; may be 0 if Depth==1) */

    size_t WidthInBytes;        /**< Width of 3D memory copy in bytes */
    size_t Height;              /**< Height of 3D memory copy */
    size_t Depth;               /**< Depth of 3D memory copy */
} CUDA_MEMCPY3D;

/**
 * Array descriptor
 */
typedef struct CUDA_ARRAY_DESCRIPTOR_st
{
    size_t Width;             /**< Width of array */
    size_t Height;            /**< Height of array */

    CUarray_format Format;    /**< Array format */
    unsigned int NumChannels; /**< Channels per array element */
} CUDA_ARRAY_DESCRIPTOR;
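/*
 * Illustrative example (not from the original header): creating a 2D CUDA
 * array of four-channel float texels.
 *
 * \code
 * CUarray array;
 * CUDA_ARRAY_DESCRIPTOR desc;
 * desc.Width       = 1024;
 * desc.Height      = 768;
 * desc.Format      = CU_AD_FORMAT_FLOAT;
 * desc.NumChannels = 4;
 * cuArrayCreate(&array, &desc);
 * \endcode
 */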

/**
 * 3D array descriptor
 */
typedef struct CUDA_ARRAY3D_DESCRIPTOR_st
{
    size_t Width;             /**< Width of 3D array */
    size_t Height;            /**< Height of 3D array */
    size_t Depth;             /**< Depth of 3D array */

    CUarray_format Format;    /**< Array format */
    unsigned int NumChannels; /**< Channels per array element */
    unsigned int Flags;       /**< Flags */
} CUDA_ARRAY3D_DESCRIPTOR;

#endif /* __CUDA_API_VERSION >= 3020 */

/**
 * If set, the CUDA array contains an array of 2D slices
 * and the Depth member of CUDA_ARRAY3D_DESCRIPTOR specifies
 * the number of slices, not the depth of a 3D array.
 */
#define CUDA_ARRAY3D_2DARRAY    0x01

/**
 * This flag must be set in order to bind a surface reference
 * to the CUDA array
 */
#define CUDA_ARRAY3D_SURFACE_LDST 0x02

/**
 * Override the texref format with a format inferred from the array.
 * Flag for ::cuTexRefSetArray()
 */
#define CU_TRSA_OVERRIDE_FORMAT 0x01

/**
 * Read the texture as integers rather than promoting the values to floats

skipping to change at line 865

 */
#define CU_TRSF_READ_AS_INTEGER         0x01

/**
 * Use normalized texture coordinates in the range [0,1) instead of [0,dim).
 * Flag for ::cuTexRefSetFlags()
 */
#define CU_TRSF_NORMALIZED_COORDINATES  0x02

/**
 * Perform sRGB->linear conversion during texture read.
 * Flag for ::cuTexRefSetFlags()
 */
#define CU_TRSF_SRGB                    0x10

/**
 * For texture references loaded into the module, use default texunit from
 * texture reference.
 */
#define CU_PARAM_TR_DEFAULT -1

/** @} */ /* END CUDA_TYPES */

#ifdef _WIN32
#define CUDAAPI __stdcall
#else
#define CUDAAPI
#endif

/**
 * \defgroup CUDA_INITIALIZE Initialization
 *
 * This section describes the initialization functions of the low-level CUDA
 * driver application programming interface.
 *
 * @{
 */

/**
 * \brief Initialize the CUDA driver API
 *
 * Initializes the driver API and must be called before any other function from
 * the driver API. Currently, the \p Flags parameter must be 0. If ::cuInit()
 * has not been called, any function from the driver API will return
 * ::CUDA_ERROR_NOT_INITIALIZED.
 *
 * \param Flags - Initialization flag for CUDA.
 *
 * \return
 * ::CUDA_SUCCESS,
 * ::CUDA_ERROR_INVALID_VALUE,
 * ::CUDA_ERROR_INVALID_DEVICE
 * \notefnerr
 */
CUresult CUDAAPI cuInit(unsigned int Flags);
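/*
 * Illustrative example (not from the original header): the minimal start-up
 * sequence for a driver API application.
 *
 * \code
 * int initDriver(void)
 * {
 *     CUdevice  dev;
 *     CUcontext ctx;
 *     if (cuInit(0) != CUDA_SUCCESS)   // Flags must currently be 0
 *         return -1;
 *     cuDeviceGet(&dev, 0);
 *     cuCtxCreate(&ctx, 0, dev);
 *     // ... driver API calls ...
 *     cuCtxDestroy(ctx);
 *     return 0;
 * }
 * \endcode
 */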

/** @} */ /* END CUDA_INITIALIZE */

/**
 * \defgroup CUDA_VERSION Version Management
 *
 * This section describes the version management functions of the low-level
 * CUDA driver application programming interface.
 *
 * @{
 */

/**
 * \brief Returns the CUDA driver version
 *
 * Returns in \p *driverVersion the version number of the installed CUDA
 * driver. This function automatically returns ::CUDA_ERROR_INVALID_VALUE if
 * the \p driverVersion argument is NULL.
 *
 * \param driverVersion - Returns the CUDA driver version
 *
 * \return
 * ::CUDA_SUCCESS,
 * ::CUDA_ERROR_INVALID_VALUE
 * \notefnerr
 */
CUresult CUDAAPI cuDriverGetVersion(int *driverVersion);
| | | | |
|
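A short illustrative fragment (assumes ::cuInit() has already succeeded and that the version number uses the conventional 1000*major + 10*minor encoding, e.g. 3020 for 3.2; that encoding is an assumption here, not stated by this header):

int driverVersion = 0;

if (cuDriverGetVersion(&driverVersion) == CUDA_SUCCESS) {
    /* Decode assuming the usual 1000*major + 10*minor layout (3020 -> 3.2). */
    printf("CUDA driver version %d.%d\n",
           driverVersion / 1000, (driverVersion % 100) / 10);
}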
| CUresult CUDAAPI cuCtxCreate(CUcontext *pctx, unsigned int flags, CUde | | /** @} */ /* END CUDA_VERSION */ | |
| vice dev ); | | | |
| CUresult CUDAAPI cuCtxDestroy( CUcontext ctx ); | | | |
| CUresult CUDAAPI cuCtxAttach(CUcontext *pctx, unsigned int flags); | | | |
| CUresult CUDAAPI cuCtxDetach(CUcontext ctx); | | | |
| CUresult CUDAAPI cuCtxPushCurrent( CUcontext ctx ); | | | |
| CUresult CUDAAPI cuCtxPopCurrent( CUcontext *pctx ); | | | |
| CUresult CUDAAPI cuCtxGetDevice(CUdevice *device); | | | |
| CUresult CUDAAPI cuCtxSynchronize(void); | | | |
| | | | |
|
| /************************************ | | /** | |
| ** | | * \defgroup CUDA_DEVICE Device Management | |
| ** Module management | | * | |
| ** | | * This section describes the device management functions of the low-level | |
| ***********************************/ | | * CUDA driver application programming interface. | |
| | | * | |
| | | * @{ | |
| | | */ | |
| | | | |
|
| CUresult CUDAAPI cuModuleLoad(CUmodule *module, const char *fname); | | /** | |
| CUresult CUDAAPI cuModuleLoadData(CUmodule *module, const void *image) | | * \brief Returns a handle to a compute device | |
| ; | | * | |
| CUresult CUDAAPI cuModuleLoadDataEx(CUmodule *module, const void *imag | | * Returns in \p *device a device handle given an ordinal in the range <b>[ | |
| e, unsigned int numOptions, CUjit_option *options, void **optionValues); | | 0, | |
| CUresult CUDAAPI cuModuleLoadFatBinary(CUmodule *module, const void *f | | * ::cuDeviceGetCount()-1]</b>. | |
| atCubin); | | * | |
| CUresult CUDAAPI cuModuleUnload(CUmodule hmod); | | * \param device - Returned device handle | |
| CUresult CUDAAPI cuModuleGetFunction(CUfunction *hfunc, CUmodule hmod, | | * \param ordinal - Device number to get handle for | |
| const char *name); | | * | |
| CUresult CUDAAPI cuModuleGetGlobal(CUdeviceptr *dptr, unsigned int *by | | * \return | |
| tes, CUmodule hmod, const char *name); | | * ::CUDA_SUCCESS, | |
| CUresult CUDAAPI cuModuleGetTexRef(CUtexref *pTexRef, CUmodule hmod, c | | * ::CUDA_ERROR_DEINITIALIZED, | |
| onst char *name); | | * ::CUDA_ERROR_NOT_INITIALIZED, | |
| CUresult CUDAAPI cuModuleGetSurfRef(CUsurfref *pSurfRef, CUmodule hmod | | * ::CUDA_ERROR_INVALID_CONTEXT, | |
| , const char *name); | | * ::CUDA_ERROR_INVALID_VALUE, | |
| | | * ::CUDA_ERROR_INVALID_DEVICE | |
| | | * \notefnerr | |
| | | * | |
| | | * \sa ::cuDeviceComputeCapability, | |
| | | * ::cuDeviceGetAttribute, | |
| | | * ::cuDeviceGetCount, | |
| | | * ::cuDeviceGetName, | |
| | | * ::cuDeviceGetProperties, | |
| | | * ::cuDeviceTotalMem | |
| | | */ | |
| | | CUresult CUDAAPI cuDeviceGet(CUdevice *device, int ordinal); | |
| | | | |
|
| /************************************ | | /** | |
| ** | | * \brief Returns the number of compute-capable devices | |
| ** Memory management | | * | |
| ** | | * Returns in \p *count the number of devices with compute capability great | |
| ***********************************/ | | er | |
| | | * than or equal to 1.0 that are available for execution. If there is no su | |
| | | ch | |
| | | * device, ::cuDeviceGetCount() returns 0. | |
| | | * | |
| | | * \param count - Returned number of compute-capable devices | |
| | | * | |
| | | * \return | |
| | | * ::CUDA_SUCCESS, | |
| | | * ::CUDA_ERROR_DEINITIALIZED, | |
| | | * ::CUDA_ERROR_NOT_INITIALIZED, | |
| | | * ::CUDA_ERROR_INVALID_CONTEXT, | |
| | | * ::CUDA_ERROR_INVALID_VALUE | |
| | | * \notefnerr | |
| | | * | |
| | | * \sa ::cuDeviceComputeCapability, | |
| | | * ::cuDeviceGetAttribute, | |
| | | * ::cuDeviceGetName, | |
| | | * ::cuDeviceGet, | |
| | | * ::cuDeviceGetProperties, | |
| | | * ::cuDeviceTotalMem | |
| | | */ | |
| | | CUresult CUDAAPI cuDeviceGetCount(int *count); | |
| | | | |
|
| CUresult CUDAAPI cuMemGetInfo(unsigned int *free, unsigned int *total); | | /** | |
| | | * \brief Returns an identifier string for the device | |
| | | * | |
| | | * Returns an ASCII string identifying the device \p dev in the NULL-termin | |
| | | ated | |
| | | * string pointed to by \p name. \p len specifies the maximum length of the | |
| | | * string that may be returned. | |
| | | * | |
| | | * \param name - Returned identifier string for the device | |
| | | * \param len - Maximum length of string to store in \p name | |
| | | * \param dev - Device to get identifier string for | |
| | | * | |
| | | * \return | |
| | | * ::CUDA_SUCCESS, | |
| | | * ::CUDA_ERROR_DEINITIALIZED, | |
| | | * ::CUDA_ERROR_NOT_INITIALIZED, | |
| | | * ::CUDA_ERROR_INVALID_CONTEXT, | |
| | | * ::CUDA_ERROR_INVALID_VALUE, | |
| | | * ::CUDA_ERROR_INVALID_DEVICE | |
| | | * \notefnerr | |
| | | * | |
| | | * \sa ::cuDeviceComputeCapability, | |
| | | * ::cuDeviceGetAttribute, | |
| | | * ::cuDeviceGetCount, | |
| | | * ::cuDeviceGet, | |
| | | * ::cuDeviceGetProperties, | |
| | | * ::cuDeviceTotalMem | |
| | | */ | |
| | | CUresult CUDAAPI cuDeviceGetName(char *name, int len, CUdevice dev); | |
| | | | |
|
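Putting the three query functions above together, a plausible enumeration loop looks like the following sketch (assumes <cuda.h> and <stdio.h> are included and ::cuInit() has been called; error handling abbreviated):

static void listDevices(void)
{
    int count = 0, ordinal;
    char name[256];

    cuDeviceGetCount(&count);            /* 0 if no compute-capable device exists */
    for (ordinal = 0; ordinal < count; ++ordinal) {
        CUdevice dev;
        if (cuDeviceGet(&dev, ordinal) != CUDA_SUCCESS)
            continue;
        if (cuDeviceGetName(name, (int)sizeof(name), dev) == CUDA_SUCCESS)
            printf("Device %d: %s\n", ordinal, name);
    }
}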
| CUresult CUDAAPI cuMemAlloc( CUdeviceptr *dptr, unsigned int bytesize); | | /** | |
| CUresult CUDAAPI cuMemAllocPitch( CUdeviceptr *dptr, | | * \brief Returns the compute capability of the device | |
| unsigned int *pPitch, | | * | |
| unsigned int WidthInBytes, | | * Returns in \p *major and \p *minor the major and minor revision numbers | |
| unsigned int Height, | | that | |
| // size of biggest r/w to be performe | | * define the compute capability of the device \p dev. | |
| d by kernels on this memory | | * | |
| // 4, 8 or 16 bytes | | * \param major - Major revision number | |
| unsigned int ElementSizeBytes | | * \param minor - Minor revision number | |
| ); | | * \param dev - Device handle | |
| CUresult CUDAAPI cuMemFree(CUdeviceptr dptr); | | * | |
| CUresult CUDAAPI cuMemGetAddressRange( CUdeviceptr *pbase, unsigned int | | * \return | |
| *psize, CUdeviceptr dptr ); | | * ::CUDA_SUCCESS, | |
| | | * ::CUDA_ERROR_DEINITIALIZED, | |
| | | * ::CUDA_ERROR_NOT_INITIALIZED, | |
| | | * ::CUDA_ERROR_INVALID_CONTEXT, | |
| | | * ::CUDA_ERROR_INVALID_VALUE, | |
| | | * ::CUDA_ERROR_INVALID_DEVICE | |
| | | * \notefnerr | |
| | | * | |
| | | * \sa | |
| | | * ::cuDeviceGetAttribute, | |
| | | * ::cuDeviceGetCount, | |
| | | * ::cuDeviceGetName, | |
| | | * ::cuDeviceGet, | |
| | | * ::cuDeviceGetProperties, | |
| | | * ::cuDeviceTotalMem | |
| | | */ | |
| | | CUresult CUDAAPI cuDeviceComputeCapability(int *major, int *minor, CUdevice | |
| | | dev); | |
| | | | |
|
| CUresult CUDAAPI cuMemAllocHost(void **pp, unsigned int bytesize); | | #if __CUDA_API_VERSION >= 3020 | |
| CUresult CUDAAPI cuMemFreeHost(void *p); | | /** | |
| | | * \brief Returns the total amount of memory on the device | |
| | | * | |
| | | * Returns in \p *bytes the total amount of memory available on the device | |
| | | * \p dev in bytes. | |
| | | * | |
| | | * \param bytes - Returned memory available on device in bytes | |
| | | * \param dev - Device handle | |
| | | * | |
| | | * \return | |
| | | * ::CUDA_SUCCESS, | |
| | | * ::CUDA_ERROR_DEINITIALIZED, | |
| | | * ::CUDA_ERROR_NOT_INITIALIZED, | |
| | | * ::CUDA_ERROR_INVALID_CONTEXT, | |
| | | * ::CUDA_ERROR_INVALID_VALUE, | |
| | | * ::CUDA_ERROR_INVALID_DEVICE | |
| | | * \notefnerr | |
| | | * | |
| | | * \sa ::cuDeviceComputeCapability, | |
| | | * ::cuDeviceGetAttribute, | |
| | | * ::cuDeviceGetCount, | |
| | | * ::cuDeviceGetName, | |
| | | * ::cuDeviceGet, | |
| | | * ::cuDeviceGetProperties, | |
| | | */ | |
| | | CUresult CUDAAPI cuDeviceTotalMem(size_t *bytes, CUdevice dev); | |
| | | #endif /* __CUDA_API_VERSION >= 3020 */ | |
| | | | |
|
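As an illustration only (same assumptions as the sketch above; \p dev comes from ::cuDeviceGet()), the capability and memory queries are typically combined when choosing a device:

static void describeDevice(CUdevice dev)
{
    int major = 0, minor = 0;
    size_t totalBytes = 0;

    cuDeviceComputeCapability(&major, &minor, dev);
    cuDeviceTotalMem(&totalBytes, dev);     /* size_t variant, __CUDA_API_VERSION >= 3020 */
    printf("compute capability %d.%d, %lu bytes of device memory\n",
           major, minor, (unsigned long)totalBytes);
}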
| CUresult CUDAAPI cuMemHostAlloc(void **pp, size_t bytesize, unsigned in | | /** | |
| t Flags ); | | * \brief Returns properties for a selected device | |
| | | * | |
| | | * Returns in \p *prop the properties of device \p dev. The ::CUdevprop | |
| | | * structure is defined as: | |
| | | * | |
| | | * \code | |
| | | typedef struct CUdevprop_st { | |
| | | int maxThreadsPerBlock; | |
| | | int maxThreadsDim[3]; | |
| | | int maxGridSize[3]; | |
| | | int sharedMemPerBlock; | |
| | | int totalConstantMemory; | |
| | | int SIMDWidth; | |
| | | int memPitch; | |
| | | int regsPerBlock; | |
| | | int clockRate; | |
| | | int textureAlign; | |
| | | } CUdevprop; | |
| | | * \endcode | |
| | | * where: | |
| | | * | |
| | | * - ::maxThreadsPerBlock is the maximum number of threads per block; | |
| | | * - ::maxThreadsDim[3] is the maximum sizes of each dimension of a block; | |
| | | * - ::maxGridSize[3] is the maximum sizes of each dimension of a grid; | |
| | | * - ::sharedMemPerBlock is the total amount of shared memory available per | |
| | | * block in bytes; | |
| | | * - ::totalConstantMemory is the total amount of constant memory available | |
| | | on | |
| | | * the device in bytes; | |
| | | * - ::SIMDWidth is the warp size; | |
| | | * - ::memPitch is the maximum pitch allowed by the memory copy functions t | |
| | | hat | |
| | | * involve memory regions allocated through ::cuMemAllocPitch(); | |
| | | * - ::regsPerBlock is the total number of registers available per block; | |
| | | * - ::clockRate is the clock frequency in kilohertz; | |
| | | * - ::textureAlign is the alignment requirement; texture base addresses th | |
| | | at | |
| | | * are aligned to ::textureAlign bytes do not need an offset applied to | |
| | | * texture fetches. | |
| | | * | |
| | | * \param prop - Returned properties of device | |
| | | * \param dev - Device to get properties for | |
| | | * | |
| | | * \return | |
| | | * ::CUDA_SUCCESS, | |
| | | * ::CUDA_ERROR_DEINITIALIZED, | |
| | | * ::CUDA_ERROR_NOT_INITIALIZED, | |
| | | * ::CUDA_ERROR_INVALID_CONTEXT, | |
| | | * ::CUDA_ERROR_INVALID_VALUE, | |
| | | * ::CUDA_ERROR_INVALID_DEVICE | |
| | | * \notefnerr | |
| | | * | |
| | | * \sa ::cuDeviceComputeCapability, | |
| | | * ::cuDeviceGetAttribute, | |
| | | * ::cuDeviceGetCount, | |
| | | * ::cuDeviceGetName, | |
| | | * ::cuDeviceGet, | |
| | | * ::cuDeviceTotalMem | |
| | | */ | |
| | | CUresult CUDAAPI cuDeviceGetProperties(CUdevprop *prop, CUdevice dev); | |
| | | | |
|
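A brief fragment showing how the structure above might be consumed (illustrative only; the choice of fields is arbitrary and \p dev is assumed to come from ::cuDeviceGet()):

static void printProperties(CUdevice dev)
{
    CUdevprop prop;

    if (cuDeviceGetProperties(&prop, dev) == CUDA_SUCCESS)
        printf("max threads/block %d, warp size %d, regs/block %d\n",
               prop.maxThreadsPerBlock, prop.SIMDWidth, prop.regsPerBlock);
}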
| CUresult CUDAAPI cuMemHostGetDevicePointer( CUdeviceptr *pdptr, void *p | | /** | |
| , unsigned int Flags ); | | * \brief Returns information about the device | |
| CUresult CUDAAPI cuMemHostGetFlags( unsigned int *pFlags, void *p ); | | * | |
| | | * Returns in \p *pi the integer value of the attribute \p attrib on device | |
| | | * \p dev. The supported attributes are: | |
| | | * - ::CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_BLOCK: Maximum number of threads | |
| | | per | |
| | | * block; | |
| | | * - ::CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_X: Maximum x-dimension of a block; | |
| | | * - ::CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Y: Maximum y-dimension of a block; | |
| | | * - ::CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Z: Maximum z-dimension of a block; | |
| | | * - ::CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_X: Maximum x-dimension of a grid; | |
| | | * - ::CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_Y: Maximum y-dimension of a grid; | |
| | | * - ::CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_Z: Maximum z-dimension of a grid; | |
| | | * - ::CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK: Maximum amount of | |
| | | * shared memory available to a thread block in bytes; this amount is sha | |
| | | red | |
| | | * by all thread blocks simultaneously resident on a multiprocessor; | |
| | | * - ::CU_DEVICE_ATTRIBUTE_TOTAL_CONSTANT_MEMORY: Memory available on devic | |
| | | e for | |
| | | * __constant__ variables in a CUDA C kernel in bytes; | |
| | | * - ::CU_DEVICE_ATTRIBUTE_WARP_SIZE: Warp size in threads; | |
| | | * - ::CU_DEVICE_ATTRIBUTE_MAX_PITCH: Maximum pitch in bytes allowed by the | |
| | | * memory copy functions that involve memory regions allocated through | |
| | | * ::cuMemAllocPitch(); | |
| | | * - ::CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_BLOCK: Maximum number of 32-bi | |
| | | t | |
| | | * registers available to a thread block; this number is shared by all th | |
| | | read | |
| | | * blocks simultaneously resident on a multiprocessor; | |
| | | * - ::CU_DEVICE_ATTRIBUTE_CLOCK_RATE: Peak clock frequency in kilohertz; | |
| | | * - ::CU_DEVICE_ATTRIBUTE_TEXTURE_ALIGNMENT: Alignment requirement; textur | |
| | | e | |
| | | * base addresses aligned to ::textureAlign bytes do not need an offset | |
| | | * applied to texture fetches; | |
| | | * - ::CU_DEVICE_ATTRIBUTE_GPU_OVERLAP: 1 if the device can concurrently co | |
| | | py | |
| | | * memory between host and device while executing a kernel, or 0 if not; | |
| | | * - ::CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT: Number of multiprocessors | |
| | | on | |
| | | * the device; | |
| | | * - ::CU_DEVICE_ATTRIBUTE_KERNEL_EXEC_TIMEOUT: 1 if there is a run time li | |
| | | mit | |
| | | * for kernels executed on the device, or 0 if not; | |
| | | * - ::CU_DEVICE_ATTRIBUTE_INTEGRATED: 1 if the device is integrated with t | |
| | | he | |
| | | * memory subsystem, or 0 if not; | |
| | | * - ::CU_DEVICE_ATTRIBUTE_CAN_MAP_HOST_MEMORY: 1 if the device can map hos | |
| | | t | |
| | | * memory into the CUDA address space, or 0 if not; | |
| | | * - ::CU_DEVICE_ATTRIBUTE_COMPUTE_MODE: Compute mode that device is curren | |
| | | tly | |
| | | * in. Available modes are as follows: | |
| | | * - ::CU_COMPUTEMODE_DEFAULT: Default mode - Device is not restricted an | |
| | | d | |
| | | * can have multiple CUDA contexts present at a single time. | |
| | | * - ::CU_COMPUTEMODE_EXCLUSIVE: Compute-exclusive mode - Device can have | |
| | | * only one CUDA context present on it at a time. | |
| | | * - ::CU_COMPUTEMODE_PROHIBITED: Compute-prohibited mode - Device is | |
| | | * prohibited from creating new CUDA contexts. | |
| | | * - ::CU_DEVICE_ATTRIBUTE_CONCURRENT_KERNELS: 1 if the device supports | |
| | | * executing multiple kernels within the same context simultaneously, or | |
| | | 0 if | |
| | | * not. It is not guaranteed that multiple kernels will be resident | |
| | | * on the device concurrently so this feature should not be relied upon f | |
| | | or | |
| | | * correctness; | |
| | | * - ::CU_DEVICE_ATTRIBUTE_ECC_ENABLED: 1 if error correction is enabled on | |
| | | the | |
| | | * device, 0 if error correction is disabled or not supported by the dev | |
| | | ice. | |
| | | * - ::CU_DEVICE_ATTRIBUTE_PCI_BUS_ID: PCI bus identifier of the device. | |
| | | * - ::CU_DEVICE_ATTRIBUTE_PCI_DEVICE_ID: PCI device (also known as slot) i | |
| | | dentifier | |
| | | * of the device. | |
| | | * - ::CU_DEVICE_ATTRIBUTE_TCC_DRIVER: 1 if the device is using a TCC drive | |
| | | r. TCC | |
| | | * is only available on Tesla hardware running Windows Vista or later. | |
| | | * \param pi - Returned device attribute value | |
| | | * \param attrib - Device attribute to query | |
| | | * \param dev - Device handle | |
| | | * | |
| | | * \return | |
| | | * ::CUDA_SUCCESS, | |
| | | * ::CUDA_ERROR_DEINITIALIZED, | |
| | | * ::CUDA_ERROR_NOT_INITIALIZED, | |
| | | * ::CUDA_ERROR_INVALID_CONTEXT, | |
| | | * ::CUDA_ERROR_INVALID_VALUE, | |
| | | * ::CUDA_ERROR_INVALID_DEVICE | |
| | | * \notefnerr | |
| | | * | |
| | | * \sa ::cuDeviceComputeCapability, | |
| | | * ::cuDeviceGetCount, | |
| | | * ::cuDeviceGetName, | |
| | | * ::cuDeviceGet, | |
| | | * ::cuDeviceGetProperties, | |
| | | * ::cuDeviceTotalMem | |
| | | */ | |
| | | CUresult CUDAAPI cuDeviceGetAttribute(int *pi, CUdevice_attribute attrib, C | |
| | | Udevice dev); | |
| | | | |
|
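For attributes not covered by ::CUdevprop, individual queries look like the sketch below (the attribute selection is arbitrary and error checks are omitted; \p dev comes from ::cuDeviceGet()):

static void printAttributes(CUdevice dev)
{
    int smCount = 0, clockKHz = 0, canMapHost = 0;

    cuDeviceGetAttribute(&smCount,    CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT, dev);
    cuDeviceGetAttribute(&clockKHz,   CU_DEVICE_ATTRIBUTE_CLOCK_RATE, dev);
    cuDeviceGetAttribute(&canMapHost, CU_DEVICE_ATTRIBUTE_CAN_MAP_HOST_MEMORY, dev);
    printf("%d multiprocessors, %d kHz peak clock, can map host memory: %d\n",
           smCount, clockKHz, canMapHost);
}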
| /************************************ | | /** @} */ /* END CUDA_DEVICE */ | |
| ** | | | |
| ** Synchronous Memcpy | | | |
| ** | | | |
| ** Intra-device memcpy's done with these functions may execute in para | | | |
| llel with the CPU, | | | |
| ** but if host memory is involved, they wait until the copy is done be | | | |
| fore returning. | | | |
| ** | | | |
| ***********************************/ | | | |
| | | | |
|
| // 1D functions | | /** | |
| // system <-> device memory | | * \defgroup CUDA_CTX Context Management | |
| CUresult CUDAAPI cuMemcpyHtoD (CUdeviceptr dstDevice, const void * | | * | |
| srcHost, unsigned int ByteCount ); | | * This section describes the context management functions of the low-level | |
| CUresult CUDAAPI cuMemcpyDtoH (void *dstHost, CUdeviceptr srcDevic | | * CUDA driver application programming interface. | |
| e, unsigned int ByteCount ); | | * | |
| | | * @{ | |
| | | */ | |
| | | | |
|
| // device <-> device memory | | #if __CUDA_API_VERSION >= 3020 | |
| CUresult CUDAAPI cuMemcpyDtoD (CUdeviceptr dstDevice, CUdeviceptr | | /** | |
| srcDevice, unsigned int ByteCount ); | | * \brief Create a CUDA context | |
| | | * | |
| | | * Creates a new CUDA context and associates it with the calling thread. Th | |
| | | e | |
| | | * \p flags parameter is described below. The context is created with a usa | |
| | | ge | |
| | | * count of 1 and the caller of ::cuCtxCreate() must call ::cuCtxDestroy() | |
| | | or | |
| | | * ::cuCtxDetach() when done using the context. If a context is already cur | |
| | | rent | |
| | | * to the thread, it is supplanted by the newly created context and may be | |
| | | * restored by a subsequent call to ::cuCtxPopCurrent(). | |
| | | * | |
| | | * The two LSBs of the \p flags parameter can be used to control how the OS | |
| | | * thread, which owns the CUDA context at the time of an API call, interact | |
| | | s | |
| | | * with the OS scheduler when waiting for results from the GPU. | |
| | | * | |
| | | * - ::CU_CTX_SCHED_AUTO: The default value if the \p flags parameter is ze | |
| | | ro, | |
| | | * uses a heuristic based on the number of active CUDA contexts in the | |
| | | * process \e C and the number of logical processors in the system \e P. If | |
| | | * \e C > \e P, then CUDA will yield to other OS threads when waiting for | |
| | | * the GPU, otherwise CUDA will not yield while waiting for results and | |
| | | * actively spin on the processor. | |
| | | * | |
| | | * - ::CU_CTX_SCHED_SPIN: Instruct CUDA to actively spin when waiting for | |
| | | * results from the GPU. This can decrease latency when waiting for the GPU | |
| | | , | |
| | | * but may lower the performance of CPU threads if they are performing work | |
| | | in | |
| | | * parallel with the CUDA thread. | |
| | | * | |
| | | * - ::CU_CTX_SCHED_YIELD: Instruct CUDA to yield its thread when waiting f | |
| | | or | |
| | | * results from the GPU. This can increase latency when waiting for the GPU | |
| | | , | |
| | | * but can increase the performance of CPU threads performing work in paral | |
| | | lel | |
| | | * with the GPU. | |
| | | * | |
| | | * - ::CU_CTX_BLOCKING_SYNC: Instruct CUDA to block the CPU thread on a | |
| | | * synchronization primitive when waiting for the GPU to finish work. | |
| | | * | |
| | | * - ::CU_CTX_MAP_HOST: Instruct CUDA to support mapped pinned allocations. | |
| | | * This flag must be set in order to allocate pinned host memory that is | |
| | | * accessible to the GPU. | |
| | | * | |
| | | * - ::CU_CTX_LMEM_RESIZE_TO_MAX: Instruct CUDA to not reduce local memory | |
| | | * after resizing local memory for a kernel. This can prevent thrashing by | |
| | | * local memory allocations when launching many kernels with high local | |
| | | * memory usage at the cost of potentially increased memory usage. | |
| | | * | |
| | | * <b>Note to Linux users</b>: | |
| | | * | |
| | | * Context creation will fail with ::CUDA_ERROR_UNKNOWN if the compute mode | |
| | | of | |
| | | * the device is ::CU_COMPUTEMODE_PROHIBITED. Similarly, context creation w | |
| | | ill | |
| | | * also fail with ::CUDA_ERROR_UNKNOWN if the compute mode for the device i | |
| | | s | |
| | | * set to ::CU_COMPUTEMODE_EXCLUSIVE and there is already an active context | |
| | | on | |
| | | * the device. The function ::cuDeviceGetAttribute() can be used with | |
| | | * ::CU_DEVICE_ATTRIBUTE_COMPUTE_MODE to determine the compute mode of the | |
| | | * device. The <i>nvidia-smi</i> tool can be used to set the compute mode f | |
| | | or | |
| | | * devices. Documentation for <i>nvidia-smi</i> can be obtained by passing | |
| | | a | |
| | | * -h option to it. | |
| | | * | |
| | | * \param pctx - Returned context handle of the new context | |
| | | * \param flags - Context creation flags | |
| | | * \param dev - Device to create context on | |
| | | * | |
| | | * \return | |
| | | * ::CUDA_SUCCESS, | |
| | | * ::CUDA_ERROR_DEINITIALIZED, | |
| | | * ::CUDA_ERROR_NOT_INITIALIZED, | |
| | | * ::CUDA_ERROR_INVALID_CONTEXT, | |
| | | * ::CUDA_ERROR_INVALID_DEVICE, | |
| | | * ::CUDA_ERROR_INVALID_VALUE, | |
| | | * ::CUDA_ERROR_OUT_OF_MEMORY, | |
| | | * ::CUDA_ERROR_UNKNOWN | |
| | | * \notefnerr | |
| | | * | |
| | | * \sa ::cuCtxAttach, | |
| | | * ::cuCtxDestroy, | |
| | | * ::cuCtxDetach, | |
| | | * ::cuCtxGetApiVersion, | |
| | | * ::cuCtxGetCacheConfig, | |
| | | * ::cuCtxGetDevice, | |
| | | * ::cuCtxGetLimit, | |
| | | * ::cuCtxPopCurrent, | |
| | | * ::cuCtxPushCurrent, | |
| | | * ::cuCtxSetCacheConfig, | |
| | | * ::cuCtxSetLimit, | |
| | | * ::cuCtxSynchronize | |
| | | */ | |
| | | CUresult CUDAAPI cuCtxCreate(CUcontext *pctx, unsigned int flags, CUdevice | |
| | | dev); | |
| | | #endif /* __CUDA_API_VERSION >= 3020 */ | |
| | | | |
|
| // device <-> array memory | | /** | |
| CUresult CUDAAPI cuMemcpyDtoA ( CUarray dstArray, unsigned int dst | | * \brief Destroy the current context or a floating CUDA context | |
| Offset, CUdeviceptr srcDevice, unsigned int ByteCount ); | | * | |
| CUresult CUDAAPI cuMemcpyAtoD ( CUdeviceptr dstDevice, CUarray src | | * Destroys the CUDA context specified by \p ctx. If the context usage coun | |
| Array, unsigned int srcOffset, unsigned int ByteCount ); | | t is | |
| | | * not equal to 1, or the context is current to any CPU thread other than t | |
| | | he | |
| | | * current one, this function fails. Floating contexts (detached from a CPU | |
| | | * thread via ::cuCtxPopCurrent()) may be destroyed by this function. | |
| | | * | |
| | | * \param ctx - Context to destroy | |
| | | * | |
| | | * \return | |
| | | * ::CUDA_SUCCESS, | |
| | | * ::CUDA_ERROR_DEINITIALIZED, | |
| | | * ::CUDA_ERROR_NOT_INITIALIZED, | |
| | | * ::CUDA_ERROR_INVALID_CONTEXT, | |
| | | * ::CUDA_ERROR_INVALID_VALUE | |
| | | * \notefnerr | |
| | | * | |
| | | * \sa ::cuCtxAttach, | |
| | | * ::cuCtxCreate, | |
| | | * ::cuCtxDetach, | |
| | | * ::cuCtxGetApiVersion, | |
| | | * ::cuCtxGetCacheConfig, | |
| | | * ::cuCtxGetDevice, | |
| | | * ::cuCtxGetLimit, | |
| | | * ::cuCtxPopCurrent, | |
| | | * ::cuCtxPushCurrent, | |
| | | * ::cuCtxSetCacheConfig, | |
| | | * ::cuCtxSetLimit, | |
| | | * ::cuCtxSynchronize | |
| | | */ | |
| | | CUresult CUDAAPI cuCtxDestroy(CUcontext ctx); | |
| | | | |
|
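A typical create/use/destroy sequence, shown here only as a sketch (a \p flags value of 0 selects ::CU_CTX_SCHED_AUTO; real code should check every return value):

static void withContext(CUdevice dev)
{
    CUcontext ctx;

    if (cuCtxCreate(&ctx, 0, dev) != CUDA_SUCCESS)   /* ctx becomes current on success */
        return;
    /* ... allocate memory, load modules, launch kernels ... */
    cuCtxSynchronize();    /* wait for outstanding work before tearing down            */
    cuCtxDestroy(ctx);     /* usage count is still 1, so the context is destroyed      */
}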
| // system <-> array memory | | /** | |
| CUresult CUDAAPI cuMemcpyHtoA( CUarray dstArray, unsigned int dstO | | * \brief Increment a context's usage-count | |
| ffset, const void *srcHost, unsigned int ByteCount ); | | * | |
| CUresult CUDAAPI cuMemcpyAtoH( void *dstHost, CUarray srcArray, un | | * Increments the usage count of the context and passes back a context hand | |
| signed int srcOffset, unsigned int ByteCount ); | | le | |
| | | * in \p *pctx that must be passed to ::cuCtxDetach() when the application | |
| | | is | |
| | | * done with the context. ::cuCtxAttach() fails if there is no context curr | |
| | | ent | |
| | | * to the thread. | |
| | | * | |
| | | * Currently, the \p flags parameter must be 0. | |
| | | * | |
| | | * \param pctx - Returned context handle of the current context | |
| | | * \param flags - Context attach flags (must be 0) | |
| | | * | |
| | | * \return | |
| | | * ::CUDA_SUCCESS, | |
| | | * ::CUDA_ERROR_DEINITIALIZED, | |
| | | * ::CUDA_ERROR_NOT_INITIALIZED, | |
| | | * ::CUDA_ERROR_INVALID_CONTEXT, | |
| | | * ::CUDA_ERROR_INVALID_VALUE | |
| | | * \notefnerr | |
| | | * | |
| | | * \sa ::cuCtxCreate, | |
| | | * ::cuCtxDestroy, | |
| | | * ::cuCtxDetach, | |
| | | * ::cuCtxGetApiVersion, | |
| | | * ::cuCtxGetCacheConfig, | |
| | | * ::cuCtxGetDevice, | |
| | | * ::cuCtxGetLimit, | |
| | | * ::cuCtxPopCurrent, | |
| | | * ::cuCtxPushCurrent, | |
| | | * ::cuCtxSetCacheConfig, | |
| | | * ::cuCtxSetLimit, | |
| | | * ::cuCtxSynchronize | |
| | | */ | |
| | | CUresult CUDAAPI cuCtxAttach(CUcontext *pctx, unsigned int flags); | |
| | | | |
|
| // array <-> array memory | | /** | |
| CUresult CUDAAPI cuMemcpyAtoA( CUarray dstArray, unsigned int dstO | | * \brief Decrement a context's usage-count | |
| ffset, CUarray srcArray, unsigned int srcOffset, unsigned int ByteCount ); | | * | |
| | | * Decrements the usage count of the context \p ctx, and destroys the conte | |
| | | xt | |
| | | * if the usage count goes to 0. The context must be a handle that was pass | |
| | | ed | |
| | | * back by ::cuCtxCreate() or ::cuCtxAttach(), and must be current to the | |
| | | * calling thread. | |
| | | * | |
| | | * \param ctx - Context to destroy | |
| | | * | |
| | | * \return | |
| | | * ::CUDA_SUCCESS, | |
| | | * ::CUDA_ERROR_DEINITIALIZED, | |
| | | * ::CUDA_ERROR_NOT_INITIALIZED, | |
| | | * ::CUDA_ERROR_INVALID_CONTEXT | |
| | | * \notefnerr | |
| | | * | |
| | | * \sa ::cuCtxAttach, | |
| | | * ::cuCtxCreate, | |
| | | * ::cuCtxDestroy, | |
| | | * ::cuCtxGetApiVersion, | |
| | | * ::cuCtxGetCacheConfig, | |
| | | * ::cuCtxGetDevice, | |
| | | * ::cuCtxGetLimit, | |
| | | * ::cuCtxPopCurrent, | |
| | | * ::cuCtxPushCurrent, | |
| | | * ::cuCtxSetCacheConfig, | |
| | | * ::cuCtxSetLimit, | |
| | | * ::cuCtxSynchronize | |
| | | */ | |
| | | CUresult CUDAAPI cuCtxDetach(CUcontext ctx); | |
| | | | |
|
| // 2D memcpy | | /** | |
| | | * \brief Pushes a floating context on the current CPU thread | |
| | | * | |
| | | * Pushes the given context \p ctx onto the CPU thread's stack of current | |
| | | * contexts. The specified context becomes the CPU thread's current context | |
| | | , so | |
| | | * all CUDA functions that operate on the current context are affected. | |
| | | * | |
| | | * The previous current context may be made current again by calling | |
| | | * ::cuCtxDestroy() or ::cuCtxPopCurrent(). | |
| | | * | |
| | | * The context must be "floating," i.e. not attached to any thread. Context | |
| | | s are | |
| | | * made to float by calling ::cuCtxPopCurrent(). | |
| | | * | |
| | | * \param ctx - Floating context to attach | |
| | | * | |
| | | * \return | |
| | | * ::CUDA_SUCCESS, | |
| | | * ::CUDA_ERROR_DEINITIALIZED, | |
| | | * ::CUDA_ERROR_NOT_INITIALIZED, | |
| | | * ::CUDA_ERROR_INVALID_CONTEXT, | |
| | | * ::CUDA_ERROR_INVALID_VALUE | |
| | | * \notefnerr | |
| | | * | |
| | | * \sa ::cuCtxAttach, | |
| | | * ::cuCtxCreate, | |
| | | * ::cuCtxDestroy, | |
| | | * ::cuCtxDetach, | |
| | | * ::cuCtxGetApiVersion, | |
| | | * ::cuCtxGetCacheConfig, | |
| | | * ::cuCtxGetDevice, | |
| | | * ::cuCtxGetLimit, | |
| | | * ::cuCtxPopCurrent, | |
| | | * ::cuCtxSetCacheConfig, | |
| | | * ::cuCtxSetLimit, | |
| | | * ::cuCtxSynchronize | |
| | | */ | |
| | | CUresult CUDAAPI cuCtxPushCurrent(CUcontext ctx ); | |
| | | | |
|
| CUresult CUDAAPI cuMemcpy2D( const CUDA_MEMCPY2D *pCopy ); | | /** | |
| CUresult CUDAAPI cuMemcpy2DUnaligned( const CUDA_MEMCPY2D *pCopy ) | | * \brief Pops the current CUDA context from the current CPU thread | |
| ; | | * | |
| | | * Pops the current CUDA context from the CPU thread. The CUDA context must | |
| | | * have a usage count of 1. CUDA contexts have a usage count of 1 upon | |
| | | * creation; the usage count may be incremented with ::cuCtxAttach() and | |
| | | * decremented with ::cuCtxDetach(). | |
| | | * | |
| | | * If successful, ::cuCtxPopCurrent() passes back the old context handle in | |
| | | * \p *pctx. That context may then be made current to a different CPU threa | |
| | | d | |
| | | * by calling ::cuCtxPushCurrent(). | |
| | | * | |
| | | * Floating contexts may be destroyed by calling ::cuCtxDestroy(). | |
| | | * | |
| | | * If a context was current to the CPU thread before ::cuCtxCreate() or | |
| | | * ::cuCtxPushCurrent() was called, this function makes that context curren | |
| | | t to | |
| | | * the CPU thread again. | |
| | | * | |
| | | * \param pctx - Returned new context handle | |
| | | * | |
| | | * \return | |
| | | * ::CUDA_SUCCESS, | |
| | | * ::CUDA_ERROR_DEINITIALIZED, | |
| | | * ::CUDA_ERROR_NOT_INITIALIZED, | |
| | | * ::CUDA_ERROR_INVALID_CONTEXT | |
| | | * \notefnerr | |
| | | * | |
| | | * \sa ::cuCtxAttach, | |
| | | * ::cuCtxCreate, | |
| | | * ::cuCtxDestroy, | |
| | | * ::cuCtxDetach, | |
| | | * ::cuCtxGetApiVersion, | |
| | | * ::cuCtxGetCacheConfig, | |
| | | * ::cuCtxGetDevice, | |
| | | * ::cuCtxGetLimit, | |
| | | * ::cuCtxPushCurrent, | |
| | | * ::cuCtxSetCacheConfig, | |
| | | * ::cuCtxSetLimit, | |
| | | * ::cuCtxSynchronize | |
| | | */ | |
| | | CUresult CUDAAPI cuCtxPopCurrent(CUcontext *pctx); | |
| | | | |
|
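The push/pop pair is what lets one CPU thread hand a context to another. The following compressed sketch shows that hand-off in a single function for brevity; in a real program the two halves run on different CPU threads with appropriate synchronization between them:

static void handOffExample(void)
{
    CUdevice  dev;
    CUcontext ctx, prev;

    cuDeviceGet(&dev, 0);
    cuCtxCreate(&ctx, 0, dev);   /* ctx is current to this thread, usage count 1 */
    cuCtxPopCurrent(&prev);      /* ctx now floats: no CPU thread owns it        */

    /* ... later, typically on a different CPU thread ... */
    cuCtxPushCurrent(ctx);       /* ctx becomes current to the calling thread    */
    /* ... driver API calls here operate on ctx ...                              */
    cuCtxPopCurrent(&prev);      /* float it again, or destroy it with cuCtxDestroy */
}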
| // 3D memcpy | | /** | |
| | | * \brief Returns the device ID for the current context | |
| | | * | |
| | | * Returns in \p *device the ordinal of the current context's device. | |
| | | * | |
| | | * \param device - Returned device ID for the current context | |
| | | * | |
| | | * \return | |
| | | * ::CUDA_SUCCESS, | |
| | | * ::CUDA_ERROR_DEINITIALIZED, | |
| | | * ::CUDA_ERROR_NOT_INITIALIZED, | |
| | | * ::CUDA_ERROR_INVALID_CONTEXT, | |
| | | * ::CUDA_ERROR_INVALID_VALUE, | |
| | | * \notefnerr | |
| | | * | |
| | | * \sa ::cuCtxAttach, | |
| | | * ::cuCtxCreate, | |
| | | * ::cuCtxDestroy, | |
| | | * ::cuCtxDetach, | |
| | | * ::cuCtxGetApiVersion, | |
| | | * ::cuCtxGetCacheConfig, | |
| | | * ::cuCtxGetLimit, | |
| | | * ::cuCtxPopCurrent, | |
| | | * ::cuCtxPushCurrent, | |
| | | * ::cuCtxSetCacheConfig, | |
| | | * ::cuCtxSetLimit, | |
| | | * ::cuCtxSynchronize | |
| | | */ | |
| | | CUresult CUDAAPI cuCtxGetDevice(CUdevice *device); | |
| | | | |
|
| CUresult CUDAAPI cuMemcpy3D( const CUDA_MEMCPY3D *pCopy ); | | /** | |
| | | * \brief Block for a context's tasks to complete | |
| | | * | |
| | | * Blocks until the device has completed all preceding requested tasks. | |
| | | * ::cuCtxSynchronize() returns an error if one of the preceding tasks fail | |
| | | ed. | |
| | | * If the context was created with the ::CU_CTX_BLOCKING_SYNC flag, the CPU | |
| | | * thread will block until the GPU context has finished its work. | |
| | | * | |
| | | * \return | |
| | | * ::CUDA_SUCCESS, | |
| | | * ::CUDA_ERROR_DEINITIALIZED, | |
| | | * ::CUDA_ERROR_NOT_INITIALIZED, | |
| | | * ::CUDA_ERROR_INVALID_CONTEXT | |
| | | * \notefnerr | |
| | | * | |
| | | * \sa ::cuCtxAttach, | |
| | | * ::cuCtxCreate, | |
| | | * ::cuCtxDestroy, | |
| | | * ::cuCtxDetach, | |
| | | * ::cuCtxGetApiVersion, | |
| | | * ::cuCtxGetCacheConfig, | |
| | | * ::cuCtxGetDevice, | |
| | | * ::cuCtxGetLimit, | |
| | | * ::cuCtxPopCurrent, | |
| | | * ::cuCtxPushCurrent | |
| | | * ::cuCtxSetCacheConfig, | |
| | | * ::cuCtxSetLimit | |
| | | */ | |
| | | CUresult CUDAAPI cuCtxSynchronize(void); | |
| | | | |
|
| /************************************ | | /** | |
| ** | | * \brief Set resource limits | |
| ** Asynchronous Memcpy | | * | |
| ** | | * Setting \p limit to \p value is a request by the application to update | |
| ** Any host memory involved must be DMA'able (e.g., allocated with cuM | | * the current limit maintained by the context. The driver is free to | |
| emAllocHost). | | * modify the requested value to meet h/w requirements (this could be | |
| ** memcpy's done with these functions execute in parallel with the CPU | | * clamping to minimum or maximum values, rounding up to nearest element | |
| and, if | | * size, etc). The application can use ::cuCtxGetLimit() to find out exact | |
| ** the hardware is available, may execute in parallel with the GPU. | | ly | |
| ** Asynchronous memcpy must be accompanied by appropriate stream synch | | * what the limit has been set to. | |
| ronization. | | * | |
| ** | | * Setting each ::CUlimit has its own specific restrictions, so each is | |
| ***********************************/ | | * discussed here. | |
| | | * | |
| | | * - ::CU_LIMIT_STACK_SIZE controls the stack size of each GPU thread. | |
| | | * This limit is only applicable to devices of compute capability | |
| | | * 2.0 and higher. Attempting to set this limit on devices of | |
| | | * compute capability less than 2.0 will result in the error | |
| | | * ::CUDA_ERROR_UNSUPPORTED_LIMIT being returned. | |
| | | * | |
| | | * - ::CU_LIMIT_PRINTF_FIFO_SIZE controls the size of the FIFO used | |
| | | * by the ::printf() device system call. Setting | |
| | | * ::CU_LIMIT_PRINTF_FIFO_SIZE must be performed before launching any | |
| | | * kernel that uses the ::printf() device system call, otherwise | |
| | | * ::CUDA_ERROR_INVALID_VALUE will be returned. | |
| | | * This limit is only applicable to devices of compute capability | |
| | | * 2.0 and higher. Attempting to set this limit on devices of | |
| | | * compute capability less than 2.0 will result in the error | |
| | | * ::CUDA_ERROR_UNSUPPORTED_LIMIT being returned. | |
| | | * | |
| | | * - ::CU_LIMIT_MALLOC_HEAP_SIZE controls the size of the heap used | |
| | | * by the ::malloc() and ::free() device system calls. Setting | |
| | | * ::CU_LIMIT_MALLOC_HEAP_SIZE must be performed before launching | |
| | | * any kernel that uses the ::malloc() or ::free() device system calls, | |
| | | * otherwise ::CUDA_ERROR_INVALID_VALUE will be returned. | |
| | | * This limit is only applicable to devices of compute capability | |
| | | * 2.0 and higher. Attempting to set this limit on devices of | |
| | | * compute capability less than 2.0 will result in the error | |
| | | * ::CUDA_ERROR_UNSUPPORTED_LIMIT being returned. | |
| | | * | |
| | | * \param limit - Limit to set | |
| | | * \param value - Size in bytes of limit | |
| | | * | |
| | | * \return | |
| | | * ::CUDA_SUCCESS, | |
| | | * ::CUDA_ERROR_INVALID_VALUE, | |
| | | * ::CUDA_ERROR_UNSUPPORTED_LIMIT | |
| | | * \notefnerr | |
| | | * | |
| | | * \sa ::cuCtxAttach, | |
| | | * ::cuCtxCreate, | |
| | | * ::cuCtxDestroy, | |
| | | * ::cuCtxDetach, | |
| | | * ::cuCtxGetApiVersion, | |
| | | * ::cuCtxGetCacheConfig, | |
| | | * ::cuCtxGetDevice, | |
| | | * ::cuCtxGetLimit, | |
| | | * ::cuCtxPopCurrent, | |
| | | * ::cuCtxPushCurrent, | |
| | | * ::cuCtxSetCacheConfig, | |
| | | * ::cuCtxSynchronize | |
| | | */ | |
| | | CUresult CUDAAPI cuCtxSetLimit(CUlimit limit, size_t value); | |
| | | | |
|
| // 1D functions | | /** | |
| // system <-> device memory | | * \brief Returns resource limits | |
| CUresult CUDAAPI cuMemcpyHtoDAsync (CUdeviceptr dstDevice, | | * | |
| const void *srcHost, unsigned int ByteCount, CUstream hStream ) | | * Returns in \p *pvalue the current size of \p limit. The supported | |
| ; | | * ::CUlimit values are: | |
| CUresult CUDAAPI cuMemcpyDtoHAsync (void *dstHost, | | * - ::CU_LIMIT_STACK_SIZE: stack size of each GPU thread; | |
| CUdeviceptr srcDevice, unsigned int ByteCount, CUstream hStream | | * - ::CU_LIMIT_PRINTF_FIFO_SIZE: size of the FIFO used by the | |
| ); | | * ::printf() device system call. | |
| | | * - ::CU_LIMIT_MALLOC_HEAP_SIZE: size of the heap used by the | |
| | | * ::malloc() and ::free() device system calls; | |
| | | * | |
| | | * \param limit - Limit to query | |
| | | * \param pvalue - Returned size in bytes of limit | |
| | | * | |
| | | * \return | |
| | | * ::CUDA_SUCCESS, | |
| | | * ::CUDA_ERROR_INVALID_VALUE, | |
| | | * ::CUDA_ERROR_UNSUPPORTED_LIMIT | |
| | | * \notefnerr | |
| | | * | |
| | | * \sa ::cuCtxAttach, | |
| | | * ::cuCtxCreate, | |
| | | * ::cuCtxDestroy, | |
| | | * ::cuCtxDetach, | |
| | | * ::cuCtxGetApiVersion, | |
| | | * ::cuCtxGetCacheConfig, | |
| | | * ::cuCtxGetDevice, | |
| | | * ::cuCtxPopCurrent, | |
| | | * ::cuCtxPushCurrent, | |
| | | * ::cuCtxSetCacheConfig, | |
| | | * ::cuCtxSetLimit, | |
| | | * ::cuCtxSynchronize | |
| | | */ | |
| | | CUresult CUDAAPI cuCtxGetLimit(size_t *pvalue, CUlimit limit); | |
| | | | |
|
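Because the driver may round a requested limit, set-then-read-back is the usual pattern. The 4 MB figure below is arbitrary, and the calls only succeed on devices of compute capability 2.0 or higher (otherwise ::CUDA_ERROR_UNSUPPORTED_LIMIT is returned):

static void growPrintfFifo(void)
{
    size_t granted = 0;

    if (cuCtxSetLimit(CU_LIMIT_PRINTF_FIFO_SIZE, 4 * 1024 * 1024) == CUDA_SUCCESS) {
        cuCtxGetLimit(&granted, CU_LIMIT_PRINTF_FIFO_SIZE);   /* value actually granted */
        printf("printf FIFO is now %lu bytes\n", (unsigned long)granted);
    }
}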
| // device <-> device memory | | /** | |
| CUresult CUDAAPI cuMemcpyDtoDAsync (CUdeviceptr dstDevice, | | * \brief Returns the preferred cache configuration for the current context | |
| CUdeviceptr srcDevice, unsigned int ByteCount, CUstream hStream | | . | |
| ); | | * | |
| | | * On devices where the L1 cache and shared memory use the same hardware | |
| | | * resources, this returns through \p pconfig the preferred cache configura | |
| | | tion | |
| | | * for the current context. This is only a preference. The driver will use | |
| | | * the requested configuration if possible, but it is free to choose a diff | |
| | | erent | |
| | | * configuration if required to execute functions. | |
| | | * | |
| | | * This will return a \p pconfig of ::CU_FUNC_CACHE_PREFER_NONE on devices | |
| | | * where the size of the L1 cache and shared memory are fixed. | |
| | | * | |
| | | * The supported cache configurations are: | |
| | | * - ::CU_FUNC_CACHE_PREFER_NONE: no preference for shared memory or L1 (de | |
| | | fault) | |
| | | * - ::CU_FUNC_CACHE_PREFER_SHARED: prefer larger shared memory and smaller | |
| | | L1 cache | |
| | | * - ::CU_FUNC_CACHE_PREFER_L1: prefer larger L1 cache and smaller shared m | |
| | | emory | |
| | | * | |
| | | * \param pconfig - Returned cache configuration | |
| | | * | |
| | | * \return | |
| | | * ::CUDA_SUCCESS, | |
| | | * ::CUDA_ERROR_DEINITIALIZED, | |
| | | * ::CUDA_ERROR_NOT_INITIALIZED, | |
| | | * ::CUDA_ERROR_INVALID_CONTEXT, | |
| | | * ::CUDA_ERROR_INVALID_VALUE | |
| | | * \notefnerr | |
| | | * | |
| | | * \sa ::cuCtxAttach, | |
| | | * ::cuCtxCreate, | |
| | | * ::cuCtxDestroy, | |
| | | * ::cuCtxDetach, | |
| | | * ::cuCtxGetApiVersion, | |
| | | * ::cuCtxGetDevice, | |
| | | * ::cuCtxGetLimit, | |
| | | * ::cuCtxPopCurrent, | |
| | | * ::cuCtxPushCurrent, | |
| | | * ::cuCtxSetCacheConfig, | |
| | | * ::cuCtxSetLimit, | |
| | | * ::cuCtxSynchronize, | |
| | | * ::cuFuncSetCacheConfig | |
| | | */ | |
| | | CUresult CUDAAPI cuCtxGetCacheConfig(CUfunc_cache *pconfig); | |
| | | | |
|
| // system <-> array memory | | /** | |
| CUresult CUDAAPI cuMemcpyHtoAAsync( CUarray dstArray, unsigned int | | * \brief Sets the preferred cache configuration for the current context. | |
| dstOffset, | | * | |
| const void *srcHost, unsigned int ByteCount, CUstream hStream ) | | * On devices where the L1 cache and shared memory use the same hardware | |
| ; | | * resources, this sets through \p config the preferred cache configuration | |
| CUresult CUDAAPI cuMemcpyAtoHAsync( void *dstHost, CUarray srcArra | | for | |
| y, unsigned int srcOffset, | | * the current context. This is only a preference. The driver will use | |
| unsigned int ByteCount, CUstream hStream ); | | * the requested configuration if possible, but it is free to choose a diff | |
| | | erent | |
| | | * configuration if required to execute the function. Any function preferen | |
| | | ce | |
| | | * set via ::cuFuncSetCacheConfig() will be preferred over this context-wid | |
| | | e | |
| | | * setting. Setting the context-wide cache configuration to | |
| | | * ::CU_FUNC_CACHE_PREFER_NONE will cause subsequent kernel launches to pre | |
| | | fer | |
| | | * to not change the cache configuration unless required to launch the kern | |
| | | el. | |
| | | * | |
| | | * This setting does nothing on devices where the size of the L1 cache and | |
| | | * shared memory are fixed. | |
| | | * | |
| | | * Launching a kernel with a different preference than the most recent | |
| | | * preference setting may insert a device-side synchronization point. | |
| | | * | |
| | | * The supported cache configurations are: | |
| | | * - ::CU_FUNC_CACHE_PREFER_NONE: no preference for shared memory or L1 (de | |
| | | fault) | |
| | | * - ::CU_FUNC_CACHE_PREFER_SHARED: prefer larger shared memory and smaller | |
| | | L1 cache | |
| | | * - ::CU_FUNC_CACHE_PREFER_L1: prefer larger L1 cache and smaller shared m | |
| | | emory | |
| | | * | |
| | | * \param config - Requested cache configuration | |
| | | * | |
| | | * \return | |
| | | * ::CUDA_SUCCESS, | |
| | | * ::CUDA_ERROR_DEINITIALIZED, | |
| | | * ::CUDA_ERROR_NOT_INITIALIZED, | |
| | | * ::CUDA_ERROR_INVALID_CONTEXT, | |
| | | * ::CUDA_ERROR_INVALID_VALUE | |
| | | * \notefnerr | |
| | | * | |
| | | * \sa ::cuCtxAttach, | |
| | | * ::cuCtxCreate, | |
| | | * ::cuCtxDestroy, | |
| | | * ::cuCtxDetach, | |
| | | * ::cuCtxGetApiVersion, | |
| | | * ::cuCtxGetCacheConfig, | |
| | | * ::cuCtxGetDevice, | |
| | | * ::cuCtxGetLimit, | |
| | | * ::cuCtxPopCurrent, | |
| | | * ::cuCtxPushCurrent, | |
| | | * ::cuCtxSetLimit, | |
| | | * ::cuCtxSynchronize, | |
| | | * ::cuFuncSetCacheConfig | |
| | | */ | |
| | | CUresult CUDAAPI cuCtxSetCacheConfig(CUfunc_cache config); | |
| | | | |
|
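A small sketch of the context-wide preference; on hardware with a fixed L1/shared-memory split both calls are effectively no-ops:

static void preferL1(void)
{
    CUfunc_cache cfg;

    cuCtxSetCacheConfig(CU_FUNC_CACHE_PREFER_L1);   /* request a larger L1 cache        */
    cuCtxGetCacheConfig(&cfg);                      /* read back the stored preference  */
}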
| // 2D memcpy | | /** | |
| CUresult CUDAAPI cuMemcpy2DAsync( const CUDA_MEMCPY2D *pCopy, CUst | | * \brief Gets the context's API version. | |
| ream hStream ); | | * | |
| | | * Returns the API version used to create \p ctx in \p version. If \p ctx | |
| | | * is NULL, returns the API version used to create the currently bound | |
| | | * context. | |
| | | * | |
| | | * This will return the API version used to create a context (for example, | |
| | | * 3010 or 3020), which library developers can use to direct callers to a | |
| | | * specific API version. Note that this API version may not be the same as | |
| | | * returned by cuDriverGetVersion. | |
| | | * | |
| | | * \param ctx - Context to check | |
| | | * \param version - Pointer to version | |
| | | * | |
| | | * \return | |
| | | * ::CUDA_SUCCESS, | |
| | | * ::CUDA_ERROR_DEINITIALIZED, | |
| | | * ::CUDA_ERROR_NOT_INITIALIZED, | |
| | | * ::CUDA_ERROR_INVALID_CONTEXT, | |
| | | * ::CUDA_ERROR_UNKNOWN | |
| | | * \notefnerr | |
| | | * | |
| | | * \sa ::cuCtxAttach, | |
| | | * ::cuCtxCreate, | |
| | | * ::cuCtxDestroy, | |
| | | * ::cuCtxDetach, | |
| | | * ::cuCtxGetDevice, | |
| | | * ::cuCtxGetLimit, | |
| | | * ::cuCtxPopCurrent, | |
| | | * ::cuCtxPushCurrent, | |
| | | * ::cuCtxSetCacheConfig, | |
| | | * ::cuCtxSetLimit, | |
| | | * ::cuCtxSynchronize | |
| | | */ | |
| | | CUresult CUDAAPI cuCtxGetApiVersion(CUcontext ctx, unsigned int *version); | |
| | | | |
|
| // 3D memcpy | | /** @} */ /* END CUDA_CTX */ | |
| CUresult CUDAAPI cuMemcpy3DAsync( const CUDA_MEMCPY3D *pCopy, CUst | | | |
| ream hStream ); | | | |
| | | | |
|
| /************************************ | | /** | |
| ** | | * \defgroup CUDA_MODULE Module Management | |
| ** Memset | | * | |
| ** | | * This section describes the module management functions of the low-level | |
| ***********************************/ | | CUDA | |
| CUresult CUDAAPI cuMemsetD8( CUdeviceptr dstDevice, unsigned char | | * driver application programming interface. | |
| uc, unsigned int N ); | | * | |
| CUresult CUDAAPI cuMemsetD16( CUdeviceptr dstDevice, unsigned shor | | * @{ | |
| t us, unsigned int N ); | | */ | |
| CUresult CUDAAPI cuMemsetD32( CUdeviceptr dstDevice, unsigned int | | | |
| ui, unsigned int N ); | | | |
| | | | |
|
| CUresult CUDAAPI cuMemsetD2D8( CUdeviceptr dstDevice, unsigned int | | /** | |
| dstPitch, unsigned char uc, unsigned int Width, unsigned int Height ); | | * \brief Loads a compute module | |
| CUresult CUDAAPI cuMemsetD2D16( CUdeviceptr dstDevice, unsigned in | | * | |
| t dstPitch, unsigned short us, unsigned int Width, unsigned int Height ); | | * Takes a filename \p fname and loads the corresponding module \p module i | |
| CUresult CUDAAPI cuMemsetD2D32( CUdeviceptr dstDevice, unsigned in | | nto | |
| t dstPitch, unsigned int ui, unsigned int Width, unsigned int Height ); | | * the current context. The CUDA driver API does not attempt to lazily | |
| | | * allocate the resources needed by a module; if the memory for functions a | |
| | | nd | |
| | | * data (constant and global) needed by the module cannot be allocated, | |
| | | * ::cuModuleLoad() fails. The file should be a \e cubin file as output by | |
| | | * \b nvcc or a \e PTX file, either as output by \b nvcc or handwritten. | |
| | | * | |
| | | * \param module - Returned module | |
| | | * \param fname - Filename of module to load | |
| | | * | |
| | | * \return | |
| | | * ::CUDA_SUCCESS, | |
| | | * ::CUDA_ERROR_DEINITIALIZED, | |
| | | * ::CUDA_ERROR_NOT_INITIALIZED, | |
| | | * ::CUDA_ERROR_INVALID_CONTEXT, | |
| | | * ::CUDA_ERROR_INVALID_VALUE, | |
| | | * ::CUDA_ERROR_NOT_FOUND, | |
| | | * ::CUDA_ERROR_OUT_OF_MEMORY, | |
| | | * ::CUDA_ERROR_FILE_NOT_FOUND, | |
| | | * ::CUDA_ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND, | |
| | | * ::CUDA_ERROR_SHARED_OBJECT_INIT_FAILED | |
| | | * \notefnerr | |
| | | * | |
| | | * \sa ::cuModuleGetFunction, | |
| | | * ::cuModuleGetGlobal, | |
| | | * ::cuModuleGetTexRef, | |
| | | * ::cuModuleLoadData, | |
| | | * ::cuModuleLoadDataEx, | |
| | | * ::cuModuleLoadFatBinary, | |
| | | * ::cuModuleUnload | |
| | | */ | |
| | | CUresult CUDAAPI cuModuleLoad(CUmodule *module, const char *fname); | |
| | | | |
|
| /************************************ | | /** | |
| ** | | * \brief Load a module's data | |
| ** Function management | | * | |
| ** | | * Takes a pointer \p image and loads the corresponding module \p module in | |
| ***********************************/ | | to | |
| | | * the current context. The pointer may be obtained by mapping a \e cubin o | |
| | | r | |
| | | * \e PTX file, passing a \e cubin or \e PTX file as a NULL-terminated text | |
| | | * string, or incorporating a \e cubin object into the executable resources | |
| | | * and using operating system calls such as Windows \c FindResource() to | |
| | | * obtain the pointer. | |
| | | * | |
| | | * \param module - Returned module | |
| | | * \param image - Module data to load | |
| | | * | |
| | | * \return | |
| | | * ::CUDA_SUCCESS, | |
| | | * ::CUDA_ERROR_DEINITIALIZED, | |
| | | * ::CUDA_ERROR_NOT_INITIALIZED, | |
| | | * ::CUDA_ERROR_INVALID_CONTEXT, | |
| | | * ::CUDA_ERROR_INVALID_VALUE, | |
| | | * ::CUDA_ERROR_OUT_OF_MEMORY, | |
| | | * ::CUDA_ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND, | |
| | | * ::CUDA_ERROR_SHARED_OBJECT_INIT_FAILED | |
| | | * \notefnerr | |
| | | * | |
| | | * \sa ::cuModuleGetFunction, | |
| | | * ::cuModuleGetGlobal, | |
| | | * ::cuModuleGetTexRef, | |
| | | * ::cuModuleLoad, | |
| | | * ::cuModuleLoadDataEx, | |
| | | * ::cuModuleLoadFatBinary, | |
| | | * ::cuModuleUnload | |
| | | */ | |
| | | CUresult CUDAAPI cuModuleLoadData(CUmodule *module, const void *image); | |
| | | | |
|
| CUresult CUDAAPI cuFuncSetBlockShape (CUfunction hfunc, int x, int y, i | | /** | |
| nt z); | | * \brief Load a module's data with options | |
| CUresult CUDAAPI cuFuncSetSharedSize (CUfunction hfunc, unsigned int by | | * | |
| tes); | | * Takes a pointer \p image and loads the corresponding module \p module in | |
| CUresult CUDAAPI cuFuncGetAttribute (int *pi, CUfunction_attribute attr | | to | |
| ib, CUfunction hfunc); | | * the current context. The pointer may be obtained by mapping a \e cubin o | |
| CUresult CUDAAPI cuFuncSetCacheConfig(CUfunction hfunc, CUfunc_cache co | | r | |
| nfig); | | * \e PTX file, passing a \e cubin or \e PTX file as a NULL-terminated text | |
| | | * string, or incorporating a \e cubin object into the executable resources | |
| | | * and using operating system calls such as Windows \c FindResource() to | |
| | | * obtain the pointer. Options are passed as an array via \p options and an | |
| | | y | |
| | | * corresponding parameters are passed in \p optionValues. The number of to | |
| | | tal | |
| | | * options is supplied via \p numOptions. Any outputs will be returned via | |
| | | * \p optionValues. Supported options are (types for the option values are | |
| | | * specified in parentheses after the option name): | |
| | | * | |
| | | * - ::CU_JIT_MAX_REGISTERS: (unsigned int) input specifies the maximum num | |
| | | ber | |
| | | * of registers per thread; | |
| | | * - ::CU_JIT_THREADS_PER_BLOCK: (unsigned int) input specifies number of | |
| | | * threads per block to target compilation for; output returns the number o | |
| | | f | |
| | | * threads the compiler actually targeted; | |
| | | * - ::CU_JIT_WALL_TIME: (float) output returns the float value of wall clo | |
| | | ck | |
| | | * time, in milliseconds, spent compiling the \e PTX code; | |
| | | * - ::CU_JIT_INFO_LOG_BUFFER: (char*) input is a pointer to a buffer in | |
| | | * which to print any informational log messages from \e PTX assembly (the | |
| | | * buffer size is specified via option ::CU_JIT_INFO_LOG_BUFFER_SIZE_BYTES) | |
| | | ; | |
| | | * - ::CU_JIT_INFO_LOG_BUFFER_SIZE_BYTES: (unsigned int) input is the size | |
| | | in | |
| | | * bytes of the buffer; output is the number of bytes filled with messages; | |
| | | * - ::CU_JIT_ERROR_LOG_BUFFER: (char*) input is a pointer to a buffer in | |
| | | * which to print any error log messages from \e PTX assembly (the buffer s | |
| | | ize | |
| | | * is specified via option ::CU_JIT_ERROR_LOG_BUFFER_SIZE_BYTES); | |
| | | * - ::CU_JIT_ERROR_LOG_BUFFER_SIZE_BYTES: (unsigned int) input is the size | |
| | | in | |
| | | * bytes of the buffer; output is the number of bytes filled with messages; | |
| | | * - ::CU_JIT_OPTIMIZATION_LEVEL: (unsigned int) input is the level of | |
| | | * optimization to apply to generated code (0 - 4), with 4 being the defaul | |
| | | t | |
| | | * and highest level; | |
| | | * - ::CU_JIT_TARGET_FROM_CUCONTEXT: (No option value) causes compilation | |
| | | * target to be determined based on current attached context (default); | |
| | | * - ::CU_JIT_TARGET: (unsigned int for enumerated type ::CUjit_target_enum | |
| | | ) | |
| | | * input is the compilation target based on supplied ::CUjit_target_enum; | |
| | | * possible values are: | |
| | | * - ::CU_TARGET_COMPUTE_10 | |
| | | * - ::CU_TARGET_COMPUTE_11 | |
| | | * - ::CU_TARGET_COMPUTE_12 | |
| | | * - ::CU_TARGET_COMPUTE_13 | |
| | | * - ::CU_TARGET_COMPUTE_20 | |
| | | * - ::CU_JIT_FALLBACK_STRATEGY: (unsigned int for enumerated type | |
| | | * ::CUjit_fallback_enum) chooses fallback strategy if matching cubin is no | |
| | | t | |
| | | * found; possible values are: | |
| | | * - ::CU_PREFER_PTX | |
| | | * - ::CU_PREFER_BINARY | |
| | | * | |
| | | * \param module - Returned module | |
| | | * \param image - Module data to load | |
| | | * \param numOptions - Number of options | |
| | | * \param options - Options for JIT | |
| | | * \param optionValues - Option values for JIT | |
| | | * | |
| | | * \return | |
| | | * ::CUDA_SUCCESS, | |
| | | * ::CUDA_ERROR_DEINITIALIZED, | |
| | | * ::CUDA_ERROR_NOT_INITIALIZED, | |
| | | * ::CUDA_ERROR_INVALID_CONTEXT, | |
| | | * ::CUDA_ERROR_INVALID_VALUE, | |
| | | * ::CUDA_ERROR_OUT_OF_MEMORY, | |
| | | * ::CUDA_ERROR_NO_BINARY_FOR_GPU, | |
| | | * ::CUDA_ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND, | |
| | | * ::CUDA_ERROR_SHARED_OBJECT_INIT_FAILED | |
| | | * \notefnerr | |
| | | * | |
| | | * \sa ::cuModuleGetFunction, | |
| | | * ::cuModuleGetGlobal, | |
| | | * ::cuModuleGetTexRef, | |
| | | * ::cuModuleLoad, | |
| | | * ::cuModuleLoadData, | |
| | | * ::cuModuleLoadFatBinary, | |
| | | * ::cuModuleUnload | |
| | | */ | |
| | | CUresult CUDAAPI cuModuleLoadDataEx(CUmodule *module, const void *image, un | |
| | | signed int numOptions, CUjit_option *options, void **optionValues); | |
| | | | |
|
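One common use of the option array is capturing the JIT error log when compiling PTX at run time. The sketch below assumes <cuda.h> and <stdio.h> are included and that \p ptxSource points to a NULL-terminated PTX string obtained elsewhere; the buffer size is arbitrary:

static CUmodule loadPtx(const char *ptxSource)
{
    char errorLog[8192];
    CUjit_option opts[2] = { CU_JIT_ERROR_LOG_BUFFER,
                             CU_JIT_ERROR_LOG_BUFFER_SIZE_BYTES };
    void *vals[2];
    CUmodule mod = NULL;

    errorLog[0] = '\0';
    vals[0] = errorLog;                           /* where the driver writes the log */
    vals[1] = (void *)(size_t)sizeof(errorLog);   /* its size, passed by value       */

    if (cuModuleLoadDataEx(&mod, ptxSource, 2, opts, vals) != CUDA_SUCCESS)
        fprintf(stderr, "PTX JIT failed:\n%s\n", errorLog);
    return mod;
}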
| /************************************ | | /** | |
| ** | | * \brief Load a module's data | |
| ** Array management | | * | |
| ** | | * Takes a pointer \p fatCubin and loads the corresponding module \p module | |
| ***********************************/ | | * into the current context. The pointer represents a <i>fat binary</i> obj | |
| | | ect, | |
| | | * which is a collection of different \e cubin files, all representing the | |
| | | same | |
| | | * device code, but compiled and optimized for different architectures. The | |
| | | re | |
| | | * is currently no documented API for constructing and using fat binary obj | |
| | | ects | |
| | | * by programmers, and therefore this function is an internal function in t | |
| | | his | |
| | | * version of CUDA. More information can be found in the \b nvcc document. | |
| | | * | |
| | | * \param module - Returned module | |
| | | * \param fatCubin - Fat binary to load | |
| | | * | |
| | | * \return | |
| | | * ::CUDA_SUCCESS, | |
| | | * ::CUDA_ERROR_DEINITIALIZED, | |
| | | * ::CUDA_ERROR_NOT_INITIALIZED, | |
| | | * ::CUDA_ERROR_INVALID_CONTEXT, | |
| | | * ::CUDA_ERROR_INVALID_VALUE, | |
| | | * ::CUDA_ERROR_NOT_FOUND, | |
| | | * ::CUDA_ERROR_OUT_OF_MEMORY, | |
| | | * ::CUDA_ERROR_NO_BINARY_FOR_GPU, | |
| | | * ::CUDA_ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND, | |
| | | * ::CUDA_ERROR_SHARED_OBJECT_INIT_FAILED | |
| | | * \notefnerr | |
| | | * | |
| | | * \sa ::cuModuleGetFunction, | |
| | | * ::cuModuleGetGlobal, | |
| | | * ::cuModuleGetTexRef, | |
| | | * ::cuModuleLoad, | |
| | | * ::cuModuleLoadData, | |
| | | * ::cuModuleLoadDataEx, | |
| | | * ::cuModuleUnload | |
| | | */ | |
| | | CUresult CUDAAPI cuModuleLoadFatBinary(CUmodule *module, const void *fatCub | |
| | | in); | |
| | | | |
|
| CUresult CUDAAPI cuArrayCreate( CUarray *pHandle, const CUDA_ARRAY_DES | | /** | |
| CRIPTOR *pAllocateArray ); | | * \brief Unloads a module | |
| CUresult CUDAAPI cuArrayGetDescriptor( CUDA_ARRAY_DESCRIPTOR *pArrayDe | | * | |
| scriptor, CUarray hArray ); | | * Unloads a module \p hmod from the current context. | |
| CUresult CUDAAPI cuArrayDestroy( CUarray hArray ); | | * | |
| | | * \param hmod - Module to unload | |
| | | * | |
| | | * \return | |
| | | * ::CUDA_SUCCESS, | |
| | | * ::CUDA_ERROR_DEINITIALIZED, | |
| | | * ::CUDA_ERROR_NOT_INITIALIZED, | |
| | | * ::CUDA_ERROR_INVALID_CONTEXT, | |
| | | * ::CUDA_ERROR_INVALID_VALUE | |
| | | * \notefnerr | |
| | | * | |
| | | * \sa ::cuModuleGetFunction, | |
| | | * ::cuModuleGetGlobal, | |
| | | * ::cuModuleGetTexRef, | |
| | | * ::cuModuleLoad, | |
| | | * ::cuModuleLoadData, | |
| | | * ::cuModuleLoadDataEx, | |
| | | * ::cuModuleLoadFatBinary | |
| | | */ | |
| | | CUresult CUDAAPI cuModuleUnload(CUmodule hmod); | |
| | | | |
|
| CUresult CUDAAPI cuArray3DCreate( CUarray *pHandle, const CUDA_ARRAY3D | | /** | |
| _DESCRIPTOR *pAllocateArray ); | | * \brief Returns a function handle | |
| CUresult CUDAAPI cuArray3DGetDescriptor( CUDA_ARRAY3D_DESCRIPTOR *pArr | | * | |
| ayDescriptor, CUarray hArray ); | | * Returns in \p *hfunc the handle of the function of name \p name located | |
| | | in | |
| | | * module \p hmod. If no function of that name exists, ::cuModuleGetFunctio | |
| | | n() | |
| | | * returns ::CUDA_ERROR_NOT_FOUND. | |
| | | * | |
| | | * \param hfunc - Returned function handle | |
| | | * \param hmod - Module to retrieve function from | |
| | | * \param name - Name of function to retrieve | |
| | | * | |
| | | * \return | |
| | | * ::CUDA_SUCCESS, | |
| | | * ::CUDA_ERROR_DEINITIALIZED, | |
| | | * ::CUDA_ERROR_NOT_INITIALIZED, | |
| | | * ::CUDA_ERROR_INVALID_CONTEXT, | |
| | | * ::CUDA_ERROR_INVALID_VALUE, | |
| | | * ::CUDA_ERROR_NOT_FOUND | |
| | | * \notefnerr | |
| | | * | |
| | | * \sa ::cuModuleGetGlobal, | |
| | | * ::cuModuleGetTexRef, | |
| | | * ::cuModuleLoad, | |
| | | * ::cuModuleLoadData, | |
| | | * ::cuModuleLoadDataEx, | |
| | | * ::cuModuleLoadFatBinary, | |
| | | * ::cuModuleUnload | |
| | | */ | |
| | | CUresult CUDAAPI cuModuleGetFunction(CUfunction *hfunc, CUmodule hmod, cons | |
| | | t char *name); | |
| | | | |
|
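Module and function lookup are usually paired. In the sketch below the file name \c kernels.cubin and kernel name \c vecAdd are placeholders, and a context is assumed to be current to the calling thread:

static void loadAndFindKernel(void)
{
    CUmodule   mod;
    CUfunction kernel;

    if (cuModuleLoad(&mod, "kernels.cubin") != CUDA_SUCCESS)
        return;
    if (cuModuleGetFunction(&kernel, mod, "vecAdd") == CUDA_SUCCESS) {
        /* ... set up parameters and launch via the execution control API ... */
    }
    cuModuleUnload(mod);
}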
| /************************************ | | #if __CUDA_API_VERSION >= 3020 | |
| ** | | /** | |
| ** Texture reference management | | * \brief Returns a global pointer from a module | |
| ** | | * | |
| ***********************************/ | | * Returns in \p *dptr and \p *bytes the base pointer and size of the | |
| CUresult CUDAAPI cuTexRefCreate( CUtexref *pTexRef ); | | * global of name \p name located in module \p hmod. If no variable of that | |
| CUresult CUDAAPI cuTexRefDestroy( CUtexref hTexRef ); | | name | |
| | | * exists, ::cuModuleGetGlobal() returns ::CUDA_ERROR_NOT_FOUND. Both | |
| | | * parameters \p dptr and \p bytes are optional. If one of them is | |
| | | * NULL, it is ignored. | |
| | | * | |
| | | * \param dptr - Returned global device pointer | |
| | | * \param bytes - Returned global size in bytes | |
| | | * \param hmod - Module to retrieve global from | |
| | | * \param name - Name of global to retrieve | |
| | | * | |
| | | * \return | |
| | | * ::CUDA_SUCCESS, | |
| | | * ::CUDA_ERROR_DEINITIALIZED, | |
| | | * ::CUDA_ERROR_NOT_INITIALIZED, | |
| | | * ::CUDA_ERROR_INVALID_CONTEXT, | |
| | | * ::CUDA_ERROR_INVALID_VALUE, | |
| | | * ::CUDA_ERROR_NOT_FOUND | |
| | | * \notefnerr | |
| | | * | |
| | | * \sa ::cuModuleGetFunction, | |
| | | * ::cuModuleGetTexRef, | |
| | | * ::cuModuleLoad, | |
| | | * ::cuModuleLoadData, | |
| | | * ::cuModuleLoadDataEx, | |
| | | * ::cuModuleLoadFatBinary, | |
| | | * ::cuModuleUnload | |
| | | */ | |
| | | CUresult CUDAAPI cuModuleGetGlobal(CUdeviceptr *dptr, size_t *bytes, CUmodu | |
| | | le hmod, const char *name); | |
| | | #endif /* __CUDA_API_VERSION >= 3020 */ | |
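/*
 * Illustrative sketch (assumes \p hmod is a previously loaded module that
 * contains a device variable named "counter"; both names are placeholders):
 * retrieving the global's device address and size, then zeroing it from the
 * host.
 *
 * \code
    CUdeviceptr dCounter;
    size_t counterBytes;
    int zero = 0;
    cuModuleGetGlobal(&dCounter, &counterBytes, hmod, "counter");
    cuMemcpyHtoD(dCounter, &zero, sizeof(zero));
 * \endcode
 */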
| | | | |
|
| CUresult CUDAAPI cuTexRefSetArray( CUtexref hTexRef, CUarray hArray, u | | /** | |
| nsigned int Flags ); | | * \brief Returns a handle to a texture reference | |
| CUresult CUDAAPI cuTexRefSetAddress( unsigned int *ByteOffset, CUtexre | | * | |
| f hTexRef, CUdeviceptr dptr, unsigned int bytes ); | | * Returns in \p *pTexRef the handle of the texture reference of name \p na | |
| CUresult CUDAAPI cuTexRefSetAddress2D( CUtexref hTexRef, const CUDA_AR | | me | |
| RAY_DESCRIPTOR *desc, CUdeviceptr dptr, unsigned int Pitch); | | * in the module \p hmod. If no texture reference of that name exists, | |
| CUresult CUDAAPI cuTexRefSetFormat( CUtexref hTexRef, CUarray_format f | | * ::cuModuleGetTexRef() returns ::CUDA_ERROR_NOT_FOUND. This texture refer | |
| mt, int NumPackedComponents ); | | ence | |
| CUresult CUDAAPI cuTexRefSetAddressMode( CUtexref hTexRef, int dim, CU | | * handle should not be destroyed, since it will be destroyed when the modu | |
| address_mode am ); | | le | |
| CUresult CUDAAPI cuTexRefSetFilterMode( CUtexref hTexRef, CUfilter_mod | | * is unloaded. | |
| e fm ); | | * | |
| CUresult CUDAAPI cuTexRefSetFlags( CUtexref hTexRef, unsigned int Flag | | * \param pTexRef - Returned texture reference | |
| s ); | | * \param hmod - Module to retrieve texture reference from | |
| | | * \param name - Name of texture reference to retrieve | |
| | | * | |
| | | * \return | |
| | | * ::CUDA_SUCCESS, | |
| | | * ::CUDA_ERROR_DEINITIALIZED, | |
| | | * ::CUDA_ERROR_NOT_INITIALIZED, | |
| | | * ::CUDA_ERROR_INVALID_CONTEXT, | |
| | | * ::CUDA_ERROR_INVALID_VALUE, | |
| | | * ::CUDA_ERROR_NOT_FOUND | |
| | | * \notefnerr | |
| | | * | |
| | | * \sa ::cuModuleGetFunction, | |
| | | * ::cuModuleGetGlobal, | |
| | | * ::cuModuleGetSurfRef, | |
| | | * ::cuModuleLoad, | |
| | | * ::cuModuleLoadData, | |
| | | * ::cuModuleLoadDataEx, | |
| | | * ::cuModuleLoadFatBinary, | |
| | | * ::cuModuleUnload | |
| | | */ | |
| | | CUresult CUDAAPI cuModuleGetTexRef(CUtexref *pTexRef, CUmodule hmod, const | |
| | | char *name); | |
| | | | |
|
| CUresult CUDAAPI cuTexRefGetAddress( CUdeviceptr *pdptr, CUtexref hTex | | /** | |
| Ref ); | | * \brief Returns a handle to a surface reference | |
| CUresult CUDAAPI cuTexRefGetArray( CUarray *phArray, CUtexref hTexRef | | * | |
| ); | | * Returns in \p *pSurfRef the handle of the surface reference of name \p n | |
| CUresult CUDAAPI cuTexRefGetAddressMode( CUaddress_mode *pam, CUtexref | | ame | |
| hTexRef, int dim ); | | * in the module \p hmod. If no surface reference of that name exists, | |
| CUresult CUDAAPI cuTexRefGetFilterMode( CUfilter_mode *pfm, CUtexref h | | * ::cuModuleGetSurfRef() returns ::CUDA_ERROR_NOT_FOUND. | |
| TexRef ); | | * | |
| CUresult CUDAAPI cuTexRefGetFormat( CUarray_format *pFormat, int *pNum | | * \param pSurfRef - Returned surface reference | |
| Channels, CUtexref hTexRef ); | | * \param hmod - Module to retrieve surface reference from | |
| CUresult CUDAAPI cuTexRefGetFlags( unsigned int *pFlags, CUtexref hTex | | * \param name - Name of surface reference to retrieve | |
| Ref ); | | * | |
| | | * \return | |
| | | * ::CUDA_SUCCESS, | |
| | | * ::CUDA_ERROR_DEINITIALIZED, | |
| | | * ::CUDA_ERROR_NOT_INITIALIZED, | |
| | | * ::CUDA_ERROR_INVALID_CONTEXT, | |
| | | * ::CUDA_ERROR_INVALID_VALUE, | |
| | | * ::CUDA_ERROR_NOT_FOUND | |
| | | * \notefnerr | |
| | | * | |
| | | * \sa ::cuModuleGetFunction, | |
| | | * ::cuModuleGetGlobal, | |
| | | * ::cuModuleGetTexRef, | |
| | | * ::cuModuleLoad, | |
| | | * ::cuModuleLoadData, | |
| | | * ::cuModuleLoadDataEx, | |
| | | * ::cuModuleLoadFatBinary, | |
| | | * ::cuModuleUnload | |
| | | */ | |
| | | CUresult CUDAAPI cuModuleGetSurfRef(CUsurfref *pSurfRef, CUmodule hmod, con | |
| | | st char *name); | |
| | | | |
|
| /************************************ | | /** @} */ /* END CUDA_MODULE */ | |
| ** | | | |
| ** Surface reference management | | | |
| ** | | | |
| ***********************************/ | | | |
| | | | |
|
| CUresult CUDAAPI cuSurfRefSetArray( CUsurfref hSurfRef, CUarray hArray | | /** | |
| , unsigned int Flags ); | | * \defgroup CUDA_MEM Memory Management | |
| CUresult CUDAAPI cuSurfRefGetArray( CUarray *phArray, CUsurfref hSurfR | | * | |
| ef ); | | * This section describes the memory management functions of the low-level | |
| | | CUDA | |
| | | * driver application programming interface. | |
| | | * | |
| | | * @{ | |
| | | */ | |
| | | | |
|
| /************************************ | | #if __CUDA_API_VERSION >= 3020 | |
| ** | | /** | |
| ** Parameter management | | * \brief Gets free and total memory | |
| ** | | * | |
| ***********************************/ | | * Returns in \p *free and \p *total respectively, the free and total amoun | |
| | | t of | |
| | | * memory available for allocation by the CUDA context, in bytes. | |
| | | * | |
| | | * \param free - Returned free memory in bytes | |
| | | * \param total - Returned total memory in bytes | |
| | | * | |
| | | * \return | |
| | | * ::CUDA_SUCCESS, | |
| | | * ::CUDA_ERROR_DEINITIALIZED, | |
| | | * ::CUDA_ERROR_NOT_INITIALIZED, | |
| | | * ::CUDA_ERROR_INVALID_CONTEXT, | |
| | | * ::CUDA_ERROR_INVALID_VALUE | |
| | | * \notefnerr | |
| | | * | |
| | | * \sa ::cuArray3DCreate, ::cuArray3DGetDescriptor, ::cuArrayCreate, | |
| | | * ::cuArrayDestroy, ::cuArrayGetDescriptor, ::cuMemAlloc, ::cuMemAllocHost | |
| | | , | |
| | | * ::cuMemAllocPitch, ::cuMemcpy2D, ::cuMemcpy2DAsync, ::cuMemcpy2DUnaligne | |
| | | d, | |
| | | * ::cuMemcpy3D, ::cuMemcpy3DAsync, ::cuMemcpyAtoA, ::cuMemcpyAtoD, | |
| | | * ::cuMemcpyAtoH, ::cuMemcpyAtoHAsync, ::cuMemcpyDtoA, ::cuMemcpyDtoD, ::c | |
| | | uMemcpyDtoDAsync, | |
| | | * ::cuMemcpyDtoH, ::cuMemcpyDtoHAsync, ::cuMemcpyHtoA, ::cuMemcpyHtoAAsync | |
| | | , | |
| | | * ::cuMemcpyHtoD, ::cuMemcpyHtoDAsync, ::cuMemFree, ::cuMemFreeHost, | |
| | | * ::cuMemGetAddressRange, ::cuMemHostAlloc, | |
| | | * ::cuMemHostGetDevicePointer, ::cuMemsetD2D8, ::cuMemsetD2D16, | |
| | | * ::cuMemsetD2D32, ::cuMemsetD8, ::cuMemsetD16, ::cuMemsetD32 | |
| | | */ | |
| | | CUresult CUDAAPI cuMemGetInfo(size_t *free, size_t *total); | |
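/*
 * Illustrative sketch: querying free and total device memory before a large
 * allocation (\p requiredBytes is a placeholder for the application's own
 * estimate).
 *
 * \code
    size_t freeBytes, totalBytes;
    cuMemGetInfo(&freeBytes, &totalBytes);
    if (freeBytes < requiredBytes) {
        // fall back to a smaller working set or process the data in chunks
    }
 * \endcode
 */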
| | | | |
|
| CUresult CUDAAPI cuParamSetSize (CUfunction hfunc, unsigned int numbyt | | /** | |
| es); | | * \brief Allocates device memory | |
| CUresult CUDAAPI cuParamSeti (CUfunction hfunc, int offset, unsigne | | * | |
| d int value); | | * Allocates \p bytesize bytes of linear memory on the device and returns i | |
| CUresult CUDAAPI cuParamSetf (CUfunction hfunc, int offset, float v | | n | |
| alue); | | * \p *dptr a pointer to the allocated memory. The allocated memory is suit | |
| CUresult CUDAAPI cuParamSetv (CUfunction hfunc, int offset, void *p | | ably | |
| tr, unsigned int numbytes); | | * aligned for any kind of variable. The memory is not cleared. If \p bytes | |
| CUresult CUDAAPI cuParamSetTexRef(CUfunction hfunc, int texunit, CUtex | | ize | |
| ref hTexRef); | | * is 0, ::cuMemAlloc() returns ::CUDA_ERROR_INVALID_VALUE. | |
| | | * | |
| | | * \param dptr - Returned device pointer | |
| | | * \param bytesize - Requested allocation size in bytes | |
| | | * | |
| | | * \return | |
| | | * ::CUDA_SUCCESS, | |
| | | * ::CUDA_ERROR_DEINITIALIZED, | |
| | | * ::CUDA_ERROR_NOT_INITIALIZED, | |
| | | * ::CUDA_ERROR_INVALID_CONTEXT, | |
| | | * ::CUDA_ERROR_INVALID_VALUE, | |
| | | * ::CUDA_ERROR_OUT_OF_MEMORY | |
| | | * \notefnerr | |
| | | * | |
| | | * \sa ::cuArray3DCreate, ::cuArray3DGetDescriptor, ::cuArrayCreate, | |
| | | * ::cuArrayDestroy, ::cuArrayGetDescriptor, ::cuMemAllocHost, | |
| | | * ::cuMemAllocPitch, ::cuMemcpy2D, ::cuMemcpy2DAsync, ::cuMemcpy2DUnaligne | |
| | | d, | |
| | | * ::cuMemcpy3D, ::cuMemcpy3DAsync, ::cuMemcpyAtoA, ::cuMemcpyAtoD, | |
| | | * ::cuMemcpyAtoH, ::cuMemcpyAtoHAsync, ::cuMemcpyDtoA, ::cuMemcpyDtoD, ::c | |
| | | uMemcpyDtoDAsync, | |
| | | * ::cuMemcpyDtoH, ::cuMemcpyDtoHAsync, ::cuMemcpyHtoA, ::cuMemcpyHtoAAsync | |
| | | , | |
| | | * ::cuMemcpyHtoD, ::cuMemcpyHtoDAsync, ::cuMemFree, ::cuMemFreeHost, | |
| | | * ::cuMemGetAddressRange, ::cuMemGetInfo, ::cuMemHostAlloc, | |
| | | * ::cuMemHostGetDevicePointer, ::cuMemsetD2D8, ::cuMemsetD2D16, | |
| | | * ::cuMemsetD2D32, ::cuMemsetD8, ::cuMemsetD16, ::cuMemsetD32 | |
| | | */ | |
| | | CUresult CUDAAPI cuMemAlloc(CUdeviceptr *dptr, size_t bytesize); | |
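/*
 * Illustrative sketch: allocating a device buffer of \p N floats and freeing
 * it when no longer needed (\p N is a placeholder; error checking omitted).
 *
 * \code
    CUdeviceptr dBuf;
    cuMemAlloc(&dBuf, N * sizeof(float));
    // ... use dBuf ...
    cuMemFree(dBuf);
 * \endcode
 */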
| | | | |
|
| /************************************ | | /** | |
| ** | | * \brief Allocates pitched device memory | |
| ** Launch functions | | * | |
| ** | | * Allocates at least \p WidthInBytes * \p Height bytes of linear memory on | |
| ***********************************/ | | * the device and returns in \p *dptr a pointer to the allocated memory. Th | |
| | | e | |
| | | * function may pad the allocation to ensure that corresponding pointers in | |
| | | * any given row will continue to meet the alignment requirements for | |
| | | * coalescing as the address is updated from row to row. \p ElementSizeByte | |
| | | s | |
| | | * specifies the size of the largest reads and writes that will be performe | |
| | | d | |
| | | * on the memory range. \p ElementSizeBytes may be 4, 8 or 16 (since coales | |
| | | ced | |
| | | * memory transactions are not possible on other data sizes). If | |
| | | * \p ElementSizeBytes is smaller than the actual read/write size of a kern | |
| | | el, | |
| | | * the kernel will run correctly, but possibly at reduced speed. The pitch | |
| | | * returned in \p *pPitch by ::cuMemAllocPitch() is the width in bytes of t | |
| | | he | |
| | | * allocation. The intended usage of pitch is as a separate parameter of th | |
| | | e | |
| | | * allocation, used to compute addresses within the 2D array. Given the row | |
| | | * and column of an array element of type \b T, the address is computed as: | |
| | | * \code | |
| | | T* pElement = (T*)((char*)BaseAddress + Row * Pitch) + Column; | |
| | | * \endcode | |
| | | * | |
| | | * The pitch returned by ::cuMemAllocPitch() is guaranteed to work with | |
| | | * ::cuMemcpy2D() under all circumstances. For allocations of 2D arrays, it | |
| | | is | |
| | | * recommended that programmers consider performing pitch allocations using | |
| | | * ::cuMemAllocPitch(). Due to alignment restrictions in the hardware, this | |
| | | is | |
| | | * especially true if the application will be performing 2D memory copies | |
| | | * between different regions of device memory (whether linear memory or CUD | |
| | | A | |
| | | * arrays). | |
| | | * | |
| | | * The byte alignment of the pitch returned by ::cuMemAllocPitch() is guara | |
| | | nteed | |
| | | * to match or exceed the alignment requirement for texture binding with | |
| | | * ::cuTexRefSetAddress2D(). | |
| | | * | |
| | | * \param dptr - Returned device pointer | |
| | | * \param pPitch - Returned pitch of allocation in bytes | |
| | | * \param WidthInBytes - Requested allocation width in bytes | |
| | | * \param Height - Requested allocation height in rows | |
| | | * \param ElementSizeBytes - Size of largest reads/writes for range | |
| | | * | |
| | | * \return | |
| | | * ::CUDA_SUCCESS, | |
| | | * ::CUDA_ERROR_DEINITIALIZED, | |
| | | * ::CUDA_ERROR_NOT_INITIALIZED, | |
| | | * ::CUDA_ERROR_INVALID_CONTEXT, | |
| | | * ::CUDA_ERROR_INVALID_VALUE, | |
| | | * ::CUDA_ERROR_OUT_OF_MEMORY | |
| | | * \notefnerr | |
| | | * | |
| | | * \sa ::cuArray3DCreate, ::cuArray3DGetDescriptor, ::cuArrayCreate, | |
| | | * ::cuArrayDestroy, ::cuArrayGetDescriptor, ::cuMemAlloc, ::cuMemAllocHost | |
| | | , | |
| | | * ::cuMemcpy2D, ::cuMemcpy2DAsync, ::cuMemcpy2DUnaligned, | |
| | | * ::cuMemcpy3D, ::cuMemcpy3DAsync, ::cuMemcpyAtoA, ::cuMemcpyAtoD, | |
| | | * ::cuMemcpyAtoH, ::cuMemcpyAtoHAsync, ::cuMemcpyDtoA, ::cuMemcpyDtoD, ::c | |
| | | uMemcpyDtoDAsync, | |
| | | * ::cuMemcpyDtoH, ::cuMemcpyDtoHAsync, ::cuMemcpyHtoA, ::cuMemcpyHtoAAsync | |
| | | , | |
| | | * ::cuMemcpyHtoD, ::cuMemcpyHtoDAsync, ::cuMemFree, ::cuMemFreeHost, | |
| | | * ::cuMemGetAddressRange, ::cuMemGetInfo, ::cuMemHostAlloc, | |
| | | * ::cuMemHostGetDevicePointer, ::cuMemsetD2D8, ::cuMemsetD2D16, | |
| | | * ::cuMemsetD2D32, ::cuMemsetD8, ::cuMemsetD16, ::cuMemsetD32 | |
| | | */ | |
| | | CUresult CUDAAPI cuMemAllocPitch(CUdeviceptr *dptr, size_t *pPitch, size_t | |
| | | WidthInBytes, size_t Height, unsigned int ElementSizeBytes); | |
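/*
 * Illustrative sketch: allocating a pitched 640 x 480 buffer of floats and
 * computing the address of element (Row, Column) exactly as described above
 * (Row and Column are placeholders; error checking omitted).
 *
 * \code
    CUdeviceptr dImage;
    size_t pitch;
    cuMemAllocPitch(&dImage, &pitch, 640 * sizeof(float), 480, sizeof(float));
    CUdeviceptr dElement = dImage + Row * pitch + Column * sizeof(float);
    // ... use dElement / dImage ...
    cuMemFree(dImage);
 * \endcode
 */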
| | | | |
|
| CUresult CUDAAPI cuLaunch ( CUfunction f ); | | /** | |
| CUresult CUDAAPI cuLaunchGrid (CUfunction f, int grid_width, int grid_h | | * \brief Frees device memory | |
| eight); | | * | |
| CUresult CUDAAPI cuLaunchGridAsync( CUfunction f, int grid_width, int g | | * Frees the memory space pointed to by \p dptr, which must have been retur | |
| rid_height, CUstream hStream ); | | ned | |
| | | * by a previous call to ::cuMemAlloc() or ::cuMemAllocPitch(). | |
| | | * | |
| | | * \param dptr - Pointer to memory to free | |
| | | * | |
| | | * \return | |
| | | * ::CUDA_SUCCESS, | |
| | | * ::CUDA_ERROR_DEINITIALIZED, | |
| | | * ::CUDA_ERROR_NOT_INITIALIZED, | |
| | | * ::CUDA_ERROR_INVALID_CONTEXT, | |
| | | * ::CUDA_ERROR_INVALID_VALUE | |
| | | * \notefnerr | |
| | | * | |
| | | * \sa ::cuArray3DCreate, ::cuArray3DGetDescriptor, ::cuArrayCreate, | |
| | | * ::cuArrayDestroy, ::cuArrayGetDescriptor, ::cuMemAlloc, ::cuMemAllocHost | |
| | | , | |
| | | * ::cuMemAllocPitch, ::cuMemcpy2D, ::cuMemcpy2DAsync, ::cuMemcpy2DUnaligne | |
| | | d, | |
| | | * ::cuMemcpy3D, ::cuMemcpy3DAsync, ::cuMemcpyAtoA, ::cuMemcpyAtoD, | |
| | | * ::cuMemcpyAtoH, ::cuMemcpyAtoHAsync, ::cuMemcpyDtoA, ::cuMemcpyDtoD, ::c | |
| | | uMemcpyDtoDAsync, | |
| | | * ::cuMemcpyDtoH, ::cuMemcpyDtoHAsync, ::cuMemcpyHtoA, ::cuMemcpyHtoAAsync | |
| | | , | |
| | | * ::cuMemcpyHtoD, ::cuMemcpyHtoDAsync, ::cuMemFreeHost, | |
| | | * ::cuMemGetAddressRange, ::cuMemGetInfo, ::cuMemHostAlloc, | |
| | | * ::cuMemHostGetDevicePointer, ::cuMemsetD2D8, ::cuMemsetD2D16, | |
| | | * ::cuMemsetD2D32, ::cuMemsetD8, ::cuMemsetD16, ::cuMemsetD32 | |
| | | */ | |
| | | CUresult CUDAAPI cuMemFree(CUdeviceptr dptr); | |
| | | | |
|
| /************************************ | | /** | |
| ** | | * \brief Get information on memory allocations | |
| ** Events | | * | |
| ** | | * Returns the base address in \p *pbase and size in \p *psize of the | |
| ***********************************/ | | * allocation by ::cuMemAlloc() or ::cuMemAllocPitch() that contains the in | |
| CUresult CUDAAPI cuEventCreate( CUevent *phEvent, unsigned int Flags ); | | put | |
| CUresult CUDAAPI cuEventRecord( CUevent hEvent, CUstream hStream ); | | * pointer \p dptr. Both parameters \p pbase and \p psize are optional. If | |
| CUresult CUDAAPI cuEventQuery( CUevent hEvent ); | | one | |
| CUresult CUDAAPI cuEventSynchronize( CUevent hEvent ); | | * of them is NULL, it is ignored. | |
| CUresult CUDAAPI cuEventDestroy( CUevent hEvent ); | | * | |
| CUresult CUDAAPI cuEventElapsedTime( float *pMilliseconds, CUevent hSta | | * \param pbase - Returned base address | |
| rt, CUevent hEnd ); | | * \param psize - Returned size of device memory allocation | |
| | | * \param dptr - Device pointer to query | |
| | | * | |
| | | * \return | |
| | | * ::CUDA_SUCCESS, | |
| | | * ::CUDA_ERROR_DEINITIALIZED, | |
| | | * ::CUDA_ERROR_NOT_INITIALIZED, | |
| | | * ::CUDA_ERROR_INVALID_CONTEXT, | |
| | | * ::CUDA_ERROR_INVALID_VALUE | |
| | | * \notefnerr | |
| | | * | |
| | | * \sa ::cuArray3DCreate, ::cuArray3DGetDescriptor, ::cuArrayCreate, | |
| | | * ::cuArrayDestroy, ::cuArrayGetDescriptor, ::cuMemAlloc, ::cuMemAllocHost | |
| | | , | |
| | | * ::cuMemAllocPitch, ::cuMemcpy2D, ::cuMemcpy2DAsync, ::cuMemcpy2DUnaligne | |
| | | d, | |
| | | * ::cuMemcpy3D, ::cuMemcpy3DAsync, ::cuMemcpyAtoA, ::cuMemcpyAtoD, | |
| | | * ::cuMemcpyAtoH, ::cuMemcpyAtoHAsync, ::cuMemcpyDtoA, ::cuMemcpyDtoD, ::c | |
| | | uMemcpyDtoDAsync, | |
| | | * ::cuMemcpyDtoH, ::cuMemcpyDtoHAsync, ::cuMemcpyHtoA, ::cuMemcpyHtoAAsync | |
| | | , | |
| | | * ::cuMemcpyHtoD, ::cuMemcpyHtoDAsync, ::cuMemFree, ::cuMemFreeHost, | |
| | | * ::cuMemGetInfo, ::cuMemHostAlloc, | |
| | | * ::cuMemHostGetDevicePointer, ::cuMemsetD2D8, ::cuMemsetD2D16, | |
| | | * ::cuMemsetD2D32, ::cuMemsetD8, ::cuMemsetD16, ::cuMemsetD32 | |
| | | */ | |
| | | CUresult CUDAAPI cuMemGetAddressRange(CUdeviceptr *pbase, size_t *psize, CU | |
| | | deviceptr dptr); | |
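/*
 * Illustrative sketch: given a pointer somewhere inside an allocation (here
 * an arbitrary offset into \p dBuf, assumed to come from an earlier
 * ::cuMemAlloc() call), recovering the base address and size of the
 * containing allocation.
 *
 * \code
    CUdeviceptr base;
    size_t size;
    cuMemGetAddressRange(&base, &size, dBuf + 128);
    // base == dBuf; size covers the whole allocation containing dBuf + 128
 * \endcode
 */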
| | | | |
|
| /************************************ | | /** | |
| ** | | * \brief Allocates page-locked host memory | |
| ** Streams | | * | |
| ** | | * Allocates \p bytesize bytes of host memory that is page-locked and | |
| ***********************************/ | | * accessible to the device. The driver tracks the virtual memory ranges | |
| CUresult CUDAAPI cuStreamCreate( CUstream *phStream, unsigned int Flag | | * allocated with this function and automatically accelerates calls to | |
| s ); | | * functions such as ::cuMemcpy(). Since the memory can be accessed directl | |
| CUresult CUDAAPI cuStreamQuery( CUstream hStream ); | | y by | |
| CUresult CUDAAPI cuStreamSynchronize( CUstream hStream ); | | * the device, it can be read or written with much higher bandwidth than | |
| CUresult CUDAAPI cuStreamDestroy( CUstream hStream ); | | * pageable memory obtained with functions such as ::malloc(). Allocating | |
| | | * excessive amounts of memory with ::cuMemAllocHost() may degrade system | |
| | | * performance, since it reduces the amount of memory available to the syst | |
| | | em | |
| | | * for paging. As a result, this function is best used sparingly to allocat | |
| | | e | |
| | | * staging areas for data exchange between host and device. | |
| | | * | |
| | | * \param pp - Returned host pointer to page-locked memory | |
| | | * \param bytesize - Requested allocation size in bytes | |
| | | * | |
| | | * \return | |
| | | * ::CUDA_SUCCESS, | |
| | | * ::CUDA_ERROR_DEINITIALIZED, | |
| | | * ::CUDA_ERROR_NOT_INITIALIZED, | |
| | | * ::CUDA_ERROR_INVALID_CONTEXT, | |
| | | * ::CUDA_ERROR_INVALID_VALUE, | |
| | | * ::CUDA_ERROR_OUT_OF_MEMORY | |
| | | * \notefnerr | |
| | | * | |
| | | * \sa ::cuArray3DCreate, ::cuArray3DGetDescriptor, ::cuArrayCreate, | |
| | | * ::cuArrayDestroy, ::cuArrayGetDescriptor, ::cuMemAlloc, | |
| | | * ::cuMemAllocPitch, ::cuMemcpy2D, ::cuMemcpy2DAsync, ::cuMemcpy2DUnaligne | |
| | | d, | |
| | | * ::cuMemcpy3D, ::cuMemcpy3DAsync, ::cuMemcpyAtoA, ::cuMemcpyAtoD, | |
| | | * ::cuMemcpyAtoH, ::cuMemcpyAtoHAsync, ::cuMemcpyDtoA, ::cuMemcpyDtoD, ::c | |
| | | uMemcpyDtoDAsync, | |
| | | * ::cuMemcpyDtoH, ::cuMemcpyDtoHAsync, ::cuMemcpyHtoA, ::cuMemcpyHtoAAsync | |
| | | , | |
| | | * ::cuMemcpyHtoD, ::cuMemcpyHtoDAsync, ::cuMemFree, ::cuMemFreeHost, | |
| | | * ::cuMemGetAddressRange, ::cuMemGetInfo, ::cuMemHostAlloc, | |
| | | * ::cuMemHostGetDevicePointer, ::cuMemsetD2D8, ::cuMemsetD2D16, | |
| | | * ::cuMemsetD2D32, ::cuMemsetD8, ::cuMemsetD16, ::cuMemsetD32 | |
| | | */ | |
| | | CUresult CUDAAPI cuMemAllocHost(void **pp, size_t bytesize); | |
| | | #endif /* __CUDA_API_VERSION >= 3020 */ | |
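/*
 * Illustrative sketch: using a page-locked host buffer as a staging area so
 * that subsequent copies to and from the device can use the faster pinned
 * path (\p N is a placeholder; error checking omitted).
 *
 * \code
    float *hStaging;
    cuMemAllocHost((void **)&hStaging, N * sizeof(float));
    // ... fill hStaging and copy it to the device, e.g. with cuMemcpyHtoD() ...
    cuMemFreeHost(hStaging);
 * \endcode
 */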
| | | | |
|
| /************************************ | | /** | |
| ** | | * \brief Frees page-locked host memory | |
| ** Graphics interop | | * | |
| ** | | * Frees the memory space pointed to by \p p, which must have been returned | |
| ***********************************/ | | by | |
| CUresult CUDAAPI cuGraphicsUnregisterResource(CUgraphicsResource resour | | * a previous call to ::cuMemAllocHost(). | |
| ce); | | * | |
| CUresult CUDAAPI cuGraphicsSubResourceGetMappedArray( CUarray *pArray, | | * \param p - Pointer to memory to free | |
| CUgraphicsResource resource, unsigned int arrayIndex, unsigned int mipLevel | | * | |
| ); | | * \return | |
| CUresult CUDAAPI cuGraphicsResourceGetMappedPointer( CUdeviceptr *pDevP | | * ::CUDA_SUCCESS, | |
| tr, unsigned int *pSize, CUgraphicsResource resource ); | | * ::CUDA_ERROR_DEINITIALIZED, | |
| CUresult CUDAAPI cuGraphicsResourceSetMapFlags( CUgraphicsResource reso | | * ::CUDA_ERROR_NOT_INITIALIZED, | |
| urce, unsigned int flags ); | | * ::CUDA_ERROR_INVALID_CONTEXT, | |
| CUresult CUDAAPI cuGraphicsMapResources( unsigned int count, CUgraphics | | * ::CUDA_ERROR_INVALID_VALUE | |
| Resource *resources, CUstream hStream ); | | * \notefnerr | |
| CUresult CUDAAPI cuGraphicsUnmapResources( unsigned int count, CUgraphi | | * | |
| csResource *resources, CUstream hStream ); | | * \sa ::cuArray3DCreate, ::cuArray3DGetDescriptor, ::cuArrayCreate, | |
| | | * ::cuArrayDestroy, ::cuArrayGetDescriptor, ::cuMemAlloc, ::cuMemAllocHost | |
| | | , | |
| | | * ::cuMemAllocPitch, ::cuMemcpy2D, ::cuMemcpy2DAsync, ::cuMemcpy2DUnaligne | |
| | | d, | |
| | | * ::cuMemcpy3D, ::cuMemcpy3DAsync, ::cuMemcpyAtoA, ::cuMemcpyAtoD, | |
| | | * ::cuMemcpyAtoH, ::cuMemcpyAtoHAsync, ::cuMemcpyDtoA, ::cuMemcpyDtoD, ::c | |
| | | uMemcpyDtoDAsync, | |
| | | * ::cuMemcpyDtoH, ::cuMemcpyDtoHAsync, ::cuMemcpyHtoA, ::cuMemcpyHtoAAsync | |
| | | , | |
| | | * ::cuMemcpyHtoD, ::cuMemcpyHtoDAsync, ::cuMemFree, | |
| | | * ::cuMemGetAddressRange, ::cuMemGetInfo, ::cuMemHostAlloc, | |
| | | * ::cuMemHostGetDevicePointer, ::cuMemsetD2D8, ::cuMemsetD2D16, | |
| | | * ::cuMemsetD2D32, ::cuMemsetD8, ::cuMemsetD16, ::cuMemsetD32 | |
| | | */ | |
| | | CUresult CUDAAPI cuMemFreeHost(void *p); | |
| | | | |
|
| /************************************ | | /** | |
| ** | | * \brief Allocates page-locked host memory | |
| ** Export tables | | * | |
| ** | | * Allocates \p bytesize bytes of host memory that is page-locked and acces | |
| ***********************************/ | | sible | |
| CUresult CUDAAPI cuGetExportTable( const void **ppExportTable, const CU | | * to the device. The driver tracks the virtual memory ranges allocated wit | |
| uuid *pExportTableId ); | | h | |
| | | * this function and automatically accelerates calls to functions such as | |
| | | * ::cuMemcpyHtoD(). Since the memory can be accessed directly by the devic | |
| | | e, | |
| | | * it can be read or written with much higher bandwidth than pageable memor | |
| | | y | |
| | | * obtained with functions such as ::malloc(). Allocating excessive amounts | |
| | | of | |
| | | * pinned memory may degrade system performance, since it reduces the amoun | |
| | | t | |
| | | * of memory available to the system for paging. As a result, this function | |
| | | is | |
| | | * best used sparingly to allocate staging areas for data exchange between | |
| | | * host and device. | |
| | | * | |
| | | * The \p Flags parameter enables different options to be specified that | |
| | | * affect the allocation, as follows. | |
| | | * | |
| | | * - ::CU_MEMHOSTALLOC_PORTABLE: The memory returned by this call will be | |
| | | * considered as pinned memory by all CUDA contexts, not just the one tha | |
| | | t | |
| | | * performed the allocation. | |
| | | * | |
| | | * - ::CU_MEMHOSTALLOC_DEVICEMAP: Maps the allocation into the CUDA address | |
| | | * space. The device pointer to the memory may be obtained by calling | |
| | | * ::cuMemHostGetDevicePointer(). This feature is available only on GPUs | |
| | | * with compute capability greater than or equal to 1.1. | |
| | | * | |
| | | * - ::CU_MEMHOSTALLOC_WRITECOMBINED: Allocates the memory as write-combine | |
| | | d | |
| | | * (WC). WC memory can be transferred across the PCI Express bus more | |
| | | * quickly on some system configurations, but cannot be read efficiently | |
| | | by | |
| | | * most CPUs. WC memory is a good option for buffers that will be written | |
| | | by | |
| | | * the CPU and read by the GPU via mapped pinned memory or host->device | |
| | | * transfers. | |
| | | * | |
| | | * All of these flags are orthogonal to one another: a developer may alloca | |
| | | te | |
| | | * memory that is portable, mapped and/or write-combined with no restrictio | |
| | | ns. | |
| | | * | |
| | | * The CUDA context must have been created with the ::CU_CTX_MAP_HOST flag in | |
| | | * order for the ::CU_MEMHOSTALLOC_DEVICEMAP flag to have any effect. | |
| | | * | |
| | | * The ::CU_MEMHOSTALLOC_DEVICEMAP flag may be specified on CUDA contexts for | |
| | | * devices that do not support mapped pinned memory. The failure is deferre | |
| | | d | |
| | | * to ::cuMemHostGetDevicePointer() because the memory may be mapped into | |
| | | * other CUDA contexts via the ::CU_MEMHOSTALLOC_PORTABLE flag. | |
| | | * | |
| | | * The memory allocated by this function must be freed with ::cuMemFreeHost | |
| | | (). | |
| | | * | |
| | | * \param pp - Returned host pointer to page-locked memory | |
| | | * \param bytesize - Requested allocation size in bytes | |
| | | * \param Flags - Flags for allocation request | |
| | | * | |
| | | * \return | |
| | | * ::CUDA_SUCCESS, | |
| | | * ::CUDA_ERROR_DEINITIALIZED, | |
| | | * ::CUDA_ERROR_NOT_INITIALIZED, | |
| | | * ::CUDA_ERROR_INVALID_CONTEXT, | |
| | | * ::CUDA_ERROR_INVALID_VALUE, | |
| | | * ::CUDA_ERROR_OUT_OF_MEMORY | |
| | | * \notefnerr | |
| | | * | |
| | | * \sa ::cuArray3DCreate, ::cuArray3DGetDescriptor, ::cuArrayCreate, | |
| | | * ::cuArrayDestroy, ::cuArrayGetDescriptor, ::cuMemAlloc, ::cuMemAllocHost | |
| | | , | |
| | | * ::cuMemAllocPitch, ::cuMemcpy2D, ::cuMemcpy2DAsync, ::cuMemcpy2DUnaligne | |
| | | d, | |
| | | * ::cuMemcpy3D, ::cuMemcpy3DAsync, ::cuMemcpyAtoA, ::cuMemcpyAtoD, | |
| | | * ::cuMemcpyAtoH, ::cuMemcpyAtoHAsync, ::cuMemcpyDtoA, ::cuMemcpyDtoD, ::c | |
| | | uMemcpyDtoDAsync, | |
| | | * ::cuMemcpyDtoH, ::cuMemcpyDtoHAsync, ::cuMemcpyHtoA, ::cuMemcpyHtoAAsync | |
| | | , | |
| | | * ::cuMemcpyHtoD, ::cuMemcpyHtoDAsync, ::cuMemFree, ::cuMemFreeHost, | |
| | | * ::cuMemGetAddressRange, ::cuMemGetInfo, | |
| | | * ::cuMemHostGetDevicePointer, ::cuMemsetD2D8, ::cuMemsetD2D16, | |
| | | * ::cuMemsetD2D32, ::cuMemsetD8, ::cuMemsetD16, ::cuMemsetD32 | |
| | | */ | |
| | | CUresult CUDAAPI cuMemHostAlloc(void **pp, size_t bytesize, unsigned int Fl | |
| | | ags); | |
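/*
 * Illustrative sketch: allocating a write-combined, page-locked upload buffer
 * for the CPU-writes / GPU-reads pattern described above (\p bytes and
 * \p dDst are placeholders; error checking omitted).
 *
 * \code
    void *hUpload;
    cuMemHostAlloc(&hUpload, bytes, CU_MEMHOSTALLOC_WRITECOMBINED);
    // the CPU fills hUpload sequentially, then it is copied to the device
    cuMemcpyHtoD(dDst, hUpload, bytes);
    cuMemFreeHost(hUpload);
 * \endcode
 */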
| | | | |
|
| /************************************ | | #if __CUDA_API_VERSION >= 3020 | |
| ** | | /** | |
| ** Limits | | * \brief Passes back device pointer of mapped pinned memory | |
| ** | | * | |
| ***********************************/ | | * Passes back the device pointer \p pdptr corresponding to the mapped, pin | |
| | | ned | |
| | | * host buffer \p p allocated by ::cuMemHostAlloc. | |
| | | * | |
| | | * ::cuMemHostGetDevicePointer() will fail if the ::CU_MEMHOSTALLOC_DEVICEMAP | |
| | | * flag was not specified at the time the memory was allocated, or if the | |
| | | * function is called on a GPU that does not support mapped pinned memory. | |
| | | * | |
| | | * \p Flags is provided for future releases. For now, it must be set to 0. | |
| | | * | |
| | | * \param pdptr - Returned device pointer | |
| | | * \param p - Host pointer | |
| | | * \param Flags - Options (must be 0) | |
| | | * | |
| | | * \return | |
| | | * ::CUDA_SUCCESS, | |
| | | * ::CUDA_ERROR_DEINITIALIZED, | |
| | | * ::CUDA_ERROR_NOT_INITIALIZED, | |
| | | * ::CUDA_ERROR_INVALID_CONTEXT, | |
| | | * ::CUDA_ERROR_INVALID_VALUE | |
| | | * \notefnerr | |
| | | * | |
| | | * \sa ::cuArray3DCreate, ::cuArray3DGetDescriptor, ::cuArrayCreate, | |
| | | * ::cuArrayDestroy, ::cuArrayGetDescriptor, ::cuMemAlloc, ::cuMemAllocHost | |
| | | , | |
| | | * ::cuMemAllocPitch, ::cuMemcpy2D, ::cuMemcpy2DAsync, ::cuMemcpy2DUnaligne | |
| | | d, | |
| | | * ::cuMemcpy3D, ::cuMemcpy3DAsync, ::cuMemcpyAtoA, ::cuMemcpyAtoD, | |
| | | * ::cuMemcpyAtoH, ::cuMemcpyAtoHAsync, ::cuMemcpyDtoA, ::cuMemcpyDtoD, ::c | |
| | | uMemcpyDtoDAsync, | |
| | | * ::cuMemcpyDtoH, ::cuMemcpyDtoHAsync, ::cuMemcpyHtoA, ::cuMemcpyHtoAAsync | |
| | | , | |
| | | * ::cuMemcpyHtoD, ::cuMemcpyHtoDAsync, ::cuMemFree, ::cuMemFreeHost, | |
| | | * ::cuMemGetAddressRange, ::cuMemGetInfo, ::cuMemHostAlloc, | |
| | | * ::cuMemsetD2D8, ::cuMemsetD2D16, | |
| | | * ::cuMemsetD2D32, ::cuMemsetD8, ::cuMemsetD16, ::cuMemsetD32 | |
| | | */ | |
| | | CUresult CUDAAPI cuMemHostGetDevicePointer(CUdeviceptr *pdptr, void *p, uns | |
| | | igned int Flags); | |
| | | #endif /* __CUDA_API_VERSION >= 3020 */ | |
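/*
 * Illustrative sketch: mapped ("zero-copy") pinned memory. This assumes the
 * current context was created with ::CU_CTX_MAP_HOST and that the device
 * supports mapped pinned memory; \p bytes is a placeholder and error checking
 * is omitted.
 *
 * \code
    void *hMapped;
    CUdeviceptr dMapped;
    cuMemHostAlloc(&hMapped, bytes, CU_MEMHOSTALLOC_DEVICEMAP);
    cuMemHostGetDevicePointer(&dMapped, hMapped, 0);
    // kernels can now access the buffer through dMapped while the host
    // accesses it through hMapped
    cuMemFreeHost(hMapped);
 * \endcode
 */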
| | | | |
|
| CUresult CUDAAPI cuCtxSetLimit(CUlimit limit, size_t value); | | /** | |
| CUresult CUDAAPI cuCtxGetLimit(size_t *pvalue, CUlimit limit); | | * \brief Passes back flags that were used for a pinned allocation | |
| | | * | |
| | | * Passes back the flags \p pFlags that were specified when allocating | |
| | | * the pinned host buffer \p p allocated by ::cuMemHostAlloc. | |
| | | * | |
| | | * ::cuMemHostGetFlags() will fail if the pointer does not reside in | |
| | | * an allocation performed by ::cuMemAllocHost() or ::cuMemHostAlloc(). | |
| | | * | |
| | | * \param pFlags - Returned flags word | |
| | | * \param p - Host pointer | |
| | | * | |
| | | * \return | |
| | | * ::CUDA_SUCCESS, | |
| | | * ::CUDA_ERROR_DEINITIALIZED, | |
| | | * ::CUDA_ERROR_NOT_INITIALIZED, | |
| | | * ::CUDA_ERROR_INVALID_CONTEXT, | |
| | | * ::CUDA_ERROR_INVALID_VALUE | |
| | | * \notefnerr | |
| | | * | |
| | | * \sa ::cuMemAllocHost, ::cuMemHostAlloc | |
| | | */ | |
| | | CUresult CUDAAPI cuMemHostGetFlags(unsigned int *pFlags, void *p); | |
| | | | |
| | | #if __CUDA_API_VERSION >= 3020 | |
| | | /** | |
| | | * \brief Copies memory from Host to Device | |
| | | * | |
| | | * Copies from host memory to device memory. \p dstDevice and \p srcHost ar | |
| | | e | |
| | | * the base addresses of the destination and source, respectively. \p ByteC | |
| | | ount | |
| | | * specifies the number of bytes to copy. Note that this function is | |
| | | * synchronous. | |
| | | * | |
| | | * \param dstDevice - Destination device pointer | |
| | | * \param srcHost - Source host pointer | |
| | | * \param ByteCount - Size of memory copy in bytes | |
| | | * | |
| | | * \return | |
| | | * ::CUDA_SUCCESS, | |
| | | * ::CUDA_ERROR_DEINITIALIZED, | |
| | | * ::CUDA_ERROR_NOT_INITIALIZED, | |
| | | * ::CUDA_ERROR_INVALID_CONTEXT, | |
| | | * ::CUDA_ERROR_INVALID_VALUE | |
| | | * \notefnerr | |
| | | * | |
| | | * \sa ::cuArray3DCreate, ::cuArray3DGetDescriptor, ::cuArrayCreate, | |
| | | * ::cuArrayDestroy, ::cuArrayGetDescriptor, ::cuMemAlloc, ::cuMemAllocHost | |
| | | , | |
| | | * ::cuMemAllocPitch, ::cuMemcpy2D, ::cuMemcpy2DAsync, ::cuMemcpy2DUnaligne | |
| | | d, | |
| | | * ::cuMemcpy3D, ::cuMemcpy3DAsync, ::cuMemcpyAtoA, ::cuMemcpyAtoD, | |
| | | * ::cuMemcpyAtoH, ::cuMemcpyAtoHAsync, ::cuMemcpyDtoA, ::cuMemcpyDtoD, ::c | |
| | | uMemcpyDtoDAsync, | |
| | | * ::cuMemcpyDtoH, ::cuMemcpyDtoHAsync, ::cuMemcpyHtoA, ::cuMemcpyHtoAAsync | |
| | | , | |
| | | * ::cuMemcpyHtoDAsync, ::cuMemFree, ::cuMemFreeHost, | |
| | | * ::cuMemGetAddressRange, ::cuMemGetInfo, ::cuMemHostAlloc, | |
| | | * ::cuMemHostGetDevicePointer, ::cuMemsetD2D8, ::cuMemsetD2D16, | |
| | | * ::cuMemsetD2D32, ::cuMemsetD8, ::cuMemsetD16, ::cuMemsetD32 | |
| | | */ | |
| | | CUresult CUDAAPI cuMemcpyHtoD(CUdeviceptr dstDevice, const void *srcHost, s | |
| | | ize_t ByteCount); | |
| | | | |
| | | /** | |
| | | * \brief Copies memory from Device to Host | |
| | | * | |
| | | * Copies from device to host memory. \p dstHost and \p srcDevice specify t | |
| | | he | |
| | | * base pointers of the destination and source, respectively. \p ByteCount | |
| | | * specifies the number of bytes to copy. Note that this function is | |
| | | * synchronous. | |
| | | * | |
| | | * \param dstHost - Destination host pointer | |
| | | * \param srcDevice - Source device pointer | |
| | | * \param ByteCount - Size of memory copy in bytes | |
| | | * | |
| | | * \return | |
| | | * ::CUDA_SUCCESS, | |
| | | * ::CUDA_ERROR_DEINITIALIZED, | |
| | | * ::CUDA_ERROR_NOT_INITIALIZED, | |
| | | * ::CUDA_ERROR_INVALID_CONTEXT, | |
| | | * ::CUDA_ERROR_INVALID_VALUE | |
| | | * \notefnerr | |
| | | * | |
| | | * \sa ::cuArray3DCreate, ::cuArray3DGetDescriptor, ::cuArrayCreate, | |
| | | * ::cuArrayDestroy, ::cuArrayGetDescriptor, ::cuMemAlloc, ::cuMemAllocHost | |
| | | , | |
| | | * ::cuMemAllocPitch, ::cuMemcpy2D, ::cuMemcpy2DAsync, ::cuMemcpy2DUnaligne | |
| | | d, | |
| | | * ::cuMemcpy3D, ::cuMemcpy3DAsync, ::cuMemcpyAtoA, ::cuMemcpyAtoD, | |
| | | * ::cuMemcpyAtoH, ::cuMemcpyAtoHAsync, ::cuMemcpyDtoA, ::cuMemcpyDtoD, ::c | |
| | | uMemcpyDtoDAsync, | |
| | | * ::cuMemcpyDtoHAsync, ::cuMemcpyHtoA, ::cuMemcpyHtoAAsync, | |
| | | * ::cuMemcpyHtoD, ::cuMemcpyHtoDAsync, ::cuMemFree, ::cuMemFreeHost, | |
| | | * ::cuMemGetAddressRange, ::cuMemGetInfo, ::cuMemHostAlloc, | |
| | | * ::cuMemHostGetDevicePointer, ::cuMemsetD2D8, ::cuMemsetD2D16, | |
| | | * ::cuMemsetD2D32, ::cuMemsetD8, ::cuMemsetD16, ::cuMemsetD32 | |
| | | */ | |
| | | CUresult CUDAAPI cuMemcpyDtoH(void *dstHost, CUdeviceptr srcDevice, size_t | |
| | | ByteCount); | |
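/*
 * Illustrative sketch: a synchronous host-to-device / device-to-host round
 * trip for \p N floats (\p N, \p hSrc and \p hDst are placeholders; error
 * checking omitted).
 *
 * \code
    CUdeviceptr dBuf;
    cuMemAlloc(&dBuf, N * sizeof(float));
    cuMemcpyHtoD(dBuf, hSrc, N * sizeof(float));   // upload input data
    // ... launch kernels that operate on dBuf ...
    cuMemcpyDtoH(hDst, dBuf, N * sizeof(float));   // download the results
    cuMemFree(dBuf);
 * \endcode
 */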
| | | | |
| | | /** | |
| | | * \brief Copies memory from Device to Device | |
| | | * | |
| | | * Copies from device memory to device memory. \p dstDevice and \p srcDevic | |
| | | e | |
| | | * are the base pointers of the destination and source, respectively. | |
| | | * \p ByteCount specifies the number of bytes to copy. Note that this funct | |
| | | ion | |
| | | * is asynchronous. | |
| | | * | |
| | | * \param dstDevice - Destination device pointer | |
| | | * \param srcDevice - Source device pointer | |
| | | * \param ByteCount - Size of memory copy in bytes | |
| | | * | |
| | | * \return | |
| | | * ::CUDA_SUCCESS, | |
| | | * ::CUDA_ERROR_DEINITIALIZED, | |
| | | * ::CUDA_ERROR_NOT_INITIALIZED, | |
| | | * ::CUDA_ERROR_INVALID_CONTEXT, | |
| | | * ::CUDA_ERROR_INVALID_VALUE | |
| | | * \notefnerr | |
| | | * | |
| | | * \sa ::cuArray3DCreate, ::cuArray3DGetDescriptor, ::cuArrayCreate, | |
| | | * ::cuArrayDestroy, ::cuArrayGetDescriptor, ::cuMemAlloc, ::cuMemAllocHost | |
| | | , | |
| | | * ::cuMemAllocPitch, ::cuMemcpy2D, ::cuMemcpy2DAsync, ::cuMemcpy2DUnaligne | |
| | | d, | |
| | | * ::cuMemcpy3D, ::cuMemcpy3DAsync, ::cuMemcpyAtoA, ::cuMemcpyAtoD, | |
| | | * ::cuMemcpyAtoH, ::cuMemcpyAtoHAsync, ::cuMemcpyDtoA, | |
| | | * ::cuMemcpyDtoH, ::cuMemcpyDtoHAsync, ::cuMemcpyHtoA, ::cuMemcpyHtoAAsync | |
| | | , | |
| | | * ::cuMemcpyHtoD, ::cuMemcpyHtoDAsync, ::cuMemFree, ::cuMemFreeHost, | |
| | | * ::cuMemGetAddressRange, ::cuMemGetInfo, ::cuMemHostAlloc, | |
| | | * ::cuMemHostGetDevicePointer, ::cuMemsetD2D8, ::cuMemsetD2D16, | |
| | | * ::cuMemsetD2D32, ::cuMemsetD8, ::cuMemsetD16, ::cuMemsetD32 | |
| | | */ | |
| | | CUresult CUDAAPI cuMemcpyDtoD(CUdeviceptr dstDevice, CUdeviceptr srcDevice, | |
| | | size_t ByteCount); | |
| | | | |
| | | /** | |
| | | * \brief Copies memory from Device to Array | |
| | | * | |
| | | * Copies from device memory to a 1D CUDA array. \p dstArray and \p dstOffset | |
| | | * specify the CUDA array handle and starting offset in bytes of the | |
| | | * destination data. | |
| | | * \p srcDevice specifies the base pointer of the source. \p ByteCount | |
| | | * specifies the number of bytes to copy. | |
| | | * | |
| | | * \param dstArray - Destination array | |
| | | * \param dstOffset - Offset in bytes of destination array | |
| | | * \param srcDevice - Source device pointer | |
| | | * \param ByteCount - Size of memory copy in bytes | |
| | | * | |
| | | * \return | |
| | | * ::CUDA_SUCCESS, | |
| | | * ::CUDA_ERROR_DEINITIALIZED, | |
| | | * ::CUDA_ERROR_NOT_INITIALIZED, | |
| | | * ::CUDA_ERROR_INVALID_CONTEXT, | |
| | | * ::CUDA_ERROR_INVALID_VALUE | |
| | | * \notefnerr | |
| | | * | |
| | | * \sa ::cuArray3DCreate, ::cuArray3DGetDescriptor, ::cuArrayCreate, | |
| | | * ::cuArrayDestroy, ::cuArrayGetDescriptor, ::cuMemAlloc, ::cuMemAllocHost | |
| | | , | |
| | | * ::cuMemAllocPitch, ::cuMemcpy2D, ::cuMemcpy2DAsync, ::cuMemcpy2DUnaligne | |
| | | d, | |
| | | * ::cuMemcpy3D, ::cuMemcpy3DAsync, ::cuMemcpyAtoA, ::cuMemcpyAtoD, | |
| | | * ::cuMemcpyAtoH, ::cuMemcpyAtoHAsync, ::cuMemcpyDtoD, ::cuMemcpyDtoDAsync | |
| | | , | |
| | | * ::cuMemcpyDtoH, ::cuMemcpyDtoHAsync, ::cuMemcpyHtoA, ::cuMemcpyHtoAAsync | |
| | | , | |
| | | * ::cuMemcpyHtoD, ::cuMemcpyHtoDAsync, ::cuMemFree, ::cuMemFreeHost, | |
| | | * ::cuMemGetAddressRange, ::cuMemGetInfo, ::cuMemHostAlloc, | |
| | | * ::cuMemHostGetDevicePointer, ::cuMemsetD2D8, ::cuMemsetD2D16, | |
| | | * ::cuMemsetD2D32, ::cuMemsetD8, ::cuMemsetD16, ::cuMemsetD32 | |
| | | */ | |
| | | CUresult CUDAAPI cuMemcpyDtoA(CUarray dstArray, size_t dstOffset, CUdevicep | |
| | | tr srcDevice, size_t ByteCount); | |
| | | | |
| | | /** | |
| | | * \brief Copies memory from Array to Device | |
| | | * | |
| | | * Copies from one 1D CUDA array to device memory. \p dstDevice specifies t | |
| | | he | |
| | | * base pointer of the destination and must be naturally aligned with the C | |
| | | UDA | |
| | | * array elements. \p srcArray and \p srcOffset specify the CUDA array hand | |
| | | le | |
| | | * and the offset in bytes into the array where the copy is to begin. | |
| | | * \p ByteCount specifies the number of bytes to copy and must be evenly | |
| | | * divisible by the array element size. | |
| | | * | |
| | | * \param dstDevice - Destination device pointer | |
| | | * \param srcArray - Source array | |
| | | * \param srcOffset - Offset in bytes of source array | |
| | | * \param ByteCount - Size of memory copy in bytes | |
| | | * | |
| | | * \return | |
| | | * ::CUDA_SUCCESS, | |
| | | * ::CUDA_ERROR_DEINITIALIZED, | |
| | | * ::CUDA_ERROR_NOT_INITIALIZED, | |
| | | * ::CUDA_ERROR_INVALID_CONTEXT, | |
| | | * ::CUDA_ERROR_INVALID_VALUE | |
| | | * \notefnerr | |
| | | * | |
| | | * \sa ::cuArray3DCreate, ::cuArray3DGetDescriptor, ::cuArrayCreate, | |
| | | * ::cuArrayDestroy, ::cuArrayGetDescriptor, ::cuMemAlloc, ::cuMemAllocHost | |
| | | , | |
| | | * ::cuMemAllocPitch, ::cuMemcpy2D, ::cuMemcpy2DAsync, ::cuMemcpy2DUnaligne | |
| | | d, | |
| | | * ::cuMemcpy3D, ::cuMemcpy3DAsync, ::cuMemcpyAtoA, | |
| | | * ::cuMemcpyAtoH, ::cuMemcpyAtoHAsync, ::cuMemcpyDtoA, ::cuMemcpyDtoD, ::c | |
| | | uMemcpyDtoDAsync, | |
| | | * ::cuMemcpyDtoH, ::cuMemcpyDtoHAsync, ::cuMemcpyHtoA, ::cuMemcpyHtoAAsync | |
| | | , | |
| | | * ::cuMemcpyHtoD, ::cuMemcpyHtoDAsync, ::cuMemFree, ::cuMemFreeHost, | |
| | | * ::cuMemGetAddressRange, ::cuMemGetInfo, ::cuMemHostAlloc, | |
| | | * ::cuMemHostGetDevicePointer, ::cuMemsetD2D8, ::cuMemsetD2D16, | |
| | | * ::cuMemsetD2D32, ::cuMemsetD8, ::cuMemsetD16, ::cuMemsetD32 | |
| | | */ | |
| | | CUresult CUDAAPI cuMemcpyAtoD(CUdeviceptr dstDevice, CUarray srcArray, size | |
| | | _t srcOffset, size_t ByteCount); | |
| | | | |
| | | /** | |
| | | * \brief Copies memory from Host to Array | |
| | | * | |
| | | * Copies from host memory to a 1D CUDA array. \p dstArray and \p dstOffset | |
| | | * specify the CUDA array handle and starting offset in bytes of the destin | |
| | | ation | |
| | | * data. \p srcHost specifies the base address of the source. \p ByteCount sp | |
| | | ecifies | |
| | | * the number of bytes to copy. | |
| | | * | |
| | | * \param dstArray - Destination array | |
| | | * \param dstOffset - Offset in bytes of destination array | |
| | | * \param srcHost - Source host pointer | |
| | | * \param ByteCount - Size of memory copy in bytes | |
| | | * | |
| | | * \return | |
| | | * ::CUDA_SUCCESS, | |
| | | * ::CUDA_ERROR_DEINITIALIZED, | |
| | | * ::CUDA_ERROR_NOT_INITIALIZED, | |
| | | * ::CUDA_ERROR_INVALID_CONTEXT, | |
| | | * ::CUDA_ERROR_INVALID_VALUE | |
| | | * \notefnerr | |
| | | * | |
| | | * \sa ::cuArray3DCreate, ::cuArray3DGetDescriptor, ::cuArrayCreate, | |
| | | * ::cuArrayDestroy, ::cuArrayGetDescriptor, ::cuMemAlloc, ::cuMemAllocHost | |
| | | , | |
| | | * ::cuMemAllocPitch, ::cuMemcpy2D, ::cuMemcpy2DAsync, ::cuMemcpy2DUnaligne | |
| | | d, | |
| | | * ::cuMemcpy3D, ::cuMemcpy3DAsync, ::cuMemcpyAtoA, ::cuMemcpyAtoD, | |
| | | * ::cuMemcpyAtoH, ::cuMemcpyAtoHAsync, ::cuMemcpyDtoA, ::cuMemcpyDtoD, ::c | |
| | | uMemcpyDtoDAsync, | |
| | | * ::cuMemcpyDtoH, ::cuMemcpyDtoHAsync, ::cuMemcpyHtoAAsync, | |
| | | * ::cuMemcpyHtoD, ::cuMemcpyHtoDAsync, ::cuMemFree, ::cuMemFreeHost, | |
| | | * ::cuMemGetAddressRange, ::cuMemGetInfo, ::cuMemHostAlloc, | |
| | | * ::cuMemHostGetDevicePointer, ::cuMemsetD2D8, ::cuMemsetD2D16, | |
| | | * ::cuMemsetD2D32, ::cuMemsetD8, ::cuMemsetD16, ::cuMemsetD32 | |
| | | */ | |
| | | CUresult CUDAAPI cuMemcpyHtoA(CUarray dstArray, size_t dstOffset, const voi | |
| | | d *srcHost, size_t ByteCount); | |
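/*
 * Illustrative sketch: filling a 1D CUDA array from host memory and reading
 * it back. \p hArray is assumed to be a 1D array of \p N floats created with
 * ::cuArrayCreate(); \p hData is a host buffer of the same size (all names
 * are placeholders).
 *
 * \code
    cuMemcpyHtoA(hArray, 0, hData, N * sizeof(float));   // host  -> array
    cuMemcpyAtoH(hData, hArray, 0, N * sizeof(float));   // array -> host
 * \endcode
 */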
| | | | |
| | | /** | |
| | | * \brief Copies memory from Array to Host | |
| | | * | |
| | | * Copies from one 1D CUDA array to host memory. \p dstHost specifies the b | |
| | | ase | |
| | | * pointer of the destination. \p srcArray and \p srcOffset specify the CUD | |
| | | A | |
| | | * array handle and starting offset in bytes of the source data. | |
| | | * \p ByteCount specifies the number of bytes to copy. | |
| | | * | |
| | | * \param dstHost - Destination host pointer | |
| | | * \param srcArray - Source array | |
| | | * \param srcOffset - Offset in bytes of source array | |
| | | * \param ByteCount - Size of memory copy in bytes | |
| | | * | |
| | | * \return | |
| | | * ::CUDA_SUCCESS, | |
| | | * ::CUDA_ERROR_DEINITIALIZED, | |
| | | * ::CUDA_ERROR_NOT_INITIALIZED, | |
| | | * ::CUDA_ERROR_INVALID_CONTEXT, | |
| | | * ::CUDA_ERROR_INVALID_VALUE | |
| | | * \notefnerr | |
| | | * | |
| | | * \sa ::cuArray3DCreate, ::cuArray3DGetDescriptor, ::cuArrayCreate, | |
| | | * ::cuArrayDestroy, ::cuArrayGetDescriptor, ::cuMemAlloc, ::cuMemAllocHost | |
| | | , | |
| | | * ::cuMemAllocPitch, ::cuMemcpy2D, ::cuMemcpy2DAsync, ::cuMemcpy2DUnaligne | |
| | | d, | |
| | | * ::cuMemcpy3D, ::cuMemcpy3DAsync, ::cuMemcpyAtoA, ::cuMemcpyAtoD, | |
| | | * ::cuMemcpyAtoHAsync, ::cuMemcpyDtoA, ::cuMemcpyDtoD, ::cuMemcpyDtoDAsync | |
| | | , | |
| | | * ::cuMemcpyDtoH, ::cuMemcpyDtoHAsync, ::cuMemcpyHtoA, ::cuMemcpyHtoAAsync | |
| | | , | |
| | | * ::cuMemcpyHtoD, ::cuMemcpyHtoDAsync, ::cuMemFree, ::cuMemFreeHost, | |
| | | * ::cuMemGetAddressRange, ::cuMemGetInfo, ::cuMemHostAlloc, | |
| | | * ::cuMemHostGetDevicePointer, ::cuMemsetD2D8, ::cuMemsetD2D16, | |
| | | * ::cuMemsetD2D32, ::cuMemsetD8, ::cuMemsetD16, ::cuMemsetD32 | |
| | | */ | |
| | | CUresult CUDAAPI cuMemcpyAtoH(void *dstHost, CUarray srcArray, size_t srcOf | |
| | | fset, size_t ByteCount); | |
| | | | |
| | | /** | |
| | | * \brief Copies memory from Array to Array | |
| | | * | |
| | | * Copies from one 1D CUDA array to another. \p dstArray and \p srcArray | |
| | | * specify the handles of the destination and source CUDA arrays for the co | |
| | | py, | |
| | | * respectively. \p dstOffset and \p srcOffset specify the destination and | |
| | | * source offsets in bytes into the CUDA arrays. \p ByteCount is the number | |
| | | of | |
| | | * bytes to be copied. The elements in the CUDA arrays need not have the same | |
| | | * format, but they must have the same size, and \p ByteCount must be evenly | |
| | | * divisible by that size. | |
| | | * | |
| | | * \param dstArray - Destination array | |
| | | * \param dstOffset - Offset in bytes of destination array | |
| | | * \param srcArray - Source array | |
| | | * \param srcOffset - Offset in bytes of source array | |
| | | * \param ByteCount - Size of memory copy in bytes | |
| | | * | |
| | | * \return | |
| | | * ::CUDA_SUCCESS, | |
| | | * ::CUDA_ERROR_DEINITIALIZED, | |
| | | * ::CUDA_ERROR_NOT_INITIALIZED, | |
| | | * ::CUDA_ERROR_INVALID_CONTEXT, | |
| | | * ::CUDA_ERROR_INVALID_VALUE | |
| | | * \notefnerr | |
| | | * | |
| | | * \sa ::cuArray3DCreate, ::cuArray3DGetDescriptor, ::cuArrayCreate, | |
| | | * ::cuArrayDestroy, ::cuArrayGetDescriptor, ::cuMemAlloc, ::cuMemAllocHost | |
| | | , | |
| | | * ::cuMemAllocPitch, ::cuMemcpy2D, ::cuMemcpy2DAsync, ::cuMemcpy2DUnaligne | |
| | | d, | |
| | | * ::cuMemcpy3D, ::cuMemcpy3DAsync, ::cuMemcpyAtoD, | |
| | | * ::cuMemcpyAtoH, ::cuMemcpyAtoHAsync, ::cuMemcpyDtoA, ::cuMemcpyDtoD, ::c | |
| | | uMemcpyDtoDAsync, | |
| | | * ::cuMemcpyDtoH, ::cuMemcpyDtoHAsync, ::cuMemcpyHtoA, ::cuMemcpyHtoAAsync | |
| | | , | |
| | | * ::cuMemcpyHtoD, ::cuMemcpyHtoDAsync, ::cuMemFree, ::cuMemFreeHost, | |
| | | * ::cuMemGetAddressRange, ::cuMemGetInfo, ::cuMemHostAlloc, | |
| | | * ::cuMemHostGetDevicePointer, ::cuMemsetD2D8, ::cuMemsetD2D16, | |
| | | * ::cuMemsetD2D32, ::cuMemsetD8, ::cuMemsetD16, ::cuMemsetD32 | |
| | | */ | |
| | | CUresult CUDAAPI cuMemcpyAtoA(CUarray dstArray, size_t dstOffset, CUarray s | |
| | | rcArray, size_t srcOffset, size_t ByteCount); | |
| | | | |
| | | /** | |
| | | * \brief Copies memory for 2D arrays | |
| | | * | |
| | | * Perform a 2D memory copy according to the parameters specified in \p pCo | |
| | | py. | |
| | | * The ::CUDA_MEMCPY2D structure is defined as: | |
| | | * | |
| | | * \code | |
| | | typedef struct CUDA_MEMCPY2D_st { | |
| | | unsigned int srcXInBytes, srcY; | |
| | | CUmemorytype srcMemoryType; | |
| | | const void *srcHost; | |
| | | CUdeviceptr srcDevice; | |
| | | CUarray srcArray; | |
| | | unsigned int srcPitch; | |
| | | | |
| | | unsigned int dstXInBytes, dstY; | |
| | | CUmemorytype dstMemoryType; | |
| | | void *dstHost; | |
| | | CUdeviceptr dstDevice; | |
| | | CUarray dstArray; | |
| | | unsigned int dstPitch; | |
| | | | |
| | | unsigned int WidthInBytes; | |
| | | unsigned int Height; | |
| | | } CUDA_MEMCPY2D; | |
| | | * \endcode | |
| | | * where: | |
| | | * - ::srcMemoryType and ::dstMemoryType specify the type of memory of the | |
| | | * source and destination, respectively; ::CUmemorytype_enum is defined a | |
| | | s: | |
| | | * | |
| | | * \code | |
| | | typedef enum CUmemorytype_enum { | |
| | | CU_MEMORYTYPE_HOST = 0x01, | |
| | | CU_MEMORYTYPE_DEVICE = 0x02, | |
| | | CU_MEMORYTYPE_ARRAY = 0x03 | |
| | | } CUmemorytype; | |
| | | * \endcode | |
| | | * | |
| | | * \par | |
| | | * If ::srcMemoryType is ::CU_MEMORYTYPE_HOST, ::srcHost and ::srcPitch | |
| | | * specify the (host) base address of the source data and the bytes per row | |
| | | to | |
| | | * apply. ::srcArray is ignored. | |
| | | * | |
| | | * \par | |
| | | * If ::srcMemoryType is ::CU_MEMORYTYPE_DEVICE, ::srcDevice and ::srcPitch | |
| | | * specify the (device) base address of the source data and the bytes per r | |
| | | ow | |
| | | * to apply. ::srcArray is ignored. | |
| | | * | |
| | | * \par | |
| | | * If ::srcMemoryType is ::CU_MEMORYTYPE_ARRAY, ::srcArray specifies the | |
| | | * handle of the source data. ::srcHost, ::srcDevice and ::srcPitch are | |
| | | * ignored. | |
| | | * | |
| | | * \par | |
| | | * If ::dstMemoryType is ::CU_MEMORYTYPE_HOST, ::dstHost and ::dstPitch | |
| | | * specify the (host) base address of the destination data and the bytes pe | |
| | | r | |
| | | * row to apply. ::dstArray is ignored. | |
| | | * | |
| | | * \par | |
| | | * If ::dstMemoryType is ::CU_MEMORYTYPE_DEVICE, ::dstDevice and ::dstPitch | |
| | | * specify the (device) base address of the destination data and the bytes | |
| | | per | |
| | | * row to apply. ::dstArray is ignored. | |
| | | * | |
| | | * \par | |
| | | * If ::dstMemoryType is ::CU_MEMORYTYPE_ARRAY, ::dstArray specifies the | |
| | | * handle of the destination data. ::dstHost, ::dstDevice and ::dstPitch ar | |
| | | e | |
| | | * ignored. | |
| | | * | |
| | | * - ::srcXInBytes and ::srcY specify the base address of the source data f | |
| | | or | |
| | | * the copy. | |
| | | * | |
| | | * \par | |
| | | * For host pointers, the starting address is | |
| | | * \code | |
| | | void* Start = (void*)((char*)srcHost+srcY*srcPitch + srcXInBytes); | |
| | | * \endcode | |
| | | * | |
| | | * \par | |
| | | * For device pointers, the starting address is | |
| | | * \code | |
| | | CUdeviceptr Start = srcDevice+srcY*srcPitch+srcXInBytes; | |
| | | * \endcode | |
| | | * | |
| | | * \par | |
| | | * For CUDA arrays, ::srcXInBytes must be evenly divisible by the array | |
| | | * element size. | |
| | | * | |
| | | * - ::dstXInBytes and ::dstY specify the base address of the destination d | |
| | | ata | |
| | | * for the copy. | |
| | | * | |
| | | * \par | |
| | | * For host pointers, the base address is | |
| | | * \code | |
| | | void* dstStart = (void*)((char*)dstHost+dstY*dstPitch + dstXInBytes); | |
| | | * \endcode | |
| | | * | |
| | | * \par | |
| | | * For device pointers, the starting address is | |
| | | * \code | |
| | | CUdeviceptr dstStart = dstDevice+dstY*dstPitch+dstXInBytes; | |
| | | * \endcode | |
| | | * | |
| | | * \par | |
| | | * For CUDA arrays, ::dstXInBytes must be evenly divisible by the array | |
| | | * element size. | |
| | | * | |
| | | * - ::WidthInBytes and ::Height specify the width (in bytes) and height of | |
| | | * the 2D copy being performed. | |
| | | * - If specified, ::srcPitch must be greater than or equal to ::WidthInByt | |
| | | es + | |
| | | * ::srcXInBytes, and ::dstPitch must be greater than or equal to | |
| | | * ::WidthInBytes + ::dstXInBytes. | |
| | | * | |
| | | * \par | |
| | | * ::cuMemcpy2D() returns an error if any pitch is greater than the maximum | |
| | | * allowed (::CU_DEVICE_ATTRIBUTE_MAX_PITCH). ::cuMemAllocPitch() passes ba | |
| | | ck | |
| | | * pitches that always work with ::cuMemcpy2D(). On intra-device memory cop | |
| | | ies | |
| | | * (device to device, CUDA array to device, CUDA array to CUDA array), | |
| | | * ::cuMemcpy2D() may fail for pitches not computed by ::cuMemAllocPitch(). | |
| | | * ::cuMemcpy2DUnaligned() does not have this restriction, but may run | |
| | | * significantly slower in the cases where ::cuMemcpy2D() would have return | |
| | | ed | |
| | | * an error code. | |
| | | * | |
| | | * \param pCopy - Parameters for the memory copy | |
| | | * | |
| | | * \return | |
| | | * ::CUDA_SUCCESS, | |
| | | * ::CUDA_ERROR_DEINITIALIZED, | |
| | | * ::CUDA_ERROR_NOT_INITIALIZED, | |
| | | * ::CUDA_ERROR_INVALID_CONTEXT, | |
| | | * ::CUDA_ERROR_INVALID_VALUE | |
| | | * \notefnerr | |
| | | * | |
| | | * \sa ::cuArray3DCreate, ::cuArray3DGetDescriptor, ::cuArrayCreate, | |
| | | * ::cuArrayDestroy, ::cuArrayGetDescriptor, ::cuMemAlloc, ::cuMemAllocHost | |
| | | , | |
| | | * ::cuMemAllocPitch, ::cuMemcpy2DAsync, ::cuMemcpy2DUnaligned, | |
| | | * ::cuMemcpy3D, ::cuMemcpy3DAsync, ::cuMemcpyAtoA, ::cuMemcpyAtoD, | |
| | | * ::cuMemcpyAtoH, ::cuMemcpyAtoHAsync, ::cuMemcpyDtoA, ::cuMemcpyDtoD, ::c | |
| | | uMemcpyDtoDAsync, | |
| | | * ::cuMemcpyDtoH, ::cuMemcpyDtoHAsync, ::cuMemcpyHtoA, ::cuMemcpyHtoAAsync | |
| | | , | |
| | | * ::cuMemcpyHtoD, ::cuMemcpyHtoDAsync, ::cuMemFree, ::cuMemFreeHost, | |
| | | * ::cuMemGetAddressRange, ::cuMemGetInfo, ::cuMemHostAlloc, | |
| | | * ::cuMemHostGetDevicePointer, ::cuMemsetD2D8, ::cuMemsetD2D16, | |
| | | * ::cuMemsetD2D32, ::cuMemsetD8, ::cuMemsetD16, ::cuMemsetD32 | |
| | | */ | |
| | | CUresult CUDAAPI cuMemcpy2D(const CUDA_MEMCPY2D *pCopy); | |
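/*
 * Illustrative sketch: copying a Width x Height image of floats from a
 * tightly packed host buffer into a pitched device allocation obtained from
 * ::cuMemAllocPitch() (\p hImage, \p dImage, \p pitch, \p Width and
 * \p Height are placeholders). Unused CUDA_MEMCPY2D fields are left zeroed.
 *
 * \code
    CUDA_MEMCPY2D cpy;
    memset(&cpy, 0, sizeof(cpy));
    cpy.srcMemoryType = CU_MEMORYTYPE_HOST;
    cpy.srcHost       = hImage;
    cpy.srcPitch      = Width * sizeof(float);   // packed host rows
    cpy.dstMemoryType = CU_MEMORYTYPE_DEVICE;
    cpy.dstDevice     = dImage;
    cpy.dstPitch      = pitch;                   // pitch from cuMemAllocPitch()
    cpy.WidthInBytes  = Width * sizeof(float);
    cpy.Height        = Height;
    cuMemcpy2D(&cpy);
 * \endcode
 */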
| | | | |
| | | /** | |
| | | * \brief Copies memory for 2D arrays | |
| | | * | |
| | | * Perform a 2D memory copy according to the parameters specified in \p pCo | |
| | | py. | |
| | | * The ::CUDA_MEMCPY2D structure is defined as: | |
| | | * | |
| | | * \code | |
| | | typedef struct CUDA_MEMCPY2D_st { | |
| | | unsigned int srcXInBytes, srcY; | |
| | | CUmemorytype srcMemoryType; | |
| | | const void *srcHost; | |
| | | CUdeviceptr srcDevice; | |
| | | CUarray srcArray; | |
| | | unsigned int srcPitch; | |
| | | unsigned int dstXInBytes, dstY; | |
| | | CUmemorytype dstMemoryType; | |
| | | void *dstHost; | |
| | | CUdeviceptr dstDevice; | |
| | | CUarray dstArray; | |
| | | unsigned int dstPitch; | |
| | | unsigned int WidthInBytes; | |
| | | unsigned int Height; | |
| | | } CUDA_MEMCPY2D; | |
| | | * \endcode | |
| | | * where: | |
| | | * - ::srcMemoryType and ::dstMemoryType specify the type of memory of the | |
| | | * source and destination, respectively; ::CUmemorytype_enum is defined a | |
| | | s: | |
| | | * | |
| | | * \code | |
| | | typedef enum CUmemorytype_enum { | |
| | | CU_MEMORYTYPE_HOST = 0x01, | |
| | | CU_MEMORYTYPE_DEVICE = 0x02, | |
| | | CU_MEMORYTYPE_ARRAY = 0x03 | |
| | | } CUmemorytype; | |
| | | * \endcode | |
| | | * | |
| | | * \par | |
| | | * If ::srcMemoryType is ::CU_MEMORYTYPE_HOST, ::srcHost and ::srcPitch | |
| | | * specify the (host) base address of the source data and the bytes per row | |
| | | to | |
| | | * apply. ::srcArray is ignored. | |
| | | * | |
| | | * \par | |
| | | * If ::srcMemoryType is ::CU_MEMORYTYPE_DEVICE, ::srcDevice and ::srcPitch | |
| | | * specify the (device) base address of the source data and the bytes per r | |
| | | ow | |
| | | * to apply. ::srcArray is ignored. | |
| | | * | |
| | | * \par | |
| | | * If ::srcMemoryType is ::CU_MEMORYTYPE_ARRAY, ::srcArray specifies the | |
| | | * handle of the source data. ::srcHost, ::srcDevice and ::srcPitch are | |
| | | * ignored. | |
| | | * | |
| | | * \par | |
| | | * If ::dstMemoryType is ::CU_MEMORYTYPE_HOST, ::dstHost and ::dstPitch | |
| | | * specify the (host) base address of the destination data and the bytes pe | |
| | | r | |
| | | * row to apply. ::dstArray is ignored. | |
| | | * | |
| | | * \par | |
| | | * If ::dstMemoryType is ::CU_MEMORYTYPE_DEVICE, ::dstDevice and ::dstPitch | |
| | | * specify the (device) base address of the destination data and the bytes | |
| | | per | |
| | | * row to apply. ::dstArray is ignored. | |
| | | * | |
| | | * \par | |
| | | * If ::dstMemoryType is ::CU_MEMORYTYPE_ARRAY, ::dstArray specifies the | |
| | | * handle of the destination data. ::dstHost, ::dstDevice and ::dstPitch ar | |
| | | e | |
| | | * ignored. | |
| | | * | |
| | | * - ::srcXInBytes and ::srcY specify the base address of the source data f | |
| | | or | |
| | | * the copy. | |
| | | * | |
| | | * \par | |
| | | * For host pointers, the starting address is | |
| | | * \code | |
| | | void* Start = (void*)((char*)srcHost+srcY*srcPitch + srcXInBytes); | |
| | | * \endcode | |
| | | * | |
| | | * \par | |
| | | * For device pointers, the starting address is | |
| | | * \code | |
| | | CUdeviceptr Start = srcDevice+srcY*srcPitch+srcXInBytes; | |
| | | * \endcode | |
| | | * | |
| | | * \par | |
| | | * For CUDA arrays, ::srcXInBytes must be evenly divisible by the array | |
| | | * element size. | |
| | | * | |
| | | * - ::dstXInBytes and ::dstY specify the base address of the destination d | |
| | | ata | |
| | | * for the copy. | |
| | | * | |
| | | * \par | |
| | | * For host pointers, the base address is | |
| | | * \code | |
| | | void* dstStart = (void*)((char*)dstHost+dstY*dstPitch + dstXInBytes); | |
| | | * \endcode | |
| | | * | |
| | | * \par | |
| | | * For device pointers, the starting address is | |
| | | * \code | |
| | | CUdeviceptr dstStart = dstDevice+dstY*dstPitch+dstXInBytes; | |
| | | * \endcode | |
| | | * | |
| | | * \par | |
| | | * For CUDA arrays, ::dstXInBytes must be evenly divisible by the array | |
| | | * element size. | |
| | | * | |
| | | * - ::WidthInBytes and ::Height specify the width (in bytes) and height of | |
| | | * the 2D copy being performed. | |
| | | * - If specified, ::srcPitch must be greater than or equal to ::WidthInByt | |
| | | es + | |
| | | * ::srcXInBytes, and ::dstPitch must be greater than or equal to | |
| | | * ::WidthInBytes + ::dstXInBytes. | |
| | | * | |
| | | * \par | |
| | | * ::cuMemcpy2D() returns an error if any pitch is greater than the maximum | |
| | | * allowed (::CU_DEVICE_ATTRIBUTE_MAX_PITCH). ::cuMemAllocPitch() passes ba | |
| | | ck | |
| | | * pitches that always work with ::cuMemcpy2D(). On intra-device memory cop | |
| | | ies | |
| | | * (device to device, CUDA array to device, CUDA array to CUDA array), | |
| | | * ::cuMemcpy2D() may fail for pitches not computed by ::cuMemAllocPitch(). | |
| | | * ::cuMemcpy2DUnaligned() does not have this restriction, but may run | |
| | | * significantly slower in the cases where ::cuMemcpy2D() would have return | |
| | | ed | |
| | | * an error code. | |
| | | * | |
| | | * \param pCopy - Parameters for the memory copy | |
| | | * | |
| | | * \return | |
| | | * ::CUDA_SUCCESS, | |
| | | * ::CUDA_ERROR_DEINITIALIZED, | |
| | | * ::CUDA_ERROR_NOT_INITIALIZED, | |
| | | * ::CUDA_ERROR_INVALID_CONTEXT, | |
| | | * ::CUDA_ERROR_INVALID_VALUE | |
| | | * \notefnerr | |
| | | * | |
| | | * \sa ::cuArray3DCreate, ::cuArray3DGetDescriptor, ::cuArrayCreate, | |
| | | * ::cuArrayDestroy, ::cuArrayGetDescriptor, ::cuMemAlloc, ::cuMemAllocHost | |
| | | , | |
| | | * ::cuMemAllocPitch, ::cuMemcpy2D, ::cuMemcpy2DAsync, | |
| | | * ::cuMemcpy3D, ::cuMemcpy3DAsync, ::cuMemcpyAtoA, ::cuMemcpyAtoD, | |
| | | * ::cuMemcpyAtoH, ::cuMemcpyAtoHAsync, ::cuMemcpyDtoA, ::cuMemcpyDtoD, ::c | |
| | | uMemcpyDtoDAsync, | |
| | | * ::cuMemcpyDtoH, ::cuMemcpyDtoHAsync, ::cuMemcpyHtoA, ::cuMemcpyHtoAAsync | |
| | | , | |
| | | * ::cuMemcpyHtoD, ::cuMemcpyHtoDAsync, ::cuMemFree, ::cuMemFreeHost, | |
| | | * ::cuMemGetAddressRange, ::cuMemGetInfo, ::cuMemHostAlloc, | |
| | | * ::cuMemHostGetDevicePointer, ::cuMemsetD2D8, ::cuMemsetD2D16, | |
| | | * ::cuMemsetD2D32, ::cuMemsetD8, ::cuMemsetD16, ::cuMemsetD32 | |
| | | */ | |
| | | CUresult CUDAAPI cuMemcpy2DUnaligned(const CUDA_MEMCPY2D *pCopy); | |
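| | | | |
| | | /* | |
| | |  * Usage sketch (illustrative only): a host-to-device copy of a 2D | |
| | |  * sub-rectangle, falling back to ::cuMemcpy2DUnaligned() when the pitches | |
| | |  * are not ones returned by ::cuMemAllocPitch(). Names such as hostBuf, | |
| | |  * hostPitch, devPtr, devPitch, widthBytes and height are placeholders; a | |
| | |  * current context is assumed. | |
| | |  * | |
| | |  * \code | |
| | |     CUDA_MEMCPY2D cp = {0};                // zero the members we don't set | |
| | |     cp.srcMemoryType = CU_MEMORYTYPE_HOST; | |
| | |     cp.srcHost       = hostBuf;            // pitched host buffer | |
| | |     cp.srcPitch      = hostPitch; | |
| | |     cp.dstMemoryType = CU_MEMORYTYPE_DEVICE; | |
| | |     cp.dstDevice     = devPtr;             // pitched device allocation | |
| | |     cp.dstPitch      = devPitch; | |
| | |     cp.WidthInBytes  = widthBytes; | |
| | |     cp.Height        = height; | |
| | |     if (cuMemcpy2D(&cp) != CUDA_SUCCESS)   // pitch may be unsuitable | |
| | |         cuMemcpy2DUnaligned(&cp);          // no pitch restriction, slower | |
| | |  * \endcode | |
| | |  */ | |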
| | | | |
| | | /** | |
| | | * \brief Copies memory for 3D arrays | |
| | | * | |
| | | * Perform a 3D memory copy according to the parameters specified in | |
| | | * \p pCopy. The ::CUDA_MEMCPY3D structure is defined as: | |
| | | * | |
| | | * \code | |
| | | typedef struct CUDA_MEMCPY3D_st { | |
| | | | |
| | | unsigned int srcXInBytes, srcY, srcZ; | |
| | | unsigned int srcLOD; | |
| | | CUmemorytype srcMemoryType; | |
| | | const void *srcHost; | |
| | | CUdeviceptr srcDevice; | |
| | | CUarray srcArray; | |
| | | unsigned int srcPitch; // ignored when src is array | |
| | | unsigned int srcHeight; // ignored when src is array; may b | |
| | | e 0 if Depth==1 | |
| | | | |
| | | unsigned int dstXInBytes, dstY, dstZ; | |
| | | unsigned int dstLOD; | |
| | | CUmemorytype dstMemoryType; | |
| | | void *dstHost; | |
| | | CUdeviceptr dstDevice; | |
| | | CUarray dstArray; | |
| | | unsigned int dstPitch; // ignored when dst is array | |
| | | unsigned int dstHeight; // ignored when dst is array; may b | |
| | | e 0 if Depth==1 | |
| | | | |
| | | unsigned int WidthInBytes; | |
| | | unsigned int Height; | |
| | | unsigned int Depth; | |
| | | } CUDA_MEMCPY3D; | |
| | | * \endcode | |
| | | * where: | |
| | | * - ::srcMemoryType and ::dstMemoryType specify the type of memory of the | |
| | | * source and destination, respectively; ::CUmemorytype_enum is defined a | |
| | | s: | |
| | | * | |
| | | * \code | |
| | | typedef enum CUmemorytype_enum { | |
| | | CU_MEMORYTYPE_HOST = 0x01, | |
| | | CU_MEMORYTYPE_DEVICE = 0x02, | |
| | | CU_MEMORYTYPE_ARRAY = 0x03 | |
| | | } CUmemorytype; | |
| | | * \endcode | |
| | | * | |
| | | * \par | |
| | | * If ::srcMemoryType is ::CU_MEMORYTYPE_HOST, ::srcHost, ::srcPitch and | |
| | | * ::srcHeight specify the (host) base address of the source data, the byte | |
| | | s | |
| | | * per row, and the height of each 2D slice of the 3D array. ::srcArray is | |
| | | * ignored. | |
| | | * | |
| | | * \par | |
| | | * If ::srcMemoryType is ::CU_MEMORYTYPE_DEVICE, ::srcDevice, ::srcPitch an | |
| | | d | |
| | | * ::srcHeight specify the (device) base address of the source data, the by | |
| | | tes | |
| | | * per row, and the height of each 2D slice of the 3D array. ::srcArray is | |
| | | * ignored. | |
| | | * | |
| | | * \par | |
| | | * If ::srcMemoryType is ::CU_MEMORYTYPE_ARRAY, ::srcArray specifies the | |
| | | * handle of the source data. ::srcHost, ::srcDevice, ::srcPitch and | |
| | | * ::srcHeight are ignored. | |
| | | * | |
| | | * \par | |
| | | * If ::dstMemoryType is ::CU_MEMORYTYPE_HOST, ::dstHost, ::dstPitch and | |
| | | * ::dstHeight specify the (host) base address of the destination data, the | |
| | | * bytes per row, and the height of each 2D slice of the 3D array. | |
| | | * ::dstArray is ignored. | |
| | | * | |
| | | * \par | |
| | | * If ::dstMemoryType is ::CU_MEMORYTYPE_DEVICE, ::dstDevice, ::dstPitch and | |
| | | * ::dstHeight specify the (device) base address of the destination data, | |
| | | * the bytes per row, and the height of each 2D slice of the 3D array. | |
| | | * ::dstArray is ignored. | |
| | | * | |
| | | * \par | |
| | | * If ::dstMemoryType is ::CU_MEMORYTYPE_ARRAY, ::dstArray specifies the | |
| | | * handle of the destination data. ::dstHost, ::dstDevice, ::dstPitch and | |
| | | * ::dstHeight are ignored. | |
| | | * | |
| | | * - ::srcXInBytes, ::srcY and ::srcZ specify the base address of the sourc | |
| | | e | |
| | | * data for the copy. | |
| | | * | |
| | | * \par | |
| | | * For host pointers, the starting address is | |
| | | * \code | |
| | | void* Start = (void*)((char*)srcHost+(srcZ*srcHeight+srcY)*srcPitch + src | |
| | | XInBytes); | |
| | | * \endcode | |
| | | * | |
| | | * \par | |
| | | * For device pointers, the starting address is | |
| | | * \code | |
| | | CUdeviceptr Start = srcDevice+(srcZ*srcHeight+srcY)*srcPitch+srcXInBytes; | |
| | | * \endcode | |
| | | * | |
| | | * \par | |
| | | * For CUDA arrays, ::srcXInBytes must be evenly divisible by the array | |
| | | * element size. | |
| | | * | |
| | | * - ::dstXInBytes, ::dstY and ::dstZ specify the base address of the | |
| | | * destination data for the copy. | |
| | | * | |
| | | * \par | |
| | | * For host pointers, the base address is | |
| | | * \code | |
| | | void* dstStart = (void*)((char*)dstHost+(dstZ*dstHeight+dstY)*dstPitch + | |
| | | dstXInBytes); | |
| | | * \endcode | |
| | | * | |
| | | * \par | |
| | | * For device pointers, the starting address is | |
| | | * \code | |
| | | CUdeviceptr dstStart = dstDevice+(dstZ*dstHeight+dstY)*dstPitch+dstXInByt | |
| | | es; | |
| | | * \endcode | |
| | | * | |
| | | * \par | |
| | | * For CUDA arrays, ::dstXInBytes must be evenly divisible by the array | |
| | | * element size. | |
| | | * | |
| | | * - ::WidthInBytes, ::Height and ::Depth specify the width (in bytes), hei | |
| | | ght | |
| | | * and depth of the 3D copy being performed. | |
| | | * - If specified, ::srcPitch must be greater than or equal to ::WidthInByt | |
| | | es + | |
| | | * ::srcXInBytes, and ::dstPitch must be greater than or equal to | |
| | | * ::WidthInBytes + ::dstXInBytes. | |
| | | * - If specified, ::srcHeight must be greater than or equal to ::Height + | |
| | | * ::srcY, and ::dstHeight must be greater than or equal to ::Height + :: | |
| | | dstY. | |
| | | * | |
| | | * \par | |
| | | * ::cuMemcpy3D() returns an error if any pitch is greater than the maximum | |
| | | * allowed (::CU_DEVICE_ATTRIBUTE_MAX_PITCH). | |
| | | * | |
| | | * The ::srcLOD and ::dstLOD members of the ::CUDA_MEMCPY3D structure must | |
| | | be | |
| | | * set to 0. | |
| | | * | |
| | | * \param pCopy - Parameters for the memory copy | |
| | | * | |
| | | * \return | |
| | | * ::CUDA_SUCCESS, | |
| | | * ::CUDA_ERROR_DEINITIALIZED, | |
| | | * ::CUDA_ERROR_NOT_INITIALIZED, | |
| | | * ::CUDA_ERROR_INVALID_CONTEXT, | |
| | | * ::CUDA_ERROR_INVALID_VALUE | |
| | | * \notefnerr | |
| | | * | |
| | | * \sa ::cuArray3DCreate, ::cuArray3DGetDescriptor, ::cuArrayCreate, | |
| | | * ::cuArrayDestroy, ::cuArrayGetDescriptor, ::cuMemAlloc, ::cuMemAllocHost | |
| | | , | |
| | | * ::cuMemAllocPitch, ::cuMemcpy2D, ::cuMemcpy2DAsync, ::cuMemcpy2DUnaligne | |
| | | d, | |
| | | * ::cuMemcpy3DAsync, ::cuMemcpyAtoA, ::cuMemcpyAtoD, | |
| | | * ::cuMemcpyAtoH, ::cuMemcpyAtoHAsync, ::cuMemcpyDtoA, ::cuMemcpyDtoD, ::c | |
| | | uMemcpyDtoDAsync, | |
| | | * ::cuMemcpyDtoH, ::cuMemcpyDtoHAsync, ::cuMemcpyHtoA, ::cuMemcpyHtoAAsync | |
| | | , | |
| | | * ::cuMemcpyHtoD, ::cuMemcpyHtoDAsync, ::cuMemFree, ::cuMemFreeHost, | |
| | | * ::cuMemGetAddressRange, ::cuMemGetInfo, ::cuMemHostAlloc, | |
| | | * ::cuMemHostGetDevicePointer, ::cuMemsetD2D8, ::cuMemsetD2D16, | |
| | | * ::cuMemsetD2D32, ::cuMemsetD8, ::cuMemsetD16, ::cuMemsetD32 | |
| | | */ | |
| | | CUresult CUDAAPI cuMemcpy3D(const CUDA_MEMCPY3D *pCopy); | |
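| | | | |
| | | /* | |
| | |  * Usage sketch (illustrative only): copying a tightly packed host volume | |
| | |  * into a pitched device allocation. hostVol, devVol, devPitch, widthBytes, | |
| | |  * height and depth are placeholders; a current context is assumed. Zero | |
| | |  * initialization keeps ::srcLOD and ::dstLOD at 0, as required above. | |
| | |  * | |
| | |  * \code | |
| | |     CUDA_MEMCPY3D cp = {0}; | |
| | |     cp.srcMemoryType = CU_MEMORYTYPE_HOST; | |
| | |     cp.srcHost       = hostVol;        // packed: row pitch == widthBytes | |
| | |     cp.srcPitch      = widthBytes; | |
| | |     cp.srcHeight     = height;         // rows per 2D slice | |
| | |     cp.dstMemoryType = CU_MEMORYTYPE_DEVICE; | |
| | |     cp.dstDevice     = devVol;         // e.g. from cuMemAllocPitch | |
| | |     cp.dstPitch      = devPitch; | |
| | |     cp.dstHeight     = height; | |
| | |     cp.WidthInBytes  = widthBytes; | |
| | |     cp.Height        = height; | |
| | |     cp.Depth         = depth; | |
| | |     cuMemcpy3D(&cp); | |
| | |  * \endcode | |
| | |  */ | |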
| | | | |
| | | /** | |
| | | * \brief Copies memory from Host to Device | |
| | | * | |
| | | * Copies from host memory to device memory. \p dstDevice and \p srcHost ar | |
| | | e | |
| | | * the base addresses of the destination and source, respectively. \p ByteC | |
| | | ount | |
| | | * specifies the number of bytes to copy. | |
| | | * | |
| | | * ::cuMemcpyHtoDAsync() is asynchronous and can optionally be associated t | |
| | | o a | |
| | | * stream by passing a non-zero \p hStream argument. It only works on | |
| | | * page-locked memory and returns an error if a pointer to pageable memory | |
| | | is | |
| | | * passed as input. | |
| | | * | |
| | | * \param dstDevice - Destination device pointer | |
| | | * \param srcHost - Source host pointer | |
| | | * \param ByteCount - Size of memory copy in bytes | |
| | | * \param hStream - Stream identifier | |
| | | * | |
| | | * \return | |
| | | * ::CUDA_SUCCESS, | |
| | | * ::CUDA_ERROR_DEINITIALIZED, | |
| | | * ::CUDA_ERROR_NOT_INITIALIZED, | |
| | | * ::CUDA_ERROR_INVALID_CONTEXT, | |
| | | * ::CUDA_ERROR_INVALID_VALUE | |
| | | * \notefnerr | |
| | | * | |
| | | * \sa ::cuArray3DCreate, ::cuArray3DGetDescriptor, ::cuArrayCreate, | |
| | | * ::cuArrayDestroy, ::cuArrayGetDescriptor, ::cuMemAlloc, ::cuMemAllocHost | |
| | | , | |
| | | * ::cuMemAllocPitch, ::cuMemcpy2D, ::cuMemcpy2DAsync, ::cuMemcpy2DUnaligne | |
| | | d, | |
| | | * ::cuMemcpy3D, ::cuMemcpy3DAsync, ::cuMemcpyAtoA, ::cuMemcpyAtoD, | |
| | | * ::cuMemcpyAtoH, ::cuMemcpyAtoHAsync, ::cuMemcpyDtoA, ::cuMemcpyDtoD, ::c | |
| | | uMemcpyDtoDAsync, | |
| | | * ::cuMemcpyDtoH, ::cuMemcpyDtoHAsync, ::cuMemcpyHtoA, ::cuMemcpyHtoAAsync | |
| | | , | |
| | | * ::cuMemcpyHtoD, ::cuMemFree, ::cuMemFreeHost, | |
| | | * ::cuMemGetAddressRange, ::cuMemGetInfo, ::cuMemHostAlloc, | |
| | | * ::cuMemHostGetDevicePointer, ::cuMemsetD2D8, ::cuMemsetD2D8Async, | |
| | | * ::cuMemsetD2D16, ::cuMemsetD2D16Async, ::cuMemsetD2D32, ::cuMemsetD2D32A | |
| | | sync, | |
| | | * ::cuMemsetD8, ::cuMemsetD8Async, ::cuMemsetD16, ::cuMemsetD16Async, | |
| | | * ::cuMemsetD32, ::cuMemsetD32Async | |
| | | */ | |
| | | CUresult CUDAAPI cuMemcpyHtoDAsync(CUdeviceptr dstDevice, const void *srcHo | |
| | | st, size_t ByteCount, CUstream hStream); | |
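| | | | |
| | | /* | |
| | |  * Usage sketch (illustrative only): an asynchronous host-to-device copy. | |
| | |  * The host buffer comes from ::cuMemAllocHost() because the call requires | |
| | |  * page-locked memory. devPtr and bytes are placeholders; a current context | |
| | |  * is assumed. | |
| | |  * | |
| | |  * \code | |
| | |     void    *h = NULL; | |
| | |     CUstream s; | |
| | |     cuMemAllocHost(&h, bytes);               // page-locked staging buffer | |
| | |     // ... fill h with input data ... | |
| | |     cuStreamCreate(&s, 0); | |
| | |     cuMemcpyHtoDAsync(devPtr, h, bytes, s);  // returns before copy finishes | |
| | |     // ... launch kernels that consume devPtr in the same stream ... | |
| | |     cuStreamSynchronize(s);                  // wait before reusing/freeing h | |
| | |     cuStreamDestroy(s); | |
| | |     cuMemFreeHost(h); | |
| | |  * \endcode | |
| | |  */ | |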
| | | | |
| | | /** | |
| | | * \brief Copies memory from Device to Host | |
| | | * | |
| | | * Copies from device to host memory. \p dstHost and \p srcDevice specify t | |
| | | he | |
| | | * base pointers of the destination and source, respectively. \p ByteCount | |
| | | * specifies the number of bytes to copy. | |
| | | * | |
| | | * ::cuMemcpyDtoHAsync() is asynchronous and can optionally be associated t | |
| | | o a | |
| | | * stream by passing a non-zero \p hStream argument. It only works on | |
| | | * page-locked memory and returns an error if a pointer to pageable memory | |
| | | is | |
| | | * passed as input. | |
| | | * | |
| | | * \param dstHost - Destination host pointer | |
| | | * \param srcDevice - Source device pointer | |
| | | * \param ByteCount - Size of memory copy in bytes | |
| | | * \param hStream - Stream identifier | |
| | | * | |
| | | * \return | |
| | | * ::CUDA_SUCCESS, | |
| | | * ::CUDA_ERROR_DEINITIALIZED, | |
| | | * ::CUDA_ERROR_NOT_INITIALIZED, | |
| | | * ::CUDA_ERROR_INVALID_CONTEXT, | |
| | | * ::CUDA_ERROR_INVALID_VALUE | |
| | | * \notefnerr | |
| | | * | |
| | | * \sa ::cuArray3DCreate, ::cuArray3DGetDescriptor, ::cuArrayCreate, | |
| | | * ::cuArrayDestroy, ::cuArrayGetDescriptor, ::cuMemAlloc, ::cuMemAllocHost | |
| | | , | |
| | | * ::cuMemAllocPitch, ::cuMemcpy2D, ::cuMemcpy2DAsync, ::cuMemcpy2DUnaligne | |
| | | d, | |
| | | * ::cuMemcpy3D, ::cuMemcpy3DAsync, ::cuMemcpyAtoA, ::cuMemcpyAtoD, | |
| | | * ::cuMemcpyAtoH, ::cuMemcpyAtoHAsync, ::cuMemcpyDtoA, ::cuMemcpyDtoD, ::c | |
| | | uMemcpyDtoDAsync, | |
| | | * ::cuMemcpyDtoH, ::cuMemcpyHtoA, ::cuMemcpyHtoAAsync, | |
| | | * ::cuMemcpyHtoD, ::cuMemcpyHtoDAsync, ::cuMemFree, ::cuMemFreeHost, | |
| | | * ::cuMemGetAddressRange, ::cuMemGetInfo, ::cuMemHostAlloc, | |
| | | * ::cuMemHostGetDevicePointer, ::cuMemsetD2D8, ::cuMemsetD2D8Async, | |
| | | * ::cuMemsetD2D16, ::cuMemsetD2D16Async, ::cuMemsetD2D32, ::cuMemsetD2D32A | |
| | | sync, | |
| | | * ::cuMemsetD8, ::cuMemsetD8Async, ::cuMemsetD16, ::cuMemsetD16Async, | |
| | | * ::cuMemsetD32, ::cuMemsetD32Async | |
| | | */ | |
| | | CUresult CUDAAPI cuMemcpyDtoHAsync(void *dstHost, CUdeviceptr srcDevice, si | |
| | | ze_t ByteCount, CUstream hStream); | |
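| | | | |
| | | /* | |
| | |  * Usage sketch (illustrative only): reading results back without blocking | |
| | |  * the host until the final wait. devResult, bytes and the stream s are | |
| | |  * placeholders; the host buffer must be page-locked. | |
| | |  * | |
| | |  * \code | |
| | |     float *hOut = NULL; | |
| | |     cuMemAllocHost((void**)&hOut, bytes);      // pinned, required for async | |
| | |     cuMemcpyDtoHAsync(hOut, devResult, bytes, s); | |
| | |     cuStreamSynchronize(s);                    // hOut is valid after this | |
| | |  * \endcode | |
| | |  */ | |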
| | | | |
| | | /** | |
| | | * \brief Copies memory from Device to Device | |
| | | * | |
| | | * Copies from device memory to device memory. \p dstDevice and \p srcDevic | |
| | | e | |
| | | * are the base pointers of the destination and source, respectively. | |
| | | * \p ByteCount specifies the number of bytes to copy. Note that this funct | |
| | | ion | |
| | | * is asynchronous and can optionally be associated to a stream by passing | |
| | | a | |
| | | * non-zero \p hStream argument. | |
| | | * | |
| | | * \param dstDevice - Destination device pointer | |
| | | * \param srcDevice - Source device pointer | |
| | | * \param ByteCount - Size of memory copy in bytes | |
| | | * \param hStream - Stream identifier | |
| | | * | |
| | | * \return | |
| | | * ::CUDA_SUCCESS, | |
| | | * ::CUDA_ERROR_DEINITIALIZED, | |
| | | * ::CUDA_ERROR_NOT_INITIALIZED, | |
| | | * ::CUDA_ERROR_INVALID_CONTEXT, | |
| | | * ::CUDA_ERROR_INVALID_VALUE | |
| | | * \notefnerr | |
| | | * | |
| | | * \sa ::cuArray3DCreate, ::cuArray3DGetDescriptor, ::cuArrayCreate, | |
| | | * ::cuArrayDestroy, ::cuArrayGetDescriptor, ::cuMemAlloc, ::cuMemAllocHost | |
| | | , | |
| | | * ::cuMemAllocPitch, ::cuMemcpy2D, ::cuMemcpy2DAsync, ::cuMemcpy2DUnaligne | |
| | | d, | |
| | | * ::cuMemcpy3D, ::cuMemcpy3DAsync, ::cuMemcpyAtoA, ::cuMemcpyAtoD, | |
| | | * ::cuMemcpyAtoH, ::cuMemcpyAtoHAsync, ::cuMemcpyDtoA, ::cuMemcpyDtoD, | |
| | | * ::cuMemcpyDtoH, ::cuMemcpyDtoHAsync, ::cuMemcpyHtoA, ::cuMemcpyHtoAAsync | |
| | | , | |
| | | * ::cuMemcpyHtoD, ::cuMemcpyHtoDAsync, ::cuMemFree, ::cuMemFreeHost, | |
| | | * ::cuMemGetAddressRange, ::cuMemGetInfo, ::cuMemHostAlloc, | |
| | | * ::cuMemHostGetDevicePointer, ::cuMemsetD2D8, ::cuMemsetD2D8Async, | |
| | | * ::cuMemsetD2D16, ::cuMemsetD2D16Async, ::cuMemsetD2D32, ::cuMemsetD2D32A | |
| | | sync, | |
| | | * ::cuMemsetD8, ::cuMemsetD8Async, ::cuMemsetD16, ::cuMemsetD16Async, | |
| | | * ::cuMemsetD32, ::cuMemsetD32Async | |
| | | */ | |
| | | CUresult CUDAAPI cuMemcpyDtoDAsync(CUdeviceptr dstDevice, CUdeviceptr srcDe | |
| | | vice, size_t ByteCount, CUstream hStream); | |
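| | | | |
| | | /* | |
| | |  * Usage sketch (illustrative only): staging data between two device | |
| | |  * allocations on a stream. Because no host memory is involved, there is no | |
| | |  * page-locking requirement. dDst, dSrc, bytes and s are placeholders. | |
| | |  * | |
| | |  * \code | |
| | |     cuMemcpyDtoDAsync(dDst, dSrc, bytes, s);   // ordered behind work | |
| | |                                                // already queued in s | |
| | |  * \endcode | |
| | |  */ | |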
| | | | |
| | | /** | |
| | | * \brief Copies memory from Host to Array | |
| | | * | |
| | | * Copies from host memory to a 1D CUDA array. \p dstArray and \p dstOffset | |
| | | * specify the CUDA array handle and starting offset in bytes of the | |
| | | * destination data. \p srcHost specifies the base address of the source. | |
| | | * \p ByteCount specifies the number of bytes to copy. | |
| | | * | |
| | | * ::cuMemcpyHtoAAsync() is asynchronous and can optionally be associated t | |
| | | o a | |
| | | * stream by passing a non-zero \p hStream argument. It only works on | |
| | | * page-locked memory and returns an error if a pointer to pageable memory | |
| | | is | |
| | | * passed as input. | |
| | | * | |
| | | * \param dstArray - Destination array | |
| | | * \param dstOffset - Offset in bytes of destination array | |
| | | * \param srcHost - Source host pointer | |
| | | * \param ByteCount - Size of memory copy in bytes | |
| | | * \param hStream - Stream identifier | |
| | | * | |
| | | * \return | |
| | | * ::CUDA_SUCCESS, | |
| | | * ::CUDA_ERROR_DEINITIALIZED, | |
| | | * ::CUDA_ERROR_NOT_INITIALIZED, | |
| | | * ::CUDA_ERROR_INVALID_CONTEXT, | |
| | | * ::CUDA_ERROR_INVALID_VALUE | |
| | | * \notefnerr | |
| | | * | |
| | | * \sa ::cuArray3DCreate, ::cuArray3DGetDescriptor, ::cuArrayCreate, | |
| | | * ::cuArrayDestroy, ::cuArrayGetDescriptor, ::cuMemAlloc, ::cuMemAllocHost | |
| | | , | |
| | | * ::cuMemAllocPitch, ::cuMemcpy2D, ::cuMemcpy2DAsync, ::cuMemcpy2DUnaligne | |
| | | d, | |
| | | * ::cuMemcpy3D, ::cuMemcpy3DAsync, ::cuMemcpyAtoA, ::cuMemcpyAtoD, | |
| | | * ::cuMemcpyAtoH, ::cuMemcpyAtoHAsync, ::cuMemcpyDtoA, ::cuMemcpyDtoD, ::c | |
| | | uMemcpyDtoDAsync, | |
| | | * ::cuMemcpyDtoH, ::cuMemcpyDtoHAsync, ::cuMemcpyHtoA, | |
| | | * ::cuMemcpyHtoD, ::cuMemcpyHtoDAsync, ::cuMemFree, ::cuMemFreeHost, | |
| | | * ::cuMemGetAddressRange, ::cuMemGetInfo, ::cuMemHostAlloc, | |
| | | * ::cuMemHostGetDevicePointer, ::cuMemsetD2D8, ::cuMemsetD2D8Async, | |
| | | * ::cuMemsetD2D16, ::cuMemsetD2D16Async, ::cuMemsetD2D32, ::cuMemsetD2D32A | |
| | | sync, | |
| | | * ::cuMemsetD8, ::cuMemsetD8Async, ::cuMemsetD16, ::cuMemsetD16Async, | |
| | | * ::cuMemsetD32, ::cuMemsetD32Async | |
| | | */ | |
| | | CUresult CUDAAPI cuMemcpyHtoAAsync(CUarray dstArray, size_t dstOffset, cons | |
| | | t void *srcHost, size_t ByteCount, CUstream hStream); | |
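| | | | |
| | | /* | |
| | |  * Usage sketch (illustrative only): filling a 1D CUDA array of n floats | |
| | |  * from a page-locked host buffer hPinned on stream s (both assumed to | |
| | |  * exist already). | |
| | |  * | |
| | |  * \code | |
| | |     CUDA_ARRAY_DESCRIPTOR ad = {0}; | |
| | |     CUarray arr; | |
| | |     ad.Format      = CU_AD_FORMAT_FLOAT; | |
| | |     ad.NumChannels = 1; | |
| | |     ad.Width       = n;                    // Height stays 0 -> 1D array | |
| | |     cuArrayCreate(&arr, &ad); | |
| | |     cuMemcpyHtoAAsync(arr, 0, hPinned, n * sizeof(float), s); | |
| | |  * \endcode | |
| | |  */ | |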
| | | | |
| | | /** | |
| | | * \brief Copies memory from Array to Host | |
| | | * | |
| | | * Copies from one 1D CUDA array to host memory. \p dstHost specifies the b | |
| | | ase | |
| | | * pointer of the destination. \p srcArray and \p srcOffset specify the CUD | |
| | | A | |
| | | * array handle and starting offset in bytes of the source data. | |
| | | * \p ByteCount specifies the number of bytes to copy. | |
| | | * | |
| | | * ::cuMemcpyAtoHAsync() is asynchronous and can optionally be associated t | |
| | | o a | |
| | | * stream by passing a non-zero \p hStream argument. It only works on | |
| | | * page-locked host memory and returns an error if a pointer to pageable | |
| | | * memory is passed as input. | |
| | | * | |
| | | * \param dstHost - Destination pointer | |
| | | * \param srcArray - Source array | |
| | | * \param srcOffset - Offset in bytes of source array | |
| | | * \param ByteCount - Size of memory copy in bytes | |
| | | * \param hStream - Stream identifier | |
| | | * | |
| | | * \return | |
| | | * ::CUDA_SUCCESS, | |
| | | * ::CUDA_ERROR_DEINITIALIZED, | |
| | | * ::CUDA_ERROR_NOT_INITIALIZED, | |
| | | * ::CUDA_ERROR_INVALID_CONTEXT, | |
| | | * ::CUDA_ERROR_INVALID_VALUE | |
| | | * \notefnerr | |
| | | * | |
| | | * \sa ::cuArray3DCreate, ::cuArray3DGetDescriptor, ::cuArrayCreate, | |
| | | * ::cuArrayDestroy, ::cuArrayGetDescriptor, ::cuMemAlloc, ::cuMemAllocHost | |
| | | , | |
| | | * ::cuMemAllocPitch, ::cuMemcpy2D, ::cuMemcpy2DAsync, ::cuMemcpy2DUnaligne | |
| | | d, | |
| | | * ::cuMemcpy3D, ::cuMemcpy3DAsync, ::cuMemcpyAtoA, ::cuMemcpyAtoD, | |
| | | * ::cuMemcpyAtoH, ::cuMemcpyDtoA, ::cuMemcpyDtoD, ::cuMemcpyDtoDAsync, | |
| | | * ::cuMemcpyDtoH, ::cuMemcpyDtoHAsync, ::cuMemcpyHtoA, ::cuMemcpyHtoAAsync | |
| | | , | |
| | | * ::cuMemcpyHtoD, ::cuMemcpyHtoDAsync, ::cuMemFree, ::cuMemFreeHost, | |
| | | * ::cuMemGetAddressRange, ::cuMemGetInfo, ::cuMemHostAlloc, | |
| | | * ::cuMemHostGetDevicePointer, ::cuMemsetD2D8, ::cuMemsetD2D8Async, | |
| | | * ::cuMemsetD2D16, ::cuMemsetD2D16Async, ::cuMemsetD2D32, ::cuMemsetD2D32A | |
| | | sync, | |
| | | * ::cuMemsetD8, ::cuMemsetD8Async, ::cuMemsetD16, ::cuMemsetD16Async, | |
| | | * ::cuMemsetD32, ::cuMemsetD32Async | |
| | | */ | |
| | | CUresult CUDAAPI cuMemcpyAtoHAsync(void *dstHost, CUarray srcArray, size_t | |
| | | srcOffset, size_t ByteCount, CUstream hStream); | |
| | | | |
| | | /** | |
| | | * \brief Copies memory for 2D arrays | |
| | | * | |
| | | * Perform a 2D memory copy according to the parameters specified in \p pCo | |
| | | py. | |
| | | * The ::CUDA_MEMCPY2D structure is defined as: | |
| | | * | |
| | | * \code | |
| | | typedef struct CUDA_MEMCPY2D_st { | |
| | | unsigned int srcXInBytes, srcY; | |
| | | CUmemorytype srcMemoryType; | |
| | | const void *srcHost; | |
| | | CUdeviceptr srcDevice; | |
| | | CUarray srcArray; | |
| | | unsigned int srcPitch; | |
| | | unsigned int dstXInBytes, dstY; | |
| | | CUmemorytype dstMemoryType; | |
| | | void *dstHost; | |
| | | CUdeviceptr dstDevice; | |
| | | CUarray dstArray; | |
| | | unsigned int dstPitch; | |
| | | unsigned int WidthInBytes; | |
| | | unsigned int Height; | |
| | | } CUDA_MEMCPY2D; | |
| | | * \endcode | |
| | | * where: | |
| | | * - ::srcMemoryType and ::dstMemoryType specify the type of memory of the | |
| | | * source and destination, respectively; ::CUmemorytype_enum is defined a | |
| | | s: | |
| | | * | |
| | | * \code | |
| | | typedef enum CUmemorytype_enum { | |
| | | CU_MEMORYTYPE_HOST = 0x01, | |
| | | CU_MEMORYTYPE_DEVICE = 0x02, | |
| | | CU_MEMORYTYPE_ARRAY = 0x03 | |
| | | } CUmemorytype; | |
| | | * \endcode | |
| | | * | |
| | | * \par | |
| | | * If ::srcMemoryType is ::CU_MEMORYTYPE_HOST, ::srcHost and ::srcPitch | |
| | | * specify the (host) base address of the source data and the bytes per row | |
| | | to | |
| | | * apply. ::srcArray is ignored. | |
| | | * | |
| | | * \par | |
| | | * If ::srcMemoryType is ::CU_MEMORYTYPE_DEVICE, ::srcDevice and ::srcPitch | |
| | | * specify the (device) base address of the source data and the bytes per r | |
| | | ow | |
| | | * to apply. ::srcArray is ignored. | |
| | | * | |
| | | * \par | |
| | | * If ::srcMemoryType is ::CU_MEMORYTYPE_ARRAY, ::srcArray specifies the | |
| | | * handle of the source data. ::srcHost, ::srcDevice and ::srcPitch are | |
| | | * ignored. | |
| | | * | |
| | | * \par | |
| | | * If ::dstMemoryType is ::CU_MEMORYTYPE_HOST, ::dstHost and ::dstPitch | |
| | | * specify the (host) base address of the destination data and the bytes pe | |
| | | r | |
| | | * row to apply. ::dstArray is ignored. | |
| | | * | |
| | | * \par | |
| | | * If ::dstMemoryType is ::CU_MEMORYTYPE_DEVICE, ::dstDevice and ::dstPitch | |
| | | * specify the (device) base address of the destination data and the bytes | |
| | | per | |
| | | * row to apply. ::dstArray is ignored. | |
| | | * | |
| | | * \par | |
| | | * If ::dstMemoryType is ::CU_MEMORYTYPE_ARRAY, ::dstArray specifies the | |
| | | * handle of the destination data. ::dstHost, ::dstDevice and ::dstPitch ar | |
| | | e | |
| | | * ignored. | |
| | | * | |
| | | * - ::srcXInBytes and ::srcY specify the base address of the source data f | |
| | | or | |
| | | * the copy. | |
| | | * | |
| | | * \par | |
| | | * For host pointers, the starting address is | |
| | | * \code | |
| | | void* Start = (void*)((char*)srcHost+srcY*srcPitch + srcXInBytes); | |
| | | * \endcode | |
| | | * | |
| | | * \par | |
| | | * For device pointers, the starting address is | |
| | | * \code | |
| | | CUdeviceptr Start = srcDevice+srcY*srcPitch+srcXInBytes; | |
| | | * \endcode | |
| | | * | |
| | | * \par | |
| | | * For CUDA arrays, ::srcXInBytes must be evenly divisible by the array | |
| | | * element size. | |
| | | * | |
| | | * - ::dstXInBytes and ::dstY specify the base address of the destination d | |
| | | ata | |
| | | * for the copy. | |
| | | * | |
| | | * \par | |
| | | * For host pointers, the base address is | |
| | | * \code | |
| | | void* dstStart = (void*)((char*)dstHost+dstY*dstPitch + dstXInBytes); | |
| | | * \endcode | |
| | | * | |
| | | * \par | |
| | | * For device pointers, the starting address is | |
| | | * \code | |
| | | CUdeviceptr dstStart = dstDevice+dstY*dstPitch+dstXInBytes; | |
| | | * \endcode | |
| | | * | |
| | | * \par | |
| | | * For CUDA arrays, ::dstXInBytes must be evenly divisible by the array | |
| | | * element size. | |
| | | * | |
| | | * - ::WidthInBytes and ::Height specify the width (in bytes) and height of | |
| | | * the 2D copy being performed. | |
| | | * - If specified, ::srcPitch must be greater than or equal to | |
| | | * ::WidthInBytes + ::srcXInBytes, and ::dstPitch must be greater than or | |
| | | * equal to ::WidthInBytes + ::dstXInBytes. | |
| | | * | |
| | | * \par | |
| | | * ::cuMemcpy2D() returns an error if any pitch is greater than the maximum | |
| | | * allowed (::CU_DEVICE_ATTRIBUTE_MAX_PITCH). ::cuMemAllocPitch() passes ba | |
| | | ck | |
| | | * pitches that always work with ::cuMemcpy2D(). On intra-device memory cop | |
| | | ies | |
| | | * (device to device, CUDA array to device, CUDA array to CUDA array), | |
| | | * ::cuMemcpy2D() may fail for pitches not computed by ::cuMemAllocPitch(). | |
| | | * ::cuMemcpy2DUnaligned() does not have this restriction, but may run | |
| | | * significantly slower in the cases where ::cuMemcpy2D() would have return | |
| | | ed | |
| | | * an error code. | |
| | | * | |
| | | * ::cuMemcpy2DAsync() is asynchronous and can optionally be associated to | |
| | | a | |
| | | * stream by passing a non-zero \p hStream argument. It only works on | |
| | | * page-locked host memory and returns an error if a pointer to pageable | |
| | | * memory is passed as input. | |
| | | * | |
| | | * \param pCopy - Parameters for the memory copy | |
| | | * \param hStream - Stream identifier | |
| | | * | |
| | | * \return | |
| | | * ::CUDA_SUCCESS, | |
| | | * ::CUDA_ERROR_DEINITIALIZED, | |
| | | * ::CUDA_ERROR_NOT_INITIALIZED, | |
| | | * ::CUDA_ERROR_INVALID_CONTEXT, | |
| | | * ::CUDA_ERROR_INVALID_VALUE | |
| | | * \notefnerr | |
| | | * | |
| | | * \sa ::cuArray3DCreate, ::cuArray3DGetDescriptor, ::cuArrayCreate, | |
| | | * ::cuArrayDestroy, ::cuArrayGetDescriptor, ::cuMemAlloc, ::cuMemAllocHost | |
| | | , | |
| | | * ::cuMemAllocPitch, ::cuMemcpy2D, ::cuMemcpy2DUnaligned, | |
| | | * ::cuMemcpy3D, ::cuMemcpy3DAsync, ::cuMemcpyAtoA, ::cuMemcpyAtoD, | |
| | | * ::cuMemcpyAtoH, ::cuMemcpyAtoHAsync, ::cuMemcpyDtoA, ::cuMemcpyDtoD, ::c | |
| | | uMemcpyDtoDAsync, | |
| | | * ::cuMemcpyDtoH, ::cuMemcpyDtoHAsync, ::cuMemcpyHtoA, ::cuMemcpyHtoAAsync | |
| | | , | |
| | | * ::cuMemcpyHtoD, ::cuMemcpyHtoDAsync, ::cuMemFree, ::cuMemFreeHost, | |
| | | * ::cuMemGetAddressRange, ::cuMemGetInfo, ::cuMemHostAlloc, | |
| | | * ::cuMemHostGetDevicePointer, ::cuMemsetD2D8, ::cuMemsetD2D8Async, | |
| | | * ::cuMemsetD2D16, ::cuMemsetD2D16Async, ::cuMemsetD2D32, ::cuMemsetD2D32A | |
| | | sync, | |
| | | * ::cuMemsetD8, ::cuMemsetD8Async, ::cuMemsetD16, ::cuMemsetD16Async, | |
| | | * ::cuMemsetD32, ::cuMemsetD32Async | |
| | | */ | |
| | | CUresult CUDAAPI cuMemcpy2DAsync(const CUDA_MEMCPY2D *pCopy, CUstream hStre | |
| | | am); | |
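| | | | |
| | | /* | |
| | |  * Usage sketch (illustrative only): uploading one image frame per stream | |
| | |  * so transfers can overlap with work in other streams. hPinnedFrame must | |
| | |  * be page-locked; devFrame and devPitch come from ::cuMemAllocPitch() | |
| | |  * (all names are placeholders). | |
| | |  * | |
| | |  * \code | |
| | |     CUDA_MEMCPY2D cp = {0}; | |
| | |     cp.srcMemoryType = CU_MEMORYTYPE_HOST; | |
| | |     cp.srcHost       = hPinnedFrame; | |
| | |     cp.srcPitch      = widthBytes; | |
| | |     cp.dstMemoryType = CU_MEMORYTYPE_DEVICE; | |
| | |     cp.dstDevice     = devFrame; | |
| | |     cp.dstPitch      = devPitch; | |
| | |     cp.WidthInBytes  = widthBytes; | |
| | |     cp.Height        = height; | |
| | |     cuMemcpy2DAsync(&cp, s);               // queued; host is not blocked | |
| | |  * \endcode | |
| | |  */ | |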
| | | | |
| | | /** | |
| | | * \brief Copies memory for 3D arrays | |
| | | * | |
| | | * Perform a 3D memory copy according to the parameters specified in | |
| | | * \p pCopy. The ::CUDA_MEMCPY3D structure is defined as: | |
| | | * | |
| | | * \code | |
| | | typedef struct CUDA_MEMCPY3D_st { | |
| | | | |
| | | unsigned int srcXInBytes, srcY, srcZ; | |
| | | unsigned int srcLOD; | |
| | | CUmemorytype srcMemoryType; | |
| | | const void *srcHost; | |
| | | CUdeviceptr srcDevice; | |
| | | CUarray srcArray; | |
| | | unsigned int srcPitch; // ignored when src is array | |
| | | unsigned int srcHeight; // ignored when src is array; may b | |
| | | e 0 if Depth==1 | |
| | | | |
| | | unsigned int dstXInBytes, dstY, dstZ; | |
| | | unsigned int dstLOD; | |
| | | CUmemorytype dstMemoryType; | |
| | | void *dstHost; | |
| | | CUdeviceptr dstDevice; | |
| | | CUarray dstArray; | |
| | | unsigned int dstPitch; // ignored when dst is array | |
| | | unsigned int dstHeight; // ignored when dst is array; may b | |
| | | e 0 if Depth==1 | |
| | | | |
| | | unsigned int WidthInBytes; | |
| | | unsigned int Height; | |
| | | unsigned int Depth; | |
| | | } CUDA_MEMCPY3D; | |
| | | * \endcode | |
| | | * where: | |
| | | * - ::srcMemoryType and ::dstMemoryType specify the type of memory of the | |
| | | * source and destination, respectively; ::CUmemorytype_enum is defined a | |
| | | s: | |
| | | * | |
| | | * \code | |
| | | typedef enum CUmemorytype_enum { | |
| | | CU_MEMORYTYPE_HOST = 0x01, | |
| | | CU_MEMORYTYPE_DEVICE = 0x02, | |
| | | CU_MEMORYTYPE_ARRAY = 0x03 | |
| | | } CUmemorytype; | |
| | | * \endcode | |
| | | * | |
| | | * \par | |
| | | * If ::srcMemoryType is ::CU_MEMORYTYPE_HOST, ::srcHost, ::srcPitch and | |
| | | * ::srcHeight specify the (host) base address of the source data, the byte | |
| | | s | |
| | | * per row, and the height of each 2D slice of the 3D array. ::srcArray is | |
| | | * ignored. | |
| | | * | |
| | | * \par | |
| | | * If ::srcMemoryType is ::CU_MEMORYTYPE_DEVICE, ::srcDevice, ::srcPitch an | |
| | | d | |
| | | * ::srcHeight specify the (device) base address of the source data, the by | |
| | | tes | |
| | | * per row, and the height of each 2D slice of the 3D array. ::srcArray is | |
| | | * ignored. | |
| | | * | |
| | | * \par | |
| | | * If ::srcMemoryType is ::CU_MEMORYTYPE_ARRAY, ::srcArray specifies the | |
| | | * handle of the source data. ::srcHost, ::srcDevice, ::srcPitch and | |
| | | * ::srcHeight are ignored. | |
| | | * | |
| | | * \par | |
| | | * If ::dstMemoryType is ::CU_MEMORYTYPE_HOST, ::dstHost, ::dstPitch and | |
| | | * ::dstHeight specify the (host) base address of the destination data, the | |
| | | * bytes per row, and the height of each 2D slice of the 3D array. | |
| | | * ::dstArray is ignored. | |
| | | * | |
| | | * \par | |
| | | * If ::dstMemoryType is ::CU_MEMORYTYPE_DEVICE, ::dstDevice, ::dstPitch and | |
| | | * ::dstHeight specify the (device) base address of the destination data, | |
| | | * the bytes per row, and the height of each 2D slice of the 3D array. | |
| | | * ::dstArray is ignored. | |
| | | * | |
| | | * \par | |
| | | * If ::dstMemoryType is ::CU_MEMORYTYPE_ARRAY, ::dstArray specifies the | |
| | | * handle of the destination data. ::dstHost, ::dstDevice, ::dstPitch and | |
| | | * ::dstHeight are ignored. | |
| | | * | |
| | | * - ::srcXInBytes, ::srcY and ::srcZ specify the base address of the sourc | |
| | | e | |
| | | * data for the copy. | |
| | | * | |
| | | * \par | |
| | | * For host pointers, the starting address is | |
| | | * \code | |
| | | void* Start = (void*)((char*)srcHost+(srcZ*srcHeight+srcY)*srcPitch + src | |
| | | XInBytes); | |
| | | * \endcode | |
| | | * | |
| | | * \par | |
| | | * For device pointers, the starting address is | |
| | | * \code | |
| | | CUdeviceptr Start = srcDevice+(srcZ*srcHeight+srcY)*srcPitch+srcXInBytes; | |
| | | * \endcode | |
| | | * | |
| | | * \par | |
| | | * For CUDA arrays, ::srcXInBytes must be evenly divisible by the array | |
| | | * element size. | |
| | | * | |
| | | * - ::dstXInBytes, ::dstY and ::dstZ specify the base address of the | |
| | | * destination data for the copy. | |
| | | * | |
| | | * \par | |
| | | * For host pointers, the base address is | |
| | | * \code | |
| | | void* dstStart = (void*)((char*)dstHost+(dstZ*dstHeight+dstY)*dstPitch + | |
| | | dstXInBytes); | |
| | | * \endcode | |
| | | * | |
| | | * \par | |
| | | * For device pointers, the starting address is | |
| | | * \code | |
| | | CUdeviceptr dstStart = dstDevice+(dstZ*dstHeight+dstY)*dstPitch+dstXInByt | |
| | | es; | |
| | | * \endcode | |
| | | * | |
| | | * \par | |
| | | * For CUDA arrays, ::dstXInBytes must be evenly divisible by the array | |
| | | * element size. | |
| | | * | |
| | | * - ::WidthInBytes, ::Height and ::Depth specify the width (in bytes), hei | |
| | | ght | |
| | | * and depth of the 3D copy being performed. | |
| | | * - If specified, ::srcPitch must be greater than or equal to ::WidthInByt | |
| | | es + | |
| | | * ::srcXInBytes, and ::dstPitch must be greater than or equal to | |
| | | * ::WidthInBytes + ::dstXInBytes. | |
| | | * - If specified, ::srcHeight must be greater than or equal to ::Height + | |
| | | * ::srcY, and ::dstHeight must be greater than or equal to ::Height + :: | |
| | | dstY. | |
| | | * | |
| | | * \par | |
| | | * ::cuMemcpy3D() returns an error if any pitch is greater than the maximum | |
| | | * allowed (::CU_DEVICE_ATTRIBUTE_MAX_PITCH). | |
| | | * | |
| | | * ::cuMemcpy3DAsync() is asynchronous and can optionally be associated to | |
| | | a | |
| | | * stream by passing a non-zero \p hStream argument. It only works on | |
| | | * page-locked host memory and returns an error if a pointer to pageable | |
| | | * memory is passed as input. | |
| | | * | |
| | | * The ::srcLOD and ::dstLOD members of the ::CUDA_MEMCPY3D structure must | |
| | | be | |
| | | * set to 0. | |
| | | * | |
| | | * \param pCopy - Parameters for the memory copy | |
| | | * \param hStream - Stream identifier | |
| | | * | |
| | | * \return | |
| | | * ::CUDA_SUCCESS, | |
| | | * ::CUDA_ERROR_DEINITIALIZED, | |
| | | * ::CUDA_ERROR_NOT_INITIALIZED, | |
| | | * ::CUDA_ERROR_INVALID_CONTEXT, | |
| | | * ::CUDA_ERROR_INVALID_VALUE | |
| | | * \notefnerr | |
| | | * | |
| | | * \sa ::cuArray3DCreate, ::cuArray3DGetDescriptor, ::cuArrayCreate, | |
| | | * ::cuArrayDestroy, ::cuArrayGetDescriptor, ::cuMemAlloc, ::cuMemAllocHost | |
| | | , | |
| | | * ::cuMemAllocPitch, ::cuMemcpy2D, ::cuMemcpy2DAsync, ::cuMemcpy2DUnaligne | |
| | | d, | |
| | | * ::cuMemcpy3D, ::cuMemcpyAtoA, ::cuMemcpyAtoD, | |
| | | * ::cuMemcpyAtoH, ::cuMemcpyAtoHAsync, ::cuMemcpyDtoA, ::cuMemcpyDtoD, ::c | |
| | | uMemcpyDtoDAsync, | |
| | | * ::cuMemcpyDtoH, ::cuMemcpyDtoHAsync, ::cuMemcpyHtoA, ::cuMemcpyHtoAAsync | |
| | | , | |
| | | * ::cuMemcpyHtoD, ::cuMemcpyHtoDAsync, ::cuMemFree, ::cuMemFreeHost, | |
| | | * ::cuMemGetAddressRange, ::cuMemGetInfo, ::cuMemHostAlloc, | |
| | | * ::cuMemHostGetDevicePointer, ::cuMemsetD2D8, ::cuMemsetD2D8Async, | |
| | | * ::cuMemsetD2D16, ::cuMemsetD2D16Async, ::cuMemsetD2D32, ::cuMemsetD2D32A | |
| | | sync, | |
| | | * ::cuMemsetD8, ::cuMemsetD8Async, ::cuMemsetD16, ::cuMemsetD16Async, | |
| | | * ::cuMemsetD32, ::cuMemsetD32Async | |
| | | */ | |
| | | CUresult CUDAAPI cuMemcpy3DAsync(const CUDA_MEMCPY3D *pCopy, CUstream hStre | |
| | | am); | |
| | | | |
| | | /** | |
| | | * \brief Initializes device memory | |
| | | * | |
| | | * Sets the memory range of \p N 8-bit values to the specified value | |
| | | * \p uc. | |
| | | * | |
| | | * \param dstDevice - Destination device pointer | |
| | | * \param uc - Value to set | |
| | | * \param N - Number of elements | |
| | | * | |
| | | * \return | |
| | | * ::CUDA_SUCCESS, | |
| | | * ::CUDA_ERROR_DEINITIALIZED, | |
| | | * ::CUDA_ERROR_NOT_INITIALIZED, | |
| | | * ::CUDA_ERROR_INVALID_CONTEXT, | |
| | | * ::CUDA_ERROR_INVALID_VALUE | |
| | | * \notefnerr | |
| | | * | |
| | | * \sa ::cuArray3DCreate, ::cuArray3DGetDescriptor, ::cuArrayCreate, | |
| | | * ::cuArrayDestroy, ::cuArrayGetDescriptor, ::cuMemAlloc, ::cuMemAllocHost | |
| | | , | |
| | | * ::cuMemAllocPitch, ::cuMemcpy2D, ::cuMemcpy2DAsync, ::cuMemcpy2DUnaligne | |
| | | d, | |
| | | * ::cuMemcpy3D, ::cuMemcpy3DAsync, ::cuMemcpyAtoA, ::cuMemcpyAtoD, | |
| | | * ::cuMemcpyAtoH, ::cuMemcpyAtoHAsync, ::cuMemcpyDtoA, ::cuMemcpyDtoD, ::c | |
| | | uMemcpyDtoDAsync, | |
| | | * ::cuMemcpyDtoH, ::cuMemcpyDtoHAsync, ::cuMemcpyHtoA, ::cuMemcpyHtoAAsync | |
| | | , | |
| | | * ::cuMemcpyHtoD, ::cuMemcpyHtoDAsync, ::cuMemFree, ::cuMemFreeHost, | |
| | | * ::cuMemGetAddressRange, ::cuMemGetInfo, ::cuMemHostAlloc, | |
| | | * ::cuMemHostGetDevicePointer, ::cuMemsetD2D8, ::cuMemsetD2D8Async, | |
| | | * ::cuMemsetD2D16, ::cuMemsetD2D16Async, ::cuMemsetD2D32, ::cuMemsetD2D32A | |
| | | sync, | |
| | | * ::cuMemsetD8Async, ::cuMemsetD16, ::cuMemsetD16Async, | |
| | | * ::cuMemsetD32, ::cuMemsetD32Async | |
| | | */ | |
| | | CUresult CUDAAPI cuMemsetD8(CUdeviceptr dstDevice, unsigned char uc, size_t | |
| | | N); | |
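| | | | |
| | | /* | |
| | |  * Usage sketch (illustrative only): zero-filling a freshly allocated | |
| | |  * buffer. bytes is a placeholder; a current context is assumed. | |
| | |  * | |
| | |  * \code | |
| | |     CUdeviceptr d; | |
| | |     cuMemAlloc(&d, bytes); | |
| | |     cuMemsetD8(d, 0, bytes);     // N counts 8-bit elements, i.e. bytes | |
| | |  * \endcode | |
| | |  */ | |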
| | | | |
| | | /** | |
| | | * \brief Initializes device memory | |
| | | * | |
| | | * Sets the memory range of \p N 16-bit values to the specified value | |
| | | * \p us. | |
| | | * | |
| | | * \param dstDevice - Destination device pointer | |
| | | * \param us - Value to set | |
| | | * \param N - Number of elements | |
| | | * | |
| | | * \return | |
| | | * ::CUDA_SUCCESS, | |
| | | * ::CUDA_ERROR_DEINITIALIZED, | |
| | | * ::CUDA_ERROR_NOT_INITIALIZED, | |
| | | * ::CUDA_ERROR_INVALID_CONTEXT, | |
| | | * ::CUDA_ERROR_INVALID_VALUE | |
| | | * \notefnerr | |
| | | * | |
| | | * \sa ::cuArray3DCreate, ::cuArray3DGetDescriptor, ::cuArrayCreate, | |
| | | * ::cuArrayDestroy, ::cuArrayGetDescriptor, ::cuMemAlloc, ::cuMemAllocHost | |
| | | , | |
| | | * ::cuMemAllocPitch, ::cuMemcpy2D, ::cuMemcpy2DAsync, ::cuMemcpy2DUnaligne | |
| | | d, | |
| | | * ::cuMemcpy3D, ::cuMemcpy3DAsync, ::cuMemcpyAtoA, ::cuMemcpyAtoD, | |
| | | * ::cuMemcpyAtoH, ::cuMemcpyAtoHAsync, ::cuMemcpyDtoA, ::cuMemcpyDtoD, ::c | |
| | | uMemcpyDtoDAsync, | |
| | | * ::cuMemcpyDtoH, ::cuMemcpyDtoHAsync, ::cuMemcpyHtoA, ::cuMemcpyHtoAAsync | |
| | | , | |
| | | * ::cuMemcpyHtoD, ::cuMemcpyHtoDAsync, ::cuMemFree, ::cuMemFreeHost, | |
| | | * ::cuMemGetAddressRange, ::cuMemGetInfo, ::cuMemHostAlloc, | |
| | | * ::cuMemHostGetDevicePointer, ::cuMemsetD2D8, ::cuMemsetD2D8Async, | |
| | | * ::cuMemsetD2D16, ::cuMemsetD2D16Async, ::cuMemsetD2D32, ::cuMemsetD2D32A | |
| | | sync, | |
| | | * ::cuMemsetD8, ::cuMemsetD8Async, ::cuMemsetD16Async, | |
| | | * ::cuMemsetD32, ::cuMemsetD32Async | |
| | | */ | |
| | | CUresult CUDAAPI cuMemsetD16(CUdeviceptr dstDevice, unsigned short us, size | |
| | | _t N); | |
| | | | |
| | | /** | |
| | | * \brief Initializes device memory | |
| | | * | |
| | | * Sets the memory range of \p N 32-bit values to the specified value | |
| | | * \p ui. | |
| | | * | |
| | | * \param dstDevice - Destination device pointer | |
| | | * \param ui - Value to set | |
| | | * \param N - Number of elements | |
| | | * | |
| | | * \return | |
| | | * ::CUDA_SUCCESS, | |
| | | * ::CUDA_ERROR_DEINITIALIZED, | |
| | | * ::CUDA_ERROR_NOT_INITIALIZED, | |
| | | * ::CUDA_ERROR_INVALID_CONTEXT, | |
| | | * ::CUDA_ERROR_INVALID_VALUE | |
| | | * \notefnerr | |
| | | * | |
| | | * \sa ::cuArray3DCreate, ::cuArray3DGetDescriptor, ::cuArrayCreate, | |
| | | * ::cuArrayDestroy, ::cuArrayGetDescriptor, ::cuMemAlloc, ::cuMemAllocHost | |
| | | , | |
| | | * ::cuMemAllocPitch, ::cuMemcpy2D, ::cuMemcpy2DAsync, ::cuMemcpy2DUnaligne | |
| | | d, | |
| | | * ::cuMemcpy3D, ::cuMemcpy3DAsync, ::cuMemcpyAtoA, ::cuMemcpyAtoD, | |
| | | * ::cuMemcpyAtoH, ::cuMemcpyAtoHAsync, ::cuMemcpyDtoA, ::cuMemcpyDtoD, ::c | |
| | | uMemcpyDtoDAsync, | |
| | | * ::cuMemcpyDtoH, ::cuMemcpyDtoHAsync, ::cuMemcpyHtoA, ::cuMemcpyHtoAAsync | |
| | | , | |
| | | * ::cuMemcpyHtoD, ::cuMemcpyHtoDAsync, ::cuMemFree, ::cuMemFreeHost, | |
| | | * ::cuMemGetAddressRange, ::cuMemGetInfo, ::cuMemHostAlloc, | |
| | | * ::cuMemHostGetDevicePointer, ::cuMemsetD2D8, ::cuMemsetD2D8Async, | |
| | | * ::cuMemsetD2D16, ::cuMemsetD2D16Async, ::cuMemsetD2D32, ::cuMemsetD2D32A | |
| | | sync, | |
| | | * ::cuMemsetD8, ::cuMemsetD8Async, ::cuMemsetD16, ::cuMemsetD16Async, | |
| | | * ::cuMemsetD32Async | |
| | | */ | |
| | | CUresult CUDAAPI cuMemsetD32(CUdeviceptr dstDevice, unsigned int ui, size_t | |
| | | N); | |
| | | | |
| | | /** | |
| | | * \brief Initializes device memory | |
| | | * | |
| | | * Sets the 2D memory range of \p Width 8-bit values to the specified value | |
| | | * \p uc. \p Height specifies the number of rows to set, and \p dstPitch | |
| | | * specifies the number of bytes between each row. This function performs | |
| | | * fastest when the pitch is one that has been passed back by | |
| | | * ::cuMemAllocPitch(). | |
| | | * | |
| | | * \param dstDevice - Destination device pointer | |
| | | * \param dstPitch - Pitch of destination device pointer | |
| | | * \param uc - Value to set | |
| | | * \param Width - Width of row | |
| | | * \param Height - Number of rows | |
| | | * | |
| | | * \return | |
| | | * ::CUDA_SUCCESS, | |
| | | * ::CUDA_ERROR_DEINITIALIZED, | |
| | | * ::CUDA_ERROR_NOT_INITIALIZED, | |
| | | * ::CUDA_ERROR_INVALID_CONTEXT, | |
| | | * ::CUDA_ERROR_INVALID_VALUE | |
| | | * \notefnerr | |
| | | * | |
| | | * \sa ::cuArray3DCreate, ::cuArray3DGetDescriptor, ::cuArrayCreate, | |
| | | * ::cuArrayDestroy, ::cuArrayGetDescriptor, ::cuMemAlloc, ::cuMemAllocHost | |
| | | , | |
| | | * ::cuMemAllocPitch, ::cuMemcpy2D, ::cuMemcpy2DAsync, ::cuMemcpy2DUnaligne | |
| | | d, | |
| | | * ::cuMemcpy3D, ::cuMemcpy3DAsync, ::cuMemcpyAtoA, ::cuMemcpyAtoD, | |
| | | * ::cuMemcpyAtoH, ::cuMemcpyAtoHAsync, ::cuMemcpyDtoA, ::cuMemcpyDtoD, ::c | |
| | | uMemcpyDtoDAsync, | |
| | | * ::cuMemcpyDtoH, ::cuMemcpyDtoHAsync, ::cuMemcpyHtoA, ::cuMemcpyHtoAAsync | |
| | | , | |
| | | * ::cuMemcpyHtoD, ::cuMemcpyHtoDAsync, ::cuMemFree, ::cuMemFreeHost, | |
| | | * ::cuMemGetAddressRange, ::cuMemGetInfo, ::cuMemHostAlloc, | |
| | | * ::cuMemHostGetDevicePointer, ::cuMemsetD2D8Async, | |
| | | * ::cuMemsetD2D16, ::cuMemsetD2D16Async, ::cuMemsetD2D32, ::cuMemsetD2D32A | |
| | | sync, | |
| | | * ::cuMemsetD8, ::cuMemsetD8Async, ::cuMemsetD16, ::cuMemsetD16Async, | |
| | | * ::cuMemsetD32, ::cuMemsetD32Async | |
| | | */ | |
| | | CUresult CUDAAPI cuMemsetD2D8(CUdeviceptr dstDevice, size_t dstPitch, unsig | |
| | | ned char uc, size_t Width, size_t Height); | |
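| | | | |
| | | /* | |
| | |  * Usage sketch (illustrative only): clearing a pitched 2D allocation. The | |
| | |  * pitch returned by ::cuMemAllocPitch() is the fast path mentioned above. | |
| | |  * widthBytes and height are placeholders. | |
| | |  * | |
| | |  * \code | |
| | |     CUdeviceptr img; | |
| | |     size_t      pitch; | |
| | |     cuMemAllocPitch(&img, &pitch, widthBytes, height, 4); | |
| | |     cuMemsetD2D8(img, pitch, 0xFF, widthBytes, height); // only the first | |
| | |                                          // widthBytes of each row are set | |
| | |  * \endcode | |
| | |  */ | |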
| | | | |
| | | /** | |
| | | * \brief Initializes device memory | |
| | | * | |
| | | * Sets the 2D memory range of \p Width 16-bit values to the specified valu | |
| | | e | |
| | | * \p us. \p Height specifies the number of rows to set, and \p dstPitch | |
| | | * specifies the number of bytes between each row. This function performs | |
| | | * fastest when the pitch is one that has been passed back by | |
| | | * ::cuMemAllocPitch(). | |
| | | * | |
| | | * \param dstDevice - Destination device pointer | |
| | | * \param dstPitch - Pitch of destination device pointer | |
| | | * \param us - Value to set | |
| | | * \param Width - Width of row | |
| | | * \param Height - Number of rows | |
| | | * | |
| | | * \return | |
| | | * ::CUDA_SUCCESS, | |
| | | * ::CUDA_ERROR_DEINITIALIZED, | |
| | | * ::CUDA_ERROR_NOT_INITIALIZED, | |
| | | * ::CUDA_ERROR_INVALID_CONTEXT, | |
| | | * ::CUDA_ERROR_INVALID_VALUE | |
| | | * \notefnerr | |
| | | * | |
| | | * \sa ::cuArray3DCreate, ::cuArray3DGetDescriptor, ::cuArrayCreate, | |
| | | * ::cuArrayDestroy, ::cuArrayGetDescriptor, ::cuMemAlloc, ::cuMemAllocHost | |
| | | , | |
| | | * ::cuMemAllocPitch, ::cuMemcpy2D, ::cuMemcpy2DAsync, ::cuMemcpy2DUnaligne | |
| | | d, | |
| | | * ::cuMemcpy3D, ::cuMemcpy3DAsync, ::cuMemcpyAtoA, ::cuMemcpyAtoD, | |
| | | * ::cuMemcpyAtoH, ::cuMemcpyAtoHAsync, ::cuMemcpyDtoA, ::cuMemcpyDtoD, ::c | |
| | | uMemcpyDtoDAsync, | |
| | | * ::cuMemcpyDtoH, ::cuMemcpyDtoHAsync, ::cuMemcpyHtoA, ::cuMemcpyHtoAAsync | |
| | | , | |
| | | * ::cuMemcpyHtoD, ::cuMemcpyHtoDAsync, ::cuMemFree, ::cuMemFreeHost, | |
| | | * ::cuMemGetAddressRange, ::cuMemGetInfo, ::cuMemHostAlloc, | |
| | | * ::cuMemHostGetDevicePointer, ::cuMemsetD2D8, ::cuMemsetD2D8Async, | |
| | | * ::cuMemsetD2D16Async, ::cuMemsetD2D32, ::cuMemsetD2D32Async, | |
| | | * ::cuMemsetD8, ::cuMemsetD8Async, ::cuMemsetD16, ::cuMemsetD16Async, | |
| | | * ::cuMemsetD32, ::cuMemsetD32Async | |
| | | */ | |
| | | CUresult CUDAAPI cuMemsetD2D16(CUdeviceptr dstDevice, size_t dstPitch, unsi | |
| | | gned short us, size_t Width, size_t Height); | |
| | | | |
| | | /** | |
| | | * \brief Initializes device memory | |
| | | * | |
| | | * Sets the 2D memory range of \p Width 32-bit values to the specified valu | |
| | | e | |
| | | * \p ui. \p Height specifies the number of rows to set, and \p dstPitch | |
| | | * specifies the number of bytes between each row. This function performs | |
| | | * fastest when the pitch is one that has been passed back by | |
| | | * ::cuMemAllocPitch(). | |
| | | * | |
| | | * \param dstDevice - Destination device pointer | |
| | | * \param dstPitch - Pitch of destination device pointer | |
| | | * \param ui - Value to set | |
| | | * \param Width - Width of row | |
| | | * \param Height - Number of rows | |
| | | * | |
| | | * \return | |
| | | * ::CUDA_SUCCESS, | |
| | | * ::CUDA_ERROR_DEINITIALIZED, | |
| | | * ::CUDA_ERROR_NOT_INITIALIZED, | |
| | | * ::CUDA_ERROR_INVALID_CONTEXT, | |
| | | * ::CUDA_ERROR_INVALID_VALUE | |
| | | * \notefnerr | |
| | | * | |
| | | * \sa ::cuArray3DCreate, ::cuArray3DGetDescriptor, ::cuArrayCreate, | |
| | | * ::cuArrayDestroy, ::cuArrayGetDescriptor, ::cuMemAlloc, ::cuMemAllocHost | |
| | | , | |
| | | * ::cuMemAllocPitch, ::cuMemcpy2D, ::cuMemcpy2DAsync, ::cuMemcpy2DUnaligne | |
| | | d, | |
| | | * ::cuMemcpy3D, ::cuMemcpy3DAsync, ::cuMemcpyAtoA, ::cuMemcpyAtoD, | |
| | | * ::cuMemcpyAtoH, ::cuMemcpyAtoHAsync, ::cuMemcpyDtoA, ::cuMemcpyDtoD, ::c | |
| | | uMemcpyDtoDAsync, | |
| | | * ::cuMemcpyDtoH, ::cuMemcpyDtoHAsync, ::cuMemcpyHtoA, ::cuMemcpyHtoAAsync | |
| | | , | |
| | | * ::cuMemcpyHtoD, ::cuMemcpyHtoDAsync, ::cuMemFree, ::cuMemFreeHost, | |
| | | * ::cuMemGetAddressRange, ::cuMemGetInfo, ::cuMemHostAlloc, | |
| | | * ::cuMemHostGetDevicePointer, ::cuMemsetD2D8, ::cuMemsetD2D8Async, | |
| | | * ::cuMemsetD2D16, ::cuMemsetD2D16Async, ::cuMemsetD2D32Async, | |
| | | * ::cuMemsetD8, ::cuMemsetD8Async, ::cuMemsetD16, ::cuMemsetD16Async, | |
| | | * ::cuMemsetD32, ::cuMemsetD32Async | |
| | | */ | |
| | | CUresult CUDAAPI cuMemsetD2D32(CUdeviceptr dstDevice, size_t dstPitch, unsi | |
| | | gned int ui, size_t Width, size_t Height); | |
| | | | |
| | | /** | |
| | | * \brief Sets device memory | |
| | | * | |
| | | * Sets the memory range of \p N 8-bit values to the specified value | |
| | | * \p uc. | |
| | | * | |
| | | * ::cuMemsetD8Async() is asynchronous and can optionally be associated to | |
| | | a | |
| | | * stream by passing a non-zero \p hStream argument. | |
| | | * | |
| | | * \param dstDevice - Destination device pointer | |
| | | * \param uc - Value to set | |
| | | * \param N - Number of elements | |
| | | * \param hStream - Stream identifier | |
| | | * | |
| | | * \return | |
| | | * ::CUDA_SUCCESS, | |
| | | * ::CUDA_ERROR_DEINITIALIZED, | |
| | | * ::CUDA_ERROR_NOT_INITIALIZED, | |
| | | * ::CUDA_ERROR_INVALID_CONTEXT, | |
| | | * ::CUDA_ERROR_INVALID_VALUE | |
| | | * \notefnerr | |
| | | * | |
| | | * \sa ::cuArray3DCreate, ::cuArray3DGetDescriptor, ::cuArrayCreate, | |
| | | * ::cuArrayDestroy, ::cuArrayGetDescriptor, ::cuMemAlloc, ::cuMemAllocHost | |
| | | , | |
| | | * ::cuMemAllocPitch, ::cuMemcpy2D, ::cuMemcpy2DAsync, ::cuMemcpy2DUnaligne | |
| | | d, | |
| | | * ::cuMemcpy3D, ::cuMemcpy3DAsync, ::cuMemcpyAtoA, ::cuMemcpyAtoD, | |
| | | * ::cuMemcpyAtoH, ::cuMemcpyAtoHAsync, ::cuMemcpyDtoA, ::cuMemcpyDtoD, ::c | |
| | | uMemcpyDtoDAsync, | |
| | | * ::cuMemcpyDtoH, ::cuMemcpyDtoHAsync, ::cuMemcpyHtoA, ::cuMemcpyHtoAAsync | |
| | | , | |
| | | * ::cuMemcpyHtoD, ::cuMemcpyHtoDAsync, ::cuMemFree, ::cuMemFreeHost, | |
| | | * ::cuMemGetAddressRange, ::cuMemGetInfo, ::cuMemHostAlloc, | |
| | | * ::cuMemHostGetDevicePointer, ::cuMemsetD2D8, ::cuMemsetD2D8Async, | |
| | | * ::cuMemsetD2D16, ::cuMemsetD2D16Async, ::cuMemsetD2D32, ::cuMemsetD2D32A | |
| | | sync, | |
| | | * ::cuMemsetD8, ::cuMemsetD16, ::cuMemsetD16Async, | |
| | | * ::cuMemsetD32, ::cuMemsetD32Async | |
| | | */ | |
| | | CUresult CUDAAPI cuMemsetD8Async(CUdeviceptr dstDevice, unsigned char uc, s | |
| | | ize_t N, CUstream hStream); | |
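| | | | |
| | | /* | |
| | |  * Usage sketch (illustrative only): clearing a device buffer as part of a | |
| | |  * stream's work queue instead of blocking the host. devBuf, bytes and s | |
| | |  * are placeholders. | |
| | |  * | |
| | |  * \code | |
| | |     cuMemsetD8Async(devBuf, 0, bytes, s);  // queued on s, returns at once | |
| | |     // ... launch kernels in s that rely on the cleared buffer ... | |
| | |     cuStreamSynchronize(s); | |
| | |  * \endcode | |
| | |  */ | |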
| | | | |
| | | /** | |
| | | * \brief Sets device memory | |
| | | * | |
| | | * Sets the memory range of \p N 16-bit values to the specified value | |
| | | * \p us. | |
| | | * | |
| | | * ::cuMemsetD16Async() is asynchronous and can optionally be associated to | |
| | | a | |
| | | * stream by passing a non-zero \p hStream argument. | |
| | | * | |
| | | * \param dstDevice - Destination device pointer | |
| | | * \param us - Value to set | |
| | | * \param N - Number of elements | |
| | | * \param hStream - Stream identifier | |
| | | * | |
| | | * \return | |
| | | * ::CUDA_SUCCESS, | |
| | | * ::CUDA_ERROR_DEINITIALIZED, | |
| | | * ::CUDA_ERROR_NOT_INITIALIZED, | |
| | | * ::CUDA_ERROR_INVALID_CONTEXT, | |
| | | * ::CUDA_ERROR_INVALID_VALUE | |
| | | * \notefnerr | |
| | | * | |
| | | * \sa ::cuArray3DCreate, ::cuArray3DGetDescriptor, ::cuArrayCreate, | |
| | | * ::cuArrayDestroy, ::cuArrayGetDescriptor, ::cuMemAlloc, ::cuMemAllocHost | |
| | | , | |
| | | * ::cuMemAllocPitch, ::cuMemcpy2D, ::cuMemcpy2DAsync, ::cuMemcpy2DUnaligne | |
| | | d, | |
| | | * ::cuMemcpy3D, ::cuMemcpy3DAsync, ::cuMemcpyAtoA, ::cuMemcpyAtoD, | |
| | | * ::cuMemcpyAtoH, ::cuMemcpyAtoHAsync, ::cuMemcpyDtoA, ::cuMemcpyDtoD, ::c | |
| | | uMemcpyDtoDAsync, | |
| | | * ::cuMemcpyDtoH, ::cuMemcpyDtoHAsync, ::cuMemcpyHtoA, ::cuMemcpyHtoAAsync | |
| | | , | |
| | | * ::cuMemcpyHtoD, ::cuMemcpyHtoDAsync, ::cuMemFree, ::cuMemFreeHost, | |
| | | * ::cuMemGetAddressRange, ::cuMemGetInfo, ::cuMemHostAlloc, | |
| | | * ::cuMemHostGetDevicePointer, ::cuMemsetD2D8, ::cuMemsetD2D8Async, | |
| | | * ::cuMemsetD2D16, ::cuMemsetD2D16Async, ::cuMemsetD2D32, ::cuMemsetD2D32A | |
| | | sync, | |
| | | * ::cuMemsetD8, ::cuMemsetD8Async, ::cuMemsetD16, | |
| | | * ::cuMemsetD32, ::cuMemsetD32Async | |
| | | */ | |
| | | CUresult CUDAAPI cuMemsetD16Async(CUdeviceptr dstDevice, unsigned short us, | |
| | | size_t N, CUstream hStream); | |
| | | | |
| | | /** | |
| | | * \brief Sets device memory | |
| | | * | |
| | | * Sets the memory range of \p N 32-bit values to the specified value | |
| | | * \p ui. | |
| | | * | |
| | | * ::cuMemsetD32Async() is asynchronous and can optionally be associated to | |
| | | a | |
| | | * stream by passing a non-zero \p hStream argument. | |
| | | * | |
| | | * \param dstDevice - Destination device pointer | |
| | | * \param ui - Value to set | |
| | | * \param N - Number of elements | |
| | | * \param hStream - Stream identifier | |
| | | * | |
| | | * \return | |
| | | * ::CUDA_SUCCESS, | |
| | | * ::CUDA_ERROR_DEINITIALIZED, | |
| | | * ::CUDA_ERROR_NOT_INITIALIZED, | |
| | | * ::CUDA_ERROR_INVALID_CONTEXT, | |
| | | * ::CUDA_ERROR_INVALID_VALUE | |
| | | * \notefnerr | |
| | | * | |
| | | * \sa ::cuArray3DCreate, ::cuArray3DGetDescriptor, ::cuArrayCreate, | |
| | | * ::cuArrayDestroy, ::cuArrayGetDescriptor, ::cuMemAlloc, ::cuMemAllocHost | |
| | | , | |
| | | * ::cuMemAllocPitch, ::cuMemcpy2D, ::cuMemcpy2DAsync, ::cuMemcpy2DUnaligne | |
| | | d, | |
| | | * ::cuMemcpy3D, ::cuMemcpy3DAsync, ::cuMemcpyAtoA, ::cuMemcpyAtoD, | |
| | | * ::cuMemcpyAtoH, ::cuMemcpyAtoHAsync, ::cuMemcpyDtoA, ::cuMemcpyDtoD, ::c | |
| | | uMemcpyDtoDAsync, | |
| | | * ::cuMemcpyDtoH, ::cuMemcpyDtoHAsync, ::cuMemcpyHtoA, ::cuMemcpyHtoAAsync | |
| | | , | |
| | | * ::cuMemcpyHtoD, ::cuMemcpyHtoDAsync, ::cuMemFree, ::cuMemFreeHost, | |
| | | * ::cuMemGetAddressRange, ::cuMemGetInfo, ::cuMemHostAlloc, | |
| | | * ::cuMemHostGetDevicePointer, ::cuMemsetD2D8, ::cuMemsetD2D8Async, | |
| | | * ::cuMemsetD2D16, ::cuMemsetD2D16Async, ::cuMemsetD2D32, ::cuMemsetD2D32A | |
| | | sync, | |
| | | * ::cuMemsetD8, ::cuMemsetD8Async, ::cuMemsetD16, ::cuMemsetD16Async, ::cu | |
| | | MemsetD32 | |
| | | */ | |
| | | CUresult CUDAAPI cuMemsetD32Async(CUdeviceptr dstDevice, unsigned int ui, s | |
| | | ize_t N, CUstream hStream); | |
| | | | |
| | | /** | |
| | | * \brief Sets device memory | |
| | | * | |
| | | * Sets the 2D memory range of \p Width 8-bit values to the specified value | |
| | | * \p uc. \p Height specifies the number of rows to set, and \p dstPitch | |
| | | * specifies the number of bytes between each row. This function performs | |
| | | * fastest when the pitch is one that has been passed back by | |
| | | * ::cuMemAllocPitch(). | |
| | | * | |
| | | * ::cuMemsetD2D8Async() is asynchronous and can optionally be associated t | |
| | | o a | |
| | | * stream by passing a non-zero \p hStream argument. | |
| | | * | |
| | | * \param dstDevice - Destination device pointer | |
| | | * \param dstPitch - Pitch of destination device pointer | |
| | | * \param uc - Value to set | |
| | | * \param Width - Width of row | |
| | | * \param Height - Number of rows | |
| | | * \param hStream - Stream identifier | |
| | | * | |
| | | * \return | |
| | | * ::CUDA_SUCCESS, | |
| | | * ::CUDA_ERROR_DEINITIALIZED, | |
| | | * ::CUDA_ERROR_NOT_INITIALIZED, | |
| | | * ::CUDA_ERROR_INVALID_CONTEXT, | |
| | | * ::CUDA_ERROR_INVALID_VALUE | |
| | | * \notefnerr | |
| | | * | |
| | | * \sa ::cuArray3DCreate, ::cuArray3DGetDescriptor, ::cuArrayCreate, | |
| | | * ::cuArrayDestroy, ::cuArrayGetDescriptor, ::cuMemAlloc, ::cuMemAllocHost | |
| | | , | |
| | | * ::cuMemAllocPitch, ::cuMemcpy2D, ::cuMemcpy2DAsync, ::cuMemcpy2DUnaligne | |
| | | d, | |
| | | * ::cuMemcpy3D, ::cuMemcpy3DAsync, ::cuMemcpyAtoA, ::cuMemcpyAtoD, | |
| | | * ::cuMemcpyAtoH, ::cuMemcpyAtoHAsync, ::cuMemcpyDtoA, ::cuMemcpyDtoD, ::c | |
| | | uMemcpyDtoDAsync, | |
| | | * ::cuMemcpyDtoH, ::cuMemcpyDtoHAsync, ::cuMemcpyHtoA, ::cuMemcpyHtoAAsync | |
| | | , | |
| | | * ::cuMemcpyHtoD, ::cuMemcpyHtoDAsync, ::cuMemFree, ::cuMemFreeHost, | |
| | | * ::cuMemGetAddressRange, ::cuMemGetInfo, ::cuMemHostAlloc, | |
| | | * ::cuMemHostGetDevicePointer, ::cuMemsetD2D8, | |
| | | * ::cuMemsetD2D16, ::cuMemsetD2D16Async, ::cuMemsetD2D32, ::cuMemsetD2D32A | |
| | | sync, | |
| | | * ::cuMemsetD8, ::cuMemsetD8Async, ::cuMemsetD16, ::cuMemsetD16Async, | |
| | | * ::cuMemsetD32, ::cuMemsetD32Async | |
| | | */ | |
| | | CUresult CUDAAPI cuMemsetD2D8Async(CUdeviceptr dstDevice, size_t dstPitch, | |
| | | unsigned char uc, size_t Width, size_t Height, CUstream hStream); | |
| | | | |
| | | /** | |
| | | * \brief Sets device memory | |
| | | * | |
| | | * Sets the 2D memory range of \p Width 16-bit values to the specified valu | |
| | | e | |
| | | * \p us. \p Height specifies the number of rows to set, and \p dstPitch | |
| | | * specifies the number of bytes between each row. This function performs | |
| | | * fastest when the pitch is one that has been passed back by | |
| | | * ::cuMemAllocPitch(). | |
| | | * | |
| | | * ::cuMemsetD2D16Async() is asynchronous and can optionally be associated | |
| | | to a | |
| | | * stream by passing a non-zero \p hStream argument. | |
| | | * | |
| | | * \param dstDevice - Destination device pointer | |
| | | * \param dstPitch - Pitch of destination device pointer | |
| | | * \param us - Value to set | |
| | | * \param Width - Width of row | |
| | | * \param Height - Number of rows | |
| | | * \param hStream - Stream identifier | |
| | | * | |
| | | * \return | |
| | | * ::CUDA_SUCCESS, | |
| | | * ::CUDA_ERROR_DEINITIALIZED, | |
| | | * ::CUDA_ERROR_NOT_INITIALIZED, | |
| | | * ::CUDA_ERROR_INVALID_CONTEXT, | |
| | | * ::CUDA_ERROR_INVALID_VALUE | |
| | | * \notefnerr | |
| | | * | |
| | | * \sa ::cuArray3DCreate, ::cuArray3DGetDescriptor, ::cuArrayCreate, | |
| | | * ::cuArrayDestroy, ::cuArrayGetDescriptor, ::cuMemAlloc, ::cuMemAllocHost | |
| | | , | |
| | | * ::cuMemAllocPitch, ::cuMemcpy2D, ::cuMemcpy2DAsync, ::cuMemcpy2DUnaligne | |
| | | d, | |
| | | * ::cuMemcpy3D, ::cuMemcpy3DAsync, ::cuMemcpyAtoA, ::cuMemcpyAtoD, | |
| | | * ::cuMemcpyAtoH, ::cuMemcpyAtoHAsync, ::cuMemcpyDtoA, ::cuMemcpyDtoD, ::c | |
| | | uMemcpyDtoDAsync, | |
| | | * ::cuMemcpyDtoH, ::cuMemcpyDtoHAsync, ::cuMemcpyHtoA, ::cuMemcpyHtoAAsync | |
| | | , | |
| | | * ::cuMemcpyHtoD, ::cuMemcpyHtoDAsync, ::cuMemFree, ::cuMemFreeHost, | |
| | | * ::cuMemGetAddressRange, ::cuMemGetInfo, ::cuMemHostAlloc, | |
| | | * ::cuMemHostGetDevicePointer, ::cuMemsetD2D8, ::cuMemsetD2D8Async, | |
| | | * ::cuMemsetD2D16, ::cuMemsetD2D32, ::cuMemsetD2D32Async, | |
| | | * ::cuMemsetD8, ::cuMemsetD8Async, ::cuMemsetD16, ::cuMemsetD16Async, | |
| | | * ::cuMemsetD32, ::cuMemsetD32Async | |
| | | */ | |
| | | CUresult CUDAAPI cuMemsetD2D16Async(CUdeviceptr dstDevice, size_t dstPitch, | |
| | | unsigned short us, size_t Width, size_t Height, CUstream hStream); | |
| | | | |
| | | /** | |
| | | * \brief Sets device memory | |
| | | * | |
| | | * Sets the 2D memory range of \p Width 32-bit values to the specified valu | |
| | | e | |
| | | * \p ui. \p Height specifies the number of rows to set, and \p dstPitch | |
| | | * specifies the number of bytes between each row. This function performs | |
| | | * fastest when the pitch is one that has been passed back by | |
| | | * ::cuMemAllocPitch(). | |
| | | * | |
| | | * ::cuMemsetD2D32Async() is asynchronous and can optionally be associated | |
| | | to a | |
| | | * stream by passing a non-zero \p hStream argument. | |
| | | * | |
| | | * \param dstDevice - Destination device pointer | |
| | | * \param dstPitch - Pitch of destination device pointer | |
| | | * \param ui - Value to set | |
| | | * \param Width - Width of row | |
| | | * \param Height - Number of rows | |
| | | * \param hStream - Stream identifier | |
| | | * | |
| | | * \return | |
| | | * ::CUDA_SUCCESS, | |
| | | * ::CUDA_ERROR_DEINITIALIZED, | |
| | | * ::CUDA_ERROR_NOT_INITIALIZED, | |
| | | * ::CUDA_ERROR_INVALID_CONTEXT, | |
| | | * ::CUDA_ERROR_INVALID_VALUE | |
| | | * \notefnerr | |
| | | * | |
| | | * \sa ::cuArray3DCreate, ::cuArray3DGetDescriptor, ::cuArrayCreate, | |
| | | * ::cuArrayDestroy, ::cuArrayGetDescriptor, ::cuMemAlloc, ::cuMemAllocHost | |
| | | , | |
| | | * ::cuMemAllocPitch, ::cuMemcpy2D, ::cuMemcpy2DAsync, ::cuMemcpy2DUnaligne | |
| | | d, | |
| | | * ::cuMemcpy3D, ::cuMemcpy3DAsync, ::cuMemcpyAtoA, ::cuMemcpyAtoD, | |
| | | * ::cuMemcpyAtoH, ::cuMemcpyAtoHAsync, ::cuMemcpyDtoA, ::cuMemcpyDtoD, ::c | |
| | | uMemcpyDtoDAsync, | |
| | | * ::cuMemcpyDtoH, ::cuMemcpyDtoHAsync, ::cuMemcpyHtoA, ::cuMemcpyHtoAAsync | |
| | | , | |
| | | * ::cuMemcpyHtoD, ::cuMemcpyHtoDAsync, ::cuMemFree, ::cuMemFreeHost, | |
| | | * ::cuMemGetAddressRange, ::cuMemGetInfo, ::cuMemHostAlloc, | |
| | | * ::cuMemHostGetDevicePointer, ::cuMemsetD2D8, ::cuMemsetD2D8Async, | |
| | | * ::cuMemsetD2D16, ::cuMemsetD2D16Async, ::cuMemsetD2D32, | |
| | | * ::cuMemsetD8, ::cuMemsetD8Async, ::cuMemsetD16, ::cuMemsetD16Async, | |
| | | * ::cuMemsetD32, ::cuMemsetD32Async | |
| | | */ | |
| | | CUresult CUDAAPI cuMemsetD2D32Async(CUdeviceptr dstDevice, size_t dstPitch, | |
| | | unsigned int ui, size_t Width, size_t Height, CUstream hStream); | |
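/*
 * Illustrative usage sketch (editorial addition, not part of the original
 * header): clears a pitched 2D allocation asynchronously with
 * ::cuMemsetD2D32Async(). The names \p width, \p height, and \p hStream are
 * placeholders, and a current context is assumed to exist.
 * \code
    size_t width = 1024, height = 768;    // elements per row and number of rows
    CUdeviceptr dptr;
    size_t pitch;                         // bytes between rows, chosen by the driver
    CUstream hStream;
    cuStreamCreate(&hStream, 0);
    cuMemAllocPitch(&dptr, &pitch, width * sizeof(unsigned int), height, 4);
    // Set every 32-bit element of the 2D range to 0xFFFFFFFF in hStream.
    cuMemsetD2D32Async(dptr, pitch, 0xFFFFFFFF, width, height, hStream);
    cuStreamSynchronize(hStream);         // wait for the memset to finish
    cuMemFree(dptr);
    cuStreamDestroy(hStream);
 * \endcode
 */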
| | | | |
| | | /** | |
| | | * \brief Creates a 1D or 2D CUDA array | |
| | | * | |
| | | * Creates a CUDA array according to the ::CUDA_ARRAY_DESCRIPTOR structure | |
| | | * \p pAllocateArray and returns a handle to the new CUDA array in \p *pHan | |
| | | dle. | |
| | | * The ::CUDA_ARRAY_DESCRIPTOR is defined as: | |
| | | * | |
| | | * \code | |
| | | typedef struct { | |
| | | unsigned int Width; | |
| | | unsigned int Height; | |
| | | CUarray_format Format; | |
| | | unsigned int NumChannels; | |
| | | } CUDA_ARRAY_DESCRIPTOR; | |
| | | * \endcode | |
| | | * where: | |
| | | * | |
| | | * - \p Width and \p Height are the width and height of the CUDA array (in | |
| | | * elements); the CUDA array is one-dimensional if \p Height is 0, and | |
| | | * two-dimensional otherwise; | |
| | | * - ::Format specifies the format of the elements; ::CUarray_format is | |
| | | * defined as: | |
| | | * \code | |
| | | typedef enum CUarray_format_enum { | |
| | | CU_AD_FORMAT_UNSIGNED_INT8 = 0x01, | |
| | | CU_AD_FORMAT_UNSIGNED_INT16 = 0x02, | |
| | | CU_AD_FORMAT_UNSIGNED_INT32 = 0x03, | |
| | | CU_AD_FORMAT_SIGNED_INT8 = 0x08, | |
| | | CU_AD_FORMAT_SIGNED_INT16 = 0x09, | |
| | | CU_AD_FORMAT_SIGNED_INT32 = 0x0a, | |
| | | CU_AD_FORMAT_HALF = 0x10, | |
| | | CU_AD_FORMAT_FLOAT = 0x20 | |
| | | } CUarray_format; | |
| | | * \endcode | |
| | | * - \p NumChannels specifies the number of packed components per CUDA arra | |
| | | y | |
| | | * element; it may be 1, 2, or 4; | |
| | | * | |
| | | * Here are examples of CUDA array descriptions: | |
| | | * | |
| | | * Description for a CUDA array of 2048 floats: | |
| | | * \code | |
| | | CUDA_ARRAY_DESCRIPTOR desc; | |
| | | desc.Format = CU_AD_FORMAT_FLOAT; | |
| | | desc.NumChannels = 1; | |
| | | desc.Width = 2048; | |
| | | desc.Height = 1; | |
| | | * \endcode | |
| | | * | |
| | | * Description for a 64 x 64 CUDA array of floats: | |
| | | * \code | |
| | | CUDA_ARRAY_DESCRIPTOR desc; | |
| | | desc.Format = CU_AD_FORMAT_FLOAT; | |
| | | desc.NumChannels = 1; | |
| | | desc.Width = 64; | |
| | | desc.Height = 64; | |
| | | * \endcode | |
| | | * | |
| | | * Description for a \p width x \p height CUDA array of 64-bit, 4x16-bit | |
| | | * float16's: | |
| | | * \code | |
| | | CUDA_ARRAY_DESCRIPTOR desc; | |
| | | desc.Format = CU_AD_FORMAT_HALF; | |
| | | desc.NumChannels = 4; | |
| | | desc.Width = width; | |
| | | desc.Height = height; | |
| | | * \endcode | |
| | | * | |
| | | * Description for a \p width x \p height CUDA array of 16-bit elements, ea | |
| | | ch | |
| | | * of which is two 8-bit unsigned chars: | |
| | | * \code | |
| | | CUDA_ARRAY_DESCRIPTOR desc; | |
| | | desc.Format = CU_AD_FORMAT_UNSIGNED_INT8; | |
| | | desc.NumChannels = 2; | |
| | | desc.Width = width; | |
| | | desc.Height = height; | |
| | | * \endcode | |
| | | * | |
| | | * \param pHandle - Returned array | |
| | | * \param pAllocateArray - Array descriptor | |
| | | * | |
| | | * \return | |
| | | * ::CUDA_SUCCESS, | |
| | | * ::CUDA_ERROR_DEINITIALIZED, | |
| | | * ::CUDA_ERROR_NOT_INITIALIZED, | |
| | | * ::CUDA_ERROR_INVALID_CONTEXT, | |
| | | * ::CUDA_ERROR_INVALID_VALUE, | |
| | | * ::CUDA_ERROR_OUT_OF_MEMORY, | |
| | | * ::CUDA_ERROR_UNKNOWN | |
| | | * \notefnerr | |
| | | * | |
| | | * \sa ::cuArray3DCreate, ::cuArray3DGetDescriptor, | |
| | | * ::cuArrayDestroy, ::cuArrayGetDescriptor, ::cuMemAlloc, ::cuMemAllocHost | |
| | | , | |
| | | * ::cuMemAllocPitch, ::cuMemcpy2D, ::cuMemcpy2DAsync, ::cuMemcpy2DUnaligne | |
| | | d, | |
| | | * ::cuMemcpy3D, ::cuMemcpy3DAsync, ::cuMemcpyAtoA, ::cuMemcpyAtoD, | |
| | | * ::cuMemcpyAtoH, ::cuMemcpyAtoHAsync, ::cuMemcpyDtoA, ::cuMemcpyDtoD, ::c | |
| | | uMemcpyDtoDAsync, | |
| | | * ::cuMemcpyDtoH, ::cuMemcpyDtoHAsync, ::cuMemcpyHtoA, ::cuMemcpyHtoAAsync | |
| | | , | |
| | | * ::cuMemcpyHtoD, ::cuMemcpyHtoDAsync, ::cuMemFree, ::cuMemFreeHost, | |
| | | * ::cuMemGetAddressRange, ::cuMemGetInfo, ::cuMemHostAlloc, | |
| | | * ::cuMemHostGetDevicePointer, ::cuMemsetD2D8, ::cuMemsetD2D16, | |
| | | * ::cuMemsetD2D32, ::cuMemsetD8, ::cuMemsetD16, ::cuMemsetD32 | |
| | | */ | |
| | | CUresult CUDAAPI cuArrayCreate(CUarray *pHandle, const CUDA_ARRAY_DESCRIPTO | |
| | | R *pAllocateArray); | |
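/*
 * Illustrative usage sketch (editorial addition, not part of the original
 * header): creates and destroys a 64 x 64 CUDA array of floats using the
 * descriptor layout shown above. Error handling is reduced to a single check
 * for brevity.
 * \code
    CUDA_ARRAY_DESCRIPTOR desc;
    CUarray hArray;
    desc.Format      = CU_AD_FORMAT_FLOAT;
    desc.NumChannels = 1;
    desc.Width       = 64;
    desc.Height      = 64;
    if (cuArrayCreate(&hArray, &desc) == CUDA_SUCCESS) {
        // ... bind the array to a texture reference or copy data into it ...
        cuArrayDestroy(hArray);
    }
 * \endcode
 */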
| | | | |
| | | /** | |
| | | * \brief Get a 1D or 2D CUDA array descriptor | |
| | | * | |
| | | * Returns in \p *pArrayDescriptor a descriptor containing information on t | |
| | | he | |
| | | * format and dimensions of the CUDA array \p hArray. It is useful for | |
| | | * subroutines that have been passed a CUDA array, but need to know the CUD | |
| | | A | |
| | | * array parameters for validation or other purposes. | |
| | | * | |
| | | * \param pArrayDescriptor - Returned array descriptor | |
| | | * \param hArray - Array to get descriptor of | |
| | | * | |
| | | * \return | |
| | | * ::CUDA_SUCCESS, | |
| | | * ::CUDA_ERROR_DEINITIALIZED, | |
| | | * ::CUDA_ERROR_NOT_INITIALIZED, | |
| | | * ::CUDA_ERROR_INVALID_CONTEXT, | |
| | | * ::CUDA_ERROR_INVALID_VALUE, | |
| | | * ::CUDA_ERROR_INVALID_HANDLE | |
| | | * \notefnerr | |
| | | * | |
| | | * \sa ::cuArray3DCreate, ::cuArray3DGetDescriptor, ::cuArrayCreate, | |
| | | * ::cuArrayDestroy, ::cuMemAlloc, ::cuMemAllocHost, | |
| | | * ::cuMemAllocPitch, ::cuMemcpy2D, ::cuMemcpy2DAsync, ::cuMemcpy2DUnaligne | |
| | | d, | |
| | | * ::cuMemcpy3D, ::cuMemcpy3DAsync, ::cuMemcpyAtoA, ::cuMemcpyAtoD, | |
| | | * ::cuMemcpyAtoH, ::cuMemcpyAtoHAsync, ::cuMemcpyDtoA, ::cuMemcpyDtoD, ::c | |
| | | uMemcpyDtoDAsync, | |
| | | * ::cuMemcpyDtoH, ::cuMemcpyDtoHAsync, ::cuMemcpyHtoA, ::cuMemcpyHtoAAsync | |
| | | , | |
| | | * ::cuMemcpyHtoD, ::cuMemcpyHtoDAsync, ::cuMemFree, ::cuMemFreeHost, | |
| | | * ::cuMemGetAddressRange, ::cuMemGetInfo, ::cuMemHostAlloc, | |
| | | * ::cuMemHostGetDevicePointer, ::cuMemsetD2D8, ::cuMemsetD2D16, | |
| | | * ::cuMemsetD2D32, ::cuMemsetD8, ::cuMemsetD16, ::cuMemsetD32 | |
| | | */ | |
| | | CUresult CUDAAPI cuArrayGetDescriptor(CUDA_ARRAY_DESCRIPTOR *pArrayDescript | |
| | | or, CUarray hArray); | |
| | | #endif /* __CUDA_API_VERSION >= 3020 */ | |
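/*
 * Illustrative sketch (editorial addition, not part of the original header):
 * a helper that computes the size in bytes of one row of an existing CUDA
 * array from its descriptor. The format-to-size mapping covers only the
 * formats it names; anything else is treated as 4 bytes per component.
 * \code
    static size_t arrayRowBytes(CUarray hArray)
    {
        CUDA_ARRAY_DESCRIPTOR d;
        size_t elemBytes;
        if (cuArrayGetDescriptor(&d, hArray) != CUDA_SUCCESS)
            return 0;
        switch (d.Format) {
        case CU_AD_FORMAT_UNSIGNED_INT8:
        case CU_AD_FORMAT_SIGNED_INT8:   elemBytes = 1; break;
        case CU_AD_FORMAT_UNSIGNED_INT16:
        case CU_AD_FORMAT_SIGNED_INT16:
        case CU_AD_FORMAT_HALF:          elemBytes = 2; break;
        default:                         elemBytes = 4; break;
        }
        return d.Width * d.NumChannels * elemBytes;
    }
 * \endcode
 */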
| | | | |
| | | /** | |
| | | * \brief Destroys a CUDA array | |
| | | * | |
| | | * Destroys the CUDA array \p hArray. | |
| | | * | |
| | | * \param hArray - Array to destroy | |
| | | * | |
| | | * \return | |
| | | * ::CUDA_SUCCESS, | |
| | | * ::CUDA_ERROR_DEINITIALIZED, | |
| | | * ::CUDA_ERROR_NOT_INITIALIZED, | |
| | | * ::CUDA_ERROR_INVALID_CONTEXT, | |
| | | * ::CUDA_ERROR_INVALID_HANDLE, | |
| | | * ::CUDA_ERROR_ARRAY_IS_MAPPED | |
| | | * \notefnerr | |
| | | * | |
| | | * \sa ::cuArray3DCreate, ::cuArray3DGetDescriptor, ::cuArrayCreate, | |
| | | * ::cuArrayGetDescriptor, ::cuMemAlloc, ::cuMemAllocHost, | |
| | | * ::cuMemAllocPitch, ::cuMemcpy2D, ::cuMemcpy2DAsync, ::cuMemcpy2DUnaligne | |
| | | d, | |
| | | * ::cuMemcpy3D, ::cuMemcpy3DAsync, ::cuMemcpyAtoA, ::cuMemcpyAtoD, | |
| | | * ::cuMemcpyAtoH, ::cuMemcpyAtoHAsync, ::cuMemcpyDtoA, ::cuMemcpyDtoD, ::c | |
| | | uMemcpyDtoDAsync, | |
| | | * ::cuMemcpyDtoH, ::cuMemcpyDtoHAsync, ::cuMemcpyHtoA, ::cuMemcpyHtoAAsync | |
| | | , | |
| | | * ::cuMemcpyHtoD, ::cuMemcpyHtoDAsync, ::cuMemFree, ::cuMemFreeHost, | |
| | | * ::cuMemGetAddressRange, ::cuMemGetInfo, ::cuMemHostAlloc, | |
| | | * ::cuMemHostGetDevicePointer, ::cuMemsetD2D8, ::cuMemsetD2D16, | |
| | | * ::cuMemsetD2D32, ::cuMemsetD8, ::cuMemsetD16, ::cuMemsetD32 | |
| | | */ | |
| | | CUresult CUDAAPI cuArrayDestroy(CUarray hArray); | |
| | | | |
| | | #if __CUDA_API_VERSION >= 3020 | |
| | | /** | |
| | | * \brief Creates a 3D CUDA array | |
| | | * | |
| | | * Creates a CUDA array according to the ::CUDA_ARRAY3D_DESCRIPTOR structur | |
| | | e | |
| | | * \p pAllocateArray and returns a handle to the new CUDA array in \p *pHan | |
| | | dle. | |
| | | * The ::CUDA_ARRAY3D_DESCRIPTOR is defined as: | |
| | | * | |
| | | * \code | |
| | | typedef struct { | |
| | | unsigned int Width; | |
| | | unsigned int Height; | |
| | | unsigned int Depth; | |
| | | CUarray_format Format; | |
| | | unsigned int NumChannels; | |
| | | unsigned int Flags; | |
| | | } CUDA_ARRAY3D_DESCRIPTOR; | |
| | | * \endcode | |
| | | * where: | |
| | | * | |
| | | * - \p Width, \p Height, and \p Depth are the width, height, and depth of | |
| | | the | |
| | | * CUDA array (in elements); the CUDA array is one-dimensional if height an | |
| | | d | |
| | | * depth are 0, two-dimensional if depth is 0, and three-dimensional otherw | |
| | | ise; | |
| | | * - ::Format specifies the format of the elements; ::CUarray_format is | |
| | | * defined as: | |
| | | * \code | |
| | | typedef enum CUarray_format_enum { | |
| | | CU_AD_FORMAT_UNSIGNED_INT8 = 0x01, | |
| | | CU_AD_FORMAT_UNSIGNED_INT16 = 0x02, | |
| | | CU_AD_FORMAT_UNSIGNED_INT32 = 0x03, | |
| | | CU_AD_FORMAT_SIGNED_INT8 = 0x08, | |
| | | CU_AD_FORMAT_SIGNED_INT16 = 0x09, | |
| | | CU_AD_FORMAT_SIGNED_INT32 = 0x0a, | |
| | | CU_AD_FORMAT_HALF = 0x10, | |
| | | CU_AD_FORMAT_FLOAT = 0x20 | |
| | | } CUarray_format; | |
| | | * \endcode | |
| | | * - \p NumChannels specifies the number of packed components per CUDA arra | |
| | | y | |
| | | * element; it may be 1, 2, or 4; | |
| | | * - ::Flags may be set to ::CUDA_ARRAY3D_SURFACE_LDST to enable surface re | |
| | | ferences | |
| | | * to be bound to the CUDA array. If this flag is not set, ::cuSurfRefSetA | |
| | | rray | |
| | | * will fail when attempting to bind the CUDA array to a surface reference. | |
| | | * | |
| | | * Here are examples of CUDA array descriptions: | |
| | | * | |
| | | * Description for a CUDA array of 2048 floats: | |
| | | * \code | |
| | | CUDA_ARRAY3D_DESCRIPTOR desc; | |
| | | desc.Format = CU_AD_FORMAT_FLOAT; | |
| | | desc.NumChannels = 1; | |
| | | desc.Width = 2048; | |
| | | desc.Height = 0; | |
| | | desc.Depth = 0; | |
| | | * \endcode | |
| | | * | |
| | | * Description for a 64 x 64 CUDA array of floats: | |
| | | * \code | |
| | | CUDA_ARRAY3D_DESCRIPTOR desc; | |
| | | desc.Format = CU_AD_FORMAT_FLOAT; | |
| | | desc.NumChannels = 1; | |
| | | desc.Width = 64; | |
| | | desc.Height = 64; | |
| | | desc.Depth = 0; | |
| | | * \endcode | |
| | | * | |
| | | * Description for a \p width x \p height x \p depth CUDA array of 64-bit, | |
| | | * 4x16-bit float16's: | |
| | | * \code | |
| | | CUDA_ARRAY3D_DESCRIPTOR desc; | |
| | | desc.Format = CU_AD_FORMAT_HALF; | |
| | | desc.NumChannels = 4; | |
| | | desc.Width = width; | |
| | | desc.Height = height; | |
| | | desc.Depth = depth; | |
| | | * \endcode | |
| | | * | |
| | | * \param pHandle - Returned array | |
| | | * \param pAllocateArray - 3D array descriptor | |
| | | * | |
| | | * \return | |
| | | * ::CUDA_SUCCESS, | |
| | | * ::CUDA_ERROR_DEINITIALIZED, | |
| | | * ::CUDA_ERROR_NOT_INITIALIZED, | |
| | | * ::CUDA_ERROR_INVALID_CONTEXT, | |
| | | * ::CUDA_ERROR_INVALID_VALUE, | |
| | | * ::CUDA_ERROR_OUT_OF_MEMORY, | |
| | | * ::CUDA_ERROR_UNKNOWN | |
| | | * \notefnerr | |
| | | * | |
| | | * \sa ::cuArray3DGetDescriptor, ::cuArrayCreate, | |
| | | * ::cuArrayDestroy, ::cuArrayGetDescriptor, ::cuMemAlloc, ::cuMemAllocHost | |
| | | , | |
| | | * ::cuMemAllocPitch, ::cuMemcpy2D, ::cuMemcpy2DAsync, ::cuMemcpy2DUnaligne | |
| | | d, | |
| | | * ::cuMemcpy3D, ::cuMemcpy3DAsync, ::cuMemcpyAtoA, ::cuMemcpyAtoD, | |
| | | * ::cuMemcpyAtoH, ::cuMemcpyAtoHAsync, ::cuMemcpyDtoA, ::cuMemcpyDtoD, ::c | |
| | | uMemcpyDtoDAsync, | |
| | | * ::cuMemcpyDtoH, ::cuMemcpyDtoHAsync, ::cuMemcpyHtoA, ::cuMemcpyHtoAAsync | |
| | | , | |
| | | * ::cuMemcpyHtoD, ::cuMemcpyHtoDAsync, ::cuMemFree, ::cuMemFreeHost, | |
| | | * ::cuMemGetAddressRange, ::cuMemGetInfo, ::cuMemHostAlloc, | |
| | | * ::cuMemHostGetDevicePointer, ::cuMemsetD2D8, ::cuMemsetD2D16, | |
| | | * ::cuMemsetD2D32, ::cuMemsetD8, ::cuMemsetD16, ::cuMemsetD32 | |
| | | */ | |
| | | CUresult CUDAAPI cuArray3DCreate(CUarray *pHandle, const CUDA_ARRAY3D_DESCR | |
| | | IPTOR *pAllocateArray); | |
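/*
 * Illustrative usage sketch (editorial addition, not part of the original
 * header): creates a \p width x \p height x \p depth array of floats that
 * can also be bound to a surface reference. As noted above, the
 * ::CUDA_ARRAY3D_SURFACE_LDST flag is required for surface binding; \p width,
 * \p height, and \p depth are placeholders.
 * \code
    CUDA_ARRAY3D_DESCRIPTOR desc;
    CUarray hArray;
    desc.Format      = CU_AD_FORMAT_FLOAT;
    desc.NumChannels = 1;
    desc.Width       = width;
    desc.Height      = height;
    desc.Depth       = depth;
    desc.Flags       = CUDA_ARRAY3D_SURFACE_LDST;   // allow cuSurfRefSetArray()
    cuArray3DCreate(&hArray, &desc);
 * \endcode
 */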
| | | | |
| | | /** | |
| | | * \brief Get a 3D CUDA array descriptor | |
| | | * | |
| | | * Returns in \p *pArrayDescriptor a descriptor containing information on t | |
| | | he | |
| | | * format and dimensions of the CUDA array \p hArray. It is useful for | |
| | | * subroutines that have been passed a CUDA array, but need to know the CUD | |
| | | A | |
| | | * array parameters for validation or other purposes. | |
| | | * | |
| | | * This function may be called on 1D and 2D arrays, in which case the \p He | |
| | | ight | |
| | | * and/or \p Depth members of the descriptor struct will be set to 0. | |
| | | * | |
| | | * \param pArrayDescriptor - Returned 3D array descriptor | |
| | | * \param hArray - 3D array to get descriptor of | |
| | | * | |
| | | * \return | |
| | | * ::CUDA_SUCCESS, | |
| | | * ::CUDA_ERROR_DEINITIALIZED, | |
| | | * ::CUDA_ERROR_NOT_INITIALIZED, | |
| | | * ::CUDA_ERROR_INVALID_CONTEXT, | |
| | | * ::CUDA_ERROR_INVALID_VALUE, | |
| | | * ::CUDA_ERROR_INVALID_HANDLE | |
| | | * \notefnerr | |
| | | * | |
| | | * \sa ::cuArray3DCreate, ::cuArrayCreate, | |
| | | * ::cuArrayDestroy, ::cuArrayGetDescriptor, ::cuMemAlloc, ::cuMemAllocHost | |
| | | , | |
| | | * ::cuMemAllocPitch, ::cuMemcpy2D, ::cuMemcpy2DAsync, ::cuMemcpy2DUnaligne | |
| | | d, | |
| | | * ::cuMemcpy3D, ::cuMemcpy3DAsync, ::cuMemcpyAtoA, ::cuMemcpyAtoD, | |
| | | * ::cuMemcpyAtoH, ::cuMemcpyAtoHAsync, ::cuMemcpyDtoA, ::cuMemcpyDtoD, ::c | |
| | | uMemcpyDtoDAsync, | |
| | | * ::cuMemcpyDtoH, ::cuMemcpyDtoHAsync, ::cuMemcpyHtoA, ::cuMemcpyHtoAAsync | |
| | | , | |
| | | * ::cuMemcpyHtoD, ::cuMemcpyHtoDAsync, ::cuMemFree, ::cuMemFreeHost, | |
| | | * ::cuMemGetAddressRange, ::cuMemGetInfo, ::cuMemHostAlloc, | |
| | | * ::cuMemHostGetDevicePointer, ::cuMemsetD2D8, ::cuMemsetD2D16, | |
| | | * ::cuMemsetD2D32, ::cuMemsetD8, ::cuMemsetD16, ::cuMemsetD32 | |
| | | */ | |
| | | CUresult CUDAAPI cuArray3DGetDescriptor(CUDA_ARRAY3D_DESCRIPTOR *pArrayDesc | |
| | | riptor, CUarray hArray); | |
| | | #endif /* __CUDA_API_VERSION >= 3020 */ | |
| | | | |
| | | /** @} */ /* END CUDA_MEM */ | |
| | | | |
| | | /** | |
| | | * \defgroup CUDA_STREAM Stream Management | |
| | | * | |
| | | * This section describes the stream management functions of the low-level | |
| | | CUDA | |
| | | * driver application programming interface. | |
| | | * | |
| | | * @{ | |
| | | */ | |
| | | | |
| | | /** | |
| | | * \brief Create a stream | |
| | | * | |
| | | * Creates a stream and returns a handle in \p phStream. \p Flags is requir | |
| | | ed | |
| | | * to be 0. | |
| | | * | |
| | | * \param phStream - Returned newly created stream | |
| | | * \param Flags - Parameters for stream creation (must be 0) | |
| | | * | |
| | | * \return | |
| | | * ::CUDA_SUCCESS, | |
| | | * ::CUDA_ERROR_DEINITIALIZED, | |
| | | * ::CUDA_ERROR_NOT_INITIALIZED, | |
| | | * ::CUDA_ERROR_INVALID_CONTEXT, | |
| | | * ::CUDA_ERROR_INVALID_VALUE, | |
| | | * ::CUDA_ERROR_OUT_OF_MEMORY | |
| | | * \notefnerr | |
| | | * | |
| | | * \sa ::cuStreamDestroy, | |
| | | * ::cuStreamWaitEvent, | |
| | | * ::cuStreamQuery, | |
| | | * ::cuStreamSynchronize | |
| | | */ | |
| | | CUresult CUDAAPI cuStreamCreate(CUstream *phStream, unsigned int Flags); | |
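/*
 * Illustrative usage sketch (editorial addition, not part of the original
 * header): a typical stream lifecycle. The buffers \p dptr and \p hostBuf and
 * the size \p numBytes are placeholders; \p hostBuf should come from
 * ::cuMemAllocHost() or ::cuMemHostAlloc() so the copy can truly overlap.
 * \code
    CUstream hStream;
    cuStreamCreate(&hStream, 0);                          // Flags must be 0
    cuMemcpyHtoDAsync(dptr, hostBuf, numBytes, hStream);  // queued in hStream
    // ... queue more work into hStream; it executes in issue order ...
    cuStreamSynchronize(hStream);                         // wait for completion
    cuStreamDestroy(hStream);
 * \endcode
 */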
| | | | |
| | | /** | |
| | | * \brief Make a compute stream wait on an event | |
| | | * | |
| | | * Makes all future work submitted to \p hStream wait until \p hEvent | |
| | | * reports completion before beginning execution. This synchronization | |
| | | * will be performed efficiently on the device. | |
| | | * | |
| | | * The stream \p hStream will wait only for the completion of the most rece | |
| | | nt | |
| | | * host call to ::cuEventRecord() on \p hEvent. Once this call has returne | |
| | | d, | |
| | | * any functions (including ::cuEventRecord() and ::cuEventDestroy()) may b | |
| | | e | |
| | | * called on \p hEvent again, and the subsequent calls will not have any | |
| | | * effect on \p hStream. | |
| | | * | |
| | | * If \p hStream is 0 (the NULL stream) any future work submitted in any st | |
| | | ream | |
| | | * will wait for \p hEvent to complete before beginning execution. This | |
| | | * effectively creates a barrier for all future work submitted to the conte | |
| | | xt. | |
| | | * | |
| | | * If ::cuEventRecord() has not been called on \p hEvent, this call acts as | |
| | | if | |
| | | * the record has already completed, and so is a functional no-op. | |
| | | * | |
| | | * \param hStream - Stream to wait | |
| | | * \param hEvent - Event to wait on (may not be NULL) | |
| | | * \param Flags - Parameters for the operation (must be 0) | |
| | | * | |
| | | * \return | |
| | | * ::CUDA_SUCCESS, | |
| | | * ::CUDA_ERROR_DEINITIALIZED, | |
| | | * ::CUDA_ERROR_NOT_INITIALIZED, | |
| | | * ::CUDA_ERROR_INVALID_CONTEXT, | |
| | | * ::CUDA_ERROR_INVALID_HANDLE | |
| | | * \notefnerr | |
| | | * | |
| | | * \sa ::cuStreamCreate, | |
| | | * ::cuEventRecord, | |
| | | * ::cuStreamQuery, | |
| | | * ::cuStreamSynchronize, | |
| | | * ::cuStreamDestroy | |
| | | */ | |
| | | CUresult CUDAAPI cuStreamWaitEvent(CUstream hStream, CUevent hEvent, unsign | |
| | | ed int Flags); | |
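/*
 * Illustrative usage sketch (editorial addition, not part of the original
 * header): makes \p streamB wait for work previously queued in \p streamA
 * without blocking the host. The stream handles and the queued work are
 * placeholders.
 * \code
    CUevent hEvent;
    cuEventCreate(&hEvent, CU_EVENT_DISABLE_TIMING);  // timing data not needed
    // ... queue producer work into streamA ...
    cuEventRecord(hEvent, streamA);
    cuStreamWaitEvent(streamB, hEvent, 0);            // Flags must be 0
    // ... queue consumer work into streamB; it will not begin until the
    //     producer work recorded above has completed ...
    cuEventDestroy(hEvent);                           // legal once the call returns
 * \endcode
 */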
| | | | |
| | | /** | |
| | | * \brief Determine status of a compute stream | |
| | | * | |
| | | * Returns ::CUDA_SUCCESS if all operations in the stream specified by | |
| | | * \p hStream have completed, or ::CUDA_ERROR_NOT_READY if not. | |
| | | * | |
| | | * \param hStream - Stream to query status of | |
| | | * | |
| | | * \return | |
| | | * ::CUDA_SUCCESS, | |
| | | * ::CUDA_ERROR_DEINITIALIZED, | |
| | | * ::CUDA_ERROR_NOT_INITIALIZED, | |
| | | * ::CUDA_ERROR_INVALID_CONTEXT, | |
| | | * ::CUDA_ERROR_INVALID_HANDLE, | |
| | | * ::CUDA_ERROR_NOT_READY | |
| | | * \notefnerr | |
| | | * | |
| | | * \sa ::cuStreamCreate, | |
| | | * ::cuStreamWaitEvent, | |
| | | * ::cuStreamDestroy, | |
| | | * ::cuStreamSynchronize | |
| | | */ | |
| | | CUresult CUDAAPI cuStreamQuery(CUstream hStream); | |
| | | | |
| | | /** | |
| | | * \brief Wait until a stream's tasks are completed | |
| | | * | |
| | | * Waits until the device has completed all operations in the stream specif | |
| | | ied | |
| | | * by \p hStream. If the context was created with the ::CU_CTX_BLOCKING_SYN | |
| | | C | |
| | | * flag, the CPU thread will block until the stream is finished with all of | |
| | | * its tasks. | |
| | | * | |
| | | * \param hStream - Stream to wait for | |
| | | * | |
| | | * \return | |
| | | * ::CUDA_SUCCESS, | |
| | | * ::CUDA_ERROR_DEINITIALIZED, | |
| | | * ::CUDA_ERROR_NOT_INITIALIZED, | |
| | | * ::CUDA_ERROR_INVALID_CONTEXT, | |
| | | * ::CUDA_ERROR_INVALID_HANDLE | |
| | | * \notefnerr | |
| | | * | |
| | | * \sa ::cuStreamCreate, | |
| | | * ::cuStreamDestroy, | |
| | | * ::cuStreamWaitEvent, | |
| | | * ::cuStreamQuery | |
| | | */ | |
| | | CUresult CUDAAPI cuStreamSynchronize(CUstream hStream); | |
| | | | |
| | | /** | |
| | | * \brief Destroys a stream | |
| | | * | |
| | | * Destroys the stream specified by \p hStream. | |
| | | * | |
| | | * \param hStream - Stream to destroy | |
| | | * | |
| | | * \return | |
| | | * ::CUDA_SUCCESS, | |
| | | * ::CUDA_ERROR_DEINITIALIZED, | |
| | | * ::CUDA_ERROR_NOT_INITIALIZED, | |
| | | * ::CUDA_ERROR_INVALID_CONTEXT, | |
| | | * ::CUDA_ERROR_INVALID_VALUE | |
| | | * \notefnerr | |
| | | * | |
| | | * \sa ::cuStreamCreate, | |
| | | * ::cuStreamWaitEvent, | |
| | | * ::cuStreamQuery, | |
| | | * ::cuStreamSynchronize | |
| | | */ | |
| | | CUresult CUDAAPI cuStreamDestroy(CUstream hStream); | |
| | | | |
| | | /** @} */ /* END CUDA_STREAM */ | |
| | | | |
| | | /** | |
| | | * \defgroup CUDA_EVENT Event Management | |
| | | * | |
| | | * This section describes the event management functions of the low-level C | |
| | | UDA | |
| | | * driver application programming interface. | |
| | | * | |
| | | * @{ | |
| | | */ | |
| | | | |
| | | /** | |
| | | * \brief Creates an event | |
| | | * | |
| | | * Creates an event \p *phEvent with the flags specified via \p Flags. Valid | |
| | | * flags include: | |
| | | * - ::CU_EVENT_DEFAULT: Default event creation flag. | |
| | | * - ::CU_EVENT_BLOCKING_SYNC: Specifies that the created event should use | |
| | | blocking | |
| | | * synchronization. A CPU thread that uses ::cuEventSynchronize() to wai | |
| | | t on | |
| | | * an event created with this flag will block until the event has actuall | |
| | | y | |
| | | * been recorded. | |
| | | * - ::CU_EVENT_DISABLE_TIMING: Specifies that the created event does not n | |
| | | eed | |
| | | * to record timing data. Events created with this flag specified and | |
| | | * the ::CU_EVENT_BLOCKING_SYNC flag not specified will provide the best | |
| | | * performance when used with ::cuStreamWaitEvent() and ::cuEventQuery(). | |
| | | * | |
| | | * \param phEvent - Returns newly created event | |
| | | * \param Flags - Event creation flags | |
| | | * | |
| | | * \return | |
| | | * ::CUDA_SUCCESS, | |
| | | * ::CUDA_ERROR_DEINITIALIZED, | |
| | | * ::CUDA_ERROR_NOT_INITIALIZED, | |
| | | * ::CUDA_ERROR_INVALID_CONTEXT, | |
| | | * ::CUDA_ERROR_INVALID_VALUE, | |
| | | * ::CUDA_ERROR_OUT_OF_MEMORY | |
| | | * \notefnerr | |
| | | * | |
| | | * \sa | |
| | | * ::cuEventRecord, | |
| | | * ::cuEventQuery, | |
| | | * ::cuEventSynchronize, | |
| | | * ::cuEventDestroy, | |
| | | * ::cuEventElapsedTime | |
| | | */ | |
| | | CUresult CUDAAPI cuEventCreate(CUevent *phEvent, unsigned int Flags); | |
| | | | |
| | | /** | |
| | | * \brief Records an event | |
| | | * | |
| | | * Records an event. If \p hStream is non-zero, the event is recorded after | |
| | | all | |
| | | * preceding operations in \p hStream have been completed; otherwise, it is | |
| | | * recorded after all preceding operations in the CUDA context have been | |
| | | * completed. Since this operation is asynchronous, ::cuEventQuery() and/or | |
| | | * ::cuEventSynchronize() must be used to determine when the event has actu | |
| | | ally | |
| | | * been recorded. | |
| | | * | |
| | | * If ::cuEventRecord() has previously been called on \p hEvent, then this | |
| | | * call will overwrite any existing state in \p hEvent. Any subsequent cal | |
| | | ls | |
| | | * which examine the status of \p hEvent will only examine the completion o | |
| | | f | |
| | | * this most recent call to ::cuEventRecord(). | |
| | | * | |
| | | * \param hEvent - Event to record | |
| | | * \param hStream - Stream to record event for | |
| | | * | |
| | | * \return | |
| | | * ::CUDA_SUCCESS, | |
| | | * ::CUDA_ERROR_DEINITIALIZED, | |
| | | * ::CUDA_ERROR_NOT_INITIALIZED, | |
| | | * ::CUDA_ERROR_INVALID_CONTEXT, | |
| | | * ::CUDA_ERROR_INVALID_HANDLE, | |
| | | * ::CUDA_ERROR_INVALID_VALUE | |
| | | * \notefnerr | |
| | | * | |
| | | * \sa ::cuEventCreate, | |
| | | * ::cuEventQuery, | |
| | | * ::cuEventSynchronize, | |
| | | * ::cuStreamWaitEvent, | |
| | | * ::cuEventDestroy, | |
| | | * ::cuEventElapsedTime | |
| | | */ | |
| | | CUresult CUDAAPI cuEventRecord(CUevent hEvent, CUstream hStream); | |
| | | | |
| | | /** | |
| | | * \brief Queries an event's status | |
| | | * | |
| | | * Query the status of all device work preceding the most recent | |
| | | * call to ::cuEventRecord() (in the appropriate compute streams, | |
| | | * as specified by the arguments to ::cuEventRecord()). | |
| | | * | |
| | | * If this work has successfully been completed by the device, or if | |
| | | * ::cuEventRecord() has not been called on \p hEvent, then ::CUDA_SUCCESS | |
| | | is | |
| | | * returned. If this work has not yet been completed by the device then | |
| | | * ::CUDA_ERROR_NOT_READY is returned. | |
| | | * | |
| | | * \param hEvent - Event to query | |
| | | * | |
| | | * \return | |
| | | * ::CUDA_SUCCESS, | |
| | | * ::CUDA_ERROR_DEINITIALIZED, | |
| | | * ::CUDA_ERROR_NOT_INITIALIZED, | |
| | | * ::CUDA_ERROR_INVALID_CONTEXT, | |
| | | * ::CUDA_ERROR_INVALID_HANDLE, | |
| | | * ::CUDA_ERROR_INVALID_VALUE, | |
| | | * ::CUDA_ERROR_NOT_READY | |
| | | * \notefnerr | |
| | | * | |
| | | * \sa ::cuEventCreate, | |
| | | * ::cuEventRecord, | |
| | | * ::cuEventSynchronize, | |
| | | * ::cuEventDestroy, | |
| | | * ::cuEventElapsedTime | |
| | | */ | |
| | | CUresult CUDAAPI cuEventQuery(CUevent hEvent); | |
| | | | |
| | | /** | |
| | | * \brief Waits for an event to complete | |
| | | * | |
| | | * Wait until the completion of all device work preceding the most recent | |
| | | * call to ::cuEventRecord() (in the appropriate compute streams, as specif | |
| | | ied | |
| | | * by the arguments to ::cuEventRecord()). | |
| | | * | |
| | | * If ::cuEventRecord() has not been called on \p hEvent, ::CUDA_SUCCESS is | |
| | | * returned immediately. | |
| | | * | |
| | | * Waiting for an event that was created with the ::CU_EVENT_BLOCKING_SYNC | |
| | | * flag will cause the calling CPU thread to block until the event has | |
| | | * been completed by the device. If the ::CU_EVENT_BLOCKING_SYNC flag has | |
| | | * not been set, then the CPU thread will busy-wait until the event has | |
| | | * been completed by the device. | |
| | | * | |
| | | * \param hEvent - Event to wait for | |
| | | * | |
| | | * \return | |
| | | * ::CUDA_SUCCESS, | |
| | | * ::CUDA_ERROR_DEINITIALIZED, | |
| | | * ::CUDA_ERROR_NOT_INITIALIZED, | |
| | | * ::CUDA_ERROR_INVALID_CONTEXT, | |
| | | * ::CUDA_ERROR_INVALID_HANDLE | |
| | | * \notefnerr | |
| | | * | |
| | | * \sa ::cuEventCreate, | |
| | | * ::cuEventRecord, | |
| | | * ::cuEventQuery, | |
| | | * ::cuEventDestroy, | |
| | | * ::cuEventElapsedTime | |
| | | */ | |
| | | CUresult CUDAAPI cuEventSynchronize(CUevent hEvent); | |
| | | | |
| | | /** | |
| | | * \brief Destroys an event | |
| | | * | |
| | | * Destroys the event specified by \p hEvent. | |
| | | * | |
| | | * \param hEvent - Event to destroy | |
| | | * | |
| | | * \return | |
| | | * ::CUDA_SUCCESS, | |
| | | * ::CUDA_ERROR_DEINITIALIZED, | |
| | | * ::CUDA_ERROR_NOT_INITIALIZED, | |
| | | * ::CUDA_ERROR_INVALID_CONTEXT, | |
| | | * ::CUDA_ERROR_INVALID_HANDLE | |
| | | * \notefnerr | |
| | | * | |
| | | * \sa ::cuEventCreate, | |
| | | * ::cuEventRecord, | |
| | | * ::cuEventQuery, | |
| | | * ::cuEventSynchronize, | |
| | | * ::cuEventElapsedTime | |
| | | */ | |
| | | CUresult CUDAAPI cuEventDestroy(CUevent hEvent); | |
| | | | |
| | | /** | |
| | | * \brief Computes the elapsed time between two events | |
| | | * | |
| | | * Computes the elapsed time between two events (in milliseconds with a | |
| | | * resolution of around 0.5 microseconds). | |
| | | * | |
| | | * If either event was last recorded in a non-NULL stream, the resulting ti | |
| | | me | |
| | | * may be greater than expected (even if both used the same stream handle). | |
| | | This | |
| | | * happens because the ::cuEventRecord() operation takes place asynchronous | |
| | | ly | |
| | | * and there is no guarantee that the measured latency is actually just bet | |
| | | ween | |
| | | * the two events. Any number of other different stream operations could ex | |
| | | ecute | |
| | | * in between the two measured events, thus altering the timing in a signif | |
| | | icant | |
| | | * way. | |
| | | * | |
| | | * If ::cuEventRecord() has not been called on either event then | |
| | | * ::CUDA_ERROR_INVALID_HANDLE is returned. If ::cuEventRecord() has been c | |
| | | alled | |
| | | * on both events but one or both of them has not yet been completed (that | |
| | | is, | |
| | | * ::cuEventQuery() would return ::CUDA_ERROR_NOT_READY on at least one of | |
| | | the | |
| | | * events), ::CUDA_ERROR_NOT_READY is returned. If either event was created | |
| | | with | |
| | | * the ::CU_EVENT_DISABLE_TIMING flag, then this function will return | |
| | | * ::CUDA_ERROR_INVALID_HANDLE. | |
| | | * | |
| | | * \param pMilliseconds - Time between \p hStart and \p hEnd in ms | |
| | | * \param hStart - Starting event | |
| | | * \param hEnd - Ending event | |
| | | * | |
| | | * \return | |
| | | * ::CUDA_SUCCESS, | |
| | | * ::CUDA_ERROR_DEINITIALIZED, | |
| | | * ::CUDA_ERROR_NOT_INITIALIZED, | |
| | | * ::CUDA_ERROR_INVALID_CONTEXT, | |
| | | * ::CUDA_ERROR_INVALID_HANDLE, | |
| | | * ::CUDA_ERROR_NOT_READY | |
| | | * \notefnerr | |
| | | * | |
| | | * \sa ::cuEventCreate, | |
| | | * ::cuEventRecord, | |
| | | * ::cuEventQuery, | |
| | | * ::cuEventSynchronize, | |
| | | * ::cuEventDestroy | |
| | | */ | |
| | | CUresult CUDAAPI cuEventElapsedTime(float *pMilliseconds, CUevent hStart, C | |
| | | Uevent hEnd); | |
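/*
 * Illustrative usage sketch (editorial addition, not part of the original
 * header): times a stretch of device work with a pair of events. \p hStream
 * and the queued work are placeholders; events used for timing must not be
 * created with ::CU_EVENT_DISABLE_TIMING.
 * \code
    CUevent hStart, hEnd;
    float ms = 0.0f;
    cuEventCreate(&hStart, CU_EVENT_DEFAULT);
    cuEventCreate(&hEnd, CU_EVENT_DEFAULT);
    cuEventRecord(hStart, hStream);
    // ... queue the work to be timed into hStream ...
    cuEventRecord(hEnd, hStream);
    cuEventSynchronize(hEnd);                 // ensure hEnd has completed
    cuEventElapsedTime(&ms, hStart, hEnd);    // elapsed time in milliseconds
    cuEventDestroy(hStart);
    cuEventDestroy(hEnd);
 * \endcode
 */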
| | | | |
| | | /** @} */ /* END CUDA_EVENT */ | |
| | | | |
| | | /** | |
| | | * \defgroup CUDA_EXEC Execution Control | |
| | | * | |
| | | * This section describes the execution control functions of the low-level | |
| | | CUDA | |
| | | * driver application programming interface. | |
| | | * | |
| | | * @{ | |
| | | */ | |
| | | | |
| | | /** | |
| | | * \brief Sets the block-dimensions for the function | |
| | | * | |
| | | * Specifies the \p x, \p y, and \p z dimensions of the thread blocks that | |
| | | are | |
| | | * created when the kernel given by \p hfunc is launched. | |
| | | * | |
| | | * \param hfunc - Kernel to specify dimensions of | |
| | | * \param x - X dimension | |
| | | * \param y - Y dimension | |
| | | * \param z - Z dimension | |
| | | * | |
| | | * \return | |
| | | * ::CUDA_SUCCESS, | |
| | | * ::CUDA_ERROR_DEINITIALIZED, | |
| | | * ::CUDA_ERROR_NOT_INITIALIZED, | |
| | | * ::CUDA_ERROR_INVALID_CONTEXT, | |
| | | * ::CUDA_ERROR_INVALID_HANDLE, | |
| | | * ::CUDA_ERROR_INVALID_VALUE | |
| | | * \notefnerr | |
| | | * | |
| | | * \sa ::cuFuncSetSharedSize, | |
| | | * ::cuFuncSetCacheConfig, | |
| | | * ::cuFuncGetAttribute, | |
| | | * ::cuParamSetSize, | |
| | | * ::cuParamSeti, | |
| | | * ::cuParamSetf, | |
| | | * ::cuParamSetv, | |
| | | * ::cuLaunch, | |
| | | * ::cuLaunchGrid, | |
| | | * ::cuLaunchGridAsync | |
| | | */ | |
| | | CUresult CUDAAPI cuFuncSetBlockShape(CUfunction hfunc, int x, int y, int z) | |
| | | ; | |
| | | | |
| | | /** | |
| | | * \brief Sets the dynamic shared-memory size for the function | |
| | | * | |
| | | * Sets through \p bytes the amount of dynamic shared memory that will be | |
| | | * available to each thread block when the kernel given by \p hfunc is laun | |
| | | ched. | |
| | | * | |
| | | * \param hfunc - Kernel to specify dynamic shared-memory size for | |
| | | * \param bytes - Dynamic shared-memory size per thread block in bytes | |
| | | * | |
| | | * \return | |
| | | * ::CUDA_SUCCESS, | |
| | | * ::CUDA_ERROR_DEINITIALIZED, | |
| | | * ::CUDA_ERROR_NOT_INITIALIZED, | |
| | | * ::CUDA_ERROR_INVALID_CONTEXT, | |
| | | * ::CUDA_ERROR_INVALID_HANDLE, | |
| | | * ::CUDA_ERROR_INVALID_VALUE | |
| | | * \notefnerr | |
| | | * | |
| | | * \sa ::cuFuncSetBlockShape, | |
| | | * ::cuFuncSetCacheConfig, | |
| | | * ::cuFuncGetAttribute, | |
| | | * ::cuParamSetSize, | |
| | | * ::cuParamSeti, | |
| | | * ::cuParamSetf, | |
| | | * ::cuParamSetv, | |
| | | * ::cuLaunch, | |
| | | * ::cuLaunchGrid, | |
| | | * ::cuLaunchGridAsync | |
| | | */ | |
| | | CUresult CUDAAPI cuFuncSetSharedSize(CUfunction hfunc, unsigned int bytes); | |
| | | | |
| | | /** | |
| | | * \brief Returns information about a function | |
| | | * | |
| | | * Returns in \p *pi the integer value of the attribute \p attrib on the ke | |
| | | rnel | |
| | | * given by \p hfunc. The supported attributes are: | |
| | | * - ::CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK: The maximum number of threa | |
| | | ds | |
| | | * per block, beyond which a launch of the function would fail. This numb | |
| | | er | |
| | | * depends on both the function and the device on which the function is | |
| | | * currently loaded. | |
| | | * - ::CU_FUNC_ATTRIBUTE_SHARED_SIZE_BYTES: The size in bytes of | |
| | | * statically-allocated shared memory per block required by this function | |
| | | . | |
| | | * This does not include dynamically-allocated shared memory requested by | |
| | | * the user at runtime. | |
| | | * - ::CU_FUNC_ATTRIBUTE_CONST_SIZE_BYTES: The size in bytes of user-alloca | |
| | | ted | |
| | | * constant memory required by this function. | |
| | | * - ::CU_FUNC_ATTRIBUTE_LOCAL_SIZE_BYTES: The size in bytes of local memor | |
| | | y | |
| | | * used by each thread of this function. | |
| | | * - ::CU_FUNC_ATTRIBUTE_NUM_REGS: The number of registers used by each thr | |
| | | ead | |
| | | * of this function. | |
| | | * - ::CU_FUNC_ATTRIBUTE_PTX_VERSION: The PTX virtual architecture version | |
| | | for | |
| | | * which the function was compiled. This value is the major PTX version * | |
| | | 10 | |
| | | * + the minor PTX version, so a PTX version 1.3 function would return th | |
| | | e | |
| | | * value 13. Note that this may return the undefined value of 0 for cubin | |
| | | s | |
| | | * compiled prior to CUDA 3.0. | |
| | | * - ::CU_FUNC_ATTRIBUTE_BINARY_VERSION: The binary architecture version fo | |
| | | r | |
| | | * which the function was compiled. This value is the major binary | |
| | | * version * 10 + the minor binary version, so a binary version 1.3 funct | |
| | | ion | |
| | | * would return the value 13. Note that this will return a value of 10 fo | |
| | | r | |
| | | * legacy cubins that do not have a properly-encoded binary architecture | |
| | | * version. | |
| | | * | |
| | | * \param pi - Returned attribute value | |
| | | * \param attrib - Attribute requested | |
| | | * \param hfunc - Function to query attribute of | |
| | | * | |
| | | * \return | |
| | | * ::CUDA_SUCCESS, | |
| | | * ::CUDA_ERROR_DEINITIALIZED, | |
| | | * ::CUDA_ERROR_NOT_INITIALIZED, | |
| | | * ::CUDA_ERROR_INVALID_CONTEXT, | |
| | | * ::CUDA_ERROR_INVALID_HANDLE, | |
| | | * ::CUDA_ERROR_INVALID_VALUE | |
| | | * \notefnerr | |
| | | * | |
| | | * \sa ::cuFuncSetBlockShape, | |
| | | * ::cuFuncSetSharedSize, | |
| | | * ::cuFuncSetCacheConfig, | |
| | | * ::cuParamSetSize, | |
| | | * ::cuParamSeti, | |
| | | * ::cuParamSetf, | |
| | | * ::cuParamSetv, | |
| | | * ::cuLaunch, | |
| | | * ::cuLaunchGrid, | |
| | | * ::cuLaunchGridAsync | |
| | | */ | |
| | | CUresult CUDAAPI cuFuncGetAttribute(int *pi, CUfunction_attribute attrib, C | |
| | | Ufunction hfunc); | |
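/*
 * Illustrative usage sketch (editorial addition, not part of the original
 * header): queries per-kernel limits before choosing a block shape. \p hfunc
 * is a placeholder obtained earlier from ::cuModuleGetFunction().
 * \code
    int maxThreads = 0, numRegs = 0;
    cuFuncGetAttribute(&maxThreads, CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK, hfunc);
    cuFuncGetAttribute(&numRegs,    CU_FUNC_ATTRIBUTE_NUM_REGS,              hfunc);
    // Clamp the requested block size to what this kernel can support.
    int blockSize = (maxThreads < 256) ? maxThreads : 256;
    cuFuncSetBlockShape(hfunc, blockSize, 1, 1);
 * \endcode
 */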
| | | | |
| | | /** | |
| | | * \brief Sets the preferred cache configuration for a device function | |
| | | * | |
| | | * On devices where the L1 cache and shared memory use the same hardware | |
| | | * resources, this sets through \p config the preferred cache configuration | |
| | | for | |
| | | * the device function \p hfunc. This is only a preference. The driver will | |
| | | use | |
| | | * the requested configuration if possible, but it is free to choose a diff | |
| | | erent | |
| | | * configuration if required to execute \p hfunc. Any context-wide prefere | |
| | | nce | |
| | | * set via ::cuCtxSetCacheConfig() will be overridden by this per-function | |
| | | * setting unless the per-function setting is ::CU_FUNC_CACHE_PREFER_NONE. | |
| | | In | |
| | | * that case, the current context-wide setting will be used. | |
| | | * | |
| | | * This setting does nothing on devices where the size of the L1 cache and | |
| | | * shared memory are fixed. | |
| | | * | |
| | | * Launching a kernel with a different preference than the most recent | |
| | | * preference setting may insert a device-side synchronization point. | |
| | | * | |
| | | * | |
| | | * The supported cache configurations are: | |
| | | * - ::CU_FUNC_CACHE_PREFER_NONE: no preference for shared memory or L1 (de | |
| | | fault) | |
| | | * - ::CU_FUNC_CACHE_PREFER_SHARED: prefer larger shared memory and smaller | |
| | | L1 cache | |
| | | * - ::CU_FUNC_CACHE_PREFER_L1: prefer larger L1 cache and smaller shared m | |
| | | emory | |
| | | * | |
| | | * \param hfunc - Kernel to configure cache for | |
| | | * \param config - Requested cache configuration | |
| | | * | |
| | | * \return | |
| | | * ::CUDA_SUCCESS, | |
| | | * ::CUDA_ERROR_DEINITIALIZED, | |
| | | * ::CUDA_ERROR_NOT_INITIALIZED, | |
| | | * ::CUDA_ERROR_INVALID_CONTEXT | |
| | | * \notefnerr | |
| | | * | |
| | | * \sa ::cuCtxGetCacheConfig, | |
| | | * ::cuCtxSetCacheConfig, | |
| | | * ::cuFuncSetBlockShape, | |
| | | * ::cuFuncGetAttribute, | |
| | | * ::cuParamSetSize, | |
| | | * ::cuParamSeti, | |
| | | * ::cuParamSetf, | |
| | | * ::cuParamSetv, | |
| | | * ::cuLaunch, | |
| | | * ::cuLaunchGrid, | |
| | | * ::cuLaunchGridAsync | |
| | | */ | |
| | | CUresult CUDAAPI cuFuncSetCacheConfig(CUfunction hfunc, CUfunc_cache config | |
| | | ); | |
| | | | |
| | | /** | |
| | | * \brief Sets the parameter size for the function | |
| | | * | |
| | | * Sets through \p numbytes the total size in bytes needed by the function | |
| | | * parameters of the kernel corresponding to \p hfunc. | |
| | | * | |
| | | * \param hfunc - Kernel to set parameter size for | |
| | | * \param numbytes - Size of parameter list in bytes | |
| | | * | |
| | | * \return | |
| | | * ::CUDA_SUCCESS, | |
| | | * ::CUDA_ERROR_DEINITIALIZED, | |
| | | * ::CUDA_ERROR_NOT_INITIALIZED, | |
| | | * ::CUDA_ERROR_INVALID_CONTEXT, | |
| | | * ::CUDA_ERROR_INVALID_VALUE | |
| | | * \notefnerr | |
| | | * | |
| | | * \sa ::cuFuncSetBlockShape, | |
| | | * ::cuFuncSetSharedSize, | |
| | | * ::cuFuncGetAttribute, | |
| | | * ::cuParamSetf, | |
| | | * ::cuParamSeti, | |
| | | * ::cuParamSetv, | |
| | | * ::cuLaunch, | |
| | | * ::cuLaunchGrid, | |
| | | * ::cuLaunchGridAsync | |
| | | */ | |
| | | CUresult CUDAAPI cuParamSetSize(CUfunction hfunc, unsigned int numbytes); | |
| | | | |
| | | /** | |
| | | * \brief Adds an integer parameter to the function's argument list | |
| | | * | |
| | | * Sets an integer parameter that will be specified the next time the | |
| | | * kernel corresponding to \p hfunc is invoked. \p offset is a byte offset. | |
| | | * | |
| | | * \param hfunc - Kernel to add parameter to | |
| | | * \param offset - Offset to add parameter to argument list | |
| | | * \param value - Value of parameter | |
| | | * | |
| | | * \return | |
| | | * ::CUDA_SUCCESS, | |
| | | * ::CUDA_ERROR_DEINITIALIZED, | |
| | | * ::CUDA_ERROR_NOT_INITIALIZED, | |
| | | * ::CUDA_ERROR_INVALID_CONTEXT, | |
| | | * ::CUDA_ERROR_INVALID_VALUE | |
| | | * \notefnerr | |
| | | * | |
| | | * \sa ::cuFuncSetBlockShape, | |
| | | * ::cuFuncSetSharedSize, | |
| | | * ::cuFuncGetAttribute, | |
| | | * ::cuParamSetSize, | |
| | | * ::cuParamSetf, | |
| | | * ::cuParamSetv, | |
| | | * ::cuLaunch, | |
| | | * ::cuLaunchGrid, | |
| | | * ::cuLaunchGridAsync | |
| | | */ | |
| | | CUresult CUDAAPI cuParamSeti(CUfunction hfunc, int offset, unsigned int val | |
| | | ue); | |
| | | | |
| | | /** | |
| | | * \brief Adds a floating-point parameter to the function's argument list | |
| | | * | |
| | | * Sets a floating-point parameter that will be specified the next time the | |
| | | * kernel corresponding to \p hfunc is invoked. \p offset is a byte offset. | |
| | | * | |
| | | * \param hfunc - Kernel to add parameter to | |
| | | * \param offset - Offset to add parameter to argument list | |
| | | * \param value - Value of parameter | |
| | | * | |
| | | * \return | |
| | | * ::CUDA_SUCCESS, | |
| | | * ::CUDA_ERROR_DEINITIALIZED, | |
| | | * ::CUDA_ERROR_NOT_INITIALIZED, | |
| | | * ::CUDA_ERROR_INVALID_CONTEXT, | |
| | | * ::CUDA_ERROR_INVALID_VALUE | |
| | | * \notefnerr | |
| | | * | |
| | | * \sa ::cuFuncSetBlockShape, | |
| | | * ::cuFuncSetSharedSize, | |
| | | * ::cuFuncGetAttribute, | |
| | | * ::cuParamSetSize, | |
| | | * ::cuParamSeti, | |
| | | * ::cuParamSetv, | |
| | | * ::cuLaunch, | |
| | | * ::cuLaunchGrid, | |
| | | * ::cuLaunchGridAsync | |
| | | */ | |
| | | CUresult CUDAAPI cuParamSetf(CUfunction hfunc, int offset, float value); | |
| | | | |
| | | /** | |
| | | * \brief Adds arbitrary data to the function's argument list | |
| | | * | |
| | | * Copies an arbitrary amount of data (specified in \p numbytes) from \p pt | |
| | | r | |
| | | * into the parameter space of the kernel corresponding to \p hfunc. \p off | |
| | | set | |
| | | * is a byte offset. | |
| | | * | |
| | | * \param hfunc - Kernel to add data to | |
| | | * \param offset - Offset to add data to argument list | |
| | | * \param ptr - Pointer to arbitrary data | |
| | | * \param numbytes - Size of data to copy in bytes | |
| | | * | |
| | | * \return | |
| | | * ::CUDA_SUCCESS, | |
| | | * ::CUDA_ERROR_DEINITIALIZED, | |
| | | * ::CUDA_ERROR_NOT_INITIALIZED, | |
| | | * ::CUDA_ERROR_INVALID_CONTEXT, | |
| | | * ::CUDA_ERROR_INVALID_VALUE | |
| | | * \notefnerr | |
| | | * | |
| | | * \sa ::cuFuncSetBlockShape, | |
| | | * ::cuFuncSetSharedSize, | |
| | | * ::cuFuncGetAttribute, | |
| | | * ::cuParamSetSize, | |
| | | * ::cuParamSetf, | |
| | | * ::cuParamSeti, | |
| | | * ::cuLaunch, | |
| | | * ::cuLaunchGrid, | |
| | | * ::cuLaunchGridAsync | |
| | | */ | |
| | | CUresult CUDAAPI cuParamSetv(CUfunction hfunc, int offset, void *ptr, unsig | |
| | | ned int numbytes); | |
| | | | |
| | | /** | |
| | | * \brief Launches a CUDA function | |
| | | * | |
| | | * Invokes the kernel \p f on a 1 x 1 x 1 grid of blocks. The block | |
| | | * contains the number of threads specified by a previous call to | |
| | | * ::cuFuncSetBlockShape(). | |
| | | * | |
| | | * \param f - Kernel to launch | |
| | | * | |
| | | * \return | |
| | | * ::CUDA_SUCCESS, | |
| | | * ::CUDA_ERROR_DEINITIALIZED, | |
| | | * ::CUDA_ERROR_NOT_INITIALIZED, | |
| | | * ::CUDA_ERROR_INVALID_CONTEXT, | |
| | | * ::CUDA_ERROR_INVALID_VALUE, | |
| | | * ::CUDA_ERROR_LAUNCH_FAILED, | |
| | | * ::CUDA_ERROR_LAUNCH_OUT_OF_RESOURCES, | |
| | | * ::CUDA_ERROR_LAUNCH_TIMEOUT, | |
| | | * ::CUDA_ERROR_LAUNCH_INCOMPATIBLE_TEXTURING, | |
| | | * ::CUDA_ERROR_SHARED_OBJECT_INIT_FAILED | |
| | | * \notefnerr | |
| | | * | |
| | | * \sa ::cuFuncSetBlockShape, | |
| | | * ::cuFuncSetSharedSize, | |
| | | * ::cuFuncGetAttribute, | |
| | | * ::cuParamSetSize, | |
| | | * ::cuParamSetf, | |
| | | * ::cuParamSeti, | |
| | | * ::cuParamSetv, | |
| | | * ::cuLaunchGrid, | |
| | | * ::cuLaunchGridAsync | |
| | | */ | |
| | | CUresult CUDAAPI cuLaunch(CUfunction f); | |
| | | | |
| | | /** | |
| | | * \brief Launches a CUDA function | |
| | | * | |
| | | * Invokes the kernel \p f on a \p grid_width x \p grid_height grid of | |
| | | * blocks. Each block contains the number of threads specified by a previou | |
| | | s | |
| | | * call to ::cuFuncSetBlockShape(). | |
| | | * | |
| | | * \param f - Kernel to launch | |
| | | * \param grid_width - Width of grid in blocks | |
| | | * \param grid_height - Height of grid in blocks | |
| | | * | |
| | | * \return | |
| | | * ::CUDA_SUCCESS, | |
| | | * ::CUDA_ERROR_DEINITIALIZED, | |
| | | * ::CUDA_ERROR_NOT_INITIALIZED, | |
| | | * ::CUDA_ERROR_INVALID_CONTEXT, | |
| | | * ::CUDA_ERROR_INVALID_VALUE, | |
| | | * ::CUDA_ERROR_LAUNCH_FAILED, | |
| | | * ::CUDA_ERROR_LAUNCH_OUT_OF_RESOURCES, | |
| | | * ::CUDA_ERROR_LAUNCH_TIMEOUT, | |
| | | * ::CUDA_ERROR_LAUNCH_INCOMPATIBLE_TEXTURING, | |
| | | * ::CUDA_ERROR_SHARED_OBJECT_INIT_FAILED | |
| | | * \notefnerr | |
| | | * | |
| | | * \sa ::cuFuncSetBlockShape, | |
| | | * ::cuFuncSetSharedSize, | |
| | | * ::cuFuncGetAttribute, | |
| | | * ::cuParamSetSize, | |
| | | * ::cuParamSetf, | |
| | | * ::cuParamSeti, | |
| | | * ::cuParamSetv, | |
| | | * ::cuLaunch, | |
| | | * ::cuLaunchGridAsync | |
| | | */ | |
| | | CUresult CUDAAPI cuLaunchGrid(CUfunction f, int grid_width, int grid_height | |
| | | ); | |
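/*
 * Illustrative usage sketch (editorial addition, not part of the original
 * header): the explicit parameter-passing sequence that precedes this launch
 * API. The kernel is assumed to take an int followed by a device pointer;
 * \p hfunc, \p n, and \p dData are placeholders, and ALIGN_UP is a local
 * helper, not part of the API. For int and CUdeviceptr the required alignment
 * equals their size on typical targets.
 * \code
    #define ALIGN_UP(off, align) ((off) = ((off) + (align) - 1) & ~((align) - 1))
    int offset = 0;
    int n = 1024;                 // placeholder scalar argument
    CUdeviceptr dData;            // placeholder device allocation
    cuFuncSetBlockShape(hfunc, 256, 1, 1);
    ALIGN_UP(offset, sizeof(n));
    cuParamSeti(hfunc, offset, n);
    offset += sizeof(n);
    ALIGN_UP(offset, sizeof(dData));
    cuParamSetv(hfunc, offset, &dData, sizeof(dData));
    offset += sizeof(dData);
    cuParamSetSize(hfunc, offset);
    cuLaunchGrid(hfunc, (n + 255) / 256, 1);
 * \endcode
 */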
| | | | |
| | | /** | |
| | | * \brief Launches a CUDA function | |
| | | * | |
| | | * Invokes the kernel \p f on a \p grid_width x \p grid_height grid of | |
| | | * blocks. Each block contains the number of threads specified by a previou | |
| | | s | |
| | | * call to ::cuFuncSetBlockShape(). | |
| | | * | |
| | | * ::cuLaunchGridAsync() can optionally be associated to a stream by passin | |
| | | g a | |
| | | * non-zero \p hStream argument. | |
| | | * | |
| | | * \param f - Kernel to launch | |
| | | * \param grid_width - Width of grid in blocks | |
| | | * \param grid_height - Height of grid in blocks | |
| | | * \param hStream - Stream identifier | |
| | | * | |
| | | * \return | |
| | | * ::CUDA_SUCCESS, | |
| | | * ::CUDA_ERROR_DEINITIALIZED, | |
| | | * ::CUDA_ERROR_NOT_INITIALIZED, | |
| | | * ::CUDA_ERROR_INVALID_CONTEXT, | |
| | | * ::CUDA_ERROR_INVALID_VALUE, | |
| | | * ::CUDA_ERROR_LAUNCH_FAILED, | |
| | | * ::CUDA_ERROR_LAUNCH_OUT_OF_RESOURCES, | |
| | | * ::CUDA_ERROR_LAUNCH_TIMEOUT, | |
| | | * ::CUDA_ERROR_LAUNCH_INCOMPATIBLE_TEXTURING, | |
| | | * ::CUDA_ERROR_SHARED_OBJECT_INIT_FAILED | |
| | | * \notefnerr | |
| | | * | |
| | | * \sa ::cuFuncSetBlockShape, | |
| | | * ::cuFuncSetSharedSize, | |
| | | * ::cuFuncGetAttribute, | |
| | | * ::cuParamSetSize, | |
| | | * ::cuParamSetf, | |
| | | * ::cuParamSeti, | |
| | | * ::cuParamSetv, | |
| | | * ::cuLaunch, | |
| | | * ::cuLaunchGrid | |
| | | */ | |
| | | CUresult CUDAAPI cuLaunchGridAsync(CUfunction f, int grid_width, int grid_h | |
| | | eight, CUstream hStream); | |
| | | | |
| | | /** | |
| | | * \defgroup CUDA_EXEC_DEPRECATED Execution Control [DEPRECATED] | |
| | | * | |
| | | * This section describes the deprecated execution control functions of the | |
| | | * low-level CUDA driver application programming interface. | |
| | | * | |
| | | * @{ | |
| | | */ | |
| | | | |
| | | /** | |
| | | * \brief Adds a texture-reference to the function's argument list | |
| | | * | |
| | | * \deprecated | |
| | | * | |
| | | * Makes the CUDA array or linear memory bound to the texture reference | |
| | | * \p hTexRef available to a device program as a texture. In this version o | |
| | | f | |
| | | * CUDA, the texture-reference must be obtained via ::cuModuleGetTexRef() a | |
| | | nd | |
| | | * the \p texunit parameter must be set to ::CU_PARAM_TR_DEFAULT. | |
| | | * | |
| | | * \param hfunc - Kernel to add texture-reference to | |
| | | * \param texunit - Texture unit (must be ::CU_PARAM_TR_DEFAULT) | |
| | | * \param hTexRef - Texture-reference to add to argument list | |
| | | * | |
| | | * \return | |
| | | * ::CUDA_SUCCESS, | |
| | | * ::CUDA_ERROR_DEINITIALIZED, | |
| | | * ::CUDA_ERROR_NOT_INITIALIZED, | |
| | | * ::CUDA_ERROR_INVALID_CONTEXT, | |
| | | * ::CUDA_ERROR_INVALID_VALUE | |
| | | * \notefnerr | |
| | | */ | |
| | | CUresult CUDAAPI cuParamSetTexRef(CUfunction hfunc, int texunit, CUtexref h | |
| | | TexRef); | |
| | | /** @} */ /* END CUDA_EXEC_DEPRECATED */ | |
| | | | |
| | | /** @} */ /* END CUDA_EXEC */ | |
| | | | |
| | | /** | |
| | | * \defgroup CUDA_TEXREF Texture Reference Management | |
| | | * | |
| | | * This section describes the texture reference management functions of the | |
| | | * low-level CUDA driver application programming interface. | |
| | | * | |
| | | * @{ | |
| | | */ | |
| | | | |
| | | /** | |
| | | * \brief Binds an array as a texture reference | |
| | | * | |
| | | * Binds the CUDA array \p hArray to the texture reference \p hTexRef. Any | |
| | | * previous address or CUDA array state associated with the texture referen | |
| | | ce | |
| | | * is superseded by this function. \p Flags must be set to | |
| | | * ::CU_TRSA_OVERRIDE_FORMAT. Any CUDA array previously bound to \p hTexRef | |
| | | is | |
| | | * unbound. | |
| | | * | |
| | | * \param hTexRef - Texture reference to bind | |
| | | * \param hArray - Array to bind | |
| | | * \param Flags - Options (must be ::CU_TRSA_OVERRIDE_FORMAT) | |
| | | * | |
| | | * \return | |
| | | * ::CUDA_SUCCESS, | |
| | | * ::CUDA_ERROR_DEINITIALIZED, | |
| | | * ::CUDA_ERROR_NOT_INITIALIZED, | |
| | | * ::CUDA_ERROR_INVALID_CONTEXT, | |
| | | * ::CUDA_ERROR_INVALID_VALUE | |
| | | * | |
| | | * \sa ::cuTexRefSetAddress, | |
| | | * ::cuTexRefSetAddress2D, ::cuTexRefSetAddressMode, | |
| | | * ::cuTexRefSetFilterMode, ::cuTexRefSetFlags, ::cuTexRefSetFormat, | |
| | | * ::cuTexRefGetAddress, ::cuTexRefGetAddressMode, ::cuTexRefGetArray, | |
| | | * ::cuTexRefGetFilterMode, ::cuTexRefGetFlags, ::cuTexRefGetFormat | |
| | | */ | |
| | | CUresult CUDAAPI cuTexRefSetArray(CUtexref hTexRef, CUarray hArray, unsigne | |
| | | d int Flags); | |
| | | | |
| | | #if __CUDA_API_VERSION >= 3020 | |
| | | /** | |
| | | * \brief Binds an address as a texture reference | |
| | | * | |
| | | * Binds a linear address range to the texture reference \p hTexRef. Any | |
| | | * previous address or CUDA array state associated with the texture referen | |
| | | ce | |
| | | * is superseded by this function. Any memory previously bound to \p hTexRe | |
| | | f | |
| | | * is unbound. | |
| | | * | |
| | | * Since the hardware enforces an alignment requirement on texture base | |
| | | * addresses, ::cuTexRefSetAddress() passes back a byte offset in | |
| | | * \p *ByteOffset that must be applied to texture fetches in order to read | |
| | | from | |
| | | * the desired memory. This offset must be divided by the texel size and | |
| | | * passed to kernels that read from the texture so it can be applied to the | |
| | | * ::tex1Dfetch() function. | |
| | | * | |
| | | * If the device memory pointer was returned from ::cuMemAlloc(), the offse | |
| | | t | |
| | | * is guaranteed to be 0 and NULL may be passed as the \p ByteOffset parame | |
| | | ter. | |
| | | * | |
| | | * \param ByteOffset - Returned byte offset | |
| | | * \param hTexRef - Texture reference to bind | |
| | | * \param dptr - Device pointer to bind | |
| | | * \param bytes - Size of memory to bind in bytes | |
| | | * | |
| | | * \return | |
| | | * ::CUDA_SUCCESS, | |
| | | * ::CUDA_ERROR_DEINITIALIZED, | |
| | | * ::CUDA_ERROR_NOT_INITIALIZED, | |
| | | * ::CUDA_ERROR_INVALID_CONTEXT, | |
| | | * ::CUDA_ERROR_INVALID_VALUE | |
| | | * | |
| | | * \sa ::cuTexRefSetAddress2D, ::cuTexRefSetAddressMode, ::cuTexRefSetArray | |
| | | , | |
| | | * ::cuTexRefSetFilterMode, ::cuTexRefSetFlags, ::cuTexRefSetFormat, | |
| | | * ::cuTexRefGetAddress, ::cuTexRefGetAddressMode, ::cuTexRefGetArray, | |
| | | * ::cuTexRefGetFilterMode, ::cuTexRefGetFlags, ::cuTexRefGetFormat | |
| | | */ | |
| | | CUresult CUDAAPI cuTexRefSetAddress(size_t *ByteOffset, CUtexref hTexRef, C | |
| | | Udeviceptr dptr, size_t bytes); | |
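/*
 * Illustrative usage sketch (editorial addition, not part of the original
 * header): binds a linear allocation to a texture reference and keeps the
 * returned byte offset. For memory from ::cuMemAlloc() the offset is
 * guaranteed to be 0, so kernels can usually ignore it; \p hTexRef is a
 * placeholder obtained earlier from ::cuModuleGetTexRef().
 * \code
    size_t numFloats = 4096;
    CUdeviceptr dptr;
    size_t byteOffset = 0;
    cuMemAlloc(&dptr, numFloats * sizeof(float));
    cuTexRefSetFormat(hTexRef, CU_AD_FORMAT_FLOAT, 1);
    cuTexRefSetAddress(&byteOffset, hTexRef, dptr, numFloats * sizeof(float));
    // byteOffset / sizeof(float) would be added to tex1Dfetch() indices
    // if it were non-zero.
 * \endcode
 */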
| | | | |
| | | /** | |
| | | * \brief Binds an address as a 2D texture reference | |
| | | * | |
| | | * Binds a linear address range to the texture reference \p hTexRef. Any | |
| | | * previous address or CUDA array state associated with the texture referen | |
| | | ce | |
| | | * is superseded by this function. Any memory previously bound to \p hTexRe | |
| | | f | |
| | | * is unbound. | |
| | | * | |
| | | * Using a ::tex2D() function inside a kernel requires a call to either | |
| | | * ::cuTexRefSetArray() to bind the corresponding texture reference to an | |
| | | * array, or ::cuTexRefSetAddress2D() to bind the texture reference to line | |
| | | ar | |
| | | * memory. | |
| | | * | |
| | | * Function calls to ::cuTexRefSetFormat() cannot follow calls to | |
| | | * ::cuTexRefSetAddress2D() for the same texture reference. | |
| | | * | |
| | | * It is required that \p dptr be aligned to the appropriate hardware-speci | |
| | | fic | |
| | | * texture alignment. You can query this value using the device attribute | |
| | | * ::CU_DEVICE_ATTRIBUTE_TEXTURE_ALIGNMENT. If an unaligned \p dptr is | |
| | | * supplied, ::CUDA_ERROR_INVALID_VALUE is returned. | |
| | | * | |
| | | * \param hTexRef - Texture reference to bind | |
| | | * \param desc - Descriptor of CUDA array | |
| | | * \param dptr - Device pointer to bind | |
| | | * \param Pitch - Line pitch in bytes | |
| | | * | |
| | | * \return | |
| | | * ::CUDA_SUCCESS, | |
| | | * ::CUDA_ERROR_DEINITIALIZED, | |
| | | * ::CUDA_ERROR_NOT_INITIALIZED, | |
| | | * ::CUDA_ERROR_INVALID_CONTEXT, | |
| | | * ::CUDA_ERROR_INVALID_VALUE | |
| | | * | |
| | | * \sa ::cuTexRefSetAddress, | |
| | | * ::cuTexRefSetAddressMode, ::cuTexRefSetArray, | |
| | | * ::cuTexRefSetFilterMode, ::cuTexRefSetFlags, ::cuTexRefSetFormat, | |
| | | * ::cuTexRefGetAddress, ::cuTexRefGetAddressMode, ::cuTexRefGetArray, | |
| | | * ::cuTexRefGetFilterMode, ::cuTexRefGetFlags, ::cuTexRefGetFormat | |
| | | */ | |
| | | CUresult CUDAAPI cuTexRefSetAddress2D(CUtexref hTexRef, const CUDA_ARRAY_DE | |
| | | SCRIPTOR *desc, CUdeviceptr dptr, size_t Pitch); | |
| | | #endif /* __CUDA_API_VERSION >= 3020 */ | |
| | | | |
| | | /** | |
| | | * \brief Sets the format for a texture reference | |
| | | * | |
| | | * Specifies the format of the data to be read by the texture reference | |
| | | * \p hTexRef. \p fmt and \p NumPackedComponents are exactly analogous to t | |
| | | he | |
| | | * ::Format and ::NumChannels members of the ::CUDA_ARRAY_DESCRIPTOR struct | |
| | | ure: | |
| | | * They specify the format of each component and the number of components p | |
| | | er | |
| | | * array element. | |
| | | * | |
| | | * \param hTexRef - Texture reference | |
| | | * \param fmt - Format to set | |
| | | * \param NumPackedComponents - Number of components per array element | |
| | | * | |
| | | * \return | |
| | | * ::CUDA_SUCCESS, | |
| | | * ::CUDA_ERROR_DEINITIALIZED, | |
| | | * ::CUDA_ERROR_NOT_INITIALIZED, | |
| | | * ::CUDA_ERROR_INVALID_CONTEXT, | |
| | | * ::CUDA_ERROR_INVALID_VALUE | |
| | | * | |
| | | * \sa ::cuTexRefSetAddress, | |
| | | * ::cuTexRefSetAddress2D, ::cuTexRefSetAddressMode, ::cuTexRefSetArray, | |
| | | * ::cuTexRefSetFilterMode, ::cuTexRefSetFlags, | |
| | | * ::cuTexRefGetAddress, ::cuTexRefGetAddressMode, ::cuTexRefGetArray, | |
| | | * ::cuTexRefGetFilterMode, ::cuTexRefGetFlags, ::cuTexRefGetFormat | |
| | | */ | |
| | | CUresult CUDAAPI cuTexRefSetFormat(CUtexref hTexRef, CUarray_format fmt, in | |
| | | t NumPackedComponents); | |
| | | | |
| | | /** | |
| | | * \brief Sets the addressing mode for a texture reference | |
| | | * | |
| | | * Specifies the addressing mode \p am for the given dimension \p dim of th | |
| | | e | |
| | | * texture reference \p hTexRef. If \p dim is zero, the addressing mode is | |
| | | * applied to the first parameter of the functions used to fetch from the | |
| | | * texture; if \p dim is 1, the second, and so on. ::CUaddress_mode is defi | |
| | | ned | |
| | | * as: | |
| | | * \code | |
| | | typedef enum CUaddress_mode_enum { | |
| | | CU_TR_ADDRESS_MODE_WRAP = 0, | |
| | | CU_TR_ADDRESS_MODE_CLAMP = 1, | |
| | | CU_TR_ADDRESS_MODE_MIRROR = 2, | |
| | | CU_TR_ADDRESS_MODE_BORDER = 3 | |
| | | } CUaddress_mode; | |
| | | * \endcode | |
| | | * | |
| | | * Note that this call has no effect if \p hTexRef is bound to linear memor | |
| | | y. | |
| | | * | |
| | | * \param hTexRef - Texture reference | |
| | | * \param dim - Dimension | |
| | | * \param am - Addressing mode to set | |
| | | * | |
| | | * \return | |
| | | * ::CUDA_SUCCESS, | |
| | | * ::CUDA_ERROR_DEINITIALIZED, | |
| | | * ::CUDA_ERROR_NOT_INITIALIZED, | |
| | | * ::CUDA_ERROR_INVALID_CONTEXT, | |
| | | * ::CUDA_ERROR_INVALID_VALUE | |
| | | * | |
| | | * \sa ::cuTexRefSetAddress, | |
| | | * ::cuTexRefSetAddress2D, ::cuTexRefSetArray, | |
| | | * ::cuTexRefSetFilterMode, ::cuTexRefSetFlags, ::cuTexRefSetFormat, | |
| | | * ::cuTexRefGetAddress, ::cuTexRefGetAddressMode, ::cuTexRefGetArray, | |
| | | * ::cuTexRefGetFilterMode, ::cuTexRefGetFlags, ::cuTexRefGetFormat | |
| | | */ | |
| | | CUresult CUDAAPI cuTexRefSetAddressMode(CUtexref hTexRef, int dim, CUaddres | |
| | | s_mode am); | |
| | | | |
| | | /** | |
| | | * \brief Sets the filtering mode for a texture reference | |
| | | * | |
| | | * Specifies the filtering mode \p fm to be used when reading memory throug | |
| | | h | |
| | | * the texture reference \p hTexRef. ::CUfilter_mode_enum is defined as: | |
| | | * | |
| | | * \code | |
| | | typedef enum CUfilter_mode_enum { | |
| | | CU_TR_FILTER_MODE_POINT = 0, | |
| | | CU_TR_FILTER_MODE_LINEAR = 1 | |
| | | } CUfilter_mode; | |
| | | * \endcode | |
| | | * | |
| | | * Note that this call has no effect if \p hTexRef is bound to linear memor | |
| | | y. | |
| | | * | |
| | | * \param hTexRef - Texture reference | |
| | | * \param fm - Filtering mode to set | |
| | | * | |
| | | * \return | |
| | | * ::CUDA_SUCCESS, | |
| | | * ::CUDA_ERROR_DEINITIALIZED, | |
| | | * ::CUDA_ERROR_NOT_INITIALIZED, | |
| | | * ::CUDA_ERROR_INVALID_CONTEXT, | |
| | | * ::CUDA_ERROR_INVALID_VALUE | |
| | | * | |
| | | * \sa ::cuTexRefSetAddress, | |
| | | * ::cuTexRefSetAddress2D, ::cuTexRefSetAddressMode, ::cuTexRefSetArray, | |
| | | * ::cuTexRefSetFlags, ::cuTexRefSetFormat, | |
| | | * ::cuTexRefGetAddress, ::cuTexRefGetAddressMode, ::cuTexRefGetArray, | |
| | | * ::cuTexRefGetFilterMode, ::cuTexRefGetFlags, ::cuTexRefGetFormat | |
| | | */ | |
| | | CUresult CUDAAPI cuTexRefSetFilterMode(CUtexref hTexRef, CUfilter_mode fm); | |
| | | | |
| | | /** | |
| | | * \brief Sets the flags for a texture reference | |
| | | * | |
| | | * Specifies optional flags via \p Flags to specify the behavior of data | |
| | | * returned through the texture reference \p hTexRef. The valid flags are: | |
| | | * | |
| | | * - ::CU_TRSF_READ_AS_INTEGER, which suppresses the default behavior of | |
| | | * having the texture promote integer data to floating point data in the | |
| | | * range [0, 1]; | |
| | | * - ::CU_TRSF_NORMALIZED_COORDINATES, which suppresses the default behavio | |
| | | r | |
| | | * of having the texture coordinates range from [0, Dim) where Dim is the | |
| | | * width or height of the CUDA array. Instead, the texture coordinates | |
| | | * [0, 1.0) reference the entire breadth of the array dimension. | |
| | | * | |
| | | * \param hTexRef - Texture reference | |
| | | * \param Flags - Optional flags to set | |
| | | * | |
| | | * \return | |
| | | * ::CUDA_SUCCESS, | |
| | | * ::CUDA_ERROR_DEINITIALIZED, | |
| | | * ::CUDA_ERROR_NOT_INITIALIZED, | |
| | | * ::CUDA_ERROR_INVALID_CONTEXT, | |
| | | * ::CUDA_ERROR_INVALID_VALUE | |
| | | * | |
| | | * \sa ::cuTexRefSetAddress, | |
| | | * ::cuTexRefSetAddress2D, ::cuTexRefSetAddressMode, ::cuTexRefSetArray, | |
| | | * ::cuTexRefSetFilterMode, ::cuTexRefSetFormat, | |
| | | * ::cuTexRefGetAddress, ::cuTexRefGetAddressMode, ::cuTexRefGetArray, | |
| | | * ::cuTexRefGetFilterMode, ::cuTexRefGetFlags, ::cuTexRefGetFormat | |
| | | */ | |
| | | CUresult CUDAAPI cuTexRefSetFlags(CUtexref hTexRef, unsigned int Flags); | |
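Combining the setters above, a hedged sketch of a common configuration for filtered reads with normalized coordinates (texRef assumed valid; integer array data is promoted to float in [0, 1] because ::CU_TRSF_READ_AS_INTEGER is not set):
\code
    cuTexRefSetFilterMode(texRef, CU_TR_FILTER_MODE_LINEAR);
    cuTexRefSetFlags(texRef, CU_TRSF_NORMALIZED_COORDINATES);
\endcode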
| | | | |
| | | #if __CUDA_API_VERSION >= 3020 | |
| | | /** | |
| | | * \brief Gets the address associated with a texture reference | |
| | | * | |
| | | * Returns in \p *pdptr the base address bound to the texture reference | |
| | | * \p hTexRef, or returns ::CUDA_ERROR_INVALID_VALUE if the texture referen | |
| | | ce | |
| | | * is not bound to any device memory range. | |
| | | * | |
| | | * \param pdptr - Returned device address | |
| | | * \param hTexRef - Texture reference | |
| | | * | |
| | | * \return | |
| | | * ::CUDA_SUCCESS, | |
| | | * ::CUDA_ERROR_DEINITIALIZED, | |
| | | * ::CUDA_ERROR_NOT_INITIALIZED, | |
| | | * ::CUDA_ERROR_INVALID_CONTEXT, | |
| | | * ::CUDA_ERROR_INVALID_VALUE | |
| | | * | |
| | | * \sa ::cuTexRefSetAddress, | |
| | | * ::cuTexRefSetAddress2D, ::cuTexRefSetAddressMode, ::cuTexRefSetArray, | |
| | | * ::cuTexRefSetFilterMode, ::cuTexRefSetFlags, ::cuTexRefSetFormat, | |
| | | * ::cuTexRefGetAddressMode, ::cuTexRefGetArray, | |
| | | * ::cuTexRefGetFilterMode, ::cuTexRefGetFlags, ::cuTexRefGetFormat | |
| | | */ | |
| | | CUresult CUDAAPI cuTexRefGetAddress(CUdeviceptr *pdptr, CUtexref hTexRef); | |
| | | #endif /* __CUDA_API_VERSION >= 3020 */ | |
| | | | |
| | | /** | |
| | | * \brief Gets the array bound to a texture reference | |
| | | * | |
| | | * Returns in \p *phArray the CUDA array bound to the texture reference | |
| | | * \p hTexRef, or returns ::CUDA_ERROR_INVALID_VALUE if the texture referen | |
| | | ce | |
| | | * is not bound to any CUDA array. | |
| | | * | |
| | | * \param phArray - Returned array | |
| | | * \param hTexRef - Texture reference | |
| | | * | |
| | | * \return | |
| | | * ::CUDA_SUCCESS, | |
| | | * ::CUDA_ERROR_DEINITIALIZED, | |
| | | * ::CUDA_ERROR_NOT_INITIALIZED, | |
| | | * ::CUDA_ERROR_INVALID_CONTEXT, | |
| | | * ::CUDA_ERROR_INVALID_VALUE | |
| | | * | |
| | | * \sa ::cuTexRefSetAddress, | |
| | | * ::cuTexRefSetAddress2D, ::cuTexRefSetAddressMode, ::cuTexRefSetArray, | |
| | | * ::cuTexRefSetFilterMode, ::cuTexRefSetFlags, ::cuTexRefSetFormat, | |
| | | * ::cuTexRefGetAddress, ::cuTexRefGetAddressMode, | |
| | | * ::cuTexRefGetFilterMode, ::cuTexRefGetFlags, ::cuTexRefGetFormat | |
| | | */ | |
| | | CUresult CUDAAPI cuTexRefGetArray(CUarray *phArray, CUtexref hTexRef); | |
| | | | |
| | | /** | |
| | | * \brief Gets the addressing mode used by a texture reference | |
| | | * | |
| | | * Returns in \p *pam the addressing mode corresponding to the | |
| | | * dimension \p dim of the texture reference \p hTexRef. Currently, the onl | |
| | | y | |
| | | * valid values for \p dim are 0 and 1. | |
| | | * | |
| | | * \param pam - Returned addressing mode | |
| | | * \param hTexRef - Texture reference | |
| | | * \param dim - Dimension | |
| | | * | |
| | | * \return | |
| | | * ::CUDA_SUCCESS, | |
| | | * ::CUDA_ERROR_DEINITIALIZED, | |
| | | * ::CUDA_ERROR_NOT_INITIALIZED, | |
| | | * ::CUDA_ERROR_INVALID_CONTEXT, | |
| | | * ::CUDA_ERROR_INVALID_VALUE | |
| | | * | |
| | | * \sa ::cuTexRefSetAddress, | |
| | | * ::cuTexRefSetAddress2D, ::cuTexRefSetAddressMode, ::cuTexRefSetArray, | |
| | | * ::cuTexRefSetFilterMode, ::cuTexRefSetFlags, ::cuTexRefSetFormat, | |
| | | * ::cuTexRefGetAddress, ::cuTexRefGetArray, | |
| | | * ::cuTexRefGetFilterMode, ::cuTexRefGetFlags, ::cuTexRefGetFormat | |
| | | */ | |
| | | CUresult CUDAAPI cuTexRefGetAddressMode(CUaddress_mode *pam, CUtexref hTexR | |
| | | ef, int dim); | |
| | | | |
| | | /** | |
| | | * \brief Gets the filter-mode used by a texture reference | |
| | | * | |
| | | * Returns in \p *pfm the filtering mode of the texture reference | |
| | | * \p hTexRef. | |
| | | * | |
| | | * \param pfm - Returned filtering mode | |
| | | * \param hTexRef - Texture reference | |
| | | * | |
| | | * \return | |
| | | * ::CUDA_SUCCESS, | |
| | | * ::CUDA_ERROR_DEINITIALIZED, | |
| | | * ::CUDA_ERROR_NOT_INITIALIZED, | |
| | | * ::CUDA_ERROR_INVALID_CONTEXT, | |
| | | * ::CUDA_ERROR_INVALID_VALUE | |
| | | * | |
| | | * \sa ::cuTexRefSetAddress, | |
| | | * ::cuTexRefSetAddress2D, ::cuTexRefSetAddressMode, ::cuTexRefSetArray, | |
| | | * ::cuTexRefSetFilterMode, ::cuTexRefSetFlags, ::cuTexRefSetFormat, | |
| | | * ::cuTexRefGetAddress, ::cuTexRefGetAddressMode, ::cuTexRefGetArray, | |
| | | * ::cuTexRefGetFlags, ::cuTexRefGetFormat | |
| | | */ | |
| | | CUresult CUDAAPI cuTexRefGetFilterMode(CUfilter_mode *pfm, CUtexref hTexRef | |
| | | ); | |
| | | | |
| | | /** | |
| | | * \brief Gets the format used by a texture reference | |
| | | * | |
| | | * Returns in \p *pFormat and \p *pNumChannels the format and number | |
| | | * of components of the CUDA array bound to the texture reference \p hTexRe | |
| | | f. | |
| | | * If \p pFormat or \p pNumChannels is NULL, it will be ignored. | |
| | | * | |
| | | * \param pFormat - Returned format | |
| | | * \param pNumChannels - Returned number of components | |
| | | * \param hTexRef - Texture reference | |
| | | * | |
| | | * \return | |
| | | * ::CUDA_SUCCESS, | |
| | | * ::CUDA_ERROR_DEINITIALIZED, | |
| | | * ::CUDA_ERROR_NOT_INITIALIZED, | |
| | | * ::CUDA_ERROR_INVALID_CONTEXT, | |
| | | * ::CUDA_ERROR_INVALID_VALUE | |
| | | * | |
| | | * \sa ::cuTexRefSetAddress, | |
| | | * ::cuTexRefSetAddress2D, ::cuTexRefSetAddressMode, ::cuTexRefSetArray, | |
| | | * ::cuTexRefSetFilterMode, ::cuTexRefSetFlags, ::cuTexRefSetFormat, | |
| | | * ::cuTexRefGetAddress, ::cuTexRefGetAddressMode, ::cuTexRefGetArray, | |
| | | * ::cuTexRefGetFilterMode, ::cuTexRefGetFlags | |
| | | */ | |
| | | CUresult CUDAAPI cuTexRefGetFormat(CUarray_format *pFormat, int *pNumChanne | |
| | | ls, CUtexref hTexRef); | |
| | | | |
| | | /** | |
| | | * \brief Gets the flags used by a texture reference | |
| | | * | |
| | | * Returns in \p *pFlags the flags of the texture reference \p hTexRef. | |
| | | * | |
| | | * \param pFlags - Returned flags | |
| | | * \param hTexRef - Texture reference | |
| | | * | |
| | | * \return | |
| | | * ::CUDA_SUCCESS, | |
| | | * ::CUDA_ERROR_DEINITIALIZED, | |
| | | * ::CUDA_ERROR_NOT_INITIALIZED, | |
| | | * ::CUDA_ERROR_INVALID_CONTEXT, | |
| | | * ::CUDA_ERROR_INVALID_VALUE | |
| | | * | |
| | | * \sa ::cuTexRefSetAddress, | |
| | | * ::cuTexRefSetAddress2D, ::cuTexRefSetAddressMode, ::cuTexRefSetArray, | |
| | | * ::cuTexRefSetFilterMode, ::cuTexRefSetFlags, ::cuTexRefSetFormat, | |
| | | * ::cuTexRefGetAddress, ::cuTexRefGetAddressMode, ::cuTexRefGetArray, | |
| | | * ::cuTexRefGetFilterMode, ::cuTexRefGetFormat | |
| | | */ | |
| | | CUresult CUDAAPI cuTexRefGetFlags(unsigned int *pFlags, CUtexref hTexRef); | |
| | | | |
| | | /** | |
| | | * \defgroup CUDA_TEXREF_DEPRECATED Texture Reference Management [DEPRECATE | |
| | | D] | |
| | | * | |
| | | * This section describes the deprecated texture reference management | |
| | | * functions of the low-level CUDA driver application programming interface | |
| | | . | |
| | | * | |
| | | * @{ | |
| | | */ | |
| | | | |
| | | /** | |
| | | * \brief Creates a texture reference | |
| | | * | |
| | | * \deprecated | |
| | | * | |
| | | * Creates a texture reference and returns its handle in \p *pTexRef. Once | |
| | | * created, the application must call ::cuTexRefSetArray() or | |
| | | * ::cuTexRefSetAddress() to associate the reference with allocated memory. | |
| | | * Other texture reference functions are used to specify the format and | |
| | | * interpretation (addressing, filtering, etc.) to be used when the memory | |
| | | is | |
| | | * read through this texture reference. | |
| | | * | |
| | | * \param pTexRef - Returned texture reference | |
| | | * | |
| | | * \return | |
| | | * ::CUDA_SUCCESS, | |
| | | * ::CUDA_ERROR_DEINITIALIZED, | |
| | | * ::CUDA_ERROR_NOT_INITIALIZED, | |
| | | * ::CUDA_ERROR_INVALID_CONTEXT, | |
| | | * ::CUDA_ERROR_INVALID_VALUE | |
| | | * | |
| | | * \sa ::cuTexRefDestroy | |
| | | */ | |
| | | CUresult CUDAAPI cuTexRefCreate(CUtexref *pTexRef); | |
| | | | |
| | | /** | |
| | | * \brief Destroys a texture reference | |
| | | * | |
| | | * \deprecated | |
| | | * | |
| | | * Destroys the texture reference specified by \p hTexRef. | |
| | | * | |
| | | * \param hTexRef - Texture reference to destroy | |
| | | * | |
| | | * \return | |
| | | * ::CUDA_SUCCESS, | |
| | | * ::CUDA_ERROR_DEINITIALIZED, | |
| | | * ::CUDA_ERROR_NOT_INITIALIZED, | |
| | | * ::CUDA_ERROR_INVALID_CONTEXT, | |
| | | * ::CUDA_ERROR_INVALID_VALUE | |
| | | * | |
| | | * \sa ::cuTexRefCreate | |
| | | */ | |
| | | CUresult CUDAAPI cuTexRefDestroy(CUtexref hTexRef); | |
| | | | |
| | | /** @} */ /* END CUDA_TEXREF_DEPRECATED */ | |
| | | | |
| | | /** @} */ /* END CUDA_TEXREF */ | |
| | | | |
| | | /** | |
| | | * \defgroup CUDA_SURFREF Surface Reference Management | |
| | | * | |
| | | * This section describes the surface reference management functions of the | |
| | | * low-level CUDA driver application programming interface. | |
| | | * | |
| | | * @{ | |
| | | */ | |
| | | | |
| | | /** | |
| | | * \brief Sets the CUDA array for a surface reference. | |
| | | * | |
| | | * Sets the CUDA array \p hArray to be read and written by the surface refe | |
| | | rence | |
| | | * \p hSurfRef. Any previous CUDA array state associated with the surface | |
| | | * reference is superseded by this function. \p Flags must be set to 0. | |
| | | * The ::CUDA_ARRAY3D_SURFACE_LDST flag must have been set for the CUDA arr | |
| | | ay. | |
| | | * Any CUDA array previously bound to \p hSurfRef is unbound. | |
| | | * | |
| | | * \param hSurfRef - Surface reference handle | |
| | | * \param hArray - CUDA array handle | |
| | | * \param Flags - set to 0 | |
| | | * | |
| | | * \return | |
| | | * ::CUDA_SUCCESS, | |
| | | * ::CUDA_ERROR_DEINITIALIZED, | |
| | | * ::CUDA_ERROR_NOT_INITIALIZED, | |
| | | * ::CUDA_ERROR_INVALID_CONTEXT, | |
| | | * ::CUDA_ERROR_INVALID_VALUE | |
| | | * | |
| | | * \sa ::cuModuleGetSurfRef, ::cuSurfRefGetArray | |
| | | */ | |
| | | CUresult CUDAAPI cuSurfRefSetArray(CUsurfref hSurfRef, CUarray hArray, unsi | |
| | | gned int Flags); | |
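A sketch of binding an array for surface load/store; hModule is assumed to be a loaded module declaring a surface named "surfA", and error checking is omitted:
\code
    CUDA_ARRAY3D_DESCRIPTOR desc;
    CUarray                 hArray;
    CUsurfref               surfRef;
    desc.Width       = 256;
    desc.Height      = 256;
    desc.Depth       = 0;                          /* 2D array */
    desc.Format      = CU_AD_FORMAT_UNSIGNED_INT8;
    desc.NumChannels = 4;
    desc.Flags       = CUDA_ARRAY3D_SURFACE_LDST;  /* required for surface binding */
    cuArray3DCreate(&hArray, &desc);
    cuModuleGetSurfRef(&surfRef, hModule, "surfA");
    cuSurfRefSetArray(surfRef, hArray, 0);         /* Flags must be 0 */
\endcode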
| | | | |
| | | /** | |
| | | * \brief Passes back the CUDA array bound to a surface reference. | |
| | | * | |
| | | * Returns in \p *phArray the CUDA array bound to the surface reference | |
| | | * \p hSurfRef, or returns ::CUDA_ERROR_INVALID_VALUE if the surface refere | |
| | | nce | |
| | | * is not bound to any CUDA array. | |
| | | * | |
| | | * \param phArray - Returned CUDA array | |
| | | * \param hSurfRef - Surface reference handle | |
| | | * | |
| | | * \return | |
| | | * ::CUDA_SUCCESS, | |
| | | * ::CUDA_ERROR_DEINITIALIZED, | |
| | | * ::CUDA_ERROR_NOT_INITIALIZED, | |
| | | * ::CUDA_ERROR_INVALID_CONTEXT, | |
| | | * ::CUDA_ERROR_INVALID_VALUE | |
| | | * | |
| | | * \sa ::cuModuleGetSurfRef, ::cuSurfRefSetArray | |
| | | */ | |
| | | CUresult CUDAAPI cuSurfRefGetArray(CUarray *phArray, CUsurfref hSurfRef); | |
| | | | |
| | | /** @} */ /* END CUDA_SURFREF */ | |
| | | | |
| | | /** | |
| | | * \defgroup CUDA_GRAPHICS Graphics Interoperability | |
| | | * | |
| | | * This section describes the graphics interoperability functions of the | |
| | | * low-level CUDA driver application programming interface. | |
| | | * | |
| | | * @{ | |
| | | */ | |
| | | | |
| | | /** | |
| | | * \brief Unregisters a graphics resource for access by CUDA | |
| | | * | |
| | | * Unregisters the graphics resource \p resource so it is not accessible by | |
| | | * CUDA unless registered again. | |
| | | * | |
| | | * If \p resource is invalid then ::CUDA_ERROR_INVALID_HANDLE is | |
| | | * returned. | |
| | | * | |
| | | * \param resource - Resource to unregister | |
| | | * | |
| | | * \return | |
| | | * ::CUDA_SUCCESS, | |
| | | * ::CUDA_ERROR_DEINITIALIZED, | |
| | | * ::CUDA_ERROR_NOT_INITIALIZED, | |
| | | * ::CUDA_ERROR_INVALID_CONTEXT, | |
| | | * ::CUDA_ERROR_INVALID_HANDLE, | |
| | | * ::CUDA_ERROR_UNKNOWN | |
| | | * \notefnerr | |
| | | * | |
| | | * \sa | |
| | | * ::cuGraphicsD3D9RegisterResource, | |
| | | * ::cuGraphicsD3D10RegisterResource, | |
| | | * ::cuGraphicsD3D11RegisterResource, | |
| | | * ::cuGraphicsGLRegisterBuffer, | |
| | | * ::cuGraphicsGLRegisterImage | |
| | | */ | |
| | | CUresult CUDAAPI cuGraphicsUnregisterResource(CUgraphicsResource resource); | |
| | | | |
| | | /** | |
| | | * \brief Get an array through which to access a subresource of a mapped gr | |
| | | aphics resource. | |
| | | * | |
| | | * Returns in \p *pArray an array through which the subresource of the mapp | |
| | | ed | |
| | | * graphics resource \p resource which corresponds to array index \p arrayI | |
| | | ndex | |
| | | * and mipmap level \p mipLevel may be accessed. The value set in \p *pArr | |
| | | ay may | |
| | | * change every time that \p resource is mapped. | |
| | | * | |
| | | * If \p resource is not a texture then it cannot be accessed via an array | |
| | | and | |
| | | * ::CUDA_ERROR_NOT_MAPPED_AS_ARRAY is returned. | |
| | | * If \p arrayIndex is not a valid array index for \p resource then | |
| | | * ::CUDA_ERROR_INVALID_VALUE is returned. | |
| | | * If \p mipLevel is not a valid mipmap level for \p resource then | |
| | | * ::CUDA_ERROR_INVALID_VALUE is returned. | |
| | | * If \p resource is not mapped then ::CUDA_ERROR_NOT_MAPPED is returned. | |
| | | * | |
| | | * \param pArray - Returned array through which a subresource of \p re | |
| | | source may be accessed | |
| | | * \param resource - Mapped resource to access | |
| | | * \param arrayIndex - Array index for array textures or cubemap face | |
| | | * index as defined by ::CUarray_cubemap_face for | |
| | | * cubemap textures for the subresource to access | |
| | | * \param mipLevel - Mipmap level for the subresource to access | |
| | | * | |
| | | * \return | |
| | | * ::CUDA_SUCCESS, | |
| | | * ::CUDA_ERROR_DEINITIALIZED, | |
| | | * ::CUDA_ERROR_NOT_INITIALIZED, | |
| | | * ::CUDA_ERROR_INVALID_CONTEXT, | |
| | | * ::CUDA_ERROR_INVALID_VALUE, | |
| | | * ::CUDA_ERROR_INVALID_HANDLE, | |
| | | * ::CUDA_ERROR_NOT_MAPPED, | |
| | | * ::CUDA_ERROR_NOT_MAPPED_AS_ARRAY | |
| | | * \notefnerr | |
| | | * | |
| | | * \sa ::cuGraphicsResourceGetMappedPointer | |
| | | */ | |
| | | CUresult CUDAAPI cuGraphicsSubResourceGetMappedArray(CUarray *pArray, CUgra | |
| | | phicsResource resource, unsigned int arrayIndex, unsigned int mipLevel); | |
| | | | |
| | | #if __CUDA_API_VERSION >= 3020 | |
| | | /** | |
| | | * \brief Get a device pointer through which to access a mapped graphics re | |
| | | source. | |
| | | * | |
| | | * Returns in \p *pDevPtr a pointer through which the mapped graphics resou | |
| | | rce | |
| | | * \p resource may be accessed. | |
| | | * Returns in \p pSize the size of the memory in bytes which may be accesse | |
| | | d from that pointer. | |
| | | * The value set in \p *pDevPtr may change every time that \p resource is mapped. | |
| | | * | |
| | | * If \p resource is not a buffer then it cannot be accessed via a pointer | |
| | | and | |
| | | * ::CUDA_ERROR_NOT_MAPPED_AS_POINTER is returned. | |
| | | * If \p resource is not mapped then ::CUDA_ERROR_NOT_MAPPED is returned. | |
| | | * | |
| | | * \param pDevPtr - Returned pointer through which \p resource may be ac | |
| | | cessed | |
| | | * \param pSize     - Returned size of the buffer accessible starting at \p *pDevPtr | |
| | | * \param resource - Mapped resource to access | |
| | | * | |
| | | * \return | |
| | | * ::CUDA_SUCCESS, | |
| | | * ::CUDA_ERROR_DEINITIALIZED, | |
| | | * ::CUDA_ERROR_NOT_INITIALIZED, | |
| | | * ::CUDA_ERROR_INVALID_CONTEXT, | |
| | | * ::CUDA_ERROR_INVALID_VALUE, | |
| | | * ::CUDA_ERROR_INVALID_HANDLE, | |
| | | * ::CUDA_ERROR_NOT_MAPPED, | |
| | | * ::CUDA_ERROR_NOT_MAPPED_AS_POINTER | |
| | | * \notefnerr | |
| | | * | |
| | | * \sa | |
| | | * ::cuGraphicsMapResources, | |
| | | * ::cuGraphicsSubResourceGetMappedArray | |
| | | */ | |
| | | CUresult CUDAAPI cuGraphicsResourceGetMappedPointer(CUdeviceptr *pDevPtr, s | |
| | | ize_t *pSize, CUgraphicsResource resource); | |
| | | #endif /* __CUDA_API_VERSION >= 3020 */ | |
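A short sketch of reading back the mapped pointer; resource is assumed to have been registered (for example with ::cuGraphicsGLRegisterBuffer()) and to be currently mapped:
\code
    CUdeviceptr devPtr;
    size_t      size;
    cuGraphicsResourceGetMappedPointer(&devPtr, &size, resource);
    /* devPtr and size are only valid until the resource is unmapped. */
\endcode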
| | | | |
| | | /** | |
| | | * \brief Set usage flags for mapping a graphics resource | |
| | | * | |
| | | * Set \p flags for mapping the graphics resource \p resource. | |
| | | * | |
| | | * Changes to \p flags will take effect the next time \p resource is mapped | |
| | | . | |
| | | * The \p flags argument may be any of the following: | |
| | | * | |
| | | * - ::CU_GRAPHICS_MAP_RESOURCE_FLAGS_NONE: Specifies no hints about how th | |
| | | is | |
| | | * resource will be used. It is therefore assumed that this resource will | |
| | | be | |
| | | * read from and written to by CUDA kernels. This is the default value. | |
| | | * - ::CU_GRAPHICS_MAP_RESOURCE_FLAGS_READONLY: Specifies that CUDA kernels | |
| | | which | |
| | | * access this resource will not write to this resource. | |
| | | * - ::CU_GRAPHICS_MAP_RESOURCE_FLAGS_WRITEDISCARD: Specifies that CUDA ker | |
| | | nels | |
| | | * which access this resource will not read from this resource and will | |
| | | * write over the entire contents of the resource, so none of the data | |
| | | * previously stored in the resource will be preserved. | |
| | | * | |
| | | * If \p resource is presently mapped for access by CUDA then | |
| | | * ::CUDA_ERROR_ALREADY_MAPPED is returned. | |
| | | * If \p flags is not one of the above values then ::CUDA_ERROR_INVALID_VAL | |
| | | UE is returned. | |
| | | * | |
| | | * \param resource - Registered resource to set flags for | |
| | | * \param flags - Parameters for resource mapping | |
| | | * | |
| | | * \return | |
| | | * ::CUDA_SUCCESS, | |
| | | * ::CUDA_ERROR_DEINITIALIZED, | |
| | | * ::CUDA_ERROR_NOT_INITIALIZED, | |
| | | * ::CUDA_ERROR_INVALID_CONTEXT, | |
| | | * ::CUDA_ERROR_INVALID_VALUE, | |
| | | * ::CUDA_ERROR_INVALID_HANDLE, | |
| | | * ::CUDA_ERROR_ALREADY_MAPPED | |
| | | * \notefnerr | |
| | | * | |
| | | * \sa | |
| | | * ::cuGraphicsMapResources | |
| | | */ | |
| | | CUresult CUDAAPI cuGraphicsResourceSetMapFlags(CUgraphicsResource resource, | |
| | | unsigned int flags); | |
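For example, a kernel-read-only hint could be installed as below (resource is assumed registered and not currently mapped; the flag name follows the list above):
\code
    /* Takes effect the next time the resource is mapped. */
    cuGraphicsResourceSetMapFlags(resource, CU_GRAPHICS_MAP_RESOURCE_FLAGS_READONLY);
\endcode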
| | | | |
| | | /** | |
| | | * \brief Map graphics resources for access by CUDA | |
| | | * | |
| | | * Maps the \p count graphics resources in \p resources for access by CUDA. | |
| | | * | |
| | | * The resources in \p resources may be accessed by CUDA until they | |
| | | * are unmapped. The graphics API from which \p resources were registered | |
| | | * should not access any resources while they are mapped by CUDA. If an | |
| | | * application does so, the results are undefined. | |
| | | * | |
| | | * This function provides the synchronization guarantee that any graphics c | |
| | | alls | |
| | | * issued before ::cuGraphicsMapResources() will complete before any subseq | |
| | | uent CUDA | |
| | | * work issued in \p hStream begins. | |
| | | * | |
| | | * If \p resources includes any duplicate entries then ::CUDA_ERROR_INVALID | |
| | | _HANDLE is returned. | |
| | | * If any of \p resources are presently mapped for access by CUDA then ::CU | |
| | | DA_ERROR_ALREADY_MAPPED is returned. | |
| | | * | |
| | | * \param count - Number of resources to map | |
| | | * \param resources - Resources to map for CUDA usage | |
| | | * \param hStream - Stream with which to synchronize | |
| | | * | |
| | | * \return | |
| | | * ::CUDA_SUCCESS, | |
| | | * ::CUDA_ERROR_DEINITIALIZED, | |
| | | * ::CUDA_ERROR_NOT_INITIALIZED, | |
| | | * ::CUDA_ERROR_INVALID_CONTEXT, | |
| | | * ::CUDA_ERROR_INVALID_HANDLE, | |
| | | * ::CUDA_ERROR_ALREADY_MAPPED, | |
| | | * ::CUDA_ERROR_UNKNOWN | |
| | | * \notefnerr | |
| | | * | |
| | | * \sa | |
| | | * ::cuGraphicsResourceGetMappedPointer | |
| | | * ::cuGraphicsSubResourceGetMappedArray | |
| | | * ::cuGraphicsUnmapResources | |
| | | */ | |
| | | CUresult CUDAAPI cuGraphicsMapResources(unsigned int count, CUgraphicsResou | |
| | | rce *resources, CUstream hStream); | |
| | | | |
| | | /** | |
| | | * \brief Unmap graphics resources. | |
| | | * | |
| | | * Unmaps the \p count graphics resources in \p resources. | |
| | | * | |
| | | * Once unmapped, the resources in \p resources may not be accessed by CUDA | |
| | | * until they are mapped again. | |
| | | * | |
| | | * This function provides the synchronization guarantee that any CUDA work | |
| | | issued | |
| | | * in \p hStream before ::cuGraphicsUnmapResources() will complete before any | |
| | | * subsequently issued graphics work begins. | |
| | | * | |
| | | * If \p resources includes any duplicate entries then ::CUDA_ERROR_INVALID | |
| | | _HANDLE is returned. | |
| | | * If any of \p resources are not presently mapped for access by CUDA then | |
| | | ::CUDA_ERROR_NOT_MAPPED is returned. | |
| | | * | |
| | | * \param count - Number of resources to unmap | |
| | | * \param resources - Resources to unmap | |
| | | * \param hStream - Stream with which to synchronize | |
| | | * | |
| | | * \return | |
| | | * ::CUDA_SUCCESS, | |
| | | * ::CUDA_ERROR_DEINITIALIZED, | |
| | | * ::CUDA_ERROR_NOT_INITIALIZED, | |
| | | * ::CUDA_ERROR_INVALID_CONTEXT, | |
| | | * ::CUDA_ERROR_INVALID_HANDLE, | |
| | | * ::CUDA_ERROR_NOT_MAPPED, | |
| | | * ::CUDA_ERROR_UNKNOWN | |
| | | * \notefnerr | |
| | | * | |
| | | * \sa | |
| | | * ::cuGraphicsMapResources | |
| | | */ | |
| | | CUresult CUDAAPI cuGraphicsUnmapResources(unsigned int count, CUgraphicsRes | |
| | | ource *resources, CUstream hStream); | |
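Taken together, a hedged sketch of the usual map / access / unmap cycle; resource is assumed registered, and the NULL stream is used for simplicity:
\code
    CUdeviceptr devPtr;
    size_t      size;
    cuGraphicsMapResources(1, &resource, 0);                       /* map for CUDA access */
    cuGraphicsResourceGetMappedPointer(&devPtr, &size, resource);
    /* ... launch kernels that read or write devPtr ... */
    cuGraphicsUnmapResources(1, &resource, 0);                     /* return to the graphics API */
\endcode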
| | | | |
| | | /** @} */ /* END CUDA_GRAPHICS */ | |
| | | | |
| | | CUresult CUDAAPI cuGetExportTable(const void **ppExportTable, const CUuuid | |
| | | *pExportTableId); | |
| | | | |
| | | /** @} */ /* END CUDA_DRIVER */ | |
| | | | |
| | | /** | |
| | | * CUDA API versioning support | |
| | | */ | |
| | | #if defined(__CUDA_API_VERSION_INTERNAL) | |
| | | #undef cuDeviceTotalMem | |
| | | #undef cuCtxCreate | |
| | | #undef cuModuleGetGlobal | |
| | | #undef cuMemGetInfo | |
| | | #undef cuMemAlloc | |
| | | #undef cuMemAllocPitch | |
| | | #undef cuMemFree | |
| | | #undef cuMemGetAddressRange | |
| | | #undef cuMemAllocHost | |
| | | #undef cuMemHostGetDevicePointer | |
| | | #undef cuMemcpyHtoD | |
| | | #undef cuMemcpyDtoH | |
| | | #undef cuMemcpyDtoD | |
| | | #undef cuMemcpyDtoA | |
| | | #undef cuMemcpyAtoD | |
| | | #undef cuMemcpyHtoA | |
| | | #undef cuMemcpyAtoH | |
| | | #undef cuMemcpyAtoA | |
| | | #undef cuMemcpyHtoAAsync | |
| | | #undef cuMemcpyAtoHAsync | |
| | | #undef cuMemcpy2D | |
| | | #undef cuMemcpy2DUnaligned | |
| | | #undef cuMemcpy3D | |
| | | #undef cuMemcpyHtoDAsync | |
| | | #undef cuMemcpyDtoHAsync | |
| | | #undef cuMemcpyDtoDAsync | |
| | | #undef cuMemcpy2DAsync | |
| | | #undef cuMemcpy3DAsync | |
| | | #undef cuMemsetD8 | |
| | | #undef cuMemsetD16 | |
| | | #undef cuMemsetD32 | |
| | | #undef cuMemsetD2D8 | |
| | | #undef cuMemsetD2D16 | |
| | | #undef cuMemsetD2D32 | |
| | | #undef cuArrayCreate | |
| | | #undef cuArrayGetDescriptor | |
| | | #undef cuArray3DCreate | |
| | | #undef cuArray3DGetDescriptor | |
| | | #undef cuTexRefSetAddress | |
| | | #undef cuTexRefSetAddress2D | |
| | | #undef cuTexRefGetAddress | |
| | | #undef cuGraphicsResourceGetMappedPointer | |
| | | #endif /* __CUDA_API_VERSION_INTERNAL */ | |
| | | | |
| | | /** | |
| | | * CUDA API made obsolete at API version 3020 | |
| | | */ | |
| | | #if defined(__CUDA_API_VERSION_INTERNAL) | |
| | | #define CUdeviceptr CUdeviceptr_v1 | |
| | | #define CUDA_MEMCPY2D_st CUDA_MEMCPY2D_v1_st | |
| | | #define CUDA_MEMCPY2D CUDA_MEMCPY2D_v1 | |
| | | #define CUDA_MEMCPY3D_st CUDA_MEMCPY3D_v1_st | |
| | | #define CUDA_MEMCPY3D CUDA_MEMCPY3D_v1 | |
| | | #define CUDA_ARRAY_DESCRIPTOR_st CUDA_ARRAY_DESCRIPTOR_v1_st | |
| | | #define CUDA_ARRAY_DESCRIPTOR CUDA_ARRAY_DESCRIPTOR_v1 | |
| | | #define CUDA_ARRAY3D_DESCRIPTOR_st CUDA_ARRAY3D_DESCRIPTOR_v1_st | |
| | | #define CUDA_ARRAY3D_DESCRIPTOR CUDA_ARRAY3D_DESCRIPTOR_v1 | |
| | | #endif /* __CUDA_API_VERSION_INTERNAL */ | |
| | | | |
| | | #if defined(__CUDA_API_VERSION_INTERNAL) || __CUDA_API_VERSION < 3020 | |
| | | | |
| | | typedef unsigned int CUdeviceptr; | |
| | | | |
| | | typedef struct CUDA_MEMCPY2D_st | |
| | | { | |
| | | unsigned int srcXInBytes; /**< Source X in bytes */ | |
| | | unsigned int srcY; /**< Source Y */ | |
| | | CUmemorytype srcMemoryType; /**< Source memory type (host, device, arra | |
| | | y) */ | |
| | | const void *srcHost; /**< Source host pointer */ | |
| | | CUdeviceptr srcDevice; /**< Source device pointer */ | |
| | | CUarray srcArray; /**< Source array reference */ | |
| | | unsigned int srcPitch; /**< Source pitch (ignored when src is arra | |
| | | y) */ | |
| | | | |
| | | unsigned int dstXInBytes; /**< Destination X in bytes */ | |
| | | unsigned int dstY; /**< Destination Y */ | |
| | | CUmemorytype dstMemoryType; /**< Destination memory type (host, device, | |
| | | array) */ | |
| | | void *dstHost; /**< Destination host pointer */ | |
| | | CUdeviceptr dstDevice; /**< Destination device pointer */ | |
| | | CUarray dstArray; /**< Destination array reference */ | |
| | | unsigned int dstPitch; /**< Destination pitch (ignored when dst is | |
| | | array) */ | |
| | | | |
| | | unsigned int WidthInBytes; /**< Width of 2D memory copy in bytes */ | |
| | | unsigned int Height; /**< Height of 2D memory copy */ | |
| | | } CUDA_MEMCPY2D; | |
| | | | |
| | | typedef struct CUDA_MEMCPY3D_st | |
| | | { | |
| | | unsigned int srcXInBytes; /**< Source X in bytes */ | |
| | | unsigned int srcY; /**< Source Y */ | |
| | | unsigned int srcZ; /**< Source Z */ | |
| | | unsigned int srcLOD; /**< Source LOD */ | |
| | | CUmemorytype srcMemoryType; /**< Source memory type (host, device, arra | |
| | | y) */ | |
| | | const void *srcHost; /**< Source host pointer */ | |
| | | CUdeviceptr srcDevice; /**< Source device pointer */ | |
| | | CUarray srcArray; /**< Source array reference */ | |
| | | void *reserved0; /**< Must be NULL */ | |
| | | unsigned int srcPitch; /**< Source pitch (ignored when src is arra | |
| | | y) */ | |
| | | unsigned int srcHeight; /**< Source height (ignored when src is arr | |
| | | ay; may be 0 if Depth==1) */ | |
| | | | |
| | | unsigned int dstXInBytes; /**< Destination X in bytes */ | |
| | | unsigned int dstY; /**< Destination Y */ | |
| | | unsigned int dstZ; /**< Destination Z */ | |
| | | unsigned int dstLOD; /**< Destination LOD */ | |
| | | CUmemorytype dstMemoryType; /**< Destination memory type (host, device, | |
| | | array) */ | |
| | | void *dstHost; /**< Destination host pointer */ | |
| | | CUdeviceptr dstDevice; /**< Destination device pointer */ | |
| | | CUarray dstArray; /**< Destination array reference */ | |
| | | void *reserved1; /**< Must be NULL */ | |
| | | unsigned int dstPitch; /**< Destination pitch (ignored when dst is | |
| | | array) */ | |
| | | unsigned int dstHeight; /**< Destination height (ignored when dst i | |
| | | s array; may be 0 if Depth==1) */ | |
| | | | |
| | | unsigned int WidthInBytes; /**< Width of 3D memory copy in bytes */ | |
| | | unsigned int Height; /**< Height of 3D memory copy */ | |
| | | unsigned int Depth; /**< Depth of 3D memory copy */ | |
| | | } CUDA_MEMCPY3D; | |
| | | | |
| | | typedef struct CUDA_ARRAY_DESCRIPTOR_st | |
| | | { | |
| | | unsigned int Width; /**< Width of array */ | |
| | | unsigned int Height; /**< Height of array */ | |
| | | | |
| | | CUarray_format Format; /**< Array format */ | |
| | | unsigned int NumChannels; /**< Channels per array element */ | |
| | | } CUDA_ARRAY_DESCRIPTOR; | |
| | | | |
| | | typedef struct CUDA_ARRAY3D_DESCRIPTOR_st | |
| | | { | |
| | | unsigned int Width; /**< Width of 3D array */ | |
| | | unsigned int Height; /**< Height of 3D array */ | |
| | | unsigned int Depth; /**< Depth of 3D array */ | |
| | | | |
| | | CUarray_format Format; /**< Array format */ | |
| | | unsigned int NumChannels; /**< Channels per array element */ | |
| | | unsigned int Flags; /**< Flags */ | |
| | | } CUDA_ARRAY3D_DESCRIPTOR; | |
| | | | |
| | | CUresult CUDAAPI cuDeviceTotalMem(unsigned int *bytes, CUdevice dev); | |
| | | CUresult CUDAAPI cuCtxCreate(CUcontext *pctx, unsigned int flags, CUdevice | |
| | | dev); | |
| | | CUresult CUDAAPI cuModuleGetGlobal(CUdeviceptr *dptr, unsigned int *bytes, | |
| | | CUmodule hmod, const char *name); | |
| | | CUresult CUDAAPI cuMemGetInfo(unsigned int *free, unsigned int *total); | |
| | | CUresult CUDAAPI cuMemAlloc(CUdeviceptr *dptr, unsigned int bytesize); | |
| | | CUresult CUDAAPI cuMemAllocPitch(CUdeviceptr *dptr, unsigned int *pPitch, u | |
| | | nsigned int WidthInBytes, unsigned int Height, unsigned int ElementSizeByte | |
| | | s); | |
| | | CUresult CUDAAPI cuMemFree(CUdeviceptr dptr); | |
| | | CUresult CUDAAPI cuMemGetAddressRange(CUdeviceptr *pbase, unsigned int *psi | |
| | | ze, CUdeviceptr dptr); | |
| | | CUresult CUDAAPI cuMemAllocHost(void **pp, unsigned int bytesize); | |
| | | CUresult CUDAAPI cuMemHostGetDevicePointer(CUdeviceptr *pdptr, void *p, uns | |
| | | igned int Flags); | |
| | | CUresult CUDAAPI cuMemcpyHtoD(CUdeviceptr dstDevice, const void *srcHost, u | |
| | | nsigned int ByteCount); | |
| | | CUresult CUDAAPI cuMemcpyDtoH(void *dstHost, CUdeviceptr srcDevice, unsigne | |
| | | d int ByteCount); | |
| | | CUresult CUDAAPI cuMemcpyDtoD(CUdeviceptr dstDevice, CUdeviceptr srcDevice, | |
| | | unsigned int ByteCount); | |
| | | CUresult CUDAAPI cuMemcpyDtoA(CUarray dstArray, unsigned int dstOffset, CUd | |
| | | eviceptr srcDevice, unsigned int ByteCount); | |
| | | CUresult CUDAAPI cuMemcpyAtoD(CUdeviceptr dstDevice, CUarray srcArray, unsi | |
| | | gned int srcOffset, unsigned int ByteCount); | |
| | | CUresult CUDAAPI cuMemcpyHtoA(CUarray dstArray, unsigned int dstOffset, con | |
| | | st void *srcHost, unsigned int ByteCount); | |
| | | CUresult CUDAAPI cuMemcpyAtoH(void *dstHost, CUarray srcArray, unsigned int | |
| | | srcOffset, unsigned int ByteCount); | |
| | | CUresult CUDAAPI cuMemcpyAtoA(CUarray dstArray, unsigned int dstOffset, CUa | |
| | | rray srcArray, unsigned int srcOffset, unsigned int ByteCount); | |
| | | CUresult CUDAAPI cuMemcpyHtoAAsync(CUarray dstArray, unsigned int dstOffset | |
| | | , const void *srcHost, unsigned int ByteCount, CUstream hStream); | |
| | | CUresult CUDAAPI cuMemcpyAtoHAsync(void *dstHost, CUarray srcArray, unsigne | |
| | | d int srcOffset, unsigned int ByteCount, CUstream hStream); | |
| | | CUresult CUDAAPI cuMemcpy2D(const CUDA_MEMCPY2D *pCopy); | |
| | | CUresult CUDAAPI cuMemcpy2DUnaligned(const CUDA_MEMCPY2D *pCopy); | |
| | | CUresult CUDAAPI cuMemcpy3D(const CUDA_MEMCPY3D *pCopy); | |
| | | CUresult CUDAAPI cuMemcpyHtoDAsync(CUdeviceptr dstDevice, const void *srcHo | |
| | | st, unsigned int ByteCount, CUstream hStream); | |
| | | CUresult CUDAAPI cuMemcpyDtoHAsync(void *dstHost, CUdeviceptr srcDevice, un | |
| | | signed int ByteCount, CUstream hStream); | |
| | | CUresult CUDAAPI cuMemcpyDtoDAsync(CUdeviceptr dstDevice, CUdeviceptr srcDe | |
| | | vice, unsigned int ByteCount, CUstream hStream); | |
| | | CUresult CUDAAPI cuMemcpy2DAsync(const CUDA_MEMCPY2D *pCopy, CUstream hStre | |
| | | am); | |
| | | CUresult CUDAAPI cuMemcpy3DAsync(const CUDA_MEMCPY3D *pCopy, CUstream hStre | |
| | | am); | |
| | | CUresult CUDAAPI cuMemsetD8(CUdeviceptr dstDevice, unsigned char uc, unsign | |
| | | ed int N); | |
| | | CUresult CUDAAPI cuMemsetD16(CUdeviceptr dstDevice, unsigned short us, unsi | |
| | | gned int N); | |
| | | CUresult CUDAAPI cuMemsetD32(CUdeviceptr dstDevice, unsigned int ui, unsign | |
| | | ed int N); | |
| | | CUresult CUDAAPI cuMemsetD2D8(CUdeviceptr dstDevice, unsigned int dstPitch, | |
| | | unsigned char uc, unsigned int Width, unsigned int Height); | |
| | | CUresult CUDAAPI cuMemsetD2D16(CUdeviceptr dstDevice, unsigned int dstPitch | |
| | | , unsigned short us, unsigned int Width, unsigned int Height); | |
| | | CUresult CUDAAPI cuMemsetD2D32(CUdeviceptr dstDevice, unsigned int dstPitch | |
| | | , unsigned int ui, unsigned int Width, unsigned int Height); | |
| | | CUresult CUDAAPI cuArrayCreate(CUarray *pHandle, const CUDA_ARRAY_DESCRIPTO | |
| | | R *pAllocateArray); | |
| | | CUresult CUDAAPI cuArrayGetDescriptor(CUDA_ARRAY_DESCRIPTOR *pArrayDescript | |
| | | or, CUarray hArray); | |
| | | CUresult CUDAAPI cuArray3DCreate(CUarray *pHandle, const CUDA_ARRAY3D_DESCR | |
| | | IPTOR *pAllocateArray); | |
| | | CUresult CUDAAPI cuArray3DGetDescriptor(CUDA_ARRAY3D_DESCRIPTOR *pArrayDesc | |
| | | riptor, CUarray hArray); | |
| | | CUresult CUDAAPI cuTexRefSetAddress(unsigned int *ByteOffset, CUtexref hTex | |
| | | Ref, CUdeviceptr dptr, unsigned int bytes); | |
| | | CUresult CUDAAPI cuTexRefSetAddress2D(CUtexref hTexRef, const CUDA_ARRAY_DE | |
| | | SCRIPTOR *desc, CUdeviceptr dptr, unsigned int Pitch); | |
| | | CUresult CUDAAPI cuTexRefGetAddress(CUdeviceptr *pdptr, CUtexref hTexRef); | |
| | | CUresult CUDAAPI cuGraphicsResourceGetMappedPointer(CUdeviceptr *pDevPtr, u | |
| | | nsigned int *pSize, CUgraphicsResource resource); | |
| | | | |
| | | #endif /* __CUDA_API_VERSION_INTERNAL || __CUDA_API_VERSION < 3020 */ | |
| | | | |
| | | #if defined(__CUDA_API_VERSION_INTERNAL) | |
| | | #undef CUdeviceptr | |
| | | #undef CUDA_MEMCPY2D_st | |
| | | #undef CUDA_MEMCPY2D | |
| | | #undef CUDA_MEMCPY3D_st | |
| | | #undef CUDA_MEMCPY3D | |
| | | #undef CUDA_ARRAY_DESCRIPTOR_st | |
| | | #undef CUDA_ARRAY_DESCRIPTOR | |
| | | #undef CUDA_ARRAY3D_DESCRIPTOR_st | |
| | | #undef CUDA_ARRAY3D_DESCRIPTOR | |
| | | #endif /* __CUDA_API_VERSION_INTERNAL */ | |
| | | | |
| #ifdef __cplusplus | | #ifdef __cplusplus | |
| } | | } | |
| #endif | | #endif | |
| | | | |
|
| | | #undef __CUDA_API_VERSION | |
| | | | |
| #endif /* __cuda_cuda_h__ */ | | #endif /* __cuda_cuda_h__ */ | |
| | | | |
End of changes. 111 change blocks. |
| 628 lines changed or deleted | | 6635 lines changed or added | |
|
| cuda_runtime.h | | cuda_runtime.h | |
| | | | |
| skipping to change at line 79 | | skipping to change at line 79 | |
| | | | |
| #if defined(__cplusplus) | | #if defined(__cplusplus) | |
| | | | |
| /**************************************************************************
***** | | /**************************************************************************
***** | |
| *
* | | *
* | |
| *
* | | *
* | |
| *
* | | *
* | |
| ***************************************************************************
****/ | | ***************************************************************************
****/ | |
| | | | |
| /** | | /** | |
|
| * \ingroup CUDART_HIGHLEVEL | | * \addtogroup CUDART_HIGHLEVEL | |
| | | * @{ | |
| | | */ | |
| | | | |
| | | /** | |
| * \brief \hl Configure a device launch | | * \brief \hl Configure a device launch | |
| * | | * | |
| * Pushes \p size bytes of the argument pointed to by \p arg at \p offset | | * Pushes \p size bytes of the argument pointed to by \p arg at \p offset | |
| * bytes from the start of the parameter passing area, which starts at | | * bytes from the start of the parameter passing area, which starts at | |
| * offset 0. The arguments are stored in the top of the execution stack. | | * offset 0. The arguments are stored in the top of the execution stack. | |
|
| * \ref ::cudaSetupArgument(T,size_t) "cudaSetupArgument()" must be precede
d | | * \ref ::cudaSetupArgument(T, size_t) "cudaSetupArgument()" must be preced
ed | |
| * by a call to ::cudaConfigureCall(). | | * by a call to ::cudaConfigureCall(). | |
| * | | * | |
| * \param arg - Argument to push for a kernel launch | | * \param arg - Argument to push for a kernel launch | |
| * \param offset - Offset in argument stack to push new arg | | * \param offset - Offset in argument stack to push new arg | |
| * | | * | |
| * \return | | * \return | |
| * ::cudaSuccess | | * ::cudaSuccess | |
| * \notefnerr | | * \notefnerr | |
| * | | * | |
| * \sa ::cudaConfigureCall, | | * \sa ::cudaConfigureCall, | |
| | | | |
| skipping to change at line 111 | | skipping to change at line 115 | |
| */ | | */ | |
| template<class T> | | template<class T> | |
| __inline__ __host__ cudaError_t cudaSetupArgument( | | __inline__ __host__ cudaError_t cudaSetupArgument( | |
| T arg, | | T arg, | |
| size_t offset | | size_t offset | |
| ) | | ) | |
| { | | { | |
| return cudaSetupArgument((const void*)&arg, sizeof(T), offset); | | return cudaSetupArgument((const void*)&arg, sizeof(T), offset); | |
| } | | } | |
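For orientation, a sketch of the explicit launch sequence this overload takes part in; the compiler normally emits these calls for the <<<...>>> syntax. myKernel, devPtr and n are illustrative names, and the offsets assume an 8-byte pointer followed by a 4-byte int:
\code
    dim3   grid(64), block(256);
    float *devPtr = 0;                       /* assumed device allocation */
    int    n      = 1 << 20;
    cudaConfigureCall(grid, block);          /* push the launch configuration */
    cudaSetupArgument(devPtr, 0);            /* first argument at offset 0 */
    cudaSetupArgument(n, sizeof(devPtr));    /* next argument, suitably aligned */
    cudaLaunch("myKernel");                  /* entry point of the kernel */
\endcode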
| | | | |
|
| | | /** | |
| | | * \brief \hl Creates an event object with the specified flags | |
| | | * | |
| | | * Creates an event object with the specified flags. Valid flags include: | |
| | | * - ::cudaEventDefault: Default event creation flag. | |
| | | * - ::cudaEventBlockingSync: Specifies that event should use blocking | |
| | | * synchronization. A host thread that uses ::cudaEventSynchronize() to w | |
| | | ait | |
| | | * on an event created with this flag will block until the event actually | |
| | | * completes. | |
| | | * - ::cudaEventDisableTiming: Specifies that the created event does not ne | |
| | | ed | |
| | | * to record timing data. Events created with this flag specified and | |
| | | * the ::cudaEventBlockingSync flag not specified will provide the best | |
| | | * performance when used with ::cudaStreamWaitEvent() and ::cudaEventQuer | |
| | | y(). | |
| | | * | |
| | | * \param event - Newly created event | |
| | | * \param flags - Flags for new event | |
| | | * | |
| | | * \return | |
| | | * ::cudaSuccess, | |
| | | * ::cudaErrorInitializationError, | |
| | | * ::cudaErrorInvalidValue, | |
| | | * ::cudaErrorLaunchFailure, | |
| | | * ::cudaErrorMemoryAllocation | |
| | | * \notefnerr | |
| | | * | |
| | | * \sa \ref ::cudaEventCreate(cudaEvent_t*) "cudaEventCreate (C API)", | |
| | | * ::cudaEventCreateWithFlags, ::cudaEventRecord, ::cudaEventQuery, | |
| | | * ::cudaEventSynchronize, ::cudaEventDestroy, ::cudaEventElapsedTime, | |
| | | * ::cudaStreamWaitEvent | |
| | | */ | |
| | | static __inline__ __host__ cudaError_t cudaEventCreate( | |
| | | cudaEvent_t *event, | |
| | | unsigned int flags | |
| | | ) | |
| | | { | |
| | | return cudaEventCreateWithFlags(event, flags); | |
| | | } | |
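As a usage sketch of the flagged overload (stream 0, error checking omitted); ::cudaEventBlockingSync makes the waiting host thread block rather than spin:
\code
    cudaEvent_t start, stop;
    float       elapsedMs;
    cudaEventCreate(&start, cudaEventDefault);
    cudaEventCreate(&stop,  cudaEventBlockingSync);
    cudaEventRecord(start, 0);
    /* ... kernel launches on stream 0 ... */
    cudaEventRecord(stop, 0);
    cudaEventSynchronize(stop);              /* blocks rather than spins */
    cudaEventElapsedTime(&elapsedMs, start, stop);
    cudaEventDestroy(start);
    cudaEventDestroy(stop);
\endcode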
| | | | |
| | | /** | |
| | | * \brief \hl Allocates page-locked memory on the host | |
| | | * | |
| | | * Allocates \p size bytes of host memory that is page-locked and accessibl | |
| | | e | |
| | | * to the device. The driver tracks the virtual memory ranges allocated wit | |
| | | h | |
| | | * this function and automatically accelerates calls to functions such as | |
| | | * ::cudaMemcpy(). Since the memory can be accessed directly by the device, | |
| | | it | |
| | | * can be read or written with much higher bandwidth than pageable memory | |
| | | * obtained with functions such as ::malloc(). Allocating excessive amounts | |
| | | of | |
| | | * pinned memory may degrade system performance, since it reduces the amoun | |
| | | t | |
| | | * of memory available to the system for paging. As a result, this function | |
| | | is | |
| | | * best used sparingly to allocate staging areas for data exchange between | |
| | | host | |
| | | * and device. | |
| | | * | |
| | | * The \p flags parameter enables different options to be specified that af | |
| | | fect | |
| | | * the allocation, as follows. | |
| | | * - ::cudaHostAllocDefault: This flag's value is defined to be 0. | |
| | | * - ::cudaHostAllocPortable: The memory returned by this call will be | |
| | | * considered as pinned memory by all CUDA contexts, not just the one that | |
| | | * performed the allocation. | |
| | | * - ::cudaHostAllocMapped: Maps the allocation into the CUDA address space | |
| | | . | |
| | | * The device pointer to the memory may be obtained by calling | |
| | | * ::cudaHostGetDevicePointer(). | |
| | | * - ::cudaHostAllocWriteCombined: Allocates the memory as write-combined ( | |
| | | WC). | |
| | | * WC memory can be transferred across the PCI Express bus more quickly on | |
| | | some | |
| | | * system configurations, but cannot be read efficiently by most CPUs. WC | |
| | | * memory is a good option for buffers that will be written by the CPU and | |
| | | read | |
| | | * by the device via mapped pinned memory or host->device transfers. | |
| | | * | |
| | | * All of these flags are orthogonal to one another: a developer may alloca | |
| | | te | |
| | | * memory that is portable, mapped and/or write-combined with no restrictio | |
| | | ns. | |
| | | * | |
| | | * ::cudaSetDeviceFlags() must have been called with the ::cudaDeviceMapHos | |
| | | t | |
| | | * flag in order for the ::cudaHostAllocMapped flag to have any effect. | |
| | | * | |
| | | * The ::cudaHostAllocMapped flag may be specified on CUDA contexts for dev | |
| | | ices | |
| | | * that do not support mapped pinned memory. The failure is deferred to | |
| | | * ::cudaHostGetDevicePointer() because the memory may be mapped into other | |
| | | * CUDA contexts via the ::cudaHostAllocPortable flag. | |
| | | * | |
| | | * Memory allocated by this function must be freed with ::cudaFreeHost(). | |
| | | * | |
| | | * \param ptr - Device pointer to allocated memory | |
| | | * \param size - Requested allocation size in bytes | |
| | | * \param flags - Requested properties of allocated memory | |
| | | * | |
| | | * \return | |
| | | * ::cudaSuccess, | |
| | | * ::cudaErrorMemoryAllocation | |
| | | * \notefnerr | |
| | | * | |
| | | * \sa ::cudaSetDeviceFlags, | |
| | | * \ref ::cudaMallocHost(void**, size_t) "cudaMallocHost (C API)", | |
| | | * ::cudaFreeHost, ::cudaHostAlloc | |
| | | */ | |
| | | static __inline__ __host__ cudaError_t cudaMallocHost( | |
| | | void **ptr, | |
| | | size_t size, | |
| | | unsigned int flags | |
| | | ) | |
| | | { | |
| | | return cudaHostAlloc(ptr, size, flags); | |
| | | } | |
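A hedged sketch of allocating mapped, pinned memory through this overload; it assumes ::cudaSetDeviceFlags() runs before the runtime creates a context, and nBytes is an illustrative size:
\code
    float  *hostPtr, *devAlias;
    size_t  nBytes = 1 << 20;
    cudaSetDeviceFlags(cudaDeviceMapHost);   /* before the first context-creating call */
    cudaMallocHost((void**)&hostPtr, nBytes, cudaHostAllocMapped);
    cudaHostGetDevicePointer((void**)&devAlias, hostPtr, 0);
    /* Kernels may now access the allocation through devAlias. */
    cudaFreeHost(hostPtr);
\endcode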
| | | | |
| template<class T> | | template<class T> | |
| __inline__ __host__ cudaError_t cudaHostAlloc( | | __inline__ __host__ cudaError_t cudaHostAlloc( | |
| T **ptr, | | T **ptr, | |
| size_t size, | | size_t size, | |
| unsigned int flags | | unsigned int flags | |
| ) | | ) | |
| { | | { | |
| return cudaHostAlloc((void**)(void*)ptr, size, flags); | | return cudaHostAlloc((void**)(void*)ptr, size, flags); | |
| } | | } | |
| | | | |
| | | | |
| skipping to change at line 142 | | skipping to change at line 248 | |
| __inline__ __host__ cudaError_t cudaMalloc( | | __inline__ __host__ cudaError_t cudaMalloc( | |
| T **devPtr, | | T **devPtr, | |
| size_t size | | size_t size | |
| ) | | ) | |
| { | | { | |
| return cudaMalloc((void**)(void*)devPtr, size); | | return cudaMalloc((void**)(void*)devPtr, size); | |
| } | | } | |
| | | | |
| template<class T> | | template<class T> | |
| __inline__ __host__ cudaError_t cudaMallocHost( | | __inline__ __host__ cudaError_t cudaMallocHost( | |
|
| T **ptr, | | T **ptr, | |
| size_t size | | size_t size, | |
| | | unsigned int flags = 0 | |
| ) | | ) | |
| { | | { | |
|
| return cudaMallocHost((void**)(void*)ptr, size); | | return cudaMallocHost((void**)(void*)ptr, size, flags); | |
| } | | } | |
| | | | |
| template<class T> | | template<class T> | |
| __inline__ __host__ cudaError_t cudaMallocPitch( | | __inline__ __host__ cudaError_t cudaMallocPitch( | |
| T **devPtr, | | T **devPtr, | |
| size_t *pitch, | | size_t *pitch, | |
| size_t width, | | size_t width, | |
| size_t height | | size_t height | |
| ) | | ) | |
| { | | { | |
| | | | |
| skipping to change at line 168 | | skipping to change at line 275 | |
| } | | } | |
| | | | |
| #if defined(__CUDACC__) | | #if defined(__CUDACC__) | |
| | | | |
| /**************************************************************************
***** | | /**************************************************************************
***** | |
| *
* | | *
* | |
| *
* | | *
* | |
| *
* | | *
* | |
| ***************************************************************************
****/ | | ***************************************************************************
****/ | |
| | | | |
|
| /** | | | |
| * \addtogroup CUDART_HIGHLEVEL | | | |
| * @{ | | | |
| */ | | | |
| | | | |
| static __inline__ __host__ cudaError_t cudaMemcpyToSymbol( | | static __inline__ __host__ cudaError_t cudaMemcpyToSymbol( | |
| char *symbol, | | char *symbol, | |
| const void *src, | | const void *src, | |
| size_t count, | | size_t count, | |
| size_t offset = 0, | | size_t offset = 0, | |
| enum cudaMemcpyKind kind = cudaMemcpyHostToDevice | | enum cudaMemcpyKind kind = cudaMemcpyHostToDevice | |
| ) | | ) | |
| { | | { | |
| return cudaMemcpyToSymbol((const char*)symbol, src, count, offset, kind); | | return cudaMemcpyToSymbol((const char*)symbol, src, count, offset, kind); | |
| } | | } | |
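A brief sketch of copying into a device symbol; filter is an illustrative __constant__ array, and passing the symbol itself resolves to the companion overload that takes the symbol by reference:
\code
    __constant__ float filter[16];           /* illustrative device symbol */
    float hostFilter[16] = { 0 };
    /* offset and kind use their defaults (0, cudaMemcpyHostToDevice) */
    cudaMemcpyToSymbol(filter, hostFilter, sizeof(hostFilter));
\endcode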
| | | | |
| skipping to change at line 365 | | skipping to change at line 467 | |
| */ | | */ | |
| template<class T> | | template<class T> | |
| __inline__ __host__ cudaError_t cudaGetSymbolSize( | | __inline__ __host__ cudaError_t cudaGetSymbolSize( | |
| size_t *size, | | size_t *size, | |
| const T &symbol | | const T &symbol | |
| ) | | ) | |
| { | | { | |
| return cudaGetSymbolSize(size, (const char*)&symbol); | | return cudaGetSymbolSize(size, (const char*)&symbol); | |
| } | | } | |
| | | | |
|
| /** @} */ /* END CUDART_HIGHLEVEL */ | | | |
| | | | |
| /**************************************************************************
***** | | /**************************************************************************
***** | |
| *
* | | *
* | |
| *
* | | *
* | |
| *
* | | *
* | |
| ***************************************************************************
****/ | | ***************************************************************************
****/ | |
| | | | |
| /** | | /** | |
|
| * \addtogroup CUDART_HIGHLEVEL | | | |
| * | | | |
| * @{ | | | |
| */ | | | |
| | | | |
| /** | | | |
| * \brief \hl Binds a memory area to a texture | | * \brief \hl Binds a memory area to a texture | |
| * | | * | |
| * Binds \p size bytes of the memory area pointed to by \p devPtr to textur
e | | * Binds \p size bytes of the memory area pointed to by \p devPtr to textur
e | |
| * reference \p tex. \p desc describes how the memory is interpreted when | | * reference \p tex. \p desc describes how the memory is interpreted when | |
| * fetching values from the texture. The \p offset parameter is an optional | | * fetching values from the texture. The \p offset parameter is an optional | |
| * byte offset as with the low-level | | * byte offset as with the low-level | |
| * \ref ::cudaBindTexture(size_t*, const struct textureReference*, const vo
id*, const struct cudaChannelFormatDesc*, size_t) "cudaBindTexture()" | | * \ref ::cudaBindTexture(size_t*, const struct textureReference*, const vo
id*, const struct cudaChannelFormatDesc*, size_t) "cudaBindTexture()" | |
| * function. Any memory previously bound to \p tex is unbound. | | * function. Any memory previously bound to \p tex is unbound. | |
| * | | * | |
| * \param offset - Offset in bytes | | * \param offset - Offset in bytes | |
| | | | |
| skipping to change at line 405 | | skipping to change at line 499 | |
| * \return | | * \return | |
| * ::cudaSuccess, | | * ::cudaSuccess, | |
| * ::cudaErrorInvalidValue, | | * ::cudaErrorInvalidValue, | |
| * ::cudaErrorInvalidDevicePointer, | | * ::cudaErrorInvalidDevicePointer, | |
| * ::cudaErrorInvalidTexture | | * ::cudaErrorInvalidTexture | |
| * \notefnerr | | * \notefnerr | |
| * | | * | |
| * \sa \ref ::cudaCreateChannelDesc(void) "cudaCreateChannelDesc (C++ API)"
, | | * \sa \ref ::cudaCreateChannelDesc(void) "cudaCreateChannelDesc (C++ API)"
, | |
| * ::cudaGetChannelDesc, ::cudaGetTextureReference, | | * ::cudaGetChannelDesc, ::cudaGetTextureReference, | |
| * \ref ::cudaBindTexture(size_t*, const struct textureReference*, const vo
id*, const struct cudaChannelFormatDesc*, size_t) "cudaBindTexture (C API)"
, | | * \ref ::cudaBindTexture(size_t*, const struct textureReference*, const vo
id*, const struct cudaChannelFormatDesc*, size_t) "cudaBindTexture (C API)"
, | |
|
| * \ref ::cudaBindTexture(size_t*, const struct texture< T, dim, readMode>& | | * \ref ::cudaBindTexture(size_t*, const struct texture<T, dim, readMode>&, | |
| , const void*, size_t) "cudaBindTexture (C++ API, inherited channel descrip | | const void*, size_t) "cudaBindTexture (C++ API, inherited channel descript | |
| tor)", | | or)", | |
| * \ref ::cudaBindTexture2D(size_t*, const struct texture< T, dim, readMode | | * \ref ::cudaBindTexture2D(size_t*, const struct texture<T, dim, readMode> | |
| >&, const void*, const struct cudaChannelFormatDesc&, size_t, size_t, size_ | | &, const void*, const struct cudaChannelFormatDesc&, size_t, size_t, size_t | |
| t) "cudaBindTexture2D (C++ API)", | | ) "cudaBindTexture2D (C++ API)", | |
| * \ref ::cudaBindTextureToArray(const struct texture< T, dim, readMode>&, | | * \ref ::cudaBindTexture2D(size_t*, const struct texture<T, dim, readMode> | |
| const struct cudaArray*, const struct cudaChannelFormatDesc&) "cudaBindText | | &, const void*, size_t, size_t, size_t) "cudaBindTexture2D (C++ API, inheri | |
| ureToArray (C++ API)", | | ted channel descriptor)", | |
| * \ref ::cudaBindTextureToArray(const struct texture< T, dim, readMode>&, | | * \ref ::cudaBindTextureToArray(const struct texture<T, dim, readMode>&, c | |
| const struct cudaArray*) "cudaBindTextureToArray (C++ API, inherited channe | | onst struct cudaArray*, const struct cudaChannelFormatDesc&) "cudaBindTextu | |
| l descriptor)", | | reToArray (C++ API)", | |
| * \ref ::cudaUnbindTexture(const struct texture< T, dim, readMode>&) "cuda | | * \ref ::cudaBindTextureToArray(const struct texture<T, dim, readMode>&, c | |
| UnbindTexture (C++ API)", | | onst struct cudaArray*) "cudaBindTextureToArray (C++ API, inherited channel | |
| * \ref ::cudaGetTextureAlignmentOffset(size_t*, const struct texture< T, d | | descriptor)", | |
| im, readMode >&) "cudaGetTextureAlignmentOffset (C++ API)" | | * \ref ::cudaUnbindTexture(const struct texture<T, dim, readMode>&) "cudaU | |
| | | nbindTexture (C++ API)", | |
| | | * \ref ::cudaGetTextureAlignmentOffset(size_t*, const struct texture<T, di | |
| | | m, readMode>&) "cudaGetTextureAlignmentOffset (C++ API)" | |
| */ | | */ | |
| template<class T, int dim, enum cudaTextureReadMode readMode> | | template<class T, int dim, enum cudaTextureReadMode readMode> | |
| __inline__ __host__ cudaError_t cudaBindTexture( | | __inline__ __host__ cudaError_t cudaBindTexture( | |
| size_t *offset, | | size_t *offset, | |
| const struct texture<T, dim, readMode> &tex, | | const struct texture<T, dim, readMode> &tex, | |
| const void *devPtr, | | const void *devPtr, | |
| const struct cudaChannelFormatDesc &desc, | | const struct cudaChannelFormatDesc &desc, | |
| size_t size = UINT_MAX | | size_t size = UINT_MAX | |
| ) | | ) | |
| { | | { | |
| | | | |
| skipping to change at line 446 | | skipping to change at line 541 | |
| * \param devPtr - Memory area on device | | * \param devPtr - Memory area on device | |
| * \param size - Size of the memory area pointed to by devPtr | | * \param size - Size of the memory area pointed to by devPtr | |
| * | | * | |
| * \return | | * \return | |
| * ::cudaSuccess, | | * ::cudaSuccess, | |
| * ::cudaErrorInvalidValue, | | * ::cudaErrorInvalidValue, | |
| * ::cudaErrorInvalidDevicePointer, | | * ::cudaErrorInvalidDevicePointer, | |
| * ::cudaErrorInvalidTexture | | * ::cudaErrorInvalidTexture | |
| * \notefnerr | | * \notefnerr | |
| * | | * | |
|
| * \sa \ref ::cudaCreateChannelDesc(void) "cudaCreateChannelDesc (C++ API), | | * \sa \ref ::cudaCreateChannelDesc(void) "cudaCreateChannelDesc (C++ API)"
, | |
| * ::cudaGetChannelDesc, ::cudaGetTextureReference, | | * ::cudaGetChannelDesc, ::cudaGetTextureReference, | |
| * \ref ::cudaBindTexture(size_t*, const struct textureReference*, const vo
id*, const struct cudaChannelFormatDesc*, size_t) "cudaBindTexture (C API)"
, | | * \ref ::cudaBindTexture(size_t*, const struct textureReference*, const vo
id*, const struct cudaChannelFormatDesc*, size_t) "cudaBindTexture (C API)"
, | |
|
| * \ref ::cudaBindTexture(size_t*, const struct texture< T, dim, readMode>& | | * \ref ::cudaBindTexture(size_t*, const struct texture<T, dim, readMode>&, | |
| , const void*, size_t) "cudaBindTexture (C++ API, inherited channel descrip | | const void*, const struct cudaChannelFormatDesc&, size_t) "cudaBindTexture | |
| tor)", | | (C++ API)", | |
| * \ref ::cudaBindTexture2D(size_t*, const struct texture< T, dim, readMode | | * \ref ::cudaBindTexture2D(size_t*, const struct texture<T, dim, readMode> | |
| >&, const void*, const struct cudaChannelFormatDesc&, size_t, size_t, size_ | | &, const void*, const struct cudaChannelFormatDesc&, size_t, size_t, size_t | |
| t) "cudaBindTexture2D (C++ API)", | | ) "cudaBindTexture2D (C++ API)", | |
| * \ref ::cudaBindTextureToArray(const struct texture< T, dim, readMode>&, | | * \ref ::cudaBindTexture2D(size_t*, const struct texture<T, dim, readMode> | |
| const struct cudaArray*, const struct cudaChannelFormatDesc&) "cudaBindText | | &, const void*, size_t, size_t, size_t) "cudaBindTexture2D (C++ API, inheri | |
| ureToArray (C++ API)", | | ted channel descriptor)", | |
| * \ref ::cudaBindTextureToArray(const struct texture< T, dim, readMode>&, | | * \ref ::cudaBindTextureToArray(const struct texture<T, dim, readMode>&, c | |
| const struct cudaArray*) "cudaBindTextureToArray (C++ API, inherited channe | | onst struct cudaArray*, const struct cudaChannelFormatDesc&) "cudaBindTextu | |
| l descriptor), | | reToArray (C++ API)", | |
| | | * \ref ::cudaBindTextureToArray(const struct texture<T, dim, readMode>&, c | |
| | | onst struct cudaArray*) "cudaBindTextureToArray (C++ API, inherited channel | |
| | | descriptor)", | |
| * \ref ::cudaUnbindTexture(const struct texture<T, dim, readMode>&) "cudaU
nbindTexture (C++ API)", | | * \ref ::cudaUnbindTexture(const struct texture<T, dim, readMode>&) "cudaU
nbindTexture (C++ API)", | |
|
| * \ref ::cudaGetTextureAlignmentOffset(size_t*, const struct texture< T, d
im, readMode>&) "cudaGetTextureAlignmentOffset (C++ API)" | | * \ref ::cudaGetTextureAlignmentOffset(size_t*, const struct texture<T, di
m, readMode>&) "cudaGetTextureAlignmentOffset (C++ API)" | |
| */ | | */ | |
| template<class T, int dim, enum cudaTextureReadMode readMode> | | template<class T, int dim, enum cudaTextureReadMode readMode> | |
| __inline__ __host__ cudaError_t cudaBindTexture( | | __inline__ __host__ cudaError_t cudaBindTexture( | |
| size_t *offset, | | size_t *offset, | |
| const struct texture<T, dim, readMode> &tex, | | const struct texture<T, dim, readMode> &tex, | |
| const void *devPtr, | | const void *devPtr, | |
| size_t size = UINT_MAX | | size_t size = UINT_MAX | |
| ) | | ) | |
| { | | { | |
| return cudaBindTexture(offset, tex, devPtr, tex.channelDesc, size); | | return cudaBindTexture(offset, tex, devPtr, tex.channelDesc, size); | |
| | | | |
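As a usage illustration (not part of the header), here is a minimal sketch of the high-level cudaBindTexture() overload documented above: linear device memory is bound to a file-scope texture reference, and the channel descriptor is inherited from the texture type. The kernel and buffer names are invented for the sketch and error checking is omitted.

    // Sketch only: bind cudaMalloc'd memory to a 1D texture reference and fetch it.
    texture<float, 1, cudaReadModeElementType> texRef;    // file-scope texture reference

    __global__ void scaleKernel(float *out, int n)
    {
        int i = blockIdx.x * blockDim.x + threadIdx.x;
        if (i < n)
            out[i] = 2.0f * tex1Dfetch(texRef, i);         // read through the texture path
    }

    void runScale(int n)
    {
        float *d_in, *d_out;
        cudaMalloc((void**)&d_in,  n * sizeof(float));
        cudaMalloc((void**)&d_out, n * sizeof(float));

        size_t offset = 0;
        // Channel descriptor is inherited from texRef; the size argument is in bytes.
        cudaBindTexture(&offset, texRef, d_in, n * sizeof(float));

        scaleKernel<<<(n + 255) / 256, 256>>>(d_out, n);

        cudaUnbindTexture(texRef);
        cudaFree(d_in);
        cudaFree(d_out);
    }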
| skipping to change at line 478 | | skipping to change at line 574 | |
| * \brief \hl Binds a 2D memory area to a texture | | * \brief \hl Binds a 2D memory area to a texture | |
| * | | * | |
| * Binds the 2D memory area pointed to by \p devPtr to the | | * Binds the 2D memory area pointed to by \p devPtr to the | |
| * texture reference \p tex. The size of the area is constrained by | | * texture reference \p tex. The size of the area is constrained by | |
| * \p width in texel units, \p height in texel units, and \p pitch in byte | | * \p width in texel units, \p height in texel units, and \p pitch in byte | |
| * units. \p desc describes how the memory is interpreted when fetching values | | * units. \p desc describes how the memory is interpreted when fetching values | |
| * from the texture. Any memory previously bound to \p tex is unbound. | | * from the texture. Any memory previously bound to \p tex is unbound. | |
| * | | * | |
| * Since the hardware enforces an alignment requirement on texture base | | * Since the hardware enforces an alignment requirement on texture base | |
| * addresses, | | * addresses, | |
| * \ref ::cudaBindTexture2D(size_t*, const struct texture< T, dim, readMode>&, const void*, const struct cudaChannelFormatDesc&, size_t, size_t, size_t) "cudaBindTexture2D()" | | * \ref ::cudaBindTexture2D(size_t*, const struct texture<T, dim, readMode>&, const void*, const struct cudaChannelFormatDesc&, size_t, size_t, size_t) "cudaBindTexture2D()" | |
| * returns in \p *offset a byte offset that | | * returns in \p *offset a byte offset that | |
| * must be applied to texture fetches in order to read from the desired memory. | | * must be applied to texture fetches in order to read from the desired memory. | |
| * This offset must be divided by the texel size and passed to kernels that | | * This offset must be divided by the texel size and passed to kernels that | |
| * read from the texture so they can be applied to the ::tex2D() function. | | * read from the texture so they can be applied to the ::tex2D() function. | |
| * If the device memory pointer was returned from ::cudaMalloc(), the offset is | | * If the device memory pointer was returned from ::cudaMalloc(), the offset is | |
| * guaranteed to be 0 and NULL may be passed as the \p offset parameter. | | * guaranteed to be 0 and NULL may be passed as the \p offset parameter. | |
| * | | * | |
| * \param offset - Offset in bytes | | * \param offset - Offset in bytes | |
| * \param tex - Texture reference to bind | | * \param tex - Texture reference to bind | |
| * \param devPtr - 2D memory area on device | | * \param devPtr - 2D memory area on device | |
| | | | |
| skipping to change at line 501 | | skipping to change at line 597 | |
| * \param height - Height in texel units | | * \param height - Height in texel units | |
| * \param pitch - Pitch in bytes | | * \param pitch - Pitch in bytes | |
| * | | * | |
| * \return | | * \return | |
| * ::cudaSuccess, | | * ::cudaSuccess, | |
| * ::cudaErrorInvalidValue, | | * ::cudaErrorInvalidValue, | |
| * ::cudaErrorInvalidDevicePointer, | | * ::cudaErrorInvalidDevicePointer, | |
| * ::cudaErrorInvalidTexture | | * ::cudaErrorInvalidTexture | |
| * \notefnerr | | * \notefnerr | |
| * | | * | |
| * \sa \ref ::cudaCreateChannelDesc(void) "cudaCreateChannelDesc (C++ API), | | * \sa \ref ::cudaCreateChannelDesc(void) "cudaCreateChannelDesc (C++ API)", | |
| * ::cudaGetChannelDesc, ::cudaGetTextureReference, | | * ::cudaGetChannelDesc, ::cudaGetTextureReference, | |
| * \ref ::cudaBindTexture(size_t*, const struct texture< T, dim, readMode>&, const void*, const struct cudaChannelFormatDesc&, size_t) "cudaBindTexture (C++ API), | | * \ref ::cudaBindTexture(size_t*, const struct texture<T, dim, readMode>&, const void*, const struct cudaChannelFormatDesc&, size_t) "cudaBindTexture (C++ API)", | |
| * \ref ::cudaBindTexture(size_t*, const struct texture< T, dim, readMode>&, const void*, size_t) "cudaBindTexture (C++ API, inherited channel descriptor)", | | * \ref ::cudaBindTexture(size_t*, const struct texture<T, dim, readMode>&, const void*, size_t) "cudaBindTexture (C++ API, inherited channel descriptor)", | |
| * \ref ::cudaBindTexture2D(size_t*, const struct textureReference*, const void*, const struct cudaChannelFormatDesc*, size_t, size_t, size_t) "cudaBindTexture2D (C API)", | | * \ref ::cudaBindTexture2D(size_t*, const struct textureReference*, const void*, const struct cudaChannelFormatDesc*, size_t, size_t, size_t) "cudaBindTexture2D (C API)", | |
| * \ref ::cudaBindTextureToArray(const struct texture< T, dim, readMode>&, const struct cudaArray*, const struct cudaChannelFormatDesc&) "cudaBindTextureToArray (C++ API)", | | * \ref ::cudaBindTexture2D(size_t*, const struct texture<T, dim, readMode>&, const void*, size_t, size_t, size_t) "cudaBindTexture2D (C++ API, inherited channel descriptor)", | |
| * \ref ::cudaBindTextureToArray(const struct texture< T, dim, readMode>&, const struct cudaArray*) "cudaBindTextureToArray (C++ API, inherited channel descriptor), | | * \ref ::cudaBindTextureToArray(const struct texture<T, dim, readMode>&, const struct cudaArray*, const struct cudaChannelFormatDesc&) "cudaBindTextureToArray (C++ API)", | |
| | | * \ref ::cudaBindTextureToArray(const struct texture<T, dim, readMode>&, const struct cudaArray*) "cudaBindTextureToArray (C++ API, inherited channel descriptor)", | |
| * \ref ::cudaUnbindTexture(const struct texture<T, dim, readMode>&) "cudaUnbindTexture (C++ API)", | | * \ref ::cudaUnbindTexture(const struct texture<T, dim, readMode>&) "cudaUnbindTexture (C++ API)", | |
| * \ref ::cudaGetTextureAlignmentOffset(size_t*, const struct texture< T, dim, readMode>&) "cudaGetTextureAlignmentOffset (C++ API)" | | * \ref ::cudaGetTextureAlignmentOffset(size_t*, const struct texture<T, dim, readMode>&) "cudaGetTextureAlignmentOffset (C++ API)" | |
| */ | | */ | |
| template<class T, int dim, enum cudaTextureReadMode readMode> | | template<class T, int dim, enum cudaTextureReadMode readMode> | |
| __inline__ __host__ cudaError_t cudaBindTexture2D( | | __inline__ __host__ cudaError_t cudaBindTexture2D( | |
| size_t *offset, | | size_t *offset, | |
| const struct texture<T, dim, readMode> &tex, | | const struct texture<T, dim, readMode> &tex, | |
| const void *devPtr, | | const void *devPtr, | |
| const struct cudaChannelFormatDesc &desc, | | const struct cudaChannelFormatDesc &desc, | |
| size_t width, | | size_t width, | |
| size_t height, | | size_t height, | |
| size_t pitch | | size_t pitch | |
| ) | | ) | |
| { | | { | |
| return cudaBindTexture2D( offset, &tex, devPtr, &desc, width, height, pitch); | | return cudaBindTexture2D(offset, &tex, devPtr, &desc, width, height, pitch); | |
| | | } | |
| | | | |
| | | /** | |
| | | * \brief \hl Binds a 2D memory area to a texture | |
| | | * | |
| | | * Binds the 2D memory area pointed to by \p devPtr to the | |
| | | * texture reference \p tex. The size of the area is constrained by | |
| | | * \p width in texel units, \p height in texel units, and \p pitch in byte | |
| | | * units. The channel descriptor is inherited from the texture reference | |
| | | * type. Any memory previously bound to \p tex is unbound. | |
| | | * | |
| | | * Since the hardware enforces an alignment requirement on texture base | |
| | | * addresses, | |
| | | * \ref ::cudaBindTexture2D(size_t*, const struct texture<T, dim, readMode>&, const void*, size_t, size_t, size_t) "cudaBindTexture2D()" | |
| | | * returns in \p *offset a byte offset that | |
| | | * must be applied to texture fetches in order to read from the desired memory. | |
| | | * This offset must be divided by the texel size and passed to kernels that | |
| | | * read from the texture so they can be applied to the ::tex2D() function. | |
| | | * If the device memory pointer was returned from ::cudaMalloc(), the offset is | |
| | | * guaranteed to be 0 and NULL may be passed as the \p offset parameter. | |
| | | * | |
| | | * \param offset - Offset in bytes | |
| | | * \param tex - Texture reference to bind | |
| | | * \param devPtr - 2D memory area on device | |
| | | * \param width - Width in texel units | |
| | | * \param height - Height in texel units | |
| | | * \param pitch - Pitch in bytes | |
| | | * | |
| | | * \return | |
| | | * ::cudaSuccess, | |
| | | * ::cudaErrorInvalidValue, | |
| | | * ::cudaErrorInvalidDevicePointer, | |
| | | * ::cudaErrorInvalidTexture | |
| | | * \notefnerr | |
| | | * | |
| | | * \sa \ref ::cudaCreateChannelDesc(void) "cudaCreateChannelDesc (C++ API)", | |
| | | * ::cudaGetChannelDesc, ::cudaGetTextureReference, | |
| | | * \ref ::cudaBindTexture(size_t*, const struct texture<T, dim, readMode>&, const void*, const struct cudaChannelFormatDesc&, size_t) "cudaBindTexture (C++ API)", | |
| | | * \ref ::cudaBindTexture(size_t*, const struct texture<T, dim, readMode>&, const void*, size_t) "cudaBindTexture (C++ API, inherited channel descriptor)", | |
| | | * \ref ::cudaBindTexture2D(size_t*, const struct textureReference*, const void*, const struct cudaChannelFormatDesc*, size_t, size_t, size_t) "cudaBindTexture2D (C API)", | |
| | | * \ref ::cudaBindTexture2D(size_t*, const struct texture<T, dim, readMode>&, const void*, const struct cudaChannelFormatDesc&, size_t, size_t, size_t) "cudaBindTexture2D (C++ API)", | |
| | | * \ref ::cudaBindTextureToArray(const struct texture<T, dim, readMode>&, const struct cudaArray*, const struct cudaChannelFormatDesc&) "cudaBindTextureToArray (C++ API)", | |
| | | * \ref ::cudaBindTextureToArray(const struct texture<T, dim, readMode>&, const struct cudaArray*) "cudaBindTextureToArray (C++ API, inherited channel descriptor)", | |
| | | * \ref ::cudaUnbindTexture(const struct texture<T, dim, readMode>&) "cudaUnbindTexture (C++ API)", | |
| | | * \ref ::cudaGetTextureAlignmentOffset(size_t*, const struct texture<T, dim, readMode>&) "cudaGetTextureAlignmentOffset (C++ API)" | |
| | | */ | |
| | | template<class T, int dim, enum cudaTextureReadMode readMode> | |
| | | __inline__ __host__ cudaError_t cudaBindTexture2D( | |
| | | size_t *offset, | |
| | | const struct texture<T, dim, readMode> &tex, | |
| | | const void *devPtr, | |
| | | size_t width, | |
| | | size_t height, | |
| | | size_t pitch | |
| | | ) | |
| | | { | |
| | | return cudaBindTexture2D(offset, &tex, devPtr, &tex.channelDesc, width, height, pitch); | |
| } | | } | |
| | | | |
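A minimal usage sketch of the high-level cudaBindTexture2D() overloads above (not part of the header; the function and variable names are invented and error checking is omitted): pitched memory is allocated, bound with an explicit channel descriptor, and then read in kernels through tex2D().

    // Sketch only: bind pitched 2D device memory and read it with tex2D() in kernels.
    texture<float, 2, cudaReadModeElementType> imgTex;     // file-scope texture reference

    void bindImage(int width, int height)
    {
        float *d_img;
        size_t pitch;
        cudaMallocPitch((void**)&d_img, &pitch, width * sizeof(float), height);

        cudaChannelFormatDesc desc = cudaCreateChannelDesc<float>();
        size_t offset = 0;
        // width and height are in texels, pitch is in bytes.
        cudaBindTexture2D(&offset, imgTex, d_img, desc, width, height, pitch);

        // ... launch kernels that call tex2D(imgTex, x, y) ...

        cudaUnbindTexture(imgTex);
        cudaFree(d_img);
    }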
| /** | | /** | |
| * \brief \hl Binds an array to a texture | | * \brief \hl Binds an array to a texture | |
| * | | * | |
| * Binds the CUDA array \p array to the texture reference \p tex. | | * Binds the CUDA array \p array to the texture reference \p tex. | |
| * \p desc describes how the memory is interpreted when fetching values from | | * \p desc describes how the memory is interpreted when fetching values from | |
| * the texture. Any CUDA array previously bound to \p tex is unbound. | | * the texture. Any CUDA array previously bound to \p tex is unbound. | |
| * | | * | |
| * \param tex - Texture to bind | | * \param tex - Texture to bind | |
| | | | |
| skipping to change at line 545 | | skipping to change at line 699 | |
| * | | * | |
| * \return | | * \return | |
| * ::cudaSuccess, | | * ::cudaSuccess, | |
| * ::cudaErrorInvalidValue, | | * ::cudaErrorInvalidValue, | |
| * ::cudaErrorInvalidDevicePointer, | | * ::cudaErrorInvalidDevicePointer, | |
| * ::cudaErrorInvalidTexture | | * ::cudaErrorInvalidTexture | |
| * \notefnerr | | * \notefnerr | |
| * | | * | |
| * \sa \ref ::cudaCreateChannelDesc(void) "cudaCreateChannelDesc (C++ API)", | | * \sa \ref ::cudaCreateChannelDesc(void) "cudaCreateChannelDesc (C++ API)", | |
| * ::cudaGetChannelDesc, ::cudaGetTextureReference, | | * ::cudaGetChannelDesc, ::cudaGetTextureReference, | |
| * \ref ::cudaBindTexture(size_t*, const struct texture< T, dim, readMode>&, const void*, const struct cudaChannelFormatDesc&, size_t) "cudaBindTexture (C++ API)", | | * \ref ::cudaBindTexture(size_t*, const struct texture<T, dim, readMode>&, const void*, const struct cudaChannelFormatDesc&, size_t) "cudaBindTexture (C++ API)", | |
| * \ref ::cudaBindTexture(size_t*, const struct texture< T, dim, readMode>&, const void*, size_t) "cudaBindTexture (C++ API, inherited channel descriptor)", | | * \ref ::cudaBindTexture(size_t*, const struct texture<T, dim, readMode>&, const void*, size_t) "cudaBindTexture (C++ API, inherited channel descriptor)", | |
| * \ref ::cudaBindTexture2D(size_t*, const struct texture< T, dim, readMode>&, const void*, const struct cudaChannelFormatDesc&, size_t, size_t, size_t) "cudaBindTexture2D (C++ API)", | | * \ref ::cudaBindTexture2D(size_t*, const struct texture<T, dim, readMode>&, const void*, const struct cudaChannelFormatDesc&, size_t, size_t, size_t) "cudaBindTexture2D (C++ API)", | |
| | | * \ref ::cudaBindTexture2D(size_t*, const struct texture<T, dim, readMode>&, const void*, size_t, size_t, size_t) "cudaBindTexture2D (C++ API, inherited channel descriptor)", | |
| * \ref ::cudaBindTextureToArray(const struct textureReference*, const struct cudaArray*, const struct cudaChannelFormatDesc*) "cudaBindTextureToArray (C API)", | | * \ref ::cudaBindTextureToArray(const struct textureReference*, const struct cudaArray*, const struct cudaChannelFormatDesc*) "cudaBindTextureToArray (C API)", | |
| * \ref ::cudaBindTextureToArray(const struct texture< T, dim, readMode>&, const struct cudaArray*) "cudaBindTextureToArray (C++ API, inherited channel descriptor)", | | * \ref ::cudaBindTextureToArray(const struct texture<T, dim, readMode>&, const struct cudaArray*) "cudaBindTextureToArray (C++ API, inherited channel descriptor)", | |
| * \ref ::cudaUnbindTexture(const struct texture< T, dim, readMode>&) "cudaUnbindTexture (C++ API)", | | * \ref ::cudaUnbindTexture(const struct texture<T, dim, readMode>&) "cudaUnbindTexture (C++ API)", | |
| * \ref ::cudaGetTextureAlignmentOffset(size_t*, const struct texture< T, dim, readMode >&) "cudaGetTextureAlignmentOffset (C++ API)" | | * \ref ::cudaGetTextureAlignmentOffset(size_t*, const struct texture<T, dim, readMode >&) "cudaGetTextureAlignmentOffset (C++ API)" | |
| */ | | */ | |
| template<class T, int dim, enum cudaTextureReadMode readMode> | | template<class T, int dim, enum cudaTextureReadMode readMode> | |
| __inline__ __host__ cudaError_t cudaBindTextureToArray( | | __inline__ __host__ cudaError_t cudaBindTextureToArray( | |
| const struct texture<T, dim, readMode> &tex, | | const struct texture<T, dim, readMode> &tex, | |
| const struct cudaArray *array, | | const struct cudaArray *array, | |
| const struct cudaChannelFormatDesc &desc | | const struct cudaChannelFormatDesc &desc | |
| ) | | ) | |
| { | | { | |
| return cudaBindTextureToArray(&tex, array, &desc); | | return cudaBindTextureToArray(&tex, array, &desc); | |
| } | | } | |
| | | | |
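A hedged sketch of the explicit-descriptor cudaBindTextureToArray() overload above (not part of the header; names invented, error checking omitted). The two-argument overload documented next would instead inherit the descriptor from the CUDA array.

    // Sketch only: upload host data into a CUDA array and bind it with an explicit descriptor.
    texture<float, 2, cudaReadModeElementType> arrayTex;   // file-scope texture reference

    void bindArray(const float *h_data, int width, int height)
    {
        cudaChannelFormatDesc desc = cudaCreateChannelDesc<float>();
        cudaArray *cuArray;
        cudaMallocArray(&cuArray, &desc, width, height);
        cudaMemcpyToArray(cuArray, 0, 0, h_data,
                          width * height * sizeof(float), cudaMemcpyHostToDevice);

        cudaBindTextureToArray(arrayTex, cuArray, desc);    // explicit channel descriptor

        // ... kernels sample the array through tex2D(arrayTex, x, y) ...

        cudaUnbindTexture(arrayTex);
        cudaFreeArray(cuArray);
    }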
| skipping to change at line 582 | | skipping to change at line 737 | |
| * | | * | |
| * \return | | * \return | |
| * ::cudaSuccess, | | * ::cudaSuccess, | |
| * ::cudaErrorInvalidValue, | | * ::cudaErrorInvalidValue, | |
| * ::cudaErrorInvalidDevicePointer, | | * ::cudaErrorInvalidDevicePointer, | |
| * ::cudaErrorInvalidTexture | | * ::cudaErrorInvalidTexture | |
| * \notefnerr | | * \notefnerr | |
| * | | * | |
| * \sa \ref ::cudaCreateChannelDesc(void) "cudaCreateChannelDesc (C++ API)", | | * \sa \ref ::cudaCreateChannelDesc(void) "cudaCreateChannelDesc (C++ API)", | |
| * ::cudaGetChannelDesc, ::cudaGetTextureReference, | | * ::cudaGetChannelDesc, ::cudaGetTextureReference, | |
| * \ref ::cudaBindTexture(size_t*, const struct texture< T, dim, readMode>&, const void*, const struct cudaChannelFormatDesc&, size_t) "cudaBindTexture (C++ API)", | | * \ref ::cudaBindTexture(size_t*, const struct texture<T, dim, readMode>&, const void*, const struct cudaChannelFormatDesc&, size_t) "cudaBindTexture (C++ API)", | |
| * \ref ::cudaBindTexture(size_t*, const struct texture< T, dim, readMode>&, const void*, size_t) "cudaBindTexture (C++ API, inherited channel descriptor)", | | * \ref ::cudaBindTexture(size_t*, const struct texture<T, dim, readMode>&, const void*, size_t) "cudaBindTexture (C++ API, inherited channel descriptor)", | |
| * \ref ::cudaBindTexture2D(size_t*, const struct texture< T, dim, readMode>&, const void*, const struct cudaChannelFormatDesc&, size_t, size_t, size_t) "cudaBindTexture2D (C++ API)", | | * \ref ::cudaBindTexture2D(size_t*, const struct texture<T, dim, readMode>&, const void*, const struct cudaChannelFormatDesc&, size_t, size_t, size_t) "cudaBindTexture2D (C++ API)", | |
| | | * \ref ::cudaBindTexture2D(size_t*, const struct texture<T, dim, readMode>&, const void*, size_t, size_t, size_t) "cudaBindTexture2D (C++ API, inherited channel descriptor)", | |
| * \ref ::cudaBindTextureToArray(const struct textureReference*, const struct cudaArray*, const struct cudaChannelFormatDesc*) "cudaBindTextureToArray (C API)", | | * \ref ::cudaBindTextureToArray(const struct textureReference*, const struct cudaArray*, const struct cudaChannelFormatDesc*) "cudaBindTextureToArray (C API)", | |
| * \ref ::cudaBindTextureToArray(const struct texture< T, dim, readMode>&, const struct cudaArray*, const struct cudaChannelFormatDesc&) "cudaBindTextureToArray (C++ API)", | | * \ref ::cudaBindTextureToArray(const struct texture<T, dim, readMode>&, const struct cudaArray*, const struct cudaChannelFormatDesc&) "cudaBindTextureToArray (C++ API)", | |
| * \ref ::cudaUnbindTexture(const struct texture< T, dim, readMode>&) "cudaUnbindTexture (C++ API)", | | * \ref ::cudaUnbindTexture(const struct texture<T, dim, readMode>&) "cudaUnbindTexture (C++ API)", | |
| * \ref ::cudaGetTextureAlignmentOffset(size_t*, const struct texture< T, dim, readMode >&) "cudaGetTextureAlignmentOffset (C++ API)" | | * \ref ::cudaGetTextureAlignmentOffset(size_t*, const struct texture<T, dim, readMode >&) "cudaGetTextureAlignmentOffset (C++ API)" | |
| */ | | */ | |
| template<class T, int dim, enum cudaTextureReadMode readMode> | | template<class T, int dim, enum cudaTextureReadMode readMode> | |
| __inline__ __host__ cudaError_t cudaBindTextureToArray( | | __inline__ __host__ cudaError_t cudaBindTextureToArray( | |
| const struct texture<T, dim, readMode> &tex, | | const struct texture<T, dim, readMode> &tex, | |
| const struct cudaArray *array | | const struct cudaArray *array | |
| ) | | ) | |
| { | | { | |
| struct cudaChannelFormatDesc desc; | | struct cudaChannelFormatDesc desc; | |
| cudaError_t err = cudaGetChannelDesc(&desc, array); | | cudaError_t err = cudaGetChannelDesc(&desc, array); | |
| | | | |
| | | | |
| skipping to change at line 620 | | skipping to change at line 776 | |
| * | | * | |
| * Unbinds the texture bound to \p tex. | | * Unbinds the texture bound to \p tex. | |
| * | | * | |
| * \param tex - Texture to unbind | | * \param tex - Texture to unbind | |
| * | | * | |
| * \return ::cudaSuccess | | * \return ::cudaSuccess | |
| * \notefnerr | | * \notefnerr | |
| * | | * | |
| * \sa \ref ::cudaCreateChannelDesc(void) "cudaCreateChannelDesc (C++ API)", | | * \sa \ref ::cudaCreateChannelDesc(void) "cudaCreateChannelDesc (C++ API)", | |
| * ::cudaGetChannelDesc, ::cudaGetTextureReference, | | * ::cudaGetChannelDesc, ::cudaGetTextureReference, | |
| * \ref ::cudaBindTexture(size_t*, const struct texture< T, dim, readMode>&, const void*, const struct cudaChannelFormatDesc&, size_t) "cudaBindTexture (C++ API)", | | * \ref ::cudaBindTexture(size_t*, const struct texture<T, dim, readMode>&, const void*, const struct cudaChannelFormatDesc&, size_t) "cudaBindTexture (C++ API)", | |
| * \ref ::cudaBindTexture(size_t*, const struct texture< T, dim, readMode>&, const void*, size_t) "cudaBindTexture (C++ API, inherited channel descriptor)", | | * \ref ::cudaBindTexture(size_t*, const struct texture<T, dim, readMode>&, const void*, size_t) "cudaBindTexture (C++ API, inherited channel descriptor)", | |
| * \ref ::cudaBindTexture2D(size_t*, const struct texture< T, dim, readMode>&, const void*, const struct cudaChannelFormatDesc&, size_t, size_t, size_t) "cudaBindTexture2D (C++ API)", | | * \ref ::cudaBindTexture2D(size_t*, const struct texture<T, dim, readMode>&, const void*, const struct cudaChannelFormatDesc&, size_t, size_t, size_t) "cudaBindTexture2D (C++ API)", | |
| * \ref ::cudaBindTextureToArray(const struct texture< T, dim, readMode>&, const struct cudaArray*, const struct cudaChannelFormatDesc&) "cudaBindTextureToArray (C++ API)", | | * \ref ::cudaBindTexture2D(size_t*, const struct texture<T, dim, readMode>&, const void*, size_t, size_t, size_t) "cudaBindTexture2D (C++ API, inherited channel descriptor)", | |
| * \ref ::cudaBindTextureToArray(const struct texture< T, dim, readMode>&, const struct cudaArray*) "cudaBindTextureToArray (C++ API, inherited channel descriptor)", | | * \ref ::cudaBindTextureToArray(const struct texture<T, dim, readMode>&, const struct cudaArray*, const struct cudaChannelFormatDesc&) "cudaBindTextureToArray (C++ API)", | |
| | | * \ref ::cudaBindTextureToArray(const struct texture<T, dim, readMode>&, const struct cudaArray*) "cudaBindTextureToArray (C++ API, inherited channel descriptor)", | |
| * \ref ::cudaUnbindTexture(const struct textureReference*) "cudaUnbindTexture (C API)", | | * \ref ::cudaUnbindTexture(const struct textureReference*) "cudaUnbindTexture (C API)", | |
| * \ref ::cudaGetTextureAlignmentOffset(size_t*, const struct texture< T, dim, readMode >&) "cudaGetTextureAlignmentOffset (C++ API)" | | * \ref ::cudaGetTextureAlignmentOffset(size_t*, const struct texture<T, dim, readMode >&) "cudaGetTextureAlignmentOffset (C++ API)" | |
| */ | | */ | |
| template<class T, int dim, enum cudaTextureReadMode readMode> | | template<class T, int dim, enum cudaTextureReadMode readMode> | |
| __inline__ __host__ cudaError_t cudaUnbindTexture( | | __inline__ __host__ cudaError_t cudaUnbindTexture( | |
| const struct texture<T, dim, readMode> &tex | | const struct texture<T, dim, readMode> &tex | |
| ) | | ) | |
| { | | { | |
| return cudaUnbindTexture(&tex); | | return cudaUnbindTexture(&tex); | |
| } | | } | |
| | | | |
| /******************************************************************************* | | /******************************************************************************* | |
| | | | |
| skipping to change at line 659 | | skipping to change at line 816 | |
| * \param tex - Texture to get offset of | | * \param tex - Texture to get offset of | |
| * | | * | |
| * \return | | * \return | |
| * ::cudaSuccess, | | * ::cudaSuccess, | |
| * ::cudaErrorInvalidTexture, | | * ::cudaErrorInvalidTexture, | |
| * ::cudaErrorInvalidTextureBinding | | * ::cudaErrorInvalidTextureBinding | |
| * \notefnerr | | * \notefnerr | |
| * | | * | |
| * \sa \ref ::cudaCreateChannelDesc(void) "cudaCreateChannelDesc (C++ API)", | | * \sa \ref ::cudaCreateChannelDesc(void) "cudaCreateChannelDesc (C++ API)", | |
| * ::cudaGetChannelDesc, ::cudaGetTextureReference, | | * ::cudaGetChannelDesc, ::cudaGetTextureReference, | |
| * \ref ::cudaBindTexture(size_t*, const struct texture< T, dim, readMode>&, const void*, const struct cudaChannelFormatDesc&, size_t) "cudaBindTexture (C++ API)", | | * \ref ::cudaBindTexture(size_t*, const struct texture<T, dim, readMode>&, const void*, const struct cudaChannelFormatDesc&, size_t) "cudaBindTexture (C++ API)", | |
| * \ref ::cudaBindTexture(size_t*, const struct texture< T, dim, readMode>&, const void*, size_t) "cudaBindTexture (C++ API, inherited channel descriptor)", | | * \ref ::cudaBindTexture(size_t*, const struct texture<T, dim, readMode>&, const void*, size_t) "cudaBindTexture (C++ API, inherited channel descriptor)", | |
| * \ref ::cudaBindTexture2D(size_t*, const struct texture< T, dim, readMode>&, const void*, const struct cudaChannelFormatDesc&, size_t, size_t, size_t) "cudaBindTexture2D (C++ API)", | | * \ref ::cudaBindTexture2D(size_t*, const struct texture<T, dim, readMode>&, const void*, const struct cudaChannelFormatDesc&, size_t, size_t, size_t) "cudaBindTexture2D (C++ API)", | |
| * \ref ::cudaBindTextureToArray(const struct texture< T, dim, readMode>&, const struct cudaArray*, const struct cudaChannelFormatDesc&) "cudaBindTextureToArray (C++ API)", | | * \ref ::cudaBindTexture2D(size_t*, const struct texture<T, dim, readMode>&, const void*, size_t, size_t, size_t) "cudaBindTexture2D (C++ API, inherited channel descriptor)", | |
| * \ref ::cudaBindTextureToArray(const struct texture< T, dim, readMode>&, const struct cudaArray*) "cudaBindTextureToArray (C++ API, inherited channel descriptor)", | | * \ref ::cudaBindTextureToArray(const struct texture<T, dim, readMode>&, const struct cudaArray*, const struct cudaChannelFormatDesc&) "cudaBindTextureToArray (C++ API)", | |
| * \ref ::cudaUnbindTexture(const struct texture< T, dim, readMode>&) "cudaUnbindTexture (C++ API)", | | * \ref ::cudaBindTextureToArray(const struct texture<T, dim, readMode>&, const struct cudaArray*) "cudaBindTextureToArray (C++ API, inherited channel descriptor)", | |
| | | * \ref ::cudaUnbindTexture(const struct texture<T, dim, readMode>&) "cudaUnbindTexture (C++ API)", | |
| * \ref ::cudaGetTextureAlignmentOffset(size_t*, const struct textureReference*) "cudaGetTextureAlignmentOffset (C API)" | | * \ref ::cudaGetTextureAlignmentOffset(size_t*, const struct textureReference*) "cudaGetTextureAlignmentOffset (C API)" | |
| */ | | */ | |
| template<class T, int dim, enum cudaTextureReadMode readMode> | | template<class T, int dim, enum cudaTextureReadMode readMode> | |
| __inline__ __host__ cudaError_t cudaGetTextureAlignmentOffset( | | __inline__ __host__ cudaError_t cudaGetTextureAlignmentOffset( | |
| size_t *offset, | | size_t *offset, | |
| const struct texture<T, dim, readMode> &tex | | const struct texture<T, dim, readMode> &tex | |
| ) | | ) | |
| { | | { | |
| return cudaGetTextureAlignmentOffset(offset, &tex); | | return cudaGetTextureAlignmentOffset(offset, &tex); | |
| } | | } | |
| | | | |
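The offset handling described above can be illustrated with a short sketch (not part of the header; names invented, error checking omitted). Whether a nonzero offset is actually produced depends on the device's texture alignment requirement, so the sketch only shows the query pattern.

    // Sketch only: query the byte offset after binding at an address that may be unaligned.
    texture<float, 1, cudaReadModeElementType> offsetTex;  // file-scope texture reference

    void bindWithOffset(float *d_base, size_t nElems)
    {
        size_t bindOffset = 0;
        // Binding one element past the start of the allocation is typically not aligned.
        cudaBindTexture(&bindOffset, offsetTex, d_base + 1, (nElems - 1) * sizeof(float));

        size_t queried = 0;
        cudaGetTextureAlignmentOffset(&queried, offsetTex);

        // Kernels would add (queried / sizeof(float)) to every tex1Dfetch() index.
    }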
| /** @} */ /* END CUDART_HIGHLEVEL */ | | | |
| | | | |
| /******************************************************************************* | | /******************************************************************************* | |
| *                                                                              * | | *                                                                              * | |
| *                                                                              * | | *                                                                              * | |
| *                                                                              * | | *                                                                              * | |
| *******************************************************************************/ | | *******************************************************************************/ | |
| | | | |
| /** | | /** | |
| * \ingroup CUDART_HIGHLEVEL | | | |
| * \brief Sets the preferred cache configuration for a device function | | * \brief Sets the preferred cache configuration for a device function | |
| * | | * | |
| * On devices where the L1 cache and shared memory use the same hardware | | * On devices where the L1 cache and shared memory use the same hardware | |
| * resources, this sets through \p cacheConfig the preferred cache configuration | | * resources, this sets through \p cacheConfig the preferred cache configuration | |
| * for the function specified via \p func. This is only a preference. The | | * for the function specified via \p func. This is only a preference. The | |
| * runtime will use the requested configuration if possible, but it is free to | | * runtime will use the requested configuration if possible, but it is free to | |
| * choose a different configuration if required to execute \p func. | | * choose a different configuration if required to execute \p func. | |
| * | | * | |
| * \p func can either be a pointer to a function that executes | | * \p func can either be a pointer to a function that executes | |
| * on the device, or it can be a character string specifying the | | * on the device, or it can be a character string specifying the | |
| * fully-decorated (C++) name for a function that executes on the device. | | * fully-decorated (C++) name for a function that executes on the device. | |
| * The parameter specified by \p func must be declared as a \p __global__ | | * The parameter specified by \p func must be declared as a \p __global__ | |
| * function. If the specified function does not exist, | | * function. If the specified function does not exist, | |
| * then ::cudaErrorInvalidDeviceFunction is returned. | | * then ::cudaErrorInvalidDeviceFunction is returned. | |
| * | | * | |
| * This setting does nothing on devices where the size of the L1 cache and | | * This setting does nothing on devices where the size of the L1 cache and | |
| * shared memory are fixed. | | * shared memory are fixed. | |
| * | | * | |
| * Switching between configuration modes may insert a device-side | | * Launching a kernel with a different preference than the most recent | |
| * synchronization point for streamed kernel launches. | | * preference setting may insert a device-side synchronization point. | |
| * | | * | |
| * \param func - Device char string naming device function | | * The supported cache configurations are: | |
| * \param cacheConfig - Cache configuration mode | | * - ::cudaFuncCachePreferNone: no preference for shared memory or L1 (default) | |
| | | * - ::cudaFuncCachePreferShared: prefer larger shared memory and smaller L1 cache | |
| | | * - ::cudaFuncCachePreferL1: prefer larger L1 cache and smaller shared memory | |
| | | * | |
| | | * \param func - Char string naming device function | |
| | | * \param cacheConfig - Requested cache configuration | |
| * | | * | |
| * \return | | * \return | |
| * ::cudaSuccess, | | * ::cudaSuccess, | |
| * ::cudaErrorInitializationError, | | * ::cudaErrorInitializationError, | |
| * ::cudaErrorInvalidDeviceFunction | | * ::cudaErrorInvalidDeviceFunction | |
| * \notefnerr | | * \notefnerr | |
| * | | * | |
| * \sa ::cudaConfigureCall, | | * \sa ::cudaConfigureCall, | |
| * \ref ::cudaFuncSetCacheConfig(const char*, enum cudaFuncCache) "cudaFuncSetCacheConfig (C API)", | | * \ref ::cudaFuncSetCacheConfig(const char*, enum cudaFuncCache) "cudaFuncSetCacheConfig (C API)", | |
| * \ref ::cudaFuncGetAttributes(struct cudaFuncAttributes*, T*) "cudaFuncGetAttributes (C++ API)", | | * \ref ::cudaFuncGetAttributes(struct cudaFuncAttributes*, T*) "cudaFuncGetAttributes (C++ API)", | |
| * \ref ::cudaLaunch(const char*) "cudaLaunch (C API)", | | * \ref ::cudaLaunch(const char*) "cudaLaunch (C API)", | |
| * ::cudaSetDoubleForDevice, | | * ::cudaSetDoubleForDevice, | |
| * ::cudaSetDoubleForHost, | | * ::cudaSetDoubleForHost, | |
| * \ref ::cudaSetupArgument(T,size_t) "cudaSetupArgument (C++ API)" | | * \ref ::cudaSetupArgument(T, size_t) "cudaSetupArgument (C++ API)", | |
| | | * ::cudaThreadGetCacheConfig, | |
| | | * ::cudaThreadSetCacheConfig | |
| */ | | */ | |
| template<class T> | | template<class T> | |
| __inline__ __host__ cudaError_t cudaFuncSetCacheConfig( | | __inline__ __host__ cudaError_t cudaFuncSetCacheConfig( | |
| T *func, | | T *func, | |
| enum cudaFuncCache cacheConfig | | enum cudaFuncCache cacheConfig | |
| ) | | ) | |
| { | | { | |
| return cudaFuncSetCacheConfig((const char*)func, cacheConfig); | | return cudaFuncSetCacheConfig((const char*)func, cacheConfig); | |
| } | | } | |
| | | | |
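A minimal sketch of the cache-configuration preference described above (not part of the header; the kernel name is invented). The request is only a hint and has no effect on devices with fixed L1/shared memory sizes.

    // Sketch only: ask for a larger L1 cache for a kernel that uses little shared memory.
    __global__ void stencilKernel(float *out, const float *in, int n)
    {
        int i = blockIdx.x * blockDim.x + threadIdx.x;
        if (i > 0 && i < n - 1)
            out[i] = 0.25f * in[i - 1] + 0.5f * in[i] + 0.25f * in[i + 1];
    }

    void preferL1()
    {
        // Only a preference; the runtime may still choose another configuration.
        cudaFuncSetCacheConfig(stencilKernel, cudaFuncCachePreferL1);
    }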
| /** | | /** | |
| * \ingroup CUDART_HIGHLEVEL | | | |
| * \brief \hl Launches a device function | | * \brief \hl Launches a device function | |
| * | | * | |
| * Launches the function \p entry on the device. The parameter \p entry can | | * Launches the function \p entry on the device. The parameter \p entry can | |
| * either be a function that executes on the device, or it can be a character | | * either be a function that executes on the device, or it can be a character | |
| * string, naming a function that executes on the device. The parameter | | * string, naming a function that executes on the device. The parameter | |
| * specified by \p entry must be declared as a \p __global__ function. | | * specified by \p entry must be declared as a \p __global__ function. | |
| * \ref ::cudaLaunch(T*) "cudaLaunch()" must be preceded by a call to | | * \ref ::cudaLaunch(T*) "cudaLaunch()" must be preceded by a call to | |
| * ::cudaConfigureCall() since it pops the data that was pushed by | | * ::cudaConfigureCall() since it pops the data that was pushed by | |
| * ::cudaConfigureCall() from the execution stack. | | * ::cudaConfigureCall() from the execution stack. | |
| * | | * | |
| * \param entry - Device function pointer or char string naming device function | | * \param entry - Device function pointer or char string naming device function | |
| * to execute | | * to execute | |
| * | | * | |
| * \return | | * \return | |
| * ::cudaSuccess, | | * ::cudaSuccess, | |
| * ::cudaErrorInvalidDeviceFunction, | | * ::cudaErrorInvalidDeviceFunction, | |
| * ::cudaErrorInvalidConfiguration, | | * ::cudaErrorInvalidConfiguration, | |
| * ::cudaErrorLaunchFailure, | | * ::cudaErrorLaunchFailure, | |
| * ::cudaErrorPriorLaunchFailure, | | | |
| * ::cudaErrorLaunchTimeout, | | * ::cudaErrorLaunchTimeout, | |
| * ::cudaErrorLaunchOutOfResources, | | * ::cudaErrorLaunchOutOfResources, | |
| * ::cudaErrorSharedObjectSymbolNotFound, | | * ::cudaErrorSharedObjectSymbolNotFound, | |
| * ::cudaErrorSharedObjectInitFailed | | * ::cudaErrorSharedObjectInitFailed | |
| * \notefnerr | | * \notefnerr | |
| * | | * | |
| * \sa ::cudaConfigureCall, | | * \sa ::cudaConfigureCall, | |
| * \ref ::cudaFuncSetCacheConfig(T*, enum cudaFuncCache) "cudaFuncSetCacheConfig (C++ API)", | | * \ref ::cudaFuncSetCacheConfig(T*, enum cudaFuncCache) "cudaFuncSetCacheConfig (C++ API)", | |
| * \ref ::cudaFuncGetAttributes(struct cudaFuncAttributes*, T*) "cudaFuncGetAttributes (C++ API)", | | * \ref ::cudaFuncGetAttributes(struct cudaFuncAttributes*, T*) "cudaFuncGetAttributes (C++ API)", | |
| * \ref ::cudaLaunch(const char*) "cudaLaunch (C API)", | | * \ref ::cudaLaunch(const char*) "cudaLaunch (C API)", | |
| * ::cudaSetDoubleForDevice, | | * ::cudaSetDoubleForDevice, | |
| * ::cudaSetDoubleForHost, | | * ::cudaSetDoubleForHost, | |
| * \ref ::cudaSetupArgument(T,size_t) "cudaSetupArgument (C++ API)" | | * \ref ::cudaSetupArgument(T, size_t) "cudaSetupArgument (C++ API)", | |
| | | * ::cudaThreadGetCacheConfig, | |
| | | * ::cudaThreadSetCacheConfig | |
| */ | | */ | |
| template<class T> | | template<class T> | |
| __inline__ __host__ cudaError_t cudaLaunch( | | __inline__ __host__ cudaError_t cudaLaunch( | |
| T *entry | | T *entry | |
| ) | | ) | |
| { | | { | |
| return cudaLaunch((const char*)entry); | | return cudaLaunch((const char*)entry); | |
| } | | } | |
| | | | |
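A usage sketch of the explicit launch path that the documentation above refers to (not part of the header; names invented, error checking omitted). This is the sequence the <<<...>>> launch syntax expands to: configure, push arguments, then launch.

    // Sketch only: explicit configure/setup/launch sequence.
    __global__ void fillKernel(int *data, int value)
    {
        data[blockIdx.x * blockDim.x + threadIdx.x] = value;
    }

    void launchFill(int *d_data, int value)
    {
        cudaConfigureCall(dim3(64), dim3(256));        // grid and block dimensions
        cudaSetupArgument(d_data, 0);                  // push arguments in declaration order;
        cudaSetupArgument(value, sizeof(d_data));      // offsets must respect each argument's alignment
        cudaLaunch(fillKernel);                        // consumes the configuration pushed above
    }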
| /** | | /** | |
| * \ingroup CUDART_HIGHLEVEL | | | |
| * \brief \hl Find out attributes for a given function | | * \brief \hl Find out attributes for a given function | |
| * | | * | |
| * This function obtains the attributes of a function specified via \p entry. | | * This function obtains the attributes of a function specified via \p entry. | |
| * The parameter \p entry can either be a pointer to a function that executes | | * The parameter \p entry can either be a pointer to a function that executes | |
| * on the device, or it can be a character string specifying the | | * on the device, or it can be a character string specifying the | |
| * fully-decorated (C++) name of a function that executes on the device. The | | * fully-decorated (C++) name of a function that executes on the device. The | |
| * parameter specified by \p entry must be declared as a \p __global__ | | * parameter specified by \p entry must be declared as a \p __global__ | |
| * function. The fetched attributes are placed in \p attr. If the specified | | * function. The fetched attributes are placed in \p attr. If the specified | |
| * function does not exist, then ::cudaErrorInvalidDeviceFunction is returned. | | * function does not exist, then ::cudaErrorInvalidDeviceFunction is returned. | |
| * | | * | |
| | | * Note that some function attributes such as | |
| | | * \ref ::cudaFuncAttributes::maxThreadsPerBlock "maxThreadsPerBlock" | |
| | | * may vary based on the device that is currently being used. | |
| | | * | |
| * \param attr - Return pointer to function's attributes | | * \param attr - Return pointer to function's attributes | |
| * \param entry - Function to get attributes of | | * \param entry - Function to get attributes of | |
| * | | * | |
| * \return | | * \return | |
| * ::cudaSuccess, | | * ::cudaSuccess, | |
| * ::cudaErrorInitializationError, | | * ::cudaErrorInitializationError, | |
| * ::cudaErrorInvalidDeviceFunction | | * ::cudaErrorInvalidDeviceFunction | |
| * \notefnerr | | * \notefnerr | |
| * | | * | |
| * \sa ::cudaConfigureCall, | | * \sa ::cudaConfigureCall, | |
| * \ref ::cudaFuncSetCacheConfig(T*, enum cudaFuncCache) "cudaFuncSetCacheConfig (C++ API)", | | * \ref ::cudaFuncSetCacheConfig(T*, enum cudaFuncCache) "cudaFuncSetCacheConfig (C++ API)", | |
| * \ref ::cudaFuncGetAttributes(struct cudaFuncAttributes*, const char*) "cudaFuncGetAttributes (C API)", | | * \ref ::cudaFuncGetAttributes(struct cudaFuncAttributes*, const char*) "cudaFuncGetAttributes (C API)", | |
| * \ref ::cudaLaunch(T*) "cudaLaunch (C++ API)", | | * \ref ::cudaLaunch(T*) "cudaLaunch (C++ API)", | |
| * ::cudaSetDoubleForDevice, | | * ::cudaSetDoubleForDevice, | |
| * ::cudaSetDoubleForHost, | | * ::cudaSetDoubleForHost, | |
| * \ref ::cudaSetupArgument(T,size_t) "cudaSetupArgument (C++ API)" | | * \ref ::cudaSetupArgument(T, size_t) "cudaSetupArgument (C++ API)" | |
| */ | | */ | |
| template<class T> | | template<class T> | |
| __inline__ __host__ cudaError_t cudaFuncGetAttributes( | | __inline__ __host__ cudaError_t cudaFuncGetAttributes( | |
| struct cudaFuncAttributes *attr, | | struct cudaFuncAttributes *attr, | |
| T *entry | | T *entry | |
| ) | | ) | |
| { | | { | |
| return cudaFuncGetAttributes(attr, (const char*)entry); | | return cudaFuncGetAttributes(attr, (const char*)entry); | |
| } | | } | |
| | | | |
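A short sketch of the attribute query described above (not part of the header; names invented, error checking minimal). Querying a kernel's limits before choosing a launch configuration is the typical use.

    // Sketch only: query a kernel's resource usage before picking a block size.
    #include <stdio.h>

    __global__ void workKernel(float *data)
    {
        data[threadIdx.x] += 1.0f;
    }

    void printKernelLimits()
    {
        cudaFuncAttributes attr;
        if (cudaFuncGetAttributes(&attr, workKernel) == cudaSuccess) {
            printf("maxThreadsPerBlock=%d numRegs=%d sharedSizeBytes=%lu\n",
                   attr.maxThreadsPerBlock, attr.numRegs,
                   (unsigned long)attr.sharedSizeBytes);
        }
    }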
| /** | | /** | |
| * \ingroup CUDART_HIGHLEVEL | | | |
| * \brief \hl Binds an array to a surface | | * \brief \hl Binds an array to a surface | |
| * | | * | |
| * Binds the CUDA array \p array to the surface reference \p surf. | | * Binds the CUDA array \p array to the surface reference \p surf. | |
| * \p desc describes how the memory is interpreted when dealing with | | * \p desc describes how the memory is interpreted when dealing with | |
| * the surface. Any CUDA array previously bound to \p surf is unbound. | | * the surface. Any CUDA array previously bound to \p surf is unbound. | |
| * | | * | |
| * \param surf - Surface to bind | | * \param surf - Surface to bind | |
| * \param array - Memory array on device | | * \param array - Memory array on device | |
| * \param desc - Channel format | | * \param desc - Channel format | |
| * | | * | |
| * \return | | * \return | |
| * ::cudaSuccess, | | * ::cudaSuccess, | |
| * ::cudaErrorInvalidValue, | | * ::cudaErrorInvalidValue, | |
| * ::cudaErrorInvalidSurface | | * ::cudaErrorInvalidSurface | |
| * \notefnerr | | * \notefnerr | |
| * | | * | |
| * \sa \ref ::cudaBindSurfaceToArray(const struct surfaceReference*, const struct cudaArray*, const struct cudaChannelFormatDesc*) "cudaBindSurfaceToArray (C API)", | | * \sa \ref ::cudaBindSurfaceToArray(const struct surfaceReference*, const struct cudaArray*, const struct cudaChannelFormatDesc*) "cudaBindSurfaceToArray (C API)", | |
| * \ref ::cudaBindSurfaceToArray(const struct surface< T, dim>&, const struct cudaArray*) "cudaBindSurfaceToArray (C++ API, inherited channel descriptor)" | | * \ref ::cudaBindSurfaceToArray(const struct surface<T, dim>&, const struct cudaArray*) "cudaBindSurfaceToArray (C++ API, inherited channel descriptor)" | |
| */ | | */ | |
| template<class T, int dim> | | template<class T, int dim> | |
| __inline__ __host__ cudaError_t cudaBindSurfaceToArray( | | __inline__ __host__ cudaError_t cudaBindSurfaceToArray( | |
| const struct surface<T, dim> &surf, | | const struct surface<T, dim> &surf, | |
| const struct cudaArray *array, | | const struct cudaArray *array, | |
| const struct cudaChannelFormatDesc &desc | | const struct cudaChannelFormatDesc &desc | |
| ) | | ) | |
| { | | { | |
| return cudaBindSurfaceToArray(&surf, array, &desc); | | return cudaBindSurfaceToArray(&surf, array, &desc); | |
| } | | } | |
| | | | |
| /** | | /** | |
| * \ingroup CUDART_HIGHLEVEL | | | |
| * \brief \hl Binds an array to a surface | | * \brief \hl Binds an array to a surface | |
| * | | * | |
| * Binds the CUDA array \p array to the surface reference \p surf. | | * Binds the CUDA array \p array to the surface reference \p surf. | |
| * The channel descriptor is inherited from the CUDA array. Any CUDA array | | * The channel descriptor is inherited from the CUDA array. Any CUDA array | |
| * previously bound to \p surf is unbound. | | * previously bound to \p surf is unbound. | |
| * | | * | |
| * \param surf - Surface to bind | | * \param surf - Surface to bind | |
| * \param array - Memory array on device | | * \param array - Memory array on device | |
| * | | * | |
| * \return | | * \return | |
| * ::cudaSuccess, | | * ::cudaSuccess, | |
| * ::cudaErrorInvalidValue, | | * ::cudaErrorInvalidValue, | |
| * ::cudaErrorInvalidSurface | | * ::cudaErrorInvalidSurface | |
| * \notefnerr | | * \notefnerr | |
| * | | * | |
| * \sa \ref ::cudaBindSurfaceToArray(const struct surfaceReference*, const struct cudaArray*, const struct cudaChannelFormatDesc*) "cudaBindSurfaceToArray (C API)", | | * \sa \ref ::cudaBindSurfaceToArray(const struct surfaceReference*, const struct cudaArray*, const struct cudaChannelFormatDesc*) "cudaBindSurfaceToArray (C API)", | |
| * \ref ::cudaBindSurfaceToArray(const struct surface< T, dim>&, const struct cudaArray*, const struct cudaChannelFormatDesc&) "cudaBindSurfaceToArray (C++ API)" | | * \ref ::cudaBindSurfaceToArray(const struct surface<T, dim>&, const struct cudaArray*, const struct cudaChannelFormatDesc&) "cudaBindSurfaceToArray (C++ API)" | |
| */ | | */ | |
| template<class T, int dim> | | template<class T, int dim> | |
| __inline__ __host__ cudaError_t cudaBindSurfaceToArray( | | __inline__ __host__ cudaError_t cudaBindSurfaceToArray( | |
| const struct surface<T, dim> &surf, | | const struct surface<T, dim> &surf, | |
| const struct cudaArray *array | | const struct cudaArray *array | |
| ) | | ) | |
| { | | { | |
| struct cudaChannelFormatDesc desc; | | struct cudaChannelFormatDesc desc; | |
| cudaError_t err = cudaGetChannelDesc(&desc, array); | | cudaError_t err = cudaGetChannelDesc(&desc, array); | |
| | | | |
| return err == cudaSuccess ? cudaBindSurfaceToArray(surf, array, desc) : err; | | return err == cudaSuccess ? cudaBindSurfaceToArray(surf, array, desc) : err; | |
| } | | } | |
| | | | |
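A hedged sketch of the surface-binding overloads above (not part of the header; names invented, error checking omitted). Surface writes require a device of compute capability 2.0 or higher, and the array must be created with the cudaArraySurfaceLoadStore flag.

    // Sketch only: bind a surface-writable CUDA array and clear it from a kernel.
    surface<void, 2> outSurf;                              // file-scope surface reference

    __global__ void clearKernel(int width, int height)
    {
        int x = blockIdx.x * blockDim.x + threadIdx.x;
        int y = blockIdx.y * blockDim.y + threadIdx.y;
        if (x < width && y < height)
            surf2Dwrite(0.0f, outSurf, x * sizeof(float), y);   // x is byte-addressed
    }

    void clearImage(int width, int height)
    {
        cudaChannelFormatDesc desc = cudaCreateChannelDesc<float>();
        cudaArray *cuArray;
        // The array must be allocated with cudaArraySurfaceLoadStore to be surface-bindable.
        cudaMallocArray(&cuArray, &desc, width, height, cudaArraySurfaceLoadStore);

        cudaBindSurfaceToArray(outSurf, cuArray);          // descriptor inherited from the array

        dim3 block(16, 16), grid((width + 15) / 16, (height + 15) / 16);
        clearKernel<<<grid, block>>>(width, height);

        cudaFreeArray(cuArray);
    }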
| #endif /* __CUDACC__ */ | | #endif /* __CUDACC__ */ | |
| | | | |
| | | /** @} */ /* END CUDART_HIGHLEVEL */ | |
| | | | |
| #endif /* __cplusplus */ | | #endif /* __cplusplus */ | |
| | | | |
| #endif /* !__CUDA_RUNTIME_H__ */ | | #endif /* !__CUDA_RUNTIME_H__ */ | |
| | | | |
End of changes. 41 change blocks. 147 lines changed or deleted. 367 lines changed or added.