cl.h

/*******************************************************************************
- * Copyright (c) 2008-2009 The Khronos Group Inc.
+ * Copyright (c) 2008-2010 The Khronos Group Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and/or associated documentation files (the
 * "Materials"), to deal in the Materials without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sublicense, and/or sell copies of the Materials, and to
 * permit persons to whom the Materials are furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice shall be included

skipping to change at line 24

 *
 * THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
 * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS.
 ******************************************************************************/

-/* $Revision: 10327 $ on $Date: 2010-02-11 00:24:35 +0530 (Thu, 11 Feb 2010) $ */
+/* $Revision: 11708 $ on $Date: 2010-06-14 12:06:24 +0530 (Mon, 14 Jun 2010) $ */

#ifndef __OPENCL_CL_H
#define __OPENCL_CL_H

#ifdef __APPLE__
#include <OpenCL/cl_platform.h>
#else
#include <CL/cl_platform.h>
#endif

skipping to change at line 56

typedef struct _cl_program *      cl_program;
typedef struct _cl_kernel *       cl_kernel;
typedef struct _cl_event *        cl_event;
typedef struct _cl_sampler *      cl_sampler;

typedef cl_uint     cl_bool;    /* WARNING! Unlike cl_ types in cl_platform.h, cl_bool is not guaranteed to be the same size as the bool in kernels. */
typedef cl_ulong    cl_bitfield;
typedef cl_bitfield cl_device_type;
typedef cl_uint     cl_platform_info;
typedef cl_uint     cl_device_info;
-typedef cl_bitfield cl_device_address_info;
typedef cl_bitfield cl_device_fp_config;
typedef cl_uint     cl_device_mem_cache_type;
typedef cl_uint     cl_device_local_mem_type;
typedef cl_bitfield cl_device_exec_capabilities;
typedef cl_bitfield cl_command_queue_properties;

typedef intptr_t    cl_context_properties;
typedef cl_uint     cl_context_info;
typedef cl_uint     cl_command_queue_info;
typedef cl_uint     cl_channel_order;
typedef cl_uint     cl_channel_type;
typedef cl_bitfield cl_mem_flags;
typedef cl_uint     cl_mem_object_type;
typedef cl_uint     cl_mem_info;
typedef cl_uint     cl_image_info;
+typedef cl_uint     cl_buffer_create_type;
typedef cl_uint     cl_addressing_mode;
typedef cl_uint     cl_filter_mode;
typedef cl_uint     cl_sampler_info;
typedef cl_bitfield cl_map_flags;
typedef cl_uint     cl_program_info;
typedef cl_uint     cl_program_build_info;
typedef cl_int      cl_build_status;
typedef cl_uint     cl_kernel_info;
typedef cl_uint     cl_kernel_work_group_info;
typedef cl_uint     cl_event_info;
typedef cl_uint     cl_command_type;
typedef cl_uint     cl_profiling_info;

typedef struct _cl_image_format {
    cl_channel_order image_channel_order;
    cl_channel_type  image_channel_data_type;
} cl_image_format;

+typedef struct _cl_buffer_region {
+    size_t origin;
+    size_t size;
+} cl_buffer_region;

/******************************************************************************/

/* Error Codes */
#define CL_SUCCESS                                   0
#define CL_DEVICE_NOT_FOUND                          -1
#define CL_DEVICE_NOT_AVAILABLE                      -2
#define CL_COMPILER_NOT_AVAILABLE                    -3
#define CL_MEM_OBJECT_ALLOCATION_FAILURE             -4
#define CL_OUT_OF_RESOURCES                          -5
#define CL_OUT_OF_HOST_MEMORY                        -6
#define CL_PROFILING_INFO_NOT_AVAILABLE              -7
#define CL_MEM_COPY_OVERLAP                          -8
#define CL_IMAGE_FORMAT_MISMATCH                     -9
#define CL_IMAGE_FORMAT_NOT_SUPPORTED                -10
#define CL_BUILD_PROGRAM_FAILURE                     -11
#define CL_MAP_FAILURE                               -12
+#define CL_MISALIGNED_SUB_BUFFER_OFFSET              -13
+#define CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST -14

#define CL_INVALID_VALUE                             -30
#define CL_INVALID_DEVICE_TYPE                       -31
#define CL_INVALID_PLATFORM                          -32
#define CL_INVALID_DEVICE                            -33
#define CL_INVALID_CONTEXT                           -34
#define CL_INVALID_QUEUE_PROPERTIES                  -35
#define CL_INVALID_COMMAND_QUEUE                     -36
#define CL_INVALID_HOST_PTR                          -37
#define CL_INVALID_MEM_OBJECT                        -38

skipping to change at line 144 (old) / line 151 (new)

#define CL_INVALID_EVENT_WAIT_LIST                   -57
#define CL_INVALID_EVENT                             -58
#define CL_INVALID_OPERATION                         -59
#define CL_INVALID_GL_OBJECT                         -60
#define CL_INVALID_BUFFER_SIZE                       -61
#define CL_INVALID_MIP_LEVEL                         -62
#define CL_INVALID_GLOBAL_WORK_SIZE                  -63
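The two codes added in 1.1 (CL_MISALIGNED_SUB_BUFFER_OFFSET and CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST) come back through the same cl_int return path as every 1.0 code, so existing error handling picks them up unchanged. As a minimal host-side sketch (the CHECK_CL macro is illustrative, not part of the header):

    #include <stdio.h>
    #include <stdlib.h>
    #include <CL/cl.h>

    /* Hypothetical helper: report the numeric error code and abort. */
    #define CHECK_CL(err)                                          \
        do {                                                       \
            cl_int _e = (err);                                     \
            if (_e != CL_SUCCESS) {                                \
                fprintf(stderr, "OpenCL error %d at %s:%d\n",      \
                        _e, __FILE__, __LINE__);                   \
                exit(EXIT_FAILURE);                                \
            }                                                      \
        } while (0)

Usage is simply CHECK_CL(clFinish(queue)); around any call returning cl_int.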
/* OpenCL Version */
#define CL_VERSION_1_0                              1
+#define CL_VERSION_1_1                              1

/* cl_bool */
#define CL_FALSE                                    0
#define CL_TRUE                                     1

/* cl_platform_info */
#define CL_PLATFORM_PROFILE                         0x0900
#define CL_PLATFORM_VERSION                         0x0901
#define CL_PLATFORM_NAME                            0x0902
#define CL_PLATFORM_VENDOR                          0x0903

skipping to change at line 216 (old) / line 224 (new)

#define CL_DEVICE_QUEUE_PROPERTIES                  0x102A
#define CL_DEVICE_NAME                              0x102B
#define CL_DEVICE_VENDOR                            0x102C
#define CL_DRIVER_VERSION                           0x102D
#define CL_DEVICE_PROFILE                           0x102E
#define CL_DEVICE_VERSION                           0x102F
#define CL_DEVICE_EXTENSIONS                        0x1030
#define CL_DEVICE_PLATFORM                          0x1031
/* 0x1032 reserved for CL_DEVICE_DOUBLE_FP_CONFIG */
/* 0x1033 reserved for CL_DEVICE_HALF_FP_CONFIG */
+#define CL_DEVICE_PREFERRED_VECTOR_WIDTH_HALF       0x1034
+#define CL_DEVICE_HOST_UNIFIED_MEMORY               0x1035
+#define CL_DEVICE_NATIVE_VECTOR_WIDTH_CHAR          0x1036
+#define CL_DEVICE_NATIVE_VECTOR_WIDTH_SHORT         0x1037
+#define CL_DEVICE_NATIVE_VECTOR_WIDTH_INT           0x1038
+#define CL_DEVICE_NATIVE_VECTOR_WIDTH_LONG          0x1039
+#define CL_DEVICE_NATIVE_VECTOR_WIDTH_FLOAT         0x103A
+#define CL_DEVICE_NATIVE_VECTOR_WIDTH_DOUBLE        0x103B
+#define CL_DEVICE_NATIVE_VECTOR_WIDTH_HALF          0x103C
+#define CL_DEVICE_OPENCL_C_VERSION                  0x103D

/* cl_device_fp_config - bitfield */
#define CL_FP_DENORM                                (1 << 0)
#define CL_FP_INF_NAN                               (1 << 1)
#define CL_FP_ROUND_TO_NEAREST                      (1 << 2)
#define CL_FP_ROUND_TO_ZERO                         (1 << 3)
#define CL_FP_ROUND_TO_INF                          (1 << 4)
#define CL_FP_FMA                                   (1 << 5)
+#define CL_FP_SOFT_FLOAT                            (1 << 6)

/* cl_device_mem_cache_type */
#define CL_NONE                                     0x0
#define CL_READ_ONLY_CACHE                          0x1
#define CL_READ_WRITE_CACHE                         0x2

/* cl_device_local_mem_type */
#define CL_LOCAL                                    0x1
#define CL_GLOBAL                                   0x2

skipping to change at line 246 (old) / line 265 (new)

#define CL_EXEC_NATIVE_KERNEL                       (1 << 1)

/* cl_command_queue_properties - bitfield */
#define CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE      (1 << 0)
#define CL_QUEUE_PROFILING_ENABLE                   (1 << 1)

/* cl_context_info */
#define CL_CONTEXT_REFERENCE_COUNT                  0x1080
#define CL_CONTEXT_DEVICES                          0x1081
#define CL_CONTEXT_PROPERTIES                       0x1082
+#define CL_CONTEXT_NUM_DEVICES                      0x1083

/* cl_context_info + cl_context_properties */
#define CL_CONTEXT_PLATFORM                         0x1084

/* cl_command_queue_info */
#define CL_QUEUE_CONTEXT                            0x1090
#define CL_QUEUE_DEVICE                             0x1091
#define CL_QUEUE_REFERENCE_COUNT                    0x1092
#define CL_QUEUE_PROPERTIES                         0x1093

skipping to change at line 275 (old) / line 295 (new)

#define CL_R                                        0x10B0
#define CL_A                                        0x10B1
#define CL_RG                                       0x10B2
#define CL_RA                                       0x10B3
#define CL_RGB                                      0x10B4
#define CL_RGBA                                     0x10B5
#define CL_BGRA                                     0x10B6
#define CL_ARGB                                     0x10B7
#define CL_INTENSITY                                0x10B8
#define CL_LUMINANCE                                0x10B9
+#define CL_Rx                                       0x10BA
+#define CL_RGx                                      0x10BB
+#define CL_RGBx                                     0x10BC

/* cl_channel_type */
#define CL_SNORM_INT8                               0x10D0
#define CL_SNORM_INT16                              0x10D1
#define CL_UNORM_INT8                               0x10D2
#define CL_UNORM_INT16                              0x10D3
#define CL_UNORM_SHORT_565                          0x10D4
#define CL_UNORM_SHORT_555                          0x10D5
#define CL_UNORM_INT_101010                         0x10D6
#define CL_SIGNED_INT8                              0x10D7

skipping to change at line 306 (old) / line 329 (new)

#define CL_MEM_OBJECT_IMAGE3D                       0x10F2

/* cl_mem_info */
#define CL_MEM_TYPE                                 0x1100
#define CL_MEM_FLAGS                                0x1101
#define CL_MEM_SIZE                                 0x1102
#define CL_MEM_HOST_PTR                             0x1103
#define CL_MEM_MAP_COUNT                            0x1104
#define CL_MEM_REFERENCE_COUNT                      0x1105
#define CL_MEM_CONTEXT                              0x1106
+#define CL_MEM_ASSOCIATED_MEMOBJECT                 0x1107
+#define CL_MEM_OFFSET                               0x1108

/* cl_image_info */
#define CL_IMAGE_FORMAT                             0x1110
#define CL_IMAGE_ELEMENT_SIZE                       0x1111
#define CL_IMAGE_ROW_PITCH                          0x1112
#define CL_IMAGE_SLICE_PITCH                        0x1113
#define CL_IMAGE_WIDTH                              0x1114
#define CL_IMAGE_HEIGHT                             0x1115
#define CL_IMAGE_DEPTH                              0x1116

/* cl_addressing_mode */
#define CL_ADDRESS_NONE                             0x1130
#define CL_ADDRESS_CLAMP_TO_EDGE                    0x1131
#define CL_ADDRESS_CLAMP                            0x1132
#define CL_ADDRESS_REPEAT                           0x1133
+#define CL_ADDRESS_MIRRORED_REPEAT                  0x1134

/* cl_filter_mode */
#define CL_FILTER_NEAREST                           0x1140
#define CL_FILTER_LINEAR                            0x1141

/* cl_sampler_info */
#define CL_SAMPLER_REFERENCE_COUNT                  0x1150
#define CL_SAMPLER_CONTEXT                          0x1151
#define CL_SAMPLER_NORMALIZED_COORDS                0x1152
#define CL_SAMPLER_ADDRESSING_MODE                  0x1153

skipping to change at line 368 (old) / line 394 (new)

#define CL_KERNEL_FUNCTION_NAME                     0x1190
#define CL_KERNEL_NUM_ARGS                          0x1191
#define CL_KERNEL_REFERENCE_COUNT                   0x1192
#define CL_KERNEL_CONTEXT                           0x1193
#define CL_KERNEL_PROGRAM                           0x1194

/* cl_kernel_work_group_info */
#define CL_KERNEL_WORK_GROUP_SIZE                   0x11B0
#define CL_KERNEL_COMPILE_WORK_GROUP_SIZE           0x11B1
#define CL_KERNEL_LOCAL_MEM_SIZE                    0x11B2
+#define CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE 0x11B3
+#define CL_KERNEL_PRIVATE_MEM_SIZE                  0x11B4

/* cl_event_info */
#define CL_EVENT_COMMAND_QUEUE                      0x11D0
#define CL_EVENT_COMMAND_TYPE                       0x11D1
#define CL_EVENT_REFERENCE_COUNT                    0x11D2
#define CL_EVENT_COMMAND_EXECUTION_STATUS           0x11D3
+#define CL_EVENT_CONTEXT                            0x11D4

/* cl_command_type */
#define CL_COMMAND_NDRANGE_KERNEL                   0x11F0
#define CL_COMMAND_TASK                             0x11F1
#define CL_COMMAND_NATIVE_KERNEL                    0x11F2
#define CL_COMMAND_READ_BUFFER                      0x11F3
#define CL_COMMAND_WRITE_BUFFER                     0x11F4
#define CL_COMMAND_COPY_BUFFER                      0x11F5
#define CL_COMMAND_READ_IMAGE                       0x11F6
#define CL_COMMAND_WRITE_IMAGE                      0x11F7
#define CL_COMMAND_COPY_IMAGE                       0x11F8
#define CL_COMMAND_COPY_IMAGE_TO_BUFFER             0x11F9
#define CL_COMMAND_COPY_BUFFER_TO_IMAGE             0x11FA
#define CL_COMMAND_MAP_BUFFER                       0x11FB
#define CL_COMMAND_MAP_IMAGE                        0x11FC
#define CL_COMMAND_UNMAP_MEM_OBJECT                 0x11FD
#define CL_COMMAND_MARKER                           0x11FE
#define CL_COMMAND_ACQUIRE_GL_OBJECTS               0x11FF
#define CL_COMMAND_RELEASE_GL_OBJECTS               0x1200
+#define CL_COMMAND_READ_BUFFER_RECT                 0x1201
+#define CL_COMMAND_WRITE_BUFFER_RECT                0x1202
+#define CL_COMMAND_COPY_BUFFER_RECT                 0x1203
+#define CL_COMMAND_USER                             0x1204

/* command execution status */
#define CL_COMPLETE                                 0x0
#define CL_RUNNING                                  0x1
#define CL_SUBMITTED                                0x2
#define CL_QUEUED                                   0x3

+/* cl_buffer_create_type */
+#define CL_BUFFER_CREATE_TYPE_REGION                0x1220

/* cl_profiling_info */
#define CL_PROFILING_COMMAND_QUEUED                 0x1280
#define CL_PROFILING_COMMAND_SUBMIT                 0x1281
#define CL_PROFILING_COMMAND_START                  0x1282
#define CL_PROFILING_COMMAND_END                    0x1283

/********************************************************************************************************/

/* Platform API */
extern CL_API_ENTRY cl_int CL_API_CALL

skipping to change at line 441 (old) / line 477 (new)

                cl_device_info /* param_name */,
                size_t         /* param_value_size */,
                void *         /* param_value */,
                size_t *       /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0;

/* Context APIs */
extern CL_API_ENTRY cl_context CL_API_CALL
clCreateContext(const cl_context_properties * /* properties */,
                cl_uint              /* num_devices */,
                const cl_device_id * /* devices */,
-               void (*pfn_notify)(const char *, const void *, size_t, void *) /* pfn_notify */,
+               void (CL_CALLBACK * /* pfn_notify */)(const char *, const void *, size_t, void *),
                void *               /* user_data */,
                cl_int *             /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0;

extern CL_API_ENTRY cl_context CL_API_CALL
clCreateContextFromType(const cl_context_properties * /* properties */,
                        cl_device_type /* device_type */,
-                       void (*pfn_notify)(const char *, const void *, size_t, void *) /* pfn_notify */,
+                       void (CL_CALLBACK * /* pfn_notify*/ )(const char *, const void *, size_t, void *),
                        void *         /* user_data */,
                        cl_int *       /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0;
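The only change to the context creation entry points is that the notification callback is now annotated with CL_CALLBACK, so a host callback must carry the same calling convention to match the prototype. A minimal sketch, with error handling elided and the notify_fn name purely illustrative:

    #include <stdio.h>
    #include <CL/cl.h>

    /* Signature must match the header, including the CL_CALLBACK annotation. */
    static void CL_CALLBACK notify_fn(const char *errinfo,
                                      const void *private_info,
                                      size_t cb, void *user_data)
    {
        fprintf(stderr, "OpenCL context error: %s\n", errinfo);
    }

    /* ... later, with a device already obtained:
       cl_int err;
       cl_context ctx = clCreateContext(NULL, 1, &device,
                                        notify_fn, NULL, &err);          */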
extern CL_API_ENTRY cl_int CL_API_CALL
clRetainContext(cl_context /* context */) CL_API_SUFFIX__VERSION_1_0;

extern CL_API_ENTRY cl_int CL_API_CALL
clReleaseContext(cl_context /* context */) CL_API_SUFFIX__VERSION_1_0;

extern CL_API_ENTRY cl_int CL_API_CALL

skipping to change at line 485 (old) / line 521 (new)

extern CL_API_ENTRY cl_int CL_API_CALL
clReleaseCommandQueue(cl_command_queue /* command_queue */) CL_API_SUFFIX__VERSION_1_0;

extern CL_API_ENTRY cl_int CL_API_CALL
clGetCommandQueueInfo(cl_command_queue      /* command_queue */,
                      cl_command_queue_info /* param_name */,
                      size_t                /* param_value_size */,
                      void *                /* param_value */,
                      size_t *              /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0;

+#ifdef CL_USE_DEPRECATED_OPENCL_1_0_APIS
+#warning CL_USE_DEPRECATED_OPENCL_1_0_APIS is defined. These APIs are unsupported and untested in OpenCL 1.1!
+/*
+ *  WARNING:
+ *     This API introduces mutable state into the OpenCL implementation. It has been REMOVED
+ *  to better facilitate thread safety.  The 1.0 API is not thread safe. It is not tested by the
+ *  OpenCL 1.1 conformance test, and consequently may not work or may not work dependably.
+ *  It is likely to be non-performant. Use of this API is not advised. Use at your own risk.
+ *
+ *  Software developers previously relying on this API are instructed to set the command queue
+ *  properties when creating the queue, instead.
+ */
extern CL_API_ENTRY cl_int CL_API_CALL
clSetCommandQueueProperty(cl_command_queue              /* command_queue */,
                          cl_command_queue_properties   /* properties */,
                          cl_bool                       /* enable */,
-                         cl_command_queue_properties * /* old_properties */) CL_API_SUFFIX__VERSION_1_0;
+                         cl_command_queue_properties * /* old_properties */) CL_EXT_SUFFIX__VERSION_1_0_DEPRECATED;
+#endif /* CL_USE_DEPRECATED_OPENCL_1_0_APIS */
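The replacement pattern the deprecation comment points to is simply requesting the properties at queue creation. A sketch, assuming ctx and device from earlier setup:

    /* Instead of toggling properties later via the removed
       clSetCommandQueueProperty, request them when creating the queue: */
    cl_int err;
    cl_command_queue q = clCreateCommandQueue(ctx, device,
                                              CL_QUEUE_PROFILING_ENABLE,
                                              &err);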
/* Memory Object APIs */
extern CL_API_ENTRY cl_mem CL_API_CALL
clCreateBuffer(cl_context   /* context */,
               cl_mem_flags /* flags */,
               size_t       /* size */,
               void *       /* host_ptr */,
               cl_int *     /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0;

extern CL_API_ENTRY cl_mem CL_API_CALL
+clCreateSubBuffer(cl_mem                /* buffer */,
+                  cl_mem_flags          /* flags */,
+                  cl_buffer_create_type /* buffer_create_type */,
+                  const void *          /* buffer_create_info */,
+                  cl_int *              /* errcode_ret */) CL_API_SUFFIX__VERSION_1_1;
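clCreateSubBuffer takes its description through an untyped buffer_create_info pointer; the cl_buffer_region struct added near the top of the header is the layout that goes with CL_BUFFER_CREATE_TYPE_REGION. A sketch carving a byte range out of an existing buffer (buf assumed from earlier; per the 1.1 spec, a misaligned origin is what produces the new CL_MISALIGNED_SUB_BUFFER_OFFSET error):

    cl_buffer_region region;
    region.origin = 0;      /* byte offset into the parent buffer */
    region.size   = 1024;   /* byte length of the sub-buffer      */

    cl_int err;
    cl_mem sub = clCreateSubBuffer(buf, CL_MEM_READ_WRITE,
                                   CL_BUFFER_CREATE_TYPE_REGION,
                                   &region, &err);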
+extern CL_API_ENTRY cl_mem CL_API_CALL
clCreateImage2D(cl_context              /* context */,
                cl_mem_flags            /* flags */,
                const cl_image_format * /* image_format */,
                size_t                  /* image_width */,
                size_t                  /* image_height */,
                size_t                  /* image_row_pitch */,
                void *                  /* host_ptr */,
                cl_int *                /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0;

extern CL_API_ENTRY cl_mem CL_API_CALL

skipping to change at line 549 (old) / line 605 (new)

               void *   /* param_value */,
               size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0;

extern CL_API_ENTRY cl_int CL_API_CALL
clGetImageInfo(cl_mem        /* image */,
               cl_image_info /* param_name */,
               size_t        /* param_value_size */,
               void *        /* param_value */,
               size_t *      /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0;

+extern CL_API_ENTRY cl_int CL_API_CALL
+clSetMemObjectDestructorCallback(cl_mem /* memobj */,
+                                 void (CL_CALLBACK * /*pfn_notify*/)(cl_mem /* memobj */, void* /*user_data*/),
+                                 void * /*user_data */) CL_API_SUFFIX__VERSION_1_1;
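A sketch of the new 1.1 destructor hook, useful when a buffer was created with CL_MEM_USE_HOST_PTR and the host allocation must not be freed while the cl_mem is alive (the callback name is illustrative):

    static void CL_CALLBACK on_mem_destroyed(cl_mem memobj, void *user_data)
    {
        /* Safe point to release the host allocation backing the buffer. */
        free(user_data);
    }

    /* ... after creating buf with CL_MEM_USE_HOST_PTR over host_ptr:
       clSetMemObjectDestructorCallback(buf, on_mem_destroyed, host_ptr);  */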
/* Sampler APIs */
extern CL_API_ENTRY cl_sampler CL_API_CALL
clCreateSampler(cl_context         /* context */,
                cl_bool            /* normalized_coords */,
                cl_addressing_mode /* addressing_mode */,
                cl_filter_mode     /* filter_mode */,
                cl_int *           /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0;

extern CL_API_ENTRY cl_int CL_API_CALL
clRetainSampler(cl_sampler /* sampler */) CL_API_SUFFIX__VERSION_1_0;

skipping to change at line 598 (old) / line 659 (new)

clRetainProgram(cl_program /* program */) CL_API_SUFFIX__VERSION_1_0;

extern CL_API_ENTRY cl_int CL_API_CALL
clReleaseProgram(cl_program /* program */) CL_API_SUFFIX__VERSION_1_0;

extern CL_API_ENTRY cl_int CL_API_CALL
clBuildProgram(cl_program           /* program */,
               cl_uint              /* num_devices */,
               const cl_device_id * /* device_list */,
               const char *         /* options */,
-              void (*pfn_notify)(cl_program /* program */, void * /* user_data */),
+              void (CL_CALLBACK * /* pfn_notify */)(cl_program /* program */, void * /* user_data */),
               void *               /* user_data */) CL_API_SUFFIX__VERSION_1_0;
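A common synchronous pattern passes a NULL callback and fetches the build log on CL_BUILD_PROGRAM_FAILURE. This fragment assumes prog and device from earlier; clGetProgramBuildInfo and CL_PROGRAM_BUILD_LOG are part of the same API, though they fall in a region this diff skips:

    cl_int err = clBuildProgram(prog, 1, &device, "", NULL, NULL);
    if (err == CL_BUILD_PROGRAM_FAILURE) {
        size_t log_size = 0;
        clGetProgramBuildInfo(prog, device, CL_PROGRAM_BUILD_LOG,
                              0, NULL, &log_size);
        char *log = (char *)malloc(log_size);
        clGetProgramBuildInfo(prog, device, CL_PROGRAM_BUILD_LOG,
                              log_size, log, NULL);
        fprintf(stderr, "%s\n", log);
        free(log);
    }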
extern CL_API_ENTRY cl_int CL_API_CALL
clUnloadCompiler(void) CL_API_SUFFIX__VERSION_1_0;

extern CL_API_ENTRY cl_int CL_API_CALL
clGetProgramInfo(cl_program      /* program */,
                 cl_program_info /* param_name */,
                 size_t          /* param_value_size */,
                 void *          /* param_value */,

skipping to change at line 670 (old) / line 731 (new)

clWaitForEvents(cl_uint          /* num_events */,
                const cl_event * /* event_list */) CL_API_SUFFIX__VERSION_1_0;

extern CL_API_ENTRY cl_int CL_API_CALL
clGetEventInfo(cl_event      /* event */,
               cl_event_info /* param_name */,
               size_t        /* param_value_size */,
               void *        /* param_value */,
               size_t *      /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0;

+extern CL_API_ENTRY cl_event CL_API_CALL
+clCreateUserEvent(cl_context /* context */,
+                  cl_int *   /* errcode_ret */) CL_API_SUFFIX__VERSION_1_1;

extern CL_API_ENTRY cl_int CL_API_CALL
clRetainEvent(cl_event /* event */) CL_API_SUFFIX__VERSION_1_0;

extern CL_API_ENTRY cl_int CL_API_CALL
clReleaseEvent(cl_event /* event */) CL_API_SUFFIX__VERSION_1_0;

+extern CL_API_ENTRY cl_int CL_API_CALL
+clSetUserEventStatus(cl_event /* event */,
+                     cl_int   /* execution_status */) CL_API_SUFFIX__VERSION_1_1;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clSetEventCallback(cl_event /* event */,
+                   cl_int   /* command_exec_callback_type */,
+                   void (CL_CALLBACK * /* pfn_notify */)(cl_event, cl_int, void *),
+                   void *   /* user_data */) CL_API_SUFFIX__VERSION_1_1;
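A sketch tying the three new 1.1 event entry points together: a user event gates an enqueued transfer until the host releases it, and a callback fires when the transfer completes. Names (on_complete, q, buf, ptr, size) are illustrative; the enqueue must be non-blocking, or waiting on the not-yet-complete user event would deadlock:

    static void CL_CALLBACK on_complete(cl_event ev, cl_int status, void *ud)
    {
        /* Runs once the dependent command reaches CL_COMPLETE. */
    }

    cl_int err;
    cl_event gate = clCreateUserEvent(ctx, &err);
    cl_event done;

    /* Gate the read on the user event: */
    clEnqueueReadBuffer(q, buf, CL_FALSE, 0, size, ptr, 1, &gate, &done);

    /* Be notified when the gated command finishes: */
    clSetEventCallback(done, CL_COMPLETE, on_complete, NULL);

    /* Host-side trigger: flips the user event to CL_COMPLETE. */
    clSetUserEventStatus(gate, CL_COMPLETE);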
/* Profiling APIs */
extern CL_API_ENTRY cl_int CL_API_CALL
clGetEventProfilingInfo(cl_event          /* event */,
                        cl_profiling_info /* param_name */,
                        size_t            /* param_value_size */,
                        void *            /* param_value */,
                        size_t *          /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0;

/* Flush and Finish APIs */
extern CL_API_ENTRY cl_int CL_API_CALL

skipping to change at line 704 (old) / line 779 (new)

                    cl_mem           /* buffer */,
                    cl_bool          /* blocking_read */,
                    size_t           /* offset */,
                    size_t           /* cb */,
                    void *           /* ptr */,
                    cl_uint          /* num_events_in_wait_list */,
                    const cl_event * /* event_wait_list */,
                    cl_event *       /* event */) CL_API_SUFFIX__VERSION_1_0;

extern CL_API_ENTRY cl_int CL_API_CALL
+clEnqueueReadBufferRect(cl_command_queue /* command_queue */,
+                        cl_mem           /* buffer */,
+                        cl_bool          /* blocking_read */,
+                        const size_t *   /* buffer_offset */,
+                        const size_t *   /* host_offset */,
+                        const size_t *   /* region */,
+                        size_t           /* buffer_row_pitch */,
+                        size_t           /* buffer_slice_pitch */,
+                        size_t           /* host_row_pitch */,
+                        size_t           /* host_slice_pitch */,
+                        void *           /* ptr */,
+                        cl_uint          /* num_events_in_wait_list */,
+                        const cl_event * /* event_wait_list */,
+                        cl_event *       /* event */) CL_API_SUFFIX__VERSION_1_1;
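The new rect variants address a buffer as a 3D array via {x, y, z} triples of byte offsets, rows, and slices. A sketch reading a w-byte by h-row tile out of a larger pitched buffer into a tightly packed host array (q, buf, host_ptr, w, h, buffer_row_pitch assumed from context; a pitch of 0 means "compute from region"):

    size_t buffer_origin[3] = {0, 0, 0};  /* byte x, row y, slice z in buf */
    size_t host_origin[3]   = {0, 0, 0};
    size_t region[3]        = {w, h, 1};  /* w in bytes, h in rows         */

    clEnqueueReadBufferRect(q, buf, CL_TRUE,
                            buffer_origin, host_origin, region,
                            buffer_row_pitch, 0,  /* pitches inside buf    */
                            0, 0,                 /* packed host layout    */
                            host_ptr, 0, NULL, NULL);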
+extern CL_API_ENTRY cl_int CL_API_CALL
clEnqueueWriteBuffer(cl_command_queue /* command_queue */,
                     cl_mem           /* buffer */,
                     cl_bool          /* blocking_write */,
                     size_t           /* offset */,
                     size_t           /* cb */,
                     const void *     /* ptr */,
                     cl_uint          /* num_events_in_wait_list */,
                     const cl_event * /* event_wait_list */,
                     cl_event *       /* event */) CL_API_SUFFIX__VERSION_1_0;

extern CL_API_ENTRY cl_int CL_API_CALL
+clEnqueueWriteBufferRect(cl_command_queue /* command_queue */,
+                         cl_mem           /* buffer */,
+                         cl_bool          /* blocking_read */,
+                         const size_t *   /* buffer_offset */,
+                         const size_t *   /* host_offset */,
+                         const size_t *   /* region */,
+                         size_t           /* buffer_row_pitch */,
+                         size_t           /* buffer_slice_pitch */,
+                         size_t           /* host_row_pitch */,
+                         size_t           /* host_slice_pitch */,
+                         const void *     /* ptr */,
+                         cl_uint          /* num_events_in_wait_list */,
+                         const cl_event * /* event_wait_list */,
+                         cl_event *       /* event */) CL_API_SUFFIX__VERSION_1_1;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
clEnqueueCopyBuffer(cl_command_queue /* command_queue */,
                    cl_mem           /* src_buffer */,
                    cl_mem           /* dst_buffer */,
                    size_t           /* src_offset */,
                    size_t           /* dst_offset */,
                    size_t           /* cb */,
                    cl_uint          /* num_events_in_wait_list */,
                    const cl_event * /* event_wait_list */,
                    cl_event *       /* event */) CL_API_SUFFIX__VERSION_1_0;

extern CL_API_ENTRY cl_int CL_API_CALL
+clEnqueueCopyBufferRect(cl_command_queue /* command_queue */,
+                        cl_mem           /* src_buffer */,
+                        cl_mem           /* dst_buffer */,
+                        const size_t *   /* src_origin */,
+                        const size_t *   /* dst_origin */,
+                        const size_t *   /* region */,
+                        size_t           /* src_row_pitch */,
+                        size_t           /* src_slice_pitch */,
+                        size_t           /* dst_row_pitch */,
+                        size_t           /* dst_slice_pitch */,
+                        cl_uint          /* num_events_in_wait_list */,
+                        const cl_event * /* event_wait_list */,
+                        cl_event *       /* event */) CL_API_SUFFIX__VERSION_1_1;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
clEnqueueReadImage(cl_command_queue /* command_queue */,
                   cl_mem           /* image */,
                   cl_bool          /* blocking_read */,
                   const size_t *   /* origin[3] */,
                   const size_t *   /* region[3] */,
                   size_t           /* row_pitch */,
                   size_t           /* slice_pitch */,
                   void *           /* ptr */,
                   cl_uint          /* num_events_in_wait_list */,
                   const cl_event * /* event_wait_list */,

End of changes. 30 change blocks. 9 lines changed or deleted, 147 lines changed or added.
cl_ext.h

/*******************************************************************************
- * Copyright (c) 2008-2009 The Khronos Group Inc.
+ * Copyright (c) 2008-2010 The Khronos Group Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and/or associated documentation files (the
 * "Materials"), to deal in the Materials without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sublicense, and/or sell copies of the Materials, and to
 * permit persons to whom the Materials are furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice shall be included

skipping to change at line 24

 *
 * THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
 * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS.
 ******************************************************************************/

-/* $Revision$ on $Date$ */
+/* $Revision: 11687 $ on $Date: 2010-06-12 03:47:22 +0530 (Sat, 12 Jun 2010) $ */

+/* cl_ext.h contains OpenCL extensions which don't have external */
+/* (OpenGL, D3D) dependencies.                                    */

#ifndef __CL_EXT_H
#define __CL_EXT_H

#ifdef __cplusplus
extern "C" {
#endif

+#ifdef __APPLE__
+    #include <OpenCL/cl.h>
+    #include <AvailabilityMacros.h>
+#else
+    #include <CL/cl.h>
+#endif

/* cl_khr_fp64 extension - no extension #define since it has no functions */
#define CL_DEVICE_DOUBLE_FP_CONFIG 0x1032

/* cl_khr_fp16 extension - no extension #define since it has no functions */
#define CL_DEVICE_HALF_FP_CONFIG 0x1033

-/* cl_khr_icd extension
- */
+/* Memory object destruction
+ *
+ * Apple extension for use to manage externally allocated buffers used with cl_mem objects with CL_MEM_USE_HOST_PTR
+ *
+ * Registers a user callback function that will be called when the memory object is deleted and its resources
+ * freed. Each call to clSetMemObjectCallbackFn registers the specified user callback function on a callback
+ * stack associated with memobj. The registered user callback functions are called in the reverse order in
+ * which they were registered. The user callback functions are called and then the memory object is deleted
+ * and its resources freed. This provides a mechanism for the application (and libraries) using memobj to be
+ * notified when the memory referenced by host_ptr, specified when the memory object is created and used as
+ * the storage bits for the memory object, can be reused or freed.
+ *
+ * The application may not call CL api's with the cl_mem object passed to the pfn_notify.
+ *
+ * Please check for the "cl_APPLE_SetMemObjectDestructor" extension using clGetDeviceInfo(CL_DEVICE_EXTENSIONS)
+ * before using.
+ */
+#define cl_APPLE_SetMemObjectDestructor 1
+cl_int CL_API_ENTRY clSetMemObjectDestructorAPPLE(cl_mem /* memobj */,
+                                                  void (* /*pfn_notify*/)(cl_mem /* memobj */, void* /*user_data*/),
+                                                  void * /*user_data */) CL_EXT_SUFFIX__VERSION_1_0;
+
+/* Context Logging Functions
+ *
+ * The next three convenience functions are intended to be used as the pfn_notify parameter to clCreateContext().
+ * Please check for the "cl_APPLE_ContextLoggingFunctions" extension using clGetDeviceInfo(CL_DEVICE_EXTENSIONS)
+ * before using.
+ *
+ * clLogMessagesToSystemLog forwards on all log messages to the Apple System Logger
+ */
+#define cl_APPLE_ContextLoggingFunctions 1
+extern void CL_API_ENTRY clLogMessagesToSystemLogAPPLE(const char * /* errstr */,
+                                                       const void * /* private_info */,
+                                                       size_t       /* cb */,
+                                                       void *       /* user_data */) CL_EXT_SUFFIX__VERSION_1_0;
+
+/* clLogMessagesToStdout sends all log messages to the file descriptor stdout */
+extern void CL_API_ENTRY clLogMessagesToStdoutAPPLE(const char * /* errstr */,
+                                                    const void * /* private_info */,
+                                                    size_t       /* cb */,
+                                                    void *       /* user_data */) CL_EXT_SUFFIX__VERSION_1_0;
+
+/* clLogMessagesToStderr sends all log messages to the file descriptor stderr */
+extern void CL_API_ENTRY clLogMessagesToStderrAPPLE(const char * /* errstr */,
+                                                    const void * /* private_info */,
+                                                    size_t       /* cb */,
+                                                    void *       /* user_data */) CL_EXT_SUFFIX__VERSION_1_0;
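Because these loggers match clCreateContext's pfn_notify shape (on Apple platforms CL_CALLBACK expands to nothing), they can be passed directly at context creation. A sketch under that assumption, guarded since the extension is Apple-only:

    #ifdef __APPLE__
        cl_int err;
        cl_context ctx = clCreateContext(NULL, 1, &device,
                                         clLogMessagesToStderrAPPLE,
                                         NULL, &err);
    #endif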
+/************************
+ * cl_khr_icd extension *
+ ************************/
#define cl_khr_icd 1

/* cl_platform_info */
#define CL_PLATFORM_ICD_SUFFIX_KHR        0x0920

/* Additional Error Codes */
#define CL_PLATFORM_NOT_FOUND_KHR         -1001

extern CL_API_ENTRY cl_int CL_API_CALL
clIcdGetPlatformIDsKHR(cl_uint          /* num_entries */,
                       cl_platform_id * /* platforms */,
                       cl_uint *        /* num_platforms */);

+typedef CL_API_ENTRY cl_int (CL_API_CALL *clIcdGetPlatformIDsKHR_fn)(
+    cl_uint          /* num_entries */,
+    cl_platform_id * /* platforms */,
+    cl_uint *        /* num_platforms */);
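The new _fn typedef exists because ICD entry points are normally reached through clGetExtensionFunctionAddress (a core 1.0/1.1 call) rather than linked directly. A sketch:

    clIcdGetPlatformIDsKHR_fn icd_get = (clIcdGetPlatformIDsKHR_fn)
        clGetExtensionFunctionAddress("clIcdGetPlatformIDsKHR");

    if (icd_get) {
        cl_uint n = 0;
        icd_get(0, NULL, &n);   /* count ICD-enumerated platforms */
    }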
+/******************************************
+ * cl_nv_device_attribute_query extension *
+ ******************************************/
/* cl_nv_device_attribute_query extension - no extension #define since it has no functions */
#define CL_DEVICE_COMPUTE_CAPABILITY_MAJOR_NV       0x4000
#define CL_DEVICE_COMPUTE_CAPABILITY_MINOR_NV       0x4001
#define CL_DEVICE_REGISTERS_PER_BLOCK_NV            0x4002
#define CL_DEVICE_WARP_SIZE_NV                      0x4003
#define CL_DEVICE_GPU_OVERLAP_NV                    0x4004
#define CL_DEVICE_KERNEL_EXEC_TIMEOUT_NV            0x4005
#define CL_DEVICE_INTEGRATED_MEMORY_NV              0x4006

+/*********************************
+ * cl_amd_device_attribute_query *
+ *********************************/
+#define CL_DEVICE_PROFILING_TIMER_OFFSET_AMD        0x4036
+
+#ifdef CL_VERSION_1_1
+/***********************************
+ * cl_ext_device_fission extension *
+ ***********************************/
+#define cl_ext_device_fission 1
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clReleaseDeviceEXT(cl_device_id /*device*/) CL_EXT_SUFFIX__VERSION_1_1;
+
+typedef CL_API_ENTRY cl_int
+(CL_API_CALL *clReleaseDeviceEXT_fn)(cl_device_id /*device*/) CL_EXT_SUFFIX__VERSION_1_1;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clRetainDeviceEXT(cl_device_id /*device*/) CL_EXT_SUFFIX__VERSION_1_1;
+
+typedef CL_API_ENTRY cl_int
+(CL_API_CALL *clRetainDeviceEXT_fn)(cl_device_id /*device*/) CL_EXT_SUFFIX__VERSION_1_1;
+
+typedef cl_ulong cl_device_partition_property_ext;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clCreateSubDevicesEXT(cl_device_id /*in_device*/,
+                      const cl_device_partition_property_ext * /* properties */,
+                      cl_uint /*num_entries*/,
+                      cl_device_id * /*out_devices*/,
+                      cl_uint * /*num_devices*/) CL_EXT_SUFFIX__VERSION_1_1;
+
+extern CL_API_ENTRY cl_int
+(CL_API_CALL * clCreateSubDevicesEXT_fn)(cl_device_id /*in_device*/,
+                                         const cl_device_partition_property_ext * /* properties */,
+                                         cl_uint /*num_entries*/,
+                                         cl_device_id * /*out_devices*/,
+                                         cl_uint * /*num_devices*/) CL_EXT_SUFFIX__VERSION_1_1;
+
+/* cl_device_partition_property_ext */
+#define CL_DEVICE_PARTITION_EQUALLY_EXT             0x4050
+#define CL_DEVICE_PARTITION_BY_COUNTS_EXT           0x4051
+#define CL_DEVICE_PARTITION_BY_NAMES_EXT            0x4052
+#define CL_DEVICE_PARTITION_BY_AFFINITY_DOMAIN_EXT  0x4053
+
+/* clDeviceGetInfo selectors */
+#define CL_DEVICE_PARENT_DEVICE_EXT                 0x4054
+#define CL_DEVICE_PARTITION_TYPES_EXT               0x4055
+#define CL_DEVICE_AFFINITY_DOMAINS_EXT              0x4056
+#define CL_DEVICE_REFERENCE_COUNT_EXT               0x4057
+#define CL_DEVICE_PARTITION_STYLE_EXT               0x4058
+
+/* error codes */
+#define CL_DEVICE_PARTITION_FAILED_EXT              -1057
+#define CL_INVALID_PARTITION_COUNT_EXT              -1058
+#define CL_INVALID_PARTITION_NAME_EXT               -1059
+
+/* CL_AFFINITY_DOMAINs */
+#define CL_AFFINITY_DOMAIN_L1_CACHE_EXT             0x1
+#define CL_AFFINITY_DOMAIN_L2_CACHE_EXT             0x2
+#define CL_AFFINITY_DOMAIN_L3_CACHE_EXT             0x3
+#define CL_AFFINITY_DOMAIN_L4_CACHE_EXT             0x4
+#define CL_AFFINITY_DOMAIN_NUMA_EXT                 0x10
+#define CL_AFFINITY_DOMAIN_NEXT_FISSIONABLE_EXT     0x100
+
+/* cl_device_partition_property_ext list terminators */
+#define CL_PROPERTIES_LIST_END_EXT                  ((cl_device_partition_property_ext) 0)
+#define CL_PARTITION_BY_COUNTS_LIST_END_EXT         ((cl_device_partition_property_ext) 0)
+#define CL_PARTITION_BY_NAMES_LIST_END_EXT          ((cl_device_partition_property_ext) 0 - 1)
+#endif /* CL_VERSION_1_1 */
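A sketch of the EXT fission interface, splitting a device with the zero-terminated property-list form the terminators above imply; with CL_DEVICE_PARTITION_EQUALLY_EXT the value that follows is the number of compute units per sub-device (2 here is illustrative):

    cl_device_partition_property_ext props[] = {
        CL_DEVICE_PARTITION_EQUALLY_EXT, 2,   /* 2 compute units each */
        CL_PROPERTIES_LIST_END_EXT
    };

    cl_device_id sub[8];
    cl_uint n_sub = 0;
    cl_int err = clCreateSubDevicesEXT(device, props, 8, sub, &n_sub);
    /* ... use sub[0..n_sub-1], then clReleaseDeviceEXT(sub[i]); */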
#ifdef __cplusplus
}
#endif

#endif /* __CL_EXT_H */

End of changes. 6 change blocks. 4 lines changed or deleted, 178 lines changed or added.

cl_gl.h

/**********************************************************************************
- * Copyright (c) 2008-2009 The Khronos Group Inc.
+ * Copyright (c) 2008-2010 The Khronos Group Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and/or associated documentation files (the
 * "Materials"), to deal in the Materials without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sublicense, and/or sell copies of the Materials, and to
 * permit persons to whom the Materials are furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice shall be included

skipping to change at line 24

 *
 * THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
 * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS.
 **********************************************************************************/

-/* $Revision: 10327 $ on $Date: 2010-02-11 00:24:35 +0530 (Thu, 11 Feb 2010) $ */
+/* $Revision: 11708 $ on $Date: 2010-06-14 12:06:24 +0530 (Mon, 14 Jun 2010) $ */

/*
 * cl_gl.h contains Khronos-approved (KHR) OpenCL extensions which have
 * OpenGL dependencies. The application is responsible for #including
 * OpenGL or OpenGL ES headers before #including cl_gl.h.
 */

#ifndef __OPENCL_CL_GL_H
#define __OPENCL_CL_GL_H

#ifdef __APPLE__
#include <OpenCL/cl.h>
+#include <OpenGL/CGLDevice.h>
#else
#include <CL/cl.h>
#endif

#ifdef __cplusplus
extern "C" {
#endif

typedef cl_uint cl_gl_object_type;
typedef cl_uint cl_gl_texture_info;
typedef cl_uint cl_gl_platform_info;
+typedef struct __GLsync *cl_GLsync;

/* cl_gl_object_type */
#define CL_GL_OBJECT_BUFFER       0x2000
#define CL_GL_OBJECT_TEXTURE2D    0x2001
#define CL_GL_OBJECT_TEXTURE3D    0x2002
#define CL_GL_OBJECT_RENDERBUFFER 0x2003

/* cl_gl_texture_info */
#define CL_GL_TEXTURE_TARGET      0x2004
#define CL_GL_MIPMAP_LEVEL        0x2005

skipping to change at line 142 (old) / line 144 (new)

#define CL_WGL_HDC_KHR            0x200B
#define CL_CGL_SHAREGROUP_KHR     0x200C

extern CL_API_ENTRY cl_int CL_API_CALL
clGetGLContextInfoKHR(const cl_context_properties * /* properties */,
                      cl_gl_context_info            /* param_name */,
                      size_t                        /* param_value_size */,
                      void *                        /* param_value */,
                      size_t *                      /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0;

+typedef CL_API_ENTRY cl_int (CL_API_CALL *clGetGLContextInfoKHR_fn)(
+    const cl_context_properties * properties,
+    cl_gl_context_info            param_name,
+    size_t                        param_value_size,
+    void *                        param_value,
+    size_t *                      param_value_size_ret);
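A sketch of the typical query: find the CL device driving the current GL context. The query token CL_CURRENT_DEVICE_FOR_GL_CONTEXT_KHR and the context-property tokens such as CL_GL_CONTEXT_KHR fall in a region this diff skips, so treat them as assumptions to verify against the full header:

    cl_context_properties props[] = {
        CL_CONTEXT_PLATFORM, (cl_context_properties)platform,
        /* plus CL_GL_CONTEXT_KHR and the window-system entry
           (CL_GLX_DISPLAY_KHR, CL_WGL_HDC_KHR, or CL_CGL_SHAREGROUP_KHR) */
        0
    };

    cl_device_id dev;
    clGetGLContextInfoKHR(props, CL_CURRENT_DEVICE_FOR_GL_CONTEXT_KHR,
                          sizeof(dev), &dev, NULL);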
#ifdef __cplusplus
}
#endif

#endif /* __OPENCL_CL_GL_H */

End of changes. 5 change blocks. 2 lines changed or deleted, 11 lines changed or added.


cl_gl_ext.h

/**********************************************************************************
- * Copyright (c) 2008-2009 The Khronos Group Inc.
+ * Copyright (c) 2008-2010 The Khronos Group Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and/or associated documentation files (the
 * "Materials"), to deal in the Materials without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sublicense, and/or sell copies of the Materials, and to
 * permit persons to whom the Materials are furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice shall be included

skipping to change at line 24

 *
 * THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
 * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS.
 **********************************************************************************/

-/* $Revision: 10327 $ on $Date: 2010-02-11 00:24:35 +0530 (Thu, 11 Feb 2010) $ */
+/* $Revision: 11708 $ on $Date: 2010-06-14 12:06:24 +0530 (Mon, 14 Jun 2010) $ */

/* cl_gl_ext.h contains vendor (non-KHR) OpenCL extensions which have */
/* OpenGL dependencies.                                               */

#ifndef __OPENCL_CL_GL_EXT_H
#define __OPENCL_CL_GL_EXT_H

#ifdef __cplusplus
extern "C" {
#endif

+#ifdef __APPLE__
+#include <OpenCL/cl_gl.h>
+#else
+#include <CL/cl_gl.h>
+#endif

/*
 * For each extension, follow this template
 *  /* cl_VEN_extname extension */
/* #define cl_VEN_extname 1
 * ... define new types, if any
 * ... define new tokens, if any
 * ... define new APIs, if any
 *
 *  If you need GLtypes here, mirror them with a cl_GLtype, rather than including a GL header
 *  This allows us to avoid having to decide whether to include GL headers or GLES here.
 */

+/*
+ *  cl_khr_gl_event extension
+ *  See section 9.9 in the OpenCL 1.1 spec for more information
+ */
+#define CL_COMMAND_GL_FENCE_SYNC_OBJECT_KHR     0x200D
+
+extern CL_API_ENTRY cl_event CL_API_CALL
+clCreateEventFromGLsyncKHR(cl_context /* context */,
+                           cl_GLsync  /* cl_GLsync */,
+                           cl_int *   /* errcode_ret */) CL_EXT_SUFFIX__VERSION_1_1;
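A sketch of the intended flow: wrap a GL fence in a CL event so enqueued CL work can wait on the GL pipeline without a full glFinish. glFenceSync is from GL 3.2 / ARB_sync and must come from the application's GL headers, per the note above; names are illustrative:

    /* GL side: fence after the producing GL commands. */
    GLsync fence = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0);

    /* CL side: wrap the fence as a wait-able event. */
    cl_int err;
    cl_event gl_done = clCreateEventFromGLsyncKHR(ctx, (cl_GLsync)fence, &err);

    /* e.g. clEnqueueAcquireGLObjects(q, 1, &mem, 1, &gl_done, NULL); */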
#ifdef __cplusplus
}
#endif

#endif /* __OPENCL_CL_GL_EXT_H */

End of changes. 4 change blocks. 2 lines changed or deleted, 20 lines changed or added.


common_functions.h

skipping to change at line 68

/*DEVICE_BUILTIN*/
extern __host__ __device__ void*   __cdecl memset(void*, int, size_t) __THROW;
/*DEVICE_BUILTIN*/
extern __host__ __device__ void*   __cdecl memcpy(void*, const void*, size_t) __THROW;

}

#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 200

#include <stdio.h>
+#include <stdlib.h>

extern "C"
{
/*DEVICE_BUILTIN*/
-extern         __host__ __device__ int     __cdecl printf(const char*, ...);
+extern _CRTIMP __host__ __device__ int     __cdecl printf(const char*, ...);
+extern _CRTIMP __host__ __device__ void*   __cdecl malloc(size_t) __THROW;
+extern _CRTIMP __host__ __device__ void    __cdecl free(void*) __THROW;
}

#endif /* __CUDA_ARCH__ && __CUDA_ARCH__ >= 200 */

#endif /* __cplusplus && __CUDACC__ */

/*******************************************************************************
*                                                                              *
*                                                                              *

End of changes. 2 change blocks. 2 lines changed or deleted, 5 lines changed or added.


 cuComplex.h   cuComplex.h 
skipping to change at line 46 skipping to change at line 46
#if !defined(CU_COMPLEX_H_) #if !defined(CU_COMPLEX_H_)
#define CU_COMPLEX_H_ #define CU_COMPLEX_H_
#if defined(__cplusplus) #if defined(__cplusplus)
extern "C" { extern "C" {
#endif /* __cplusplus */ #endif /* __cplusplus */
#include <math.h> /* import fabsf, sqrt */ #include <math.h> /* import fabsf, sqrt */
#include "vector_types.h" #include "vector_types.h"
/* versions for hosts without native support for 'complex' */
#if (!defined(__CUDACC__) && defined(CU_USE_NATIVE_COMPLEX))
#include <complex.h>
/* wrapper functions around C99 native complex support. NOTE: Untested! */
/* -- Single Precision -- */
typedef complex cuFloatComplex;
__host__ __device__ static __inline__ float cuCrealf (cuFloatComplex x) {
    return crealf(x);
}
__host__ __device__ static __inline__ float cuCimagf (cuFloatComplex x) {
    return cimagf(x);
}
__host__ __device__ static __inline__ cuFloatComplex make_cuFloatComplex (float x, float y)
{
    return x + I * y;
}
__host__ __device__ static __inline__ cuFloatComplex cuConjf (cuFloatComplex x)
{
    return conjf (x);
}
__host__ __device__ static __inline__ cuFloatComplex cuCaddf (cuFloatComplex x, cuFloatComplex y)
{
    return x + y;
}
__host__ __device__ static __inline__ cuFloatComplex cuCsubf (cuFloatComplex x, cuFloatComplex y)
{
    return x - y;
}
__host__ __device__ static __inline__ cuFloatComplex cuCmulf (cuFloatComplex x, cuFloatComplex y)
{
    return x * y;
}
__host__ __device__ static __inline__ cuFloatComplex cuCdivf (cuFloatComplex x, cuFloatComplex y)
{
    return x / y;
}
__host__ __device__ static __inline__ float cuCabsf (cuFloatComplex x)
{
    return cabsf (x);
}
/* -- Double Precision -- */
typedef double complex cuDoubleComplex;
__host__ __device__ static __inline__ double cuCreal (cuDoubleComplex x)
{
    return creal(x);
}
__host__ __device__ static __inline__ double cuCimag (cuDoubleComplex x)
{
    return cimag(x);
}
__host__ __device__ static __inline__ cuDoubleComplex make_cuDoubleComplex (double x, double y)
{
    return x + I * y;
}
__host__ __device__ static __inline__ cuDoubleComplex cuConj (cuDoubleComplex x)
{
    return conj (x);
}
__host__ __device__ static __inline__ cuDoubleComplex cuCadd (cuDoubleComplex x, cuDoubleComplex y)
{
    return x + y;
}
__host__ __device__ static __inline__ cuDoubleComplex cuCsub (cuDoubleComplex x, cuDoubleComplex y)
{
    return x - y;
}
__host__ __device__ static __inline__ cuDoubleComplex cuCmul (cuDoubleComplex x, cuDoubleComplex y)
{
    return x * y;
}
__host__ __device__ static __inline__ cuDoubleComplex cuCdiv (cuDoubleComplex x, cuDoubleComplex y)
{
    return x / y;
}
__host__ __device__ static __inline__ double cuCabs (cuDoubleComplex x)
{
    return cabs (x);
}
/* versions for target or hosts without native support for 'complex' */
#else /* (defined(__CUDACC__) || (!(defined(CU_USE_NATIVE_COMPLEX)))) */
typedef float2 cuFloatComplex; typedef float2 cuFloatComplex;
__host__ __device__ static __inline__ float cuCrealf (cuFloatComplex x) __host__ __device__ static __inline__ float cuCrealf (cuFloatComplex x)
{ {
return x.x; return x.x;
} }
__host__ __device__ static __inline__ float cuCimagf (cuFloatComplex x) __host__ __device__ static __inline__ float cuCimagf (cuFloatComplex x)
{ {
return x.y; return x.y;
skipping to change at line 376 skipping to change at line 263
t = w / v; t = w / v;
t = 1.0 + t * t; t = 1.0 + t * t;
t = v * sqrt(t); t = v * sqrt(t);
if ((v == 0.0) || if ((v == 0.0) ||
(v > 1.79769313486231570e+308) || (w > 1.79769313486231570e+308)) { (v > 1.79769313486231570e+308) || (w > 1.79769313486231570e+308)) {
t = v + w; t = v + w;
} }
return t; return t;
} }
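A small host-side sketch of the float2-backed helpers (hedged: cuCmulf comes from the elided middle of this header; values chosen so the results are easy to check by hand, (3+4i)(1-2i) = 11-2i and |3+4i| = 5):

#include "cuComplex.h"
#include <stdio.h>

int main(void)
{
    cuFloatComplex a = make_cuFloatComplex(3.0f, 4.0f);
    cuFloatComplex b = make_cuFloatComplex(1.0f, -2.0f);
    cuFloatComplex p = cuCmulf(a, b);
    printf("p = %f%+fi, |a| = %f\n", cuCrealf(p), cuCimagf(p), cuCabsf(a));
    return 0;
}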
#endif /* (!defined(__CUDACC__) && defined(CU_USE_NATIVE_COMPLEX))) */
#if defined(__cplusplus) #if defined(__cplusplus)
} }
#endif /* __cplusplus */ #endif /* __cplusplus */
/* aliases */ /* aliases */
typedef cuFloatComplex cuComplex; typedef cuFloatComplex cuComplex;
__host__ __device__ static __inline__ cuComplex make_cuComplex (float x, __host__ __device__ static __inline__ cuComplex make_cuComplex (float x,
float y) float y)
{ {
return make_cuFloatComplex (x, y); return make_cuFloatComplex (x, y);
 End of changes. 2 change blocks. 
135 lines changed or deleted 0 lines changed or added


 cuda.h   cuda.h 
skipping to change at line 42 skipping to change at line 42
* include, in the user documentation and internal comments to the code, * include, in the user documentation and internal comments to the code,
* the above Disclaimer and U.S. Government End Users Notice. * the above Disclaimer and U.S. Government End Users Notice.
*/ */
#ifndef __cuda_cuda_h__ #ifndef __cuda_cuda_h__
#define __cuda_cuda_h__ #define __cuda_cuda_h__
#include <stdlib.h> #include <stdlib.h>
/**
 * \file
 * \name Data types used by CUDA driver
 * \author NVIDIA Corporation
 * \brief Data types used by CUDA driver
 */

/**
 * CUDA API versioning support
 */
#if defined(CUDA_FORCE_API_VERSION)
    #if (CUDA_FORCE_API_VERSION == 3010)
        #define __CUDA_API_VERSION 3010
    #else
        #error "Unsupported value of CUDA_FORCE_API_VERSION"
    #endif
#else
    #define __CUDA_API_VERSION 3020
#endif /* CUDA_FORCE_API_VERSION */

#if defined(__CUDA_API_VERSION_INTERNAL) || __CUDA_API_VERSION >= 3020
    #define cuDeviceTotalMem                    cuDeviceTotalMem_v2
    #define cuCtxCreate                         cuCtxCreate_v2
    #define cuModuleGetGlobal                   cuModuleGetGlobal_v2
    #define cuMemGetInfo                        cuMemGetInfo_v2
    #define cuMemAlloc                          cuMemAlloc_v2
    #define cuMemAllocPitch                     cuMemAllocPitch_v2
    #define cuMemFree                           cuMemFree_v2
    #define cuMemGetAddressRange                cuMemGetAddressRange_v2
    #define cuMemAllocHost                      cuMemAllocHost_v2
    #define cuMemHostGetDevicePointer           cuMemHostGetDevicePointer_v2
    #define cuMemcpyHtoD                        cuMemcpyHtoD_v2
    #define cuMemcpyDtoH                        cuMemcpyDtoH_v2
    #define cuMemcpyDtoD                        cuMemcpyDtoD_v2
    #define cuMemcpyDtoA                        cuMemcpyDtoA_v2
    #define cuMemcpyAtoD                        cuMemcpyAtoD_v2
    #define cuMemcpyHtoA                        cuMemcpyHtoA_v2
    #define cuMemcpyAtoH                        cuMemcpyAtoH_v2
    #define cuMemcpyAtoA                        cuMemcpyAtoA_v2
    #define cuMemcpyHtoAAsync                   cuMemcpyHtoAAsync_v2
    #define cuMemcpyAtoHAsync                   cuMemcpyAtoHAsync_v2
    #define cuMemcpy2D                          cuMemcpy2D_v2
    #define cuMemcpy2DUnaligned                 cuMemcpy2DUnaligned_v2
    #define cuMemcpy3D                          cuMemcpy3D_v2
    #define cuMemcpyHtoDAsync                   cuMemcpyHtoDAsync_v2
    #define cuMemcpyDtoHAsync                   cuMemcpyDtoHAsync_v2
    #define cuMemcpyDtoDAsync                   cuMemcpyDtoDAsync_v2
    #define cuMemcpy2DAsync                     cuMemcpy2DAsync_v2
    #define cuMemcpy3DAsync                     cuMemcpy3DAsync_v2
    #define cuMemsetD8                          cuMemsetD8_v2
    #define cuMemsetD16                         cuMemsetD16_v2
    #define cuMemsetD32                         cuMemsetD32_v2
    #define cuMemsetD2D8                        cuMemsetD2D8_v2
    #define cuMemsetD2D16                       cuMemsetD2D16_v2
    #define cuMemsetD2D32                       cuMemsetD2D32_v2
    #define cuArrayCreate                       cuArrayCreate_v2
    #define cuArrayGetDescriptor                cuArrayGetDescriptor_v2
    #define cuArray3DCreate                     cuArray3DCreate_v2
    #define cuArray3DGetDescriptor              cuArray3DGetDescriptor_v2
    #define cuTexRefSetAddress                  cuTexRefSetAddress_v2
    #define cuTexRefSetAddress2D                cuTexRefSetAddress2D_v2
    #define cuTexRefGetAddress                  cuTexRefGetAddress_v2
    #define cuGraphicsResourceGetMappedPointer  cuGraphicsResourceGetMappedPointer_v2
#endif /* __CUDA_API_VERSION_INTERNAL || __CUDA_API_VERSION >= 3020 */

/**
 * \defgroup CUDA_DRIVER CUDA Driver API
 *
 * This section describes the low-level CUDA driver application programming
 * interface.
 *
 * @{
 */
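A hedged sketch of what the versioning block above means for client code: defining CUDA_FORCE_API_VERSION to 3010 before including cuda.h keeps the 3.1 entry points, so cuMemAlloc is not remapped to cuMemAlloc_v2 and CUdeviceptr stays an unsigned int (assumes a context is already current):

#define CUDA_FORCE_API_VERSION 3010
#include <cuda.h>

int alloc_demo(void)
{
    CUdeviceptr p;                       /* unsigned int under API 3010 */
    return (int)cuMemAlloc(&p, 1 << 20); /* binds to the pre-_v2 symbol */
}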
/** /**
* \defgroup CUDA_TYPES Data types used by CUDA driver * \defgroup CUDA_TYPES Data types used by CUDA driver
* \ingroup CUDA_DRIVER
* @{ * @{
*/ */
/** /**
* CUDA API version number * CUDA API version number
*/ */
#define CUDA_VERSION 3010 /* 3.1 */ #define CUDA_VERSION 3020 /* 3.2 */
#ifdef __cplusplus #ifdef __cplusplus
extern "C" { extern "C" {
#endif #endif
typedef unsigned int CUdeviceptr;       ///< CUDA device pointer
typedef int CUdevice;                   ///< CUDA device
typedef struct CUctx_st *CUcontext;     ///< CUDA context
typedef struct CUmod_st *CUmodule;      ///< CUDA module
typedef struct CUfunc_st *CUfunction;   ///< CUDA function
typedef struct CUarray_st *CUarray;     ///< CUDA array
typedef struct CUtexref_st *CUtexref;   ///< CUDA texture reference
typedef struct CUsurfref_st *CUsurfref; ///< CUDA surface reference
typedef struct CUevent_st *CUevent;     ///< CUDA event
typedef struct CUstream_st *CUstream;   ///< CUDA stream
typedef struct CUgraphicsResource_st *CUgraphicsResource; ///< CUDA graphics interop resource

typedef struct CUuuid_st {              ///< CUDA definition of UUID
    char bytes[16];
} CUuuid;

/************************************
 **
 **    Enums
 **
 ***********************************/

/**
 * CUDA device pointer
 */
#if __CUDA_API_VERSION >= 3020

#if defined(__x86_64) || defined(AMD64) || defined(_M_AMD64)
typedef unsigned long long CUdeviceptr;
#else
typedef unsigned int CUdeviceptr;
#endif

#endif /* __CUDA_API_VERSION >= 3020 */

typedef int CUdevice;                                     /**< CUDA device */
typedef struct CUctx_st *CUcontext;                       /**< CUDA context */
typedef struct CUmod_st *CUmodule;                        /**< CUDA module */
typedef struct CUfunc_st *CUfunction;                     /**< CUDA function */
typedef struct CUarray_st *CUarray;                       /**< CUDA array */
typedef struct CUtexref_st *CUtexref;                     /**< CUDA texture reference */
typedef struct CUsurfref_st *CUsurfref;                   /**< CUDA surface reference */
typedef struct CUevent_st *CUevent;                       /**< CUDA event */
typedef struct CUstream_st *CUstream;                     /**< CUDA stream */
typedef struct CUgraphicsResource_st *CUgraphicsResource; /**< CUDA graphics interop resource */

typedef struct CUuuid_st {                                /**< CUDA definition of UUID */
    char bytes[16];
} CUuuid;
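A compile-time check, as a hedged sketch, that the 3.2 definition above really widens CUdeviceptr enough to hold a pointer on 64-bit targets (negative-array-size trick, since this header predates static_assert):

#include <cuda.h>

typedef char cudeviceptr_holds_pointer
    [sizeof(CUdeviceptr) >= sizeof(void *) ? 1 : -1];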
/** /**
* Context creation flags * Context creation flags
*/ */
typedef enum CUctx_flags_enum { typedef enum CUctx_flags_enum {
CU_CTX_SCHED_AUTO = 0, ///< Automatic scheduling CU_CTX_SCHED_AUTO = 0, /**< Automatic scheduling */
CU_CTX_SCHED_SPIN = 1, ///< Set spin as default scheduling CU_CTX_SCHED_SPIN = 1, /**< Set spin as default scheduling */
CU_CTX_SCHED_YIELD = 2, ///< Set yield as default scheduling CU_CTX_SCHED_YIELD = 2, /**< Set yield as default scheduling */
CU_CTX_SCHED_MASK = 0x3, CU_CTX_SCHED_MASK = 0x3,
CU_CTX_BLOCKING_SYNC = 4, ///< Use blocking synchronization CU_CTX_BLOCKING_SYNC = 4, /**< Use blocking synchronization */
    CU_CTX_MAP_HOST = 8, ///< Support mapped pinned allocations    CU_CTX_MAP_HOST = 8, /**< Support mapped pinned allocations */
    CU_CTX_LMEM_RESIZE_TO_MAX = 16, ///< Keep local memory allocation after launch    CU_CTX_LMEM_RESIZE_TO_MAX = 16, /**< Keep local memory allocation after launch */
CU_CTX_FLAGS_MASK = 0x1f CU_CTX_FLAGS_MASK = 0x1f
} CUctx_flags; } CUctx_flags;
/** /**
* Event creation flags * Event creation flags
*/ */
typedef enum CUevent_flags_enum { typedef enum CUevent_flags_enum {
CU_EVENT_DEFAULT = 0, ///< Default event flag CU_EVENT_DEFAULT = 0, /**< Default event flag */
    CU_EVENT_BLOCKING_SYNC = 1 ///< Event uses blocking synchronization    CU_EVENT_BLOCKING_SYNC = 1, /**< Event uses blocking synchronization */
CU_EVENT_DISABLE_TIMING = 2 /**< Event will not record timing data */
} CUevent_flags; } CUevent_flags;
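A hedged timing sketch built on the flags above; CU_EVENT_DEFAULT keeps timestamps, while an event used purely for synchronization could pass the new CU_EVENT_DISABLE_TIMING instead (cuEventCreate, cuEventRecord and cuEventElapsedTime are assumed from the event section of this header, not shown in this diff):

#include <cuda.h>

CUresult time_span(float *ms)
{
    CUevent start, stop;
    CUresult rc;
    if ((rc = cuEventCreate(&start, CU_EVENT_DEFAULT)) != CUDA_SUCCESS)
        return rc;
    if ((rc = cuEventCreate(&stop, CU_EVENT_DEFAULT)) != CUDA_SUCCESS)
        return rc;
    cuEventRecord(start, NULL);
    /* ... enqueue work on the stream here ... */
    cuEventRecord(stop, NULL);
    cuEventSynchronize(stop);
    return cuEventElapsedTime(ms, start, stop);
}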
/** /**
* Array formats * Array formats
*/ */
typedef enum CUarray_format_enum { typedef enum CUarray_format_enum {
CU_AD_FORMAT_UNSIGNED_INT8 = 0x01, ///< Unsigned 8-bit integers CU_AD_FORMAT_UNSIGNED_INT8 = 0x01, /**< Unsigned 8-bit integers */
CU_AD_FORMAT_UNSIGNED_INT16 = 0x02, ///< Unsigned 16-bit integers CU_AD_FORMAT_UNSIGNED_INT16 = 0x02, /**< Unsigned 16-bit integers */
CU_AD_FORMAT_UNSIGNED_INT32 = 0x03, ///< Unsigned 32-bit integers CU_AD_FORMAT_UNSIGNED_INT32 = 0x03, /**< Unsigned 32-bit integers */
CU_AD_FORMAT_SIGNED_INT8 = 0x08, ///< Signed 8-bit integers CU_AD_FORMAT_SIGNED_INT8 = 0x08, /**< Signed 8-bit integers */
CU_AD_FORMAT_SIGNED_INT16 = 0x09, ///< Signed 16-bit integers CU_AD_FORMAT_SIGNED_INT16 = 0x09, /**< Signed 16-bit integers */
CU_AD_FORMAT_SIGNED_INT32 = 0x0a, ///< Signed 32-bit integers CU_AD_FORMAT_SIGNED_INT32 = 0x0a, /**< Signed 32-bit integers */
CU_AD_FORMAT_HALF = 0x10, ///< 16-bit floating point CU_AD_FORMAT_HALF = 0x10, /**< 16-bit floating point */
CU_AD_FORMAT_FLOAT = 0x20 ///< 32-bit floating point CU_AD_FORMAT_FLOAT = 0x20 /**< 32-bit floating point */
} CUarray_format; } CUarray_format;
/** /**
* Texture reference addressing modes * Texture reference addressing modes
*/ */
typedef enum CUaddress_mode_enum { typedef enum CUaddress_mode_enum {
CU_TR_ADDRESS_MODE_WRAP = 0, ///< Wrapping address mode CU_TR_ADDRESS_MODE_WRAP = 0, /**< Wrapping address mode */
CU_TR_ADDRESS_MODE_CLAMP = 1, ///< Clamp to edge address mode CU_TR_ADDRESS_MODE_CLAMP = 1, /**< Clamp to edge address mode */
CU_TR_ADDRESS_MODE_MIRROR = 2 ///< Mirror address mode CU_TR_ADDRESS_MODE_MIRROR = 2, /**< Mirror address mode */
CU_TR_ADDRESS_MODE_BORDER = 3 /**< Border address mode */
} CUaddress_mode; } CUaddress_mode;
/** /**
* Texture reference filtering modes * Texture reference filtering modes
*/ */
typedef enum CUfilter_mode_enum { typedef enum CUfilter_mode_enum {
CU_TR_FILTER_MODE_POINT = 0, ///< Point filter mode CU_TR_FILTER_MODE_POINT = 0, /**< Point filter mode */
CU_TR_FILTER_MODE_LINEAR = 1 ///< Linear filter mode CU_TR_FILTER_MODE_LINEAR = 1 /**< Linear filter mode */
} CUfilter_mode; } CUfilter_mode;
/** /**
* Device properties * Device properties
*/ */
typedef enum CUdevice_attribute_enum { typedef enum CUdevice_attribute_enum {
    CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_BLOCK = 1, ///< Maximum number of threads per block    CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_BLOCK = 1, /**< Maximum number of threads per block */
    CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_X = 2, ///< Maximum block dimension X    CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_X = 2, /**< Maximum block dimension X */
    CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Y = 3, ///< Maximum block dimension Y    CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Y = 3, /**< Maximum block dimension Y */
    CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Z = 4, ///< Maximum block dimension Z    CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Z = 4, /**< Maximum block dimension Z */
    CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_X = 5, ///< Maximum grid dimension X    CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_X = 5, /**< Maximum grid dimension X */
    CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_Y = 6, ///< Maximum grid dimension Y    CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_Y = 6, /**< Maximum grid dimension Y */
    CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_Z = 7, ///< Maximum grid dimension Z    CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_Z = 7, /**< Maximum grid dimension Z */
    CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK = 8, ///< Maximum shared memory available per block in bytes    CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK = 8, /**< Maximum shared memory available per block in bytes */
    CU_DEVICE_ATTRIBUTE_SHARED_MEMORY_PER_BLOCK = 8, ///< Deprecated, use CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK    CU_DEVICE_ATTRIBUTE_SHARED_MEMORY_PER_BLOCK = 8, /**< Deprecated, use CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK */
    CU_DEVICE_ATTRIBUTE_TOTAL_CONSTANT_MEMORY = 9, ///< Memory available on device for __constant__ variables in a CUDA C kernel in bytes    CU_DEVICE_ATTRIBUTE_TOTAL_CONSTANT_MEMORY = 9, /**< Memory available on device for __constant__ variables in a CUDA C kernel in bytes */
    CU_DEVICE_ATTRIBUTE_WARP_SIZE = 10, ///< Warp size in threads    CU_DEVICE_ATTRIBUTE_WARP_SIZE = 10, /**< Warp size in threads */
    CU_DEVICE_ATTRIBUTE_MAX_PITCH = 11, ///< Maximum pitch in bytes allowed by memory copies    CU_DEVICE_ATTRIBUTE_MAX_PITCH = 11, /**< Maximum pitch in bytes allowed by memory copies */
    CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_BLOCK = 12, ///< Maximum number of 32-bit registers available per block    CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_BLOCK = 12, /**< Maximum number of 32-bit registers available per block */
    CU_DEVICE_ATTRIBUTE_REGISTERS_PER_BLOCK = 12, ///< Deprecated, use CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_BLOCK    CU_DEVICE_ATTRIBUTE_REGISTERS_PER_BLOCK = 12, /**< Deprecated, use CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_BLOCK */
    CU_DEVICE_ATTRIBUTE_CLOCK_RATE = 13, ///< Peak clock frequency in kilohertz    CU_DEVICE_ATTRIBUTE_CLOCK_RATE = 13, /**< Peak clock frequency in kilohertz */
    CU_DEVICE_ATTRIBUTE_TEXTURE_ALIGNMENT = 14, ///< Alignment requirement for textures    CU_DEVICE_ATTRIBUTE_TEXTURE_ALIGNMENT = 14, /**< Alignment requirement for textures */
    CU_DEVICE_ATTRIBUTE_GPU_OVERLAP = 15, ///< Device can possibly copy memory and execute a kernel concurrently    CU_DEVICE_ATTRIBUTE_GPU_OVERLAP = 15, /**< Device can possibly copy memory and execute a kernel concurrently */
    CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT = 16, ///< Number of multiprocessors on device    CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT = 16, /**< Number of multiprocessors on device */
    CU_DEVICE_ATTRIBUTE_KERNEL_EXEC_TIMEOUT = 17, ///< Specifies whether there is a run time limit on kernels    CU_DEVICE_ATTRIBUTE_KERNEL_EXEC_TIMEOUT = 17, /**< Specifies whether there is a run time limit on kernels */
    CU_DEVICE_ATTRIBUTE_INTEGRATED = 18, ///< Device is integrated with host memory    CU_DEVICE_ATTRIBUTE_INTEGRATED = 18, /**< Device is integrated with host memory */
    CU_DEVICE_ATTRIBUTE_CAN_MAP_HOST_MEMORY = 19, ///< Device can map host memory into CUDA address space    CU_DEVICE_ATTRIBUTE_CAN_MAP_HOST_MEMORY = 19, /**< Device can map host memory into CUDA address space */
    CU_DEVICE_ATTRIBUTE_COMPUTE_MODE = 20, ///< Compute mode (See ::CUcomputemode for details)    CU_DEVICE_ATTRIBUTE_COMPUTE_MODE = 20, /**< Compute mode (See ::CUcomputemode for details) */
    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_WIDTH = 21, ///< Maximum 1D texture width    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_WIDTH = 21, /**< Maximum 1D texture width */
    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_WIDTH = 22, ///< Maximum 2D texture width    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_WIDTH = 22, /**< Maximum 2D texture width */
    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_HEIGHT = 23, ///< Maximum 2D texture height    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_HEIGHT = 23, /**< Maximum 2D texture height */
    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_WIDTH = 24, ///< Maximum 3D texture width    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_WIDTH = 24, /**< Maximum 3D texture width */
    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_HEIGHT = 25, ///< Maximum 3D texture height    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_HEIGHT = 25, /**< Maximum 3D texture height */
    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_DEPTH = 26, ///< Maximum 3D texture depth    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_DEPTH = 26, /**< Maximum 3D texture depth */
    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_ARRAY_WIDTH = 27, ///< Maximum texture array width    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_ARRAY_WIDTH = 27, /**< Maximum texture array width */
    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_ARRAY_HEIGHT = 28, ///< Maximum texture array height    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_ARRAY_HEIGHT = 28, /**< Maximum texture array height */
    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_ARRAY_NUMSLICES = 29, ///< Maximum slices in a texture array    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_ARRAY_NUMSLICES = 29, /**< Maximum slices in a texture array */
    CU_DEVICE_ATTRIBUTE_SURFACE_ALIGNMENT = 30, ///< Alignment requirement for surfaces    CU_DEVICE_ATTRIBUTE_SURFACE_ALIGNMENT = 30, /**< Alignment requirement for surfaces */
    CU_DEVICE_ATTRIBUTE_CONCURRENT_KERNELS = 31, ///< Device can possibly execute multiple kernels concurrently    CU_DEVICE_ATTRIBUTE_CONCURRENT_KERNELS = 31, /**< Device can possibly execute multiple kernels concurrently */
    CU_DEVICE_ATTRIBUTE_ECC_ENABLED = 32, ///< Device has ECC support enabled    CU_DEVICE_ATTRIBUTE_ECC_ENABLED = 32, /**< Device has ECC support enabled */
    CU_DEVICE_ATTRIBUTE_PCI_BUS_ID = 33, ////< PCI bus ID of the device    CU_DEVICE_ATTRIBUTE_PCI_BUS_ID = 33, /**< PCI bus ID of the device */
    CU_DEVICE_ATTRIBUTE_PCI_DEVICE_ID = 34 ////< PCI device ID of the device    CU_DEVICE_ATTRIBUTE_PCI_DEVICE_ID = 34, /**< PCI device ID of the device */
    CU_DEVICE_ATTRIBUTE_TCC_DRIVER = 35 /**< Device is using TCC driver model */
} CUdevice_attribute; } CUdevice_attribute;
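A hedged query sketch for the attributes above, using cuDeviceGetAttribute as declared later in this header (assumes cuInit(0) has succeeded and dev is a valid handle):

#include <cuda.h>
#include <stdio.h>

void print_limits(CUdevice dev)
{
    int threads = 0, smem = 0, overlap = 0;
    cuDeviceGetAttribute(&threads, CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_BLOCK, dev);
    cuDeviceGetAttribute(&smem, CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK, dev);
    cuDeviceGetAttribute(&overlap, CU_DEVICE_ATTRIBUTE_GPU_OVERLAP, dev);
    printf("max threads/block %d, shared mem/block %d bytes, overlap %d\n",
           threads, smem, overlap);
}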
/** /**
* Legacy device properties * Legacy device properties
*/ */
typedef struct CUdevprop_st { typedef struct CUdevprop_st {
int maxThreadsPerBlock; ///< Maximum number of threads per block int maxThreadsPerBlock; /**< Maximum number of threads per block */
    int maxThreadsDim[3]; ///< Maximum size of each dimension of a block    int maxThreadsDim[3]; /**< Maximum size of each dimension of a block */
    int maxGridSize[3]; ///< Maximum size of each dimension of a grid    int maxGridSize[3]; /**< Maximum size of each dimension of a grid */
    int sharedMemPerBlock; ///< Shared memory available per block in bytes    int sharedMemPerBlock; /**< Shared memory available per block in bytes */
    int totalConstantMemory; ///< Constant memory available on device in bytes    int totalConstantMemory; /**< Constant memory available on device in bytes */
    int SIMDWidth; ///< Warp size in threads    int SIMDWidth; /**< Warp size in threads */
    int memPitch; ///< Maximum pitch in bytes allowed by memory copies    int memPitch; /**< Maximum pitch in bytes allowed by memory copies */
    int regsPerBlock; ///< 32-bit registers available per block    int regsPerBlock; /**< 32-bit registers available per block */
    int clockRate; ///< Clock frequency in kilohertz    int clockRate; /**< Clock frequency in kilohertz */
    int textureAlign; ///< Alignment requirement for textures    int textureAlign; /**< Alignment requirement for textures */
} CUdevprop; } CUdevprop;
/** /**
* Function properties * Function properties
*/ */
typedef enum CUfunction_attribute_enum { typedef enum CUfunction_attribute_enum {
    /**
     * The number of threads beyond which a launch of the function would fail.
     * This number depends on both the function and the device on which the
     * function is currently loaded.
     */
    /**
     * The maximum number of threads per block, beyond which a launch of the
     * function would fail. This number depends on both the function and the
     * device on which the function is currently loaded.
     */
CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK = 0, CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK = 0,
/** /**
* The size in bytes of statically-allocated shared memory required by * The size in bytes of statically-allocated shared memory required by
* this function. This does not include dynamically-allocated shared * this function. This does not include dynamically-allocated shared
* memory requested by the user at runtime. * memory requested by the user at runtime.
*/ */
CU_FUNC_ATTRIBUTE_SHARED_SIZE_BYTES = 1, CU_FUNC_ATTRIBUTE_SHARED_SIZE_BYTES = 1,
/** /**
* The size in bytes of user-allocated constant memory required by this * The size in bytes of user-allocated constant memory required by this
* function. * function.
*/ */
CU_FUNC_ATTRIBUTE_CONST_SIZE_BYTES = 2, CU_FUNC_ATTRIBUTE_CONST_SIZE_BYTES = 2,
/** /**
     * The size in bytes of thread local memory used by this function.    * The size in bytes of local memory used by each thread of this function.
*/ */
CU_FUNC_ATTRIBUTE_LOCAL_SIZE_BYTES = 3, CU_FUNC_ATTRIBUTE_LOCAL_SIZE_BYTES = 3,
/** /**
* The number of registers used by each thread of this function. * The number of registers used by each thread of this function.
*/ */
CU_FUNC_ATTRIBUTE_NUM_REGS = 4, CU_FUNC_ATTRIBUTE_NUM_REGS = 4,
/** /**
     * The PTX virtual architecture version for which the function was compiled.
     * The PTX virtual architecture version for which the function was
     * compiled. This value is the major PTX version * 10 + the minor PTX
     * version, so a PTX version 1.3 function would return the value 13.
     * Note that this may return the undefined value of 0 for cubins
     * compiled prior to CUDA 3.0.
*/ */
CU_FUNC_ATTRIBUTE_PTX_VERSION = 5, CU_FUNC_ATTRIBUTE_PTX_VERSION = 5,
/** /**
* The binary version for which the function was compiled. * The binary architecture version for which the function was compiled.
     * This value is the major binary version * 10 + the minor binary version,
     * so a binary version 1.3 function would return the value 13. Note that
* this will return a value of 10 for legacy cubins that do not have a
* properly-encoded binary architecture version.
*/ */
CU_FUNC_ATTRIBUTE_BINARY_VERSION = 6, CU_FUNC_ATTRIBUTE_BINARY_VERSION = 6,
CU_FUNC_ATTRIBUTE_MAX CU_FUNC_ATTRIBUTE_MAX
} CUfunction_attribute; } CUfunction_attribute;
/** /**
* Function cache configurations * Function cache configurations
*/ */
typedef enum CUfunc_cache_enum { typedef enum CUfunc_cache_enum {
    CU_FUNC_CACHE_PREFER_NONE = 0x00,    CU_FUNC_CACHE_PREFER_NONE = 0x00,   /**< no preference for shared memory or L1 (default) */
    CU_FUNC_CACHE_PREFER_SHARED = 0x01,    CU_FUNC_CACHE_PREFER_SHARED = 0x01, /**< prefer larger shared memory and smaller L1 cache */
    CU_FUNC_CACHE_PREFER_L1 = 0x02    CU_FUNC_CACHE_PREFER_L1 = 0x02      /**< prefer larger L1 cache and smaller shared memory */
} CUfunc_cache; } CUfunc_cache;
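On Fermi-class hardware the same on-chip memory backs both L1 and shared memory, which is what these preferences select between. A hedged one-liner (cuFuncSetCacheConfig is assumed from the execution-control section of this header, not shown in this diff):

#include <cuda.h>

CUresult prefer_shared(CUfunction f)
{
    return cuFuncSetCacheConfig(f, CU_FUNC_CACHE_PREFER_SHARED);
}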
/** /**
* Memory types * Memory types
*/ */
typedef enum CUmemorytype_enum { typedef enum CUmemorytype_enum {
CU_MEMORYTYPE_HOST = 0x01, ///< Host memory CU_MEMORYTYPE_HOST = 0x01, /**< Host memory */
CU_MEMORYTYPE_DEVICE = 0x02, ///< Device memory CU_MEMORYTYPE_DEVICE = 0x02, /**< Device memory */
CU_MEMORYTYPE_ARRAY = 0x03 ///< Array memory CU_MEMORYTYPE_ARRAY = 0x03 /**< Array memory */
} CUmemorytype; } CUmemorytype;
/** /**
* Compute Modes * Compute Modes
*/ */
typedef enum CUcomputemode_enum { typedef enum CUcomputemode_enum {
    CU_COMPUTEMODE_DEFAULT = 0, ///< Default compute mode (Multiple contexts allowed per device)    CU_COMPUTEMODE_DEFAULT = 0, /**< Default compute mode (Multiple contexts allowed per device) */
    CU_COMPUTEMODE_EXCLUSIVE = 1, ///< Compute-exclusive mode (Only one context can be present on this device at a time)    CU_COMPUTEMODE_EXCLUSIVE = 1, /**< Compute-exclusive mode (Only one context can be present on this device at a time) */
    CU_COMPUTEMODE_PROHIBITED = 2 ///< Compute-prohibited mode (No contexts can be created on this device at this time)    CU_COMPUTEMODE_PROHIBITED = 2 /**< Compute-prohibited mode (No contexts can be created on this device at this time) */
} CUcomputemode; } CUcomputemode;
/** /**
* Online compiler options * Online compiler options
*/ */
typedef enum CUjit_option_enum typedef enum CUjit_option_enum
{ {
/** /**
* Max number of registers that a thread may use.\n * Max number of registers that a thread may use.\n
* Option type: unsigned int * Option type: unsigned int
*/ */
CU_JIT_MAX_REGISTERS = 0, CU_JIT_MAX_REGISTERS = 0,
/** /**
     * IN: Specifies minimum number of threads per block to target compilation * IN: Specifies minimum number of threads per block to target compilation
* for\n * for\n
* OUT: Returns the number of threads the compiler actually targeted. * OUT: Returns the number of threads the compiler actually targeted.
     * This restricts the resource utilization of the compiler (e.g. max * This restricts the resource utilization of the compiler (e.g. max
* registers) such that a block with the given number of threads should be * registers) such that a block with the given number of threads should be
* able to launch based on register limitations. Note, this option does not * able to launch based on register limitations. Note, this option does not
* currently take into account any other resource limitations, such as * currently take into account any other resource limitations, such as
* shared memory utilization.\n * shared memory utilization.\n
skipping to change at line 368 skipping to change at line 443
*/ */
CU_JIT_FALLBACK_STRATEGY CU_JIT_FALLBACK_STRATEGY
} CUjit_option; } CUjit_option;
/** /**
* Online compilation targets * Online compilation targets
*/ */
typedef enum CUjit_target_enum typedef enum CUjit_target_enum
{ {
CU_TARGET_COMPUTE_10 = 0, ///< Compute device class 1.0 CU_TARGET_COMPUTE_10 = 0, /**< Compute device class 1.0 */
CU_TARGET_COMPUTE_11, ///< Compute device class 1.1 CU_TARGET_COMPUTE_11, /**< Compute device class 1.1 */
CU_TARGET_COMPUTE_12, ///< Compute device class 1.2 CU_TARGET_COMPUTE_12, /**< Compute device class 1.2 */
CU_TARGET_COMPUTE_13, ///< Compute device class 1.3 CU_TARGET_COMPUTE_13, /**< Compute device class 1.3 */
CU_TARGET_COMPUTE_20 ///< Compute device class 2.0 CU_TARGET_COMPUTE_20, /**< Compute device class 2.0 */
CU_TARGET_COMPUTE_21 /**< Compute device class 2.1 */
} CUjit_target; } CUjit_target;
/** /**
* Cubin matching fallback strategies * Cubin matching fallback strategies
*/ */
typedef enum CUjit_fallback_enum typedef enum CUjit_fallback_enum
{ {
    /** Prefer to compile ptx */
    CU_PREFER_PTX = 0,
    /** Prefer to fall back to compatible binary code */
    CU_PREFER_BINARY

    CU_PREFER_PTX = 0,  /**< Prefer to compile ptx */
    CU_PREFER_BINARY    /**< Prefer to fall back to compatible binary code */
} CUjit_fallback; } CUjit_fallback;
/** /**
* Flags to register a graphics resource * Flags to register a graphics resource
*/ */
typedef enum CUgraphicsRegisterFlags_enum { typedef enum CUgraphicsRegisterFlags_enum {
CU_GRAPHICS_REGISTER_FLAGS_NONE = 0x00 CU_GRAPHICS_REGISTER_FLAGS_NONE = 0x00
} CUgraphicsRegisterFlags; } CUgraphicsRegisterFlags;
skipping to change at line 408 skipping to change at line 482
typedef enum CUgraphicsMapResourceFlags_enum { typedef enum CUgraphicsMapResourceFlags_enum {
CU_GRAPHICS_MAP_RESOURCE_FLAGS_NONE = 0x00, CU_GRAPHICS_MAP_RESOURCE_FLAGS_NONE = 0x00,
CU_GRAPHICS_MAP_RESOURCE_FLAGS_READ_ONLY = 0x01, CU_GRAPHICS_MAP_RESOURCE_FLAGS_READ_ONLY = 0x01,
CU_GRAPHICS_MAP_RESOURCE_FLAGS_WRITE_DISCARD = 0x02 CU_GRAPHICS_MAP_RESOURCE_FLAGS_WRITE_DISCARD = 0x02
} CUgraphicsMapResourceFlags; } CUgraphicsMapResourceFlags;
/** /**
* Array indices for cube faces * Array indices for cube faces
*/ */
typedef enum CUarray_cubemap_face_enum { typedef enum CUarray_cubemap_face_enum {
CU_CUBEMAP_FACE_POSITIVE_X = 0x00, ///< Positive X face of cubemap CU_CUBEMAP_FACE_POSITIVE_X = 0x00, /**< Positive X face of cubemap */
CU_CUBEMAP_FACE_NEGATIVE_X = 0x01, ///< Negative X face of cubemap CU_CUBEMAP_FACE_NEGATIVE_X = 0x01, /**< Negative X face of cubemap */
CU_CUBEMAP_FACE_POSITIVE_Y = 0x02, ///< Positive Y face of cubemap CU_CUBEMAP_FACE_POSITIVE_Y = 0x02, /**< Positive Y face of cubemap */
CU_CUBEMAP_FACE_NEGATIVE_Y = 0x03, ///< Negative Y face of cubemap CU_CUBEMAP_FACE_NEGATIVE_Y = 0x03, /**< Negative Y face of cubemap */
CU_CUBEMAP_FACE_POSITIVE_Z = 0x04, ///< Positive Z face of cubemap CU_CUBEMAP_FACE_POSITIVE_Z = 0x04, /**< Positive Z face of cubemap */
CU_CUBEMAP_FACE_NEGATIVE_Z = 0x05 ///< Negative Z face of cubemap CU_CUBEMAP_FACE_NEGATIVE_Z = 0x05 /**< Negative Z face of cubemap */
} CUarray_cubemap_face; } CUarray_cubemap_face;
/** /**
* Limits * Limits
*/ */
typedef enum CUlimit_enum { typedef enum CUlimit_enum {
CU_LIMIT_STACK_SIZE = 0x00, ///< GPU thread stack size CU_LIMIT_STACK_SIZE = 0x00, /**< GPU thread stack size */
CU_LIMIT_PRINTF_FIFO_SIZE = 0x01 ///< GPU printf FIFO size CU_LIMIT_PRINTF_FIFO_SIZE = 0x01, /**< GPU printf FIFO size */
CU_LIMIT_MALLOC_HEAP_SIZE = 0x02 /**< GPU malloc heap size */
} CUlimit; } CUlimit;
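A hedged sketch of raising the limits above for the current context (cuCtxSetLimit is assumed from the context-management section of this header, not shown in this diff; CU_LIMIT_MALLOC_HEAP_SIZE requires compute capability 2.0):

#include <cuda.h>

void grow_device_heaps(void)
{
    cuCtxSetLimit(CU_LIMIT_PRINTF_FIFO_SIZE, (size_t)8 << 20);  /* 8 MiB */
    cuCtxSetLimit(CU_LIMIT_MALLOC_HEAP_SIZE, (size_t)64 << 20); /* 64 MiB */
}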
/************************************
**
** Error codes
**
***********************************/
/** /**
* Error codes * Error codes
*/ */
typedef enum cudaError_enum { typedef enum cudaError_enum {
    CUDA_SUCCESS                              = 0,   ///< No errors
    CUDA_ERROR_INVALID_VALUE                  = 1,   ///< Invalid value
    CUDA_ERROR_OUT_OF_MEMORY                  = 2,   ///< Out of memory
    CUDA_ERROR_NOT_INITIALIZED                = 3,   ///< Driver not initialized
    CUDA_ERROR_DEINITIALIZED                  = 4,   ///< Driver deinitialized
    CUDA_ERROR_NO_DEVICE                      = 100, ///< No CUDA-capable device available
    CUDA_ERROR_INVALID_DEVICE                 = 101, ///< Invalid device
    CUDA_ERROR_INVALID_IMAGE                  = 200, ///< Invalid kernel image
    CUDA_ERROR_INVALID_CONTEXT                = 201, ///< Invalid context
    CUDA_ERROR_CONTEXT_ALREADY_CURRENT        = 202, ///< Context already current
    CUDA_ERROR_MAP_FAILED                     = 205, ///< Map failed
    CUDA_ERROR_UNMAP_FAILED                   = 206, ///< Unmap failed
    CUDA_ERROR_ARRAY_IS_MAPPED                = 207, ///< Array is mapped
    CUDA_ERROR_ALREADY_MAPPED                 = 208, ///< Already mapped
    CUDA_ERROR_NO_BINARY_FOR_GPU              = 209, ///< No binary for GPU
    CUDA_ERROR_ALREADY_ACQUIRED               = 210, ///< Already acquired
    CUDA_ERROR_NOT_MAPPED                     = 211, ///< Not mapped
    CUDA_ERROR_NOT_MAPPED_AS_ARRAY            = 212, ///< Mapped resource not available for access as an array
    CUDA_ERROR_NOT_MAPPED_AS_POINTER          = 213, ///< Mapped resource not available for access as a pointer
    CUDA_ERROR_ECC_UNCORRECTABLE              = 214, ///< Uncorrectable ECC error detected
    CUDA_ERROR_UNSUPPORTED_LIMIT              = 215, ///< CUlimit not supported by device
    CUDA_ERROR_INVALID_SOURCE                 = 300, ///< Invalid source
    CUDA_ERROR_FILE_NOT_FOUND                 = 301, ///< File not found
    CUDA_ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND = 302, ///< Link to a shared object failed to resolve
    CUDA_ERROR_SHARED_OBJECT_INIT_FAILED      = 303, ///< Shared object initialization failed
    CUDA_ERROR_INVALID_HANDLE                 = 400, ///< Invalid handle
    CUDA_ERROR_NOT_FOUND                      = 500, ///< Not found
    CUDA_ERROR_NOT_READY                      = 600, ///< CUDA not ready
    CUDA_ERROR_LAUNCH_FAILED                  = 700, ///< Launch failed
    CUDA_ERROR_LAUNCH_OUT_OF_RESOURCES        = 701, ///< Launch exceeded resources
    CUDA_ERROR_LAUNCH_TIMEOUT                 = 702, ///< Launch exceeded timeout
    CUDA_ERROR_LAUNCH_INCOMPATIBLE_TEXTURING  = 703, ///< Launch with incompatible texturing
    CUDA_ERROR_POINTER_IS_64BIT               = 800, ///< Attempted to retrieve 64-bit pointer via 32-bit API function
    CUDA_ERROR_SIZE_IS_64BIT                  = 801, ///< Attempted to retrieve 64-bit size via 32-bit API function
    CUDA_ERROR_UNKNOWN                        = 999  ///< Unknown error

    /**
     * The API call returned with no errors. In the case of query calls, this
     * can also mean that the operation being queried is complete (see
     * ::cuEventQuery() and ::cuStreamQuery()).
     */
    CUDA_SUCCESS                              = 0,

    /**
     * This indicates that one or more of the parameters passed to the API call
     * is not within an acceptable range of values.
     */
    CUDA_ERROR_INVALID_VALUE                  = 1,

    /**
     * The API call failed because it was unable to allocate enough memory to
     * perform the requested operation.
     */
    CUDA_ERROR_OUT_OF_MEMORY                  = 2,

    /**
     * This indicates that the CUDA driver has not been initialized with
     * ::cuInit() or that initialization has failed.
     */
    CUDA_ERROR_NOT_INITIALIZED                = 3,

    /**
     * This indicates that the CUDA driver is in the process of shutting down.
     */
    CUDA_ERROR_DEINITIALIZED                  = 4,

    /**
     * This indicates that no CUDA-capable devices were detected by the installed
     * CUDA driver.
     */
    CUDA_ERROR_NO_DEVICE                      = 100,

    /**
     * This indicates that the device ordinal supplied by the user does not
     * correspond to a valid CUDA device.
     */
    CUDA_ERROR_INVALID_DEVICE                 = 101,

    /**
     * This indicates that the device kernel image is invalid. This can also
     * indicate an invalid CUDA module.
     */
    CUDA_ERROR_INVALID_IMAGE                  = 200,

    /**
     * This most frequently indicates that there is no context bound to the
     * current thread. This can also be returned if the context passed to an
     * API call is not a valid handle (such as a context that has had
     * ::cuCtxDestroy() invoked on it). This can also be returned if a user
     * mixes different API versions (i.e. 3010 context with 3020 API calls).
     * See ::cuCtxGetApiVersion() for more details.
     */
    CUDA_ERROR_INVALID_CONTEXT                = 201,

    /**
     * This indicated that the context being supplied as a parameter to the
     * API call was already the active context.
     * \deprecated
     * This error return is deprecated as of CUDA 3.2. It is no longer an
     * error to attempt to push the active context via ::cuCtxPushCurrent().
     */
    CUDA_ERROR_CONTEXT_ALREADY_CURRENT        = 202,

    /**
     * This indicates that a map or register operation has failed.
     */
    CUDA_ERROR_MAP_FAILED                     = 205,

    /**
     * This indicates that an unmap or unregister operation has failed.
     */
    CUDA_ERROR_UNMAP_FAILED                   = 206,

    /**
     * This indicates that the specified array is currently mapped and thus
     * cannot be destroyed.
     */
    CUDA_ERROR_ARRAY_IS_MAPPED                = 207,

    /**
     * This indicates that the resource is already mapped.
     */
    CUDA_ERROR_ALREADY_MAPPED                 = 208,

    /**
     * This indicates that there is no kernel image available that is suitable
     * for the device. This can occur when a user specifies code generation
     * options for a particular CUDA source file that do not include the
     * corresponding device configuration.
     */
    CUDA_ERROR_NO_BINARY_FOR_GPU              = 209,

    /**
     * This indicates that a resource has already been acquired.
     */
    CUDA_ERROR_ALREADY_ACQUIRED               = 210,

    /**
     * This indicates that a resource is not mapped.
     */
    CUDA_ERROR_NOT_MAPPED                     = 211,

    /**
     * This indicates that a mapped resource is not available for access as an
     * array.
     */
    CUDA_ERROR_NOT_MAPPED_AS_ARRAY            = 212,

    /**
     * This indicates that a mapped resource is not available for access as a
     * pointer.
     */
    CUDA_ERROR_NOT_MAPPED_AS_POINTER          = 213,

    /**
     * This indicates that an uncorrectable ECC error was detected during
     * execution.
     */
    CUDA_ERROR_ECC_UNCORRECTABLE              = 214,

    /**
     * This indicates that the ::CUlimit passed to the API call is not
     * supported by the active device.
     */
    CUDA_ERROR_UNSUPPORTED_LIMIT              = 215,

    /**
     * This indicates that the device kernel source is invalid.
     */
    CUDA_ERROR_INVALID_SOURCE                 = 300,

    /**
     * This indicates that the file specified was not found.
     */
    CUDA_ERROR_FILE_NOT_FOUND                 = 301,

    /**
     * This indicates that a link to a shared object failed to resolve.
     */
    CUDA_ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND = 302,

    /**
     * This indicates that initialization of a shared object failed.
     */
    CUDA_ERROR_SHARED_OBJECT_INIT_FAILED      = 303,

    /**
     * This indicates that an OS call failed.
     */
    CUDA_ERROR_OPERATING_SYSTEM               = 304,

    /**
     * This indicates that a resource handle passed to the API call was not
     * valid. Resource handles are opaque types like ::CUstream and ::CUevent.
     */
    CUDA_ERROR_INVALID_HANDLE                 = 400,

    /**
     * This indicates that a named symbol was not found. Examples of symbols
     * are global/constant variable names, texture names, and surface names.
     */
    CUDA_ERROR_NOT_FOUND                      = 500,

    /**
     * This indicates that asynchronous operations issued previously have not
     * completed yet. This result is not actually an error, but must be indicated
     * differently than ::CUDA_SUCCESS (which indicates completion). Calls that
     * may return this value include ::cuEventQuery() and ::cuStreamQuery().
     */
    CUDA_ERROR_NOT_READY                      = 600,

    /**
     * An exception occurred on the device while executing a kernel. Common
     * causes include dereferencing an invalid device pointer and accessing
     * out of bounds shared memory. The context cannot be used, so it must
     * be destroyed (and a new one should be created). All existing device
     * memory allocations from this context are invalid and must be
     * reconstructed if the program is to continue using CUDA.
     */
    CUDA_ERROR_LAUNCH_FAILED                  = 700,

    /**
     * This indicates that a launch did not occur because it did not have
     * appropriate resources. This error usually indicates that the user has
     * attempted to pass too many arguments to the device kernel, or the
     * kernel launch specifies too many threads for the kernel's register
     * count. Passing arguments of the wrong size (i.e. a 64-bit pointer
     * when a 32-bit int is expected) is equivalent to passing too many
     * arguments and can also result in this error.
     */
    CUDA_ERROR_LAUNCH_OUT_OF_RESOURCES        = 701,

    /**
     * This indicates that the device kernel took too long to execute. This can
     * only occur if timeouts are enabled - see the device attribute
     * ::CU_DEVICE_ATTRIBUTE_KERNEL_EXEC_TIMEOUT for more information. The
     * context cannot be used (and must be destroyed similar to
     * ::CUDA_ERROR_LAUNCH_FAILED). All existing device memory allocations from
     * this context are invalid and must be reconstructed if the program is to
     * continue using CUDA.
     */
    CUDA_ERROR_LAUNCH_TIMEOUT                 = 702,

    /**
     * This error indicates a kernel launch that uses an incompatible texturing
     * mode.
     */
    CUDA_ERROR_LAUNCH_INCOMPATIBLE_TEXTURING  = 703,

    /**
     * This indicates that an unknown internal error has occurred.
     */
    CUDA_ERROR_UNKNOWN                        = 999
} CUresult; } CUresult;
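A hedged error-check wrapper over the CUresult codes above (the CU_CHECK name is ours, not part of cuda.h; this driver release has no error-to-string helper, so the numeric code is printed):

#include <cuda.h>
#include <stdio.h>
#include <stdlib.h>

#define CU_CHECK(call)                                      \
    do {                                                    \
        CUresult err_ = (call);                             \
        if (err_ != CUDA_SUCCESS) {                         \
            fprintf(stderr, "%s failed: CUresult %d\n",     \
                    #call, (int)err_);                      \
            exit(EXIT_FAILURE);                             \
        }                                                   \
    } while (0)

/* Usage: CU_CHECK(cuInit(0)); */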
/** /**
* If set, host memory is portable between CUDA contexts. * If set, host memory is portable between CUDA contexts.
* Flag for ::cuMemHostAlloc() * Flag for ::cuMemHostAlloc()
*/ */
#define CU_MEMHOSTALLOC_PORTABLE 0x01 #define CU_MEMHOSTALLOC_PORTABLE 0x01
/** /**
* If set, host memory is mapped into CUDA address space and * If set, host memory is mapped into CUDA address space and
skipping to change at line 502 skipping to change at line 743
#define CU_MEMHOSTALLOC_DEVICEMAP 0x02 #define CU_MEMHOSTALLOC_DEVICEMAP 0x02
/** /**
* If set, host memory is allocated as write-combined - fast to write, * If set, host memory is allocated as write-combined - fast to write,
* faster to DMA, slow to read except via SSE4 streaming load instruction * faster to DMA, slow to read except via SSE4 streaming load instruction
* (MOVNTDQA). * (MOVNTDQA).
* Flag for ::cuMemHostAlloc() * Flag for ::cuMemHostAlloc()
*/ */
#define CU_MEMHOSTALLOC_WRITECOMBINED 0x04 #define CU_MEMHOSTALLOC_WRITECOMBINED 0x04
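A hedged sketch combining the flags above: pinned host memory that is also visible to the device (requires a context created with CU_CTX_MAP_HOST):

#include <cuda.h>

CUresult map_host_buffer(size_t bytes, void **host, CUdeviceptr *dev)
{
    CUresult rc = cuMemHostAlloc(host, bytes,
                                 CU_MEMHOSTALLOC_PORTABLE |
                                 CU_MEMHOSTALLOC_DEVICEMAP);
    if (rc != CUDA_SUCCESS)
        return rc;
    /* Kernels can then address the same allocation through *dev. */
    return cuMemHostGetDevicePointer(dev, *host, 0);
}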
#if __CUDA_API_VERSION >= 3020
/** /**
* 2D memory copy parameters * 2D memory copy parameters
*/ */
typedef struct CUDA_MEMCPY2D_st { typedef struct CUDA_MEMCPY2D_st {
    unsigned int srcXInBytes,   ///< Source X in bytes
                 srcY;          ///< Source Y
    CUmemorytype srcMemoryType; ///< Source memory type (host, device, array)
    const void *srcHost;        ///< Source host pointer
    CUdeviceptr srcDevice;      ///< Source device pointer
    CUarray srcArray;           ///< Source array reference
    unsigned int srcPitch;      ///< Source pitch (ignored when src is array)
    unsigned int dstXInBytes,   ///< Destination X in bytes
                 dstY;          ///< Destination Y
    CUmemorytype dstMemoryType; ///< Destination memory type (host, device, array)
    void *dstHost;              ///< Destination host pointer
    CUdeviceptr dstDevice;      ///< Destination device pointer
    CUarray dstArray;           ///< Destination array reference
    unsigned int dstPitch;      ///< Destination pitch (ignored when dst is array)
    unsigned int WidthInBytes;  ///< Width of 2D memory copy in bytes
    unsigned int Height;        ///< Height of 2D memory copy

    size_t srcXInBytes;         /**< Source X in bytes */
    size_t srcY;                /**< Source Y */
    CUmemorytype srcMemoryType; /**< Source memory type (host, device, array) */
    const void *srcHost;        /**< Source host pointer */
    CUdeviceptr srcDevice;      /**< Source device pointer */
    CUarray srcArray;           /**< Source array reference */
    size_t srcPitch;            /**< Source pitch (ignored when src is array) */
    size_t dstXInBytes;         /**< Destination X in bytes */
    size_t dstY;                /**< Destination Y */
    CUmemorytype dstMemoryType; /**< Destination memory type (host, device, array) */
    void *dstHost;              /**< Destination host pointer */
    CUdeviceptr dstDevice;      /**< Destination device pointer */
    CUarray dstArray;           /**< Destination array reference */
    size_t dstPitch;            /**< Destination pitch (ignored when dst is array) */
    size_t WidthInBytes;        /**< Width of 2D memory copy in bytes */
    size_t Height;              /**< Height of 2D memory copy */
} CUDA_MEMCPY2D; } CUDA_MEMCPY2D;
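A hedged fill-in of the struct above: copying a pitched host image to a pitched device allocation (the memset zeroes the offsets and the unused source/destination fields):

#include <cuda.h>
#include <string.h>

CUresult copy_image(CUdeviceptr dst, size_t dstPitch,
                    const void *src, size_t srcPitch,
                    size_t widthBytes, size_t height)
{
    CUDA_MEMCPY2D c;
    memset(&c, 0, sizeof(c));
    c.srcMemoryType = CU_MEMORYTYPE_HOST;
    c.srcHost       = src;
    c.srcPitch      = srcPitch;
    c.dstMemoryType = CU_MEMORYTYPE_DEVICE;
    c.dstDevice     = dst;
    c.dstPitch      = dstPitch;
    c.WidthInBytes  = widthBytes;
    c.Height        = height;
    return cuMemcpy2D(&c);
}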
/** /**
* 3D memory copy parameters * 3D memory copy parameters
*/ */
typedef struct CUDA_MEMCPY3D_st { typedef struct CUDA_MEMCPY3D_st {
    unsigned int srcXInBytes,   ///< Source X in bytes
                 srcY,          ///< Source Y
                 srcZ;          ///< Source Z
    unsigned int srcLOD;        ///< Source LOD
    CUmemorytype srcMemoryType; ///< Source memory type (host, device, array)
    const void *srcHost;        ///< Source host pointer
    CUdeviceptr srcDevice;      ///< Source device pointer
    CUarray srcArray;           ///< Source array reference
    void *reserved0;            ///< Must be NULL
    unsigned int srcPitch;      ///< Source pitch (ignored when src is array)
    unsigned int srcHeight;     ///< Source height (ignored when src is array; may be 0 if Depth==1)
    unsigned int dstXInBytes,   ///< Destination X in bytes
                 dstY,          ///< Destination Y
                 dstZ;          ///< Destination Z
    unsigned int dstLOD;        ///< Destination LOD
    CUmemorytype dstMemoryType; ///< Destination memory type (host, device, array)
    void *dstHost;              ///< Destination host pointer
    CUdeviceptr dstDevice;      ///< Destination device pointer
    CUarray dstArray;           ///< Destination array reference
    void *reserved1;            ///< Must be NULL
    unsigned int dstPitch;      ///< Destination pitch (ignored when dst is array)
    unsigned int dstHeight;     ///< Destination height (ignored when dst is array; may be 0 if Depth==1)
    unsigned int WidthInBytes;  ///< Width of 3D memory copy in bytes
    unsigned int Height;        ///< Height of 3D memory copy
    unsigned int Depth;         ///< Depth of 3D memory copy

    size_t srcXInBytes;         /**< Source X in bytes */
    size_t srcY;                /**< Source Y */
    size_t srcZ;                /**< Source Z */
    size_t srcLOD;              /**< Source LOD */
    CUmemorytype srcMemoryType; /**< Source memory type (host, device, array) */
    const void *srcHost;        /**< Source host pointer */
    CUdeviceptr srcDevice;      /**< Source device pointer */
    CUarray srcArray;           /**< Source array reference */
    void *reserved0;            /**< Must be NULL */
    size_t srcPitch;            /**< Source pitch (ignored when src is array) */
    size_t srcHeight;           /**< Source height (ignored when src is array; may be 0 if Depth==1) */
    size_t dstXInBytes;         /**< Destination X in bytes */
    size_t dstY;                /**< Destination Y */
    size_t dstZ;                /**< Destination Z */
    size_t dstLOD;              /**< Destination LOD */
    CUmemorytype dstMemoryType; /**< Destination memory type (host, device, array) */
    void *dstHost;              /**< Destination host pointer */
    CUdeviceptr dstDevice;      /**< Destination device pointer */
    CUarray dstArray;           /**< Destination array reference */
    void *reserved1;            /**< Must be NULL */
    size_t dstPitch;            /**< Destination pitch (ignored when dst is array) */
    size_t dstHeight;           /**< Destination height (ignored when dst is array; may be 0 if Depth==1) */
    size_t WidthInBytes;        /**< Width of 3D memory copy in bytes */
    size_t Height;              /**< Height of 3D memory copy */
    size_t Depth;               /**< Depth of 3D memory copy */
} CUDA_MEMCPY3D; } CUDA_MEMCPY3D;
/** /**
* Array descriptor * Array descriptor
*/ */
typedef struct
{
    unsigned int Width;         ///< Width of array
    unsigned int Height;        ///< Height of array
    CUarray_format Format;      ///< Array format
    unsigned int NumChannels;   ///< Channels per array element
} CUDA_ARRAY_DESCRIPTOR;

typedef struct CUDA_ARRAY_DESCRIPTOR_st
{
    size_t Width;             /**< Width of array */
    size_t Height;            /**< Height of array */
    CUarray_format Format;    /**< Array format */
    unsigned int NumChannels; /**< Channels per array element */
} CUDA_ARRAY_DESCRIPTOR;
/** /**
* 3D array descriptor * 3D array descriptor
*/ */
typedef struct
{
    unsigned int Width;         ///< Width of 3D array
    unsigned int Height;        ///< Height of 3D array
    unsigned int Depth;         ///< Depth of 3D array
    CUarray_format Format;      ///< Array format
    unsigned int NumChannels;   ///< Channels per array element
    unsigned int Flags;         ///< Flags
} CUDA_ARRAY3D_DESCRIPTOR;

typedef struct CUDA_ARRAY3D_DESCRIPTOR_st
{
    size_t Width;             /**< Width of 3D array */
    size_t Height;            /**< Height of 3D array */
    size_t Depth;             /**< Depth of 3D array */
    CUarray_format Format;    /**< Array format */
    unsigned int NumChannels; /**< Channels per array element */
    unsigned int Flags;       /**< Flags */
} CUDA_ARRAY3D_DESCRIPTOR;
// if set, the CUDA array contains an array of 2D slices
// and the Depth member of CUDA_ARRAY3D_DESCRIPTOR specifies
// the number of slices, not the depth of a 3D array.

#endif /* __CUDA_API_VERSION >= 3020 */

/**
 * If set, the CUDA array contains an array of 2D slices
 * and the Depth member of CUDA_ARRAY3D_DESCRIPTOR specifies
 * the number of slices, not the depth of a 3D array.
 */
#define CUDA_ARRAY3D_2DARRAY 0x01 #define CUDA_ARRAY3D_2DARRAY 0x01
// this flag must be set in order to bind a surface reference
// to the CUDA array

/**
 * This flag must be set in order to bind a surface reference
 * to the CUDA array
 */
#define CUDA_ARRAY3D_SURFACE_LDST 0x02 #define CUDA_ARRAY3D_SURFACE_LDST 0x02
/** /**
* Override the texref format with a format inferred from the array. * Override the texref format with a format inferred from the array.
* Flag for ::cuTexRefSetArray() * Flag for ::cuTexRefSetArray()
*/ */
#define CU_TRSA_OVERRIDE_FORMAT 0x01 #define CU_TRSA_OVERRIDE_FORMAT 0x01
/** /**
* Read the texture as integers rather than promoting the values to floats * Read the texture as integers rather than promoting the values to floats
skipping to change at line 619 skipping to change at line 865
*/ */
#define CU_TRSF_READ_AS_INTEGER 0x01 #define CU_TRSF_READ_AS_INTEGER 0x01
/** /**
 * Use normalized texture coordinates in the range [0,1) instead of [0,dim). * Use normalized texture coordinates in the range [0,1) instead of [0,dim).
* Flag for ::cuTexRefSetFlags() * Flag for ::cuTexRefSetFlags()
*/ */
#define CU_TRSF_NORMALIZED_COORDINATES 0x02 #define CU_TRSF_NORMALIZED_COORDINATES 0x02
/** /**
* Perform sRGB->linear conversion during texture read.
* Flag for ::cuTexRefSetFlags()
*/
#define CU_TRSF_SRGB 0x10
/**
* For texture references loaded into the module, use default texunit from * For texture references loaded into the module, use default texunit from
* texture reference. * texture reference.
*/ */
#define CU_PARAM_TR_DEFAULT -1 #define CU_PARAM_TR_DEFAULT -1
/** @} */
/** @} */ /* END CUDA_TYPES */ /** @} */ /* END CUDA_TYPES */
#ifdef _WIN32 #ifdef _WIN32
#define CUDAAPI __stdcall #define CUDAAPI __stdcall
#else #else
#define CUDAAPI #define CUDAAPI
#endif #endif
/*********************************
 ** Initialization
 *********************************/
CUresult  CUDAAPI cuInit(unsigned int Flags);

/*********************************
 ** Driver Version Query
 *********************************/
CUresult  CUDAAPI cuDriverGetVersion(int *driverVersion);

/************************************
 **
 **    Device management
 **
 ***********************************/

CUresult  CUDAAPI cuDeviceGet(CUdevice *device, int ordinal);
CUresult  CUDAAPI cuDeviceGetCount(int *count);
CUresult  CUDAAPI cuDeviceGetName(char *name, int len, CUdevice dev);
CUresult  CUDAAPI cuDeviceComputeCapability(int *major, int *minor, CUdevice dev);
CUresult  CUDAAPI cuDeviceTotalMem(unsigned int *bytes, CUdevice dev);
CUresult  CUDAAPI cuDeviceGetProperties(CUdevprop *prop, CUdevice dev);
CUresult  CUDAAPI cuDeviceGetAttribute(int *pi, CUdevice_attribute attrib, CUdevice dev);

/************************************
 **
 **    Context management
 **
 ***********************************/

CUresult  CUDAAPI cuCtxCreate(CUcontext *pctx, unsigned int flags, CUdevice dev );
CUresult  CUDAAPI cuCtxDestroy( CUcontext ctx );
CUresult  CUDAAPI cuCtxAttach(CUcontext *pctx, unsigned int flags);
CUresult  CUDAAPI cuCtxDetach(CUcontext ctx);
CUresult  CUDAAPI cuCtxPushCurrent( CUcontext ctx );
CUresult  CUDAAPI cuCtxPopCurrent( CUcontext *pctx );
CUresult  CUDAAPI cuCtxGetDevice(CUdevice *device);
CUresult  CUDAAPI cuCtxSynchronize(void);

/************************************
 **
 **    Module management
 **
 ***********************************/

CUresult  CUDAAPI cuModuleLoad(CUmodule *module, const char *fname);
CUresult  CUDAAPI cuModuleLoadData(CUmodule *module, const void *image);
CUresult  CUDAAPI cuModuleLoadDataEx(CUmodule *module, const void *image, unsigned int numOptions, CUjit_option *options, void **optionValues);
CUresult  CUDAAPI cuModuleLoadFatBinary(CUmodule *module, const void *fatCubin);
CUresult  CUDAAPI cuModuleUnload(CUmodule hmod);
CUresult  CUDAAPI cuModuleGetFunction(CUfunction *hfunc, CUmodule hmod, const char *name);
CUresult  CUDAAPI cuModuleGetGlobal(CUdeviceptr *dptr, unsigned int *bytes, CUmodule hmod, const char *name);
CUresult  CUDAAPI cuModuleGetTexRef(CUtexref *pTexRef, CUmodule hmod, const char *name);
CUresult  CUDAAPI cuModuleGetSurfRef(CUsurfref *pSurfRef, CUmodule hmod, const char *name);

/************************************
 **
 **    Memory management
 **
 ***********************************/

CUresult CUDAAPI cuMemGetInfo(unsigned int *free, unsigned int *total);

CUresult CUDAAPI cuMemAlloc( CUdeviceptr *dptr, unsigned int bytesize);
CUresult CUDAAPI cuMemAllocPitch( CUdeviceptr *dptr,
                                  unsigned int *pPitch,
                                  unsigned int WidthInBytes,
                                  unsigned int Height,
                                  // size of biggest r/w to be performed by kernels on this memory
                                  // 4, 8 or 16 bytes
                                  unsigned int ElementSizeBytes
                                );
CUresult CUDAAPI cuMemFree(CUdeviceptr dptr);
CUresult CUDAAPI cuMemGetAddressRange( CUdeviceptr *pbase, unsigned int *psize, CUdeviceptr dptr );

CUresult CUDAAPI cuMemAllocHost(void **pp, unsigned int bytesize);
CUresult CUDAAPI cuMemFreeHost(void *p);

CUresult CUDAAPI cuMemHostAlloc(void **pp, size_t bytesize, unsigned int Flags );

CUresult CUDAAPI cuMemHostGetDevicePointer( CUdeviceptr *pdptr, void *p, unsigned int Flags );
CUresult CUDAAPI cuMemHostGetFlags( unsigned int *pFlags, void *p );
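A hedged sketch against the 3.1-style module declarations above ("my_module.cubin", "scale" and "g_data" are hypothetical names; assumes a current context):

#include <cuda.h>

CUresult load_kernel(CUfunction *fn, CUdeviceptr *gptr, unsigned int *gbytes)
{
    CUmodule mod;
    CUresult rc = cuModuleLoad(&mod, "my_module.cubin");
    if (rc != CUDA_SUCCESS)
        return rc;
    rc = cuModuleGetFunction(fn, mod, "scale");
    if (rc != CUDA_SUCCESS)
        return rc;
    /* Look up a __device__ global defined by the module. */
    return cuModuleGetGlobal(gptr, gbytes, mod, "g_data");
}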
/**
 * \defgroup CUDA_INITIALIZE Initialization
 *
 * This section describes the initialization functions of the low-level CUDA
 * driver application programming interface.
 *
 * @{
 */

/**
 * \brief Initialize the CUDA driver API
 *
 * Initializes the driver API and must be called before any other function from
 * the driver API. Currently, the \p Flags parameter must be 0. If ::cuInit()
 * has not been called, any function from the driver API will return
 * ::CUDA_ERROR_NOT_INITIALIZED.
 *
 * \param Flags - Initialization flag for CUDA.
 *
 * \return
 * ::CUDA_SUCCESS,
 * ::CUDA_ERROR_INVALID_VALUE,
 * ::CUDA_ERROR_INVALID_DEVICE
 * \notefnerr
 */
CUresult CUDAAPI cuInit(unsigned int Flags);

/** @} */ /* END CUDA_INITIALIZE */

/**
 * \defgroup CUDA_VERSION Version Management
 *
 * This section describes the version management functions of the low-level
 * CUDA driver application programming interface.
 *
 * @{
 */

/**
 * \brief Returns the CUDA driver version
 *
 * Returns in \p *driverVersion the version number of the installed CUDA
 * driver. This function automatically returns ::CUDA_ERROR_INVALID_VALUE if
 * the \p driverVersion argument is NULL.
 *
 * \param driverVersion - Returns the CUDA driver version
 *
 * \return
 * ::CUDA_SUCCESS,
 * ::CUDA_ERROR_INVALID_VALUE
 * \notefnerr
 */
CUresult CUDAAPI cuDriverGetVersion(int *driverVersion);

/** @} */ /* END CUDA_VERSION */
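A hedged check that the installed driver is new enough for the 3.2 API this header targets (the version is encoded as 1000*major + 10*minor, so 3.2 reports 3020):

#include <cuda.h>

int driver_supports_32(void)
{
    int v = 0;
    return cuDriverGetVersion(&v) == CUDA_SUCCESS && v >= 3020;
}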
/**
 * \defgroup CUDA_DEVICE Device Management
 *
 * This section describes the device management functions of the low-level
 * CUDA driver application programming interface.
 *
 * @{
 */

/**
 * \brief Returns a handle to a compute device
 *
 * Returns in \p *device a device handle given an ordinal in the range <b>[0,
 * ::cuDeviceGetCount()-1]</b>.
 *
 * \param device  - Returned device handle
 * \param ordinal - Device number to get handle for
 *
 * \return
 * ::CUDA_SUCCESS,
 * ::CUDA_ERROR_DEINITIALIZED,
 * ::CUDA_ERROR_NOT_INITIALIZED,
 * ::CUDA_ERROR_INVALID_CONTEXT,
 * ::CUDA_ERROR_INVALID_VALUE,
 * ::CUDA_ERROR_INVALID_DEVICE
 * \notefnerr
 *
 * \sa ::cuDeviceComputeCapability,
 * ::cuDeviceGetAttribute,
 * ::cuDeviceGetCount,
 * ::cuDeviceGetName,
 * ::cuDeviceGetProperties,
 * ::cuDeviceTotalMem
 */
CUresult CUDAAPI cuDeviceGet(CUdevice *device, int ordinal);

/**
 * \brief Returns the number of compute-capable devices
 *
 * Returns in \p *count the number of devices with compute capability greater
 * than or equal to 1.0 that are available for execution. If there is no such
 * device, ::cuDeviceGetCount() returns 0.
 *
 * \param count - Returned number of compute-capable devices
 *
 * \return
 * ::CUDA_SUCCESS,
 * ::CUDA_ERROR_DEINITIALIZED,
 * ::CUDA_ERROR_NOT_INITIALIZED,
 * ::CUDA_ERROR_INVALID_CONTEXT,
 * ::CUDA_ERROR_INVALID_VALUE
 * \notefnerr
 *
 * \sa ::cuDeviceComputeCapability,
 * ::cuDeviceGetAttribute,
 * ::cuDeviceGetName,
 * ::cuDeviceGet,
 * ::cuDeviceGetProperties,
 * ::cuDeviceTotalMem
 */
CUresult CUDAAPI cuDeviceGetCount(int *count);

/**
 * \brief Returns an identifier string for the device
 *
 * Returns an ASCII string identifying the device \p dev in the NULL-terminated
 * string pointed to by \p name. \p len specifies the maximum length of the
 * string that may be returned.
 *
 * \param name - Returned identifier string for the device
 * \param len  - Maximum length of string to store in \p name
 * \param dev  - Device to get identifier string for
 *
 * \return
 * ::CUDA_SUCCESS,
 * ::CUDA_ERROR_DEINITIALIZED,
 * ::CUDA_ERROR_NOT_INITIALIZED,
 * ::CUDA_ERROR_INVALID_CONTEXT,
 * ::CUDA_ERROR_INVALID_VALUE,
 * ::CUDA_ERROR_INVALID_DEVICE
 * \notefnerr
 *
 * \sa ::cuDeviceComputeCapability,
 * ::cuDeviceGetAttribute,
 * ::cuDeviceGetCount,
 * ::cuDeviceGet,
 * ::cuDeviceGetProperties,
 * ::cuDeviceTotalMem
 */
CUresult CUDAAPI cuDeviceGetName(char *name, int len, CUdevice dev);
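A hedged enumeration sketch tying the three calls above together:

#include <cuda.h>
#include <stdio.h>

void list_devices(void)
{
    int n = 0, i;
    char name[256];
    if (cuInit(0) != CUDA_SUCCESS || cuDeviceGetCount(&n) != CUDA_SUCCESS)
        return;
    for (i = 0; i < n; ++i) {
        CUdevice dev;
        if (cuDeviceGet(&dev, i) == CUDA_SUCCESS &&
            cuDeviceGetName(name, sizeof(name), dev) == CUDA_SUCCESS)
            printf("device %d: %s\n", i, name);
    }
}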
/**
 * \brief Returns the compute capability of the device
 *
 * Returns in \p *major and \p *minor the major and minor revision numbers that
 * define the compute capability of the device \p dev.
 *
 * \param major - Major revision number
 * \param minor - Minor revision number
 * \param dev   - Device handle
 *
 * \return
 * ::CUDA_SUCCESS,
 * ::CUDA_ERROR_DEINITIALIZED,
 * ::CUDA_ERROR_NOT_INITIALIZED,
 * ::CUDA_ERROR_INVALID_CONTEXT,
 * ::CUDA_ERROR_INVALID_VALUE,
 * ::CUDA_ERROR_INVALID_DEVICE
 * \notefnerr
 *
 * \sa
 * ::cuDeviceGetAttribute,
 * ::cuDeviceGetCount,
 * ::cuDeviceGetName,
 * ::cuDeviceGet,
 * ::cuDeviceGetProperties,
 * ::cuDeviceTotalMem
 */
CUresult CUDAAPI cuDeviceComputeCapability(int *major, int *minor, CUdevice dev);

#if __CUDA_API_VERSION >= 3020
/**
 * \brief Returns the total amount of memory on the device
 *
 * Returns in \p *bytes the total amount of memory available on the device
 * \p dev in bytes.
 *
 * \param bytes - Returned memory available on device in bytes
 * \param dev   - Device handle
 *
 * \return
 * ::CUDA_SUCCESS,
 * ::CUDA_ERROR_DEINITIALIZED,
 * ::CUDA_ERROR_NOT_INITIALIZED,
 * ::CUDA_ERROR_INVALID_CONTEXT,
 * ::CUDA_ERROR_INVALID_VALUE,
 * ::CUDA_ERROR_INVALID_DEVICE
 * \notefnerr
 *
 * \sa ::cuDeviceComputeCapability,
 * ::cuDeviceGetAttribute,
 * ::cuDeviceGetCount,
 * ::cuDeviceGetName,
 * ::cuDeviceGet,
 * ::cuDeviceGetProperties,
 */
CUresult CUDAAPI cuDeviceTotalMem(size_t *bytes, CUdevice dev);
#endif /* __CUDA_API_VERSION >= 3020 */
/**
 * \brief Returns properties for a selected device
 *
 * Returns in \p *prop the properties of device \p dev. The ::CUdevprop
 * structure is defined as:
 *
 * \code
     typedef struct CUdevprop_st {
        int maxThreadsPerBlock;
        int maxThreadsDim[3];
        int maxGridSize[3];
        int sharedMemPerBlock;
        int totalConstantMemory;
        int SIMDWidth;
        int memPitch;
        int regsPerBlock;
        int clockRate;
        int textureAlign;
     } CUdevprop;
 * \endcode
 * where:
 *
 * - ::maxThreadsPerBlock is the maximum number of threads per block;
 * - ::maxThreadsDim[3] is the maximum size of each dimension of a block;
 * - ::maxGridSize[3] is the maximum size of each dimension of a grid;
 * - ::sharedMemPerBlock is the total amount of shared memory available per
 *   block in bytes;
 * - ::totalConstantMemory is the total amount of constant memory available on
 *   the device in bytes;
 * - ::SIMDWidth is the warp size;
 * - ::memPitch is the maximum pitch allowed by the memory copy functions that
 *   involve memory regions allocated through ::cuMemAllocPitch();
 * - ::regsPerBlock is the total number of registers available per block;
 * - ::clockRate is the clock frequency in kilohertz;
 * - ::textureAlign is the alignment requirement; texture base addresses that
 *   are aligned to ::textureAlign bytes do not need an offset applied to
 *   texture fetches.
 *
 * \param prop - Returned properties of device
 * \param dev - Device to get properties for
 *
 * \return
 * ::CUDA_SUCCESS,
 * ::CUDA_ERROR_DEINITIALIZED,
 * ::CUDA_ERROR_NOT_INITIALIZED,
 * ::CUDA_ERROR_INVALID_CONTEXT,
 * ::CUDA_ERROR_INVALID_VALUE,
 * ::CUDA_ERROR_INVALID_DEVICE
 * \notefnerr
 *
 * \sa ::cuDeviceComputeCapability,
 * ::cuDeviceGetAttribute,
 * ::cuDeviceGetCount,
 * ::cuDeviceGetName,
 * ::cuDeviceGet,
 * ::cuDeviceTotalMem
 */
CUresult CUDAAPI cuDeviceGetProperties(CUdevprop *prop, CUdevice dev);
/**
 * \brief Returns information about the device
 *
 * Returns in \p *pi the integer value of the attribute \p attrib on device
 * \p dev. The supported attributes are:
 * - ::CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_BLOCK: Maximum number of threads per
 *   block;
 * - ::CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_X: Maximum x-dimension of a block;
 * - ::CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Y: Maximum y-dimension of a block;
 * - ::CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Z: Maximum z-dimension of a block;
 * - ::CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_X: Maximum x-dimension of a grid;
 * - ::CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_Y: Maximum y-dimension of a grid;
 * - ::CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_Z: Maximum z-dimension of a grid;
 * - ::CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK: Maximum amount of
 *   shared memory available to a thread block in bytes; this amount is shared
 *   by all thread blocks simultaneously resident on a multiprocessor;
 * - ::CU_DEVICE_ATTRIBUTE_TOTAL_CONSTANT_MEMORY: Memory available on device for
 *   __constant__ variables in a CUDA C kernel in bytes;
 * - ::CU_DEVICE_ATTRIBUTE_WARP_SIZE: Warp size in threads;
 * - ::CU_DEVICE_ATTRIBUTE_MAX_PITCH: Maximum pitch in bytes allowed by the
 *   memory copy functions that involve memory regions allocated through
 *   ::cuMemAllocPitch();
 * - ::CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_BLOCK: Maximum number of 32-bit
 *   registers available to a thread block; this number is shared by all thread
 *   blocks simultaneously resident on a multiprocessor;
 * - ::CU_DEVICE_ATTRIBUTE_CLOCK_RATE: Peak clock frequency in kilohertz;
 * - ::CU_DEVICE_ATTRIBUTE_TEXTURE_ALIGNMENT: Alignment requirement; texture
 *   base addresses aligned to ::textureAlign bytes do not need an offset
 *   applied to texture fetches;
 * - ::CU_DEVICE_ATTRIBUTE_GPU_OVERLAP: 1 if the device can concurrently copy
 *   memory between host and device while executing a kernel, or 0 if not;
 * - ::CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT: Number of multiprocessors on
 *   the device;
 * - ::CU_DEVICE_ATTRIBUTE_KERNEL_EXEC_TIMEOUT: 1 if there is a run time limit
 *   for kernels executed on the device, or 0 if not;
 * - ::CU_DEVICE_ATTRIBUTE_INTEGRATED: 1 if the device is integrated with the
 *   memory subsystem, or 0 if not;
 * - ::CU_DEVICE_ATTRIBUTE_CAN_MAP_HOST_MEMORY: 1 if the device can map host
 *   memory into the CUDA address space, or 0 if not;
 * - ::CU_DEVICE_ATTRIBUTE_COMPUTE_MODE: Compute mode that device is currently
 *   in. Available modes are as follows:
 *   - ::CU_COMPUTEMODE_DEFAULT: Default mode - Device is not restricted and
 *     can have multiple CUDA contexts present at a single time.
 *   - ::CU_COMPUTEMODE_EXCLUSIVE: Compute-exclusive mode - Device can have
 *     only one CUDA context present on it at a time.
 *   - ::CU_COMPUTEMODE_PROHIBITED: Compute-prohibited mode - Device is
 *     prohibited from creating new CUDA contexts.
 * - ::CU_DEVICE_ATTRIBUTE_CONCURRENT_KERNELS: 1 if the device supports
 *   executing multiple kernels within the same context simultaneously, or 0 if
 *   not. It is not guaranteed that multiple kernels will be resident
 *   on the device concurrently so this feature should not be relied upon for
 *   correctness;
 * - ::CU_DEVICE_ATTRIBUTE_ECC_ENABLED: 1 if error correction is enabled on the
 *   device, 0 if error correction is disabled or not supported by the device;
 * - ::CU_DEVICE_ATTRIBUTE_PCI_BUS_ID: PCI bus identifier of the device;
 * - ::CU_DEVICE_ATTRIBUTE_PCI_DEVICE_ID: PCI device (also known as slot)
 *   identifier of the device;
 * - ::CU_DEVICE_ATTRIBUTE_TCC_DRIVER: 1 if the device is using a TCC driver.
 *   TCC is only available on Tesla hardware running Windows Vista or later.
 *
 * \param pi - Returned device attribute value
 * \param attrib - Device attribute to query
 * \param dev - Device handle
 *
 * \return
 * ::CUDA_SUCCESS,
 * ::CUDA_ERROR_DEINITIALIZED,
 * ::CUDA_ERROR_NOT_INITIALIZED,
 * ::CUDA_ERROR_INVALID_CONTEXT,
 * ::CUDA_ERROR_INVALID_VALUE,
 * ::CUDA_ERROR_INVALID_DEVICE
 * \notefnerr
 *
 * \sa ::cuDeviceComputeCapability,
 * ::cuDeviceGetCount,
 * ::cuDeviceGetName,
 * ::cuDeviceGet,
 * ::cuDeviceGetProperties,
 * ::cuDeviceTotalMem
 */
CUresult CUDAAPI cuDeviceGetAttribute(int *pi, CUdevice_attribute attrib, CUdevice dev);
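/*
 * Editor's note: a short sketch of querying individual attributes with
 * ::cuDeviceGetAttribute() (not part of the original header; \p dev is a
 * previously obtained device handle):
 *
 * \code
 *   int warpSize = 0, smCount = 0;
 *   cuDeviceGetAttribute(&warpSize, CU_DEVICE_ATTRIBUTE_WARP_SIZE, dev);
 *   cuDeviceGetAttribute(&smCount,
 *                        CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT, dev);
 * \endcode
 */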
/** @} */ /* END CUDA_DEVICE */

/**
 * \defgroup CUDA_CTX Context Management
 *
 * This section describes the context management functions of the low-level
 * CUDA driver application programming interface.
 *
 * @{
 */
#if __CUDA_API_VERSION >= 3020
/**
 * \brief Create a CUDA context
 *
 * Creates a new CUDA context and associates it with the calling thread. The
 * \p flags parameter is described below. The context is created with a usage
 * count of 1 and the caller of ::cuCtxCreate() must call ::cuCtxDestroy() or
 * ::cuCtxDetach() when done using the context. If a context is already current
 * to the thread, it is supplanted by the newly created context and may be
 * restored by a subsequent call to ::cuCtxPopCurrent().
 *
 * The two LSBs of the \p flags parameter can be used to control how the OS
 * thread, which owns the CUDA context at the time of an API call, interacts
 * with the OS scheduler when waiting for results from the GPU.
 *
 * - ::CU_CTX_SCHED_AUTO: The default value if the \p flags parameter is zero,
 * uses a heuristic based on the number of active CUDA contexts in the
 * process \e C and the number of logical processors in the system \e P. If
 * \e C > \e P, then CUDA will yield to other OS threads when waiting for
 * the GPU, otherwise CUDA will not yield while waiting for results and
 * actively spin on the processor.
 *
 * - ::CU_CTX_SCHED_SPIN: Instruct CUDA to actively spin when waiting for
 * results from the GPU. This can decrease latency when waiting for the GPU,
 * but may lower the performance of CPU threads if they are performing work in
 * parallel with the CUDA thread.
 *
 * - ::CU_CTX_SCHED_YIELD: Instruct CUDA to yield its thread when waiting for
 * results from the GPU. This can increase latency when waiting for the GPU,
 * but can increase the performance of CPU threads performing work in parallel
 * with the GPU.
 *
 * - ::CU_CTX_BLOCKING_SYNC: Instruct CUDA to block the CPU thread on a
 * synchronization primitive when waiting for the GPU to finish work.
 *
 * - ::CU_CTX_MAP_HOST: Instruct CUDA to support mapped pinned allocations.
 * This flag must be set in order to allocate pinned host memory that is
 * accessible to the GPU.
 *
 * - ::CU_CTX_LMEM_RESIZE_TO_MAX: Instruct CUDA to not reduce local memory
 * after resizing local memory for a kernel. This can prevent thrashing by
 * local memory allocations when launching many kernels with high local
 * memory usage at the cost of potentially increased memory usage.
 *
 * <b>Note to Linux users</b>:
 *
 * Context creation will fail with ::CUDA_ERROR_UNKNOWN if the compute mode of
 * the device is ::CU_COMPUTEMODE_PROHIBITED. Similarly, context creation will
 * also fail with ::CUDA_ERROR_UNKNOWN if the compute mode for the device is
 * set to ::CU_COMPUTEMODE_EXCLUSIVE and there is already an active context on
 * the device. The function ::cuDeviceGetAttribute() can be used with
 * ::CU_DEVICE_ATTRIBUTE_COMPUTE_MODE to determine the compute mode of the
 * device. The <i>nvidia-smi</i> tool can be used to set the compute mode for
 * devices. Documentation for <i>nvidia-smi</i> can be obtained by passing a
 * -h option to it.
 *
 * \param pctx - Returned context handle of the new context
 * \param flags - Context creation flags
 * \param dev - Device to create context on
 *
 * \return
 * ::CUDA_SUCCESS,
 * ::CUDA_ERROR_DEINITIALIZED,
 * ::CUDA_ERROR_NOT_INITIALIZED,
 * ::CUDA_ERROR_INVALID_CONTEXT,
 * ::CUDA_ERROR_INVALID_DEVICE,
 * ::CUDA_ERROR_INVALID_VALUE,
 * ::CUDA_ERROR_OUT_OF_MEMORY,
 * ::CUDA_ERROR_UNKNOWN
 * \notefnerr
 *
 * \sa ::cuCtxAttach,
 * ::cuCtxDestroy,
 * ::cuCtxDetach,
 * ::cuCtxGetApiVersion,
 * ::cuCtxGetCacheConfig,
 * ::cuCtxGetDevice,
 * ::cuCtxGetLimit,
 * ::cuCtxPopCurrent,
 * ::cuCtxPushCurrent,
 * ::cuCtxSetCacheConfig,
 * ::cuCtxSetLimit,
 * ::cuCtxSynchronize
 */
CUresult CUDAAPI cuCtxCreate(CUcontext *pctx, unsigned int flags, CUdevice dev);
#endif /* __CUDA_API_VERSION >= 3020 */
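/*
 * Editor's note: typical context setup and teardown (editorial sketch, not
 * part of the original header; \p dev is a previously obtained device
 * handle, and ::CU_CTX_MAP_HOST is included only because mapped pinned
 * allocations later in this header require it):
 *
 * \code
 *   CUcontext ctx;
 *   cuCtxCreate(&ctx, CU_CTX_SCHED_YIELD | CU_CTX_MAP_HOST, dev);
 *   // ... use the context on this thread ...
 *   cuCtxDestroy(ctx);               // usage count is still 1 here
 * \endcode
 */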
/**
 * \brief Destroy the current context or a floating CUDA context
 *
 * Destroys the CUDA context specified by \p ctx. If the context usage count is
 * not equal to 1, or the context is current to any CPU thread other than the
 * current one, this function fails. Floating contexts (detached from a CPU
 * thread via ::cuCtxPopCurrent()) may be destroyed by this function.
 *
 * \param ctx - Context to destroy
 *
 * \return
 * ::CUDA_SUCCESS,
 * ::CUDA_ERROR_DEINITIALIZED,
 * ::CUDA_ERROR_NOT_INITIALIZED,
 * ::CUDA_ERROR_INVALID_CONTEXT,
 * ::CUDA_ERROR_INVALID_VALUE
 * \notefnerr
 *
 * \sa ::cuCtxAttach,
 * ::cuCtxCreate,
 * ::cuCtxDetach,
 * ::cuCtxGetApiVersion,
 * ::cuCtxGetCacheConfig,
 * ::cuCtxGetDevice,
 * ::cuCtxGetLimit,
 * ::cuCtxPopCurrent,
 * ::cuCtxPushCurrent,
 * ::cuCtxSetCacheConfig,
 * ::cuCtxSetLimit,
 * ::cuCtxSynchronize
 */
CUresult CUDAAPI cuCtxDestroy(CUcontext ctx);
/**
 * \brief Increment a context's usage-count
 *
 * Increments the usage count of the context and passes back a context handle
 * in \p *pctx that must be passed to ::cuCtxDetach() when the application is
 * done with the context. ::cuCtxAttach() fails if there is no context current
 * to the thread.
 *
 * Currently, the \p flags parameter must be 0.
 *
 * \param pctx - Returned context handle of the current context
 * \param flags - Context attach flags (must be 0)
 *
 * \return
 * ::CUDA_SUCCESS,
 * ::CUDA_ERROR_DEINITIALIZED,
 * ::CUDA_ERROR_NOT_INITIALIZED,
 * ::CUDA_ERROR_INVALID_CONTEXT,
 * ::CUDA_ERROR_INVALID_VALUE
 * \notefnerr
 *
 * \sa ::cuCtxCreate,
 * ::cuCtxDestroy,
 * ::cuCtxDetach,
 * ::cuCtxGetApiVersion,
 * ::cuCtxGetCacheConfig,
 * ::cuCtxGetDevice,
 * ::cuCtxGetLimit,
 * ::cuCtxPopCurrent,
 * ::cuCtxPushCurrent,
 * ::cuCtxSetCacheConfig,
 * ::cuCtxSetLimit,
 * ::cuCtxSynchronize
 */
CUresult CUDAAPI cuCtxAttach(CUcontext *pctx, unsigned int flags);
/**
 * \brief Decrement a context's usage-count
 *
 * Decrements the usage count of the context \p ctx, and destroys the context
 * if the usage count goes to 0. The context must be a handle that was passed
 * back by ::cuCtxCreate() or ::cuCtxAttach(), and must be current to the
 * calling thread.
 *
 * \param ctx - Context to destroy
 *
 * \return
 * ::CUDA_SUCCESS,
 * ::CUDA_ERROR_DEINITIALIZED,
 * ::CUDA_ERROR_NOT_INITIALIZED,
 * ::CUDA_ERROR_INVALID_CONTEXT
 * \notefnerr
 *
 * \sa ::cuCtxAttach,
 * ::cuCtxCreate,
 * ::cuCtxDestroy,
 * ::cuCtxGetApiVersion,
 * ::cuCtxGetCacheConfig,
 * ::cuCtxGetDevice,
 * ::cuCtxGetLimit,
 * ::cuCtxPopCurrent,
 * ::cuCtxPushCurrent,
 * ::cuCtxSetCacheConfig,
 * ::cuCtxSetLimit,
 * ::cuCtxSynchronize
 */
CUresult CUDAAPI cuCtxDetach(CUcontext ctx);
/**
 * \brief Pushes a floating context on the current CPU thread
 *
 * Pushes the given context \p ctx onto the CPU thread's stack of current
 * contexts. The specified context becomes the CPU thread's current context, so
 * all CUDA functions that operate on the current context are affected.
 *
 * The previous current context may be made current again by calling
 * ::cuCtxDestroy() or ::cuCtxPopCurrent().
 *
 * The context must be "floating," i.e. not attached to any thread. Contexts are
 * made to float by calling ::cuCtxPopCurrent().
 *
 * \param ctx - Floating context to attach
 *
 * \return
 * ::CUDA_SUCCESS,
 * ::CUDA_ERROR_DEINITIALIZED,
 * ::CUDA_ERROR_NOT_INITIALIZED,
 * ::CUDA_ERROR_INVALID_CONTEXT,
 * ::CUDA_ERROR_INVALID_VALUE
 * \notefnerr
 *
 * \sa ::cuCtxAttach,
 * ::cuCtxCreate,
 * ::cuCtxDestroy,
 * ::cuCtxDetach,
 * ::cuCtxGetApiVersion,
 * ::cuCtxGetCacheConfig,
 * ::cuCtxGetDevice,
 * ::cuCtxGetLimit,
 * ::cuCtxPopCurrent,
 * ::cuCtxSetCacheConfig,
 * ::cuCtxSetLimit,
 * ::cuCtxSynchronize
 */
CUresult CUDAAPI cuCtxPushCurrent(CUcontext ctx);
/**
 * \brief Pops the current CUDA context from the current CPU thread
 *
 * Pops the current CUDA context from the CPU thread. The CUDA context must
 * have a usage count of 1. CUDA contexts have a usage count of 1 upon
 * creation; the usage count may be incremented with ::cuCtxAttach() and
 * decremented with ::cuCtxDetach().
 *
 * If successful, ::cuCtxPopCurrent() passes back the old context handle in
 * \p *pctx. That context may then be made current to a different CPU thread
 * by calling ::cuCtxPushCurrent().
 *
 * Floating contexts may be destroyed by calling ::cuCtxDestroy().
 *
 * If a context was current to the CPU thread before ::cuCtxCreate() or
 * ::cuCtxPushCurrent() was called, this function makes that context current to
 * the CPU thread again.
 *
 * \param pctx - Returned new context handle
 *
 * \return
 * ::CUDA_SUCCESS,
 * ::CUDA_ERROR_DEINITIALIZED,
 * ::CUDA_ERROR_NOT_INITIALIZED,
 * ::CUDA_ERROR_INVALID_CONTEXT
 * \notefnerr
 *
 * \sa ::cuCtxAttach,
 * ::cuCtxCreate,
 * ::cuCtxDestroy,
 * ::cuCtxDetach,
 * ::cuCtxGetApiVersion,
 * ::cuCtxGetCacheConfig,
 * ::cuCtxGetDevice,
 * ::cuCtxGetLimit,
 * ::cuCtxPushCurrent,
 * ::cuCtxSetCacheConfig,
 * ::cuCtxSetLimit,
 * ::cuCtxSynchronize
 */
CUresult CUDAAPI cuCtxPopCurrent(CUcontext *pctx);
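/*
 * Editor's note: a sketch of migrating a context between CPU threads with
 * the pop/push pair (not part of the original header):
 *
 * \code
 *   CUcontext ctx;
 *   cuCtxPopCurrent(&ctx);    // thread A: ctx is now "floating"
 *   // hand ctx to thread B by any thread-safe means, then on thread B:
 *   cuCtxPushCurrent(ctx);    // thread B: ctx becomes current here
 * \endcode
 */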
/**
 * \brief Returns the device ID for the current context
 *
 * Returns in \p *device the ordinal of the current context's device.
 *
 * \param device - Returned device ID for the current context
 *
 * \return
 * ::CUDA_SUCCESS,
 * ::CUDA_ERROR_DEINITIALIZED,
 * ::CUDA_ERROR_NOT_INITIALIZED,
 * ::CUDA_ERROR_INVALID_CONTEXT,
 * ::CUDA_ERROR_INVALID_VALUE
 * \notefnerr
 *
 * \sa ::cuCtxAttach,
 * ::cuCtxCreate,
 * ::cuCtxDestroy,
 * ::cuCtxDetach,
 * ::cuCtxGetApiVersion,
 * ::cuCtxGetCacheConfig,
 * ::cuCtxGetLimit,
 * ::cuCtxPopCurrent,
 * ::cuCtxPushCurrent,
 * ::cuCtxSetCacheConfig,
 * ::cuCtxSetLimit,
 * ::cuCtxSynchronize
 */
CUresult CUDAAPI cuCtxGetDevice(CUdevice *device);
/**
 * \brief Block for a context's tasks to complete
 *
 * Blocks until the device has completed all preceding requested tasks.
 * ::cuCtxSynchronize() returns an error if one of the preceding tasks failed.
 * If the context was created with the ::CU_CTX_BLOCKING_SYNC flag, the CPU
 * thread will block until the GPU context has finished its work.
 *
 * \return
 * ::CUDA_SUCCESS,
 * ::CUDA_ERROR_DEINITIALIZED,
 * ::CUDA_ERROR_NOT_INITIALIZED,
 * ::CUDA_ERROR_INVALID_CONTEXT
 * \notefnerr
 *
 * \sa ::cuCtxAttach,
 * ::cuCtxCreate,
 * ::cuCtxDestroy,
 * ::cuCtxDetach,
 * ::cuCtxGetApiVersion,
 * ::cuCtxGetCacheConfig,
 * ::cuCtxGetDevice,
 * ::cuCtxGetLimit,
 * ::cuCtxPopCurrent,
 * ::cuCtxPushCurrent,
 * ::cuCtxSetCacheConfig,
 * ::cuCtxSetLimit
 */
CUresult CUDAAPI cuCtxSynchronize(void);
/**
 * \brief Set resource limits
 *
 * Setting \p limit to \p value is a request by the application to update
 * the current limit maintained by the context. The driver is free to
 * modify the requested value to meet h/w requirements (this could be
 * clamping to minimum or maximum values, rounding up to nearest element
 * size, etc). The application can use ::cuCtxGetLimit() to find out exactly
 * what the limit has been set to.
 *
 * Setting each ::CUlimit has its own specific restrictions, so each is
 * discussed here.
 *
 * - ::CU_LIMIT_STACK_SIZE controls the stack size of each GPU thread.
 *   This limit is only applicable to devices of compute capability
 *   2.0 and higher. Attempting to set this limit on devices of
 *   compute capability less than 2.0 will result in the error
 *   ::CUDA_ERROR_UNSUPPORTED_LIMIT being returned.
 *
 * - ::CU_LIMIT_PRINTF_FIFO_SIZE controls the size of the FIFO used
 *   by the ::printf() device system call. Setting
 *   ::CU_LIMIT_PRINTF_FIFO_SIZE must be performed before launching any
 *   kernel that uses the ::printf() device system call, otherwise
 *   ::CUDA_ERROR_INVALID_VALUE will be returned.
 *   This limit is only applicable to devices of compute capability
 *   2.0 and higher. Attempting to set this limit on devices of
 *   compute capability less than 2.0 will result in the error
 *   ::CUDA_ERROR_UNSUPPORTED_LIMIT being returned.
 *
 * - ::CU_LIMIT_MALLOC_HEAP_SIZE controls the size of the heap used
 *   by the ::malloc() and ::free() device system calls. Setting
 *   ::CU_LIMIT_MALLOC_HEAP_SIZE must be performed before launching
 *   any kernel that uses the ::malloc() or ::free() device system calls,
 *   otherwise ::CUDA_ERROR_INVALID_VALUE will be returned.
 *   This limit is only applicable to devices of compute capability
 *   2.0 and higher. Attempting to set this limit on devices of
 *   compute capability less than 2.0 will result in the error
 *   ::CUDA_ERROR_UNSUPPORTED_LIMIT being returned.
 *
 * \param limit - Limit to set
 * \param value - Size in bytes of limit
 *
 * \return
 * ::CUDA_SUCCESS,
 * ::CUDA_ERROR_INVALID_VALUE,
 * ::CUDA_ERROR_UNSUPPORTED_LIMIT
 * \notefnerr
 *
 * \sa ::cuCtxAttach,
 * ::cuCtxCreate,
 * ::cuCtxDestroy,
 * ::cuCtxDetach,
 * ::cuCtxGetApiVersion,
 * ::cuCtxGetCacheConfig,
 * ::cuCtxGetDevice,
 * ::cuCtxGetLimit,
 * ::cuCtxPopCurrent,
 * ::cuCtxPushCurrent,
 * ::cuCtxSetCacheConfig,
 * ::cuCtxSynchronize
 */
CUresult CUDAAPI cuCtxSetLimit(CUlimit limit, size_t value);
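/*
 * Editor's note: setting and verifying a limit (editorial sketch; only
 * valid on devices of compute capability 2.0 or higher, as documented
 * above, and the driver may round the requested value):
 *
 * \code
 *   size_t fifoSize = 0;
 *   cuCtxSetLimit(CU_LIMIT_PRINTF_FIFO_SIZE, 4 * 1024 * 1024);
 *   cuCtxGetLimit(&fifoSize, CU_LIMIT_PRINTF_FIFO_SIZE);  // actual value
 * \endcode
 */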
/**
 * \brief Returns resource limits
 *
 * Returns in \p *pvalue the current size of \p limit. The supported
 * ::CUlimit values are:
 * - ::CU_LIMIT_STACK_SIZE: stack size of each GPU thread;
 * - ::CU_LIMIT_PRINTF_FIFO_SIZE: size of the FIFO used by the
 *   ::printf() device system call;
 * - ::CU_LIMIT_MALLOC_HEAP_SIZE: size of the heap used by the
 *   ::malloc() and ::free() device system calls.
 *
 * \param limit - Limit to query
 * \param pvalue - Returned size in bytes of limit
 *
 * \return
 * ::CUDA_SUCCESS,
 * ::CUDA_ERROR_INVALID_VALUE,
 * ::CUDA_ERROR_UNSUPPORTED_LIMIT
 * \notefnerr
 *
 * \sa ::cuCtxAttach,
 * ::cuCtxCreate,
 * ::cuCtxDestroy,
 * ::cuCtxDetach,
 * ::cuCtxGetApiVersion,
 * ::cuCtxGetCacheConfig,
 * ::cuCtxGetDevice,
 * ::cuCtxPopCurrent,
 * ::cuCtxPushCurrent,
 * ::cuCtxSetCacheConfig,
 * ::cuCtxSetLimit,
 * ::cuCtxSynchronize
 */
CUresult CUDAAPI cuCtxGetLimit(size_t *pvalue, CUlimit limit);
/**
 * \brief Returns the preferred cache configuration for the current context
 *
 * On devices where the L1 cache and shared memory use the same hardware
 * resources, this returns through \p pconfig the preferred cache configuration
 * for the current context. This is only a preference. The driver will use
 * the requested configuration if possible, but it is free to choose a different
 * configuration if required to execute functions.
 *
 * This will return a \p pconfig of ::CU_FUNC_CACHE_PREFER_NONE on devices
 * where the size of the L1 cache and shared memory are fixed.
 *
 * The supported cache configurations are:
 * - ::CU_FUNC_CACHE_PREFER_NONE: no preference for shared memory or L1 (default)
 * - ::CU_FUNC_CACHE_PREFER_SHARED: prefer larger shared memory and smaller L1 cache
 * - ::CU_FUNC_CACHE_PREFER_L1: prefer larger L1 cache and smaller shared memory
 *
 * \param pconfig - Returned cache configuration
 *
 * \return
 * ::CUDA_SUCCESS,
 * ::CUDA_ERROR_DEINITIALIZED,
 * ::CUDA_ERROR_NOT_INITIALIZED,
 * ::CUDA_ERROR_INVALID_CONTEXT,
 * ::CUDA_ERROR_INVALID_VALUE
 * \notefnerr
 *
 * \sa ::cuCtxAttach,
 * ::cuCtxCreate,
 * ::cuCtxDestroy,
 * ::cuCtxDetach,
 * ::cuCtxGetApiVersion,
 * ::cuCtxGetDevice,
 * ::cuCtxGetLimit,
 * ::cuCtxPopCurrent,
 * ::cuCtxPushCurrent,
 * ::cuCtxSetCacheConfig,
 * ::cuCtxSetLimit,
 * ::cuCtxSynchronize,
 * ::cuFuncSetCacheConfig
 */
CUresult CUDAAPI cuCtxGetCacheConfig(CUfunc_cache *pconfig);
/**
 * \brief Sets the preferred cache configuration for the current context
 *
 * On devices where the L1 cache and shared memory use the same hardware
 * resources, this sets through \p config the preferred cache configuration for
 * the current context. This is only a preference. The driver will use
 * the requested configuration if possible, but it is free to choose a different
 * configuration if required to execute the function. Any function preference
 * set via ::cuFuncSetCacheConfig() will be preferred over this context-wide
 * setting. Setting the context-wide cache configuration to
 * ::CU_FUNC_CACHE_PREFER_NONE will cause subsequent kernel launches to prefer
 * to not change the cache configuration unless required to launch the kernel.
 *
 * This setting does nothing on devices where the size of the L1 cache and
 * shared memory are fixed.
 *
 * Launching a kernel with a different preference than the most recent
 * preference setting may insert a device-side synchronization point.
 *
 * The supported cache configurations are:
 * - ::CU_FUNC_CACHE_PREFER_NONE: no preference for shared memory or L1 (default)
 * - ::CU_FUNC_CACHE_PREFER_SHARED: prefer larger shared memory and smaller L1 cache
 * - ::CU_FUNC_CACHE_PREFER_L1: prefer larger L1 cache and smaller shared memory
 *
 * \param config - Requested cache configuration
 *
 * \return
 * ::CUDA_SUCCESS,
 * ::CUDA_ERROR_DEINITIALIZED,
 * ::CUDA_ERROR_NOT_INITIALIZED,
 * ::CUDA_ERROR_INVALID_CONTEXT,
 * ::CUDA_ERROR_INVALID_VALUE
 * \notefnerr
 *
 * \sa ::cuCtxAttach,
 * ::cuCtxCreate,
 * ::cuCtxDestroy,
 * ::cuCtxDetach,
 * ::cuCtxGetApiVersion,
 * ::cuCtxGetCacheConfig,
 * ::cuCtxGetDevice,
 * ::cuCtxGetLimit,
 * ::cuCtxPopCurrent,
 * ::cuCtxPushCurrent,
 * ::cuCtxSetLimit,
 * ::cuCtxSynchronize,
 * ::cuFuncSetCacheConfig
 */
CUresult CUDAAPI cuCtxSetCacheConfig(CUfunc_cache config);
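/*
 * Editor's note: requesting a context-wide cache preference (editorial
 * sketch; this is a no-op on devices whose L1/shared memory split is
 * fixed, as documented above):
 *
 * \code
 *   cuCtxSetCacheConfig(CU_FUNC_CACHE_PREFER_SHARED);
 * \endcode
 */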
/**
 * \brief Gets the context's API version
 *
 * Returns the API version used to create \p ctx in \p version. If \p ctx
 * is NULL, returns the API version used to create the currently bound
 * context.
 *
 * This will return the API version used to create a context (for example,
 * 3010 or 3020), which library developers can use to direct callers to a
 * specific API version. Note that this API version may not be the same as
 * returned by ::cuDriverGetVersion().
 *
 * \param ctx - Context to check
 * \param version - Pointer to version
 *
 * \return
 * ::CUDA_SUCCESS,
 * ::CUDA_ERROR_DEINITIALIZED,
 * ::CUDA_ERROR_NOT_INITIALIZED,
 * ::CUDA_ERROR_INVALID_CONTEXT,
 * ::CUDA_ERROR_UNKNOWN
 * \notefnerr
 *
 * \sa ::cuCtxAttach,
 * ::cuCtxCreate,
 * ::cuCtxDestroy,
 * ::cuCtxDetach,
 * ::cuCtxGetDevice,
 * ::cuCtxGetLimit,
 * ::cuCtxPopCurrent,
 * ::cuCtxPushCurrent,
 * ::cuCtxSetCacheConfig,
 * ::cuCtxSetLimit,
 * ::cuCtxSynchronize
 */
CUresult CUDAAPI cuCtxGetApiVersion(CUcontext ctx, unsigned int *version);

/** @} */ /* END CUDA_CTX */
/**
 * \defgroup CUDA_MODULE Module Management
 *
 * This section describes the module management functions of the low-level CUDA
 * driver application programming interface.
 *
 * @{
 */
/**
 * \brief Loads a compute module
 *
 * Takes a filename \p fname and loads the corresponding module \p module into
 * the current context. The CUDA driver API does not attempt to lazily
 * allocate the resources needed by a module; if the memory for functions and
 * data (constant and global) needed by the module cannot be allocated,
 * ::cuModuleLoad() fails. The file should be a \e cubin file as output by
 * \b nvcc or a \e PTX file, either as output by \b nvcc or handwritten.
 *
 * \param module - Returned module
 * \param fname - Filename of module to load
 *
 * \return
 * ::CUDA_SUCCESS,
 * ::CUDA_ERROR_DEINITIALIZED,
 * ::CUDA_ERROR_NOT_INITIALIZED,
 * ::CUDA_ERROR_INVALID_CONTEXT,
 * ::CUDA_ERROR_INVALID_VALUE,
 * ::CUDA_ERROR_NOT_FOUND,
 * ::CUDA_ERROR_OUT_OF_MEMORY,
 * ::CUDA_ERROR_FILE_NOT_FOUND,
 * ::CUDA_ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND,
 * ::CUDA_ERROR_SHARED_OBJECT_INIT_FAILED
 * \notefnerr
 *
 * \sa ::cuModuleGetFunction,
 * ::cuModuleGetGlobal,
 * ::cuModuleGetTexRef,
 * ::cuModuleLoadData,
 * ::cuModuleLoadDataEx,
 * ::cuModuleLoadFatBinary,
 * ::cuModuleUnload
 */
CUresult CUDAAPI cuModuleLoad(CUmodule *module, const char *fname);
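/*
 * Editor's note: loading a module from a cubin/PTX file and fetching a
 * kernel handle (editorial sketch; "kernel.ptx" and "mykernel" are
 * placeholder names, error checking omitted):
 *
 * \code
 *   CUmodule mod;
 *   CUfunction fn;
 *   cuModuleLoad(&mod, "kernel.ptx");
 *   cuModuleGetFunction(&fn, mod, "mykernel");
 *   // ... launch fn ...
 *   cuModuleUnload(mod);
 * \endcode
 */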
/**
 * \brief Load a module's data
 *
 * Takes a pointer \p image and loads the corresponding module \p module into
 * the current context. The pointer may be obtained by mapping a \e cubin or
 * \e PTX file, passing a \e cubin or \e PTX file as a NULL-terminated text
 * string, or incorporating a \e cubin object into the executable resources
 * and using operating system calls such as Windows \c FindResource() to
 * obtain the pointer.
 *
 * \param module - Returned module
 * \param image - Module data to load
 *
 * \return
 * ::CUDA_SUCCESS,
 * ::CUDA_ERROR_DEINITIALIZED,
 * ::CUDA_ERROR_NOT_INITIALIZED,
 * ::CUDA_ERROR_INVALID_CONTEXT,
 * ::CUDA_ERROR_INVALID_VALUE,
 * ::CUDA_ERROR_OUT_OF_MEMORY,
 * ::CUDA_ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND,
 * ::CUDA_ERROR_SHARED_OBJECT_INIT_FAILED
 * \notefnerr
 *
 * \sa ::cuModuleGetFunction,
 * ::cuModuleGetGlobal,
 * ::cuModuleGetTexRef,
 * ::cuModuleLoad,
 * ::cuModuleLoadDataEx,
 * ::cuModuleLoadFatBinary,
 * ::cuModuleUnload
 */
CUresult CUDAAPI cuModuleLoadData(CUmodule *module, const void *image);
/**
 * \brief Load a module's data with options
 *
 * Takes a pointer \p image and loads the corresponding module \p module into
 * the current context. The pointer may be obtained by mapping a \e cubin or
 * \e PTX file, passing a \e cubin or \e PTX file as a NULL-terminated text
 * string, or incorporating a \e cubin object into the executable resources
 * and using operating system calls such as Windows \c FindResource() to
 * obtain the pointer. Options are passed as an array via \p options and any
 * corresponding parameters are passed in \p optionValues. The number of total
 * options is supplied via \p numOptions. Any outputs will be returned via
 * \p optionValues. Supported options are (types for the option values are
 * specified in parentheses after the option name):
 *
 * - ::CU_JIT_MAX_REGISTERS: (unsigned int) input specifies the maximum number
 *   of registers per thread;
 * - ::CU_JIT_THREADS_PER_BLOCK: (unsigned int) input specifies number of
 *   threads per block to target compilation for; output returns the number of
 *   threads the compiler actually targeted;
 * - ::CU_JIT_WALL_TIME: (float) output returns the float value of wall clock
 *   time, in milliseconds, spent compiling the \e PTX code;
 * - ::CU_JIT_INFO_LOG_BUFFER: (char*) input is a pointer to a buffer in
 *   which to print any informational log messages from \e PTX assembly (the
 *   buffer size is specified via option ::CU_JIT_INFO_LOG_BUFFER_SIZE_BYTES);
 * - ::CU_JIT_INFO_LOG_BUFFER_SIZE_BYTES: (unsigned int) input is the size in
 *   bytes of the buffer; output is the number of bytes filled with messages;
 * - ::CU_JIT_ERROR_LOG_BUFFER: (char*) input is a pointer to a buffer in
 *   which to print any error log messages from \e PTX assembly (the buffer size
 *   is specified via option ::CU_JIT_ERROR_LOG_BUFFER_SIZE_BYTES);
 * - ::CU_JIT_ERROR_LOG_BUFFER_SIZE_BYTES: (unsigned int) input is the size in
 *   bytes of the buffer; output is the number of bytes filled with messages;
 * - ::CU_JIT_OPTIMIZATION_LEVEL: (unsigned int) input is the level of
 *   optimization to apply to generated code (0 - 4), with 4 being the default
 *   and highest level;
 * - ::CU_JIT_TARGET_FROM_CUCONTEXT: (No option value) causes compilation
 *   target to be determined based on current attached context (default);
 * - ::CU_JIT_TARGET: (unsigned int for enumerated type ::CUjit_target_enum)
 *   input is the compilation target based on supplied ::CUjit_target_enum;
 *   possible values are:
 *   - ::CU_TARGET_COMPUTE_10
 *   - ::CU_TARGET_COMPUTE_11
 *   - ::CU_TARGET_COMPUTE_12
 *   - ::CU_TARGET_COMPUTE_13
 *   - ::CU_TARGET_COMPUTE_20
 * - ::CU_JIT_FALLBACK_STRATEGY: (unsigned int for enumerated type
 *   ::CUjit_fallback_enum) chooses fallback strategy if matching cubin is not
 *   found; possible values are:
 *   - ::CU_PREFER_PTX
 *   - ::CU_PREFER_BINARY
 *
 * \param module - Returned module
 * \param image - Module data to load
 * \param numOptions - Number of options
 * \param options - Options for JIT
 * \param optionValues - Option values for JIT
 *
 * \return
 * ::CUDA_SUCCESS,
 * ::CUDA_ERROR_DEINITIALIZED,
 * ::CUDA_ERROR_NOT_INITIALIZED,
 * ::CUDA_ERROR_INVALID_CONTEXT,
 * ::CUDA_ERROR_INVALID_VALUE,
 * ::CUDA_ERROR_OUT_OF_MEMORY,
 * ::CUDA_ERROR_NO_BINARY_FOR_GPU,
 * ::CUDA_ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND,
 * ::CUDA_ERROR_SHARED_OBJECT_INIT_FAILED
 * \notefnerr
 *
 * \sa ::cuModuleGetFunction,
 * ::cuModuleGetGlobal,
 * ::cuModuleGetTexRef,
 * ::cuModuleLoad,
 * ::cuModuleLoadData,
 * ::cuModuleLoadFatBinary,
 * ::cuModuleUnload
 */
CUresult CUDAAPI cuModuleLoadDataEx(CUmodule *module, const void *image, unsigned int numOptions, CUjit_option *options, void **optionValues);
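/*
 * Editor's note: a sketch of passing JIT options to ::cuModuleLoadDataEx()
 * (not part of the original header; \p ptxImage is a placeholder for a
 * NULL-terminated PTX string, and <stdio.h> is assumed). Scalar option
 * values travel through the void* array by value:
 *
 * \code
 *   char errLog[8192] = { 0 };
 *   CUjit_option opts[] = { CU_JIT_ERROR_LOG_BUFFER,
 *                           CU_JIT_ERROR_LOG_BUFFER_SIZE_BYTES };
 *   void *vals[] = { errLog, (void *)(size_t)sizeof(errLog) };
 *   CUmodule mod;
 *   if (cuModuleLoadDataEx(&mod, ptxImage, 2, opts, vals) != CUDA_SUCCESS)
 *       fprintf(stderr, "JIT failed:\n%s\n", errLog);
 * \endcode
 */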
/**
 * \brief Load a module's data
 *
 * Takes a pointer \p fatCubin and loads the corresponding module \p module
 * into the current context. The pointer represents a <i>fat binary</i> object,
 * which is a collection of different \e cubin files, all representing the same
 * device code, but compiled and optimized for different architectures. There
 * is currently no documented API for constructing and using fat binary objects
 * by programmers, and therefore this function is an internal function in this
 * version of CUDA. More information can be found in the \b nvcc document.
 *
 * \param module - Returned module
 * \param fatCubin - Fat binary to load
 *
 * \return
 * ::CUDA_SUCCESS,
 * ::CUDA_ERROR_DEINITIALIZED,
 * ::CUDA_ERROR_NOT_INITIALIZED,
 * ::CUDA_ERROR_INVALID_CONTEXT,
 * ::CUDA_ERROR_INVALID_VALUE,
 * ::CUDA_ERROR_NOT_FOUND,
 * ::CUDA_ERROR_OUT_OF_MEMORY,
 * ::CUDA_ERROR_NO_BINARY_FOR_GPU,
 * ::CUDA_ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND,
 * ::CUDA_ERROR_SHARED_OBJECT_INIT_FAILED
 * \notefnerr
 *
 * \sa ::cuModuleGetFunction,
 * ::cuModuleGetGlobal,
 * ::cuModuleGetTexRef,
 * ::cuModuleLoad,
 * ::cuModuleLoadData,
 * ::cuModuleLoadDataEx,
 * ::cuModuleUnload
 */
CUresult CUDAAPI cuModuleLoadFatBinary(CUmodule *module, const void *fatCubin);
/**
 * \brief Unloads a module
 *
 * Unloads a module \p hmod from the current context.
 *
 * \param hmod - Module to unload
 *
 * \return
 * ::CUDA_SUCCESS,
 * ::CUDA_ERROR_DEINITIALIZED,
 * ::CUDA_ERROR_NOT_INITIALIZED,
 * ::CUDA_ERROR_INVALID_CONTEXT,
 * ::CUDA_ERROR_INVALID_VALUE
 * \notefnerr
 *
 * \sa ::cuModuleGetFunction,
 * ::cuModuleGetGlobal,
 * ::cuModuleGetTexRef,
 * ::cuModuleLoad,
 * ::cuModuleLoadData,
 * ::cuModuleLoadDataEx,
 * ::cuModuleLoadFatBinary
 */
CUresult CUDAAPI cuModuleUnload(CUmodule hmod);
/**
 * \brief Returns a function handle
 *
 * Returns in \p *hfunc the handle of the function of name \p name located in
 * module \p hmod. If no function of that name exists, ::cuModuleGetFunction()
 * returns ::CUDA_ERROR_NOT_FOUND.
 *
 * \param hfunc - Returned function handle
 * \param hmod - Module to retrieve function from
 * \param name - Name of function to retrieve
 *
 * \return
 * ::CUDA_SUCCESS,
 * ::CUDA_ERROR_DEINITIALIZED,
 * ::CUDA_ERROR_NOT_INITIALIZED,
 * ::CUDA_ERROR_INVALID_CONTEXT,
 * ::CUDA_ERROR_INVALID_VALUE,
 * ::CUDA_ERROR_NOT_FOUND
 * \notefnerr
 *
 * \sa ::cuModuleGetGlobal,
 * ::cuModuleGetTexRef,
 * ::cuModuleLoad,
 * ::cuModuleLoadData,
 * ::cuModuleLoadDataEx,
 * ::cuModuleLoadFatBinary,
 * ::cuModuleUnload
 */
CUresult CUDAAPI cuModuleGetFunction(CUfunction *hfunc, CUmodule hmod, const char *name);
#if __CUDA_API_VERSION >= 3020
/**
 * \brief Returns a global pointer from a module
 *
 * Returns in \p *dptr and \p *bytes the base pointer and size of the
 * global of name \p name located in module \p hmod. If no variable of that name
 * exists, ::cuModuleGetGlobal() returns ::CUDA_ERROR_NOT_FOUND. Both
 * parameters \p dptr and \p bytes are optional. If one of them is
 * NULL, it is ignored.
 *
 * \param dptr - Returned global device pointer
 * \param bytes - Returned global size in bytes
 * \param hmod - Module to retrieve global from
 * \param name - Name of global to retrieve
 *
 * \return
 * ::CUDA_SUCCESS,
 * ::CUDA_ERROR_DEINITIALIZED,
 * ::CUDA_ERROR_NOT_INITIALIZED,
 * ::CUDA_ERROR_INVALID_CONTEXT,
 * ::CUDA_ERROR_INVALID_VALUE,
 * ::CUDA_ERROR_NOT_FOUND
 * \notefnerr
 *
 * \sa ::cuModuleGetFunction,
 * ::cuModuleGetTexRef,
 * ::cuModuleLoad,
 * ::cuModuleLoadData,
 * ::cuModuleLoadDataEx,
 * ::cuModuleLoadFatBinary,
 * ::cuModuleUnload
 */
CUresult CUDAAPI cuModuleGetGlobal(CUdeviceptr *dptr, size_t *bytes, CUmodule hmod, const char *name);
#endif /* __CUDA_API_VERSION >= 3020 */
/**
 * \brief Returns a handle to a texture reference
 *
 * Returns in \p *pTexRef the handle of the texture reference of name \p name
 * in the module \p hmod. If no texture reference of that name exists,
 * ::cuModuleGetTexRef() returns ::CUDA_ERROR_NOT_FOUND. This texture reference
 * handle should not be destroyed, since it will be destroyed when the module
 * is unloaded.
 *
 * \param pTexRef - Returned texture reference
 * \param hmod - Module to retrieve texture reference from
 * \param name - Name of texture reference to retrieve
 *
 * \return
 * ::CUDA_SUCCESS,
 * ::CUDA_ERROR_DEINITIALIZED,
 * ::CUDA_ERROR_NOT_INITIALIZED,
 * ::CUDA_ERROR_INVALID_CONTEXT,
 * ::CUDA_ERROR_INVALID_VALUE,
 * ::CUDA_ERROR_NOT_FOUND
 * \notefnerr
 *
 * \sa ::cuModuleGetFunction,
 * ::cuModuleGetGlobal,
 * ::cuModuleGetSurfRef,
 * ::cuModuleLoad,
 * ::cuModuleLoadData,
 * ::cuModuleLoadDataEx,
 * ::cuModuleLoadFatBinary,
 * ::cuModuleUnload
 */
CUresult CUDAAPI cuModuleGetTexRef(CUtexref *pTexRef, CUmodule hmod, const char *name);
/**
 * \brief Returns a handle to a surface reference
 *
 * Returns in \p *pSurfRef the handle of the surface reference of name \p name
 * in the module \p hmod. If no surface reference of that name exists,
 * ::cuModuleGetSurfRef() returns ::CUDA_ERROR_NOT_FOUND.
 *
 * \param pSurfRef - Returned surface reference
 * \param hmod - Module to retrieve surface reference from
 * \param name - Name of surface reference to retrieve
 *
 * \return
 * ::CUDA_SUCCESS,
 * ::CUDA_ERROR_DEINITIALIZED,
 * ::CUDA_ERROR_NOT_INITIALIZED,
 * ::CUDA_ERROR_INVALID_CONTEXT,
 * ::CUDA_ERROR_INVALID_VALUE,
 * ::CUDA_ERROR_NOT_FOUND
 * \notefnerr
 *
 * \sa ::cuModuleGetFunction,
 * ::cuModuleGetGlobal,
 * ::cuModuleGetTexRef,
 * ::cuModuleLoad,
 * ::cuModuleLoadData,
 * ::cuModuleLoadDataEx,
 * ::cuModuleLoadFatBinary,
 * ::cuModuleUnload
 */
CUresult CUDAAPI cuModuleGetSurfRef(CUsurfref *pSurfRef, CUmodule hmod, const char *name);
/** @} */ /* END CUDA_MODULE */

/**
 * \defgroup CUDA_MEM Memory Management
 *
 * This section describes the memory management functions of the low-level CUDA
 * driver application programming interface.
 *
 * @{
 */
#if __CUDA_API_VERSION >= 3020
/**
 * \brief Gets free and total memory
 *
 * Returns in \p *free and \p *total respectively, the free and total amount of
 * memory available for allocation by the CUDA context, in bytes.
 *
 * \param free - Returned free memory in bytes
 * \param total - Returned total memory in bytes
 *
 * \return
 * ::CUDA_SUCCESS,
 * ::CUDA_ERROR_DEINITIALIZED,
 * ::CUDA_ERROR_NOT_INITIALIZED,
 * ::CUDA_ERROR_INVALID_CONTEXT,
 * ::CUDA_ERROR_INVALID_VALUE
 * \notefnerr
 *
 * \sa ::cuArray3DCreate, ::cuArray3DGetDescriptor, ::cuArrayCreate,
 * ::cuArrayDestroy, ::cuArrayGetDescriptor, ::cuMemAlloc, ::cuMemAllocHost,
 * ::cuMemAllocPitch, ::cuMemcpy2D, ::cuMemcpy2DAsync, ::cuMemcpy2DUnaligned,
 * ::cuMemcpy3D, ::cuMemcpy3DAsync, ::cuMemcpyAtoA, ::cuMemcpyAtoD,
 * ::cuMemcpyAtoH, ::cuMemcpyAtoHAsync, ::cuMemcpyDtoA, ::cuMemcpyDtoD, ::cuMemcpyDtoDAsync,
 * ::cuMemcpyDtoH, ::cuMemcpyDtoHAsync, ::cuMemcpyHtoA, ::cuMemcpyHtoAAsync,
 * ::cuMemcpyHtoD, ::cuMemcpyHtoDAsync, ::cuMemFree, ::cuMemFreeHost,
 * ::cuMemGetAddressRange, ::cuMemHostAlloc,
 * ::cuMemHostGetDevicePointer, ::cuMemsetD2D8, ::cuMemsetD2D16,
 * ::cuMemsetD2D32, ::cuMemsetD8, ::cuMemsetD16, ::cuMemsetD32
 */
CUresult CUDAAPI cuMemGetInfo(size_t *free, size_t *total);
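/*
 * Editor's note: checking memory pressure before a large allocation
 * (editorial sketch):
 *
 * \code
 *   size_t freeB = 0, totalB = 0;
 *   cuMemGetInfo(&freeB, &totalB);   // both sizes are in bytes
 * \endcode
 */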
/**
 * \brief Allocates device memory
 *
 * Allocates \p bytesize bytes of linear memory on the device and returns in
 * \p *dptr a pointer to the allocated memory. The allocated memory is suitably
 * aligned for any kind of variable. The memory is not cleared. If \p bytesize
 * is 0, ::cuMemAlloc() returns ::CUDA_ERROR_INVALID_VALUE.
 *
 * \param dptr - Returned device pointer
 * \param bytesize - Requested allocation size in bytes
 *
 * \return
 * ::CUDA_SUCCESS,
 * ::CUDA_ERROR_DEINITIALIZED,
 * ::CUDA_ERROR_NOT_INITIALIZED,
 * ::CUDA_ERROR_INVALID_CONTEXT,
 * ::CUDA_ERROR_INVALID_VALUE,
 * ::CUDA_ERROR_OUT_OF_MEMORY
 * \notefnerr
 *
 * \sa ::cuArray3DCreate, ::cuArray3DGetDescriptor, ::cuArrayCreate,
 * ::cuArrayDestroy, ::cuArrayGetDescriptor, ::cuMemAllocHost,
 * ::cuMemAllocPitch, ::cuMemcpy2D, ::cuMemcpy2DAsync, ::cuMemcpy2DUnaligned,
 * ::cuMemcpy3D, ::cuMemcpy3DAsync, ::cuMemcpyAtoA, ::cuMemcpyAtoD,
 * ::cuMemcpyAtoH, ::cuMemcpyAtoHAsync, ::cuMemcpyDtoA, ::cuMemcpyDtoD, ::cuMemcpyDtoDAsync,
 * ::cuMemcpyDtoH, ::cuMemcpyDtoHAsync, ::cuMemcpyHtoA, ::cuMemcpyHtoAAsync,
 * ::cuMemcpyHtoD, ::cuMemcpyHtoDAsync, ::cuMemFree, ::cuMemFreeHost,
 * ::cuMemGetAddressRange, ::cuMemGetInfo, ::cuMemHostAlloc,
 * ::cuMemHostGetDevicePointer, ::cuMemsetD2D8, ::cuMemsetD2D16,
 * ::cuMemsetD2D32, ::cuMemsetD8, ::cuMemsetD16, ::cuMemsetD32
 */
CUresult CUDAAPI cuMemAlloc(CUdeviceptr *dptr, size_t bytesize);
/**
 * \brief Allocates pitched device memory
 *
 * Allocates at least \p WidthInBytes * \p Height bytes of linear memory on
 * the device and returns in \p *dptr a pointer to the allocated memory. The
 * function may pad the allocation to ensure that corresponding pointers in
 * any given row will continue to meet the alignment requirements for
 * coalescing as the address is updated from row to row. \p ElementSizeBytes
 * specifies the size of the largest reads and writes that will be performed
 * on the memory range. \p ElementSizeBytes may be 4, 8 or 16 (since coalesced
 * memory transactions are not possible on other data sizes). If
 * \p ElementSizeBytes is smaller than the actual read/write size of a kernel,
 * the kernel will run correctly, but possibly at reduced speed. The pitch
 * returned in \p *pPitch by ::cuMemAllocPitch() is the width in bytes of the
 * allocation. The intended usage of pitch is as a separate parameter of the
 * allocation, used to compute addresses within the 2D array. Given the row
 * and column of an array element of type \b T, the address is computed as:
 * \code
   T* pElement = (T*)((char*)BaseAddress + Row * Pitch) + Column;
 * \endcode
 *
 * The pitch returned by ::cuMemAllocPitch() is guaranteed to work with
 * ::cuMemcpy2D() under all circumstances. For allocations of 2D arrays, it is
 * recommended that programmers consider performing pitch allocations using
 * ::cuMemAllocPitch(). Due to alignment restrictions in the hardware, this is
 * especially true if the application will be performing 2D memory copies
 * between different regions of device memory (whether linear memory or CUDA
 * arrays).
 *
 * The byte alignment of the pitch returned by ::cuMemAllocPitch() is guaranteed
 * to match or exceed the alignment requirement for texture binding with
 * ::cuTexRefSetAddress2D().
 *
 * \param dptr - Returned device pointer
 * \param pPitch - Returned pitch of allocation in bytes
 * \param WidthInBytes - Requested allocation width in bytes
 * \param Height - Requested allocation height in rows
 * \param ElementSizeBytes - Size of largest reads/writes for range
 *
 * \return
 * ::CUDA_SUCCESS,
 * ::CUDA_ERROR_DEINITIALIZED,
 * ::CUDA_ERROR_NOT_INITIALIZED,
 * ::CUDA_ERROR_INVALID_CONTEXT,
 * ::CUDA_ERROR_INVALID_VALUE,
 * ::CUDA_ERROR_OUT_OF_MEMORY
 * \notefnerr
 *
 * \sa ::cuArray3DCreate, ::cuArray3DGetDescriptor, ::cuArrayCreate,
 * ::cuArrayDestroy, ::cuArrayGetDescriptor, ::cuMemAlloc, ::cuMemAllocHost,
 * ::cuMemcpy2D, ::cuMemcpy2DAsync, ::cuMemcpy2DUnaligned,
 * ::cuMemcpy3D, ::cuMemcpy3DAsync, ::cuMemcpyAtoA, ::cuMemcpyAtoD,
 * ::cuMemcpyAtoH, ::cuMemcpyAtoHAsync, ::cuMemcpyDtoA, ::cuMemcpyDtoD, ::cuMemcpyDtoDAsync,
 * ::cuMemcpyDtoH, ::cuMemcpyDtoHAsync, ::cuMemcpyHtoA, ::cuMemcpyHtoAAsync,
 * ::cuMemcpyHtoD, ::cuMemcpyHtoDAsync, ::cuMemFree, ::cuMemFreeHost,
 * ::cuMemGetAddressRange, ::cuMemGetInfo, ::cuMemHostAlloc,
 * ::cuMemHostGetDevicePointer, ::cuMemsetD2D8, ::cuMemsetD2D16,
 * ::cuMemsetD2D32, ::cuMemsetD8, ::cuMemsetD16, ::cuMemsetD32
 */
CUresult CUDAAPI cuMemAllocPitch(CUdeviceptr *dptr, size_t *pPitch, size_t WidthInBytes, size_t Height, unsigned int ElementSizeBytes);
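/*
 * Editor's note: a sketch of a pitched allocation and the row addressing
 * formula documented above (not part of the original header; \p width,
 * \p height and \p r are placeholder values):
 *
 * \code
 *   CUdeviceptr d2d;
 *   size_t pitch = 0;
 *   cuMemAllocPitch(&d2d, &pitch, width * sizeof(float), height,
 *                   sizeof(float));                 // element size is 4
 *   CUdeviceptr row = d2d + (CUdeviceptr)(r * pitch);  // start of row r
 *   cuMemFree(d2d);
 * \endcode
 */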
/**
 * \brief Frees device memory
 *
 * Frees the memory space pointed to by \p dptr, which must have been returned
 * by a previous call to ::cuMemAlloc() or ::cuMemAllocPitch().
 *
 * \param dptr - Pointer to memory to free
 *
 * \return
 * ::CUDA_SUCCESS,
 * ::CUDA_ERROR_DEINITIALIZED,
 * ::CUDA_ERROR_NOT_INITIALIZED,
 * ::CUDA_ERROR_INVALID_CONTEXT,
 * ::CUDA_ERROR_INVALID_VALUE
 * \notefnerr
 *
 * \sa ::cuArray3DCreate, ::cuArray3DGetDescriptor, ::cuArrayCreate,
 * ::cuArrayDestroy, ::cuArrayGetDescriptor, ::cuMemAlloc, ::cuMemAllocHost,
 * ::cuMemAllocPitch, ::cuMemcpy2D, ::cuMemcpy2DAsync, ::cuMemcpy2DUnaligned,
 * ::cuMemcpy3D, ::cuMemcpy3DAsync, ::cuMemcpyAtoA, ::cuMemcpyAtoD,
 * ::cuMemcpyAtoH, ::cuMemcpyAtoHAsync, ::cuMemcpyDtoA, ::cuMemcpyDtoD, ::cuMemcpyDtoDAsync,
 * ::cuMemcpyDtoH, ::cuMemcpyDtoHAsync, ::cuMemcpyHtoA, ::cuMemcpyHtoAAsync,
 * ::cuMemcpyHtoD, ::cuMemcpyHtoDAsync, ::cuMemFreeHost,
 * ::cuMemGetAddressRange, ::cuMemGetInfo, ::cuMemHostAlloc,
 * ::cuMemHostGetDevicePointer, ::cuMemsetD2D8, ::cuMemsetD2D16,
 * ::cuMemsetD2D32, ::cuMemsetD8, ::cuMemsetD16, ::cuMemsetD32
 */
CUresult CUDAAPI cuMemFree(CUdeviceptr dptr);
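/*
 * Editor's note: a minimal allocate/copy/free round trip (editorial
 * sketch; error checking omitted):
 *
 * \code
 *   float host[256] = { 0 };
 *   CUdeviceptr d;
 *   cuMemAlloc(&d, sizeof(host));
 *   cuMemcpyHtoD(d, host, sizeof(host));   // host -> device
 *   cuMemcpyDtoH(host, d, sizeof(host));   // device -> host
 *   cuMemFree(d);
 * \endcode
 */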
/**
 * \brief Get information on memory allocations
 *
 * Returns the base address in \p *pbase and size in \p *psize of the
 * allocation by ::cuMemAlloc() or ::cuMemAllocPitch() that contains the input
 * pointer \p dptr. Both parameters \p pbase and \p psize are optional. If one
 * of them is NULL, it is ignored.
 *
 * \param pbase - Returned base address
 * \param psize - Returned size of device memory allocation
 * \param dptr - Device pointer to query
 *
 * \return
 * ::CUDA_SUCCESS,
 * ::CUDA_ERROR_DEINITIALIZED,
 * ::CUDA_ERROR_NOT_INITIALIZED,
 * ::CUDA_ERROR_INVALID_CONTEXT,
 * ::CUDA_ERROR_INVALID_VALUE
 * \notefnerr
 *
 * \sa ::cuArray3DCreate, ::cuArray3DGetDescriptor, ::cuArrayCreate,
 * ::cuArrayDestroy, ::cuArrayGetDescriptor, ::cuMemAlloc, ::cuMemAllocHost,
 * ::cuMemAllocPitch, ::cuMemcpy2D, ::cuMemcpy2DAsync, ::cuMemcpy2DUnaligned,
 * ::cuMemcpy3D, ::cuMemcpy3DAsync, ::cuMemcpyAtoA, ::cuMemcpyAtoD,
 * ::cuMemcpyAtoH, ::cuMemcpyAtoHAsync, ::cuMemcpyDtoA, ::cuMemcpyDtoD, ::cuMemcpyDtoDAsync,
 * ::cuMemcpyDtoH, ::cuMemcpyDtoHAsync, ::cuMemcpyHtoA, ::cuMemcpyHtoAAsync,
 * ::cuMemcpyHtoD, ::cuMemcpyHtoDAsync, ::cuMemFree, ::cuMemFreeHost,
 * ::cuMemGetInfo, ::cuMemHostAlloc,
 * ::cuMemHostGetDevicePointer, ::cuMemsetD2D8, ::cuMemsetD2D16,
 * ::cuMemsetD2D32, ::cuMemsetD8, ::cuMemsetD16, ::cuMemsetD32
 */
CUresult CUDAAPI cuMemGetAddressRange(CUdeviceptr *pbase, size_t *psize, CUdeviceptr dptr);
/**
 * \brief Allocates page-locked host memory
 *
 * Allocates \p bytesize bytes of host memory that is page-locked and
 * accessible to the device. The driver tracks the virtual memory ranges
 * allocated with this function and automatically accelerates calls to
 * functions such as ::cuMemcpy(). Since the memory can be accessed directly by
 * the device, it can be read or written with much higher bandwidth than
 * pageable memory obtained with functions such as ::malloc(). Allocating
 * excessive amounts of memory with ::cuMemAllocHost() may degrade system
 * performance, since it reduces the amount of memory available to the system
 * for paging. As a result, this function is best used sparingly to allocate
 * staging areas for data exchange between host and device.
 *
 * \param pp - Returned host pointer to page-locked memory
 * \param bytesize - Requested allocation size in bytes
 *
 * \return
 * ::CUDA_SUCCESS,
 * ::CUDA_ERROR_DEINITIALIZED,
 * ::CUDA_ERROR_NOT_INITIALIZED,
 * ::CUDA_ERROR_INVALID_CONTEXT,
 * ::CUDA_ERROR_INVALID_VALUE,
 * ::CUDA_ERROR_OUT_OF_MEMORY
 * \notefnerr
 *
 * \sa ::cuArray3DCreate, ::cuArray3DGetDescriptor, ::cuArrayCreate,
 * ::cuArrayDestroy, ::cuArrayGetDescriptor, ::cuMemAlloc,
 * ::cuMemAllocPitch, ::cuMemcpy2D, ::cuMemcpy2DAsync, ::cuMemcpy2DUnaligned,
 * ::cuMemcpy3D, ::cuMemcpy3DAsync, ::cuMemcpyAtoA, ::cuMemcpyAtoD,
 * ::cuMemcpyAtoH, ::cuMemcpyAtoHAsync, ::cuMemcpyDtoA, ::cuMemcpyDtoD, ::cuMemcpyDtoDAsync,
 * ::cuMemcpyDtoH, ::cuMemcpyDtoHAsync, ::cuMemcpyHtoA, ::cuMemcpyHtoAAsync,
 * ::cuMemcpyHtoD, ::cuMemcpyHtoDAsync, ::cuMemFree, ::cuMemFreeHost,
 * ::cuMemGetAddressRange, ::cuMemGetInfo, ::cuMemHostAlloc,
 * ::cuMemHostGetDevicePointer, ::cuMemsetD2D8, ::cuMemsetD2D16,
 * ::cuMemsetD2D32, ::cuMemsetD8, ::cuMemsetD16, ::cuMemsetD32
 */
CUresult CUDAAPI cuMemAllocHost(void **pp, size_t bytesize);
#endif /* __CUDA_API_VERSION >= 3020 */
/**
 * \brief Frees page-locked host memory
 *
 * Frees the memory space pointed to by \p p, which must have been returned by
 * a previous call to ::cuMemAllocHost().
 *
 * \param p - Pointer to memory to free
 *
 * \return
 * ::CUDA_SUCCESS,
 * ::CUDA_ERROR_DEINITIALIZED,
 * ::CUDA_ERROR_NOT_INITIALIZED,
 * ::CUDA_ERROR_INVALID_CONTEXT,
 * ::CUDA_ERROR_INVALID_VALUE
 * \notefnerr
 *
 * \sa ::cuArray3DCreate, ::cuArray3DGetDescriptor, ::cuArrayCreate,
 * ::cuArrayDestroy, ::cuArrayGetDescriptor, ::cuMemAlloc, ::cuMemAllocHost,
 * ::cuMemAllocPitch, ::cuMemcpy2D, ::cuMemcpy2DAsync, ::cuMemcpy2DUnaligned,
 * ::cuMemcpy3D, ::cuMemcpy3DAsync, ::cuMemcpyAtoA, ::cuMemcpyAtoD,
 * ::cuMemcpyAtoH, ::cuMemcpyAtoHAsync, ::cuMemcpyDtoA, ::cuMemcpyDtoD, ::cuMemcpyDtoDAsync,
 * ::cuMemcpyDtoH, ::cuMemcpyDtoHAsync, ::cuMemcpyHtoA, ::cuMemcpyHtoAAsync,
 * ::cuMemcpyHtoD, ::cuMemcpyHtoDAsync, ::cuMemFree,
 * ::cuMemGetAddressRange, ::cuMemGetInfo, ::cuMemHostAlloc,
 * ::cuMemHostGetDevicePointer, ::cuMemsetD2D8, ::cuMemsetD2D16,
 * ::cuMemsetD2D32, ::cuMemsetD8, ::cuMemsetD16, ::cuMemsetD32
 */
CUresult CUDAAPI cuMemFreeHost(void *p);
/************************************ /**
** * \brief Allocates page-locked host memory
** Export tables *
** * Allocates \p bytesize bytes of host memory that is page-locked and acces
***********************************/ sible
CUresult CUDAAPI cuGetExportTable( const void **ppExportTable, const CU * to the device. The driver tracks the virtual memory ranges allocated wit
uuid *pExportTableId ); h
* this function and automatically accelerates calls to functions such as
* ::cuMemcpyHtoD(). Since the memory can be accessed directly by the devic
e,
* it can be read or written with much higher bandwidth than pageable memor
y
* obtained with functions such as ::malloc(). Allocating excessive amounts
of
* pinned memory may degrade system performance, since it reduces the amoun
t
* of memory available to the system for paging. As a result, this function
is
* best used sparingly to allocate staging areas for data exchange between
* host and device.
*
* The \p Flags parameter enables different options to be specified that
* affect the allocation, as follows.
*
 * - ::CU_MEMHOSTALLOC_PORTABLE: The memory returned by this call will be
 *   considered as pinned memory by all CUDA contexts, not just the one that
 *   performed the allocation.
 *
 * - ::CU_MEMHOSTALLOC_DEVICEMAP: Maps the allocation into the CUDA address
 *   space. The device pointer to the memory may be obtained by calling
 *   ::cuMemHostGetDevicePointer(). This feature is available only on GPUs
 *   with compute capability greater than or equal to 1.1.
 *
 * - ::CU_MEMHOSTALLOC_WRITECOMBINED: Allocates the memory as write-combined
 *   (WC). WC memory can be transferred across the PCI Express bus more
 *   quickly on some system configurations, but cannot be read efficiently by
 *   most CPUs. WC memory is a good option for buffers that will be written by
 *   the CPU and read by the GPU via mapped pinned memory or host->device
 *   transfers.
 *
 * All of these flags are orthogonal to one another: a developer may allocate
 * memory that is portable, mapped and/or write-combined with no restrictions.
 *
 * The CUDA context must have been created with the ::CU_CTX_MAP_HOST flag in
 * order for the ::CU_MEMHOSTALLOC_DEVICEMAP flag to have any effect.
 *
 * The ::CU_MEMHOSTALLOC_DEVICEMAP flag may be specified on CUDA contexts for
 * devices that do not support mapped pinned memory. The failure is deferred
 * to ::cuMemHostGetDevicePointer() because the memory may be mapped into
 * other CUDA contexts via the ::CU_MEMHOSTALLOC_PORTABLE flag.
 *
 * The memory allocated by this function must be freed with ::cuMemFreeHost().
*
* \param pp - Returned host pointer to page-locked memory
* \param bytesize - Requested allocation size in bytes
* \param Flags - Flags for allocation request
*
* \return
* ::CUDA_SUCCESS,
* ::CUDA_ERROR_DEINITIALIZED,
* ::CUDA_ERROR_NOT_INITIALIZED,
* ::CUDA_ERROR_INVALID_CONTEXT,
* ::CUDA_ERROR_INVALID_VALUE,
* ::CUDA_ERROR_OUT_OF_MEMORY
* \notefnerr
*
 * \sa ::cuArray3DCreate, ::cuArray3DGetDescriptor, ::cuArrayCreate,
 * ::cuArrayDestroy, ::cuArrayGetDescriptor, ::cuMemAlloc, ::cuMemAllocHost,
 * ::cuMemAllocPitch, ::cuMemcpy2D, ::cuMemcpy2DAsync, ::cuMemcpy2DUnaligned,
 * ::cuMemcpy3D, ::cuMemcpy3DAsync, ::cuMemcpyAtoA, ::cuMemcpyAtoD,
 * ::cuMemcpyAtoH, ::cuMemcpyAtoHAsync, ::cuMemcpyDtoA, ::cuMemcpyDtoD, ::cuMemcpyDtoDAsync,
 * ::cuMemcpyDtoH, ::cuMemcpyDtoHAsync, ::cuMemcpyHtoA, ::cuMemcpyHtoAAsync,
 * ::cuMemcpyHtoD, ::cuMemcpyHtoDAsync, ::cuMemFree, ::cuMemFreeHost,
 * ::cuMemGetAddressRange, ::cuMemGetInfo,
 * ::cuMemHostGetDevicePointer, ::cuMemsetD2D8, ::cuMemsetD2D16,
 * ::cuMemsetD2D32, ::cuMemsetD8, ::cuMemsetD16, ::cuMemsetD32
 */
CUresult CUDAAPI cuMemHostAlloc(void **pp, size_t bytesize, unsigned int Flags);
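/*
 * A minimal usage sketch (illustrative only, not part of the original header):
 * allocating portable, mapped pinned memory and retrieving its device pointer.
 * It assumes an initialized driver and a current context created with
 * ::CU_CTX_MAP_HOST; error handling is abbreviated.
 *
 * \code
   #include <cuda.h>

   void *hostPtr = NULL;
   CUdeviceptr devPtr = 0;

   // 1 MiB of pinned memory, pinned for all contexts and mapped into the
   // CUDA address space.
   if (cuMemHostAlloc(&hostPtr, 1 << 20,
                      CU_MEMHOSTALLOC_PORTABLE | CU_MEMHOSTALLOC_DEVICEMAP) == CUDA_SUCCESS) {
       // Flags must be 0 for now, as documented below.
       if (cuMemHostGetDevicePointer(&devPtr, hostPtr, 0) == CUDA_SUCCESS) {
           // devPtr aliases hostPtr and may be passed to kernels.
       }
       cuMemFreeHost(hostPtr);   // pinned memory is freed with cuMemFreeHost()
   }
 * \endcode
 */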
#if __CUDA_API_VERSION >= 3020
/**
 * \brief Passes back device pointer of mapped pinned memory
 *
 * Passes back the device pointer \p pdptr corresponding to the mapped, pinned
 * host buffer \p p allocated by ::cuMemHostAlloc.
 *
 * ::cuMemHostGetDevicePointer() will fail if the ::CU_MEMHOSTALLOC_DEVICEMAP
 * flag was not specified at the time the memory was allocated, or if the
 * function is called on a GPU that does not support mapped pinned memory.
 *
 * \p Flags provides for future releases. For now, it must be set to 0.
 *
 * \param pdptr - Returned device pointer
 * \param p     - Host pointer
 * \param Flags - Options (must be 0)
 *
 * \return
 * ::CUDA_SUCCESS,
 * ::CUDA_ERROR_DEINITIALIZED,
 * ::CUDA_ERROR_NOT_INITIALIZED,
 * ::CUDA_ERROR_INVALID_CONTEXT,
 * ::CUDA_ERROR_INVALID_VALUE
 * \notefnerr
 *
 * \sa ::cuArray3DCreate, ::cuArray3DGetDescriptor, ::cuArrayCreate,
 * ::cuArrayDestroy, ::cuArrayGetDescriptor, ::cuMemAlloc, ::cuMemAllocHost,
 * ::cuMemAllocPitch, ::cuMemcpy2D, ::cuMemcpy2DAsync, ::cuMemcpy2DUnaligned,
 * ::cuMemcpy3D, ::cuMemcpy3DAsync, ::cuMemcpyAtoA, ::cuMemcpyAtoD,
 * ::cuMemcpyAtoH, ::cuMemcpyAtoHAsync, ::cuMemcpyDtoA, ::cuMemcpyDtoD, ::cuMemcpyDtoDAsync,
 * ::cuMemcpyDtoH, ::cuMemcpyDtoHAsync, ::cuMemcpyHtoA, ::cuMemcpyHtoAAsync,
 * ::cuMemcpyHtoD, ::cuMemcpyHtoDAsync, ::cuMemFree, ::cuMemFreeHost,
 * ::cuMemGetAddressRange, ::cuMemGetInfo, ::cuMemHostAlloc,
 * ::cuMemsetD2D8, ::cuMemsetD2D16,
 * ::cuMemsetD2D32, ::cuMemsetD8, ::cuMemsetD16, ::cuMemsetD32
 */
CUresult CUDAAPI cuMemHostGetDevicePointer(CUdeviceptr *pdptr, void *p, unsigned int Flags);
#endif /* __CUDA_API_VERSION >= 3020 */
/**
 * \brief Passes back flags that were used for a pinned allocation
*
* Passes back the flags \p pFlags that were specified when allocating
* the pinned host buffer \p p allocated by ::cuMemHostAlloc.
*
* ::cuMemHostGetFlags() will fail if the pointer does not reside in
* an allocation performed by ::cuMemAllocHost() or ::cuMemHostAlloc().
*
* \param pFlags - Returned flags word
* \param p - Host pointer
*
* \return
* ::CUDA_SUCCESS,
* ::CUDA_ERROR_DEINITIALIZED,
* ::CUDA_ERROR_NOT_INITIALIZED,
* ::CUDA_ERROR_INVALID_CONTEXT,
* ::CUDA_ERROR_INVALID_VALUE
* \notefnerr
*
* \sa ::cuMemAllocHost, ::cuMemHostAlloc
*/
CUresult CUDAAPI cuMemHostGetFlags(unsigned int *pFlags, void *p);
#if __CUDA_API_VERSION >= 3020
/**
* \brief Copies memory from Host to Device
*
 * Copies from host memory to device memory. \p dstDevice and \p srcHost are
 * the base addresses of the destination and source, respectively. \p ByteCount
 * specifies the number of bytes to copy. Note that this function is
 * synchronous.
*
* \param dstDevice - Destination device pointer
* \param srcHost - Source host pointer
* \param ByteCount - Size of memory copy in bytes
*
* \return
* ::CUDA_SUCCESS,
* ::CUDA_ERROR_DEINITIALIZED,
* ::CUDA_ERROR_NOT_INITIALIZED,
* ::CUDA_ERROR_INVALID_CONTEXT,
* ::CUDA_ERROR_INVALID_VALUE
* \notefnerr
*
 * \sa ::cuArray3DCreate, ::cuArray3DGetDescriptor, ::cuArrayCreate,
 * ::cuArrayDestroy, ::cuArrayGetDescriptor, ::cuMemAlloc, ::cuMemAllocHost,
 * ::cuMemAllocPitch, ::cuMemcpy2D, ::cuMemcpy2DAsync, ::cuMemcpy2DUnaligned,
 * ::cuMemcpy3D, ::cuMemcpy3DAsync, ::cuMemcpyAtoA, ::cuMemcpyAtoD,
 * ::cuMemcpyAtoH, ::cuMemcpyAtoHAsync, ::cuMemcpyDtoA, ::cuMemcpyDtoD, ::cuMemcpyDtoDAsync,
 * ::cuMemcpyDtoH, ::cuMemcpyDtoHAsync, ::cuMemcpyHtoA, ::cuMemcpyHtoAAsync,
 * ::cuMemcpyHtoDAsync, ::cuMemFree, ::cuMemFreeHost,
 * ::cuMemGetAddressRange, ::cuMemGetInfo, ::cuMemHostAlloc,
 * ::cuMemHostGetDevicePointer, ::cuMemsetD2D8, ::cuMemsetD2D16,
 * ::cuMemsetD2D32, ::cuMemsetD8, ::cuMemsetD16, ::cuMemsetD32
 */
CUresult CUDAAPI cuMemcpyHtoD(CUdeviceptr dstDevice, const void *srcHost, size_t ByteCount);
/**
* \brief Copies memory from Device to Host
*
 * Copies from device to host memory. \p dstHost and \p srcDevice specify the
 * base pointers of the destination and source, respectively. \p ByteCount
 * specifies the number of bytes to copy. Note that this function is
 * synchronous.
*
* \param dstHost - Destination host pointer
* \param srcDevice - Source device pointer
* \param ByteCount - Size of memory copy in bytes
*
* \return
* ::CUDA_SUCCESS,
* ::CUDA_ERROR_DEINITIALIZED,
* ::CUDA_ERROR_NOT_INITIALIZED,
* ::CUDA_ERROR_INVALID_CONTEXT,
* ::CUDA_ERROR_INVALID_VALUE
* \notefnerr
*
 * \sa ::cuArray3DCreate, ::cuArray3DGetDescriptor, ::cuArrayCreate,
 * ::cuArrayDestroy, ::cuArrayGetDescriptor, ::cuMemAlloc, ::cuMemAllocHost,
 * ::cuMemAllocPitch, ::cuMemcpy2D, ::cuMemcpy2DAsync, ::cuMemcpy2DUnaligned,
 * ::cuMemcpy3D, ::cuMemcpy3DAsync, ::cuMemcpyAtoA, ::cuMemcpyAtoD,
 * ::cuMemcpyAtoH, ::cuMemcpyAtoHAsync, ::cuMemcpyDtoA, ::cuMemcpyDtoD, ::cuMemcpyDtoDAsync,
 * ::cuMemcpyDtoHAsync, ::cuMemcpyHtoA, ::cuMemcpyHtoAAsync,
 * ::cuMemcpyHtoD, ::cuMemcpyHtoDAsync, ::cuMemFree, ::cuMemFreeHost,
 * ::cuMemGetAddressRange, ::cuMemGetInfo, ::cuMemHostAlloc,
 * ::cuMemHostGetDevicePointer, ::cuMemsetD2D8, ::cuMemsetD2D16,
 * ::cuMemsetD2D32, ::cuMemsetD8, ::cuMemsetD16, ::cuMemsetD32
 */
CUresult CUDAAPI cuMemcpyDtoH(void *dstHost, CUdeviceptr srcDevice, size_t ByteCount);
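/*
 * A minimal usage sketch (illustrative only, not part of the original header)
 * of the two synchronous copies above: a host -> device -> host round trip
 * through linear device memory. Context setup and most error checks omitted.
 *
 * \code
   #include <cuda.h>

   float src[256] = {0}, dst[256];
   CUdeviceptr d = 0;

   if (cuMemAlloc(&d, sizeof src) == CUDA_SUCCESS) {
       cuMemcpyHtoD(d, src, sizeof src);   // blocks until the copy completes
       cuMemcpyDtoH(dst, d, sizeof dst);   // also synchronous
       cuMemFree(d);
   }
 * \endcode
 */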
/**
* \brief Copies memory from Device to Device
*
 * Copies from device memory to device memory. \p dstDevice and \p srcDevice
 * are the base pointers of the destination and source, respectively.
 * \p ByteCount specifies the number of bytes to copy. Note that this function
 * is asynchronous.
*
* \param dstDevice - Destination device pointer
* \param srcDevice - Source device pointer
* \param ByteCount - Size of memory copy in bytes
*
* \return
* ::CUDA_SUCCESS,
* ::CUDA_ERROR_DEINITIALIZED,
* ::CUDA_ERROR_NOT_INITIALIZED,
* ::CUDA_ERROR_INVALID_CONTEXT,
* ::CUDA_ERROR_INVALID_VALUE
* \notefnerr
*
 * \sa ::cuArray3DCreate, ::cuArray3DGetDescriptor, ::cuArrayCreate,
 * ::cuArrayDestroy, ::cuArrayGetDescriptor, ::cuMemAlloc, ::cuMemAllocHost,
 * ::cuMemAllocPitch, ::cuMemcpy2D, ::cuMemcpy2DAsync, ::cuMemcpy2DUnaligned,
 * ::cuMemcpy3D, ::cuMemcpy3DAsync, ::cuMemcpyAtoA, ::cuMemcpyAtoD,
 * ::cuMemcpyAtoH, ::cuMemcpyAtoHAsync, ::cuMemcpyDtoA,
 * ::cuMemcpyDtoH, ::cuMemcpyDtoHAsync, ::cuMemcpyHtoA, ::cuMemcpyHtoAAsync,
 * ::cuMemcpyHtoD, ::cuMemcpyHtoDAsync, ::cuMemFree, ::cuMemFreeHost,
 * ::cuMemGetAddressRange, ::cuMemGetInfo, ::cuMemHostAlloc,
 * ::cuMemHostGetDevicePointer, ::cuMemsetD2D8, ::cuMemsetD2D16,
 * ::cuMemsetD2D32, ::cuMemsetD8, ::cuMemsetD16, ::cuMemsetD32
 */
CUresult CUDAAPI cuMemcpyDtoD(CUdeviceptr dstDevice, CUdeviceptr srcDevice, size_t ByteCount);
/**
* \brief Copies memory from Device to Array
*
 * Copies from device memory to a 1D CUDA array. \p dstArray and \p dstOffset
 * specify the CUDA array handle and starting index of the destination data.
 * \p srcDevice specifies the base pointer of the source. \p ByteCount
 * specifies the number of bytes to copy.
*
* \param dstArray - Destination array
* \param dstOffset - Offset in bytes of destination array
* \param srcDevice - Source device pointer
* \param ByteCount - Size of memory copy in bytes
*
* \return
* ::CUDA_SUCCESS,
* ::CUDA_ERROR_DEINITIALIZED,
* ::CUDA_ERROR_NOT_INITIALIZED,
* ::CUDA_ERROR_INVALID_CONTEXT,
* ::CUDA_ERROR_INVALID_VALUE
* \notefnerr
*
 * \sa ::cuArray3DCreate, ::cuArray3DGetDescriptor, ::cuArrayCreate,
 * ::cuArrayDestroy, ::cuArrayGetDescriptor, ::cuMemAlloc, ::cuMemAllocHost,
 * ::cuMemAllocPitch, ::cuMemcpy2D, ::cuMemcpy2DAsync, ::cuMemcpy2DUnaligned,
 * ::cuMemcpy3D, ::cuMemcpy3DAsync, ::cuMemcpyAtoA, ::cuMemcpyAtoD,
 * ::cuMemcpyAtoH, ::cuMemcpyAtoHAsync, ::cuMemcpyDtoD, ::cuMemcpyDtoDAsync,
 * ::cuMemcpyDtoH, ::cuMemcpyDtoHAsync, ::cuMemcpyHtoA, ::cuMemcpyHtoAAsync,
 * ::cuMemcpyHtoD, ::cuMemcpyHtoDAsync, ::cuMemFree, ::cuMemFreeHost,
 * ::cuMemGetAddressRange, ::cuMemGetInfo, ::cuMemHostAlloc,
 * ::cuMemHostGetDevicePointer, ::cuMemsetD2D8, ::cuMemsetD2D16,
 * ::cuMemsetD2D32, ::cuMemsetD8, ::cuMemsetD16, ::cuMemsetD32
 */
CUresult CUDAAPI cuMemcpyDtoA(CUarray dstArray, size_t dstOffset, CUdeviceptr srcDevice, size_t ByteCount);
/**
* \brief Copies memory from Array to Device
*
 * Copies from one 1D CUDA array to device memory. \p dstDevice specifies the
 * base pointer of the destination and must be naturally aligned with the CUDA
 * array elements. \p srcArray and \p srcOffset specify the CUDA array handle
 * and the offset in bytes into the array where the copy is to begin.
 * \p ByteCount specifies the number of bytes to copy and must be evenly
 * divisible by the array element size.
*
* \param dstDevice - Destination device pointer
* \param srcArray - Source array
* \param srcOffset - Offset in bytes of source array
* \param ByteCount - Size of memory copy in bytes
*
* \return
* ::CUDA_SUCCESS,
* ::CUDA_ERROR_DEINITIALIZED,
* ::CUDA_ERROR_NOT_INITIALIZED,
* ::CUDA_ERROR_INVALID_CONTEXT,
* ::CUDA_ERROR_INVALID_VALUE
* \notefnerr
*
 * \sa ::cuArray3DCreate, ::cuArray3DGetDescriptor, ::cuArrayCreate,
 * ::cuArrayDestroy, ::cuArrayGetDescriptor, ::cuMemAlloc, ::cuMemAllocHost,
 * ::cuMemAllocPitch, ::cuMemcpy2D, ::cuMemcpy2DAsync, ::cuMemcpy2DUnaligned,
 * ::cuMemcpy3D, ::cuMemcpy3DAsync, ::cuMemcpyAtoA,
 * ::cuMemcpyAtoH, ::cuMemcpyAtoHAsync, ::cuMemcpyDtoA, ::cuMemcpyDtoD, ::cuMemcpyDtoDAsync,
 * ::cuMemcpyDtoH, ::cuMemcpyDtoHAsync, ::cuMemcpyHtoA, ::cuMemcpyHtoAAsync,
 * ::cuMemcpyHtoD, ::cuMemcpyHtoDAsync, ::cuMemFree, ::cuMemFreeHost,
 * ::cuMemGetAddressRange, ::cuMemGetInfo, ::cuMemHostAlloc,
 * ::cuMemHostGetDevicePointer, ::cuMemsetD2D8, ::cuMemsetD2D16,
 * ::cuMemsetD2D32, ::cuMemsetD8, ::cuMemsetD16, ::cuMemsetD32
 */
CUresult CUDAAPI cuMemcpyAtoD(CUdeviceptr dstDevice, CUarray srcArray, size_t srcOffset, size_t ByteCount);
/**
* \brief Copies memory from Host to Array
*
 * Copies from host memory to a 1D CUDA array. \p dstArray and \p dstOffset
 * specify the CUDA array handle and starting offset in bytes of the
 * destination data. \p srcHost specifies the base address of the source.
 * \p ByteCount specifies the number of bytes to copy.
*
* \param dstArray - Destination array
* \param dstOffset - Offset in bytes of destination array
* \param srcHost - Source host pointer
* \param ByteCount - Size of memory copy in bytes
*
* \return
* ::CUDA_SUCCESS,
* ::CUDA_ERROR_DEINITIALIZED,
* ::CUDA_ERROR_NOT_INITIALIZED,
* ::CUDA_ERROR_INVALID_CONTEXT,
* ::CUDA_ERROR_INVALID_VALUE
* \notefnerr
*
 * \sa ::cuArray3DCreate, ::cuArray3DGetDescriptor, ::cuArrayCreate,
 * ::cuArrayDestroy, ::cuArrayGetDescriptor, ::cuMemAlloc, ::cuMemAllocHost,
 * ::cuMemAllocPitch, ::cuMemcpy2D, ::cuMemcpy2DAsync, ::cuMemcpy2DUnaligned,
 * ::cuMemcpy3D, ::cuMemcpy3DAsync, ::cuMemcpyAtoA, ::cuMemcpyAtoD,
 * ::cuMemcpyAtoH, ::cuMemcpyAtoHAsync, ::cuMemcpyDtoA, ::cuMemcpyDtoD, ::cuMemcpyDtoDAsync,
 * ::cuMemcpyDtoH, ::cuMemcpyDtoHAsync, ::cuMemcpyHtoAAsync,
 * ::cuMemcpyHtoD, ::cuMemcpyHtoDAsync, ::cuMemFree, ::cuMemFreeHost,
 * ::cuMemGetAddressRange, ::cuMemGetInfo, ::cuMemHostAlloc,
 * ::cuMemHostGetDevicePointer, ::cuMemsetD2D8, ::cuMemsetD2D16,
 * ::cuMemsetD2D32, ::cuMemsetD8, ::cuMemsetD16, ::cuMemsetD32
 */
CUresult CUDAAPI cuMemcpyHtoA(CUarray dstArray, size_t dstOffset, const void *srcHost, size_t ByteCount);
/**
* \brief Copies memory from Array to Host
*
 * Copies from one 1D CUDA array to host memory. \p dstHost specifies the base
 * pointer of the destination. \p srcArray and \p srcOffset specify the CUDA
 * array handle and starting offset in bytes of the source data.
 * \p ByteCount specifies the number of bytes to copy.
*
 * \param dstHost   - Destination host pointer
* \param srcArray - Source array
* \param srcOffset - Offset in bytes of source array
* \param ByteCount - Size of memory copy in bytes
*
* \return
* ::CUDA_SUCCESS,
* ::CUDA_ERROR_DEINITIALIZED,
* ::CUDA_ERROR_NOT_INITIALIZED,
* ::CUDA_ERROR_INVALID_CONTEXT,
* ::CUDA_ERROR_INVALID_VALUE
* \notefnerr
*
 * \sa ::cuArray3DCreate, ::cuArray3DGetDescriptor, ::cuArrayCreate,
 * ::cuArrayDestroy, ::cuArrayGetDescriptor, ::cuMemAlloc, ::cuMemAllocHost,
 * ::cuMemAllocPitch, ::cuMemcpy2D, ::cuMemcpy2DAsync, ::cuMemcpy2DUnaligned,
 * ::cuMemcpy3D, ::cuMemcpy3DAsync, ::cuMemcpyAtoA, ::cuMemcpyAtoD,
 * ::cuMemcpyAtoHAsync, ::cuMemcpyDtoA, ::cuMemcpyDtoD, ::cuMemcpyDtoDAsync,
 * ::cuMemcpyDtoH, ::cuMemcpyDtoHAsync, ::cuMemcpyHtoA, ::cuMemcpyHtoAAsync,
 * ::cuMemcpyHtoD, ::cuMemcpyHtoDAsync, ::cuMemFree, ::cuMemFreeHost,
 * ::cuMemGetAddressRange, ::cuMemGetInfo, ::cuMemHostAlloc,
 * ::cuMemHostGetDevicePointer, ::cuMemsetD2D8, ::cuMemsetD2D16,
 * ::cuMemsetD2D32, ::cuMemsetD8, ::cuMemsetD16, ::cuMemsetD32
 */
CUresult CUDAAPI cuMemcpyAtoH(void *dstHost, CUarray srcArray, size_t srcOffset, size_t ByteCount);
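/*
 * A minimal usage sketch (illustrative only, not part of the original header):
 * staging 1024 floats through a 1D CUDA array with ::cuMemcpyHtoA() and
 * ::cuMemcpyAtoH(). The float format and element count are arbitrary choices
 * for the example; error checks are abbreviated.
 *
 * \code
   #include <cuda.h>

   CUDA_ARRAY_DESCRIPTOR desc = {0};
   CUarray arr;
   float in[1024] = {0}, out[1024];

   desc.Format      = CU_AD_FORMAT_FLOAT;
   desc.NumChannels = 1;
   desc.Width       = 1024;
   desc.Height      = 0;                      // 0 height selects a 1D array

   if (cuArrayCreate(&arr, &desc) == CUDA_SUCCESS) {
       cuMemcpyHtoA(arr, 0, in, sizeof in);   // offsets are in bytes
       cuMemcpyAtoH(out, arr, 0, sizeof out);
       cuArrayDestroy(arr);
   }
 * \endcode
 */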
/**
* \brief Copies memory from Array to Array
*
 * Copies from one 1D CUDA array to another. \p dstArray and \p srcArray
 * specify the handles of the destination and source CUDA arrays for the copy,
 * respectively. \p dstOffset and \p srcOffset specify the destination and
 * source offsets in bytes into the CUDA arrays. \p ByteCount is the number of
 * bytes to be copied. The elements of the CUDA arrays need not be the same
 * format, but they must be the same size, and \p ByteCount must be evenly
 * divisible by that size.
*
* \param dstArray - Destination array
* \param dstOffset - Offset in bytes of destination array
* \param srcArray - Source array
* \param srcOffset - Offset in bytes of source array
* \param ByteCount - Size of memory copy in bytes
*
* \return
* ::CUDA_SUCCESS,
* ::CUDA_ERROR_DEINITIALIZED,
* ::CUDA_ERROR_NOT_INITIALIZED,
* ::CUDA_ERROR_INVALID_CONTEXT,
* ::CUDA_ERROR_INVALID_VALUE
* \notefnerr
*
 * \sa ::cuArray3DCreate, ::cuArray3DGetDescriptor, ::cuArrayCreate,
 * ::cuArrayDestroy, ::cuArrayGetDescriptor, ::cuMemAlloc, ::cuMemAllocHost,
 * ::cuMemAllocPitch, ::cuMemcpy2D, ::cuMemcpy2DAsync, ::cuMemcpy2DUnaligned,
 * ::cuMemcpy3D, ::cuMemcpy3DAsync, ::cuMemcpyAtoD,
 * ::cuMemcpyAtoH, ::cuMemcpyAtoHAsync, ::cuMemcpyDtoA, ::cuMemcpyDtoD, ::cuMemcpyDtoDAsync,
 * ::cuMemcpyDtoH, ::cuMemcpyDtoHAsync, ::cuMemcpyHtoA, ::cuMemcpyHtoAAsync,
 * ::cuMemcpyHtoD, ::cuMemcpyHtoDAsync, ::cuMemFree, ::cuMemFreeHost,
 * ::cuMemGetAddressRange, ::cuMemGetInfo, ::cuMemHostAlloc,
 * ::cuMemHostGetDevicePointer, ::cuMemsetD2D8, ::cuMemsetD2D16,
 * ::cuMemsetD2D32, ::cuMemsetD8, ::cuMemsetD16, ::cuMemsetD32
 */
CUresult CUDAAPI cuMemcpyAtoA(CUarray dstArray, size_t dstOffset, CUarray srcArray, size_t srcOffset, size_t ByteCount);
/**
* \brief Copies memory for 2D arrays
*
 * Perform a 2D memory copy according to the parameters specified in \p pCopy.
* The ::CUDA_MEMCPY2D structure is defined as:
*
* \code
typedef struct CUDA_MEMCPY2D_st {
unsigned int srcXInBytes, srcY;
CUmemorytype srcMemoryType;
const void *srcHost;
CUdeviceptr srcDevice;
CUarray srcArray;
unsigned int srcPitch;
unsigned int dstXInBytes, dstY;
CUmemorytype dstMemoryType;
void *dstHost;
CUdeviceptr dstDevice;
CUarray dstArray;
unsigned int dstPitch;
unsigned int WidthInBytes;
unsigned int Height;
} CUDA_MEMCPY2D;
* \endcode
 * where:
 * - ::srcMemoryType and ::dstMemoryType specify the type of memory of the
 *   source and destination, respectively; ::CUmemorytype_enum is defined as:
 *
 * \code
   typedef enum CUmemorytype_enum {
      CU_MEMORYTYPE_HOST = 0x01,
      CU_MEMORYTYPE_DEVICE = 0x02,
      CU_MEMORYTYPE_ARRAY = 0x03
   } CUmemorytype;
 * \endcode
 *
 * \par
 * If ::srcMemoryType is ::CU_MEMORYTYPE_HOST, ::srcHost and ::srcPitch
 * specify the (host) base address of the source data and the bytes per row to
 * apply. ::srcArray is ignored.
 *
 * \par
 * If ::srcMemoryType is ::CU_MEMORYTYPE_DEVICE, ::srcDevice and ::srcPitch
 * specify the (device) base address of the source data and the bytes per row
 * to apply. ::srcArray is ignored.
 *
 * \par
 * If ::srcMemoryType is ::CU_MEMORYTYPE_ARRAY, ::srcArray specifies the
 * handle of the source data. ::srcHost, ::srcDevice and ::srcPitch are
 * ignored.
 *
 * \par
 * If ::dstMemoryType is ::CU_MEMORYTYPE_HOST, ::dstHost and ::dstPitch
 * specify the (host) base address of the destination data and the bytes per
 * row to apply. ::dstArray is ignored.
 *
 * \par
 * If ::dstMemoryType is ::CU_MEMORYTYPE_DEVICE, ::dstDevice and ::dstPitch
 * specify the (device) base address of the destination data and the bytes per
 * row to apply. ::dstArray is ignored.
 *
 * \par
 * If ::dstMemoryType is ::CU_MEMORYTYPE_ARRAY, ::dstArray specifies the
 * handle of the destination data. ::dstHost, ::dstDevice and ::dstPitch are
 * ignored.
 *
 * - ::srcXInBytes and ::srcY specify the base address of the source data for
 *   the copy.
 *
 * \par
 * For host pointers, the starting address is
 * \code
   void* Start = (void*)((char*)srcHost+srcY*srcPitch + srcXInBytes);
 * \endcode
 *
 * \par
 * For device pointers, the starting address is
 * \code
   CUdeviceptr Start = srcDevice+srcY*srcPitch+srcXInBytes;
 * \endcode
 *
 * \par
 * For CUDA arrays, ::srcXInBytes must be evenly divisible by the array
 * element size.
 *
 * - ::dstXInBytes and ::dstY specify the base address of the destination data
 *   for the copy.
 *
 * \par
 * For host pointers, the base address is
 * \code
   void* dstStart = (void*)((char*)dstHost+dstY*dstPitch + dstXInBytes);
 * \endcode
 *
 * \par
 * For device pointers, the starting address is
 * \code
   CUdeviceptr dstStart = dstDevice+dstY*dstPitch+dstXInBytes;
 * \endcode
 *
 * \par
 * For CUDA arrays, ::dstXInBytes must be evenly divisible by the array
 * element size.
 *
 * - ::WidthInBytes and ::Height specify the width (in bytes) and height of
 *   the 2D copy being performed.
 * - If specified, ::srcPitch must be greater than or equal to ::WidthInBytes +
 *   ::srcXInBytes, and ::dstPitch must be greater than or equal to
 *   ::WidthInBytes + ::dstXInBytes.
 *
 * \par
 * ::cuMemcpy2D() returns an error if any pitch is greater than the maximum
 * allowed (::CU_DEVICE_ATTRIBUTE_MAX_PITCH). ::cuMemAllocPitch() passes back
 * pitches that always work with ::cuMemcpy2D(). On intra-device memory copies
 * (device ↔ device, CUDA array ↔ device, CUDA array ↔ CUDA array),
 * ::cuMemcpy2D() may fail for pitches not computed by ::cuMemAllocPitch().
 * ::cuMemcpy2DUnaligned() does not have this restriction, but may run
 * significantly slower in the cases where ::cuMemcpy2D() would have returned
 * an error code.
 *
 * \param pCopy - Parameters for the memory copy
 *
 * \return
 * ::CUDA_SUCCESS,
 * ::CUDA_ERROR_DEINITIALIZED,
 * ::CUDA_ERROR_NOT_INITIALIZED,
 * ::CUDA_ERROR_INVALID_CONTEXT,
 * ::CUDA_ERROR_INVALID_VALUE
 * \notefnerr
 *
 * \sa ::cuArray3DCreate, ::cuArray3DGetDescriptor, ::cuArrayCreate,
 * ::cuArrayDestroy, ::cuArrayGetDescriptor, ::cuMemAlloc, ::cuMemAllocHost,
 * ::cuMemAllocPitch, ::cuMemcpy2DAsync, ::cuMemcpy2DUnaligned,
 * ::cuMemcpy3D, ::cuMemcpy3DAsync, ::cuMemcpyAtoA, ::cuMemcpyAtoD,
 * ::cuMemcpyAtoH, ::cuMemcpyAtoHAsync, ::cuMemcpyDtoA, ::cuMemcpyDtoD, ::cuMemcpyDtoDAsync,
 * ::cuMemcpyDtoH, ::cuMemcpyDtoHAsync, ::cuMemcpyHtoA, ::cuMemcpyHtoAAsync,
 * ::cuMemcpyHtoD, ::cuMemcpyHtoDAsync, ::cuMemFree, ::cuMemFreeHost,
 * ::cuMemGetAddressRange, ::cuMemGetInfo, ::cuMemHostAlloc,
 * ::cuMemHostGetDevicePointer, ::cuMemsetD2D8, ::cuMemsetD2D16,
 * ::cuMemsetD2D32, ::cuMemsetD8, ::cuMemsetD16, ::cuMemsetD32
 */
CUresult CUDAAPI cuMemcpy2D(const CUDA_MEMCPY2D *pCopy);
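/*
 * A minimal usage sketch (illustrative only, not part of the original header):
 * filling in ::CUDA_MEMCPY2D to copy a tightly packed host image into a
 * pitched device allocation. The 640x480 dimensions are arbitrary; error
 * checks are abbreviated.
 *
 * \code
   #include <cuda.h>
   #include <string.h>

   unsigned char hostImg[480][640];           // 640 x 480 bytes, packed rows
   CUdeviceptr dImg = 0;
   size_t pitch = 0;
   CUDA_MEMCPY2D cp;

   if (cuMemAllocPitch(&dImg, &pitch, 640, 480, 4) == CUDA_SUCCESS) {
       memset(&cp, 0, sizeof cp);             // leave unused members zeroed
       cp.srcMemoryType = CU_MEMORYTYPE_HOST;
       cp.srcHost       = hostImg;
       cp.srcPitch      = 640;                // packed source rows
       cp.dstMemoryType = CU_MEMORYTYPE_DEVICE;
       cp.dstDevice     = dImg;
       cp.dstPitch      = pitch;              // pitch from cuMemAllocPitch always works
       cp.WidthInBytes  = 640;
       cp.Height        = 480;
       cuMemcpy2D(&cp);
       cuMemFree(dImg);
   }
 * \endcode
 */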
/**
* \brief Copies memory for 2D arrays
*
 * Perform a 2D memory copy according to the parameters specified in \p pCopy.
* The ::CUDA_MEMCPY2D structure is defined as:
*
* \code
typedef struct CUDA_MEMCPY2D_st {
unsigned int srcXInBytes, srcY;
CUmemorytype srcMemoryType;
const void *srcHost;
CUdeviceptr srcDevice;
CUarray srcArray;
unsigned int srcPitch;
unsigned int dstXInBytes, dstY;
CUmemorytype dstMemoryType;
void *dstHost;
CUdeviceptr dstDevice;
CUarray dstArray;
unsigned int dstPitch;
unsigned int WidthInBytes;
unsigned int Height;
} CUDA_MEMCPY2D;
* \endcode
 * where:
 * - ::srcMemoryType and ::dstMemoryType specify the type of memory of the
 *   source and destination, respectively; ::CUmemorytype_enum is defined as:
 *
 * \code
   typedef enum CUmemorytype_enum {
      CU_MEMORYTYPE_HOST = 0x01,
      CU_MEMORYTYPE_DEVICE = 0x02,
      CU_MEMORYTYPE_ARRAY = 0x03
   } CUmemorytype;
 * \endcode
 *
 * \par
 * If ::srcMemoryType is ::CU_MEMORYTYPE_HOST, ::srcHost and ::srcPitch
 * specify the (host) base address of the source data and the bytes per row to
 * apply. ::srcArray is ignored.
 *
 * \par
 * If ::srcMemoryType is ::CU_MEMORYTYPE_DEVICE, ::srcDevice and ::srcPitch
 * specify the (device) base address of the source data and the bytes per row
 * to apply. ::srcArray is ignored.
 *
 * \par
 * If ::srcMemoryType is ::CU_MEMORYTYPE_ARRAY, ::srcArray specifies the
 * handle of the source data. ::srcHost, ::srcDevice and ::srcPitch are
 * ignored.
 *
 * \par
 * If ::dstMemoryType is ::CU_MEMORYTYPE_HOST, ::dstHost and ::dstPitch
 * specify the (host) base address of the destination data and the bytes per
 * row to apply. ::dstArray is ignored.
 *
 * \par
 * If ::dstMemoryType is ::CU_MEMORYTYPE_DEVICE, ::dstDevice and ::dstPitch
 * specify the (device) base address of the destination data and the bytes per
 * row to apply. ::dstArray is ignored.
 *
 * \par
 * If ::dstMemoryType is ::CU_MEMORYTYPE_ARRAY, ::dstArray specifies the
 * handle of the destination data. ::dstHost, ::dstDevice and ::dstPitch are
 * ignored.
 *
 * - ::srcXInBytes and ::srcY specify the base address of the source data for
 *   the copy.
 *
 * \par
 * For host pointers, the starting address is
 * \code
   void* Start = (void*)((char*)srcHost+srcY*srcPitch + srcXInBytes);
 * \endcode
 *
 * \par
 * For device pointers, the starting address is
 * \code
   CUdeviceptr Start = srcDevice+srcY*srcPitch+srcXInBytes;
 * \endcode
 *
 * \par
 * For CUDA arrays, ::srcXInBytes must be evenly divisible by the array
 * element size.
 *
 * - ::dstXInBytes and ::dstY specify the base address of the destination data
 *   for the copy.
 *
 * \par
 * For host pointers, the base address is
 * \code
   void* dstStart = (void*)((char*)dstHost+dstY*dstPitch + dstXInBytes);
 * \endcode
 *
 * \par
 * For device pointers, the starting address is
 * \code
   CUdeviceptr dstStart = dstDevice+dstY*dstPitch+dstXInBytes;
 * \endcode
 *
 * \par
 * For CUDA arrays, ::dstXInBytes must be evenly divisible by the array
 * element size.
 *
 * - ::WidthInBytes and ::Height specify the width (in bytes) and height of
 *   the 2D copy being performed.
 * - If specified, ::srcPitch must be greater than or equal to ::WidthInBytes +
 *   ::srcXInBytes, and ::dstPitch must be greater than or equal to
 *   ::WidthInBytes + ::dstXInBytes.
 *
 * \par
 * ::cuMemcpy2D() returns an error if any pitch is greater than the maximum
 * allowed (::CU_DEVICE_ATTRIBUTE_MAX_PITCH). ::cuMemAllocPitch() passes back
 * pitches that always work with ::cuMemcpy2D(). On intra-device memory copies
 * (device ↔ device, CUDA array ↔ device, CUDA array ↔ CUDA array),
 * ::cuMemcpy2D() may fail for pitches not computed by ::cuMemAllocPitch().
 * ::cuMemcpy2DUnaligned() does not have this restriction, but may run
 * significantly slower in the cases where ::cuMemcpy2D() would have returned
 * an error code.
 *
 * \param pCopy - Parameters for the memory copy
 *
 * \return
 * ::CUDA_SUCCESS,
 * ::CUDA_ERROR_DEINITIALIZED,
 * ::CUDA_ERROR_NOT_INITIALIZED,
 * ::CUDA_ERROR_INVALID_CONTEXT,
 * ::CUDA_ERROR_INVALID_VALUE
 * \notefnerr
 *
 * \sa ::cuArray3DCreate, ::cuArray3DGetDescriptor, ::cuArrayCreate,
 * ::cuArrayDestroy, ::cuArrayGetDescriptor, ::cuMemAlloc, ::cuMemAllocHost,
 * ::cuMemAllocPitch, ::cuMemcpy2D, ::cuMemcpy2DAsync,
 * ::cuMemcpy3D, ::cuMemcpy3DAsync, ::cuMemcpyAtoA, ::cuMemcpyAtoD,
 * ::cuMemcpyAtoH, ::cuMemcpyAtoHAsync, ::cuMemcpyDtoA, ::cuMemcpyDtoD, ::cuMemcpyDtoDAsync,
 * ::cuMemcpyDtoH, ::cuMemcpyDtoHAsync, ::cuMemcpyHtoA, ::cuMemcpyHtoAAsync,
 * ::cuMemcpyHtoD, ::cuMemcpyHtoDAsync, ::cuMemFree, ::cuMemFreeHost,
 * ::cuMemGetAddressRange, ::cuMemGetInfo, ::cuMemHostAlloc,
 * ::cuMemHostGetDevicePointer, ::cuMemsetD2D8, ::cuMemsetD2D16,
 * ::cuMemsetD2D32, ::cuMemsetD8, ::cuMemsetD16, ::cuMemsetD32
 */
CUresult CUDAAPI cuMemcpy2DUnaligned(const CUDA_MEMCPY2D *pCopy);
/**
* \brief Copies memory for 3D arrays
*
* Perform a 3D memory copy according to the parameters specified in
* \p pCopy. The ::CUDA_MEMCPY3D structure is defined as:
*
* \code
   typedef struct CUDA_MEMCPY3D_st {
      unsigned int srcXInBytes, srcY, srcZ;
      unsigned int srcLOD;
      CUmemorytype srcMemoryType;
      const void *srcHost;
      CUdeviceptr srcDevice;
      CUarray srcArray;
      unsigned int srcPitch;  // ignored when src is array
      unsigned int srcHeight; // ignored when src is array; may be 0 if Depth==1
      unsigned int dstXInBytes, dstY, dstZ;
      unsigned int dstLOD;
      CUmemorytype dstMemoryType;
      void *dstHost;
      CUdeviceptr dstDevice;
      CUarray dstArray;
      unsigned int dstPitch;  // ignored when dst is array
      unsigned int dstHeight; // ignored when dst is array; may be 0 if Depth==1
      unsigned int WidthInBytes;
      unsigned int Height;
      unsigned int Depth;
   } CUDA_MEMCPY3D;
 * \endcode
 * where:
 * - ::srcMemoryType and ::dstMemoryType specify the type of memory of the
 *   source and destination, respectively; ::CUmemorytype_enum is defined as:
 *
 * \code
   typedef enum CUmemorytype_enum {
      CU_MEMORYTYPE_HOST = 0x01,
      CU_MEMORYTYPE_DEVICE = 0x02,
      CU_MEMORYTYPE_ARRAY = 0x03
   } CUmemorytype;
 * \endcode
 *
 * \par
 * If ::srcMemoryType is ::CU_MEMORYTYPE_HOST, ::srcHost, ::srcPitch and
 * ::srcHeight specify the (host) base address of the source data, the bytes
 * per row, and the height of each 2D slice of the 3D array. ::srcArray is
 * ignored.
 *
 * \par
 * If ::srcMemoryType is ::CU_MEMORYTYPE_DEVICE, ::srcDevice, ::srcPitch and
 * ::srcHeight specify the (device) base address of the source data, the bytes
 * per row, and the height of each 2D slice of the 3D array. ::srcArray is
 * ignored.
 *
 * \par
 * If ::srcMemoryType is ::CU_MEMORYTYPE_ARRAY, ::srcArray specifies the
 * handle of the source data. ::srcHost, ::srcDevice, ::srcPitch and
 * ::srcHeight are ignored.
 *
 * \par
 * If ::dstMemoryType is ::CU_MEMORYTYPE_HOST, ::dstHost, ::dstPitch and
 * ::dstHeight specify the (host) base address of the destination data, the
 * bytes per row, and the height of each 2D slice of the 3D array. ::dstArray
 * is ignored.
 *
 * \par
 * If ::dstMemoryType is ::CU_MEMORYTYPE_DEVICE, ::dstDevice, ::dstPitch and
 * ::dstHeight specify the (device) base address of the destination data, the
 * bytes per row, and the height of each 2D slice of the 3D array. ::dstArray
 * is ignored.
 *
 * \par
 * If ::dstMemoryType is ::CU_MEMORYTYPE_ARRAY, ::dstArray specifies the
 * handle of the destination data. ::dstHost, ::dstDevice, ::dstPitch and
 * ::dstHeight are ignored.
 *
 * - ::srcXInBytes, ::srcY and ::srcZ specify the base address of the source
 *   data for the copy.
 *
 * \par
 * For host pointers, the starting address is
 * \code
   void* Start = (void*)((char*)srcHost+(srcZ*srcHeight+srcY)*srcPitch + srcXInBytes);
 * \endcode
 *
 * \par
 * For device pointers, the starting address is
 * \code
   CUdeviceptr Start = srcDevice+(srcZ*srcHeight+srcY)*srcPitch+srcXInBytes;
 * \endcode
 *
 * \par
 * For CUDA arrays, ::srcXInBytes must be evenly divisible by the array
 * element size.
 *
 * - ::dstXInBytes, ::dstY and ::dstZ specify the base address of the
 *   destination data for the copy.
 *
 * \par
 * For host pointers, the base address is
 * \code
   void* dstStart = (void*)((char*)dstHost+(dstZ*dstHeight+dstY)*dstPitch + dstXInBytes);
 * \endcode
 *
 * \par
 * For device pointers, the starting address is
 * \code
   CUdeviceptr dstStart = dstDevice+(dstZ*dstHeight+dstY)*dstPitch+dstXInBytes;
 * \endcode
 *
 * \par
 * For CUDA arrays, ::dstXInBytes must be evenly divisible by the array
 * element size.
 *
 * - ::WidthInBytes, ::Height and ::Depth specify the width (in bytes), height
 *   and depth of the 3D copy being performed.
 * - If specified, ::srcPitch must be greater than or equal to ::WidthInBytes +
 *   ::srcXInBytes, and ::dstPitch must be greater than or equal to
 *   ::WidthInBytes + ::dstXInBytes.
 * - If specified, ::srcHeight must be greater than or equal to ::Height +
 *   ::srcY, and ::dstHeight must be greater than or equal to ::Height + ::dstY.
 *
 * \par
 * ::cuMemcpy3D() returns an error if any pitch is greater than the maximum
 * allowed (::CU_DEVICE_ATTRIBUTE_MAX_PITCH).
 *
 * The ::srcLOD and ::dstLOD members of the ::CUDA_MEMCPY3D structure must be
 * set to 0.
 *
 * \param pCopy - Parameters for the memory copy
 *
 * \return
 * ::CUDA_SUCCESS,
 * ::CUDA_ERROR_DEINITIALIZED,
 * ::CUDA_ERROR_NOT_INITIALIZED,
 * ::CUDA_ERROR_INVALID_CONTEXT,
 * ::CUDA_ERROR_INVALID_VALUE
 * \notefnerr
 *
 * \sa ::cuArray3DCreate, ::cuArray3DGetDescriptor, ::cuArrayCreate,
 * ::cuArrayDestroy, ::cuArrayGetDescriptor, ::cuMemAlloc, ::cuMemAllocHost,
 * ::cuMemAllocPitch, ::cuMemcpy2D, ::cuMemcpy2DAsync, ::cuMemcpy2DUnaligned,
 * ::cuMemcpy3DAsync, ::cuMemcpyAtoA, ::cuMemcpyAtoD,
 * ::cuMemcpyAtoH, ::cuMemcpyAtoHAsync, ::cuMemcpyDtoA, ::cuMemcpyDtoD, ::cuMemcpyDtoDAsync,
 * ::cuMemcpyDtoH, ::cuMemcpyDtoHAsync, ::cuMemcpyHtoA, ::cuMemcpyHtoAAsync,
 * ::cuMemcpyHtoD, ::cuMemcpyHtoDAsync, ::cuMemFree, ::cuMemFreeHost,
 * ::cuMemGetAddressRange, ::cuMemGetInfo, ::cuMemHostAlloc,
 * ::cuMemHostGetDevicePointer, ::cuMemsetD2D8, ::cuMemsetD2D16,
 * ::cuMemsetD2D32, ::cuMemsetD8, ::cuMemsetD16, ::cuMemsetD32
 */
CUresult CUDAAPI cuMemcpy3D(const CUDA_MEMCPY3D *pCopy);
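/*
 * A minimal usage sketch (illustrative only, not part of the original header):
 * a host -> device 3D copy of a W x H x D byte volume into linear device
 * memory. Zero-initializing the descriptor keeps ::srcLOD and ::dstLOD at 0,
 * as required above; the dimensions are arbitrary and error checks abbreviated.
 *
 * \code
   #include <cuda.h>
   #include <string.h>

   enum { W = 64, H = 64, D = 16 };
   unsigned char hostVol[W * H * D];
   CUdeviceptr devVol = 0;
   CUDA_MEMCPY3D cp;

   if (cuMemAlloc(&devVol, sizeof hostVol) == CUDA_SUCCESS) {
       memset(&cp, 0, sizeof cp);
       cp.srcMemoryType = CU_MEMORYTYPE_HOST;
       cp.srcHost       = hostVol;
       cp.srcPitch      = W;                  // bytes per row
       cp.srcHeight     = H;                  // rows per 2D slice
       cp.dstMemoryType = CU_MEMORYTYPE_DEVICE;
       cp.dstDevice     = devVol;
       cp.dstPitch      = W;
       cp.dstHeight     = H;
       cp.WidthInBytes  = W;
       cp.Height        = H;
       cp.Depth         = D;
       cuMemcpy3D(&cp);
       cuMemFree(devVol);
   }
 * \endcode
 */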
/**
* \brief Copies memory from Host to Device
*
 * Copies from host memory to device memory. \p dstDevice and \p srcHost are
 * the base addresses of the destination and source, respectively. \p ByteCount
 * specifies the number of bytes to copy.
 *
 * ::cuMemcpyHtoDAsync() is asynchronous and can optionally be associated to a
 * stream by passing a non-zero \p hStream argument. It only works on
 * page-locked memory and returns an error if a pointer to pageable memory is
 * passed as input.
*
* \param dstDevice - Destination device pointer
* \param srcHost - Source host pointer
* \param ByteCount - Size of memory copy in bytes
* \param hStream - Stream identifier
*
* \return
* ::CUDA_SUCCESS,
* ::CUDA_ERROR_DEINITIALIZED,
* ::CUDA_ERROR_NOT_INITIALIZED,
* ::CUDA_ERROR_INVALID_CONTEXT,
* ::CUDA_ERROR_INVALID_VALUE
* \notefnerr
*
 * \sa ::cuArray3DCreate, ::cuArray3DGetDescriptor, ::cuArrayCreate,
 * ::cuArrayDestroy, ::cuArrayGetDescriptor, ::cuMemAlloc, ::cuMemAllocHost,
 * ::cuMemAllocPitch, ::cuMemcpy2D, ::cuMemcpy2DAsync, ::cuMemcpy2DUnaligned,
 * ::cuMemcpy3D, ::cuMemcpy3DAsync, ::cuMemcpyAtoA, ::cuMemcpyAtoD,
 * ::cuMemcpyAtoH, ::cuMemcpyAtoHAsync, ::cuMemcpyDtoA, ::cuMemcpyDtoD, ::cuMemcpyDtoDAsync,
 * ::cuMemcpyDtoH, ::cuMemcpyDtoHAsync, ::cuMemcpyHtoA, ::cuMemcpyHtoAAsync,
 * ::cuMemcpyHtoD, ::cuMemFree, ::cuMemFreeHost,
 * ::cuMemGetAddressRange, ::cuMemGetInfo, ::cuMemHostAlloc,
 * ::cuMemHostGetDevicePointer, ::cuMemsetD2D8, ::cuMemsetD2D8Async,
 * ::cuMemsetD2D16, ::cuMemsetD2D16Async, ::cuMemsetD2D32, ::cuMemsetD2D32Async,
 * ::cuMemsetD8, ::cuMemsetD8Async, ::cuMemsetD16, ::cuMemsetD16Async,
 * ::cuMemsetD32, ::cuMemsetD32Async
 */
CUresult CUDAAPI cuMemcpyHtoDAsync(CUdeviceptr dstDevice, const void *srcHost, size_t ByteCount, CUstream hStream);
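/*
 * A minimal usage sketch (illustrative only, not part of the original header):
 * overlapping an asynchronous host -> device copy with host work. As
 * documented above, the source buffer must be page-locked, so it comes from
 * ::cuMemHostAlloc(); error checks are abbreviated.
 *
 * \code
   #include <cuda.h>

   void *pinned = NULL;
   CUdeviceptr dDst = 0;
   CUstream stream;
   size_t bytes = 1 << 20;

   cuMemAlloc(&dDst, bytes);
   cuMemHostAlloc(&pinned, bytes, 0);
   cuStreamCreate(&stream, 0);

   // ... fill the pinned buffer ...
   cuMemcpyHtoDAsync(dDst, pinned, bytes, stream);

   // The host may do unrelated work here while the copy proceeds.

   cuStreamSynchronize(stream);   // wait before touching the buffer again
   cuStreamDestroy(stream);
   cuMemFreeHost(pinned);
   cuMemFree(dDst);
 * \endcode
 */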
/**
* \brief Copies memory from Device to Host
*
 * Copies from device to host memory. \p dstHost and \p srcDevice specify the
 * base pointers of the destination and source, respectively. \p ByteCount
 * specifies the number of bytes to copy.
 *
 * ::cuMemcpyDtoHAsync() is asynchronous and can optionally be associated to a
 * stream by passing a non-zero \p hStream argument. It only works on
 * page-locked memory and returns an error if a pointer to pageable memory is
 * passed as input.
*
* \param dstHost - Destination host pointer
* \param srcDevice - Source device pointer
* \param ByteCount - Size of memory copy in bytes
* \param hStream - Stream identifier
*
* \return
* ::CUDA_SUCCESS,
* ::CUDA_ERROR_DEINITIALIZED,
* ::CUDA_ERROR_NOT_INITIALIZED,
* ::CUDA_ERROR_INVALID_CONTEXT,
* ::CUDA_ERROR_INVALID_VALUE
* \notefnerr
*
 * \sa ::cuArray3DCreate, ::cuArray3DGetDescriptor, ::cuArrayCreate,
 * ::cuArrayDestroy, ::cuArrayGetDescriptor, ::cuMemAlloc, ::cuMemAllocHost,
 * ::cuMemAllocPitch, ::cuMemcpy2D, ::cuMemcpy2DAsync, ::cuMemcpy2DUnaligned,
 * ::cuMemcpy3D, ::cuMemcpy3DAsync, ::cuMemcpyAtoA, ::cuMemcpyAtoD,
 * ::cuMemcpyAtoH, ::cuMemcpyAtoHAsync, ::cuMemcpyDtoA, ::cuMemcpyDtoD, ::cuMemcpyDtoDAsync,
 * ::cuMemcpyDtoH, ::cuMemcpyHtoA, ::cuMemcpyHtoAAsync,
 * ::cuMemcpyHtoD, ::cuMemcpyHtoDAsync, ::cuMemFree, ::cuMemFreeHost,
 * ::cuMemGetAddressRange, ::cuMemGetInfo, ::cuMemHostAlloc,
 * ::cuMemHostGetDevicePointer, ::cuMemsetD2D8, ::cuMemsetD2D8Async,
 * ::cuMemsetD2D16, ::cuMemsetD2D16Async, ::cuMemsetD2D32, ::cuMemsetD2D32Async,
 * ::cuMemsetD8, ::cuMemsetD8Async, ::cuMemsetD16, ::cuMemsetD16Async,
 * ::cuMemsetD32, ::cuMemsetD32Async
 */
CUresult CUDAAPI cuMemcpyDtoHAsync(void *dstHost, CUdeviceptr srcDevice, size_t ByteCount, CUstream hStream);
/**
* \brief Copies memory from Device to Device
*
 * Copies from device memory to device memory. \p dstDevice and \p srcDevice
 * are the base pointers of the destination and source, respectively.
 * \p ByteCount specifies the number of bytes to copy. Note that this function
 * is asynchronous and can optionally be associated to a stream by passing a
 * non-zero \p hStream argument.
*
* \param dstDevice - Destination device pointer
* \param srcDevice - Source device pointer
* \param ByteCount - Size of memory copy in bytes
* \param hStream - Stream identifier
*
* \return
* ::CUDA_SUCCESS,
* ::CUDA_ERROR_DEINITIALIZED,
* ::CUDA_ERROR_NOT_INITIALIZED,
* ::CUDA_ERROR_INVALID_CONTEXT,
* ::CUDA_ERROR_INVALID_VALUE
* \notefnerr
*
 * \sa ::cuArray3DCreate, ::cuArray3DGetDescriptor, ::cuArrayCreate,
 * ::cuArrayDestroy, ::cuArrayGetDescriptor, ::cuMemAlloc, ::cuMemAllocHost,
 * ::cuMemAllocPitch, ::cuMemcpy2D, ::cuMemcpy2DAsync, ::cuMemcpy2DUnaligned,
 * ::cuMemcpy3D, ::cuMemcpy3DAsync, ::cuMemcpyAtoA, ::cuMemcpyAtoD,
 * ::cuMemcpyAtoH, ::cuMemcpyAtoHAsync, ::cuMemcpyDtoA, ::cuMemcpyDtoD,
 * ::cuMemcpyDtoH, ::cuMemcpyDtoHAsync, ::cuMemcpyHtoA, ::cuMemcpyHtoAAsync,
 * ::cuMemcpyHtoD, ::cuMemcpyHtoDAsync, ::cuMemFree, ::cuMemFreeHost,
 * ::cuMemGetAddressRange, ::cuMemGetInfo, ::cuMemHostAlloc,
 * ::cuMemHostGetDevicePointer, ::cuMemsetD2D8, ::cuMemsetD2D8Async,
 * ::cuMemsetD2D16, ::cuMemsetD2D16Async, ::cuMemsetD2D32, ::cuMemsetD2D32Async,
 * ::cuMemsetD8, ::cuMemsetD8Async, ::cuMemsetD16, ::cuMemsetD16Async,
 * ::cuMemsetD32, ::cuMemsetD32Async
 */
CUresult CUDAAPI cuMemcpyDtoDAsync(CUdeviceptr dstDevice, CUdeviceptr srcDevice, size_t ByteCount, CUstream hStream);
/**
* \brief Copies memory from Host to Array
*
* Copies from host memory to a 1D CUDA array. \p dstArray and \p dstOffset
* specify the CUDA array handle and starting offset in bytes of the
* destination data. \p srcHost specifies the base address of the source.
* \p ByteCount specifies the number of bytes to copy.
*
 * ::cuMemcpyHtoAAsync() is asynchronous and can optionally be associated to a
 * stream by passing a non-zero \p hStream argument. It only works on
 * page-locked memory and returns an error if a pointer to pageable memory is
 * passed as input.
*
* \param dstArray - Destination array
* \param dstOffset - Offset in bytes of destination array
* \param srcHost - Source host pointer
* \param ByteCount - Size of memory copy in bytes
* \param hStream - Stream identifier
*
* \return
* ::CUDA_SUCCESS,
* ::CUDA_ERROR_DEINITIALIZED,
* ::CUDA_ERROR_NOT_INITIALIZED,
* ::CUDA_ERROR_INVALID_CONTEXT,
* ::CUDA_ERROR_INVALID_VALUE
* \notefnerr
*
 * \sa ::cuArray3DCreate, ::cuArray3DGetDescriptor, ::cuArrayCreate,
 * ::cuArrayDestroy, ::cuArrayGetDescriptor, ::cuMemAlloc, ::cuMemAllocHost,
 * ::cuMemAllocPitch, ::cuMemcpy2D, ::cuMemcpy2DAsync, ::cuMemcpy2DUnaligned,
 * ::cuMemcpy3D, ::cuMemcpy3DAsync, ::cuMemcpyAtoA, ::cuMemcpyAtoD,
 * ::cuMemcpyAtoH, ::cuMemcpyAtoHAsync, ::cuMemcpyDtoA, ::cuMemcpyDtoD, ::cuMemcpyDtoDAsync,
 * ::cuMemcpyDtoH, ::cuMemcpyDtoHAsync, ::cuMemcpyHtoA,
 * ::cuMemcpyHtoD, ::cuMemcpyHtoDAsync, ::cuMemFree, ::cuMemFreeHost,
 * ::cuMemGetAddressRange, ::cuMemGetInfo, ::cuMemHostAlloc,
 * ::cuMemHostGetDevicePointer, ::cuMemsetD2D8, ::cuMemsetD2D8Async,
 * ::cuMemsetD2D16, ::cuMemsetD2D16Async, ::cuMemsetD2D32, ::cuMemsetD2D32Async,
 * ::cuMemsetD8, ::cuMemsetD8Async, ::cuMemsetD16, ::cuMemsetD16Async,
 * ::cuMemsetD32, ::cuMemsetD32Async
 */
CUresult CUDAAPI cuMemcpyHtoAAsync(CUarray dstArray, size_t dstOffset, const void *srcHost, size_t ByteCount, CUstream hStream);
/**
* \brief Copies memory from Array to Host
*
 * Copies from one 1D CUDA array to host memory. \p dstHost specifies the base
 * pointer of the destination. \p srcArray and \p srcOffset specify the CUDA
 * array handle and starting offset in bytes of the source data.
 * \p ByteCount specifies the number of bytes to copy.
 *
 * ::cuMemcpyAtoHAsync() is asynchronous and can optionally be associated to a
 * stream by passing a non-zero \p hStream argument. It only works on
 * page-locked host memory and returns an error if a pointer to pageable
 * memory is passed as input.
*
* \param dstHost - Destination pointer
* \param srcArray - Source array
* \param srcOffset - Offset in bytes of source array
* \param ByteCount - Size of memory copy in bytes
* \param hStream - Stream identifier
*
* \return
* ::CUDA_SUCCESS,
* ::CUDA_ERROR_DEINITIALIZED,
* ::CUDA_ERROR_NOT_INITIALIZED,
* ::CUDA_ERROR_INVALID_CONTEXT,
* ::CUDA_ERROR_INVALID_VALUE
* \notefnerr
*
 * \sa ::cuArray3DCreate, ::cuArray3DGetDescriptor, ::cuArrayCreate,
 * ::cuArrayDestroy, ::cuArrayGetDescriptor, ::cuMemAlloc, ::cuMemAllocHost,
 * ::cuMemAllocPitch, ::cuMemcpy2D, ::cuMemcpy2DAsync, ::cuMemcpy2DUnaligned,
 * ::cuMemcpy3D, ::cuMemcpy3DAsync, ::cuMemcpyAtoA, ::cuMemcpyAtoD,
 * ::cuMemcpyAtoH, ::cuMemcpyDtoA, ::cuMemcpyDtoD, ::cuMemcpyDtoDAsync,
 * ::cuMemcpyDtoH, ::cuMemcpyDtoHAsync, ::cuMemcpyHtoA, ::cuMemcpyHtoAAsync,
 * ::cuMemcpyHtoD, ::cuMemcpyHtoDAsync, ::cuMemFree, ::cuMemFreeHost,
 * ::cuMemGetAddressRange, ::cuMemGetInfo, ::cuMemHostAlloc,
 * ::cuMemHostGetDevicePointer, ::cuMemsetD2D8, ::cuMemsetD2D8Async,
 * ::cuMemsetD2D16, ::cuMemsetD2D16Async, ::cuMemsetD2D32, ::cuMemsetD2D32Async,
 * ::cuMemsetD8, ::cuMemsetD8Async, ::cuMemsetD16, ::cuMemsetD16Async,
 * ::cuMemsetD32, ::cuMemsetD32Async
 */
CUresult CUDAAPI cuMemcpyAtoHAsync(void *dstHost, CUarray srcArray, size_t srcOffset, size_t ByteCount, CUstream hStream);
/**
* \brief Copies memory for 2D arrays
*
 * Perform a 2D memory copy according to the parameters specified in \p pCopy.
* The ::CUDA_MEMCPY2D structure is defined as:
*
* \code
typedef struct CUDA_MEMCPY2D_st {
unsigned int srcXInBytes, srcY;
CUmemorytype srcMemoryType;
const void *srcHost;
CUdeviceptr srcDevice;
CUarray srcArray;
unsigned int srcPitch;
unsigned int dstXInBytes, dstY;
CUmemorytype dstMemoryType;
void *dstHost;
CUdeviceptr dstDevice;
CUarray dstArray;
unsigned int dstPitch;
unsigned int WidthInBytes;
unsigned int Height;
} CUDA_MEMCPY2D;
* \endcode
 * where:
 * - ::srcMemoryType and ::dstMemoryType specify the type of memory of the
 *   source and destination, respectively; ::CUmemorytype_enum is defined as:
 *
 * \code
   typedef enum CUmemorytype_enum {
      CU_MEMORYTYPE_HOST = 0x01,
      CU_MEMORYTYPE_DEVICE = 0x02,
      CU_MEMORYTYPE_ARRAY = 0x03
   } CUmemorytype;
 * \endcode
 *
 * \par
 * If ::srcMemoryType is ::CU_MEMORYTYPE_HOST, ::srcHost and ::srcPitch
 * specify the (host) base address of the source data and the bytes per row to
 * apply. ::srcArray is ignored.
 *
 * \par
 * If ::srcMemoryType is ::CU_MEMORYTYPE_DEVICE, ::srcDevice and ::srcPitch
 * specify the (device) base address of the source data and the bytes per row
 * to apply. ::srcArray is ignored.
 *
 * \par
 * If ::srcMemoryType is ::CU_MEMORYTYPE_ARRAY, ::srcArray specifies the
 * handle of the source data. ::srcHost, ::srcDevice and ::srcPitch are
 * ignored.
 *
 * \par
 * If ::dstMemoryType is ::CU_MEMORYTYPE_HOST, ::dstHost and ::dstPitch
 * specify the (host) base address of the destination data and the bytes per
 * row to apply. ::dstArray is ignored.
 *
 * \par
 * If ::dstMemoryType is ::CU_MEMORYTYPE_DEVICE, ::dstDevice and ::dstPitch
 * specify the (device) base address of the destination data and the bytes per
 * row to apply. ::dstArray is ignored.
 *
 * \par
 * If ::dstMemoryType is ::CU_MEMORYTYPE_ARRAY, ::dstArray specifies the
 * handle of the destination data. ::dstHost, ::dstDevice and ::dstPitch are
 * ignored.
 *
 * - ::srcXInBytes and ::srcY specify the base address of the source data for
 *   the copy.
 *
 * \par
 * For host pointers, the starting address is
 * \code
   void* Start = (void*)((char*)srcHost+srcY*srcPitch + srcXInBytes);
 * \endcode
 *
 * \par
 * For device pointers, the starting address is
 * \code
   CUdeviceptr Start = srcDevice+srcY*srcPitch+srcXInBytes;
 * \endcode
 *
 * \par
 * For CUDA arrays, ::srcXInBytes must be evenly divisible by the array
 * element size.
 *
 * - ::dstXInBytes and ::dstY specify the base address of the destination data
 *   for the copy.
 *
 * \par
 * For host pointers, the base address is
 * \code
   void* dstStart = (void*)((char*)dstHost+dstY*dstPitch + dstXInBytes);
 * \endcode
 *
 * \par
 * For device pointers, the starting address is
 * \code
   CUdeviceptr dstStart = dstDevice+dstY*dstPitch+dstXInBytes;
 * \endcode
 *
 * \par
 * For CUDA arrays, ::dstXInBytes must be evenly divisible by the array
 * element size.
 *
 * - ::WidthInBytes and ::Height specify the width (in bytes) and height of
 *   the 2D copy being performed.
 * - If specified, ::srcPitch must be greater than or equal to ::WidthInBytes +
 *   ::srcXInBytes, and ::dstPitch must be greater than or equal to
 *   ::WidthInBytes + ::dstXInBytes.
 *
 * \par
 * ::cuMemcpy2D() returns an error if any pitch is greater than the maximum
 * allowed (::CU_DEVICE_ATTRIBUTE_MAX_PITCH). ::cuMemAllocPitch() passes back
 * pitches that always work with ::cuMemcpy2D(). On intra-device memory copies
 * (device ↔ device, CUDA array ↔ device, CUDA array ↔ CUDA array),
 * ::cuMemcpy2D() may fail for pitches not computed by ::cuMemAllocPitch().
 * ::cuMemcpy2DUnaligned() does not have this restriction, but may run
 * significantly slower in the cases where ::cuMemcpy2D() would have returned
 * an error code.
 *
 * ::cuMemcpy2DAsync() is asynchronous and can optionally be associated to a
 * stream by passing a non-zero \p hStream argument. It only works on
 * page-locked host memory and returns an error if a pointer to pageable
 * memory is passed as input.
 *
 * \param pCopy   - Parameters for the memory copy
 * \param hStream - Stream identifier
 *
 * \return
 * ::CUDA_SUCCESS,
 * ::CUDA_ERROR_DEINITIALIZED,
 * ::CUDA_ERROR_NOT_INITIALIZED,
 * ::CUDA_ERROR_INVALID_CONTEXT,
 * ::CUDA_ERROR_INVALID_VALUE
 * \notefnerr
 *
 * \sa ::cuArray3DCreate, ::cuArray3DGetDescriptor, ::cuArrayCreate,
 * ::cuArrayDestroy, ::cuArrayGetDescriptor, ::cuMemAlloc, ::cuMemAllocHost,
 * ::cuMemAllocPitch, ::cuMemcpy2D, ::cuMemcpy2DUnaligned,
 * ::cuMemcpy3D, ::cuMemcpy3DAsync, ::cuMemcpyAtoA, ::cuMemcpyAtoD,
 * ::cuMemcpyAtoH, ::cuMemcpyAtoHAsync, ::cuMemcpyDtoA, ::cuMemcpyDtoD, ::cuMemcpyDtoDAsync,
 * ::cuMemcpyDtoH, ::cuMemcpyDtoHAsync, ::cuMemcpyHtoA, ::cuMemcpyHtoAAsync,
 * ::cuMemcpyHtoD, ::cuMemcpyHtoDAsync, ::cuMemFree, ::cuMemFreeHost,
 * ::cuMemGetAddressRange, ::cuMemGetInfo, ::cuMemHostAlloc,
 * ::cuMemHostGetDevicePointer, ::cuMemsetD2D8, ::cuMemsetD2D8Async,
 * ::cuMemsetD2D16, ::cuMemsetD2D16Async, ::cuMemsetD2D32, ::cuMemsetD2D32Async,
 * ::cuMemsetD8, ::cuMemsetD8Async, ::cuMemsetD16, ::cuMemsetD16Async,
 * ::cuMemsetD32, ::cuMemsetD32Async
 */
CUresult CUDAAPI cuMemcpy2DAsync(const CUDA_MEMCPY2D *pCopy, CUstream hStream);
/**
* \brief Copies memory for 3D arrays
*
* Perform a 3D memory copy according to the parameters specified in
* \p pCopy. The ::CUDA_MEMCPY3D structure is defined as:
*
* \code
   typedef struct CUDA_MEMCPY3D_st {
      unsigned int srcXInBytes, srcY, srcZ;
      unsigned int srcLOD;
      CUmemorytype srcMemoryType;
      const void *srcHost;
      CUdeviceptr srcDevice;
      CUarray srcArray;
      unsigned int srcPitch;  // ignored when src is array
      unsigned int srcHeight; // ignored when src is array; may be 0 if Depth==1
      unsigned int dstXInBytes, dstY, dstZ;
      unsigned int dstLOD;
      CUmemorytype dstMemoryType;
      void *dstHost;
      CUdeviceptr dstDevice;
      CUarray dstArray;
      unsigned int dstPitch;  // ignored when dst is array
      unsigned int dstHeight; // ignored when dst is array; may be 0 if Depth==1
      unsigned int WidthInBytes;
      unsigned int Height;
      unsigned int Depth;
   } CUDA_MEMCPY3D;
 * \endcode
 * where:
 * - ::srcMemoryType and ::dstMemoryType specify the type of memory of the
 *   source and destination, respectively; ::CUmemorytype_enum is defined as:
 *
 * \code
   typedef enum CUmemorytype_enum {
      CU_MEMORYTYPE_HOST = 0x01,
      CU_MEMORYTYPE_DEVICE = 0x02,
      CU_MEMORYTYPE_ARRAY = 0x03
   } CUmemorytype;
 * \endcode
 *
 * \par
 * If ::srcMemoryType is ::CU_MEMORYTYPE_HOST, ::srcHost, ::srcPitch and
 * ::srcHeight specify the (host) base address of the source data, the bytes
 * per row, and the height of each 2D slice of the 3D array. ::srcArray is
 * ignored.
 *
 * \par
 * If ::srcMemoryType is ::CU_MEMORYTYPE_DEVICE, ::srcDevice, ::srcPitch and
 * ::srcHeight specify the (device) base address of the source data, the bytes
 * per row, and the height of each 2D slice of the 3D array. ::srcArray is
 * ignored.
 *
 * \par
 * If ::srcMemoryType is ::CU_MEMORYTYPE_ARRAY, ::srcArray specifies the
 * handle of the source data. ::srcHost, ::srcDevice, ::srcPitch and
 * ::srcHeight are ignored.
 *
 * \par
 * If ::dstMemoryType is ::CU_MEMORYTYPE_HOST, ::dstHost, ::dstPitch and
 * ::dstHeight specify the (host) base address of the destination data, the
 * bytes per row, and the height of each 2D slice of the 3D array. ::dstArray
 * is ignored.
 *
 * \par
 * If ::dstMemoryType is ::CU_MEMORYTYPE_DEVICE, ::dstDevice, ::dstPitch and
 * ::dstHeight specify the (device) base address of the destination data, the
 * bytes per row, and the height of each 2D slice of the 3D array. ::dstArray
 * is ignored.
 *
 * \par
 * If ::dstMemoryType is ::CU_MEMORYTYPE_ARRAY, ::dstArray specifies the
 * handle of the destination data. ::dstHost, ::dstDevice, ::dstPitch and
 * ::dstHeight are ignored.
 *
 * - ::srcXInBytes, ::srcY and ::srcZ specify the base address of the source
 *   data for the copy.
 *
 * \par
 * For host pointers, the starting address is
 * \code
   void* Start = (void*)((char*)srcHost+(srcZ*srcHeight+srcY)*srcPitch + srcXInBytes);
 * \endcode
 *
 * \par
 * For device pointers, the starting address is
 * \code
   CUdeviceptr Start = srcDevice+(srcZ*srcHeight+srcY)*srcPitch+srcXInBytes;
 * \endcode
 *
 * \par
 * For CUDA arrays, ::srcXInBytes must be evenly divisible by the array
 * element size.
 *
 * - ::dstXInBytes, ::dstY and ::dstZ specify the base address of the
 *   destination data for the copy.
 *
 * \par
 * For host pointers, the base address is
 * \code
   void* dstStart = (void*)((char*)dstHost+(dstZ*dstHeight+dstY)*dstPitch + dstXInBytes);
 * \endcode
 *
 * \par
 * For device pointers, the starting address is
 * \code
   CUdeviceptr dstStart = dstDevice+(dstZ*dstHeight+dstY)*dstPitch+dstXInBytes;
 * \endcode
 *
 * \par
 * For CUDA arrays, ::dstXInBytes must be evenly divisible by the array
 * element size.
 *
 * - ::WidthInBytes, ::Height and ::Depth specify the width (in bytes), height
 *   and depth of the 3D copy being performed.
 * - If specified, ::srcPitch must be greater than or equal to ::WidthInBytes +
 *   ::srcXInBytes, and ::dstPitch must be greater than or equal to
 *   ::WidthInBytes + ::dstXInBytes.
 * - If specified, ::srcHeight must be greater than or equal to ::Height +
 *   ::srcY, and ::dstHeight must be greater than or equal to ::Height + ::dstY.
 *
 * \par
 * ::cuMemcpy3D() returns an error if any pitch is greater than the maximum
 * allowed (::CU_DEVICE_ATTRIBUTE_MAX_PITCH).
 *
 * ::cuMemcpy3DAsync() is asynchronous and can optionally be associated to a
 * stream by passing a non-zero \p hStream argument. It only works on
 * page-locked host memory and returns an error if a pointer to pageable
 * memory is passed as input.
 *
 * The ::srcLOD and ::dstLOD members of the ::CUDA_MEMCPY3D structure must be
 * set to 0.
 *
 * \param pCopy   - Parameters for the memory copy
 * \param hStream - Stream identifier
 *
 * \return
 * ::CUDA_SUCCESS,
 * ::CUDA_ERROR_DEINITIALIZED,
 * ::CUDA_ERROR_NOT_INITIALIZED,
 * ::CUDA_ERROR_INVALID_CONTEXT,
 * ::CUDA_ERROR_INVALID_VALUE
 * \notefnerr
 *
 * \sa ::cuArray3DCreate, ::cuArray3DGetDescriptor, ::cuArrayCreate,
 * ::cuArrayDestroy, ::cuArrayGetDescriptor, ::cuMemAlloc, ::cuMemAllocHost,
 * ::cuMemAllocPitch, ::cuMemcpy2D, ::cuMemcpy2DAsync, ::cuMemcpy2DUnaligned,
 * ::cuMemcpy3D, ::cuMemcpyAtoA, ::cuMemcpyAtoD,
 * ::cuMemcpyAtoH, ::cuMemcpyAtoHAsync, ::cuMemcpyDtoA, ::cuMemcpyDtoD, ::cuMemcpyDtoDAsync,
 * ::cuMemcpyDtoH, ::cuMemcpyDtoHAsync, ::cuMemcpyHtoA, ::cuMemcpyHtoAAsync,
 * ::cuMemcpyHtoD, ::cuMemcpyHtoDAsync, ::cuMemFree, ::cuMemFreeHost,
 * ::cuMemGetAddressRange, ::cuMemGetInfo, ::cuMemHostAlloc,
 * ::cuMemHostGetDevicePointer, ::cuMemsetD2D8, ::cuMemsetD2D8Async,
 * ::cuMemsetD2D16, ::cuMemsetD2D16Async, ::cuMemsetD2D32, ::cuMemsetD2D32Async,
 * ::cuMemsetD8, ::cuMemsetD8Async, ::cuMemsetD16, ::cuMemsetD16Async,
 * ::cuMemsetD32, ::cuMemsetD32Async
 */
CUresult CUDAAPI cuMemcpy3DAsync(const CUDA_MEMCPY3D *pCopy, CUstream hStream);
/**
* \brief Initializes device memory
*
* Sets the memory range of \p N 8-bit values to the specified value
* \p uc.
*
* \param dstDevice - Destination device pointer
* \param uc - Value to set
* \param N - Number of elements
*
* \return
* ::CUDA_SUCCESS,
* ::CUDA_ERROR_DEINITIALIZED,
* ::CUDA_ERROR_NOT_INITIALIZED,
* ::CUDA_ERROR_INVALID_CONTEXT,
* ::CUDA_ERROR_INVALID_VALUE
* \notefnerr
*
 * \sa ::cuArray3DCreate, ::cuArray3DGetDescriptor, ::cuArrayCreate,
 * ::cuArrayDestroy, ::cuArrayGetDescriptor, ::cuMemAlloc, ::cuMemAllocHost,
 * ::cuMemAllocPitch, ::cuMemcpy2D, ::cuMemcpy2DAsync, ::cuMemcpy2DUnaligned,
 * ::cuMemcpy3D, ::cuMemcpy3DAsync, ::cuMemcpyAtoA, ::cuMemcpyAtoD,
 * ::cuMemcpyAtoH, ::cuMemcpyAtoHAsync, ::cuMemcpyDtoA, ::cuMemcpyDtoD, ::cuMemcpyDtoDAsync,
 * ::cuMemcpyDtoH, ::cuMemcpyDtoHAsync, ::cuMemcpyHtoA, ::cuMemcpyHtoAAsync,
 * ::cuMemcpyHtoD, ::cuMemcpyHtoDAsync, ::cuMemFree, ::cuMemFreeHost,
 * ::cuMemGetAddressRange, ::cuMemGetInfo, ::cuMemHostAlloc,
 * ::cuMemHostGetDevicePointer, ::cuMemsetD2D8, ::cuMemsetD2D8Async,
 * ::cuMemsetD2D16, ::cuMemsetD2D16Async, ::cuMemsetD2D32, ::cuMemsetD2D32Async,
 * ::cuMemsetD8Async, ::cuMemsetD16, ::cuMemsetD16Async,
 * ::cuMemsetD32, ::cuMemsetD32Async
 */
CUresult CUDAAPI cuMemsetD8(CUdeviceptr dstDevice, unsigned char uc, size_t N);
/**
* \brief Initializes device memory
*
* Sets the memory range of \p N 16-bit values to the specified value
* \p us.
*
* \param dstDevice - Destination device pointer
* \param us - Value to set
* \param N - Number of elements
*
* \return
* ::CUDA_SUCCESS,
* ::CUDA_ERROR_DEINITIALIZED,
* ::CUDA_ERROR_NOT_INITIALIZED,
* ::CUDA_ERROR_INVALID_CONTEXT,
* ::CUDA_ERROR_INVALID_VALUE
* \notefnerr
*
 * \sa ::cuArray3DCreate, ::cuArray3DGetDescriptor, ::cuArrayCreate,
 * ::cuArrayDestroy, ::cuArrayGetDescriptor, ::cuMemAlloc, ::cuMemAllocHost,
 * ::cuMemAllocPitch, ::cuMemcpy2D, ::cuMemcpy2DAsync, ::cuMemcpy2DUnaligned,
 * ::cuMemcpy3D, ::cuMemcpy3DAsync, ::cuMemcpyAtoA, ::cuMemcpyAtoD,
 * ::cuMemcpyAtoH, ::cuMemcpyAtoHAsync, ::cuMemcpyDtoA, ::cuMemcpyDtoD, ::cuMemcpyDtoDAsync,
 * ::cuMemcpyDtoH, ::cuMemcpyDtoHAsync, ::cuMemcpyHtoA, ::cuMemcpyHtoAAsync,
 * ::cuMemcpyHtoD, ::cuMemcpyHtoDAsync, ::cuMemFree, ::cuMemFreeHost,
 * ::cuMemGetAddressRange, ::cuMemGetInfo, ::cuMemHostAlloc,
 * ::cuMemHostGetDevicePointer, ::cuMemsetD2D8, ::cuMemsetD2D8Async,
 * ::cuMemsetD2D16, ::cuMemsetD2D16Async, ::cuMemsetD2D32, ::cuMemsetD2D32Async,
 * ::cuMemsetD8, ::cuMemsetD8Async, ::cuMemsetD16Async,
 * ::cuMemsetD32, ::cuMemsetD32Async
 */
CUresult CUDAAPI cuMemsetD16(CUdeviceptr dstDevice, unsigned short us, size_t N);
/**
* \brief Initializes device memory
*
* Sets the memory range of \p N 32-bit values to the specified value
* \p ui.
*
* \param dstDevice - Destination device pointer
* \param ui - Value to set
* \param N - Number of elements
*
* \return
* ::CUDA_SUCCESS,
* ::CUDA_ERROR_DEINITIALIZED,
* ::CUDA_ERROR_NOT_INITIALIZED,
* ::CUDA_ERROR_INVALID_CONTEXT,
* ::CUDA_ERROR_INVALID_VALUE
* \notefnerr
*
 * \sa ::cuArray3DCreate, ::cuArray3DGetDescriptor, ::cuArrayCreate,
 * ::cuArrayDestroy, ::cuArrayGetDescriptor, ::cuMemAlloc, ::cuMemAllocHost,
 * ::cuMemAllocPitch, ::cuMemcpy2D, ::cuMemcpy2DAsync, ::cuMemcpy2DUnaligned,
 * ::cuMemcpy3D, ::cuMemcpy3DAsync, ::cuMemcpyAtoA, ::cuMemcpyAtoD,
 * ::cuMemcpyAtoH, ::cuMemcpyAtoHAsync, ::cuMemcpyDtoA, ::cuMemcpyDtoD, ::cuMemcpyDtoDAsync,
 * ::cuMemcpyDtoH, ::cuMemcpyDtoHAsync, ::cuMemcpyHtoA, ::cuMemcpyHtoAAsync,
 * ::cuMemcpyHtoD, ::cuMemcpyHtoDAsync, ::cuMemFree, ::cuMemFreeHost,
 * ::cuMemGetAddressRange, ::cuMemGetInfo, ::cuMemHostAlloc,
 * ::cuMemHostGetDevicePointer, ::cuMemsetD2D8, ::cuMemsetD2D8Async,
 * ::cuMemsetD2D16, ::cuMemsetD2D16Async, ::cuMemsetD2D32, ::cuMemsetD2D32Async,
 * ::cuMemsetD8, ::cuMemsetD8Async, ::cuMemsetD16, ::cuMemsetD16Async,
 * ::cuMemsetD32Async
 */
CUresult CUDAAPI cuMemsetD32(CUdeviceptr dstDevice, unsigned int ui, size_t N);
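/*
 * A minimal usage sketch (illustrative only, not part of the original header).
 * Note that \p N counts elements, not bytes; writing the IEEE-754 bit pattern
 * of 1.0f is an arbitrary example choice.
 *
 * \code
   #include <cuda.h>

   CUdeviceptr dBuf = 0;
   size_t numWords = 4096;

   if (cuMemAlloc(&dBuf, numWords * 4) == CUDA_SUCCESS) {
       cuMemsetD32(dBuf, 0x3F800000u, numWords);   // every word becomes 1.0f
       cuMemFree(dBuf);
   }
 * \endcode
 */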
/**
* \brief Initializes device memory
*
* Sets the 2D memory range of \p Width 8-bit values to the specified value
* \p uc. \p Height specifies the number of rows to set, and \p dstPitch
* specifies the number of bytes between each row. This function performs
* fastest when the pitch is one that has been passed back by
* ::cuMemAllocPitch().
*
* \param dstDevice - Destination device pointer
* \param dstPitch - Pitch of destination device pointer
* \param uc - Value to set
* \param Width - Width of row
* \param Height - Number of rows
*
* \return
* ::CUDA_SUCCESS,
* ::CUDA_ERROR_DEINITIALIZED,
* ::CUDA_ERROR_NOT_INITIALIZED,
* ::CUDA_ERROR_INVALID_CONTEXT,
* ::CUDA_ERROR_INVALID_VALUE
* \notefnerr
*
* \sa ::cuArray3DCreate, ::cuArray3DGetDescriptor, ::cuArrayCreate,
* ::cuArrayDestroy, ::cuArrayGetDescriptor, ::cuMemAlloc, ::cuMemAllocHost,
* ::cuMemAllocPitch, ::cuMemcpy2D, ::cuMemcpy2DAsync, ::cuMemcpy2DUnaligned,
* ::cuMemcpy3D, ::cuMemcpy3DAsync, ::cuMemcpyAtoA, ::cuMemcpyAtoD,
* ::cuMemcpyAtoH, ::cuMemcpyAtoHAsync, ::cuMemcpyDtoA, ::cuMemcpyDtoD, ::cuMemcpyDtoDAsync,
* ::cuMemcpyDtoH, ::cuMemcpyDtoHAsync, ::cuMemcpyHtoA, ::cuMemcpyHtoAAsync,
* ::cuMemcpyHtoD, ::cuMemcpyHtoDAsync, ::cuMemFree, ::cuMemFreeHost,
* ::cuMemGetAddressRange, ::cuMemGetInfo, ::cuMemHostAlloc,
* ::cuMemHostGetDevicePointer, ::cuMemsetD2D8Async,
* ::cuMemsetD2D16, ::cuMemsetD2D16Async, ::cuMemsetD2D32, ::cuMemsetD2D32Async,
* ::cuMemsetD8, ::cuMemsetD8Async, ::cuMemsetD16, ::cuMemsetD16Async,
* ::cuMemsetD32, ::cuMemsetD32Async
*/
CUresult CUDAAPI cuMemsetD2D8(CUdeviceptr dstDevice, size_t dstPitch, unsigned char uc, size_t Width, size_t Height);
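/*
 * Usage sketch (illustrative, not part of the original header): allocating a
 * pitched 2D buffer and clearing it with ::cuMemsetD2D8(). Using the pitch
 * returned by ::cuMemAllocPitch() as \p dstPitch takes the fast path described
 * above. Error checking is omitted for brevity; the dimensions are
 * hypothetical caller-chosen values.
 * \code
    CUdeviceptr d;
    size_t pitch;
    size_t width = 1024, height = 768;           // 8-bit elements per row
    cuMemAllocPitch(&d, &pitch, width, height, 4);
    cuMemsetD2D8(d, pitch, 0x00, width, height); // zero every row
    cuMemFree(d);
 * \endcode
 */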
/**
* \brief Initializes device memory
*
* Sets the 2D memory range of \p Width 16-bit values to the specified value
* \p us. \p Height specifies the number of rows to set, and \p dstPitch
* specifies the number of bytes between each row. This function performs
* fastest when the pitch is one that has been passed back by
* ::cuMemAllocPitch().
*
* \param dstDevice - Destination device pointer
* \param dstPitch - Pitch of destination device pointer
* \param us - Value to set
* \param Width - Width of row
* \param Height - Number of rows
*
* \return
* ::CUDA_SUCCESS,
* ::CUDA_ERROR_DEINITIALIZED,
* ::CUDA_ERROR_NOT_INITIALIZED,
* ::CUDA_ERROR_INVALID_CONTEXT,
* ::CUDA_ERROR_INVALID_VALUE
* \notefnerr
*
* \sa ::cuArray3DCreate, ::cuArray3DGetDescriptor, ::cuArrayCreate,
* ::cuArrayDestroy, ::cuArrayGetDescriptor, ::cuMemAlloc, ::cuMemAllocHost,
* ::cuMemAllocPitch, ::cuMemcpy2D, ::cuMemcpy2DAsync, ::cuMemcpy2DUnaligned,
* ::cuMemcpy3D, ::cuMemcpy3DAsync, ::cuMemcpyAtoA, ::cuMemcpyAtoD,
* ::cuMemcpyAtoH, ::cuMemcpyAtoHAsync, ::cuMemcpyDtoA, ::cuMemcpyDtoD, ::cuMemcpyDtoDAsync,
* ::cuMemcpyDtoH, ::cuMemcpyDtoHAsync, ::cuMemcpyHtoA, ::cuMemcpyHtoAAsync,
* ::cuMemcpyHtoD, ::cuMemcpyHtoDAsync, ::cuMemFree, ::cuMemFreeHost,
* ::cuMemGetAddressRange, ::cuMemGetInfo, ::cuMemHostAlloc,
* ::cuMemHostGetDevicePointer, ::cuMemsetD2D8, ::cuMemsetD2D8Async,
* ::cuMemsetD2D16Async, ::cuMemsetD2D32, ::cuMemsetD2D32Async,
* ::cuMemsetD8, ::cuMemsetD8Async, ::cuMemsetD16, ::cuMemsetD16Async,
* ::cuMemsetD32, ::cuMemsetD32Async
*/
CUresult CUDAAPI cuMemsetD2D16(CUdeviceptr dstDevice, size_t dstPitch, unsigned short us, size_t Width, size_t Height);
/**
* \brief Initializes device memory
*
* Sets the 2D memory range of \p Width 32-bit values to the specified value
* \p ui. \p Height specifies the number of rows to set, and \p dstPitch
* specifies the number of bytes between each row. This function performs
* fastest when the pitch is one that has been passed back by
* ::cuMemAllocPitch().
*
* \param dstDevice - Destination device pointer
* \param dstPitch - Pitch of destination device pointer
* \param ui - Value to set
* \param Width - Width of row
* \param Height - Number of rows
*
* \return
* ::CUDA_SUCCESS,
* ::CUDA_ERROR_DEINITIALIZED,
* ::CUDA_ERROR_NOT_INITIALIZED,
* ::CUDA_ERROR_INVALID_CONTEXT,
* ::CUDA_ERROR_INVALID_VALUE
* \notefnerr
*
* \sa ::cuArray3DCreate, ::cuArray3DGetDescriptor, ::cuArrayCreate,
* ::cuArrayDestroy, ::cuArrayGetDescriptor, ::cuMemAlloc, ::cuMemAllocHost,
* ::cuMemAllocPitch, ::cuMemcpy2D, ::cuMemcpy2DAsync, ::cuMemcpy2DUnaligned,
* ::cuMemcpy3D, ::cuMemcpy3DAsync, ::cuMemcpyAtoA, ::cuMemcpyAtoD,
* ::cuMemcpyAtoH, ::cuMemcpyAtoHAsync, ::cuMemcpyDtoA, ::cuMemcpyDtoD, ::cuMemcpyDtoDAsync,
* ::cuMemcpyDtoH, ::cuMemcpyDtoHAsync, ::cuMemcpyHtoA, ::cuMemcpyHtoAAsync,
* ::cuMemcpyHtoD, ::cuMemcpyHtoDAsync, ::cuMemFree, ::cuMemFreeHost,
* ::cuMemGetAddressRange, ::cuMemGetInfo, ::cuMemHostAlloc,
* ::cuMemHostGetDevicePointer, ::cuMemsetD2D8, ::cuMemsetD2D8Async,
* ::cuMemsetD2D16, ::cuMemsetD2D16Async, ::cuMemsetD2D32Async,
* ::cuMemsetD8, ::cuMemsetD8Async, ::cuMemsetD16, ::cuMemsetD16Async,
* ::cuMemsetD32, ::cuMemsetD32Async
*/
CUresult CUDAAPI cuMemsetD2D32(CUdeviceptr dstDevice, size_t dstPitch, unsigned int ui, size_t Width, size_t Height);
/**
* \brief Sets device memory
*
* Sets the memory range of \p N 8-bit values to the specified value
* \p uc.
*
* ::cuMemsetD8Async() is asynchronous and can optionally be associated to a
* stream by passing a non-zero \p hStream argument.
*
* \param dstDevice - Destination device pointer
* \param uc - Value to set
* \param N - Number of elements
* \param hStream - Stream identifier
*
* \return
* ::CUDA_SUCCESS,
* ::CUDA_ERROR_DEINITIALIZED,
* ::CUDA_ERROR_NOT_INITIALIZED,
* ::CUDA_ERROR_INVALID_CONTEXT,
* ::CUDA_ERROR_INVALID_VALUE
* \notefnerr
*
* \sa ::cuArray3DCreate, ::cuArray3DGetDescriptor, ::cuArrayCreate,
* ::cuArrayDestroy, ::cuArrayGetDescriptor, ::cuMemAlloc, ::cuMemAllocHost,
* ::cuMemAllocPitch, ::cuMemcpy2D, ::cuMemcpy2DAsync, ::cuMemcpy2DUnaligned,
* ::cuMemcpy3D, ::cuMemcpy3DAsync, ::cuMemcpyAtoA, ::cuMemcpyAtoD,
* ::cuMemcpyAtoH, ::cuMemcpyAtoHAsync, ::cuMemcpyDtoA, ::cuMemcpyDtoD, ::cuMemcpyDtoDAsync,
* ::cuMemcpyDtoH, ::cuMemcpyDtoHAsync, ::cuMemcpyHtoA, ::cuMemcpyHtoAAsync,
* ::cuMemcpyHtoD, ::cuMemcpyHtoDAsync, ::cuMemFree, ::cuMemFreeHost,
* ::cuMemGetAddressRange, ::cuMemGetInfo, ::cuMemHostAlloc,
* ::cuMemHostGetDevicePointer, ::cuMemsetD2D8, ::cuMemsetD2D8Async,
* ::cuMemsetD2D16, ::cuMemsetD2D16Async, ::cuMemsetD2D32, ::cuMemsetD2D32Async,
* ::cuMemsetD8, ::cuMemsetD16, ::cuMemsetD16Async,
* ::cuMemsetD32, ::cuMemsetD32Async
*/
CUresult CUDAAPI cuMemsetD8Async(CUdeviceptr dstDevice, unsigned char uc, size_t N, CUstream hStream);
/**
* \brief Sets device memory
*
* Sets the memory range of \p N 16-bit values to the specified value
* \p us.
*
* ::cuMemsetD16Async() is asynchronous and can optionally be associated to a
* stream by passing a non-zero \p hStream argument.
*
* \param dstDevice - Destination device pointer
* \param us - Value to set
* \param N - Number of elements
* \param hStream - Stream identifier
*
* \return
* ::CUDA_SUCCESS,
* ::CUDA_ERROR_DEINITIALIZED,
* ::CUDA_ERROR_NOT_INITIALIZED,
* ::CUDA_ERROR_INVALID_CONTEXT,
* ::CUDA_ERROR_INVALID_VALUE
* \notefnerr
*
* \sa ::cuArray3DCreate, ::cuArray3DGetDescriptor, ::cuArrayCreate,
* ::cuArrayDestroy, ::cuArrayGetDescriptor, ::cuMemAlloc, ::cuMemAllocHost,
* ::cuMemAllocPitch, ::cuMemcpy2D, ::cuMemcpy2DAsync, ::cuMemcpy2DUnaligned,
* ::cuMemcpy3D, ::cuMemcpy3DAsync, ::cuMemcpyAtoA, ::cuMemcpyAtoD,
* ::cuMemcpyAtoH, ::cuMemcpyAtoHAsync, ::cuMemcpyDtoA, ::cuMemcpyDtoD, ::cuMemcpyDtoDAsync,
* ::cuMemcpyDtoH, ::cuMemcpyDtoHAsync, ::cuMemcpyHtoA, ::cuMemcpyHtoAAsync,
* ::cuMemcpyHtoD, ::cuMemcpyHtoDAsync, ::cuMemFree, ::cuMemFreeHost,
* ::cuMemGetAddressRange, ::cuMemGetInfo, ::cuMemHostAlloc,
* ::cuMemHostGetDevicePointer, ::cuMemsetD2D8, ::cuMemsetD2D8Async,
* ::cuMemsetD2D16, ::cuMemsetD2D16Async, ::cuMemsetD2D32, ::cuMemsetD2D32Async,
* ::cuMemsetD8, ::cuMemsetD8Async, ::cuMemsetD16,
* ::cuMemsetD32, ::cuMemsetD32Async
*/
CUresult CUDAAPI cuMemsetD16Async(CUdeviceptr dstDevice, unsigned short us, size_t N, CUstream hStream);
/**
* \brief Sets device memory
*
* Sets the memory range of \p N 32-bit values to the specified value
* \p ui.
*
* ::cuMemsetD32Async() is asynchronous and can optionally be associated to a
* stream by passing a non-zero \p hStream argument.
*
* \param dstDevice - Destination device pointer
* \param ui - Value to set
* \param N - Number of elements
* \param hStream - Stream identifier
*
* \return
* ::CUDA_SUCCESS,
* ::CUDA_ERROR_DEINITIALIZED,
* ::CUDA_ERROR_NOT_INITIALIZED,
* ::CUDA_ERROR_INVALID_CONTEXT,
* ::CUDA_ERROR_INVALID_VALUE
* \notefnerr
*
* \sa ::cuArray3DCreate, ::cuArray3DGetDescriptor, ::cuArrayCreate,
* ::cuArrayDestroy, ::cuArrayGetDescriptor, ::cuMemAlloc, ::cuMemAllocHost,
* ::cuMemAllocPitch, ::cuMemcpy2D, ::cuMemcpy2DAsync, ::cuMemcpy2DUnaligned,
* ::cuMemcpy3D, ::cuMemcpy3DAsync, ::cuMemcpyAtoA, ::cuMemcpyAtoD,
* ::cuMemcpyAtoH, ::cuMemcpyAtoHAsync, ::cuMemcpyDtoA, ::cuMemcpyDtoD, ::cuMemcpyDtoDAsync,
* ::cuMemcpyDtoH, ::cuMemcpyDtoHAsync, ::cuMemcpyHtoA, ::cuMemcpyHtoAAsync,
* ::cuMemcpyHtoD, ::cuMemcpyHtoDAsync, ::cuMemFree, ::cuMemFreeHost,
* ::cuMemGetAddressRange, ::cuMemGetInfo, ::cuMemHostAlloc,
* ::cuMemHostGetDevicePointer, ::cuMemsetD2D8, ::cuMemsetD2D8Async,
* ::cuMemsetD2D16, ::cuMemsetD2D16Async, ::cuMemsetD2D32, ::cuMemsetD2D32Async,
* ::cuMemsetD8, ::cuMemsetD8Async, ::cuMemsetD16, ::cuMemsetD16Async, ::cuMemsetD32
*/
CUresult CUDAAPI cuMemsetD32Async(CUdeviceptr dstDevice, unsigned int ui, size_t N, CUstream hStream);
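/*
 * Usage sketch (illustrative, not part of the original header): an
 * asynchronous fill issued into a user-created stream. The host call returns
 * immediately; ::cuStreamSynchronize() is what guarantees completion here.
 * Error checking is omitted and the element count is a hypothetical value.
 * \code
    CUdeviceptr d;
    CUstream stream;
    size_t N = 1 << 20;                           // number of 32-bit elements
    cuMemAlloc(&d, N * sizeof(unsigned int));
    cuStreamCreate(&stream, 0);
    cuMemsetD32Async(d, 0xDEADBEEF, N, stream);   // queued, not yet complete
    cuStreamSynchronize(stream);                  // wait for the fill to finish
    cuStreamDestroy(stream);
    cuMemFree(d);
 * \endcode
 */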
/**
* \brief Sets device memory
*
* Sets the 2D memory range of \p Width 8-bit values to the specified value
* \p uc. \p Height specifies the number of rows to set, and \p dstPitch
* specifies the number of bytes between each row. This function performs
* fastest when the pitch is one that has been passed back by
* ::cuMemAllocPitch().
*
* ::cuMemsetD2D8Async() is asynchronous and can optionally be associated to a
* stream by passing a non-zero \p hStream argument.
*
* \param dstDevice - Destination device pointer
* \param dstPitch - Pitch of destination device pointer
* \param uc - Value to set
* \param Width - Width of row
* \param Height - Number of rows
* \param hStream - Stream identifier
*
* \return
* ::CUDA_SUCCESS,
* ::CUDA_ERROR_DEINITIALIZED,
* ::CUDA_ERROR_NOT_INITIALIZED,
* ::CUDA_ERROR_INVALID_CONTEXT,
* ::CUDA_ERROR_INVALID_VALUE
* \notefnerr
*
* \sa ::cuArray3DCreate, ::cuArray3DGetDescriptor, ::cuArrayCreate,
* ::cuArrayDestroy, ::cuArrayGetDescriptor, ::cuMemAlloc, ::cuMemAllocHost,
* ::cuMemAllocPitch, ::cuMemcpy2D, ::cuMemcpy2DAsync, ::cuMemcpy2DUnaligned,
* ::cuMemcpy3D, ::cuMemcpy3DAsync, ::cuMemcpyAtoA, ::cuMemcpyAtoD,
* ::cuMemcpyAtoH, ::cuMemcpyAtoHAsync, ::cuMemcpyDtoA, ::cuMemcpyDtoD, ::cuMemcpyDtoDAsync,
* ::cuMemcpyDtoH, ::cuMemcpyDtoHAsync, ::cuMemcpyHtoA, ::cuMemcpyHtoAAsync,
* ::cuMemcpyHtoD, ::cuMemcpyHtoDAsync, ::cuMemFree, ::cuMemFreeHost,
* ::cuMemGetAddressRange, ::cuMemGetInfo, ::cuMemHostAlloc,
* ::cuMemHostGetDevicePointer, ::cuMemsetD2D8,
* ::cuMemsetD2D16, ::cuMemsetD2D16Async, ::cuMemsetD2D32, ::cuMemsetD2D32Async,
* ::cuMemsetD8, ::cuMemsetD8Async, ::cuMemsetD16, ::cuMemsetD16Async,
* ::cuMemsetD32, ::cuMemsetD32Async
*/
CUresult CUDAAPI cuMemsetD2D8Async(CUdeviceptr dstDevice, size_t dstPitch, unsigned char uc, size_t Width, size_t Height, CUstream hStream);
/**
* \brief Sets device memory
*
* Sets the 2D memory range of \p Width 16-bit values to the specified value
* \p us. \p Height specifies the number of rows to set, and \p dstPitch
* specifies the number of bytes between each row. This function performs
* fastest when the pitch is one that has been passed back by
* ::cuMemAllocPitch().
*
* ::cuMemsetD2D16Async() is asynchronous and can optionally be associated to a
* stream by passing a non-zero \p hStream argument.
*
* \param dstDevice - Destination device pointer
* \param dstPitch - Pitch of destination device pointer
* \param us - Value to set
* \param Width - Width of row
* \param Height - Number of rows
* \param hStream - Stream identifier
*
* \return
* ::CUDA_SUCCESS,
* ::CUDA_ERROR_DEINITIALIZED,
* ::CUDA_ERROR_NOT_INITIALIZED,
* ::CUDA_ERROR_INVALID_CONTEXT,
* ::CUDA_ERROR_INVALID_VALUE
* \notefnerr
*
* \sa ::cuArray3DCreate, ::cuArray3DGetDescriptor, ::cuArrayCreate,
* ::cuArrayDestroy, ::cuArrayGetDescriptor, ::cuMemAlloc, ::cuMemAllocHost,
* ::cuMemAllocPitch, ::cuMemcpy2D, ::cuMemcpy2DAsync, ::cuMemcpy2DUnaligned,
* ::cuMemcpy3D, ::cuMemcpy3DAsync, ::cuMemcpyAtoA, ::cuMemcpyAtoD,
* ::cuMemcpyAtoH, ::cuMemcpyAtoHAsync, ::cuMemcpyDtoA, ::cuMemcpyDtoD, ::cuMemcpyDtoDAsync,
* ::cuMemcpyDtoH, ::cuMemcpyDtoHAsync, ::cuMemcpyHtoA, ::cuMemcpyHtoAAsync,
* ::cuMemcpyHtoD, ::cuMemcpyHtoDAsync, ::cuMemFree, ::cuMemFreeHost,
* ::cuMemGetAddressRange, ::cuMemGetInfo, ::cuMemHostAlloc,
* ::cuMemHostGetDevicePointer, ::cuMemsetD2D8, ::cuMemsetD2D8Async,
* ::cuMemsetD2D16, ::cuMemsetD2D32, ::cuMemsetD2D32Async,
* ::cuMemsetD8, ::cuMemsetD8Async, ::cuMemsetD16, ::cuMemsetD16Async,
* ::cuMemsetD32, ::cuMemsetD32Async
*/
CUresult CUDAAPI cuMemsetD2D16Async(CUdeviceptr dstDevice, size_t dstPitch, unsigned short us, size_t Width, size_t Height, CUstream hStream);
/**
* \brief Sets device memory
*
* Sets the 2D memory range of \p Width 32-bit values to the specified value
* \p ui. \p Height specifies the number of rows to set, and \p dstPitch
* specifies the number of bytes between each row. This function performs
* fastest when the pitch is one that has been passed back by
* ::cuMemAllocPitch().
*
* ::cuMemsetD2D32Async() is asynchronous and can optionally be associated to a
* stream by passing a non-zero \p hStream argument.
*
* \param dstDevice - Destination device pointer
* \param dstPitch - Pitch of destination device pointer
* \param ui - Value to set
* \param Width - Width of row
* \param Height - Number of rows
* \param hStream - Stream identifier
*
* \return
* ::CUDA_SUCCESS,
* ::CUDA_ERROR_DEINITIALIZED,
* ::CUDA_ERROR_NOT_INITIALIZED,
* ::CUDA_ERROR_INVALID_CONTEXT,
* ::CUDA_ERROR_INVALID_VALUE
* \notefnerr
*
* \sa ::cuArray3DCreate, ::cuArray3DGetDescriptor, ::cuArrayCreate,
* ::cuArrayDestroy, ::cuArrayGetDescriptor, ::cuMemAlloc, ::cuMemAllocHost,
* ::cuMemAllocPitch, ::cuMemcpy2D, ::cuMemcpy2DAsync, ::cuMemcpy2DUnaligned,
* ::cuMemcpy3D, ::cuMemcpy3DAsync, ::cuMemcpyAtoA, ::cuMemcpyAtoD,
* ::cuMemcpyAtoH, ::cuMemcpyAtoHAsync, ::cuMemcpyDtoA, ::cuMemcpyDtoD, ::cuMemcpyDtoDAsync,
* ::cuMemcpyDtoH, ::cuMemcpyDtoHAsync, ::cuMemcpyHtoA, ::cuMemcpyHtoAAsync,
* ::cuMemcpyHtoD, ::cuMemcpyHtoDAsync, ::cuMemFree, ::cuMemFreeHost,
* ::cuMemGetAddressRange, ::cuMemGetInfo, ::cuMemHostAlloc,
* ::cuMemHostGetDevicePointer, ::cuMemsetD2D8, ::cuMemsetD2D8Async,
* ::cuMemsetD2D16, ::cuMemsetD2D16Async, ::cuMemsetD2D32,
* ::cuMemsetD8, ::cuMemsetD8Async, ::cuMemsetD16, ::cuMemsetD16Async,
* ::cuMemsetD32, ::cuMemsetD32Async
*/
CUresult CUDAAPI cuMemsetD2D32Async(CUdeviceptr dstDevice, size_t dstPitch, unsigned int ui, size_t Width, size_t Height, CUstream hStream);
/**
* \brief Creates a 1D or 2D CUDA array
*
* Creates a CUDA array according to the ::CUDA_ARRAY_DESCRIPTOR structure
* \p pAllocateArray and returns a handle to the new CUDA array in \p *pHandle.
* The ::CUDA_ARRAY_DESCRIPTOR is defined as:
*
* \code
typedef struct {
unsigned int Width;
unsigned int Height;
CUarray_format Format;
unsigned int NumChannels;
} CUDA_ARRAY_DESCRIPTOR;
* \endcode
* where:
*
* - \p Width and \p Height are the width and height of the CUDA array (in
* elements); the CUDA array is one-dimensional if height is 0, two-dimensional
* otherwise;
* - ::Format specifies the format of the elements; ::CUarray_format is
* defined as:
* \code
typedef enum CUarray_format_enum {
CU_AD_FORMAT_UNSIGNED_INT8 = 0x01,
CU_AD_FORMAT_UNSIGNED_INT16 = 0x02,
CU_AD_FORMAT_UNSIGNED_INT32 = 0x03,
CU_AD_FORMAT_SIGNED_INT8 = 0x08,
CU_AD_FORMAT_SIGNED_INT16 = 0x09,
CU_AD_FORMAT_SIGNED_INT32 = 0x0a,
CU_AD_FORMAT_HALF = 0x10,
CU_AD_FORMAT_FLOAT = 0x20
} CUarray_format;
* \endcode
* - \p NumChannels specifies the number of packed components per CUDA array
* element; it may be 1, 2, or 4;
*
* Here are examples of CUDA array descriptions:
*
* Description for a CUDA array of 2048 floats:
* \code
CUDA_ARRAY_DESCRIPTOR desc;
desc.Format = CU_AD_FORMAT_FLOAT;
desc.NumChannels = 1;
desc.Width = 2048;
desc.Height = 1;
* \endcode
*
* Description for a 64 x 64 CUDA array of floats:
* \code
CUDA_ARRAY_DESCRIPTOR desc;
desc.Format = CU_AD_FORMAT_FLOAT;
desc.NumChannels = 1;
desc.Width = 64;
desc.Height = 64;
* \endcode
*
* Description for a \p width x \p height CUDA array of 64-bit, 4x16-bit
* float16's:
* \code
CUDA_ARRAY_DESCRIPTOR desc;
desc.Format = CU_AD_FORMAT_HALF;
desc.NumChannels = 4;
desc.Width = width;
desc.Height = height;
* \endcode
*
* Description for a \p width x \p height CUDA array of 16-bit elements, each
* of which is two 8-bit unsigned chars:
* \code
CUDA_ARRAY_DESCRIPTOR desc;
desc.Format = CU_AD_FORMAT_UNSIGNED_INT8;
desc.NumChannels = 2;
desc.Width = width;
desc.Height = height;
* \endcode
*
* \param pHandle - Returned array
* \param pAllocateArray - Array descriptor
*
* \return
* ::CUDA_SUCCESS,
* ::CUDA_ERROR_DEINITIALIZED,
* ::CUDA_ERROR_NOT_INITIALIZED,
* ::CUDA_ERROR_INVALID_CONTEXT,
* ::CUDA_ERROR_INVALID_VALUE,
* ::CUDA_ERROR_OUT_OF_MEMORY,
* ::CUDA_ERROR_UNKNOWN
* \notefnerr
*
* \sa ::cuArray3DCreate, ::cuArray3DGetDescriptor,
* ::cuArrayDestroy, ::cuArrayGetDescriptor, ::cuMemAlloc, ::cuMemAllocHost,
* ::cuMemAllocPitch, ::cuMemcpy2D, ::cuMemcpy2DAsync, ::cuMemcpy2DUnaligned,
* ::cuMemcpy3D, ::cuMemcpy3DAsync, ::cuMemcpyAtoA, ::cuMemcpyAtoD,
* ::cuMemcpyAtoH, ::cuMemcpyAtoHAsync, ::cuMemcpyDtoA, ::cuMemcpyDtoD, ::cuMemcpyDtoDAsync,
* ::cuMemcpyDtoH, ::cuMemcpyDtoHAsync, ::cuMemcpyHtoA, ::cuMemcpyHtoAAsync,
* ::cuMemcpyHtoD, ::cuMemcpyHtoDAsync, ::cuMemFree, ::cuMemFreeHost,
* ::cuMemGetAddressRange, ::cuMemGetInfo, ::cuMemHostAlloc,
* ::cuMemHostGetDevicePointer, ::cuMemsetD2D8, ::cuMemsetD2D16,
* ::cuMemsetD2D32, ::cuMemsetD8, ::cuMemsetD16, ::cuMemsetD32
*/
CUresult CUDAAPI cuArrayCreate(CUarray *pHandle, const CUDA_ARRAY_DESCRIPTOR *pAllocateArray);
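/*
 * Usage sketch (illustrative, not part of the original header): creating and
 * destroying a 64 x 64 CUDA array of floats using the descriptor shown above.
 * Error checking is omitted for brevity.
 * \code
    CUDA_ARRAY_DESCRIPTOR desc;
    CUarray hArray;
    desc.Format = CU_AD_FORMAT_FLOAT;
    desc.NumChannels = 1;
    desc.Width = 64;
    desc.Height = 64;
    cuArrayCreate(&hArray, &desc);
    // ... fill the array with cuMemcpy2D(), bind it to a texref, etc. ...
    cuArrayDestroy(hArray);
 * \endcode
 */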
/**
* \brief Get a 1D or 2D CUDA array descriptor
*
* Returns in \p *pArrayDescriptor a descriptor containing information on the
* format and dimensions of the CUDA array \p hArray. It is useful for
* subroutines that have been passed a CUDA array, but need to know the CUDA
* array parameters for validation or other purposes.
*
* \param pArrayDescriptor - Returned array descriptor
* \param hArray - Array to get descriptor of
*
* \return
* ::CUDA_SUCCESS,
* ::CUDA_ERROR_DEINITIALIZED,
* ::CUDA_ERROR_NOT_INITIALIZED,
* ::CUDA_ERROR_INVALID_CONTEXT,
* ::CUDA_ERROR_INVALID_VALUE,
* ::CUDA_ERROR_INVALID_HANDLE
* \notefnerr
*
* \sa ::cuArray3DCreate, ::cuArray3DGetDescriptor, ::cuArrayCreate,
* ::cuArrayDestroy, ::cuMemAlloc, ::cuMemAllocHost,
* ::cuMemAllocPitch, ::cuMemcpy2D, ::cuMemcpy2DAsync, ::cuMemcpy2DUnaligned,
* ::cuMemcpy3D, ::cuMemcpy3DAsync, ::cuMemcpyAtoA, ::cuMemcpyAtoD,
* ::cuMemcpyAtoH, ::cuMemcpyAtoHAsync, ::cuMemcpyDtoA, ::cuMemcpyDtoD, ::cuMemcpyDtoDAsync,
* ::cuMemcpyDtoH, ::cuMemcpyDtoHAsync, ::cuMemcpyHtoA, ::cuMemcpyHtoAAsync,
* ::cuMemcpyHtoD, ::cuMemcpyHtoDAsync, ::cuMemFree, ::cuMemFreeHost,
* ::cuMemGetAddressRange, ::cuMemGetInfo, ::cuMemHostAlloc,
* ::cuMemHostGetDevicePointer, ::cuMemsetD2D8, ::cuMemsetD2D16,
* ::cuMemsetD2D32, ::cuMemsetD8, ::cuMemsetD16, ::cuMemsetD32
*/
CUresult CUDAAPI cuArrayGetDescriptor(CUDA_ARRAY_DESCRIPTOR *pArrayDescriptor, CUarray hArray);
#endif /* __CUDA_API_VERSION >= 3020 */
/**
* \brief Destroys a CUDA array
*
* Destroys the CUDA array \p hArray.
*
* \param hArray - Array to destroy
*
* \return
* ::CUDA_SUCCESS,
* ::CUDA_ERROR_DEINITIALIZED,
* ::CUDA_ERROR_NOT_INITIALIZED,
* ::CUDA_ERROR_INVALID_CONTEXT,
* ::CUDA_ERROR_INVALID_HANDLE,
* ::CUDA_ERROR_ARRAY_IS_MAPPED
* \notefnerr
*
* \sa ::cuArray3DCreate, ::cuArray3DGetDescriptor, ::cuArrayCreate,
* ::cuArrayGetDescriptor, ::cuMemAlloc, ::cuMemAllocHost,
* ::cuMemAllocPitch, ::cuMemcpy2D, ::cuMemcpy2DAsync, ::cuMemcpy2DUnaligned,
* ::cuMemcpy3D, ::cuMemcpy3DAsync, ::cuMemcpyAtoA, ::cuMemcpyAtoD,
* ::cuMemcpyAtoH, ::cuMemcpyAtoHAsync, ::cuMemcpyDtoA, ::cuMemcpyDtoD, ::cuMemcpyDtoDAsync,
* ::cuMemcpyDtoH, ::cuMemcpyDtoHAsync, ::cuMemcpyHtoA, ::cuMemcpyHtoAAsync,
* ::cuMemcpyHtoD, ::cuMemcpyHtoDAsync, ::cuMemFree, ::cuMemFreeHost,
* ::cuMemGetAddressRange, ::cuMemGetInfo, ::cuMemHostAlloc,
* ::cuMemHostGetDevicePointer, ::cuMemsetD2D8, ::cuMemsetD2D16,
* ::cuMemsetD2D32, ::cuMemsetD8, ::cuMemsetD16, ::cuMemsetD32
*/
CUresult CUDAAPI cuArrayDestroy(CUarray hArray);
#if __CUDA_API_VERSION >= 3020
/**
* \brief Creates a 3D CUDA array
*
* Creates a CUDA array according to the ::CUDA_ARRAY3D_DESCRIPTOR structure
* \p pAllocateArray and returns a handle to the new CUDA array in \p *pHandle.
* The ::CUDA_ARRAY3D_DESCRIPTOR is defined as:
*
* \code
typedef struct {
unsigned int Width;
unsigned int Height;
unsigned int Depth;
CUarray_format Format;
unsigned int NumChannels;
unsigned int Flags;
} CUDA_ARRAY3D_DESCRIPTOR;
* \endcode
* where:
*
* - \p Width, \p Height, and \p Depth are the width, height, and depth of the
* CUDA array (in elements); the CUDA array is one-dimensional if height and
* depth are 0, two-dimensional if depth is 0, and three-dimensional otherwise;
* - ::Format specifies the format of the elements; ::CUarray_format is
* defined as:
* \code
typedef enum CUarray_format_enum {
CU_AD_FORMAT_UNSIGNED_INT8 = 0x01,
CU_AD_FORMAT_UNSIGNED_INT16 = 0x02,
CU_AD_FORMAT_UNSIGNED_INT32 = 0x03,
CU_AD_FORMAT_SIGNED_INT8 = 0x08,
CU_AD_FORMAT_SIGNED_INT16 = 0x09,
CU_AD_FORMAT_SIGNED_INT32 = 0x0a,
CU_AD_FORMAT_HALF = 0x10,
CU_AD_FORMAT_FLOAT = 0x20
} CUarray_format;
* \endcode
* - \p NumChannels specifies the number of packed components per CUDA array
* element; it may be 1, 2, or 4;
* - ::Flags may be set to ::CUDA_ARRAY3D_SURFACE_LDST to enable surface references
* to be bound to the CUDA array. If this flag is not set, ::cuSurfRefSetArray
* will fail when attempting to bind the CUDA array to a surface reference.
*
* Here are examples of CUDA array descriptions:
*
* Description for a CUDA array of 2048 floats:
* \code
CUDA_ARRAY3D_DESCRIPTOR desc;
desc.Format = CU_AD_FORMAT_FLOAT;
desc.NumChannels = 1;
desc.Width = 2048;
desc.Height = 0;
desc.Depth = 0;
* \endcode
*
* Description for a 64 x 64 CUDA array of floats:
* \code
CUDA_ARRAY3D_DESCRIPTOR desc;
desc.Format = CU_AD_FORMAT_FLOAT;
desc.NumChannels = 1;
desc.Width = 64;
desc.Height = 64;
desc.Depth = 0;
* \endcode
*
* Description for a \p width x \p height x \p depth CUDA array of 64-bit,
* 4x16-bit float16's:
* \code
CUDA_ARRAY3D_DESCRIPTOR desc;
desc.Format = CU_AD_FORMAT_HALF;
desc.NumChannels = 4;
desc.Width = width;
desc.Height = height;
desc.Depth = depth;
* \endcode
*
* \param pHandle - Returned array
* \param pAllocateArray - 3D array descriptor
*
* \return
* ::CUDA_SUCCESS,
* ::CUDA_ERROR_DEINITIALIZED,
* ::CUDA_ERROR_NOT_INITIALIZED,
* ::CUDA_ERROR_INVALID_CONTEXT,
* ::CUDA_ERROR_INVALID_VALUE,
* ::CUDA_ERROR_OUT_OF_MEMORY,
* ::CUDA_ERROR_UNKNOWN
* \notefnerr
*
* \sa ::cuArray3DGetDescriptor, ::cuArrayCreate,
* ::cuArrayDestroy, ::cuArrayGetDescriptor, ::cuMemAlloc, ::cuMemAllocHost,
* ::cuMemAllocPitch, ::cuMemcpy2D, ::cuMemcpy2DAsync, ::cuMemcpy2DUnaligned,
* ::cuMemcpy3D, ::cuMemcpy3DAsync, ::cuMemcpyAtoA, ::cuMemcpyAtoD,
* ::cuMemcpyAtoH, ::cuMemcpyAtoHAsync, ::cuMemcpyDtoA, ::cuMemcpyDtoD, ::cuMemcpyDtoDAsync,
* ::cuMemcpyDtoH, ::cuMemcpyDtoHAsync, ::cuMemcpyHtoA, ::cuMemcpyHtoAAsync,
* ::cuMemcpyHtoD, ::cuMemcpyHtoDAsync, ::cuMemFree, ::cuMemFreeHost,
* ::cuMemGetAddressRange, ::cuMemGetInfo, ::cuMemHostAlloc,
* ::cuMemHostGetDevicePointer, ::cuMemsetD2D8, ::cuMemsetD2D16,
* ::cuMemsetD2D32, ::cuMemsetD8, ::cuMemsetD16, ::cuMemsetD32
*/
CUresult CUDAAPI cuArray3DCreate(CUarray *pHandle, const CUDA_ARRAY3D_DESCRIPTOR *pAllocateArray);
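/*
 * Usage sketch (illustrative, not part of the original header): creating a
 * width x height x depth array of floats. Flags of 0 gives a plain array;
 * ::CUDA_ARRAY3D_SURFACE_LDST would be needed only for surface binding.
 * `width`, `height`, and `depth` are hypothetical caller-chosen extents.
 * \code
    CUDA_ARRAY3D_DESCRIPTOR desc;
    CUarray hArray;
    desc.Format = CU_AD_FORMAT_FLOAT;
    desc.NumChannels = 1;
    desc.Width = width;
    desc.Height = height;
    desc.Depth = depth;
    desc.Flags = 0;                 // no surface load/store needed
    cuArray3DCreate(&hArray, &desc);
 * \endcode
 */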
/**
* \brief Get a 3D CUDA array descriptor
*
* Returns in \p *pArrayDescriptor a descriptor containing information on the
* format and dimensions of the CUDA array \p hArray. It is useful for
* subroutines that have been passed a CUDA array, but need to know the CUDA
* array parameters for validation or other purposes.
*
* This function may be called on 1D and 2D arrays, in which case the \p Height
* and/or \p Depth members of the descriptor struct will be set to 0.
*
* \param pArrayDescriptor - Returned 3D array descriptor
* \param hArray - 3D array to get descriptor of
*
* \return
* ::CUDA_SUCCESS,
* ::CUDA_ERROR_DEINITIALIZED,
* ::CUDA_ERROR_NOT_INITIALIZED,
* ::CUDA_ERROR_INVALID_CONTEXT,
* ::CUDA_ERROR_INVALID_VALUE,
* ::CUDA_ERROR_INVALID_HANDLE
* \notefnerr
*
* \sa ::cuArray3DCreate, ::cuArrayCreate,
* ::cuArrayDestroy, ::cuArrayGetDescriptor, ::cuMemAlloc, ::cuMemAllocHost,
* ::cuMemAllocPitch, ::cuMemcpy2D, ::cuMemcpy2DAsync, ::cuMemcpy2DUnaligned,
* ::cuMemcpy3D, ::cuMemcpy3DAsync, ::cuMemcpyAtoA, ::cuMemcpyAtoD,
* ::cuMemcpyAtoH, ::cuMemcpyAtoHAsync, ::cuMemcpyDtoA, ::cuMemcpyDtoD, ::cuMemcpyDtoDAsync,
* ::cuMemcpyDtoH, ::cuMemcpyDtoHAsync, ::cuMemcpyHtoA, ::cuMemcpyHtoAAsync,
* ::cuMemcpyHtoD, ::cuMemcpyHtoDAsync, ::cuMemFree, ::cuMemFreeHost,
* ::cuMemGetAddressRange, ::cuMemGetInfo, ::cuMemHostAlloc,
* ::cuMemHostGetDevicePointer, ::cuMemsetD2D8, ::cuMemsetD2D16,
* ::cuMemsetD2D32, ::cuMemsetD8, ::cuMemsetD16, ::cuMemsetD32
*/
CUresult CUDAAPI cuArray3DGetDescriptor(CUDA_ARRAY3D_DESCRIPTOR *pArrayDescriptor, CUarray hArray);
#endif /* __CUDA_API_VERSION >= 3020 */
/** @} */ /* END CUDA_MEM */
/**
* \defgroup CUDA_STREAM Stream Management
*
* This section describes the stream management functions of the low-level CUDA
* driver application programming interface.
*
* @{
*/
/**
* \brief Create a stream
*
* Creates a stream and returns a handle in \p phStream. \p Flags is required
* to be 0.
*
* \param phStream - Returned newly created stream
* \param Flags - Parameters for stream creation (must be 0)
*
* \return
* ::CUDA_SUCCESS,
* ::CUDA_ERROR_DEINITIALIZED,
* ::CUDA_ERROR_NOT_INITIALIZED,
* ::CUDA_ERROR_INVALID_CONTEXT,
* ::CUDA_ERROR_INVALID_VALUE,
* ::CUDA_ERROR_OUT_OF_MEMORY
* \notefnerr
*
* \sa ::cuStreamDestroy,
* ::cuStreamWaitEvent,
* ::cuStreamQuery,
* ::cuStreamSynchronize
*/
CUresult CUDAAPI cuStreamCreate(CUstream *phStream, unsigned int Flags);
/**
* \brief Make a compute stream wait on an event
*
* Makes all future work submitted to \p hStream wait until \p hEvent
* reports completion before beginning execution. This synchronization
* will be performed efficiently on the device.
*
* The stream \p hStream will wait only for the completion of the most recent
* host call to ::cuEventRecord() on \p hEvent. Once this call has returned,
* any functions (including ::cuEventRecord() and ::cuEventDestroy()) may be
* called on \p hEvent again, and the subsequent calls will not have any
* effect on \p hStream.
*
* If \p hStream is 0 (the NULL stream) any future work submitted in any stream
* will wait for \p hEvent to complete before beginning execution. This
* effectively creates a barrier for all future work submitted to the context.
*
* If ::cuEventRecord() has not been called on \p hEvent, this call acts as if
* the record has already completed, and so is a functional no-op.
*
* \param hStream - Stream to wait
* \param hEvent - Event to wait on (may not be NULL)
* \param Flags - Parameters for the operation (must be 0)
*
* \return
* ::CUDA_SUCCESS,
* ::CUDA_ERROR_DEINITIALIZED,
* ::CUDA_ERROR_NOT_INITIALIZED,
* ::CUDA_ERROR_INVALID_CONTEXT,
* ::CUDA_ERROR_INVALID_HANDLE,
* \notefnerr
*
* \sa ::cuStreamCreate,
* ::cuEventRecord,
* ::cuStreamQuery,
* ::cuStreamSynchronize,
* ::cuStreamDestroy
*/
CUresult CUDAAPI cuStreamWaitEvent(CUstream hStream, CUevent hEvent, unsigned int Flags);
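/*
 * Usage sketch (illustrative, not part of the original header): making work in
 * one stream wait for work previously recorded in another, without blocking
 * the host. The memsets stand in for arbitrary device work; error checking is
 * omitted.
 * \code
    CUstream producer, consumer;
    CUevent ev;
    CUdeviceptr d;
    size_t N = 4096;
    cuMemAlloc(&d, N);
    cuStreamCreate(&producer, 0);
    cuStreamCreate(&consumer, 0);
    cuEventCreate(&ev, CU_EVENT_DISABLE_TIMING);
    cuMemsetD8Async(d, 1, N, producer);   // work in the producer stream
    cuEventRecord(ev, producer);          // mark its completion point
    cuStreamWaitEvent(consumer, ev, 0);   // device-side wait; host not blocked
    cuMemsetD8Async(d, 2, N, consumer);   // runs only after the event fires
 * \endcode
 */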
/**
* \brief Determine status of a compute stream
*
* Returns ::CUDA_SUCCESS if all operations in the stream specified by
* \p hStream have completed, or ::CUDA_ERROR_NOT_READY if not.
*
* \param hStream - Stream to query status of
*
* \return
* ::CUDA_SUCCESS,
* ::CUDA_ERROR_DEINITIALIZED,
* ::CUDA_ERROR_NOT_INITIALIZED,
* ::CUDA_ERROR_INVALID_CONTEXT,
* ::CUDA_ERROR_INVALID_HANDLE,
* ::CUDA_ERROR_NOT_READY
* \notefnerr
*
* \sa ::cuStreamCreate,
* ::cuStreamWaitEvent,
* ::cuStreamDestroy,
* ::cuStreamSynchronize
*/
CUresult CUDAAPI cuStreamQuery(CUstream hStream);
/**
* \brief Wait until a stream's tasks are completed
*
* Waits until the device has completed all operations in the stream specified
* by \p hStream. If the context was created with the ::CU_CTX_BLOCKING_SYNC
* flag, the CPU thread will block until the stream is finished with all of
* its tasks.
*
* \param hStream - Stream to wait for
*
* \return
* ::CUDA_SUCCESS,
* ::CUDA_ERROR_DEINITIALIZED,
* ::CUDA_ERROR_NOT_INITIALIZED,
* ::CUDA_ERROR_INVALID_CONTEXT,
* ::CUDA_ERROR_INVALID_HANDLE
* \notefnerr
*
* \sa ::cuStreamCreate,
* ::cuStreamDestroy,
* ::cuStreamWaitEvent,
* ::cuStreamQuery
*/
CUresult CUDAAPI cuStreamSynchronize(CUstream hStream);
/**
* \brief Destroys a stream
*
* Destroys the stream specified by \p hStream.
*
* \param hStream - Stream to destroy
*
* \return
* ::CUDA_SUCCESS,
* ::CUDA_ERROR_DEINITIALIZED,
* ::CUDA_ERROR_NOT_INITIALIZED,
* ::CUDA_ERROR_INVALID_CONTEXT,
* ::CUDA_ERROR_INVALID_VALUE
* \notefnerr
*
* \sa ::cuStreamCreate,
* ::cuStreamWaitEvent,
* ::cuStreamQuery,
* ::cuStreamSynchronize
*/
CUresult CUDAAPI cuStreamDestroy(CUstream hStream);
/** @} */ /* END CUDA_STREAM */
/**
* \defgroup CUDA_EVENT Event Management
*
* This section describes the event management functions of the low-level CUDA
* driver application programming interface.
*
* @{
*/
/**
* \brief Creates an event
*
* Creates an event \p *phEvent with the flags specified via \p Flags. Valid flags
* include:
* - ::CU_EVENT_DEFAULT: Default event creation flag.
* - ::CU_EVENT_BLOCKING_SYNC: Specifies that the created event should use blocking
* synchronization. A CPU thread that uses ::cuEventSynchronize() to wait on
* an event created with this flag will block until the event has actually
* been recorded.
* - ::CU_EVENT_DISABLE_TIMING: Specifies that the created event does not need
* to record timing data. Events created with this flag specified and
* the ::CU_EVENT_BLOCKING_SYNC flag not specified will provide the best
* performance when used with ::cuStreamWaitEvent() and ::cuEventQuery().
*
* \param phEvent - Returns newly created event
* \param Flags - Event creation flags
*
* \return
* ::CUDA_SUCCESS,
* ::CUDA_ERROR_DEINITIALIZED,
* ::CUDA_ERROR_NOT_INITIALIZED,
* ::CUDA_ERROR_INVALID_CONTEXT,
* ::CUDA_ERROR_INVALID_VALUE,
* ::CUDA_ERROR_OUT_OF_MEMORY
* \notefnerr
*
* \sa
* ::cuEventRecord,
* ::cuEventQuery,
* ::cuEventSynchronize,
* ::cuEventDestroy,
* ::cuEventElapsedTime
*/
CUresult CUDAAPI cuEventCreate(CUevent *phEvent, unsigned int Flags);
/**
* \brief Records an event
*
* Records an event. If \p hStream is non-zero, the event is recorded after all
* preceding operations in \p hStream have been completed; otherwise, it is
* recorded after all preceding operations in the CUDA context have been
* completed. Since this operation is asynchronous, ::cuEventQuery() and/or
* ::cuEventSynchronize() must be used to determine when the event has actually
* been recorded.
*
* If ::cuEventRecord() has previously been called on \p hEvent, then this
* call will overwrite any existing state in \p hEvent. Any subsequent calls
* which examine the status of \p hEvent will only examine the completion of
* this most recent call to ::cuEventRecord().
*
* \param hEvent - Event to record
* \param hStream - Stream to record event for
*
* \return
* ::CUDA_SUCCESS,
* ::CUDA_ERROR_DEINITIALIZED,
* ::CUDA_ERROR_NOT_INITIALIZED,
* ::CUDA_ERROR_INVALID_CONTEXT,
* ::CUDA_ERROR_INVALID_HANDLE,
* ::CUDA_ERROR_INVALID_VALUE
* \notefnerr
*
* \sa ::cuEventCreate,
* ::cuEventQuery,
* ::cuEventSynchronize,
* ::cuStreamWaitEvent,
* ::cuEventDestroy,
* ::cuEventElapsedTime
*/
CUresult CUDAAPI cuEventRecord(CUevent hEvent, CUstream hStream);
/**
* \brief Queries an event's status
*
* Query the status of all device work preceding the most recent
* call to ::cuEventRecord() (in the appropriate compute streams,
* as specified by the arguments to ::cuEventRecord()).
*
* If this work has successfully been completed by the device, or if
* ::cuEventRecord() has not been called on \p hEvent, then ::CUDA_SUCCESS is
* returned. If this work has not yet been completed by the device then
* ::CUDA_ERROR_NOT_READY is returned.
*
* \param hEvent - Event to query
*
* \return
* ::CUDA_SUCCESS,
* ::CUDA_ERROR_DEINITIALIZED,
* ::CUDA_ERROR_NOT_INITIALIZED,
* ::CUDA_ERROR_INVALID_CONTEXT,
* ::CUDA_ERROR_INVALID_HANDLE,
* ::CUDA_ERROR_INVALID_VALUE,
* ::CUDA_ERROR_NOT_READY
* \notefnerr
*
* \sa ::cuEventCreate,
* ::cuEventRecord,
* ::cuEventSynchronize,
* ::cuEventDestroy,
* ::cuEventElapsedTime
*/
CUresult CUDAAPI cuEventQuery(CUevent hEvent);
/**
* \brief Waits for an event to complete
*
* Wait until the completion of all device work preceding the most recent
* call to ::cuEventRecord() (in the appropriate compute streams, as specified
* by the arguments to ::cuEventRecord()).
*
* If ::cuEventRecord() has not been called on \p hEvent, ::CUDA_SUCCESS is
* returned immediately.
*
* Waiting for an event that was created with the ::CU_EVENT_BLOCKING_SYNC
* flag will cause the calling CPU thread to block until the event has
* been completed by the device. If the ::CU_EVENT_BLOCKING_SYNC flag has
* not been set, then the CPU thread will busy-wait until the event has
* been completed by the device.
*
* \param hEvent - Event to wait for
*
* \return
* ::CUDA_SUCCESS,
* ::CUDA_ERROR_DEINITIALIZED,
* ::CUDA_ERROR_NOT_INITIALIZED,
* ::CUDA_ERROR_INVALID_CONTEXT,
* ::CUDA_ERROR_INVALID_HANDLE
* \notefnerr
*
* \sa ::cuEventCreate,
* ::cuEventRecord,
* ::cuEventQuery,
* ::cuEventDestroy,
* ::cuEventElapsedTime
*/
CUresult CUDAAPI cuEventSynchronize(CUevent hEvent);
/**
* \brief Destroys an event
*
* Destroys the event specified by \p hEvent.
*
* \param hEvent - Event to destroy
*
* \return
* ::CUDA_SUCCESS,
* ::CUDA_ERROR_DEINITIALIZED,
* ::CUDA_ERROR_NOT_INITIALIZED,
* ::CUDA_ERROR_INVALID_CONTEXT,
* ::CUDA_ERROR_INVALID_HANDLE
* \notefnerr
*
* \sa ::cuEventCreate,
* ::cuEventRecord,
* ::cuEventQuery,
* ::cuEventSynchronize,
* ::cuEventElapsedTime
*/
CUresult CUDAAPI cuEventDestroy(CUevent hEvent);
/**
* \brief Computes the elapsed time between two events
*
* Computes the elapsed time between two events (in milliseconds with a
* resolution of around 0.5 microseconds).
*
* If either event was last recorded in a non-NULL stream, the resulting time
* may be greater than expected (even if both used the same stream handle). This
* happens because the ::cuEventRecord() operation takes place asynchronously
* and there is no guarantee that the measured latency is actually just between
* the two events. Any number of other different stream operations could execute
* in between the two measured events, thus altering the timing in a significant
* way.
*
* If ::cuEventRecord() has not been called on either event then
* ::CUDA_ERROR_INVALID_HANDLE is returned. If ::cuEventRecord() has been called
* on both events but one or both of them has not yet been completed (that is,
* ::cuEventQuery() would return ::CUDA_ERROR_NOT_READY on at least one of the
* events), ::CUDA_ERROR_NOT_READY is returned. If either event was created with
* the ::CU_EVENT_DISABLE_TIMING flag, then this function will return
* ::CUDA_ERROR_INVALID_HANDLE.
*
* \param pMilliseconds - Time between \p hStart and \p hEnd in ms
* \param hStart - Starting event
* \param hEnd - Ending event
*
* \return
* ::CUDA_SUCCESS,
* ::CUDA_ERROR_DEINITIALIZED,
* ::CUDA_ERROR_NOT_INITIALIZED,
* ::CUDA_ERROR_INVALID_CONTEXT,
* ::CUDA_ERROR_INVALID_HANDLE,
* ::CUDA_ERROR_NOT_READY
* \notefnerr
*
* \sa ::cuEventCreate,
* ::cuEventRecord,
* ::cuEventQuery,
* ::cuEventSynchronize,
* ::cuEventDestroy
*/
CUresult CUDAAPI cuEventElapsedTime(float *pMilliseconds, CUevent hStart, CUevent hEnd);
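/*
 * Usage sketch (illustrative, not part of the original header): timing a span
 * of device work with a pair of events. Both events are recorded into the
 * NULL stream so the measured interval brackets exactly the work issued
 * between them, per the caveats above. Error checking is omitted.
 * \code
    CUevent start, stop;
    float ms;
    cuEventCreate(&start, CU_EVENT_DEFAULT);
    cuEventCreate(&stop, CU_EVENT_DEFAULT);
    cuEventRecord(start, 0);
    // ... launch kernels or issue memcpys here ...
    cuEventRecord(stop, 0);
    cuEventSynchronize(stop);               // wait until 'stop' has occurred
    cuEventElapsedTime(&ms, start, stop);   // elapsed time in milliseconds
    cuEventDestroy(start);
    cuEventDestroy(stop);
 * \endcode
 */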
/** @} */ /* END CUDA_EVENT */
/**
* \defgroup CUDA_EXEC Execution Control
*
* This section describes the execution control functions of the low-level CUDA
* driver application programming interface.
*
* @{
*/
/**
* \brief Sets the block-dimensions for the function
*
* Specifies the \p x, \p y, and \p z dimensions of the thread blocks that are
* created when the kernel given by \p hfunc is launched.
*
* \param hfunc - Kernel to specify dimensions of
* \param x - X dimension
* \param y - Y dimension
* \param z - Z dimension
*
* \return
* ::CUDA_SUCCESS,
* ::CUDA_ERROR_DEINITIALIZED,
* ::CUDA_ERROR_NOT_INITIALIZED,
* ::CUDA_ERROR_INVALID_CONTEXT,
* ::CUDA_ERROR_INVALID_HANDLE,
* ::CUDA_ERROR_INVALID_VALUE
* \notefnerr
*
* \sa ::cuFuncSetSharedSize,
* ::cuFuncSetCacheConfig,
* ::cuFuncGetAttribute,
* ::cuParamSetSize,
* ::cuParamSeti,
* ::cuParamSetf,
* ::cuParamSetv,
* ::cuLaunch,
* ::cuLaunchGrid,
* ::cuLaunchGridAsync
*/
CUresult CUDAAPI cuFuncSetBlockShape(CUfunction hfunc, int x, int y, int z);
/**
* \brief Sets the dynamic shared-memory size for the function
*
* Sets through \p bytes the amount of dynamic shared memory that will be
* available to each thread block when the kernel given by \p hfunc is launched.
*
* \param hfunc - Kernel to specify dynamic shared-memory size for
* \param bytes - Dynamic shared-memory size per thread block in bytes
*
* \return
* ::CUDA_SUCCESS,
* ::CUDA_ERROR_DEINITIALIZED,
* ::CUDA_ERROR_NOT_INITIALIZED,
* ::CUDA_ERROR_INVALID_CONTEXT,
* ::CUDA_ERROR_INVALID_HANDLE,
* ::CUDA_ERROR_INVALID_VALUE
* \notefnerr
*
* \sa ::cuFuncSetBlockShape,
* ::cuFuncSetCacheConfig,
* ::cuFuncGetAttribute,
* ::cuParamSetSize,
* ::cuParamSeti,
* ::cuParamSetf,
* ::cuParamSetv,
* ::cuLaunch,
* ::cuLaunchGrid,
* ::cuLaunchGridAsync
*/
CUresult CUDAAPI cuFuncSetSharedSize(CUfunction hfunc, unsigned int bytes);
/**
* \brief Returns information about a function
*
* Returns in \p *pi the integer value of the attribute \p attrib on the kernel
* given by \p hfunc. The supported attributes are:
* - ::CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK: The maximum number of threads
* per block, beyond which a launch of the function would fail. This number
* depends on both the function and the device on which the function is
* currently loaded.
* - ::CU_FUNC_ATTRIBUTE_SHARED_SIZE_BYTES: The size in bytes of
* statically-allocated shared memory per block required by this function.
* This does not include dynamically-allocated shared memory requested by
* the user at runtime.
* - ::CU_FUNC_ATTRIBUTE_CONST_SIZE_BYTES: The size in bytes of user-allocated
* constant memory required by this function.
* - ::CU_FUNC_ATTRIBUTE_LOCAL_SIZE_BYTES: The size in bytes of local memory
* used by each thread of this function.
* - ::CU_FUNC_ATTRIBUTE_NUM_REGS: The number of registers used by each thread
* of this function.
* - ::CU_FUNC_ATTRIBUTE_PTX_VERSION: The PTX virtual architecture version for
* which the function was compiled. This value is the major PTX version * 10
* + the minor PTX version, so a PTX version 1.3 function would return the
* value 13. Note that this may return the undefined value of 0 for cubins
* compiled prior to CUDA 3.0.
* - ::CU_FUNC_ATTRIBUTE_BINARY_VERSION: The binary architecture version for
* which the function was compiled. This value is the major binary
* version * 10 + the minor binary version, so a binary version 1.3 function
* would return the value 13. Note that this will return a value of 10 for
* legacy cubins that do not have a properly-encoded binary architecture
* version.
*
* \param pi - Returned attribute value
* \param attrib - Attribute requested
* \param hfunc - Function to query attribute of
*
* \return
* ::CUDA_SUCCESS,
* ::CUDA_ERROR_DEINITIALIZED,
* ::CUDA_ERROR_NOT_INITIALIZED,
* ::CUDA_ERROR_INVALID_CONTEXT,
* ::CUDA_ERROR_INVALID_HANDLE,
* ::CUDA_ERROR_INVALID_VALUE
* \notefnerr
*
* \sa ::cuFuncSetBlockShape,
* ::cuFuncSetSharedSize,
* ::cuFuncSetCacheConfig,
* ::cuParamSetSize,
* ::cuParamSeti,
* ::cuParamSetf,
* ::cuParamSetv,
* ::cuLaunch,
* ::cuLaunchGrid,
* ::cuLaunchGridAsync
*/
CUresult CUDAAPI cuFuncGetAttribute(int *pi, CUfunction_attribute attrib, CUfunction hfunc);
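/*
 * Usage sketch (illustrative, not part of the original header): querying a
 * kernel's resource usage before choosing a block shape. `hfunc` is assumed
 * to have been obtained via ::cuModuleGetFunction(). Error checking is
 * omitted.
 * \code
    int maxThreads, numRegs;
    cuFuncGetAttribute(&maxThreads, CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK, hfunc);
    cuFuncGetAttribute(&numRegs, CU_FUNC_ATTRIBUTE_NUM_REGS, hfunc);
    // choose a block size no larger than maxThreads
 * \endcode
 */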
/**
* \brief Sets the preferred cache configuration for a device function
*
* On devices where the L1 cache and shared memory use the same hardware
* resources, this sets through \p config the preferred cache configuration for
* the device function \p hfunc. This is only a preference. The driver will use
* the requested configuration if possible, but it is free to choose a different
* configuration if required to execute \p hfunc. Any context-wide preference
* set via ::cuCtxSetCacheConfig() will be overridden by this per-function
* setting unless the per-function setting is ::CU_FUNC_CACHE_PREFER_NONE. In
* that case, the current context-wide setting will be used.
*
* This setting does nothing on devices where the size of the L1 cache and
* shared memory are fixed.
*
* Launching a kernel with a different preference than the most recent
* preference setting may insert a device-side synchronization point.
*
*
* The supported cache configurations are:
* - ::CU_FUNC_CACHE_PREFER_NONE: no preference for shared memory or L1 (default)
* - ::CU_FUNC_CACHE_PREFER_SHARED: prefer larger shared memory and smaller L1 cache
* - ::CU_FUNC_CACHE_PREFER_L1: prefer larger L1 cache and smaller shared memory
*
* \param hfunc - Kernel to configure cache for
* \param config - Requested cache configuration
*
* \return
* ::CUDA_SUCCESS,
* ::CUDA_ERROR_DEINITIALIZED,
* ::CUDA_ERROR_NOT_INITIALIZED,
* ::CUDA_ERROR_INVALID_CONTEXT
* \notefnerr
*
* \sa ::cuCtxGetCacheConfig,
* ::cuCtxSetCacheConfig,
* ::cuFuncSetBlockShape,
* ::cuFuncGetAttribute,
* ::cuParamSetSize,
* ::cuParamSeti,
* ::cuParamSetf,
* ::cuParamSetv,
* ::cuLaunch,
* ::cuLaunchGrid,
* ::cuLaunchGridAsync
*/
CUresult CUDAAPI cuFuncSetCacheConfig(CUfunction hfunc, CUfunc_cache config);
/**
* \brief Sets the parameter size for the function
*
* Sets through \p numbytes the total size in bytes needed by the function
* parameters of the kernel corresponding to \p hfunc.
*
* \param hfunc - Kernel to set parameter size for
* \param numbytes - Size of parameter list in bytes
*
* \return
* ::CUDA_SUCCESS,
* ::CUDA_ERROR_DEINITIALIZED,
* ::CUDA_ERROR_NOT_INITIALIZED,
* ::CUDA_ERROR_INVALID_CONTEXT,
* ::CUDA_ERROR_INVALID_VALUE
* \notefnerr
*
* \sa ::cuFuncSetBlockShape,
* ::cuFuncSetSharedSize,
* ::cuFuncGetAttribute,
* ::cuParamSetf,
* ::cuParamSeti,
* ::cuParamSetv,
* ::cuLaunch,
* ::cuLaunchGrid,
* ::cuLaunchGridAsync
*/
CUresult CUDAAPI cuParamSetSize(CUfunction hfunc, unsigned int numbytes);
/**
* \brief Adds an integer parameter to the function's argument list
*
* Sets an integer parameter that will be specified the next time the
* kernel corresponding to \p hfunc will be invoked. \p offset is a byte offset.
*
* \param hfunc - Kernel to add parameter to
* \param offset - Offset to add parameter to argument list
* \param value - Value of parameter
*
* \return
* ::CUDA_SUCCESS,
* ::CUDA_ERROR_DEINITIALIZED,
* ::CUDA_ERROR_NOT_INITIALIZED,
* ::CUDA_ERROR_INVALID_CONTEXT,
* ::CUDA_ERROR_INVALID_VALUE
* \notefnerr
*
* \sa ::cuFuncSetBlockShape,
* ::cuFuncSetSharedSize,
* ::cuFuncGetAttribute,
* ::cuParamSetSize,
* ::cuParamSetf,
* ::cuParamSetv,
* ::cuLaunch,
* ::cuLaunchGrid,
* ::cuLaunchGridAsync
*/
CUresult CUDAAPI cuParamSeti(CUfunction hfunc, int offset, unsigned int value);
/**
* \brief Adds a floating-point parameter to the function's argument list
*
* Sets a floating-point parameter that will be specified the next time the
* kernel corresponding to \p hfunc will be invoked. \p offset is a byte offset.
*
* \param hfunc - Kernel to add parameter to
* \param offset - Offset to add parameter to argument list
* \param value - Value of parameter
*
* \return
* ::CUDA_SUCCESS,
* ::CUDA_ERROR_DEINITIALIZED,
* ::CUDA_ERROR_NOT_INITIALIZED,
* ::CUDA_ERROR_INVALID_CONTEXT,
* ::CUDA_ERROR_INVALID_VALUE
* \notefnerr
*
* \sa ::cuFuncSetBlockShape,
* ::cuFuncSetSharedSize,
* ::cuFuncGetAttribute,
* ::cuParamSetSize,
* ::cuParamSeti,
* ::cuParamSetv,
* ::cuLaunch,
* ::cuLaunchGrid,
* ::cuLaunchGridAsync
*/
CUresult CUDAAPI cuParamSetf(CUfunction hfunc, int offset, float value);
/**
* \brief Adds arbitrary data to the function's argument list
*
* Copies an arbitrary amount of data (specified in \p numbytes) from \p ptr
* into the parameter space of the kernel corresponding to \p hfunc. \p offset
* is a byte offset.
*
* \param hfunc - Kernel to add data to
* \param offset - Offset to add data to argument list
* \param ptr - Pointer to arbitrary data
* \param numbytes - Size of data to copy in bytes
*
* \return
* ::CUDA_SUCCESS,
* ::CUDA_ERROR_DEINITIALIZED,
* ::CUDA_ERROR_NOT_INITIALIZED,
* ::CUDA_ERROR_INVALID_CONTEXT,
* ::CUDA_ERROR_INVALID_VALUE
* \notefnerr
*
* \sa ::cuFuncSetBlockShape,
* ::cuFuncSetSharedSize,
* ::cuFuncGetAttribute,
* ::cuParamSetSize,
* ::cuParamSetf,
* ::cuParamSeti,
* ::cuLaunch,
* ::cuLaunchGrid,
* ::cuLaunchGridAsync
*/
CUresult CUDAAPI cuParamSetv(CUfunction hfunc, int offset, void *ptr, unsigned int numbytes);
/**
* \brief Launches a CUDA function
*
* Invokes the kernel \p f on a 1 x 1 x 1 grid of blocks. The block
* contains the number of threads specified by a previous call to
* ::cuFuncSetBlockShape().
*
* \param f - Kernel to launch
*
* \return
* ::CUDA_SUCCESS,
* ::CUDA_ERROR_DEINITIALIZED,
* ::CUDA_ERROR_NOT_INITIALIZED,
* ::CUDA_ERROR_INVALID_CONTEXT,
* ::CUDA_ERROR_INVALID_VALUE,
* ::CUDA_ERROR_LAUNCH_FAILED,
* ::CUDA_ERROR_LAUNCH_OUT_OF_RESOURCES,
* ::CUDA_ERROR_LAUNCH_TIMEOUT,
* ::CUDA_ERROR_LAUNCH_INCOMPATIBLE_TEXTURING,
* ::CUDA_ERROR_SHARED_OBJECT_INIT_FAILED
* \notefnerr
*
* \sa ::cuFuncSetBlockShape,
* ::cuFuncSetSharedSize,
* ::cuFuncGetAttribute,
* ::cuParamSetSize,
* ::cuParamSetf,
* ::cuParamSeti,
* ::cuParamSetv,
* ::cuLaunchGrid,
* ::cuLaunchGridAsync
*/
CUresult CUDAAPI cuLaunch(CUfunction f);
/**
* \brief Launches a CUDA function
*
* Invokes the kernel \p f on a \p grid_width x \p grid_height grid of
* blocks. Each block contains the number of threads specified by a previous
* call to ::cuFuncSetBlockShape().
*
* \param f - Kernel to launch
* \param grid_width - Width of grid in blocks
* \param grid_height - Height of grid in blocks
*
* \return
* ::CUDA_SUCCESS,
* ::CUDA_ERROR_DEINITIALIZED,
* ::CUDA_ERROR_NOT_INITIALIZED,
* ::CUDA_ERROR_INVALID_CONTEXT,
* ::CUDA_ERROR_INVALID_VALUE,
* ::CUDA_ERROR_LAUNCH_FAILED,
* ::CUDA_ERROR_LAUNCH_OUT_OF_RESOURCES,
* ::CUDA_ERROR_LAUNCH_TIMEOUT,
* ::CUDA_ERROR_LAUNCH_INCOMPATIBLE_TEXTURING,
* ::CUDA_ERROR_SHARED_OBJECT_INIT_FAILED
* \notefnerr
*
* \sa ::cuFuncSetBlockShape,
* ::cuFuncSetSharedSize,
* ::cuFuncGetAttribute,
* ::cuParamSetSize,
* ::cuParamSetf,
* ::cuParamSeti,
* ::cuParamSetv,
* ::cuLaunch,
* ::cuLaunchGridAsync
*/
CUresult CUDAAPI cuLaunchGrid(CUfunction f, int grid_width, int grid_height);
/**
* \brief Launches a CUDA function
*
* Invokes the kernel \p f on a \p grid_width x \p grid_height grid of
* blocks. Each block contains the number of threads specified by a previous
* call to ::cuFuncSetBlockShape().
*
* ::cuLaunchGridAsync() can optionally be associated to a stream by passing a
* non-zero \p hStream argument.
*
* \param f - Kernel to launch
* \param grid_width - Width of grid in blocks
* \param grid_height - Height of grid in blocks
* \param hStream - Stream identifier
*
* \return
* ::CUDA_SUCCESS,
* ::CUDA_ERROR_DEINITIALIZED,
* ::CUDA_ERROR_NOT_INITIALIZED,
* ::CUDA_ERROR_INVALID_CONTEXT,
* ::CUDA_ERROR_INVALID_VALUE,
* ::CUDA_ERROR_LAUNCH_FAILED,
* ::CUDA_ERROR_LAUNCH_OUT_OF_RESOURCES,
* ::CUDA_ERROR_LAUNCH_TIMEOUT,
* ::CUDA_ERROR_LAUNCH_INCOMPATIBLE_TEXTURING,
* ::CUDA_ERROR_SHARED_OBJECT_INIT_FAILED
* \notefnerr
*
* \sa ::cuFuncSetBlockShape,
* ::cuFuncSetSharedSize,
* ::cuFuncGetAttribute,
* ::cuParamSetSize,
* ::cuParamSetf,
* ::cuParamSeti,
* ::cuParamSetv,
* ::cuLaunch,
* ::cuLaunchGrid
*/
CUresult CUDAAPI cuLaunchGridAsync(CUfunction f, int grid_width, int grid_height, CUstream hStream);
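/*
 * Usage sketch (illustrative, not part of the original header): the full
 * launch sequence for a hypothetical kernel taking (CUdeviceptr, int)
 * parameters. `hfunc`, `d`, and `n` are assumed to exist; the manual offset
 * bookkeeping is simplified (a real marshaller would align each offset to the
 * argument's natural alignment).
 * \code
    int offset = 0;
    void *ptr = (void *)(size_t)d;              // device pointer argument
    cuFuncSetBlockShape(hfunc, 256, 1, 1);      // 256 threads per block
    cuParamSetv(hfunc, offset, &ptr, sizeof(ptr));
    offset += sizeof(ptr);
    cuParamSeti(hfunc, offset, n);              // integer argument
    offset += sizeof(int);
    cuParamSetSize(hfunc, offset);              // total parameter size
    cuLaunchGrid(hfunc, (n + 255) / 256, 1);    // enough blocks to cover n
 * \endcode
 */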
/**
* \defgroup CUDA_EXEC_DEPRECATED Execution Control [DEPRECATED]
*
* This section describes the deprecated execution control functions of the
* low-level CUDA driver application programming interface.
*
* @{
*/
/**
* \brief Adds a texture-reference to the function's argument list
*
* \deprecated
*
* Makes the CUDA array or linear memory bound to the texture reference
* \p hTexRef available to a device program as a texture. In this version of
* CUDA, the texture-reference must be obtained via ::cuModuleGetTexRef() and
* the \p texunit parameter must be set to ::CU_PARAM_TR_DEFAULT.
*
* \param hfunc - Kernel to add texture-reference to
* \param texunit - Texture unit (must be ::CU_PARAM_TR_DEFAULT)
* \param hTexRef - Texture-reference to add to argument list
*
* \return
* ::CUDA_SUCCESS,
* ::CUDA_ERROR_DEINITIALIZED,
* ::CUDA_ERROR_NOT_INITIALIZED,
* ::CUDA_ERROR_INVALID_CONTEXT,
* ::CUDA_ERROR_INVALID_VALUE
* \notefnerr
*/
CUresult CUDAAPI cuParamSetTexRef(CUfunction hfunc, int texunit, CUtexref hTexRef);
/** @} */ /* END CUDA_EXEC_DEPRECATED */
/** @} */ /* END CUDA_EXEC */
/**
* \defgroup CUDA_TEXREF Texture Reference Management
*
* This section describes the texture reference management functions of the
* low-level CUDA driver application programming interface.
*
* @{
*/
/**
* \brief Binds an array as a texture reference
*
* Binds the CUDA array \p hArray to the texture reference \p hTexRef. Any
* previous address or CUDA array state associated with the texture reference
* is superseded by this function. \p Flags must be set to
* ::CU_TRSA_OVERRIDE_FORMAT. Any CUDA array previously bound to \p hTexRef is
* unbound.
*
* \param hTexRef - Texture reference to bind
* \param hArray - Array to bind
* \param Flags - Options (must be ::CU_TRSA_OVERRIDE_FORMAT)
*
* \return
* ::CUDA_SUCCESS,
* ::CUDA_ERROR_DEINITIALIZED,
* ::CUDA_ERROR_NOT_INITIALIZED,
* ::CUDA_ERROR_INVALID_CONTEXT,
* ::CUDA_ERROR_INVALID_VALUE
*
* \sa ::cuTexRefSetAddress,
* ::cuTexRefSetAddress2D, ::cuTexRefSetAddressMode,
* ::cuTexRefSetFilterMode, ::cuTexRefSetFlags, ::cuTexRefSetFormat,
* ::cuTexRefGetAddress, ::cuTexRefGetAddressMode, ::cuTexRefGetArray,
* ::cuTexRefGetFilterMode, ::cuTexRefGetFlags, ::cuTexRefGetFormat
*/
CUresult CUDAAPI cuTexRefSetArray(CUtexref hTexRef, CUarray hArray, unsigned int Flags);
#if __CUDA_API_VERSION >= 3020
/**
* \brief Binds an address as a texture reference
*
* Binds a linear address range to the texture reference \p hTexRef. Any
* previous address or CUDA array state associated with the texture reference
* is superseded by this function. Any memory previously bound to \p hTexRef
* is unbound.
*
* Since the hardware enforces an alignment requirement on texture base
* addresses, ::cuTexRefSetAddress() passes back a byte offset in
* \p *ByteOffset that must be applied to texture fetches in order to read from
* the desired memory. This offset must be divided by the texel size and
* passed to kernels that read from the texture so they can be applied to the
* ::tex1Dfetch() function.
*
* If the device memory pointer was returned from ::cuMemAlloc(), the offset
* is guaranteed to be 0 and NULL may be passed as the \p ByteOffset parameter.
*
* \param ByteOffset - Returned byte offset
* \param hTexRef - Texture reference to bind
* \param dptr - Device pointer to bind
* \param bytes - Size of memory to bind in bytes
*
* \return
* ::CUDA_SUCCESS,
* ::CUDA_ERROR_DEINITIALIZED,
* ::CUDA_ERROR_NOT_INITIALIZED,
* ::CUDA_ERROR_INVALID_CONTEXT,
* ::CUDA_ERROR_INVALID_VALUE
*
* \sa ::cuTexRefSetAddress2D, ::cuTexRefSetAddressMode, ::cuTexRefSetArray
,
* ::cuTexRefSetFilterMode, ::cuTexRefSetFlags, ::cuTexRefSetFormat,
* ::cuTexRefGetAddress, ::cuTexRefGetAddressMode, ::cuTexRefGetArray,
* ::cuTexRefGetFilterMode, ::cuTexRefGetFlags, ::cuTexRefGetFormat
*/
CUresult CUDAAPI cuTexRefSetAddress(size_t *ByteOffset, CUtexref hTexRef, CUdeviceptr dptr, size_t bytes);
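/*
 * Usage sketch (illustrative, not part of the original header): binding memory
 * from ::cuMemAlloc() to a texture reference. Because the pointer came from
 * ::cuMemAlloc(), the byte offset is guaranteed to be 0 and NULL can be passed
 * for \p ByteOffset. `texref` is assumed to come from ::cuModuleGetTexRef().
 * \code
    CUdeviceptr d;
    size_t bytes = 4096 * sizeof(float);
    cuMemAlloc(&d, bytes);
    cuTexRefSetAddress(NULL, texref, d, bytes);  // offset known to be 0
 * \endcode
 */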
/**
* \brief Binds an address as a 2D texture reference
*
* Binds a linear address range to the texture reference \p hTexRef. Any
* previous address or CUDA array state associated with the texture reference
* is superseded by this function. Any memory previously bound to \p hTexRef
* is unbound.
*
* Using a ::tex2D() function inside a kernel requires a call to either
* ::cuTexRefSetArray() to bind the corresponding texture reference to an
* array, or ::cuTexRefSetAddress2D() to bind the texture reference to linear
* memory.
*
* Function calls to ::cuTexRefSetFormat() cannot follow calls to
* ::cuTexRefSetAddress2D() for the same texture reference.
*
* It is required that \p dptr be aligned to the appropriate hardware-specific
* texture alignment. You can query this value using the device attribute
* ::CU_DEVICE_ATTRIBUTE_TEXTURE_ALIGNMENT. If an unaligned \p dptr is
* supplied, ::CUDA_ERROR_INVALID_VALUE is returned.
*
* \param hTexRef - Texture reference to bind
* \param desc - Descriptor of CUDA array
* \param dptr - Device pointer to bind
* \param Pitch - Line pitch in bytes
*
* \return
* ::CUDA_SUCCESS,
* ::CUDA_ERROR_DEINITIALIZED,
* ::CUDA_ERROR_NOT_INITIALIZED,
* ::CUDA_ERROR_INVALID_CONTEXT,
* ::CUDA_ERROR_INVALID_VALUE
*
* \sa ::cuTexRefSetAddress,
* ::cuTexRefSetAddressMode, ::cuTexRefSetArray,
* ::cuTexRefSetFilterMode, ::cuTexRefSetFlags, ::cuTexRefSetFormat,
* ::cuTexRefGetAddress, ::cuTexRefGetAddressMode, ::cuTexRefGetArray,
* ::cuTexRefGetFilterMode, ::cuTexRefGetFlags, ::cuTexRefGetFormat
*/
CUresult CUDAAPI cuTexRefSetAddress2D(CUtexref hTexRef, const CUDA_ARRAY_DESCRIPTOR *desc, CUdeviceptr dptr, size_t Pitch);
#endif /* __CUDA_API_VERSION >= 3020 */
/**
* \brief Sets the format for a texture reference
*
* Specifies the format of the data to be read by the texture reference
* \p hTexRef. \p fmt and \p NumPackedComponents are exactly analogous to the
* ::Format and ::NumChannels members of the ::CUDA_ARRAY_DESCRIPTOR structure:
* They specify the format of each component and the number of components per
* array element.
*
* \param hTexRef - Texture reference
* \param fmt - Format to set
* \param NumPackedComponents - Number of components per array element
*
* \return
* ::CUDA_SUCCESS,
* ::CUDA_ERROR_DEINITIALIZED,
* ::CUDA_ERROR_NOT_INITIALIZED,
* ::CUDA_ERROR_INVALID_CONTEXT,
* ::CUDA_ERROR_INVALID_VALUE
*
* \sa ::cuTexRefSetAddress,
* ::cuTexRefSetAddress2D, ::cuTexRefSetAddressMode, ::cuTexRefSetArray,
* ::cuTexRefSetFilterMode, ::cuTexRefSetFlags,
* ::cuTexRefGetAddress, ::cuTexRefGetAddressMode, ::cuTexRefGetArray,
* ::cuTexRefGetFilterMode, ::cuTexRefGetFlags, ::cuTexRefGetFormat
*/
CUresult CUDAAPI cuTexRefSetFormat(CUtexref hTexRef, CUarray_format fmt, int NumPackedComponents);
/**
* \brief Sets the addressing mode for a texture reference
*
* Specifies the addressing mode \p am for the given dimension \p dim of the
* texture reference \p hTexRef. If \p dim is zero, the addressing mode is
* applied to the first parameter of the functions used to fetch from the
* texture; if \p dim is 1, the second, and so on. ::CUaddress_mode is defined
* as:
* \code
typedef enum CUaddress_mode_enum {
CU_TR_ADDRESS_MODE_WRAP = 0,
CU_TR_ADDRESS_MODE_CLAMP = 1,
CU_TR_ADDRESS_MODE_MIRROR = 2,
CU_TR_ADDRESS_MODE_BORDER = 3
} CUaddress_mode;
* \endcode
*
* Note that this call has no effect if \p hTexRef is bound to linear memory.
*
* \param hTexRef - Texture reference
* \param dim - Dimension
* \param am - Addressing mode to set
*
* \return
* ::CUDA_SUCCESS,
* ::CUDA_ERROR_DEINITIALIZED,
* ::CUDA_ERROR_NOT_INITIALIZED,
* ::CUDA_ERROR_INVALID_CONTEXT,
* ::CUDA_ERROR_INVALID_VALUE
*
* \sa ::cuTexRefSetAddress,
* ::cuTexRefSetAddress2D, ::cuTexRefSetArray,
* ::cuTexRefSetFilterMode, ::cuTexRefSetFlags, ::cuTexRefSetFormat,
* ::cuTexRefGetAddress, ::cuTexRefGetAddressMode, ::cuTexRefGetArray,
* ::cuTexRefGetFilterMode, ::cuTexRefGetFlags, ::cuTexRefGetFormat
*/
CUresult CUDAAPI cuTexRefSetAddressMode(CUtexref hTexRef, int dim, CUaddress_mode am);
/**
* \brief Sets the filtering mode for a texture reference
*
* Specifies the filtering mode \p fm to be used when reading memory through
* the texture reference \p hTexRef. ::CUfilter_mode_enum is defined as:
*
* \code
typedef enum CUfilter_mode_enum {
CU_TR_FILTER_MODE_POINT = 0,
CU_TR_FILTER_MODE_LINEAR = 1
} CUfilter_mode;
* \endcode
*
* Note that this call has no effect if \p hTexRef is bound to linear memory.
*
* \param hTexRef - Texture reference
* \param fm - Filtering mode to set
*
* \return
* ::CUDA_SUCCESS,
* ::CUDA_ERROR_DEINITIALIZED,
* ::CUDA_ERROR_NOT_INITIALIZED,
* ::CUDA_ERROR_INVALID_CONTEXT,
* ::CUDA_ERROR_INVALID_VALUE
*
* \sa ::cuTexRefSetAddress,
* ::cuTexRefSetAddress2D, ::cuTexRefSetAddressMode, ::cuTexRefSetArray,
* ::cuTexRefSetFlags, ::cuTexRefSetFormat,
* ::cuTexRefGetAddress, ::cuTexRefGetAddressMode, ::cuTexRefGetArray,
* ::cuTexRefGetFilterMode, ::cuTexRefGetFlags, ::cuTexRefGetFormat
*/
CUresult CUDAAPI cuTexRefSetFilterMode(CUtexref hTexRef, CUfilter_mode fm);
/**
* \brief Sets the flags for a texture reference
*
* Specifies optional flags via \p Flags to specify the behavior of data
* returned through the texture reference \p hTexRef. The valid flags are:
*
* - ::CU_TRSF_READ_AS_INTEGER, which suppresses the default behavior of
* having the texture promote integer data to floating point data in the
* range [0, 1];
* - ::CU_TRSF_NORMALIZED_COORDINATES, which suppresses the default behavior
* of having the texture coordinates range from [0, Dim) where Dim is the
* width or height of the CUDA array. Instead, the texture coordinates
* [0, 1.0) reference the entire breadth of the array dimension;
*
* \param hTexRef - Texture reference
* \param Flags - Optional flags to set
*
* \return
* ::CUDA_SUCCESS,
* ::CUDA_ERROR_DEINITIALIZED,
* ::CUDA_ERROR_NOT_INITIALIZED,
* ::CUDA_ERROR_INVALID_CONTEXT,
* ::CUDA_ERROR_INVALID_VALUE
*
* \sa ::cuTexRefSetAddress,
* ::cuTexRefSetAddress2D, ::cuTexRefSetAddressMode, ::cuTexRefSetArray,
* ::cuTexRefSetFilterMode, ::cuTexRefSetFormat,
* ::cuTexRefGetAddress, ::cuTexRefGetAddressMode, ::cuTexRefGetArray,
* ::cuTexRefGetFilterMode, ::cuTexRefGetFlags, ::cuTexRefGetFormat
*/
CUresult CUDAAPI cuTexRefSetFlags(CUtexref hTexRef, unsigned int Flags);
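/*
 * Illustrative sketch (not part of the original header): the typical
 * configuration sequence for a texture reference fetched from a loaded
 * module. The module handle and the texref name "texData" are assumptions
 * made up for this example; <cuda.h> is assumed to be included.
 */
static CUresult configureTexRef(CUmodule hModule)
{
    CUtexref hTexRef;
    CUresult rc = cuModuleGetTexRef(&hTexRef, hModule, "texData"); /* hypothetical name */
    if (rc != CUDA_SUCCESS) return rc;
    /* One float component per element. */
    rc = cuTexRefSetFormat(hTexRef, CU_AD_FORMAT_FLOAT, 1);
    if (rc != CUDA_SUCCESS) return rc;
    /* Clamp out-of-range coordinates in both dimensions. */
    rc = cuTexRefSetAddressMode(hTexRef, 0, CU_TR_ADDRESS_MODE_CLAMP);
    if (rc != CUDA_SUCCESS) return rc;
    rc = cuTexRefSetAddressMode(hTexRef, 1, CU_TR_ADDRESS_MODE_CLAMP);
    if (rc != CUDA_SUCCESS) return rc;
    /* Interpolate between texels and fetch with [0, 1) coordinates. */
    rc = cuTexRefSetFilterMode(hTexRef, CU_TR_FILTER_MODE_LINEAR);
    if (rc != CUDA_SUCCESS) return rc;
    return cuTexRefSetFlags(hTexRef, CU_TRSF_NORMALIZED_COORDINATES);
}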
#if __CUDA_API_VERSION >= 3020
/**
* \brief Gets the address associated with a texture reference
*
* Returns in \p *pdptr the base address bound to the texture reference
* \p hTexRef, or returns ::CUDA_ERROR_INVALID_VALUE if the texture reference
* is not bound to any device memory range.
*
* \param pdptr - Returned device address
* \param hTexRef - Texture reference
*
* \return
* ::CUDA_SUCCESS,
* ::CUDA_ERROR_DEINITIALIZED,
* ::CUDA_ERROR_NOT_INITIALIZED,
* ::CUDA_ERROR_INVALID_CONTEXT,
* ::CUDA_ERROR_INVALID_VALUE
*
* \sa ::cuTexRefSetAddress,
* ::cuTexRefSetAddress2D, ::cuTexRefSetAddressMode, ::cuTexRefSetArray,
* ::cuTexRefSetFilterMode, ::cuTexRefSetFlags, ::cuTexRefSetFormat,
* ::cuTexRefGetAddressMode, ::cuTexRefGetArray,
* ::cuTexRefGetFilterMode, ::cuTexRefGetFlags, ::cuTexRefGetFormat
*/
CUresult CUDAAPI cuTexRefGetAddress(CUdeviceptr *pdptr, CUtexref hTexRef);
#endif /* __CUDA_API_VERSION >= 3020 */
/**
* \brief Gets the array bound to a texture reference
*
* Returns in \p *phArray the CUDA array bound to the texture reference
* \p hTexRef, or returns ::CUDA_ERROR_INVALID_VALUE if the texture reference
* is not bound to any CUDA array.
*
* \param phArray - Returned array
* \param hTexRef - Texture reference
*
* \return
* ::CUDA_SUCCESS,
* ::CUDA_ERROR_DEINITIALIZED,
* ::CUDA_ERROR_NOT_INITIALIZED,
* ::CUDA_ERROR_INVALID_CONTEXT,
* ::CUDA_ERROR_INVALID_VALUE
*
* \sa ::cuTexRefSetAddress,
* ::cuTexRefSetAddress2D, ::cuTexRefSetAddressMode, ::cuTexRefSetArray,
* ::cuTexRefSetFilterMode, ::cuTexRefSetFlags, ::cuTexRefSetFormat,
* ::cuTexRefGetAddress, ::cuTexRefGetAddressMode,
* ::cuTexRefGetFilterMode, ::cuTexRefGetFlags, ::cuTexRefGetFormat
*/
CUresult CUDAAPI cuTexRefGetArray(CUarray *phArray, CUtexref hTexRef);
/**
* \brief Gets the addressing mode used by a texture reference
*
* Returns in \p *pam the addressing mode corresponding to the
* dimension \p dim of the texture reference \p hTexRef. Currently, the only
* valid values for \p dim are 0 and 1.
*
* \param pam - Returned addressing mode
* \param hTexRef - Texture reference
* \param dim - Dimension
*
* \return
* ::CUDA_SUCCESS,
* ::CUDA_ERROR_DEINITIALIZED,
* ::CUDA_ERROR_NOT_INITIALIZED,
* ::CUDA_ERROR_INVALID_CONTEXT,
* ::CUDA_ERROR_INVALID_VALUE
*
* \sa ::cuTexRefSetAddress,
* ::cuTexRefSetAddress2D, ::cuTexRefSetAddressMode, ::cuTexRefSetArray,
* ::cuTexRefSetFilterMode, ::cuTexRefSetFlags, ::cuTexRefSetFormat,
* ::cuTexRefGetAddress, ::cuTexRefGetArray,
* ::cuTexRefGetFilterMode, ::cuTexRefGetFlags, ::cuTexRefGetFormat
*/
CUresult CUDAAPI cuTexRefGetAddressMode(CUaddress_mode *pam, CUtexref hTexRef, int dim);
/**
* \brief Gets the filter-mode used by a texture reference
*
* Returns in \p *pfm the filtering mode of the texture reference
* \p hTexRef.
*
* \param pfm - Returned filtering mode
* \param hTexRef - Texture reference
*
* \return
* ::CUDA_SUCCESS,
* ::CUDA_ERROR_DEINITIALIZED,
* ::CUDA_ERROR_NOT_INITIALIZED,
* ::CUDA_ERROR_INVALID_CONTEXT,
* ::CUDA_ERROR_INVALID_VALUE
*
* \sa ::cuTexRefSetAddress,
* ::cuTexRefSetAddress2D, ::cuTexRefSetAddressMode, ::cuTexRefSetArray,
* ::cuTexRefSetFilterMode, ::cuTexRefSetFlags, ::cuTexRefSetFormat,
* ::cuTexRefGetAddress, ::cuTexRefGetAddressMode, ::cuTexRefGetArray,
* ::cuTexRefGetFlags, ::cuTexRefGetFormat
*/
CUresult CUDAAPI cuTexRefGetFilterMode(CUfilter_mode *pfm, CUtexref hTexRef);
/**
* \brief Gets the format used by a texture reference
*
* Returns in \p *pFormat and \p *pNumChannels the format and number
* of components of the CUDA array bound to the texture reference \p hTexRef.
* If \p pFormat or \p pNumChannels is NULL, it will be ignored.
*
* \param pFormat - Returned format
* \param pNumChannels - Returned number of components
* \param hTexRef - Texture reference
*
* \return
* ::CUDA_SUCCESS,
* ::CUDA_ERROR_DEINITIALIZED,
* ::CUDA_ERROR_NOT_INITIALIZED,
* ::CUDA_ERROR_INVALID_CONTEXT,
* ::CUDA_ERROR_INVALID_VALUE
*
* \sa ::cuTexRefSetAddress,
* ::cuTexRefSetAddress2D, ::cuTexRefSetAddressMode, ::cuTexRefSetArray,
* ::cuTexRefSetFilterMode, ::cuTexRefSetFlags, ::cuTexRefSetFormat,
* ::cuTexRefGetAddress, ::cuTexRefGetAddressMode, ::cuTexRefGetArray,
* ::cuTexRefGetFilterMode, ::cuTexRefGetFlags
*/
CUresult CUDAAPI cuTexRefGetFormat(CUarray_format *pFormat, int *pNumChannels, CUtexref hTexRef);
/**
* \brief Gets the flags used by a texture reference
*
* Returns in \p *pFlags the flags of the texture reference \p hTexRef.
*
* \param pFlags - Returned flags
* \param hTexRef - Texture reference
*
* \return
* ::CUDA_SUCCESS,
* ::CUDA_ERROR_DEINITIALIZED,
* ::CUDA_ERROR_NOT_INITIALIZED,
* ::CUDA_ERROR_INVALID_CONTEXT,
* ::CUDA_ERROR_INVALID_VALUE
*
* \sa ::cuTexRefSetAddress,
* ::cuTexRefSetAddress2D, ::cuTexRefSetAddressMode, ::cuTexRefSetArray,
* ::cuTexRefSetFilterMode, ::cuTexRefSetFlags, ::cuTexRefSetFormat,
* ::cuTexRefGetAddress, ::cuTexRefGetAddressMode, ::cuTexRefGetArray,
* ::cuTexRefGetFilterMode, ::cuTexRefGetFormat
*/
CUresult CUDAAPI cuTexRefGetFlags(unsigned int *pFlags, CUtexref hTexRef);
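/*
 * Illustrative sketch: reading back the current state of a texture
 * reference with the getters above. All of them follow the same
 * out-parameter convention; <stdio.h> is assumed for the printf calls.
 */
static void dumpTexRefState(CUtexref hTexRef)
{
    CUarray_format fmt;
    int channels;
    CUaddress_mode am;
    CUfilter_mode fm;
    unsigned int flags;

    if (cuTexRefGetFormat(&fmt, &channels, hTexRef) == CUDA_SUCCESS)
        printf("format=%d channels=%d\n", (int)fmt, channels);
    if (cuTexRefGetAddressMode(&am, hTexRef, 0) == CUDA_SUCCESS)
        printf("dim 0 address mode=%d\n", (int)am);
    if (cuTexRefGetFilterMode(&fm, hTexRef) == CUDA_SUCCESS)
        printf("filter mode=%d\n", (int)fm);
    if (cuTexRefGetFlags(&flags, hTexRef) == CUDA_SUCCESS)
        printf("flags=0x%x\n", flags);
}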
/**
* \defgroup CUDA_TEXREF_DEPRECATED Texture Reference Management [DEPRECATED]
*
* This section describes the deprecated texture reference management
* functions of the low-level CUDA driver application programming interface.
*
* @{
*/
/**
* \brief Creates a texture reference
*
* \deprecated
*
* Creates a texture reference and returns its handle in \p *pTexRef. Once
* created, the application must call ::cuTexRefSetArray() or
* ::cuTexRefSetAddress() to associate the reference with allocated memory.
* Other texture reference functions are used to specify the format and
* interpretation (addressing, filtering, etc.) to be used when the memory is
* read through this texture reference.
*
* \param pTexRef - Returned texture reference
*
* \return
* ::CUDA_SUCCESS,
* ::CUDA_ERROR_DEINITIALIZED,
* ::CUDA_ERROR_NOT_INITIALIZED,
* ::CUDA_ERROR_INVALID_CONTEXT,
* ::CUDA_ERROR_INVALID_VALUE
*
* \sa ::cuTexRefDestroy
*/
CUresult CUDAAPI cuTexRefCreate(CUtexref *pTexRef);
/**
* \brief Destroys a texture reference
*
* \deprecated
*
* Destroys the texture reference specified by \p hTexRef.
*
* \param hTexRef - Texture reference to destroy
*
* \return
* ::CUDA_SUCCESS,
* ::CUDA_ERROR_DEINITIALIZED,
* ::CUDA_ERROR_NOT_INITIALIZED,
* ::CUDA_ERROR_INVALID_CONTEXT,
* ::CUDA_ERROR_INVALID_VALUE
*
* \sa ::cuTexRefCreate
*/
CUresult CUDAAPI cuTexRefDestroy(CUtexref hTexRef);
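/*
 * Illustrative sketch of the deprecated create/bind/destroy flow. New code
 * should obtain module-scoped references via cuModuleGetTexRef instead.
 * "hArray" is assumed to be an existing CUDA array.
 */
static CUresult legacyTexRefUse(CUarray hArray)
{
    CUtexref ref;
    CUresult rc = cuTexRefCreate(&ref);
    if (rc != CUDA_SUCCESS) return rc;
    rc = cuTexRefSetArray(ref, hArray, CU_TRSA_OVERRIDE_FORMAT);
    /* ... configure and fetch through the reference ... */
    cuTexRefDestroy(ref);
    return rc;
}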
/** @} */ /* END CUDA_TEXREF_DEPRECATED */
/** @} */ /* END CUDA_TEXREF */
/**
* \defgroup CUDA_SURFREF Surface Reference Management
*
* This section describes the surface reference management functions of the
* low-level CUDA driver application programming interface.
*
* @{
*/
/**
* \brief Sets the CUDA array for a surface reference.
*
* Sets the CUDA array \p hArray to be read and written by the surface reference
* \p hSurfRef. Any previous CUDA array state associated with the surface
* reference is superseded by this function. \p Flags must be set to 0.
* The ::CUDA_ARRAY3D_SURFACE_LDST flag must have been set for the CUDA array.
* Any CUDA array previously bound to \p hSurfRef is unbound.
*
* \param hSurfRef - Surface reference handle
* \param hArray - CUDA array handle
* \param Flags - set to 0
*
* \return
* ::CUDA_SUCCESS,
* ::CUDA_ERROR_DEINITIALIZED,
* ::CUDA_ERROR_NOT_INITIALIZED,
* ::CUDA_ERROR_INVALID_CONTEXT,
* ::CUDA_ERROR_INVALID_VALUE
*
* \sa ::cuModuleGetSurfRef, ::cuSurfRefGetArray
*/
CUresult CUDAAPI cuSurfRefSetArray(CUsurfref hSurfRef, CUarray hArray, unsigned int Flags);
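/*
 * Illustrative sketch: an array is only eligible for surface load/store if
 * it was created with ::CUDA_ARRAY3D_SURFACE_LDST. "hSurfRef" is assumed to
 * come from cuModuleGetSurfRef on a module that declares a surface.
 */
static CUresult bindSurfaceArray(CUsurfref hSurfRef, CUarray *phArrayOut)
{
    CUDA_ARRAY3D_DESCRIPTOR desc;
    CUresult rc;

    desc.Width       = 512;
    desc.Height      = 512;
    desc.Depth       = 0;                         /* 2D array */
    desc.Format      = CU_AD_FORMAT_UNSIGNED_INT8;
    desc.NumChannels = 4;
    desc.Flags       = CUDA_ARRAY3D_SURFACE_LDST; /* required for surface binding */

    rc = cuArray3DCreate(phArrayOut, &desc);
    if (rc != CUDA_SUCCESS) return rc;
    return cuSurfRefSetArray(hSurfRef, *phArrayOut, 0);
}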
/**
* \brief Passes back the CUDA array bound to a surface reference.
*
* Returns in \p *phArray the CUDA array bound to the surface reference
* \p hSurfRef, or returns ::CUDA_ERROR_INVALID_VALUE if the surface reference
* is not bound to any CUDA array.
* \param phArray - Returned CUDA array
* \param hSurfRef - Surface reference handle
*
* \return
* ::CUDA_SUCCESS,
* ::CUDA_ERROR_DEINITIALIZED,
* ::CUDA_ERROR_NOT_INITIALIZED,
* ::CUDA_ERROR_INVALID_CONTEXT,
* ::CUDA_ERROR_INVALID_VALUE
*
* \sa ::cuModuleGetSurfRef, ::cuSurfRefSetArray
*/
CUresult CUDAAPI cuSurfRefGetArray(CUarray *phArray, CUsurfref hSurfRef);
/** @} */ /* END CUDA_SURFREF */
/**
* \defgroup CUDA_GRAPHICS Graphics Interoperability
*
* This section describes the graphics interoperability functions of the
* low-level CUDA driver application programming interface.
*
* @{
*/
/**
* \brief Unregisters a graphics resource for access by CUDA
*
* Unregisters the graphics resource \p resource so it is not accessible by
* CUDA unless registered again.
*
* If \p resource is invalid then ::CUDA_ERROR_INVALID_HANDLE is
* returned.
*
* \param resource - Resource to unregister
*
* \return
* ::CUDA_SUCCESS,
* ::CUDA_ERROR_DEINITIALIZED,
* ::CUDA_ERROR_NOT_INITIALIZED,
* ::CUDA_ERROR_INVALID_CONTEXT,
* ::CUDA_ERROR_INVALID_HANDLE,
* ::CUDA_ERROR_UNKNOWN
* \notefnerr
*
* \sa
* ::cuGraphicsD3D9RegisterResource,
* ::cuGraphicsD3D10RegisterResource,
* ::cuGraphicsD3D11RegisterResource,
* ::cuGraphicsGLRegisterBuffer,
* ::cuGraphicsGLRegisterImage
*/
CUresult CUDAAPI cuGraphicsUnregisterResource(CUgraphicsResource resource);
/**
* \brief Get an array through which to access a subresource of a mapped graphics resource.
*
* Returns in \p *pArray an array through which the subresource of the mapped
* graphics resource \p resource which corresponds to array index \p arrayIndex
* and mipmap level \p mipLevel may be accessed. The value set in \p *pArray may
* change every time that \p resource is mapped.
*
* If \p resource is not a texture then it cannot be accessed via an array and
* ::CUDA_ERROR_NOT_MAPPED_AS_ARRAY is returned.
* If \p arrayIndex is not a valid array index for \p resource then
* ::CUDA_ERROR_INVALID_VALUE is returned.
* If \p mipLevel is not a valid mipmap level for \p resource then
* ::CUDA_ERROR_INVALID_VALUE is returned.
* If \p resource is not mapped then ::CUDA_ERROR_NOT_MAPPED is returned.
*
* \param pArray - Returned array through which a subresource of \p resource may be accessed
* \param resource - Mapped resource to access
* \param arrayIndex - Array index for array textures or cubemap face
* index as defined by ::CUarray_cubemap_face for
* cubemap textures for the subresource to access
* \param mipLevel - Mipmap level for the subresource to access
*
* \return
* ::CUDA_SUCCESS,
* ::CUDA_ERROR_DEINITIALIZED,
* ::CUDA_ERROR_NOT_INITIALIZED,
* ::CUDA_ERROR_INVALID_CONTEXT,
* ::CUDA_ERROR_INVALID_VALUE,
* ::CUDA_ERROR_INVALID_HANDLE,
* ::CUDA_ERROR_NOT_MAPPED,
* ::CUDA_ERROR_NOT_MAPPED_AS_ARRAY
* \notefnerr
*
* \sa ::cuGraphicsResourceGetMappedPointer
*/
CUresult CUDAAPI cuGraphicsSubResourceGetMappedArray(CUarray *pArray, CUgraphicsResource resource, unsigned int arrayIndex, unsigned int mipLevel);
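/*
 * Illustrative sketch: map a registered resource and fetch the CUarray
 * behind mipmap level 0 of its first subresource. The returned handle is
 * only valid while the resource remains mapped; the caller is expected to
 * call cuGraphicsUnmapResources() when done with the array.
 */
static CUresult getLevel0Array(CUgraphicsResource res, CUstream hStream, CUarray *pOut)
{
    CUresult rc = cuGraphicsMapResources(1, &res, hStream);
    if (rc != CUDA_SUCCESS) return rc;
    rc = cuGraphicsSubResourceGetMappedArray(pOut, res, 0, 0);
    if (rc != CUDA_SUCCESS)
        cuGraphicsUnmapResources(1, &res, hStream); /* undo the map on failure */
    return rc;
}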
#if __CUDA_API_VERSION >= 3020
/**
* \brief Get a device pointer through which to access a mapped graphics resource.
*
* Returns in \p *pDevPtr a pointer through which the mapped graphics resource
* \p resource may be accessed.
* Returns in \p pSize the size of the memory in bytes which may be accessed from that pointer.
* The value set in \p *pDevPtr may change every time that \p resource is mapped.
*
* If \p resource is not a buffer then it cannot be accessed via a pointer and
* ::CUDA_ERROR_NOT_MAPPED_AS_POINTER is returned.
* If \p resource is not mapped then ::CUDA_ERROR_NOT_MAPPED is returned.
*
* \param pDevPtr - Returned pointer through which \p resource may be accessed
* \param pSize - Returned size of the buffer accessible starting at \p *pDevPtr
* \param resource - Mapped resource to access
*
* \return
* ::CUDA_SUCCESS,
* ::CUDA_ERROR_DEINITIALIZED,
* ::CUDA_ERROR_NOT_INITIALIZED,
* ::CUDA_ERROR_INVALID_CONTEXT,
* ::CUDA_ERROR_INVALID_VALUE,
* ::CUDA_ERROR_INVALID_HANDLE,
* ::CUDA_ERROR_NOT_MAPPED,
* ::CUDA_ERROR_NOT_MAPPED_AS_POINTER
* \notefnerr
*
* \sa
* ::cuGraphicsMapResources,
* ::cuGraphicsSubResourceGetMappedArray
*/
CUresult CUDAAPI cuGraphicsResourceGetMappedPointer(CUdeviceptr *pDevPtr, size_t *pSize, CUgraphicsResource resource);
#endif /* __CUDA_API_VERSION >= 3020 */
/**
* \brief Set usage flags for mapping a graphics resource
*
* Set \p flags for mapping the graphics resource \p resource.
*
* Changes to \p flags will take effect the next time \p resource is mapped.
* The \p flags argument may be any of the following:
* - ::CU_GRAPHICS_MAP_RESOURCE_FLAGS_NONE: Specifies no hints about how this
* resource will be used. It is therefore assumed that this resource will be
* read from and written to by CUDA kernels. This is the default value.
* - ::CU_GRAPHICS_MAP_RESOURCE_FLAGS_READONLY: Specifies that CUDA kernels which
* access this resource will not write to this resource.
* - ::CU_GRAPHICS_MAP_RESOURCE_FLAGS_WRITEDISCARD: Specifies that CUDA kernels
* which access this resource will not read from this resource and will
* write over the entire contents of the resource, so none of the data
* previously stored in the resource will be preserved.
*
* If \p resource is presently mapped for access by CUDA then
* ::CUDA_ERROR_ALREADY_MAPPED is returned.
* If \p flags is not one of the above values then ::CUDA_ERROR_INVALID_VALUE is returned.
*
* \param resource - Registered resource to set flags for
* \param flags - Parameters for resource mapping
*
* \return
* ::CUDA_SUCCESS,
* ::CUDA_ERROR_DEINITIALIZED,
* ::CUDA_ERROR_NOT_INITIALIZED,
* ::CUDA_ERROR_INVALID_CONTEXT,
* ::CUDA_ERROR_INVALID_VALUE,
* ::CUDA_ERROR_INVALID_HANDLE,
* ::CUDA_ERROR_ALREADY_MAPPED
* \notefnerr
*
* \sa
* ::cuGraphicsMapResources
*/
CUresult CUDAAPI cuGraphicsResourceSetMapFlags(CUgraphicsResource resource, unsigned int flags);
/**
* \brief Map graphics resources for access by CUDA
*
* Maps the \p count graphics resources in \p resources for access by CUDA.
*
* The resources in \p resources may be accessed by CUDA until they
* are unmapped. The graphics API from which \p resources were registered
* should not access any resources while they are mapped by CUDA. If an
* application does so, the results are undefined.
*
* This function provides the synchronization guarantee that any graphics calls
* issued before ::cuGraphicsMapResources() will complete before any subsequent
* CUDA work issued in \p stream begins.
*
* If \p resources includes any duplicate entries then ::CUDA_ERROR_INVALID_HANDLE is returned.
* If any of \p resources are presently mapped for access by CUDA then ::CUDA_ERROR_ALREADY_MAPPED is returned.
*
* \param count - Number of resources to map
* \param resources - Resources to map for CUDA usage
* \param hStream - Stream with which to synchronize
*
* \return
* ::CUDA_SUCCESS,
* ::CUDA_ERROR_DEINITIALIZED,
* ::CUDA_ERROR_NOT_INITIALIZED,
* ::CUDA_ERROR_INVALID_CONTEXT,
* ::CUDA_ERROR_INVALID_HANDLE,
* ::CUDA_ERROR_ALREADY_MAPPED,
* ::CUDA_ERROR_UNKNOWN
* \notefnerr
*
* \sa
* ::cuGraphicsResourceGetMappedPointer,
* ::cuGraphicsSubResourceGetMappedArray,
* ::cuGraphicsUnmapResources
*/
CUresult CUDAAPI cuGraphicsMapResources(unsigned int count, CUgraphicsResource *resources, CUstream hStream);
/**
* \brief Unmap graphics resources.
*
* Unmaps the \p count graphics resources in \p resources.
*
* Once unmapped, the resources in \p resources may not be accessed by CUDA
* until they are mapped again.
*
* This function provides the synchronization guarantee that any CUDA work issued
* in \p stream before ::cuGraphicsUnmapResources() will complete before any
* subsequently issued graphics work begins.
*
* If \p resources includes any duplicate entries then ::CUDA_ERROR_INVALID_HANDLE is returned.
* If any of \p resources are not presently mapped for access by CUDA then ::CUDA_ERROR_NOT_MAPPED is returned.
*
* \param count - Number of resources to unmap
* \param resources - Resources to unmap
* \param hStream - Stream with which to synchronize
*
* \return
* ::CUDA_SUCCESS,
* ::CUDA_ERROR_DEINITIALIZED,
* ::CUDA_ERROR_NOT_INITIALIZED,
* ::CUDA_ERROR_INVALID_CONTEXT,
* ::CUDA_ERROR_INVALID_HANDLE,
* ::CUDA_ERROR_NOT_MAPPED,
* ::CUDA_ERROR_UNKNOWN
* \notefnerr
*
* \sa
* ::cuGraphicsMapResources
*/
CUresult CUDAAPI cuGraphicsUnmapResources(unsigned int count, CUgraphicsResource *resources, CUstream hStream);
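/*
 * Illustrative sketch of the full set-flags/map/use/unmap cycle for a
 * buffer-like resource. Declaring the resource write-discard lets the
 * driver skip preserving its previous contents; the memset stands in for a
 * real kernel launch. Note the enum constant is spelled
 * CU_GRAPHICS_MAP_RESOURCE_FLAGS_WRITE_DISCARD in CUgraphicsMapResourceFlags,
 * even though the prose above omits the underscore.
 */
static CUresult fillRegisteredBuffer(CUgraphicsResource res, CUstream hStream)
{
    CUdeviceptr dptr;
    size_t size;
    CUresult rc;

    rc = cuGraphicsResourceSetMapFlags(res, CU_GRAPHICS_MAP_RESOURCE_FLAGS_WRITE_DISCARD);
    if (rc != CUDA_SUCCESS) return rc;
    rc = cuGraphicsMapResources(1, &res, hStream);
    if (rc != CUDA_SUCCESS) return rc;
    rc = cuGraphicsResourceGetMappedPointer(&dptr, &size, res);
    if (rc == CUDA_SUCCESS)
        rc = cuMemsetD8(dptr, 0, size);          /* stand-in for a kernel launch */
    cuGraphicsUnmapResources(1, &res, hStream);  /* always release the mapping */
    return rc;
}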
/** @} */ /* END CUDA_GRAPHICS */
CUresult CUDAAPI cuGetExportTable(const void **ppExportTable, const CUuuid *pExportTableId);
/** @} */ /* END CUDA_DRIVER */
/**
* CUDA API versioning support
*/
#if defined(__CUDA_API_VERSION_INTERNAL)
#undef cuDeviceTotalMem
#undef cuCtxCreate
#undef cuModuleGetGlobal
#undef cuMemGetInfo
#undef cuMemAlloc
#undef cuMemAllocPitch
#undef cuMemFree
#undef cuMemGetAddressRange
#undef cuMemAllocHost
#undef cuMemHostGetDevicePointer
#undef cuMemcpyHtoD
#undef cuMemcpyDtoH
#undef cuMemcpyDtoD
#undef cuMemcpyDtoA
#undef cuMemcpyAtoD
#undef cuMemcpyHtoA
#undef cuMemcpyAtoH
#undef cuMemcpyAtoA
#undef cuMemcpyHtoAAsync
#undef cuMemcpyAtoHAsync
#undef cuMemcpy2D
#undef cuMemcpy2DUnaligned
#undef cuMemcpy3D
#undef cuMemcpyHtoDAsync
#undef cuMemcpyDtoHAsync
#undef cuMemcpyDtoDAsync
#undef cuMemcpy2DAsync
#undef cuMemcpy3DAsync
#undef cuMemsetD8
#undef cuMemsetD16
#undef cuMemsetD32
#undef cuMemsetD2D8
#undef cuMemsetD2D16
#undef cuMemsetD2D32
#undef cuArrayCreate
#undef cuArrayGetDescriptor
#undef cuArray3DCreate
#undef cuArray3DGetDescriptor
#undef cuTexRefSetAddress
#undef cuTexRefSetAddress2D
#undef cuTexRefGetAddress
#undef cuGraphicsResourceGetMappedPointer
#endif /* __CUDA_API_VERSION_INTERNAL */
/**
* CUDA API made obsolete at API version 3020
*/
#if defined(__CUDA_API_VERSION_INTERNAL)
#define CUdeviceptr CUdeviceptr_v1
#define CUDA_MEMCPY2D_st CUDA_MEMCPY2D_v1_st
#define CUDA_MEMCPY2D CUDA_MEMCPY2D_v1
#define CUDA_MEMCPY3D_st CUDA_MEMCPY3D_v1_st
#define CUDA_MEMCPY3D CUDA_MEMCPY3D_v1
#define CUDA_ARRAY_DESCRIPTOR_st CUDA_ARRAY_DESCRIPTOR_v1_st
#define CUDA_ARRAY_DESCRIPTOR CUDA_ARRAY_DESCRIPTOR_v1
#define CUDA_ARRAY3D_DESCRIPTOR_st CUDA_ARRAY3D_DESCRIPTOR_v1_st
#define CUDA_ARRAY3D_DESCRIPTOR CUDA_ARRAY3D_DESCRIPTOR_v1
#endif /* __CUDA_API_VERSION_INTERNAL */
#if defined(__CUDA_API_VERSION_INTERNAL) || __CUDA_API_VERSION < 3020
typedef unsigned int CUdeviceptr;
typedef struct CUDA_MEMCPY2D_st
{
unsigned int srcXInBytes; /**< Source X in bytes */
unsigned int srcY; /**< Source Y */
CUmemorytype srcMemoryType; /**< Source memory type (host, device, array) */
const void *srcHost; /**< Source host pointer */
CUdeviceptr srcDevice; /**< Source device pointer */
CUarray srcArray; /**< Source array reference */
unsigned int srcPitch; /**< Source pitch (ignored when src is array) */
unsigned int dstXInBytes; /**< Destination X in bytes */
unsigned int dstY; /**< Destination Y */
CUmemorytype dstMemoryType; /**< Destination memory type (host, device, array) */
void *dstHost; /**< Destination host pointer */
CUdeviceptr dstDevice; /**< Destination device pointer */
CUarray dstArray; /**< Destination array reference */
unsigned int dstPitch; /**< Destination pitch (ignored when dst is array) */
unsigned int WidthInBytes; /**< Width of 2D memory copy in bytes */
unsigned int Height; /**< Height of 2D memory copy */
} CUDA_MEMCPY2D;
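/*
 * Illustrative sketch: filling the legacy (pre-3.2, unsigned int) 2D copy
 * descriptor for a pitched host-to-device copy. Fields that do not apply
 * to the chosen memory types are zeroed via memset (<string.h> assumed);
 * the parameter names are made up for this example.
 */
static CUresult copyHostToPitched(const void *hostBuf, unsigned int widthBytes,
                                  unsigned int height,
                                  CUdeviceptr devPtr, unsigned int devPitch)
{
    CUDA_MEMCPY2D cp;
    memset(&cp, 0, sizeof(cp));          /* unused fields must be zero */
    cp.srcMemoryType = CU_MEMORYTYPE_HOST;
    cp.srcHost       = hostBuf;
    cp.srcPitch      = widthBytes;
    cp.dstMemoryType = CU_MEMORYTYPE_DEVICE;
    cp.dstDevice     = devPtr;
    cp.dstPitch      = devPitch;
    cp.WidthInBytes  = widthBytes;
    cp.Height        = height;
    return cuMemcpy2D(&cp);
}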
typedef struct CUDA_MEMCPY3D_st
{
unsigned int srcXInBytes; /**< Source X in bytes */
unsigned int srcY; /**< Source Y */
unsigned int srcZ; /**< Source Z */
unsigned int srcLOD; /**< Source LOD */
CUmemorytype srcMemoryType; /**< Source memory type (host, device, array) */
const void *srcHost; /**< Source host pointer */
CUdeviceptr srcDevice; /**< Source device pointer */
CUarray srcArray; /**< Source array reference */
void *reserved0; /**< Must be NULL */
unsigned int srcPitch; /**< Source pitch (ignored when src is array) */
unsigned int srcHeight; /**< Source height (ignored when src is array; may be 0 if Depth==1) */
unsigned int dstXInBytes; /**< Destination X in bytes */
unsigned int dstY; /**< Destination Y */
unsigned int dstZ; /**< Destination Z */
unsigned int dstLOD; /**< Destination LOD */
CUmemorytype dstMemoryType; /**< Destination memory type (host, device, array) */
void *dstHost; /**< Destination host pointer */
CUdeviceptr dstDevice; /**< Destination device pointer */
CUarray dstArray; /**< Destination array reference */
void *reserved1; /**< Must be NULL */
unsigned int dstPitch; /**< Destination pitch (ignored when dst is array) */
unsigned int dstHeight; /**< Destination height (ignored when dst is array; may be 0 if Depth==1) */
unsigned int WidthInBytes; /**< Width of 3D memory copy in bytes */
unsigned int Height; /**< Height of 3D memory copy */
unsigned int Depth; /**< Depth of 3D memory copy */
} CUDA_MEMCPY3D;
typedef struct CUDA_ARRAY_DESCRIPTOR_st
{
unsigned int Width; /**< Width of array */
unsigned int Height; /**< Height of array */
CUarray_format Format; /**< Array format */
unsigned int NumChannels; /**< Channels per array element */
} CUDA_ARRAY_DESCRIPTOR;
typedef struct CUDA_ARRAY3D_DESCRIPTOR_st
{
unsigned int Width; /**< Width of 3D array */
unsigned int Height; /**< Height of 3D array */
unsigned int Depth; /**< Depth of 3D array */
CUarray_format Format; /**< Array format */
unsigned int NumChannels; /**< Channels per array element */
unsigned int Flags; /**< Flags */
} CUDA_ARRAY3D_DESCRIPTOR;
CUresult CUDAAPI cuDeviceTotalMem(unsigned int *bytes, CUdevice dev);
CUresult CUDAAPI cuCtxCreate(CUcontext *pctx, unsigned int flags, CUdevice dev);
CUresult CUDAAPI cuModuleGetGlobal(CUdeviceptr *dptr, unsigned int *bytes, CUmodule hmod, const char *name);
CUresult CUDAAPI cuMemGetInfo(unsigned int *free, unsigned int *total);
CUresult CUDAAPI cuMemAlloc(CUdeviceptr *dptr, unsigned int bytesize);
CUresult CUDAAPI cuMemAllocPitch(CUdeviceptr *dptr, unsigned int *pPitch, unsigned int WidthInBytes, unsigned int Height, unsigned int ElementSizeBytes);
CUresult CUDAAPI cuMemFree(CUdeviceptr dptr);
CUresult CUDAAPI cuMemGetAddressRange(CUdeviceptr *pbase, unsigned int *psize, CUdeviceptr dptr);
CUresult CUDAAPI cuMemAllocHost(void **pp, unsigned int bytesize);
CUresult CUDAAPI cuMemHostGetDevicePointer(CUdeviceptr *pdptr, void *p, unsigned int Flags);
CUresult CUDAAPI cuMemcpyHtoD(CUdeviceptr dstDevice, const void *srcHost, unsigned int ByteCount);
CUresult CUDAAPI cuMemcpyDtoH(void *dstHost, CUdeviceptr srcDevice, unsigned int ByteCount);
CUresult CUDAAPI cuMemcpyDtoD(CUdeviceptr dstDevice, CUdeviceptr srcDevice, unsigned int ByteCount);
CUresult CUDAAPI cuMemcpyDtoA(CUarray dstArray, unsigned int dstOffset, CUdeviceptr srcDevice, unsigned int ByteCount);
CUresult CUDAAPI cuMemcpyAtoD(CUdeviceptr dstDevice, CUarray srcArray, unsigned int srcOffset, unsigned int ByteCount);
CUresult CUDAAPI cuMemcpyHtoA(CUarray dstArray, unsigned int dstOffset, const void *srcHost, unsigned int ByteCount);
CUresult CUDAAPI cuMemcpyAtoH(void *dstHost, CUarray srcArray, unsigned int srcOffset, unsigned int ByteCount);
CUresult CUDAAPI cuMemcpyAtoA(CUarray dstArray, unsigned int dstOffset, CUarray srcArray, unsigned int srcOffset, unsigned int ByteCount);
CUresult CUDAAPI cuMemcpyHtoAAsync(CUarray dstArray, unsigned int dstOffset, const void *srcHost, unsigned int ByteCount, CUstream hStream);
CUresult CUDAAPI cuMemcpyAtoHAsync(void *dstHost, CUarray srcArray, unsigned int srcOffset, unsigned int ByteCount, CUstream hStream);
CUresult CUDAAPI cuMemcpy2D(const CUDA_MEMCPY2D *pCopy);
CUresult CUDAAPI cuMemcpy2DUnaligned(const CUDA_MEMCPY2D *pCopy);
CUresult CUDAAPI cuMemcpy3D(const CUDA_MEMCPY3D *pCopy);
CUresult CUDAAPI cuMemcpyHtoDAsync(CUdeviceptr dstDevice, const void *srcHost, unsigned int ByteCount, CUstream hStream);
CUresult CUDAAPI cuMemcpyDtoHAsync(void *dstHost, CUdeviceptr srcDevice, unsigned int ByteCount, CUstream hStream);
CUresult CUDAAPI cuMemcpyDtoDAsync(CUdeviceptr dstDevice, CUdeviceptr srcDevice, unsigned int ByteCount, CUstream hStream);
CUresult CUDAAPI cuMemcpy2DAsync(const CUDA_MEMCPY2D *pCopy, CUstream hStream);
CUresult CUDAAPI cuMemcpy3DAsync(const CUDA_MEMCPY3D *pCopy, CUstream hStream);
CUresult CUDAAPI cuMemsetD8(CUdeviceptr dstDevice, unsigned char uc, unsigned int N);
CUresult CUDAAPI cuMemsetD16(CUdeviceptr dstDevice, unsigned short us, unsigned int N);
CUresult CUDAAPI cuMemsetD32(CUdeviceptr dstDevice, unsigned int ui, unsigned int N);
CUresult CUDAAPI cuMemsetD2D8(CUdeviceptr dstDevice, unsigned int dstPitch, unsigned char uc, unsigned int Width, unsigned int Height);
CUresult CUDAAPI cuMemsetD2D16(CUdeviceptr dstDevice, unsigned int dstPitch, unsigned short us, unsigned int Width, unsigned int Height);
CUresult CUDAAPI cuMemsetD2D32(CUdeviceptr dstDevice, unsigned int dstPitch, unsigned int ui, unsigned int Width, unsigned int Height);
CUresult CUDAAPI cuArrayCreate(CUarray *pHandle, const CUDA_ARRAY_DESCRIPTOR *pAllocateArray);
CUresult CUDAAPI cuArrayGetDescriptor(CUDA_ARRAY_DESCRIPTOR *pArrayDescriptor, CUarray hArray);
CUresult CUDAAPI cuArray3DCreate(CUarray *pHandle, const CUDA_ARRAY3D_DESCRIPTOR *pAllocateArray);
CUresult CUDAAPI cuArray3DGetDescriptor(CUDA_ARRAY3D_DESCRIPTOR *pArrayDescriptor, CUarray hArray);
CUresult CUDAAPI cuTexRefSetAddress(unsigned int *ByteOffset, CUtexref hTexRef, CUdeviceptr dptr, unsigned int bytes);
CUresult CUDAAPI cuTexRefSetAddress2D(CUtexref hTexRef, const CUDA_ARRAY_DESCRIPTOR *desc, CUdeviceptr dptr, unsigned int Pitch);
CUresult CUDAAPI cuTexRefGetAddress(CUdeviceptr *pdptr, CUtexref hTexRef);
CUresult CUDAAPI cuGraphicsResourceGetMappedPointer(CUdeviceptr *pDevPtr, unsigned int *pSize, CUgraphicsResource resource);
#endif /* __CUDA_API_VERSION_INTERNAL || __CUDA_API_VERSION < 3020 */
#if defined(__CUDA_API_VERSION_INTERNAL)
#undef CUdeviceptr
#undef CUDA_MEMCPY2D_st
#undef CUDA_MEMCPY2D
#undef CUDA_MEMCPY3D_st
#undef CUDA_MEMCPY3D
#undef CUDA_ARRAY_DESCRIPTOR_st
#undef CUDA_ARRAY_DESCRIPTOR
#undef CUDA_ARRAY3D_DESCRIPTOR_st
#undef CUDA_ARRAY3D_DESCRIPTOR
#endif /* __CUDA_API_VERSION_INTERNAL */
#ifdef __cplusplus
}
#endif
#undef __CUDA_API_VERSION
#endif /* __cuda_cuda_h__ */
cudaGL.h
* source code with only those rights set forth herein.
*
* Any use of this source code in individual and commercial software must
* include, in the user documentation and internal comments to the code,
* the above Disclaimer and U.S. Government End Users Notice.
*/
#ifndef CUDAGL_H
#define CUDAGL_H
/**
* CUDA API versioning support
*/
#if defined(CUDA_FORCE_API_VERSION)
#if (CUDA_FORCE_API_VERSION == 3010)
#define __CUDA_API_VERSION 3010
#else
#error "Unsupported value of CUDA_FORCE_API_VERSION"
#endif
#else
#define __CUDA_API_VERSION 3020
#endif /* CUDA_FORCE_API_VERSION */
#if defined(__CUDA_API_VERSION_INTERNAL) || __CUDA_API_VERSION >= 3020
#define cuGLCtxCreate cuGLCtxCreate_v2
#define cuGLMapBufferObject cuGLMapBufferObject_v2
#define cuGLMapBufferObjectAsync cuGLMapBufferObjectAsync_v2
#endif /* __CUDA_API_VERSION_INTERNAL || __CUDA_API_VERSION >= 3020 */
#ifdef __cplusplus
extern "C" {
#endif
/**
* \defgroup CUDA_GL OpenGL Interoperability
* \ingroup CUDA_DRIVER
*
* This section describes the OpenGL interoperability functions of the
* low-level CUDA driver application programming interface.
*
* @{
*/
#if defined(_WIN32)
#if !defined(WGL_NV_gpu_affinity)
typedef void* HGPUNV;
#endif
#endif /* _WIN32 */
#if __CUDA_API_VERSION >= 3020
/**
* \brief Create a CUDA context for interoperability with OpenGL
*
* Creates a new CUDA context, initializes OpenGL interoperability, and
* associates the CUDA context with the calling thread. It must be called
* before performing any other OpenGL interoperability operations. It may fail
* if the needed OpenGL driver facilities are not available. For usage of the
* \p Flags parameter, see ::cuCtxCreate().
*
* \param pCtx - Returned CUDA context
* \param Flags - Options for CUDA context creation
* \param device - Device on which to create the context
*
* \return
* ::CUDA_SUCCESS,
* ::CUDA_ERROR_DEINITIALIZED,
* ::CUDA_ERROR_NOT_INITIALIZED,
* ::CUDA_ERROR_INVALID_CONTEXT,
* ::CUDA_ERROR_INVALID_VALUE,
* ::CUDA_ERROR_OUT_OF_MEMORY
* \notefnerr
*
* \sa ::cuCtxCreate, ::cuGLInit, ::cuGLMapBufferObject,
* ::cuGLRegisterBufferObject, ::cuGLUnmapBufferObject,
* ::cuGLUnregisterBufferObject, ::cuGLMapBufferObjectAsync,
* ::cuGLUnmapBufferObjectAsync, ::cuGLSetBufferObjectMapFlags,
* ::cuWGLGetDevice
*/
CUresult CUDAAPI cuGLCtxCreate(CUcontext *pCtx, unsigned int Flags, CUdevice device);
#endif /* __CUDA_API_VERSION >= 3020 */
/**
* \brief Registers an OpenGL buffer object
*
* Registers the buffer object specified by \p buffer for access by
* CUDA. A handle to the registered object is returned as \p
* pCudaResource. The map flags \p Flags specify the intended usage,
* as follows:
*
* - ::CU_GRAPHICS_MAP_RESOURCE_FLAGS_NONE: Specifies no hints about how this
* resource will be used. It is therefore assumed that this resource will be
* read from and written to by CUDA. This is the default value.
* - ::CU_GRAPHICS_MAP_RESOURCE_FLAGS_READ_ONLY: Specifies that CUDA
* will not write to this resource.
* - ::CU_GRAPHICS_MAP_RESOURCE_FLAGS_WRITE_DISCARD: Specifies that
* CUDA will not read from this resource and will write over the
* entire contents of the resource, so none of the data previously
* stored in the resource will be preserved.
*
* \param pCudaResource - Pointer to the returned object handle
* \param buffer - name of buffer object to be registered
* \param Flags - Map flags
*
* \return
* ::CUDA_SUCCESS,
* ::CUDA_ERROR_INVALID_HANDLE,
* ::CUDA_ERROR_ALREADY_MAPPED,
* ::CUDA_ERROR_INVALID_CONTEXT,
* \notefnerr
*
* \sa
* ::cuGLCtxCreate,
* ::cuGraphicsUnregisterResource,
* ::cuGraphicsMapResources,
* ::cuGraphicsResourceGetMappedPointer
*/
CUresult CUDAAPI cuGraphicsGLRegisterBuffer(CUgraphicsResource *pCudaResource, GLuint buffer, unsigned int Flags);
/**
* \brief Register an OpenGL texture or renderbuffer object
*
* Registers the texture or renderbuffer object specified by \p image for access by CUDA.
* \p target must match the type of the object.
* A handle to the registered object is returned as \p pCudaResource.
* The map flags \p Flags specify the intended usage, as follows:
*
* - ::CU_GRAPHICS_MAP_RESOURCE_FLAGS_NONE: Specifies no hints about how this
* resource will be used. It is therefore assumed that this resource will be
* read from and written to by CUDA. This is the default value.
* - ::CU_GRAPHICS_MAP_RESOURCE_FLAGS_READ_ONLY: Specifies that CUDA
* will not write to this resource.
* - ::CU_GRAPHICS_MAP_RESOURCE_FLAGS_WRITE_DISCARD: Specifies that
* CUDA will not read from this resource and will write over the
* entire contents of the resource, so none of the data previously
* stored in the resource will be preserved.
*
* The following image classes are currently disallowed:
* - Textures with borders
* - Multisampled renderbuffers
*
* \param pCudaResource - Pointer to the returned object handle
* \param image - name of texture or renderbuffer object to be registered
* \param target - Identifies the type of object specified by \p image, and must be one of
* ::GL_TEXTURE_2D,
* ::GL_TEXTURE_RECTANGLE,
* ::GL_TEXTURE_CUBE_MAP,
* ::GL_TEXTURE_3D,
* ::GL_TEXTURE_2D_ARRAY, or
* ::GL_RENDERBUFFER.
* \param Flags - Map flags
*
* \return
* ::CUDA_SUCCESS,
* ::CUDA_ERROR_INVALID_HANDLE,
* ::CUDA_ERROR_ALREADY_MAPPED,
* ::CUDA_ERROR_INVALID_CONTEXT,
* \notefnerr
*
* \sa
* ::cuGLCtxCreate,
* ::cuGraphicsUnregisterResource,
* ::cuGraphicsMapResources,
* ::cuGraphicsSubResourceGetMappedArray
*/
CUresult CUDAAPI cuGraphicsGLRegisterImage(CUgraphicsResource *pCudaResource, GLuint image, GLenum target, unsigned int Flags);
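/*
 * Illustrative sketch: register a GL texture for CUDA access and pull out
 * its level-0 array while mapped. "tex" is assumed to be a valid
 * GL_TEXTURE_2D name created by the application; unregistration and
 * unmapping are left to the caller.
 */
static CUresult registerAndMapGLTexture(GLuint tex, CUstream hStream, CUarray *pArray)
{
    CUgraphicsResource res;
    CUresult rc = cuGraphicsGLRegisterImage(&res, tex, GL_TEXTURE_2D,
                                            CU_GRAPHICS_MAP_RESOURCE_FLAGS_NONE);
    if (rc != CUDA_SUCCESS) return rc;
    rc = cuGraphicsMapResources(1, &res, hStream);
    if (rc != CUDA_SUCCESS) return rc;
    return cuGraphicsSubResourceGetMappedArray(pArray, res, 0, 0);
}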
#ifdef _WIN32
/**
* \brief Gets the CUDA device associated with hGpu
*
* Returns in \p *pDevice the CUDA device associated with a \p hGpu, if
* applicable.
*
* \param pDevice - Device associated with hGpu
* \param hGpu - Handle to a GPU, as queried via ::WGL_NV_gpu_affinity()
*
* \return
* ::CUDA_SUCCESS,
* ::CUDA_ERROR_DEINITIALIZED,
* ::CUDA_ERROR_NOT_INITIALIZED,
* ::CUDA_ERROR_INVALID_CONTEXT,
* ::CUDA_ERROR_INVALID_VALUE
* \notefnerr
*
* \sa ::cuGLCtxCreate, ::cuGLInit, ::cuGLMapBufferObject,
* ::cuGLRegisterBufferObject, ::cuGLUnmapBufferObject,
* ::cuGLUnregisterBufferObject, ::cuGLUnmapBufferObjectAsync,
* ::cuGLSetBufferObjectMapFlags
*/
CUresult CUDAAPI cuWGLGetDevice(CUdevice *pDevice, HGPUNV hGpu);
#endif /* _WIN32 */
/**
* \defgroup CUDA_GL_DEPRECATED OpenGL Interoperability [DEPRECATED]
* This section describes deprecated OpenGL interoperability functionality.
*
* @{
*/
/** Flags to map or unmap a resource */
typedef enum CUGLmap_flags_enum { typedef enum CUGLmap_flags_enum {
CU_GL_MAP_RESOURCE_FLAGS_NONE = 0x00, CU_GL_MAP_RESOURCE_FLAGS_NONE = 0x00,
CU_GL_MAP_RESOURCE_FLAGS_READ_ONLY = 0x01, CU_GL_MAP_RESOURCE_FLAGS_READ_ONLY = 0x01,
CU_GL_MAP_RESOURCE_FLAGS_WRITE_DISCARD = 0x02, CU_GL_MAP_RESOURCE_FLAGS_WRITE_DISCARD = 0x02,
} CUGLmap_flags; } CUGLmap_flags;
/**
* \brief Initializes OpenGL interoperability
*
* \deprecated This function is deprecated as of Cuda 3.0.
*
* Initializes OpenGL interoperability. This function is deprecated
* and calling it is no longer required. It may fail if the needed
* OpenGL driver facilities are not available.
*
* \return
* ::CUDA_SUCCESS,
* ::CUDA_ERROR_DEINITIALIZED,
* ::CUDA_ERROR_NOT_INITIALIZED,
* ::CUDA_ERROR_INVALID_CONTEXT,
* ::CUDA_ERROR_UNKNOWN
* \notefnerr
*
* \sa ::cuGLCtxCreate, ::cuGLMapBufferObject,
* ::cuGLRegisterBufferObject, ::cuGLUnmapBufferObject,
* ::cuGLUnregisterBufferObject, ::cuGLMapBufferObjectAsync,
* ::cuGLUnmapBufferObjectAsync, ::cuGLSetBufferObjectMapFlags,
* ::cuWGLGetDevice
*/
CUresult CUDAAPI cuGLInit(void);
/**
* \brief Registers an OpenGL buffer object
*
* \deprecated This function is deprecated as of Cuda 3.0.
*
* Registers the buffer object specified by \p buffer for access by
* CUDA. This function must be called before CUDA can map the buffer
* object. There must be a valid OpenGL context bound to the current
* thread when this function is called, and the buffer name is
* resolved by that context.
*
* \param buffer - The name of the buffer object to register.
*
* \return
* ::CUDA_SUCCESS,
* ::CUDA_ERROR_DEINITIALIZED,
* ::CUDA_ERROR_NOT_INITIALIZED,
* ::CUDA_ERROR_INVALID_CONTEXT,
* ::CUDA_ERROR_ALREADY_MAPPED
* \notefnerr
*
* \sa ::cuGraphicsGLRegisterBuffer
*/
CUresult CUDAAPI cuGLRegisterBufferObject(GLuint buffer);
#if __CUDA_API_VERSION >= 3020
/**
* \brief Maps an OpenGL buffer object
*
* \deprecated This function is deprecated as of Cuda 3.0.
*
* Maps the buffer object specified by \p buffer into the address space of the
* current CUDA context and returns in \p *dptr and \p *size the base pointer
* and size of the resulting mapping.
*
* There must be a valid OpenGL context bound to the current thread
* when this function is called. This must be the same context, or a
* member of the same shareGroup, as the context that was bound when
* the buffer was registered.
*
* All streams in the current CUDA context are synchronized with the
* current GL context.
*
* \param dptr - Returned mapped base pointer
* \param size - Returned size of mapping
* \param buffer - The name of the buffer object to map
*
* \return
* ::CUDA_SUCCESS,
* ::CUDA_ERROR_DEINITIALIZED,
* ::CUDA_ERROR_NOT_INITIALIZED,
* ::CUDA_ERROR_INVALID_CONTEXT,
* ::CUDA_ERROR_INVALID_VALUE,
* ::CUDA_ERROR_MAP_FAILED
* \notefnerr
*
* \sa ::cuGraphicsMapResources
*/
CUresult CUDAAPI cuGLMapBufferObject(CUdeviceptr *dptr, size_t *size, GLuint buffer);
#endif /* __CUDA_API_VERSION >= 3020 */
/**
* \brief Unmaps an OpenGL buffer object
*
* \deprecated This function is deprecated as of Cuda 3.0.
*
* Unmaps the buffer object specified by \p buffer for access by CUDA.
*
* There must be a valid OpenGL context bound to the current thread
* when this function is called. This must be the same context, or a
* member of the same shareGroup, as the context that was bound when
* the buffer was registered.
*
* All streams in the current CUDA context are synchronized with the
* current GL context.
*
* \param buffer - Buffer object to unmap
*
* \return
* ::CUDA_SUCCESS,
* ::CUDA_ERROR_DEINITIALIZED,
* ::CUDA_ERROR_NOT_INITIALIZED,
* ::CUDA_ERROR_INVALID_CONTEXT,
* ::CUDA_ERROR_INVALID_VALUE
* \notefnerr
*
* \sa ::cuGraphicsUnmapResources
*/
CUresult CUDAAPI cuGLUnmapBufferObject(GLuint buffer);
/**
* \brief Unregister an OpenGL buffer object
*
* \deprecated This function is deprecated as of Cuda 3.0.
*
* Unregisters the buffer object specified by \p buffer. This
* releases any resources associated with the registered buffer.
* After this call, the buffer may no longer be mapped for access by
* CUDA.
*
* There must be a valid OpenGL context bound to the current thread
* when this function is called. This must be the same context, or a
* member of the same shareGroup, as the context that was bound when
* the buffer was registered.
*
* \param buffer - Name of the buffer object to unregister
*
* \return
* ::CUDA_SUCCESS,
* ::CUDA_ERROR_DEINITIALIZED,
* ::CUDA_ERROR_NOT_INITIALIZED,
* ::CUDA_ERROR_INVALID_CONTEXT,
* ::CUDA_ERROR_INVALID_VALUE
* \notefnerr
*
* \sa ::cuGraphicsUnregisterResource
*/
CUresult CUDAAPI cuGLUnregisterBufferObject(GLuint buffer);
/**
* \brief Set the map flags for an OpenGL buffer object
*
* \deprecated This function is deprecated as of Cuda 3.0.
*
* Sets the map flags for the buffer object specified by \p buffer.
*
* Changes to \p Flags will take effect the next time \p buffer is mapped.
* The \p Flags argument may be any of the following:
* - ::CU_GL_MAP_RESOURCE_FLAGS_NONE: Specifies no hints about how this
* resource will be used. It is therefore assumed that this resource will be
* read from and written to by CUDA kernels. This is the default value.
* - ::CU_GL_MAP_RESOURCE_FLAGS_READ_ONLY: Specifies that CUDA kernels which
* access this resource will not write to this resource.
* - ::CU_GL_MAP_RESOURCE_FLAGS_WRITE_DISCARD: Specifies that CUDA kernels
* which access this resource will not read from this resource and will
* write over the entire contents of the resource, so none of the data
* previously stored in the resource will be preserved.
*
* If \p buffer has not been registered for use with CUDA, then
* ::CUDA_ERROR_INVALID_HANDLE is returned. If \p buffer is presently
* mapped for access by CUDA, then ::CUDA_ERROR_ALREADY_MAPPED is returned.
*
* There must be a valid OpenGL context bound to the current thread
* when this function is called. This must be the same context, or a
* member of the same shareGroup, as the context that was bound when
* the buffer was registered.
*
* \param buffer - Registered buffer object to set map flags for
* \param Flags - Map flags
*
* \return
* ::CUDA_SUCCESS,
* ::CUDA_ERROR_NOT_INITIALIZED,
* ::CUDA_ERROR_INVALID_HANDLE,
* ::CUDA_ERROR_ALREADY_MAPPED,
* ::CUDA_ERROR_INVALID_CONTEXT,
* \notefnerr
*
* \sa ::cuGraphicsResourceSetMapFlags
*/
CUresult CUDAAPI cuGLSetBufferObjectMapFlags(GLuint buffer, unsigned int Flags);
#if __CUDA_API_VERSION >= 3020
/**
* \brief Maps an OpenGL buffer object
*
* \deprecated This function is deprecated as of Cuda 3.0.
*
* Maps the buffer object specified by \p buffer into the address space of the
* current CUDA context and returns in \p *dptr and \p *size the base pointer
* and size of the resulting mapping.
*
* There must be a valid OpenGL context bound to the current thread
* when this function is called. This must be the same context, or a
* member of the same shareGroup, as the context that was bound when
* the buffer was registered.
*
* Stream \p hStream in the current CUDA context is synchronized with
* the current GL context.
*
* \param dptr - Returned mapped base pointer
* \param size - Returned size of mapping
* \param buffer - The name of the buffer object to map
* \param hStream - Stream to synchronize
*
* \return
* ::CUDA_SUCCESS,
* ::CUDA_ERROR_DEINITIALIZED,
* ::CUDA_ERROR_NOT_INITIALIZED,
* ::CUDA_ERROR_INVALID_CONTEXT,
* ::CUDA_ERROR_INVALID_VALUE,
* ::CUDA_ERROR_MAP_FAILED
* \notefnerr
*
* \sa ::cuGraphicsMapResources
*/
CUresult CUDAAPI cuGLMapBufferObjectAsync(CUdeviceptr *dptr, size_t *size, GLuint buffer, CUstream hStream);
#endif /* __CUDA_API_VERSION >= 3020 */
/**
* \brief Unmaps an OpenGL buffer object
*
* \deprecated This function is deprecated as of Cuda 3.0.
*
* Unmaps the buffer object specified by \p buffer for access by CUDA.
*
* There must be a valid OpenGL context bound to the current thread
* when this function is called. This must be the same context, or a
* member of the same shareGroup, as the context that was bound when
* the buffer was registered.
*
* Stream \p hStream in the current CUDA context is synchronized with
* the current GL context.
*
* \param buffer - Name of the buffer object to unmap
* \param hStream - Stream to synchronize
*
* \return
* ::CUDA_SUCCESS,
* ::CUDA_ERROR_DEINITIALIZED,
* ::CUDA_ERROR_NOT_INITIALIZED,
* ::CUDA_ERROR_INVALID_CONTEXT,
* ::CUDA_ERROR_INVALID_VALUE
* \notefnerr
*
* \sa ::cuGraphicsUnmapResources
*/
CUresult CUDAAPI cuGLUnmapBufferObjectAsync(GLuint buffer, CUstream hStream);
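/*
 * Illustrative sketch: the deprecated register/map pair in one place.
 * "vbo" is assumed to be a GL buffer object created by the application;
 * new code should use cuGraphicsGLRegisterBuffer and cuGraphicsMapResources
 * instead.
 */
static CUresult oldStyleMap(GLuint vbo, CUdeviceptr *pd, size_t *psz)
{
    CUresult rc = cuGLRegisterBufferObject(vbo);   /* deprecated */
    if (rc != CUDA_SUCCESS) return rc;
    return cuGLMapBufferObject(pd, psz, vbo);      /* deprecated */
}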
/** @} */ /* END CUDA_GL_DEPRECATED */
/** @} */ /* END CUDA_GL */
/**
* CUDA API versioning support
*/
#if defined(__CUDA_API_VERSION_INTERNAL)
#undef cuGLCtxCreate
#undef cuGLMapBufferObject
#undef cuGLMapBufferObjectAsync
#endif /* __CUDA_API_VERSION_INTERNAL */
/**
* CUDA API made obsolete at API version 3020
*/
#if defined(__CUDA_API_VERSION_INTERNAL)
#define CUdeviceptr CUdeviceptr_v1
#endif /* __CUDA_API_VERSION_INTERNAL */
#if defined(__CUDA_API_VERSION_INTERNAL) || __CUDA_API_VERSION < 3020
CUresult CUDAAPI cuGLCtxCreate(CUcontext *pCtx, unsigned int Flags, CUdevice device);
CUresult CUDAAPI cuGLMapBufferObject(CUdeviceptr *dptr, unsigned int *size, GLuint buffer);
CUresult CUDAAPI cuGLMapBufferObjectAsync(CUdeviceptr *dptr, unsigned int *size, GLuint buffer, CUstream hStream);
#endif /* __CUDA_API_VERSION_INTERNAL || __CUDA_API_VERSION < 3020 */
#if defined(__CUDA_API_VERSION_INTERNAL)
#undef CUdeviceptr
#endif /* __CUDA_API_VERSION_INTERNAL */
#ifdef __cplusplus
};
#endif
#undef __CUDA_API_VERSION
#endif
cudaVDPAU.h
* source code with only those rights set forth herein.
*
* Any use of this source code in individual and commercial software must
* include, in the user documentation and internal comments to the code,
* the above Disclaimer and U.S. Government End Users Notice.
*/
#ifndef CUDAVDPAU_H
#define CUDAVDPAU_H
/**
* CUDA API versioning support
*/
#if defined(CUDA_FORCE_API_VERSION)
#if (CUDA_FORCE_API_VERSION == 3010)
#define __CUDA_API_VERSION 3010
#else
#error "Unsupported value of CUDA_FORCE_API_VERSION"
#endif
#else
#define __CUDA_API_VERSION 3020
#endif /* CUDA_FORCE_API_VERSION */
#if defined(__CUDA_API_VERSION_INTERNAL) || __CUDA_API_VERSION >= 3020
#define cuVDPAUCtxCreate cuVDPAUCtxCreate_v2
#endif /* __CUDA_API_VERSION_INTERNAL || __CUDA_API_VERSION >= 3020 */
#ifdef __cplusplus
extern "C" {
#endif
/**
* \defgroup CUDA_VDPAU VDPAU Interoperability
* \ingroup CUDA_DRIVER
*
* This section describes the VDPAU interoperability functions of the
* low-level CUDA driver application programming interface.
*
* @{
*/
/**
* \brief Gets the CUDA device associated with a VDPAU device
*
* Returns in \p *pDevice the CUDA device associated with a \p vdpDevice, if
* applicable.
*
* \param pDevice - Device associated with vdpDevice
* \param vdpDevice - A VdpDevice handle
* \param vdpGetProcAddress - VDPAU's VdpGetProcAddress function pointer
*
* \return
* ::CUDA_SUCCESS,
* ::CUDA_ERROR_DEINITIALIZED,
* ::CUDA_ERROR_NOT_INITIALIZED,
* ::CUDA_ERROR_INVALID_CONTEXT,
* ::CUDA_ERROR_INVALID_VALUE
* \notefnerr
*
* \sa ::cuCtxCreate, ::cuVDPAUCtxCreate, ::cuGraphicsVDPAURegisterVideoSurface,
* ::cuGraphicsVDPAURegisterOutputSurface, ::cuGraphicsUnregisterResource,
* ::cuGraphicsResourceSetMapFlags, ::cuGraphicsMapResources,
* ::cuGraphicsUnmapResources, ::cuGraphicsSubResourceGetMappedArray
*/
CUresult CUDAAPI cuVDPAUGetDevice(CUdevice *pDevice, VdpDevice vdpDevice, VdpGetProcAddress *vdpGetProcAddress);
#if __CUDA_API_VERSION >= 3020
/**
* \brief Create a CUDA context for interoperability with VDPAU
*
* Creates a new CUDA context, initializes VDPAU interoperability, and
* associates the CUDA context with the calling thread. It must be called
* before performing any other VDPAU interoperability operations. It may fail
* if the needed VDPAU driver facilities are not available. For usage of the
* \p flags parameter, see ::cuCtxCreate().
*
* \param pCtx - Returned CUDA context
* \param flags - Options for CUDA context creation
* \param device - Device on which to create the context
* \param vdpDevice - The VdpDevice to interop with
* \param vdpGetProcAddress - VDPAU's VdpGetProcAddress function pointer
*
* \return
* ::CUDA_SUCCESS,
* ::CUDA_ERROR_DEINITIALIZED,
* ::CUDA_ERROR_NOT_INITIALIZED,
* ::CUDA_ERROR_INVALID_CONTEXT,
* ::CUDA_ERROR_INVALID_VALUE,
* ::CUDA_ERROR_OUT_OF_MEMORY
* \notefnerr
*
* \sa ::cuCtxCreate, ::cuGraphicsVDPAURegisterVideoSurface,
* ::cuGraphicsVDPAURegisterOutputSurface, ::cuGraphicsUnregisterResource,
* ::cuGraphicsResourceSetMapFlags, ::cuGraphicsMapResources,
* ::cuGraphicsUnmapResources, ::cuGraphicsSubResourceGetMappedArray,
* ::cuVDPAUGetDevice
*/
CUresult CUDAAPI cuVDPAUCtxCreate(CUcontext *pCtx, unsigned int flags, CUdevice device, VdpDevice vdpDevice, VdpGetProcAddress *vdpGetProcAddress);
#endif /* __CUDA_API_VERSION >= 3020 */
/**
* \brief Registers a VDPAU VdpVideoSurface object
*
* Registers the VdpVideoSurface specified by \p vdpSurface for access by
* CUDA. A handle to the registered object is returned as \p pCudaResource.
* The surface's intended usage is specified using \p flags, as follows:
*
* - ::CU_GRAPHICS_MAP_RESOURCE_FLAGS_NONE: Specifies no hints about how this
* resource will be used. It is therefore assumed that this resource will be
* read from and written to by CUDA. This is the default value.
* - ::CU_GRAPHICS_MAP_RESOURCE_FLAGS_READ_ONLY: Specifies that CUDA
* will not write to this resource.
* - ::CU_GRAPHICS_MAP_RESOURCE_FLAGS_WRITE_DISCARD: Specifies that
* CUDA will not read from this resource and will write over the
* entire contents of the resource, so none of the data previously
* stored in the resource will be preserved.
*
* The VdpVideoSurface is presented as an array of subresources that may be
* accessed using pointers returned by ::cuGraphicsSubResourceGetMappedArray.
* The exact number of valid \p arrayIndex values depends on the VDPAU surface
* format. The mapping is shown in the table below. \p mipLevel must be 0.
*
* \htmlonly
* <table>
* <tr><th>VdpChromaType</th><th>arrayIndex</th><th>Size</th><th>Format</th><th>Content</th></tr>
* <tr><td rowspan="4" valign="top">VDP_CHROMA_TYPE_420</td><td>0</td><td>w x h/2</td><td>R8</td><td>Top-field luma</td></tr>
* <tr><td>1</td><td>w x h/2</td><td>R8</td><td>Bottom-field luma</td></tr>
* <tr><td>2</td><td>w/2 x h/4</td><td>R8G8</td><td>Top-field chroma</td></tr>
* <tr><td>3</td><td>w/2 x h/4</td><td>R8G8</td><td>Bottom-field chroma</td></tr>
* <tr><td rowspan="4" valign="top">VDP_CHROMA_TYPE_422</td><td>0</td><td>w x h/2</td><td>R8</td><td>Top-field luma</td></tr>
* <tr><td>1</td><td>w x h/2</td><td>R8</td><td>Bottom-field luma</td></tr>
* <tr><td>2</td><td>w/2 x h/2</td><td>R8G8</td><td>Top-field chroma</td></tr>
* <tr><td>3</td><td>w/2 x h/2</td><td>R8G8</td><td>Bottom-field chroma</td></tr>
* </table>
* \endhtmlonly
*
* \latexonly
* \begin{tabular}{|l|l|l|l|l|}
* \hline
* VdpChromaType & arrayIndex & Size & Format & Content \\
* \hline
* VDP\_CHROMA\_TYPE\_420 & 0 & w x h/2 & R8 & Top-field luma \\
* & 1 & w x h/2 & R8 & Bottom-field luma \\
* & 2 & w/2 x h/4 & R8G8 & Top-field chroma \\
* & 3 & w/2 x h/4 & R8G8 & Bottom-field chroma \\
* \hline
* VDP\_CHROMA\_TYPE\_422 & 0 & w x h/2 & R8 & Top-field luma \\
* & 1 & w x h/2 & R8 & Bottom-field luma \\
* & 2 & w/2 x h/2 & R8G8 & Top-field chroma \\
* & 3 & w/2 x h/2 & R8G8 & Bottom-field chroma \\
* \hline
* \end{tabular}
* \endlatexonly
*
* \param pCudaResource - Pointer to the returned object handle
* \param vdpSurface - The VdpVideoSurface to be registered
* \param flags - Map flags
*
* \return
* ::CUDA_SUCCESS,
* ::CUDA_ERROR_INVALID_HANDLE,
* ::CUDA_ERROR_ALREADY_MAPPED,
* ::CUDA_ERROR_INVALID_CONTEXT,
* \notefnerr
*
* \sa ::cuCtxCreate, ::cuVDPAUCtxCreate,
* ::cuGraphicsVDPAURegisterOutputSurface, ::cuGraphicsUnregisterResource,
* ::cuGraphicsResourceSetMapFlags, ::cuGraphicsMapResources,
* ::cuGraphicsUnmapResources, ::cuGraphicsSubResourceGetMappedArray,
* ::cuVDPAUGetDevice
*/
CUresult CUDAAPI cuGraphicsVDPAURegisterVideoSurface(CUgraphicsResource *pCudaResource, VdpVideoSurface vdpSurface, unsigned int flags);
/**
* \brief Registers a VDPAU VdpOutputSurface object
*
* Registers the VdpOutputSurface specified by \p vdpSurface for access by
* CUDA. A handle to the registered object is returned as \p pCudaResource.
* The surface's intended usage is specified using \p flags, as follows:
*
* - ::CU_GRAPHICS_MAP_RESOURCE_FLAGS_NONE: Specifies no hints about how this
* resource will be used. It is therefore assumed that this resource will be
* read from and written to by CUDA. This is the default value.
* - ::CU_GRAPHICS_MAP_RESOURCE_FLAGS_READ_ONLY: Specifies that CUDA
* will not write to this resource.
* - ::CU_GRAPHICS_MAP_RESOURCE_FLAGS_WRITE_DISCARD: Specifies that
* CUDA will not read from this resource and will write over the
* entire contents of the resource, so none of the data previously
* stored in the resource will be preserved.
*
* The VdpOutputSurface is presented as an array of subresources that may be
* accessed using pointers returned by ::cuGraphicsSubResourceGetMappedArray.
* The exact number of valid \p arrayIndex values depends on the VDPAU surface
* format. The mapping is shown in the table below. \p mipLevel must be 0.
*
* \htmlonly
* <table>
* <tr><th>VdpRGBAFormat</th><th>arrayIndex</th><th>Size</th><th>Format</th><th>Content</th></tr>
* <tr><td>VDP_RGBA_FORMAT_B8G8R8A8</td><td>0</td><td>w x h</td><td>ARGB8</td><td>Entire surface</td></tr>
* <tr><td>VDP_RGBA_FORMAT_R10G10B10A2</td><td>0</td><td>w x h</td><td>A2BGR10</td><td>Entire surface</td></tr>
* </table>
* \endhtmlonly
*
* \latexonly
* \begin{tabular}{|l|l|l|l|l|}
* \hline
* VdpRGBAFormat & arrayIndex & Size & Format & Content \\
* \hline
* VDP\_RGBA\_FORMAT\_B8G8R8A8 & 0 & w x h & ARGB8 & Entire surface \\
* VDP\_RGBA\_FORMAT\_R10G10B10A2 & 0 & w x h & A2BGR10 & Entire surface \\
* \hline
* \end{tabular}
* \endlatexonly
*
* \param pCudaResource - Pointer to the returned object handle
* \param vdpSurface - The VdpOutputSurface to be registered
* \param flags - Map flags
*
* \return
* ::CUDA_SUCCESS,
* ::CUDA_ERROR_INVALID_HANDLE,
* ::CUDA_ERROR_ALREADY_MAPPED,
* ::CUDA_ERROR_INVALID_CONTEXT,
* \notefnerr
*
* \sa ::cuCtxCreate, ::cuVDPAUCtxCreate,
* ::cuGraphicsVDPAURegisterVideoSurface, ::cuGraphicsUnregisterResource,
* ::cuGraphicsResourceSetMapFlags, ::cuGraphicsMapResources,
* ::cuGraphicsUnmapResources, ::cuGraphicsSubResourceGetMappedArray,
* ::cuVDPAUGetDevice
*/
CUresult CUDAAPI cuGraphicsVDPAURegisterOutputSurface(CUgraphicsResource *pCudaResource, VdpOutputSurface vdpSurface, unsigned int flags);
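/*
 * Illustrative sketch: register a VDPAU output surface read-only and fetch
 * the mapped array for its single subresource. "outSurf" is assumed to come
 * from VdpOutputSurfaceCreate; unmapping and unregistering are left to the
 * caller.
 */
static CUresult mapVdpauOutput(VdpOutputSurface outSurf, CUstream hStream, CUarray *pArray)
{
    CUgraphicsResource res;
    CUresult rc = cuGraphicsVDPAURegisterOutputSurface(&res, outSurf,
                                                       CU_GRAPHICS_MAP_RESOURCE_FLAGS_READ_ONLY);
    if (rc != CUDA_SUCCESS) return rc;
    rc = cuGraphicsMapResources(1, &res, hStream);
    if (rc != CUDA_SUCCESS) return rc;
    return cuGraphicsSubResourceGetMappedArray(pArray, res, 0, 0);
}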
/** @} */ /* END CUDA_VDPAU */
/**
* CUDA API versioning support
*/
#if defined(__CUDA_API_VERSION_INTERNAL)
#undef cuVDPAUCtxCreate
#endif /* __CUDA_API_VERSION_INTERNAL */
/**
* CUDA API made obsolete at API version 3020
*/
#if defined(__CUDA_API_VERSION_INTERNAL) || __CUDA_API_VERSION < 3020
CUresult CUDAAPI cuVDPAUCtxCreate(CUcontext *pCtx, unsigned int flags, CUdevice device, VdpDevice vdpDevice, VdpGetProcAddress *vdpGetProcAddress);
#endif /* __CUDA_API_VERSION_INTERNAL || __CUDA_API_VERSION < 3020 */
#ifdef __cplusplus
};
#endif
#undef __CUDA_API_VERSION
#endif
cuda_gl_interop.h
* source code with only those rights set forth herein.
*
* Any use of this source code in individual and commercial software must
* include, in the user documentation and internal comments to the code,
* the above Disclaimer and U.S. Government End Users Notice.
*/
#if !defined(__CUDA_GL_INTEROP_H__)
#define __CUDA_GL_INTEROP_H__
#include "builtin_types.h" #include "builtin_types.h"
#include "host_defines.h" #include "host_defines.h"
#if defined(__APPLE__) #if defined(__APPLE__)
#include <OpenGL/gl.h> #include <OpenGL/gl.h>
#else /* __APPLE__ */ #else /* __APPLE__ */
#include <GL/gl.h> #include <GL/gl.h>
#endif /* __APPLE__ */ #endif /* __APPLE__ */
#if defined(__cplusplus) #if defined(__cplusplus)
extern "C" { extern "C" {
#endif /* __cplusplus */ #endif /* __cplusplus */
/**
* \addtogroup CUDART_OPENGL OpenGL Interoperability
* This section describes the OpenGL interoperability functions of the CUDA
* runtime application programming interface.
*
* @{
*/
/**
* \brief Sets the CUDA device for use with OpenGL interoperability
*
* Records \p device as the device on which the active host thread executes
* the device code. Records the thread as using OpenGL interoperability.
* If the host thread has already initialized the CUDA runtime by
 * calling non-device management runtime functions or if there exists a CUDA
 * driver context active on the host thread, then this call returns
* ::cudaErrorSetOnActiveProcess.
*
* \param device - Device to use for OpenGL interoperability
*
* \return
* ::cudaSuccess,
* ::cudaErrorInvalidDevice,
* ::cudaErrorSetOnActiveProcess
* \notefnerr
*
* \sa ::cudaGLRegisterBufferObject, ::cudaGLMapBufferObject,
* ::cudaGLUnmapBufferObject, ::cudaGLUnregisterBufferObject,
* ::cudaGLMapBufferObjectAsync, ::cudaGLUnmapBufferObjectAsync
*/
extern __host__ cudaError_t CUDARTAPI cudaGLSetGLDevice(int device); extern __host__ cudaError_t CUDARTAPI cudaGLSetGLDevice(int device);
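A sketch of typical use (device 0 is an assumption): the GL context must already be current on the calling thread, and no other CUDA runtime calls may have been issued yet.

cudaError_t err = cudaGLSetGLDevice(0);   /* device 0 assumed to drive the GL context */
if (err == cudaErrorSetOnActiveProcess) {
    /* the runtime was already initialized; the interop device can
     * no longer be selected on this thread */
}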
extern __host__ cudaError_t CUDARTAPI cudaGraphicsGLRegisterImage(struct cudaGraphicsResource **resource, GLuint image, GLenum target, unsigned int Flags); /**
 * \brief Register an OpenGL texture or renderbuffer object
extern __host__ cudaError_t CUDARTAPI cudaGraphicsGLRegisterBuffer(struct cudaGraphicsResource **resource, GLuint buffer, unsigned int Flags); *
 * Registers the texture or renderbuffer object specified by \p image for access by CUDA.
* \p target must match the type of the object.
* A handle to the registered object is returned as \p resource.
* The map flags \p flags specify the intended usage, as follows:
*
* - ::cudaGraphicsMapFlagsNone: Specifies no hints about how this
 * resource will be used. It is therefore assumed that this resource will be
 * read from and written to by CUDA. This is the default value.
* - ::cudaGraphicsMapFlagsReadOnly: Specifies that CUDA
* will not write to this resource.
* - ::cudaGraphicsMapFlagsWriteDiscard: Specifies that
* CUDA will not read from this resource and will write over the
* entire contents of the resource, so none of the data previously
* stored in the resource will be preserved.
*
* The following image classes are currently disallowed:
* - Textures with borders
* - Multisampled renderbuffers
*
* \param resource - Pointer to the returned object handle
 * \param image - name of texture or renderbuffer object to be registered
 * \param target - Identifies the type of object specified by \p image, and must be one of
* ::GL_TEXTURE_2D,
* ::GL_TEXTURE_RECTANGLE,
* ::GL_TEXTURE_CUBE_MAP,
* ::GL_TEXTURE_3D,
* ::GL_TEXTURE_2D_ARRAY, or
* ::GL_RENDERBUFFER.
* \param flags - Map flags
*
* \return
* ::cudaSuccess,
* ::cudaErrorInvalidDevice,
* ::cudaErrorInvalidValue,
* ::cudaErrorInvalidResourceHandle,
* ::cudaErrorUnknown
* \notefnerr
*
* \sa
* ::cudaGLSetGLDevice
* ::cudaGraphicsUnregisterResource,
* ::cudaGraphicsMapResources,
* ::cudaGraphicsSubResourceGetMappedArray
*/
extern __host__ cudaError_t CUDARTAPI cudaGraphicsGLRegisterImage(struct cudaGraphicsResource **resource, GLuint image, GLenum target, unsigned int flags);
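A sketch of the register/map/use/unmap cycle for a texture; tex is a hypothetical GL texture id created elsewhere with glTexImage2D:

struct cudaGraphicsResource *texRes;
struct cudaArray *texArray;

cudaGraphicsGLRegisterImage(&texRes, tex, GL_TEXTURE_2D,
                            cudaGraphicsMapFlagsReadOnly);
cudaGraphicsMapResources(1, &texRes, 0);
cudaGraphicsSubResourceGetMappedArray(&texArray, texRes, 0, 0);
/* ... bind texArray to a texture reference and launch kernels ... */
cudaGraphicsUnmapResources(1, &texRes, 0);
cudaGraphicsUnregisterResource(texRes);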
/**
* \brief Registers an OpenGL buffer object
*
* Registers the buffer object specified by \p buffer for access by
* CUDA. A handle to the registered object is returned as \p
* resource. The map flags \p flags specify the intended usage,
* as follows:
*
* - ::cudaGraphicsMapFlagsNone: Specifies no hints about how this
 * resource will be used. It is therefore assumed that this resource will be
 * read from and written to by CUDA. This is the default value.
* - ::cudaGraphicsMapFlagsReadOnly: Specifies that CUDA
* will not write to this resource.
* - ::cudaGraphicsMapFlagsWriteDiscard: Specifies that
* CUDA will not read from this resource and will write over the
* entire contents of the resource, so none of the data previously
* stored in the resource will be preserved.
*
* \param resource - Pointer to the returned object handle
* \param buffer - name of buffer object to be registered
* \param flags - Map flags
*
* \return
* ::cudaSuccess,
* ::cudaErrorInvalidDevice,
* ::cudaErrorInvalidValue,
* ::cudaErrorInvalidResourceHandle,
* ::cudaErrorUnknown
* \notefnerr
*
* \sa
* ::cudaGLCtxCreate,
* ::cudaGraphicsUnregisterResource,
* ::cudaGraphicsMapResources,
* ::cudaGraphicsResourceGetMappedPointer
*/
extern __host__ cudaError_t CUDARTAPI cudaGraphicsGLRegisterBuffer(struct cudaGraphicsResource **resource, GLuint buffer, unsigned int flags);
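Likewise for a buffer object; vbo is a hypothetical GL buffer id, and the write-discard flag matches a kernel that overwrites the whole buffer:

struct cudaGraphicsResource *vboRes;
float *dPtr;
size_t nBytes;

cudaGraphicsGLRegisterBuffer(&vboRes, vbo, cudaGraphicsMapFlagsWriteDiscard);
cudaGraphicsMapResources(1, &vboRes, 0);
cudaGraphicsResourceGetMappedPointer((void **)&dPtr, &nBytes, vboRes);
/* ... launch a kernel that fills dPtr ... */
cudaGraphicsUnmapResources(1, &vboRes, 0);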
#ifdef _WIN32 #ifdef _WIN32
#ifndef WGL_NV_gpu_affinity #ifndef WGL_NV_gpu_affinity
typedef void* HGPUNV; typedef void* HGPUNV;
#endif #endif
/**
* \brief Gets the CUDA device associated with hGpu
*
* Returns the CUDA device associated with a hGpu, if applicable.
*
 * \param device - Returns the device associated with hGpu, or -1 if hGpu is
* not a compute device.
* \param hGpu - Handle to a GPU, as queried via WGL_NV_gpu_affinity()
*
* \return
* ::cudaSuccess
* \notefnerr
*
* \sa WGL_NV_gpu_affinity, ::cudaGLSetGLDevice
*/
extern __host__ cudaError_t CUDARTAPI cudaWGLGetDevice(int *device, HGPUNV hGpu); extern __host__ cudaError_t CUDARTAPI cudaWGLGetDevice(int *device, HGPUNV hGpu);
#endif #endif
/** /**
* CUDA GL Map Flags * CUDA GL Map Flags
*/ */
enum cudaGLMapFlags enum cudaGLMapFlags
{ {
cudaGLMapFlagsNone = 0, ///< Default; Assume resource can be rea cudaGLMapFlagsNone = 0, /**< Default; Assume resource can be rea
d/written d/written */
cudaGLMapFlagsReadOnly = 1, ///< CUDA kernels will not write to this cudaGLMapFlagsReadOnly = 1, /**< CUDA kernels will not write to this
resource resource */
cudaGLMapFlagsWriteDiscard = 2, ///< CUDA kernels will only write to and cudaGLMapFlagsWriteDiscard = 2, /**< CUDA kernels will only write to and
will not read from this resource will not read from this resource */
}; };
/**
* \defgroup CUDART_OPENGL_DEPRECATED OpenGL Interoperability [DEPRECATED]
* This section describes deprecated OpenGL interoperability functionality.
*
* @{
*/
/**
* \brief Registers a buffer object for access by CUDA
*
 * \deprecated This function is deprecated as of CUDA 3.0.
*
* Registers the buffer object of ID \p bufObj for access by
* CUDA. This function must be called before CUDA can map the buffer
* object. The OpenGL context used to create the buffer, or another
* context from the same share group, must be bound to the current
* thread when this is called.
*
* \param bufObj - Buffer object ID to register
*
* \return
* ::cudaSuccess,
* ::cudaErrorInitializationError
* \notefnerr
*
* \sa ::cudaGraphicsGLRegisterBuffer
*/
extern __host__ cudaError_t CUDARTAPI cudaGLRegisterBufferObject(GLuint buf Obj); extern __host__ cudaError_t CUDARTAPI cudaGLRegisterBufferObject(GLuint buf Obj);
/**
* \brief Maps a buffer object for access by CUDA
*
 * \deprecated This function is deprecated as of CUDA 3.0.
*
* Maps the buffer object of ID \p bufObj into the address space of
* CUDA and returns in \p *devPtr the base pointer of the resulting
* mapping. The buffer must have previously been registered by
* calling ::cudaGLRegisterBufferObject(). While a buffer is mapped
* by CUDA, any OpenGL operation which references the buffer will
* result in undefined behavior. The OpenGL context used to create
* the buffer, or another context from the same share group, must be
* bound to the current thread when this is called.
*
* All streams in the current thread are synchronized with the current
* GL context.
*
* \param devPtr - Returned device pointer to CUDA object
* \param bufObj - Buffer object ID to map
*
* \return
* ::cudaSuccess,
* ::cudaErrorMapBufferObjectFailed
* \notefnerr
*
* \sa ::cudaGraphicsMapResources
*/
extern __host__ cudaError_t CUDARTAPI cudaGLMapBufferObject(void **devPtr, GLuint bufObj); extern __host__ cudaError_t CUDARTAPI cudaGLMapBufferObject(void **devPtr, GLuint bufObj);
/**
* \brief Unmaps a buffer object for access by CUDA
*
 * \deprecated This function is deprecated as of CUDA 3.0.
*
* Unmaps the buffer object of ID \p bufObj for access by CUDA. When
* a buffer is unmapped, the base address returned by
* ::cudaGLMapBufferObject() is invalid and subsequent references to
* the address result in undefined behavior. The OpenGL context used
* to create the buffer, or another context from the same share group,
* must be bound to the current thread when this is called.
*
* All streams in the current thread are synchronized with the current
* GL context.
*
* \param bufObj - Buffer object to unmap
*
* \return
* ::cudaSuccess,
* ::cudaErrorInvalidDevicePointer,
* ::cudaErrorUnmapBufferObjectFailed
* \notefnerr
*
* \sa ::cudaGraphicsUnmapResources
*/
extern __host__ cudaError_t CUDARTAPI cudaGLUnmapBufferObject(GLuint bufObj ); extern __host__ cudaError_t CUDARTAPI cudaGLUnmapBufferObject(GLuint bufObj );
/**
* \brief Unregisters a buffer object for access by CUDA
*
 * \deprecated This function is deprecated as of CUDA 3.0.
*
* Unregisters the buffer object of ID \p bufObj for access by CUDA
* and releases any CUDA resources associated with the buffer. Once a
* buffer is unregistered, it may no longer be mapped by CUDA. The GL
* context used to create the buffer, or another context from the
* same share group, must be bound to the current thread when this is
* called.
*
* \param bufObj - Buffer object to unregister
*
* \return
* ::cudaSuccess
* \notefnerr
*
* \sa ::cudaGraphicsUnregisterResource
*/
extern __host__ cudaError_t CUDARTAPI cudaGLUnregisterBufferObject(GLuint b ufObj); extern __host__ cudaError_t CUDARTAPI cudaGLUnregisterBufferObject(GLuint b ufObj);
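For contrast with the cudaGraphics* path above, a sketch of the full deprecated flow (register, map, use, unmap, unregister) around a hypothetical buffer id:

static void deprecatedInteropFlow(GLuint bufObj)   /* sketch only */
{
    void *devPtr;

    cudaGLRegisterBufferObject(bufObj);
    cudaGLMapBufferObject(&devPtr, bufObj);
    /* ... kernels may read and write devPtr here ... */
    cudaGLUnmapBufferObject(bufObj);
    cudaGLUnregisterBufferObject(bufObj);
}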
/**
* \brief Set usage flags for mapping an OpenGL buffer
*
 * \deprecated This function is deprecated as of CUDA 3.0.
*
* Set flags for mapping the OpenGL buffer \p bufObj
*
* Changes to flags will take effect the next time \p bufObj is mapped.
* The \p flags argument may be any of the following:
*
* - ::cudaGLMapFlagsNone: Specifies no hints about how this buffer will
* be used. It is therefore assumed that this buffer will be read from and
* written to by CUDA kernels. This is the default value.
 * - ::cudaGLMapFlagsReadOnly: Specifies that CUDA kernels which access this
* buffer will not write to the buffer.
* - ::cudaGLMapFlagsWriteDiscard: Specifies that CUDA kernels which access
* this buffer will not read from the buffer and will write over the
* entire contents of the buffer, so none of the data previously stored in
* the buffer will be preserved.
*
* If \p bufObj has not been registered for use with CUDA, then
* ::cudaErrorInvalidResourceHandle is returned. If \p bufObj is presently
* mapped for access by CUDA, then ::cudaErrorUnknown is returned.
*
* \param bufObj - Registered buffer object to set flags for
* \param flags - Parameters for buffer mapping
*
* \return
* ::cudaSuccess,
* ::cudaErrorInvalidValue,
* ::cudaErrorInvalidResourceHandle,
* ::cudaErrorUnknown
* \notefnerr
*
* \sa ::cudaGraphicsResourceSetMapFlags
*/
extern __host__ cudaError_t CUDARTAPI cudaGLSetBufferObjectMapFlags(GLuint bufObj, unsigned int flags); extern __host__ cudaError_t CUDARTAPI cudaGLSetBufferObjectMapFlags(GLuint bufObj, unsigned int flags);
/**
* \brief Maps a buffer object for access by CUDA
*
 * \deprecated This function is deprecated as of CUDA 3.0.
*
* Maps the buffer object of ID \p bufObj into the address space of
* CUDA and returns in \p *devPtr the base pointer of the resulting
* mapping. The buffer must have previously been registered by
* calling ::cudaGLRegisterBufferObject(). While a buffer is mapped
* by CUDA, any OpenGL operation which references the buffer will
* result in undefined behavior. The OpenGL context used to create
* the buffer, or another context from the same share group, must be
* bound to the current thread when this is called.
*
 * Stream \p stream is synchronized with the current GL context.
*
* \param devPtr - Returned device pointer to CUDA object
* \param bufObj - Buffer object ID to map
* \param stream - Stream to synchronize
*
* \return
* ::cudaSuccess,
* ::cudaErrorMapBufferObjectFailed
* \notefnerr
*
* \sa ::cudaGraphicsMapResources
*/
extern __host__ cudaError_t CUDARTAPI cudaGLMapBufferObjectAsync(void **dev Ptr, GLuint bufObj, cudaStream_t stream); extern __host__ cudaError_t CUDARTAPI cudaGLMapBufferObjectAsync(void **dev Ptr, GLuint bufObj, cudaStream_t stream);
/**
* \brief Unmaps a buffer object for access by CUDA
*
 * \deprecated This function is deprecated as of CUDA 3.0.
*
* Unmaps the buffer object of ID \p bufObj for access by CUDA. When
* a buffer is unmapped, the base address returned by
* ::cudaGLMapBufferObject() is invalid and subsequent references to
* the address result in undefined behavior. The OpenGL context used
* to create the buffer, or another context from the same share group,
* must be bound to the current thread when this is called.
*
 * Stream \p stream is synchronized with the current GL context.
*
* \param bufObj - Buffer object to unmap
* \param stream - Stream to synchronize
*
* \return
* ::cudaSuccess,
* ::cudaErrorInvalidDevicePointer,
* ::cudaErrorUnmapBufferObjectFailed
* \notefnerr
*
* \sa ::cudaGraphicsUnmapResources
*/
extern __host__ cudaError_t CUDARTAPI cudaGLUnmapBufferObjectAsync(GLuint b ufObj, cudaStream_t stream); extern __host__ cudaError_t CUDARTAPI cudaGLUnmapBufferObjectAsync(GLuint b ufObj, cudaStream_t stream);
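The async variants confine the GL synchronization to one stream instead of every stream on the thread; a sketch, assuming bufObj was registered as above:

static void asyncMapSketch(GLuint bufObj, cudaStream_t stream)   /* sketch only */
{
    void *devPtr;

    cudaGLMapBufferObjectAsync(&devPtr, bufObj, stream);
    /* ... enqueue kernels that use devPtr into the same stream ... */
    cudaGLUnmapBufferObjectAsync(bufObj, stream);
}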
/** @} */ /* END CUDART_OPENGL_DEPRECATED */
/** @} */ /* END CUDART_OPENGL */
#if defined(__cplusplus) #if defined(__cplusplus)
} }
#endif /* __cplusplus */ #endif /* __cplusplus */
#endif /* __CUDA_GL_INTEROP_H__ */ #endif /* __CUDA_GL_INTEROP_H__ */
 End of changes. 14 change blocks. 
32 lines changed or deleted 345 lines changed or added


 cuda_runtime.h   cuda_runtime.h 
skipping to change at line 79 skipping to change at line 79
#if defined(__cplusplus) #if defined(__cplusplus)
/************************************************************************** ***** /************************************************************************** *****
* * * *
* * * *
* * * *
*************************************************************************** ****/ *************************************************************************** ****/
/** /**
* \ingroup CUDART_HIGHLEVEL * \addtogroup CUDART_HIGHLEVEL
* @{
*/
/**
* \brief \hl Configure a device launch * \brief \hl Configure a device launch
* *
* Pushes \p size bytes of the argument pointed to by \p arg at \p offset * Pushes \p size bytes of the argument pointed to by \p arg at \p offset
* bytes from the start of the parameter passing area, which starts at * bytes from the start of the parameter passing area, which starts at
* offset 0. The arguments are stored in the top of the execution stack. * offset 0. The arguments are stored in the top of the execution stack.
* \ref ::cudaSetupArgument(T,size_t) "cudaSetupArgument()" must be precede d * \ref ::cudaSetupArgument(T, size_t) "cudaSetupArgument()" must be preced ed
* by a call to ::cudaConfigureCall(). * by a call to ::cudaConfigureCall().
* *
* \param arg - Argument to push for a kernel launch * \param arg - Argument to push for a kernel launch
* \param offset - Offset in argument stack to push new arg * \param offset - Offset in argument stack to push new arg
* *
* \return * \return
* ::cudaSuccess * ::cudaSuccess
* \notefnerr * \notefnerr
* *
* \sa ::cudaConfigureCall, * \sa ::cudaConfigureCall,
skipping to change at line 111 skipping to change at line 115
*/ */
template<class T> template<class T>
__inline__ __host__ cudaError_t cudaSetupArgument( __inline__ __host__ cudaError_t cudaSetupArgument(
T arg, T arg,
size_t offset size_t offset
) )
{ {
return cudaSetupArgument((const void*)&arg, sizeof(T), offset); return cudaSetupArgument((const void*)&arg, sizeof(T), offset);
} }
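A sketch of the manual launch sequence that the <<<...>>> syntax expands to; myKernel and its extern "C" linkage are assumptions made for illustration:

static void manualLaunch(float *devPtr, int n)   /* sketch only */
{
    cudaConfigureCall(dim3(64), dim3(256));
    cudaSetupArgument(devPtr, 0);             /* first argument at offset 0 */
    cudaSetupArgument(n, sizeof(devPtr));     /* packed after the pointer */
    cudaLaunch("myKernel");                   /* name as registered with the runtime */
}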
/**
* \brief \hl Creates an event object with the specified flags
*
* Creates an event object with the specified flags. Valid flags include:
* - ::cudaEventDefault: Default event creation flag.
* - ::cudaEventBlockingSync: Specifies that event should use blocking
 * synchronization. A host thread that uses ::cudaEventSynchronize() to wait
 * on an event created with this flag will block until the event actually
 * completes.
 * - ::cudaEventDisableTiming: Specifies that the created event does not need
 * to record timing data. Events created with this flag specified and
 * the ::cudaEventBlockingSync flag not specified will provide the best
 * performance when used with ::cudaStreamWaitEvent() and ::cudaEventQuery().
*
* \param event - Newly created event
* \param flags - Flags for new event
*
* \return
* ::cudaSuccess,
* ::cudaErrorInitializationError,
* ::cudaErrorInvalidValue,
* ::cudaErrorLaunchFailure,
* ::cudaErrorMemoryAllocation
* \notefnerr
*
* \sa \ref ::cudaEventCreate(cudaEvent_t*) "cudaEventCreate (C API)",
* ::cudaEventCreateWithFlags, ::cudaEventRecord, ::cudaEventQuery,
* ::cudaEventSynchronize, ::cudaEventDestroy, ::cudaEventElapsedTime,
* ::cudaStreamWaitEvent
*/
static __inline__ __host__ cudaError_t cudaEventCreate(
cudaEvent_t *event,
unsigned int flags
)
{
  return cudaEventCreateWithFlags(event, flags); /* forward the caller's flags */
}
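A sketch of timing GPU work with this overload; blocking-sync events keep the waiting host thread off the CPU while the work drains:

cudaEvent_t start, stop;
float ms;

cudaEventCreate(&start, cudaEventBlockingSync);
cudaEventCreate(&stop,  cudaEventBlockingSync);
cudaEventRecord(start, 0);
/* ... asynchronous kernels and copies ... */
cudaEventRecord(stop, 0);
cudaEventSynchronize(stop);
cudaEventElapsedTime(&ms, start, stop);
cudaEventDestroy(start);
cudaEventDestroy(stop);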
/**
* \brief \hl Allocates page-locked memory on the host
*
 * Allocates \p size bytes of host memory that is page-locked and accessible
 * to the device. The driver tracks the virtual memory ranges allocated with
 * this function and automatically accelerates calls to functions such as
 * ::cudaMemcpy(). Since the memory can be accessed directly by the device, it
 * can be read or written with much higher bandwidth than pageable memory
 * obtained with functions such as ::malloc(). Allocating excessive amounts of
 * pinned memory may degrade system performance, since it reduces the amount
 * of memory available to the system for paging. As a result, this function is
 * best used sparingly to allocate staging areas for data exchange between host
 * and device.
 *
 * The \p flags parameter enables different options to be specified that affect
 * the allocation, as follows.
 * - ::cudaHostAllocDefault: This flag's value is defined to be 0.
 * - ::cudaHostAllocPortable: The memory returned by this call will be
 * considered as pinned memory by all CUDA contexts, not just the one that
 * performed the allocation.
 * - ::cudaHostAllocMapped: Maps the allocation into the CUDA address space.
 * The device pointer to the memory may be obtained by calling
 * ::cudaHostGetDevicePointer().
 * - ::cudaHostAllocWriteCombined: Allocates the memory as write-combined (WC).
 * WC memory can be transferred across the PCI Express bus more quickly on some
 * system configurations, but cannot be read efficiently by most CPUs. WC
 * memory is a good option for buffers that will be written by the CPU and read
 * by the device via mapped pinned memory or host->device transfers.
 *
 * All of these flags are orthogonal to one another: a developer may allocate
 * memory that is portable, mapped and/or write-combined with no restrictions.
 *
 * ::cudaSetDeviceFlags() must have been called with the ::cudaDeviceMapHost
 * flag in order for the ::cudaHostAllocMapped flag to have any effect.
 *
 * The ::cudaHostAllocMapped flag may be specified on CUDA contexts for devices
* that do not support mapped pinned memory. The failure is deferred to
* ::cudaHostGetDevicePointer() because the memory may be mapped into other
* CUDA contexts via the ::cudaHostAllocPortable flag.
*
* Memory allocated by this function must be freed with ::cudaFreeHost().
*
* \param ptr - Device pointer to allocated memory
* \param size - Requested allocation size in bytes
* \param flags - Requested properties of allocated memory
*
* \return
* ::cudaSuccess,
* ::cudaErrorMemoryAllocation
* \notefnerr
*
* \sa ::cudaSetDeviceFlags,
* \ref ::cudaMallocHost(void**, size_t) "cudaMallocHost (C API)",
* ::cudaFreeHost, ::cudaHostAlloc
*/
static __inline__ __host__ cudaError_t cudaMallocHost(
void **ptr,
size_t size,
unsigned int flags
)
{
return cudaHostAlloc(ptr, size, flags);
}
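A sketch of a zero-copy allocation through this overload; the buffer size is arbitrary, and ::cudaSetDeviceFlags() must precede any other runtime activity:

float *hPtr, *dPtr;
size_t bytes = 1 << 20;   /* arbitrary 1 MiB example */

cudaSetDeviceFlags(cudaDeviceMapHost);
cudaMallocHost((void **)&hPtr, bytes, cudaHostAllocMapped);
cudaHostGetDevicePointer((void **)&dPtr, hPtr, 0);
/* ... kernels may dereference dPtr directly ... */
cudaFreeHost(hPtr);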
template<class T> template<class T>
__inline__ __host__ cudaError_t cudaHostAlloc( __inline__ __host__ cudaError_t cudaHostAlloc(
T **ptr, T **ptr,
size_t size, size_t size,
unsigned int flags unsigned int flags
) )
{ {
return cudaHostAlloc((void**)(void*)ptr, size, flags); return cudaHostAlloc((void**)(void*)ptr, size, flags);
} }
skipping to change at line 142 skipping to change at line 248
__inline__ __host__ cudaError_t cudaMalloc( __inline__ __host__ cudaError_t cudaMalloc(
T **devPtr, T **devPtr,
size_t size size_t size
) )
{ {
return cudaMalloc((void**)(void*)devPtr, size); return cudaMalloc((void**)(void*)devPtr, size);
} }
template<class T> template<class T>
__inline__ __host__ cudaError_t cudaMallocHost( __inline__ __host__ cudaError_t cudaMallocHost(
T **ptr, T **ptr,
size_t size size_t size,
unsigned int flags = 0
) )
{ {
return cudaMallocHost((void**)(void*)ptr, size); return cudaMallocHost((void**)(void*)ptr, size, flags);
} }
template<class T> template<class T>
__inline__ __host__ cudaError_t cudaMallocPitch( __inline__ __host__ cudaError_t cudaMallocPitch(
T **devPtr, T **devPtr,
size_t *pitch, size_t *pitch,
size_t width, size_t width,
size_t height size_t height
) )
{ {
skipping to change at line 168 skipping to change at line 275
} }
#if defined(__CUDACC__) #if defined(__CUDACC__)
/************************************************************************** ***** /************************************************************************** *****
* * * *
* * * *
* * * *
*************************************************************************** ****/ *************************************************************************** ****/
/**
* \addtogroup CUDART_HIGHLEVEL
* @{
*/
static __inline__ __host__ cudaError_t cudaMemcpyToSymbol( static __inline__ __host__ cudaError_t cudaMemcpyToSymbol(
char *symbol, char *symbol,
const void *src, const void *src,
size_t count, size_t count,
size_t offset = 0, size_t offset = 0,
enum cudaMemcpyKind kind = cudaMemcpyHostToDevice enum cudaMemcpyKind kind = cudaMemcpyHostToDevice
) )
{ {
return cudaMemcpyToSymbol((const char*)symbol, src, count, offset, kind); return cudaMemcpyToSymbol((const char*)symbol, src, count, offset, kind);
} }
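A sketch of use with a hypothetical __constant__ array; passing the symbol itself resolves to the sibling overload that takes the symbol by reference (compare ::cudaGetSymbolSize below), with offset and kind defaulting as above:

__constant__ float coeffs[16];   /* hypothetical constant-memory table */

static void uploadCoeffs(const float *hostCoeffs)   /* 16 host floats */
{
    cudaMemcpyToSymbol(coeffs, hostCoeffs, 16 * sizeof(float));
}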
skipping to change at line 365 skipping to change at line 467
*/ */
template<class T> template<class T>
__inline__ __host__ cudaError_t cudaGetSymbolSize( __inline__ __host__ cudaError_t cudaGetSymbolSize(
size_t *size, size_t *size,
const T &symbol const T &symbol
) )
{ {
return cudaGetSymbolSize(size, (const char*)&symbol); return cudaGetSymbolSize(size, (const char*)&symbol);
} }
/** @} */ /* END CUDART_HIGHLEVEL */
/************************************************************************** ***** /************************************************************************** *****
* * * *
* * * *
* * * *
*************************************************************************** ****/ *************************************************************************** ****/
/** /**
* \addtogroup CUDART_HIGHLEVEL
*
* @{
*/
/**
* \brief \hl Binds a memory area to a texture * \brief \hl Binds a memory area to a texture
* *
* Binds \p size bytes of the memory area pointed to by \p devPtr to textur e * Binds \p size bytes of the memory area pointed to by \p devPtr to textur e
* reference \p tex. \p desc describes how the memory is interpreted when * reference \p tex. \p desc describes how the memory is interpreted when
* fetching values from the texture. The \p offset parameter is an optional * fetching values from the texture. The \p offset parameter is an optional
* byte offset as with the low-level * byte offset as with the low-level
* \ref ::cudaBindTexture(size_t*, const struct textureReference*, const vo id*, const struct cudaChannelFormatDesc*, size_t) "cudaBindTexture()" * \ref ::cudaBindTexture(size_t*, const struct textureReference*, const vo id*, const struct cudaChannelFormatDesc*, size_t) "cudaBindTexture()"
* function. Any memory previously bound to \p tex is unbound. * function. Any memory previously bound to \p tex is unbound.
* *
* \param offset - Offset in bytes * \param offset - Offset in bytes
skipping to change at line 405 skipping to change at line 499
* \return * \return
* ::cudaSuccess, * ::cudaSuccess,
* ::cudaErrorInvalidValue, * ::cudaErrorInvalidValue,
* ::cudaErrorInvalidDevicePointer, * ::cudaErrorInvalidDevicePointer,
* ::cudaErrorInvalidTexture * ::cudaErrorInvalidTexture
* \notefnerr * \notefnerr
* *
* \sa \ref ::cudaCreateChannelDesc(void) "cudaCreateChannelDesc (C++ API)" , * \sa \ref ::cudaCreateChannelDesc(void) "cudaCreateChannelDesc (C++ API)" ,
* ::cudaGetChannelDesc, ::cudaGetTextureReference, * ::cudaGetChannelDesc, ::cudaGetTextureReference,
* \ref ::cudaBindTexture(size_t*, const struct textureReference*, const vo id*, const struct cudaChannelFormatDesc*, size_t) "cudaBindTexture (C API)" , * \ref ::cudaBindTexture(size_t*, const struct textureReference*, const vo id*, const struct cudaChannelFormatDesc*, size_t) "cudaBindTexture (C API)" ,
* \ref ::cudaBindTexture(size_t*, const struct texture< T, dim, readMode>& * \ref ::cudaBindTexture(size_t*, const struct texture<T, dim, readMode>&,
, const void*, size_t) "cudaBindTexture (C++ API, inherited channel descrip const void*, size_t) "cudaBindTexture (C++ API, inherited channel descript
tor)", or)",
* \ref ::cudaBindTexture2D(size_t*, const struct texture< T, dim, readMode * \ref ::cudaBindTexture2D(size_t*, const struct texture<T, dim, readMode>
>&, const void*, const struct cudaChannelFormatDesc&, size_t, size_t, size_ &, const void*, const struct cudaChannelFormatDesc&, size_t, size_t, size_t
t) "cudaBindTexture2D (C++ API)", ) "cudaBindTexture2D (C++ API)",
* \ref ::cudaBindTextureToArray(const struct texture< T, dim, readMode>&, * \ref ::cudaBindTexture2D(size_t*, const struct texture<T, dim, readMode>
const struct cudaArray*, const struct cudaChannelFormatDesc&) "cudaBindText &, const void*, size_t, size_t, size_t) "cudaBindTexture2D (C++ API, inheri
ureToArray (C++ API)", ted channel descriptor)",
* \ref ::cudaBindTextureToArray(const struct texture< T, dim, readMode>&, * \ref ::cudaBindTextureToArray(const struct texture<T, dim, readMode>&, c
const struct cudaArray*) "cudaBindTextureToArray (C++ API, inherited channe onst struct cudaArray*, const struct cudaChannelFormatDesc&) "cudaBindTextu
l descriptor)", reToArray (C++ API)",
* \ref ::cudaUnbindTexture(const struct texture< T, dim, readMode>&) "cuda * \ref ::cudaBindTextureToArray(const struct texture<T, dim, readMode>&, c
UnbindTexture (C++ API)", onst struct cudaArray*) "cudaBindTextureToArray (C++ API, inherited channel
* \ref ::cudaGetTextureAlignmentOffset(size_t*, const struct texture< T, d descriptor)",
im, readMode >&) "cudaGetTextureAlignmentOffset (C++ API)" * \ref ::cudaUnbindTexture(const struct texture<T, dim, readMode>&) "cudaU
nbindTexture (C++ API)",
* \ref ::cudaGetTextureAlignmentOffset(size_t*, const struct texture<T, di
m, readMode>&) "cudaGetTextureAlignmentOffset (C++ API)"
*/ */
template<class T, int dim, enum cudaTextureReadMode readMode> template<class T, int dim, enum cudaTextureReadMode readMode>
__inline__ __host__ cudaError_t cudaBindTexture( __inline__ __host__ cudaError_t cudaBindTexture(
size_t *offset, size_t *offset,
const struct texture<T, dim, readMode> &tex, const struct texture<T, dim, readMode> &tex,
const void *devPtr, const void *devPtr,
const struct cudaChannelFormatDesc &desc, const struct cudaChannelFormatDesc &desc,
size_t size = UINT_MAX size_t size = UINT_MAX
) )
{ {
skipping to change at line 446 skipping to change at line 541
* \param devPtr - Memory area on device * \param devPtr - Memory area on device
* \param size - Size of the memory area pointed to by devPtr * \param size - Size of the memory area pointed to by devPtr
* *
* \return * \return
* ::cudaSuccess, * ::cudaSuccess,
* ::cudaErrorInvalidValue, * ::cudaErrorInvalidValue,
* ::cudaErrorInvalidDevicePointer, * ::cudaErrorInvalidDevicePointer,
* ::cudaErrorInvalidTexture * ::cudaErrorInvalidTexture
* \notefnerr * \notefnerr
* *
* \sa \ref ::cudaCreateChannelDesc(void) "cudaCreateChannelDesc (C++ API), * \sa \ref ::cudaCreateChannelDesc(void) "cudaCreateChannelDesc (C++ API)" ,
* ::cudaGetChannelDesc, ::cudaGetTextureReference, * ::cudaGetChannelDesc, ::cudaGetTextureReference,
* \ref ::cudaBindTexture(size_t*, const struct textureReference*, const vo id*, const struct cudaChannelFormatDesc*, size_t) "cudaBindTexture (C API)" , * \ref ::cudaBindTexture(size_t*, const struct textureReference*, const vo id*, const struct cudaChannelFormatDesc*, size_t) "cudaBindTexture (C API)" ,
* \ref ::cudaBindTexture(size_t*, const struct texture< T, dim, readMode>& * \ref ::cudaBindTexture(size_t*, const struct texture<T, dim, readMode>&,
, const void*, size_t) "cudaBindTexture (C++ API, inherited channel descrip const void*, const struct cudaChannelFormatDesc&, size_t) "cudaBindTexture
tor)", (C++ API)",
* \ref ::cudaBindTexture2D(size_t*, const struct texture< T, dim, readMode * \ref ::cudaBindTexture2D(size_t*, const struct texture<T, dim, readMode>
>&, const void*, const struct cudaChannelFormatDesc&, size_t, size_t, size_ &, const void*, const struct cudaChannelFormatDesc&, size_t, size_t, size_t
t) "cudaBindTexture2D (C++ API)", ) "cudaBindTexture2D (C++ API)",
* \ref ::cudaBindTextureToArray(const struct texture< T, dim, readMode>&, * \ref ::cudaBindTexture2D(size_t*, const struct texture<T, dim, readMode>
const struct cudaArray*, const struct cudaChannelFormatDesc&) "cudaBindText &, const void*, size_t, size_t, size_t) "cudaBindTexture2D (C++ API, inheri
ureToArray (C++ API)", ted channel descriptor)",
* \ref ::cudaBindTextureToArray(const struct texture< T, dim, readMode>&, * \ref ::cudaBindTextureToArray(const struct texture<T, dim, readMode>&, c
const struct cudaArray*) "cudaBindTextureToArray (C++ API, inherited channe onst struct cudaArray*, const struct cudaChannelFormatDesc&) "cudaBindTextu
l descriptor), reToArray (C++ API)",
* \ref ::cudaBindTextureToArray(const struct texture<T, dim, readMode>&, c
onst struct cudaArray*) "cudaBindTextureToArray (C++ API, inherited channel
descriptor)",
* \ref ::cudaUnbindTexture(const struct texture<T, dim, readMode>&) "cudaU nbindTexture (C++ API)", * \ref ::cudaUnbindTexture(const struct texture<T, dim, readMode>&) "cudaU nbindTexture (C++ API)",
* \ref ::cudaGetTextureAlignmentOffset(size_t*, const struct texture< T, d im, readMode>&) "cudaGetTextureAlignmentOffset (C++ API)" * \ref ::cudaGetTextureAlignmentOffset(size_t*, const struct texture<T, di m, readMode>&) "cudaGetTextureAlignmentOffset (C++ API)"
*/ */
template<class T, int dim, enum cudaTextureReadMode readMode> template<class T, int dim, enum cudaTextureReadMode readMode>
__inline__ __host__ cudaError_t cudaBindTexture( __inline__ __host__ cudaError_t cudaBindTexture(
size_t *offset, size_t *offset,
const struct texture<T, dim, readMode> &tex, const struct texture<T, dim, readMode> &tex,
const void *devPtr, const void *devPtr,
size_t size = UINT_MAX size_t size = UINT_MAX
) )
{ {
return cudaBindTexture(offset, tex, devPtr, tex.channelDesc, size); return cudaBindTexture(offset, tex, devPtr, tex.channelDesc, size);
skipping to change at line 478 skipping to change at line 574
* \brief \hl Binds a 2D memory area to a texture * \brief \hl Binds a 2D memory area to a texture
* *
* Binds the 2D memory area pointed to by \p devPtr to the * Binds the 2D memory area pointed to by \p devPtr to the
* texture reference \p tex. The size of the area is constrained by * texture reference \p tex. The size of the area is constrained by
* \p width in texel units, \p height in texel units, and \p pitch in byte * \p width in texel units, \p height in texel units, and \p pitch in byte
* units. \p desc describes how the memory is interpreted when fetching val ues * units. \p desc describes how the memory is interpreted when fetching val ues
* from the texture. Any memory previously bound to \p tex is unbound. * from the texture. Any memory previously bound to \p tex is unbound.
* *
* Since the hardware enforces an alignment requirement on texture base * Since the hardware enforces an alignment requirement on texture base
* addresses, * addresses,
* \ref ::cudaBindTexture2D(size_t*, const struct texture< T, dim, readMode >&, const void*, const struct cudaChannelFormatDesc&, size_t, size_t, size_ t) "cudaBindTexture2D()" * \ref ::cudaBindTexture2D(size_t*, const struct texture<T, dim, readMode> &, const void*, const struct cudaChannelFormatDesc&, size_t, size_t, size_t ) "cudaBindTexture2D()"
* returns in \p *offset a byte offset that * returns in \p *offset a byte offset that
* must be applied to texture fetches in order to read from the desired mem ory. * must be applied to texture fetches in order to read from the desired mem ory.
* This offset must be divided by the texel size and passed to kernels that * This offset must be divided by the texel size and passed to kernels that
* read from the texture so they can be applied to the ::tex2D() function. * read from the texture so they can be applied to the ::tex2D() function.
* If the device memory pointer was returned from ::cudaMalloc(), the offse t is * If the device memory pointer was returned from ::cudaMalloc(), the offse t is
* guaranteed to be 0 and NULL may be passed as the \p offset parameter. * guaranteed to be 0 and NULL may be passed as the \p offset parameter.
* *
* \param offset - Offset in bytes * \param offset - Offset in bytes
* \param tex - Texture reference to bind * \param tex - Texture reference to bind
* \param devPtr - 2D memory area on device * \param devPtr - 2D memory area on device
skipping to change at line 501 skipping to change at line 597
* \param height - Height in texel units * \param height - Height in texel units
* \param pitch - Pitch in bytes * \param pitch - Pitch in bytes
* *
* \return * \return
* ::cudaSuccess, * ::cudaSuccess,
* ::cudaErrorInvalidValue, * ::cudaErrorInvalidValue,
* ::cudaErrorInvalidDevicePointer, * ::cudaErrorInvalidDevicePointer,
* ::cudaErrorInvalidTexture * ::cudaErrorInvalidTexture
* \notefnerr * \notefnerr
* *
* \sa \ref ::cudaCreateChannelDesc(void) "cudaCreateChannelDesc (C++ API), * \sa \ref ::cudaCreateChannelDesc(void) "cudaCreateChannelDesc (C++ API)" ,
* ::cudaGetChannelDesc, ::cudaGetTextureReference, * ::cudaGetChannelDesc, ::cudaGetTextureReference,
* \ref ::cudaBindTexture(size_t*, const struct texture< T, dim, readMode>& * \ref ::cudaBindTexture(size_t*, const struct texture<T, dim, readMode>&,
, const void*, const struct cudaChannelFormatDesc&, size_t) "cudaBindTextur const void*, const struct cudaChannelFormatDesc&, size_t) "cudaBindTexture
e (C++ API), (C++ API)",
* \ref ::cudaBindTexture(size_t*, const struct texture< T, dim, readMode>& * \ref ::cudaBindTexture(size_t*, const struct texture<T, dim, readMode>&,
, const void*, size_t) "cudaBindTexture (C++ API, inherited channel descrip const void*, size_t) "cudaBindTexture (C++ API, inherited channel descript
tor)", or)",
* \ref ::cudaBindTexture2D(size_t*, const struct textureReference*, const void*, const struct cudaChannelFormatDesc*, size_t, size_t, size_t) "cudaBi ndTexture2D (C API)", * \ref ::cudaBindTexture2D(size_t*, const struct textureReference*, const void*, const struct cudaChannelFormatDesc*, size_t, size_t, size_t) "cudaBi ndTexture2D (C API)",
* \ref ::cudaBindTextureToArray(const struct texture< T, dim, readMode>&, * \ref ::cudaBindTexture2D(size_t*, const struct texture<T, dim, readMode>
const struct cudaArray*, const struct cudaChannelFormatDesc&) "cudaBindText &, const void*, size_t, size_t, size_t) "cudaBindTexture2D (C++ API, inheri
ureToArray (C++ API)", ted channel descriptor)",
* \ref ::cudaBindTextureToArray(const struct texture< T, dim, readMode>&, * \ref ::cudaBindTextureToArray(const struct texture<T, dim, readMode>&, c
const struct cudaArray*) "cudaBindTextureToArray (C++ API, inherited channe onst struct cudaArray*, const struct cudaChannelFormatDesc&) "cudaBindTextu
l descriptor), reToArray (C++ API)",
* \ref ::cudaBindTextureToArray(const struct texture<T, dim, readMode>&, c
onst struct cudaArray*) "cudaBindTextureToArray (C++ API, inherited channel
descriptor)",
* \ref ::cudaUnbindTexture(const struct texture<T, dim, readMode>&) "cudaU nbindTexture (C++ API)", * \ref ::cudaUnbindTexture(const struct texture<T, dim, readMode>&) "cudaU nbindTexture (C++ API)",
* \ref ::cudaGetTextureAlignmentOffset(size_t*, const struct texture< T, d im, readMode>&) "cudaGetTextureAlignmentOffset (C++ API)" * \ref ::cudaGetTextureAlignmentOffset(size_t*, const struct texture<T, di m, readMode>&) "cudaGetTextureAlignmentOffset (C++ API)"
*/ */
template<class T, int dim, enum cudaTextureReadMode readMode> template<class T, int dim, enum cudaTextureReadMode readMode>
__inline__ __host__ cudaError_t cudaBindTexture2D( __inline__ __host__ cudaError_t cudaBindTexture2D(
size_t *offset, size_t *offset,
const struct texture<T, dim, readMode> &tex, const struct texture<T, dim, readMode> &tex,
const void *devPtr, const void *devPtr,
const struct cudaChannelFormatDesc &desc, const struct cudaChannelFormatDesc &desc,
size_t width, size_t width,
size_t height, size_t height,
size_t pitch size_t pitch
) )
{ {
return cudaBindTexture2D( offset, &tex, devPtr, &desc, width, height, pit return cudaBindTexture2D(offset, &tex, devPtr, &desc, width, height, pitc
ch); h);
}
/**
* \brief \hl Binds a 2D memory area to a texture
*
* Binds the 2D memory area pointed to by \p devPtr to the
* texture reference \p tex. The size of the area is constrained by
* \p width in texel units, \p height in texel units, and \p pitch in byte
* units. The channel descriptor is inherited from the texture reference
* type. Any memory previously bound to \p tex is unbound.
*
* Since the hardware enforces an alignment requirement on texture base
* addresses,
 * \ref ::cudaBindTexture2D(size_t*, const struct texture<T, dim, readMode>&, const void*, size_t, size_t, size_t) "cudaBindTexture2D()"
 * returns in \p *offset a byte offset that
 * must be applied to texture fetches in order to read from the desired memory.
 * This offset must be divided by the texel size and passed to kernels that
 * read from the texture so they can be applied to the ::tex2D() function.
 * If the device memory pointer was returned from ::cudaMalloc(), the offset is
 * guaranteed to be 0 and NULL may be passed as the \p offset parameter.
*
* \param offset - Offset in bytes
* \param tex - Texture reference to bind
* \param devPtr - 2D memory area on device
* \param width - Width in texel units
* \param height - Height in texel units
* \param pitch - Pitch in bytes
*
* \return
* ::cudaSuccess,
* ::cudaErrorInvalidValue,
* ::cudaErrorInvalidDevicePointer,
* ::cudaErrorInvalidTexture
* \notefnerr
*
 * \sa \ref ::cudaCreateChannelDesc(void) "cudaCreateChannelDesc (C++ API)",
 * ::cudaGetChannelDesc, ::cudaGetTextureReference,
 * \ref ::cudaBindTexture(size_t*, const struct texture<T, dim, readMode>&, const void*, const struct cudaChannelFormatDesc&, size_t) "cudaBindTexture (C++ API)",
 * \ref ::cudaBindTexture(size_t*, const struct texture<T, dim, readMode>&, const void*, size_t) "cudaBindTexture (C++ API, inherited channel descriptor)",
 * \ref ::cudaBindTexture2D(size_t*, const struct textureReference*, const void*, const struct cudaChannelFormatDesc*, size_t, size_t, size_t) "cudaBindTexture2D (C API)",
 * \ref ::cudaBindTexture2D(size_t*, const struct texture<T, dim, readMode>&, const void*, const struct cudaChannelFormatDesc&, size_t, size_t, size_t) "cudaBindTexture2D (C++ API)",
 * \ref ::cudaBindTextureToArray(const struct texture<T, dim, readMode>&, const struct cudaArray*, const struct cudaChannelFormatDesc&) "cudaBindTextureToArray (C++ API)",
 * \ref ::cudaBindTextureToArray(const struct texture<T, dim, readMode>&, const struct cudaArray*) "cudaBindTextureToArray (C++ API, inherited channel descriptor)",
 * \ref ::cudaUnbindTexture(const struct texture<T, dim, readMode>&) "cudaUnbindTexture (C++ API)",
 * \ref ::cudaGetTextureAlignmentOffset(size_t*, const struct texture<T, dim, readMode>&) "cudaGetTextureAlignmentOffset (C++ API)"
*/
template<class T, int dim, enum cudaTextureReadMode readMode>
__inline__ __host__ cudaError_t cudaBindTexture2D(
size_t *offset,
const struct texture<T, dim, readMode> &tex,
const void *devPtr,
size_t width,
size_t height,
size_t pitch
)
{
  return cudaBindTexture2D(offset, &tex, devPtr, &tex.channelDesc, width, height, pitch);
} }
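A sketch of this inherited-descriptor overload with a hypothetical file-scope texture reference:

texture<float, 2, cudaReadModeElementType> texRef;   /* hypothetical */

static void bindPitched2D(float *devPtr, size_t width, size_t height, size_t pitch)
{
    size_t offset = 0;
    cudaBindTexture2D(&offset, texRef, devPtr, width, height, pitch);
    /* a non-zero offset must be folded into the tex2D() coordinates */
}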
/** /**
* \brief \hl Binds an array to a texture * \brief \hl Binds an array to a texture
* *
* Binds the CUDA array \p array to the texture reference \p tex. * Binds the CUDA array \p array to the texture reference \p tex.
* \p desc describes how the memory is interpreted when fetching values fro m * \p desc describes how the memory is interpreted when fetching values fro m
* the texture. Any CUDA array previously bound to \p tex is unbound. * the texture. Any CUDA array previously bound to \p tex is unbound.
* *
* \param tex - Texture to bind * \param tex - Texture to bind
skipping to change at line 545 skipping to change at line 699
* *
* \return * \return
* ::cudaSuccess, * ::cudaSuccess,
* ::cudaErrorInvalidValue, * ::cudaErrorInvalidValue,
* ::cudaErrorInvalidDevicePointer, * ::cudaErrorInvalidDevicePointer,
* ::cudaErrorInvalidTexture * ::cudaErrorInvalidTexture
* \notefnerr * \notefnerr
* *
* \sa \ref ::cudaCreateChannelDesc(void) "cudaCreateChannelDesc (C++ API)" , * \sa \ref ::cudaCreateChannelDesc(void) "cudaCreateChannelDesc (C++ API)" ,
* ::cudaGetChannelDesc, ::cudaGetTextureReference, * ::cudaGetChannelDesc, ::cudaGetTextureReference,
* \ref ::cudaBindTexture(size_t*, const struct texture< T, dim, readMode>& * \ref ::cudaBindTexture(size_t*, const struct texture<T, dim, readMode>&,
, const void*, const struct cudaChannelFormatDesc&, size_t) "cudaBindTextur const void*, const struct cudaChannelFormatDesc&, size_t) "cudaBindTexture
e (C++ API)", (C++ API)",
* \ref ::cudaBindTexture(size_t*, const struct texture< T, dim, readMode>& * \ref ::cudaBindTexture(size_t*, const struct texture<T, dim, readMode>&,
, const void*, size_t) "cudaBindTexture (C++ API, inherited channel descrip const void*, size_t) "cudaBindTexture (C++ API, inherited channel descript
tor)", or)",
* \ref ::cudaBindTexture2D(size_t*, const struct texture< T, dim, readMode * \ref ::cudaBindTexture2D(size_t*, const struct texture<T, dim, readMode>
>&, const void*, const struct cudaChannelFormatDesc&, size_t, size_t, size_ &, const void*, const struct cudaChannelFormatDesc&, size_t, size_t, size_t
t) "cudaBindTexture2D (C++ API)", ) "cudaBindTexture2D (C++ API)",
* \ref ::cudaBindTexture2D(size_t*, const struct texture<T, dim, readMode>
&, const void*, size_t, size_t, size_t) "cudaBindTexture2D (C++ API, inheri
ted channel descriptor)",
* \ref ::cudaBindTextureToArray(const struct textureReference*, const stru ct cudaArray*, const struct cudaChannelFormatDesc*) "cudaBindTextureToArray (C API)", * \ref ::cudaBindTextureToArray(const struct textureReference*, const stru ct cudaArray*, const struct cudaChannelFormatDesc*) "cudaBindTextureToArray (C API)",
* \ref ::cudaBindTextureToArray(const struct texture< T, dim, readMode>&, * \ref ::cudaBindTextureToArray(const struct texture<T, dim, readMode>&, c
const struct cudaArray*) "cudaBindTextureToArray (C++ API, inherited channe onst struct cudaArray*) "cudaBindTextureToArray (C++ API, inherited channel
l descriptor)", descriptor)",
* \ref ::cudaUnbindTexture(const struct texture< T, dim, readMode>&) "cuda * \ref ::cudaUnbindTexture(const struct texture<T, dim, readMode>&) "cudaU
UnbindTexture (C++ API)", nbindTexture (C++ API)",
* \ref ::cudaGetTextureAlignmentOffset(size_t*, const struct texture< T, d * \ref ::cudaGetTextureAlignmentOffset(size_t*, const struct texture<T, di
im, readMode >&) "cudaGetTextureAlignmentOffset (C++ API)" m, readMode >&) "cudaGetTextureAlignmentOffset (C++ API)"
*/ */
template<class T, int dim, enum cudaTextureReadMode readMode> template<class T, int dim, enum cudaTextureReadMode readMode>
__inline__ __host__ cudaError_t cudaBindTextureToArray( __inline__ __host__ cudaError_t cudaBindTextureToArray(
const struct texture<T, dim, readMode> &tex, const struct texture<T, dim, readMode> &tex,
const struct cudaArray *array, const struct cudaArray *array,
const struct cudaChannelFormatDesc &desc const struct cudaChannelFormatDesc &desc
) )
{ {
return cudaBindTextureToArray(&tex, array, &desc); return cudaBindTextureToArray(&tex, array, &desc);
} }
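A sketch pairing this overload with an explicitly built channel descriptor and array (all names are illustrative):

texture<uchar4, 2, cudaReadModeNormalizedFloat> imgTex;   /* hypothetical */

static void bindImage(const uchar4 *hostPixels, size_t w, size_t h)
{
    struct cudaChannelFormatDesc desc = cudaCreateChannelDesc<uchar4>();
    struct cudaArray *arr;

    cudaMallocArray(&arr, &desc, w, h);
    cudaMemcpyToArray(arr, 0, 0, hostPixels, w * h * sizeof(uchar4),
                      cudaMemcpyHostToDevice);
    cudaBindTextureToArray(imgTex, arr, desc);
}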
skipping to change at line 582 skipping to change at line 737
* *
* \return * \return
* ::cudaSuccess, * ::cudaSuccess,
* ::cudaErrorInvalidValue, * ::cudaErrorInvalidValue,
* ::cudaErrorInvalidDevicePointer, * ::cudaErrorInvalidDevicePointer,
* ::cudaErrorInvalidTexture * ::cudaErrorInvalidTexture
* \notefnerr * \notefnerr
* *
* \sa \ref ::cudaCreateChannelDesc(void) "cudaCreateChannelDesc (C++ API)" , * \sa \ref ::cudaCreateChannelDesc(void) "cudaCreateChannelDesc (C++ API)" ,
* ::cudaGetChannelDesc, ::cudaGetTextureReference, * ::cudaGetChannelDesc, ::cudaGetTextureReference,
* \ref ::cudaBindTexture(size_t*, const struct texture< T, dim, readMode>& * \ref ::cudaBindTexture(size_t*, const struct texture<T, dim, readMode>&,
, const void*, const struct cudaChannelFormatDesc&, size_t) "cudaBindTextur const void*, const struct cudaChannelFormatDesc&, size_t) "cudaBindTexture
e (C++ API)", (C++ API)",
* \ref ::cudaBindTexture(size_t*, const struct texture< T, dim, readMode>& * \ref ::cudaBindTexture(size_t*, const struct texture<T, dim, readMode>&,
, const void*, size_t) "cudaBindTexture (C++ API, inherited channel descrip const void*, size_t) "cudaBindTexture (C++ API, inherited channel descript
tor)", or)",
* \ref ::cudaBindTexture2D(size_t*, const struct texture< T, dim, readMode * \ref ::cudaBindTexture2D(size_t*, const struct texture<T, dim, readMode>
>&, const void*, const struct cudaChannelFormatDesc&, size_t, size_t, size_ &, const void*, const struct cudaChannelFormatDesc&, size_t, size_t, size_t
t) "cudaBindTexture2D (C++ API)", ) "cudaBindTexture2D (C++ API)",
* \ref ::cudaBindTexture2D(size_t*, const struct texture<T, dim, readMode>
&, const void*, size_t, size_t, size_t) "cudaBindTexture2D (C++ API, inheri
ted channel descriptor)",
* \ref ::cudaBindTextureToArray(const struct textureReference*, const stru ct cudaArray*, const struct cudaChannelFormatDesc*) "cudaBindTextureToArray (C API)", * \ref ::cudaBindTextureToArray(const struct textureReference*, const stru ct cudaArray*, const struct cudaChannelFormatDesc*) "cudaBindTextureToArray (C API)",
* \ref ::cudaBindTextureToArray(const struct texture< T, dim, readMode>&, * \ref ::cudaBindTextureToArray(const struct texture<T, dim, readMode>&, c
const struct cudaArray*, const struct cudaChannelFormatDesc&) "cudaBindText onst struct cudaArray*, const struct cudaChannelFormatDesc&) "cudaBindTextu
ureToArray (C++ API)", reToArray (C++ API)",
* \ref ::cudaUnbindTexture(const struct texture< T, dim, readMode>&) "cuda * \ref ::cudaUnbindTexture(const struct texture<T, dim, readMode>&) "cudaU
UnbindTexture (C++ API)", nbindTexture (C++ API)",
* \ref ::cudaGetTextureAlignmentOffset(size_t*, const struct texture< T, d * \ref ::cudaGetTextureAlignmentOffset(size_t*, const struct texture<T, di
im, readMode >&) "cudaGetTextureAlignmentOffset (C++ API)" m, readMode >&) "cudaGetTextureAlignmentOffset (C++ API)"
*/ */
template<class T, int dim, enum cudaTextureReadMode readMode> template<class T, int dim, enum cudaTextureReadMode readMode>
__inline__ __host__ cudaError_t cudaBindTextureToArray( __inline__ __host__ cudaError_t cudaBindTextureToArray(
const struct texture<T, dim, readMode> &tex, const struct texture<T, dim, readMode> &tex,
const struct cudaArray *array const struct cudaArray *array
) )
{ {
struct cudaChannelFormatDesc desc; struct cudaChannelFormatDesc desc;
cudaError_t err = cudaGetChannelDesc(&desc, array); cudaError_t err = cudaGetChannelDesc(&desc, array);
skipping to change at line 620 skipping to change at line 776
* *
* Unbinds the texture bound to \p tex. * Unbinds the texture bound to \p tex.
* *
* \param tex - Texture to unbind * \param tex - Texture to unbind
* *
* \return ::cudaSuccess * \return ::cudaSuccess
* \notefnerr * \notefnerr
* *
* \sa \ref ::cudaCreateChannelDesc(void) "cudaCreateChannelDesc (C++ API)" , * \sa \ref ::cudaCreateChannelDesc(void) "cudaCreateChannelDesc (C++ API)" ,
* ::cudaGetChannelDesc, ::cudaGetTextureReference, * ::cudaGetChannelDesc, ::cudaGetTextureReference,
* \ref ::cudaBindTexture(size_t*, const struct texture< T, dim, readMode>& * \ref ::cudaBindTexture(size_t*, const struct texture<T, dim, readMode>&,
, const void*, const struct cudaChannelFormatDesc&, size_t) "cudaBindTextur const void*, const struct cudaChannelFormatDesc&, size_t) "cudaBindTexture
e (C++ API)", (C++ API)",
* \ref ::cudaBindTexture(size_t*, const struct texture< T, dim, readMode>& * \ref ::cudaBindTexture(size_t*, const struct texture<T, dim, readMode>&,
, const void*, size_t) "cudaBindTexture (C++ API, inherited channel descrip const void*, size_t) "cudaBindTexture (C++ API, inherited channel descript
tor)", or)",
* \ref ::cudaBindTexture2D(size_t*, const struct texture< T, dim, readMode * \ref ::cudaBindTexture2D(size_t*, const struct texture<T, dim, readMode>
>&, const void*, const struct cudaChannelFormatDesc&, size_t, size_t, size_ &, const void*, const struct cudaChannelFormatDesc&, size_t, size_t, size_t
t) "cudaBindTexture2D (C++ API)", ) "cudaBindTexture2D (C++ API)",
* \ref ::cudaBindTextureToArray(const struct texture< T, dim, readMode>&, * \ref ::cudaBindTexture2D(size_t*, const struct texture<T, dim, readMode>
const struct cudaArray*, const struct cudaChannelFormatDesc&) "cudaBindText &, const void*, size_t, size_t, size_t) "cudaBindTexture2D (C++ API, inheri
ureToArray (C++ API)", ted channel descriptor)",
* \ref ::cudaBindTextureToArray(const struct texture< T, dim, readMode>&, * \ref ::cudaBindTextureToArray(const struct texture<T, dim, readMode>&, c
const struct cudaArray*) "cudaBindTextureToArray (C++ API, inherited channe onst struct cudaArray*, const struct cudaChannelFormatDesc&) "cudaBindTextu
l descriptor)", reToArray (C++ API)",
* \ref ::cudaBindTextureToArray(const struct texture<T, dim, readMode>&, c
onst struct cudaArray*) "cudaBindTextureToArray (C++ API, inherited channel
descriptor)",
* \ref ::cudaUnbindTexture(const struct textureReference*) "cudaUnbindText ure (C API)", * \ref ::cudaUnbindTexture(const struct textureReference*) "cudaUnbindText ure (C API)",
* \ref ::cudaGetTextureAlignmentOffset(size_t*, const struct texture< T, d im, readMode >&) "cudaGetTextureAlignmentOffset (C++ API)" * \ref ::cudaGetTextureAlignmentOffset(size_t*, const struct texture<T, di m, readMode >&) "cudaGetTextureAlignmentOffset (C++ API)"
*/ */
template<class T, int dim, enum cudaTextureReadMode readMode> template<class T, int dim, enum cudaTextureReadMode readMode>
__inline__ __host__ cudaError_t cudaUnbindTexture( __inline__ __host__ cudaError_t cudaUnbindTexture(
const struct texture<T, dim, readMode> &tex const struct texture<T, dim, readMode> &tex
) )
{ {
return cudaUnbindTexture(&tex); return cudaUnbindTexture(&tex);
} }
/************************************************************************** ***** /************************************************************************** *****
skipping to change at line 659 skipping to change at line 816
* \param tex - Texture to get offset of * \param tex - Texture to get offset of
* *
* \return * \return
* ::cudaSuccess, * ::cudaSuccess,
* ::cudaErrorInvalidTexture, * ::cudaErrorInvalidTexture,
* ::cudaErrorInvalidTextureBinding * ::cudaErrorInvalidTextureBinding
* \notefnerr * \notefnerr
* *
* \sa \ref ::cudaCreateChannelDesc(void) "cudaCreateChannelDesc (C++ API)" , * \sa \ref ::cudaCreateChannelDesc(void) "cudaCreateChannelDesc (C++ API)" ,
* ::cudaGetChannelDesc, ::cudaGetTextureReference, * ::cudaGetChannelDesc, ::cudaGetTextureReference,
* \ref ::cudaBindTexture(size_t*, const struct texture< T, dim, readMode>& * \ref ::cudaBindTexture(size_t*, const struct texture<T, dim, readMode>&,
, const void*, const struct cudaChannelFormatDesc&, size_t) "cudaBindTextur const void*, const struct cudaChannelFormatDesc&, size_t) "cudaBindTexture
e (C++ API)", (C++ API)",
* \ref ::cudaBindTexture(size_t*, const struct texture< T, dim, readMode>& * \ref ::cudaBindTexture(size_t*, const struct texture<T, dim, readMode>&,
, const void*, size_t) "cudaBindTexture (C++ API, inherited channel descrip const void*, size_t) "cudaBindTexture (C++ API, inherited channel descript
tor)", or)",
* \ref ::cudaBindTexture2D(size_t*, const struct texture< T, dim, readMode * \ref ::cudaBindTexture2D(size_t*, const struct texture<T, dim, readMode>
>&, const void*, const struct cudaChannelFormatDesc&, size_t, size_t, size_ &, const void*, const struct cudaChannelFormatDesc&, size_t, size_t, size_t
t) "cudaBindTexture2D (C++ API)", ) "cudaBindTexture2D (C++ API)",
* \ref ::cudaBindTextureToArray(const struct texture< T, dim, readMode>&, * \ref ::cudaBindTexture2D(size_t*, const struct texture<T, dim, readMode>
const struct cudaArray*, const struct cudaChannelFormatDesc&) "cudaBindText &, const void*, size_t, size_t, size_t) "cudaBindTexture2D (C++ API, inheri
ureToArray (C++ API)", ted channel descriptor)",
* \ref ::cudaBindTextureToArray(const struct texture< T, dim, readMode>&, * \ref ::cudaBindTextureToArray(const struct texture<T, dim, readMode>&, c
const struct cudaArray*) "cudaBindTextureToArray (C++ API, inherited channe onst struct cudaArray*, const struct cudaChannelFormatDesc&) "cudaBindTextu
l descriptor)", reToArray (C++ API)",
* \ref ::cudaUnbindTexture(const struct texture< T, dim, readMode>&) "cuda * \ref ::cudaBindTextureToArray(const struct texture<T, dim, readMode>&, c
UnbindTexture (C++ API)", onst struct cudaArray*) "cudaBindTextureToArray (C++ API, inherited channel
descriptor)",
* \ref ::cudaUnbindTexture(const struct texture<T, dim, readMode>&) "cudaU
nbindTexture (C++ API)",
* \ref ::cudaGetTextureAlignmentOffset(size_t*, const struct textureRefere nce*) "cudaGetTextureAlignmentOffset (C API)" * \ref ::cudaGetTextureAlignmentOffset(size_t*, const struct textureRefere nce*) "cudaGetTextureAlignmentOffset (C API)"
*/ */
template<class T, int dim, enum cudaTextureReadMode readMode> template<class T, int dim, enum cudaTextureReadMode readMode>
__inline__ __host__ cudaError_t cudaGetTextureAlignmentOffset( __inline__ __host__ cudaError_t cudaGetTextureAlignmentOffset(
size_t *offset, size_t *offset,
const struct texture<T, dim, readMode> &tex const struct texture<T, dim, readMode> &tex
) )
{ {
return cudaGetTextureAlignmentOffset(offset, &tex); return cudaGetTextureAlignmentOffset(offset, &tex);
} }
/** @} */ /* END CUDART_HIGHLEVEL */
/************************************************************************** ***** /************************************************************************** *****
* * * *
* * * *
* * * *
*************************************************************************** ****/ *************************************************************************** ****/
/** /**
* \ingroup CUDART_HIGHLEVEL
* \brief Sets the preferred cache configuration for a device function * \brief Sets the preferred cache configuration for a device function
* *
* On devices where the L1 cache and shared memory use the same hardware * On devices where the L1 cache and shared memory use the same hardware
* resources, this sets through \p cacheConfig the preferred cache configur ation * resources, this sets through \p cacheConfig the preferred cache configur ation
* for the function specified via \p func. This is only a preference. The * for the function specified via \p func. This is only a preference. The
* runtime will use the requested configuration if possible, but it is free to * runtime will use the requested configuration if possible, but it is free to
* choose a different configuration if required to execute \p func. * choose a different configuration if required to execute \p func.
* *
* \p func can either be a pointer to a function that executes * \p func can either be a pointer to a function that executes
* on the device, or it can be a character string specifying the * on the device, or it can be a character string specifying the
* fully-decorated (C++) name for a function that executes on the device. * fully-decorated (C++) name for a function that executes on the device.
* The parameter specified by \p func must be declared as a \p __global__ * The parameter specified by \p func must be declared as a \p __global__
* function. If the specified function does not exist, * function. If the specified function does not exist,
* then ::cudaErrorInvalidDeviceFunction is returned. * then ::cudaErrorInvalidDeviceFunction is returned.
* *
* This setting does nothing on devices where the size of the L1 cache and * This setting does nothing on devices where the size of the L1 cache and
* shared memory are fixed. * shared memory are fixed.
* *
 * Launching a kernel with a different preference than the most recent
 * preference setting may insert a device-side synchronization point.
 *
 * The supported cache configurations are:
 * - ::cudaFuncCachePreferNone: no preference for shared memory or L1 (default)
 * - ::cudaFuncCachePreferShared: prefer larger shared memory and smaller L1 cache
 * - ::cudaFuncCachePreferL1: prefer larger L1 cache and smaller shared memory
 *
 * \param func        - Char string naming device function
 * \param cacheConfig - Requested cache configuration
* *
* \return * \return
* ::cudaSuccess, * ::cudaSuccess,
* ::cudaErrorInitializationError, * ::cudaErrorInitializationError,
* ::cudaErrorInvalidDeviceFunction * ::cudaErrorInvalidDeviceFunction
* \notefnerr * \notefnerr
* *
* \sa ::cudaConfigureCall, * \sa ::cudaConfigureCall,
* \ref ::cudaFuncSetCacheConfig(const char*, enum cudaFuncCache) "cudaFunc SetCacheConfig (C API)", * \ref ::cudaFuncSetCacheConfig(const char*, enum cudaFuncCache) "cudaFunc SetCacheConfig (C API)",
* \ref ::cudaFuncGetAttributes(struct cudaFuncAttributes*, T*) "cudaFuncGe tAttributes (C++ API)", * \ref ::cudaFuncGetAttributes(struct cudaFuncAttributes*, T*) "cudaFuncGe tAttributes (C++ API)",
* \ref ::cudaLaunch(const char*) "cudaLaunch (C API)", * \ref ::cudaLaunch(const char*) "cudaLaunch (C API)",
* ::cudaSetDoubleForDevice, * ::cudaSetDoubleForDevice,
* ::cudaSetDoubleForHost, * ::cudaSetDoubleForHost,
* \ref ::cudaSetupArgument(T,size_t) "cudaSetupArgument (C++ API)" * \ref ::cudaSetupArgument(T, size_t) "cudaSetupArgument (C++ API)",
* ::cudaThreadGetCacheConfig,
* ::cudaThreadSetCacheConfig
*/ */
template<class T> template<class T>
__inline__ __host__ cudaError_t cudaFuncSetCacheConfig( __inline__ __host__ cudaError_t cudaFuncSetCacheConfig(
T *func, T *func,
enum cudaFuncCache cacheConfig enum cudaFuncCache cacheConfig
) )
{ {
return cudaFuncSetCacheConfig((const char*)func, cacheConfig); return cudaFuncSetCacheConfig((const char*)func, cacheConfig);
} }
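As a concrete illustration of the template above, a minimal usage sketch
(MyKernel is a hypothetical __global__ function, not part of this header):

    __global__ void MyKernel(float *data) { /* ... */ }

    void configureCache(void)
    {
        /* Hint that MyKernel prefers a larger L1 cache; the runtime may
           still choose another configuration if required. */
        cudaError_t err = cudaFuncSetCacheConfig(MyKernel, cudaFuncCachePreferL1);
        if (err != cudaSuccess) {
            /* e.g. cudaErrorInvalidDeviceFunction if MyKernel is unknown */
        }
    }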
/** /**
* \ingroup CUDART_HIGHLEVEL
* \brief \hl Launches a device function * \brief \hl Launches a device function
* *
* Launches the function \p entry on the device. The parameter \p entry can * Launches the function \p entry on the device. The parameter \p entry can
* either be a function that executes on the device, or it can be a charact er * either be a function that executes on the device, or it can be a charact er
* string, naming a function that executes on the device. The parameter * string, naming a function that executes on the device. The parameter
* specified by \p entry must be declared as a \p __global__ function. * specified by \p entry must be declared as a \p __global__ function.
* \ref ::cudaLaunch(T*) "cudaLaunch()" must be preceded by a call to * \ref ::cudaLaunch(T*) "cudaLaunch()" must be preceded by a call to
* ::cudaConfigureCall() since it pops the data that was pushed by * ::cudaConfigureCall() since it pops the data that was pushed by
* ::cudaConfigureCall() from the execution stack. * ::cudaConfigureCall() from the execution stack.
* *
* \param entry - Device function pointer or char string naming device func tion * \param entry - Device function pointer or char string naming device func tion
* to execute * to execute
* *
* \return * \return
* ::cudaSuccess, * ::cudaSuccess,
* ::cudaErrorInvalidDeviceFunction, * ::cudaErrorInvalidDeviceFunction,
* ::cudaErrorInvalidConfiguration, * ::cudaErrorInvalidConfiguration,
* ::cudaErrorLaunchFailure, * ::cudaErrorLaunchFailure,
* ::cudaErrorPriorLaunchFailure,
* ::cudaErrorLaunchTimeout, * ::cudaErrorLaunchTimeout,
* ::cudaErrorLaunchOutOfResources, * ::cudaErrorLaunchOutOfResources,
* ::cudaErrorSharedObjectSymbolNotFound, * ::cudaErrorSharedObjectSymbolNotFound,
* ::cudaErrorSharedObjectInitFailed * ::cudaErrorSharedObjectInitFailed
* \notefnerr * \notefnerr
* *
* \sa ::cudaConfigureCall, * \sa ::cudaConfigureCall,
* \ref ::cudaFuncSetCacheConfig(T*, enum cudaFuncCache) "cudaFuncSetCacheC onfig (C++ API)", * \ref ::cudaFuncSetCacheConfig(T*, enum cudaFuncCache) "cudaFuncSetCacheC onfig (C++ API)",
* \ref ::cudaFuncGetAttributes(struct cudaFuncAttributes*, T*) "cudaFuncGe tAttributes (C++ API)", * \ref ::cudaFuncGetAttributes(struct cudaFuncAttributes*, T*) "cudaFuncGe tAttributes (C++ API)",
* \ref ::cudaLaunch(const char*) "cudaLaunch (C API)", * \ref ::cudaLaunch(const char*) "cudaLaunch (C API)",
* ::cudaSetDoubleForDevice, * ::cudaSetDoubleForDevice,
* ::cudaSetDoubleForHost, * ::cudaSetDoubleForHost,
* \ref ::cudaSetupArgument(T,size_t) "cudaSetupArgument (C++ API)" * \ref ::cudaSetupArgument(T, size_t) "cudaSetupArgument (C++ API)",
* ::cudaThreadGetCacheConfig,
* ::cudaThreadSetCacheConfig
*/ */
template<class T> template<class T>
__inline__ __host__ cudaError_t cudaLaunch( __inline__ __host__ cudaError_t cudaLaunch(
T *entry T *entry
) )
{ {
return cudaLaunch((const char*)entry); return cudaLaunch((const char*)entry);
} }
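For orientation, a hedged sketch of the launch sequence these calls implement
(kernel and arg are hypothetical names; the <<<...>>> execution configuration
syntax expands to essentially this sequence):

    __global__ void kernel(int *arg);

    void launchKernel(int *arg)
    {
        dim3 grid(16), block(256);
        cudaConfigureCall(grid, block);  /* push the launch configuration    */
        cudaSetupArgument(arg, 0);       /* push the argument at offset 0    */
        cudaLaunch(kernel);              /* pop configuration + args, launch */
    }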
/** /**
* \ingroup CUDART_HIGHLEVEL
* \brief \hl Find out attributes for a given function * \brief \hl Find out attributes for a given function
* *
* This function obtains the attributes of a function specified via \p entr y. * This function obtains the attributes of a function specified via \p entr y.
* The parameter \p entry can either be a pointer to a function that execut es * The parameter \p entry can either be a pointer to a function that execut es
* on the device, or it can be a character string specifying the * on the device, or it can be a character string specifying the
* fully-decorated (C++) name of a function that executes on the device. Th e * fully-decorated (C++) name of a function that executes on the device. Th e
* parameter specified by \p entry must be declared as a \p __global__ * parameter specified by \p entry must be declared as a \p __global__
* function. The fetched attributes are placed in \p attr. If the specified * function. The fetched attributes are placed in \p attr. If the specified
* function does not exist, then ::cudaErrorInvalidDeviceFunction is return ed. * function does not exist, then ::cudaErrorInvalidDeviceFunction is return ed.
* *
* Note that some function attributes such as
* \ref ::cudaFuncAttributes::maxThreadsPerBlock "maxThreadsPerBlock"
* may vary based on the device that is currently being used.
*
* \param attr - Return pointer to function's attributes * \param attr - Return pointer to function's attributes
* \param entry - Function to get attributes of * \param entry - Function to get attributes of
* *
* \return * \return
* ::cudaSuccess, * ::cudaSuccess,
* ::cudaErrorInitializationError, * ::cudaErrorInitializationError,
* ::cudaErrorInvalidDeviceFunction * ::cudaErrorInvalidDeviceFunction
* \notefnerr * \notefnerr
* *
* \sa ::cudaConfigureCall, * \sa ::cudaConfigureCall,
* \ref ::cudaFuncSetCacheConfig(T*, enum cudaFuncCache) "cudaFuncSetCacheC onfig (C++ API)", * \ref ::cudaFuncSetCacheConfig(T*, enum cudaFuncCache) "cudaFuncSetCacheC onfig (C++ API)",
* \ref ::cudaFuncGetAttributes(struct cudaFuncAttributes*, const char*) "c udaFuncGetAttributes (C API)", * \ref ::cudaFuncGetAttributes(struct cudaFuncAttributes*, const char*) "c udaFuncGetAttributes (C API)",
* \ref ::cudaLaunch(T*) "cudaLaunch (C++ API)", * \ref ::cudaLaunch(T*) "cudaLaunch (C++ API)",
* ::cudaSetDoubleForDevice, * ::cudaSetDoubleForDevice,
* ::cudaSetDoubleForHost, * ::cudaSetDoubleForHost,
* \ref ::cudaSetupArgument(T,size_t) "cudaSetupArgument (C++ API)" * \ref ::cudaSetupArgument(T, size_t) "cudaSetupArgument (C++ API)"
*/ */
template<class T> template<class T>
__inline__ __host__ cudaError_t cudaFuncGetAttributes( __inline__ __host__ cudaError_t cudaFuncGetAttributes(
struct cudaFuncAttributes *attr, struct cudaFuncAttributes *attr,
T *entry T *entry
) )
{ {
return cudaFuncGetAttributes(attr, (const char*)entry); return cudaFuncGetAttributes(attr, (const char*)entry);
} }
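A short usage sketch (MyKernel again stands in for any __global__ function);
note that maxThreadsPerBlock reflects the currently selected device:

    struct cudaFuncAttributes attr;
    if (cudaFuncGetAttributes(&attr, MyKernel) == cudaSuccess) {
        /* Clamp the launch to what this kernel can actually support. */
        int threadsPerBlock = attr.maxThreadsPerBlock;
        (void)threadsPerBlock;
    }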
/** /**
* \ingroup CUDART_HIGHLEVEL
* \brief \hl Binds an array to a surface * \brief \hl Binds an array to a surface
* *
* Binds the CUDA array \p array to the surface reference \p surf. * Binds the CUDA array \p array to the surface reference \p surf.
* \p desc describes how the memory is interpreted when dealing with * \p desc describes how the memory is interpreted when dealing with
* the surface. Any CUDA array previously bound to \p surf is unbound. * the surface. Any CUDA array previously bound to \p surf is unbound.
* *
* \param surf - Surface to bind * \param surf - Surface to bind
* \param array - Memory array on device * \param array - Memory array on device
* \param desc - Channel format * \param desc - Channel format
* *
* \return * \return
* ::cudaSuccess, * ::cudaSuccess,
* ::cudaErrorInvalidValue, * ::cudaErrorInvalidValue,
* ::cudaErrorInvalidSurface * ::cudaErrorInvalidSurface
* \notefnerr * \notefnerr
* *
* \sa \ref ::cudaBindSurfaceToArray(const struct surfaceReference*, const struct cudaArray*, const struct cudaChannelFormatDesc*) "cudaBindSurfaceToA rray (C API)", * \sa \ref ::cudaBindSurfaceToArray(const struct surfaceReference*, const struct cudaArray*, const struct cudaChannelFormatDesc*) "cudaBindSurfaceToA rray (C API)",
 * \ref ::cudaBindSurfaceToArray(const struct surface<T, dim>&, const struct cudaArray*) "cudaBindSurfaceToArray (C++ API, inherited channel descriptor)"
*/ */
template<class T, int dim> template<class T, int dim>
__inline__ __host__ cudaError_t cudaBindSurfaceToArray( __inline__ __host__ cudaError_t cudaBindSurfaceToArray(
const struct surface<T, dim> &surf, const struct surface<T, dim> &surf,
const struct cudaArray *array, const struct cudaArray *array,
const struct cudaChannelFormatDesc &desc const struct cudaChannelFormatDesc &desc
) )
{ {
return cudaBindSurfaceToArray(&surf, array, &desc); return cudaBindSurfaceToArray(&surf, array, &desc);
} }
/** /**
* \ingroup CUDART_HIGHLEVEL
* \brief \hl Binds an array to a surface * \brief \hl Binds an array to a surface
* *
* Binds the CUDA array \p array to the surface reference \p surf. * Binds the CUDA array \p array to the surface reference \p surf.
* The channel descriptor is inherited from the CUDA array. Any CUDA array * The channel descriptor is inherited from the CUDA array. Any CUDA array
* previously bound to \p surf is unbound. * previously bound to \p surf is unbound.
* *
* \param surf - Surface to bind * \param surf - Surface to bind
* \param array - Memory array on device * \param array - Memory array on device
* *
* \return * \return
* ::cudaSuccess, * ::cudaSuccess,
* ::cudaErrorInvalidValue, * ::cudaErrorInvalidValue,
* ::cudaErrorInvalidSurface * ::cudaErrorInvalidSurface
* \notefnerr * \notefnerr
* *
* \sa \ref ::cudaBindSurfaceToArray(const struct surfaceReference*, const struct cudaArray*, const struct cudaChannelFormatDesc*) "cudaBindSurfaceToA rray (C API)", * \sa \ref ::cudaBindSurfaceToArray(const struct surfaceReference*, const struct cudaArray*, const struct cudaChannelFormatDesc*) "cudaBindSurfaceToA rray (C API)",
 * \ref ::cudaBindSurfaceToArray(const struct surface<T, dim>&, const struct cudaArray*, const struct cudaChannelFormatDesc&) "cudaBindSurfaceToArray (C++ API)"
*/ */
template<class T, int dim> template<class T, int dim>
__inline__ __host__ cudaError_t cudaBindSurfaceToArray( __inline__ __host__ cudaError_t cudaBindSurfaceToArray(
const struct surface<T, dim> &surf, const struct surface<T, dim> &surf,
const struct cudaArray *array const struct cudaArray *array
) )
{ {
struct cudaChannelFormatDesc desc; struct cudaChannelFormatDesc desc;
cudaError_t err = cudaGetChannelDesc(&desc, array); cudaError_t err = cudaGetChannelDesc(&desc, array);
return err == cudaSuccess ? cudaBindSurfaceToArray(surf, array, desc) : e rr; return err == cudaSuccess ? cudaBindSurfaceToArray(surf, array, desc) : e rr;
} }
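A hedged sketch of the inherited-descriptor overload (outputSurface is a
hypothetical file-scope surface reference, arr a previously created array):

    surface<void, 2> outputSurface;

    void bindOutput(struct cudaArray *arr)
    {
        /* The channel descriptor is taken from arr itself. */
        cudaError_t err = cudaBindSurfaceToArray(outputSurface, arr);
        if (err != cudaSuccess) {
            /* e.g. cudaErrorInvalidSurface */
        }
    }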
#endif /* __CUDACC__ */ #endif /* __CUDACC__ */
/** @} */ /* END CUDART_HIGHLEVEL */
#endif /* __cplusplus */ #endif /* __cplusplus */
#endif /* !__CUDA_RUNTIME_H__ */ #endif /* !__CUDA_RUNTIME_H__ */
 End of changes. 41 change blocks. 
147 lines changed or deleted 367 lines changed or added


 cuda_vdpau_interop.h   cuda_vdpau_interop.h 
skipping to change at line 39 skipping to change at line 39
* source code with only those rights set forth herein. * source code with only those rights set forth herein.
* *
* Any use of this source code in individual and commercial software must * Any use of this source code in individual and commercial software must
* include, in the user documentation and internal comments to the code, * include, in the user documentation and internal comments to the code,
* the above Disclaimer and U.S. Government End Users Notice. * the above Disclaimer and U.S. Government End Users Notice.
*/ */
#if !defined(__CUDA_VDPAU_INTEROP_H__) #if !defined(__CUDA_VDPAU_INTEROP_H__)
#define __CUDA_VDPAU_INTEROP_H__ #define __CUDA_VDPAU_INTEROP_H__
/*******************************************************************************
*
*
*
*
*
*
*******************************************************************************/
#include "builtin_types.h" #include "builtin_types.h"
#include "host_defines.h" #include "host_defines.h"
#include <vdpau/vdpau.h> #include <vdpau/vdpau.h>
#if defined(__cplusplus) #if defined(__cplusplus)
extern "C" { extern "C" {
#endif /* __cplusplus */ #endif /* __cplusplus */
/**
 * \addtogroup CUDART_VDPAU VDPAU Interoperability
 * This section describes the VDPAU interoperability functions of the CUDA
 * runtime application programming interface.
 *
 * @{
 */
/**
* \brief Gets the CUDA device associated with a VdpDevice.
*
* Returns the CUDA device associated with a VdpDevice, if applicable.
*
* \param device - Returns the device associated with vdpDevice, or -1 if
* the device associated with vdpDevice is not a compute device.
* \param vdpDevice - A VdpDevice handle
* \param vdpGetProcAddress - VDPAU's VdpGetProcAddress function pointer
*
* \return
* ::cudaSuccess
* \notefnerr
*
* \sa ::cudaVDPAUSetVDPAUDevice
*/
extern __host__ cudaError_t CUDARTAPI cudaVDPAUGetDevice(int *device, VdpDe vice vdpDevice, VdpGetProcAddress *vdpGetProcAddress); extern __host__ cudaError_t CUDARTAPI cudaVDPAUGetDevice(int *device, VdpDe vice vdpDevice, VdpGetProcAddress *vdpGetProcAddress);
/**
* \brief Sets the CUDA device for use with VDPAU interoperability
*
* Records \p device as the device on which the active host thread executes
* the device code. Records the thread as using VDPAU interoperability.
* If the host thread has already initialized the CUDA runtime by
* calling non-device management runtime functions or if there exists a CUD
A
* driver context active on the host thread, then this call returns
* ::cudaErrorSetOnActiveProcess.
*
* \param device - Device to use for VDPAU interoperability
* \param vdpDevice - The VdpDevice to interoperate with
* \param vdpGetProcAddress - VDPAU's VdpGetProcAddress function pointer
*
* \return
* ::cudaSuccess,
* ::cudaErrorInvalidDevice,
* ::cudaErrorSetOnActiveProcess
* \notefnerr
*
* \sa ::cudaGraphicsVDPAURegisterVideoSurface,
* ::cudaGraphicsVDPAURegisterOutputSurface
*/
extern __host__ cudaError_t CUDARTAPI cudaVDPAUSetVDPAUDevice(int device, V dpDevice vdpDevice, VdpGetProcAddress *vdpGetProcAddress); extern __host__ cudaError_t CUDARTAPI cudaVDPAUSetVDPAUDevice(int device, V dpDevice vdpDevice, VdpGetProcAddress *vdpGetProcAddress);
/**
* \brief Register a VdpVideoSurface object
*
* Registers the VdpVideoSurface specified by \p vdpSurface for access by C
UDA.
* A handle to the registered object is returned as \p resource.
* The surface's intended usage is specified using \p flags, as follows:
*
* - ::cudaGraphicsMapFlagsNone: Specifies no hints about how this
* resource will be used. It is therefore assumed that this resource will
be
* read from and written to by CUDA. This is the default value.
* - ::cudaGraphicsMapFlagsReadOnly: Specifies that CUDA
* will not write to this resource.
* - ::cudaGraphicsMapFlagsWriteDiscard: Specifies that
* CUDA will not read from this resource and will write over the
* entire contents of the resource, so none of the data previously
* stored in the resource will be preserved.
*
* \param resource - Pointer to the returned object handle
* \param vdpSurface - VDPAU object to be registered
* \param flags - Map flags
*
* \return
* ::cudaSuccess,
* ::cudaErrorInvalidDevice,
* ::cudaErrorInvalidValue,
* ::cudaErrorInvalidResourceHandle,
* ::cudaErrorUnknown
* \notefnerr
*
* \sa
* ::cudaVDPAUSetVDPAUDevice
* ::cudaGraphicsUnregisterResource,
* ::cudaGraphicsSubResourceGetMappedArray
*/
extern __host__ cudaError_t CUDARTAPI cudaGraphicsVDPAURegisterVideoSurface (struct cudaGraphicsResource **resource, VdpVideoSurface vdpSurface, unsign ed int flags); extern __host__ cudaError_t CUDARTAPI cudaGraphicsVDPAURegisterVideoSurface (struct cudaGraphicsResource **resource, VdpVideoSurface vdpSurface, unsign ed int flags);
/**
* \brief Register a VdpOutputSurface object
*
* Registers the VdpOutputSurface specified by \p vdpSurface for access by
CUDA.
* A handle to the registered object is returned as \p resource.
* The surface's intended usage is specified using \p flags, as follows:
*
* - ::cudaGraphicsMapFlagsNone: Specifies no hints about how this
* resource will be used. It is therefore assumed that this resource will
be
* read from and written to by CUDA. This is the default value.
* - ::cudaGraphicsMapFlagsReadOnly: Specifies that CUDA
* will not write to this resource.
* - ::cudaGraphicsMapFlagsWriteDiscard: Specifies that
* CUDA will not read from this resource and will write over the
* entire contents of the resource, so none of the data previously
* stored in the resource will be preserved.
*
* \param resource - Pointer to the returned object handle
* \param vdpSurface - VDPAU object to be registered
* \param flags - Map flags
*
* \return
* ::cudaSuccess,
* ::cudaErrorInvalidDevice,
* ::cudaErrorInvalidValue,
* ::cudaErrorInvalidResourceHandle,
* ::cudaErrorUnknown
* \notefnerr
*
* \sa
* ::cudaVDPAUSetVDPAUDevice
* ::cudaGraphicsUnregisterResource,
* ::cudaGraphicsSubResourceGetMappedArray
*/
extern __host__ cudaError_t CUDARTAPI cudaGraphicsVDPAURegisterOutputSurfac e(struct cudaGraphicsResource **resource, VdpOutputSurface vdpSurface, unsi gned int flags); extern __host__ cudaError_t CUDARTAPI cudaGraphicsVDPAURegisterOutputSurfac e(struct cudaGraphicsResource **resource, VdpOutputSurface vdpSurface, unsi gned int flags);
/** @} */ /* END CUDART_VDPAU */
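Taken together, a hedged sketch of the intended call order (vdpDevice,
getProcAddress and videoSurface are hypothetical handles obtained from VDPAU):

    int dev;
    cudaVDPAUGetDevice(&dev, vdpDevice, getProcAddress);
    cudaVDPAUSetVDPAUDevice(dev, vdpDevice, getProcAddress); /* before other runtime calls */

    struct cudaGraphicsResource *res;
    cudaGraphicsVDPAURegisterVideoSurface(&res, videoSurface,
                                          cudaGraphicsMapFlagsReadOnly);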
#if defined(__cplusplus) #if defined(__cplusplus)
} }
#endif /* __cplusplus */ #endif /* __cplusplus */
#endif /* __CUDA_VDPAU_INTEROP_H__ */ #endif /* __CUDA_VDPAU_INTEROP_H__ */
 End of changes. 7 change blocks. 
21 lines changed or deleted 124 lines changed or added


 cufft.h   cufft.h 
skipping to change at line 69 skipping to change at line 69
// CUFFT API function return values // CUFFT API function return values
typedef enum cufftResult_t { typedef enum cufftResult_t {
CUFFT_SUCCESS = 0x0, CUFFT_SUCCESS = 0x0,
CUFFT_INVALID_PLAN = 0x1, CUFFT_INVALID_PLAN = 0x1,
CUFFT_ALLOC_FAILED = 0x2, CUFFT_ALLOC_FAILED = 0x2,
CUFFT_INVALID_TYPE = 0x3, CUFFT_INVALID_TYPE = 0x3,
CUFFT_INVALID_VALUE = 0x4, CUFFT_INVALID_VALUE = 0x4,
CUFFT_INTERNAL_ERROR = 0x5, CUFFT_INTERNAL_ERROR = 0x5,
CUFFT_EXEC_FAILED = 0x6, CUFFT_EXEC_FAILED = 0x6,
CUFFT_SETUP_FAILED = 0x7, CUFFT_SETUP_FAILED = 0x7,
CUFFT_INVALID_SIZE = 0x8 CUFFT_INVALID_SIZE = 0x8,
CUFFT_UNALIGNED_DATA = 0x9
} cufftResult; } cufftResult;
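Every CUFFT entry point returns one of these codes; a minimal checking sketch
(cufftPlan1d and CUFFT_C2C are declared further down in this header):

    cufftHandle plan;
    cufftResult r = cufftPlan1d(&plan, 1024, CUFFT_C2C, 1);
    if (r != CUFFT_SUCCESS) { /* e.g. CUFFT_ALLOC_FAILED or CUFFT_INVALID_SIZE */ }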
// CUFFT defines and supports the following data types // CUFFT defines and supports the following data types
// cufftHandle is a handle type used to store and access CUFFT plans. // cufftHandle is a handle type used to store and access CUFFT plans.
typedef unsigned int cufftHandle; typedef unsigned int cufftHandle;
// cufftReal is a single-precision, floating-point real data type. // cufftReal is a single-precision, floating-point real data type.
// cufftDoubleReal is a double-precision, real data type. // cufftDoubleReal is a double-precision, real data type.
typedef float cufftReal; typedef float cufftReal;
 End of changes. 1 change blocks. 
1 lines changed or deleted 2 lines changed or added


 device_functions.h   device_functions.h 
skipping to change at line 76 skipping to change at line 76
extern __device__ long long int __mul64hi(long long int, long long int); extern __device__ long long int __mul64hi(long long int, long long int);
/*DEVICE_BUILTIN*/ /*DEVICE_BUILTIN*/
extern __device__ unsigned long long int __umul64hi(unsigned long long int, unsigned long long int); extern __device__ unsigned long long int __umul64hi(unsigned long long int, unsigned long long int);
/*DEVICE_BUILTIN*/ /*DEVICE_BUILTIN*/
extern __device__ float __int_as_float(int); extern __device__ float __int_as_float(int);
/*DEVICE_BUILTIN*/ /*DEVICE_BUILTIN*/
extern __device__ int __float_as_int(float); extern __device__ int __float_as_int(float);
/*DEVICE_BUILTIN*/ /*DEVICE_BUILTIN*/
extern __device__ void __synchronous_start(int);
/*DEVICE_BUILTIN*/
extern __device__ void __synchronous_end(void);
/*DEVICE_BUILTIN*/
extern __device__ void __syncthreads(void); extern __device__ void __syncthreads(void);
/*DEVICE_BUILTIN*/ /*DEVICE_BUILTIN*/
extern __device__ void __prof_trigger(int); extern __device__ void __prof_trigger(int);
/*DEVICE_BUILTIN*/ /*DEVICE_BUILTIN*/
extern __device__ void __threadfence(void); extern __device__ void __threadfence(void);
/*DEVICE_BUILTIN*/ /*DEVICE_BUILTIN*/
extern __device__ void __threadfence_block(void); extern __device__ void __threadfence_block(void);
/*DEVICE_BUILTIN*/ /*DEVICE_BUILTIN*/
extern __device__ void __trap(void); extern __device__ void __trap(void);
/*DEVICE_BUILTIN*/ /*DEVICE_BUILTIN*/
 End of changes. 1 change blocks. 
4 lines changed or deleted 0 lines changed or added
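The __synchronous_start/__synchronous_end builtins dropped above appear to
belong to the retired device-emulation path; in-kernel barrier semantics are
expressed with __syncthreads(), as in this hedged sketch (reverseBlock is a
hypothetical kernel):

    __global__ void reverseBlock(int *d, int n)
    {
        __shared__ int staged[256];
        int t = threadIdx.x;
        if (t < n) staged[t] = d[t];
        __syncthreads();            /* every thread sees the filled buffer */
        if (t < n) d[t] = staged[n - 1 - t];
    }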


 driver_functions.h   driver_functions.h 
skipping to change at line 39 skipping to change at line 39
* source code with only those rights set forth herein. * source code with only those rights set forth herein.
* *
* Any use of this source code in individual and commercial software must * Any use of this source code in individual and commercial software must
* include, in the user documentation and internal comments to the code, * include, in the user documentation and internal comments to the code,
* the above Disclaimer and U.S. Government End Users Notice. * the above Disclaimer and U.S. Government End Users Notice.
*/ */
#if !defined(__DRIVER_FUNCTIONS_H__) #if !defined(__DRIVER_FUNCTIONS_H__)
#define __DRIVER_FUNCTIONS_H__ #define __DRIVER_FUNCTIONS_H__
/*******************************************************************************
*
*
*
*
*
*
*******************************************************************************/
#include "builtin_types.h" #include "builtin_types.h"
#include "host_defines.h" #include "host_defines.h"
#include "driver_types.h" #include "driver_types.h"
/**
 * \addtogroup CUDART_MEMORY
 *
 * @{
 */
/**
* \brief Returns a ::cudaPitchedPtr based on input parameters
*
* Returns a ::cudaPitchedPtr based on the specified input parameters \p d,
* \p p, \p xsz, and \p ysz.
*
* \param d - Pointer to allocated memory
* \param p - Pitch of allocated memory in bytes
* \param xsz - Logical width of allocation in elements
* \param ysz - Logical height of allocation in elements
*
* \return
* ::cudaPitchedPtr specified by \p d, \p p, \p xsz, and \p ysz
*
* \sa make_cudaExtent, make_cudaPos
*/
static __inline__ __host__ struct cudaPitchedPtr make_cudaPitchedPtr(void * d, size_t p, size_t xsz, size_t ysz) static __inline__ __host__ struct cudaPitchedPtr make_cudaPitchedPtr(void * d, size_t p, size_t xsz, size_t ysz)
{ {
struct cudaPitchedPtr s; struct cudaPitchedPtr s;
s.ptr = d; s.ptr = d;
s.pitch = p; s.pitch = p;
s.xsize = xsz; s.xsize = xsz;
s.ysize = ysz; s.ysize = ysz;
return s; return s;
} }
/**
* \brief Returns a ::cudaPos based on input parameters
*
* Returns a ::cudaPos based on the specified input parameters \p x,
* \p y, and \p z.
*
* \param x - X position
* \param y - Y position
* \param z - Z position
*
* \return
* ::cudaPos specified by \p x, \p y, and \p z
*
* \sa make_cudaExtent, make_cudaPitchedPtr
*/
static __inline__ __host__ struct cudaPos make_cudaPos(size_t x, size_t y, size_t z) static __inline__ __host__ struct cudaPos make_cudaPos(size_t x, size_t y, size_t z)
{ {
struct cudaPos p; struct cudaPos p;
p.x = x; p.x = x;
p.y = y; p.y = y;
p.z = z; p.z = z;
return p; return p;
} }
/**
* \brief Returns a ::cudaExtent based on input parameters
*
* Returns a ::cudaExtent based on the specified input parameters \p w,
* \p h, and \p d.
*
* \param w - Width in bytes
* \param h - Height in elements
* \param d - Depth in elements
*
* \return
* ::cudaExtent specified by \p w, \p h, and \p d
*
* \sa make_cudaPitchedPtr, make_cudaPos
*/
static __inline__ __host__ struct cudaExtent make_cudaExtent(size_t w, size _t h, size_t d) static __inline__ __host__ struct cudaExtent make_cudaExtent(size_t w, size _t h, size_t d)
{ {
struct cudaExtent e; struct cudaExtent e;
e.width = w; e.width = w;
e.height = h; e.height = h;
e.depth = d; e.depth = d;
return e; return e;
} }
/** @} */ /* END CUDART_MEMORY */
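A hedged sketch of how these helpers are typically combined with cudaMalloc3D
(width is given in bytes here because the allocation is linear, pitched memory):

    struct cudaExtent extent = make_cudaExtent(64 * sizeof(float), 64, 64);
    struct cudaPitchedPtr devPitchedPtr;
    cudaMalloc3D(&devPitchedPtr, extent);  /* devPitchedPtr.pitch >= row width */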
#endif /* !__DRIVER_FUNCTIONS_H__ */ #endif /* !__DRIVER_FUNCTIONS_H__ */
 End of changes. 6 change blocks. 
21 lines changed or deleted 53 lines changed or added


 driver_types.h   driver_types.h 
skipping to change at line 63 skipping to change at line 63
* * * *
* TYPE DEFINITIONS USED BY RUNTIME API * * TYPE DEFINITIONS USED BY RUNTIME API *
* * * *
*************************************************************************** ****/ *************************************************************************** ****/
#if !defined(__CUDA_INTERNAL_COMPILATION__) #if !defined(__CUDA_INTERNAL_COMPILATION__)
#include <limits.h> #include <limits.h>
#include <stddef.h> #include <stddef.h>
#define cudaHostAllocDefault        0    /**< Default page-locked allocation flag */
#define cudaHostAllocPortable       1    /**< Pinned memory accessible by all CUDA contexts */
#define cudaHostAllocMapped         2    /**< Map allocation into device space */
#define cudaHostAllocWriteCombined  4    /**< Write-combined memory */

#define cudaEventDefault            0    /**< Default event flag */
#define cudaEventBlockingSync       1    /**< Event uses blocking synchronization */
#define cudaEventDisableTiming      2    /**< Event will not record timing data */

#define cudaDeviceScheduleAuto      0    /**< Device flag - Automatic scheduling */
#define cudaDeviceScheduleSpin      1    /**< Device flag - Spin default scheduling */
#define cudaDeviceScheduleYield     2    /**< Device flag - Yield default scheduling */
#define cudaDeviceBlockingSync      4    /**< Device flag - Use blocking synchronization */
#define cudaDeviceMapHost           8    /**< Device flag - Support mapped pinned allocations */
#define cudaDeviceLmemResizeToMax   16   /**< Device flag - Keep local memory allocation after launch */
#define cudaDeviceMask              0x1f /**< Device flags mask */

#define cudaArrayDefault            0x00 /**< Default CUDA array allocation flag */
#define cudaArraySurfaceLoadStore   0x02 /**< Must be set in cudaMallocArray in order to bind surfaces to the CUDA array */
#endif /* !__CUDA_INTERNAL_COMPILATION__ */ #endif /* !__CUDA_INTERNAL_COMPILATION__ */
/************************************************************************** ***** /************************************************************************** *****
* * * *
* * * *
* * * *
*************************************************************************** ****/ *************************************************************************** ****/
/** /**
* CUDA error types * CUDA error types
*/ */
/*DEVICE_BUILTIN*/ /*DEVICE_BUILTIN*/
enum cudaError enum cudaError
{ {
  /**
   * The API call returned with no errors. In the case of query calls, this
   * can also mean that the operation being queried is complete (see
   * ::cudaEventQuery() and ::cudaStreamQuery()).
   */
  cudaSuccess                           =      0,

  /**
   * The device function being invoked (usually via ::cudaLaunch()) was not
   * previously configured via the ::cudaConfigureCall() function.
   */
  cudaErrorMissingConfiguration         =      1,

  /**
   * The API call failed because it was unable to allocate enough memory to
   * perform the requested operation.
   */
  cudaErrorMemoryAllocation             =      2,

  /**
   * The API call failed because the CUDA driver and runtime could not be
   * initialized.
   */
  cudaErrorInitializationError          =      3,

  /**
   * An exception occurred on the device while executing a kernel. Common
   * causes include dereferencing an invalid device pointer and accessing
   * out of bounds shared memory. The device cannot be used until
   * ::cudaThreadExit() is called. All existing device memory allocations
   * are invalid and must be reconstructed if the program is to continue
   * using CUDA.
   */
  cudaErrorLaunchFailure                =      4,

  /**
   * This indicated that a previous kernel launch failed. This was previously
   * used for device emulation of kernel launches.
   * \deprecated
   * This error return is deprecated as of CUDA 3.1. Device emulation mode was
   * removed with the CUDA 3.1 release.
   */
  cudaErrorPriorLaunchFailure           =      5,

  /**
   * This indicates that the device kernel took too long to execute. This can
   * only occur if timeouts are enabled - see the device property
   * \ref ::cudaDeviceProp::kernelExecTimeoutEnabled "kernelExecTimeoutEnabled"
   * for more information. The device cannot be used until ::cudaThreadExit()
   * is called. All existing device memory allocations are invalid and must be
   * reconstructed if the program is to continue using CUDA.
   */
  cudaErrorLaunchTimeout                =      6,

  /**
   * This indicates that a launch did not occur because it did not have
   * appropriate resources. Although this error is similar to
   * ::cudaErrorInvalidConfiguration, this error usually indicates that the
   * user has attempted to pass too many arguments to the device kernel, or the
   * kernel launch specifies too many threads for the kernel's register count.
   */
  cudaErrorLaunchOutOfResources         =      7,

  /**
   * The requested device function does not exist or is not compiled for the
   * proper device architecture.
   */
  cudaErrorInvalidDeviceFunction        =      8,

  /**
   * This indicates that a kernel launch is requesting resources that can
   * never be satisfied by the current device. Requesting more shared memory
   * per block than the device supports will trigger this error, as will
   * requesting too many threads or blocks. See ::cudaDeviceProp for more
   * device limitations.
   */
  cudaErrorInvalidConfiguration         =      9,

  /**
   * This indicates that the device ordinal supplied by the user does not
   * correspond to a valid CUDA device.
   */
  cudaErrorInvalidDevice                =     10,

  /**
   * This indicates that one or more of the parameters passed to the API call
   * is not within an acceptable range of values.
   */
  cudaErrorInvalidValue                 =     11,

  /**
   * This indicates that one or more of the pitch-related parameters passed
   * to the API call is not within the acceptable range for pitch.
   */
  cudaErrorInvalidPitchValue            =     12,

  /**
   * This indicates that the symbol name/identifier passed to the API call
   * is not a valid name or identifier.
   */
  cudaErrorInvalidSymbol                =     13,

  /**
   * This indicates that the buffer object could not be mapped.
   */
  cudaErrorMapBufferObjectFailed        =     14,

  /**
   * This indicates that the buffer object could not be unmapped.
   */
  cudaErrorUnmapBufferObjectFailed      =     15,

  /**
   * This indicates that at least one host pointer passed to the API call is
   * not a valid host pointer.
   */
  cudaErrorInvalidHostPointer           =     16,

  /**
   * This indicates that at least one device pointer passed to the API call is
   * not a valid device pointer.
   */
  cudaErrorInvalidDevicePointer         =     17,

  /**
   * This indicates that the texture passed to the API call is not a valid
   * texture.
   */
  cudaErrorInvalidTexture               =     18,

  /**
   * This indicates that the texture binding is not valid. This occurs if you
   * call ::cudaGetTextureAlignmentOffset() with an unbound texture.
   */
  cudaErrorInvalidTextureBinding        =     19,

  /**
   * This indicates that the channel descriptor passed to the API call is not
   * valid. This occurs if the format is not one of the formats specified by
   * ::cudaChannelFormatKind, or if one of the dimensions is invalid.
   */
  cudaErrorInvalidChannelDescriptor     =     20,

  /**
   * This indicates that the direction of the memcpy passed to the API call is
   * not one of the types specified by ::cudaMemcpyKind.
   */
  cudaErrorInvalidMemcpyDirection       =     21,

  /**
   * This indicated that the user has taken the address of a constant variable,
   * which was forbidden up until the CUDA 3.1 release.
   * \deprecated
   * This error return is deprecated as of CUDA 3.1. Variables in constant
   * memory may now have their address taken by the runtime via
   * ::cudaGetSymbolAddress().
   */
  cudaErrorAddressOfConstant            =     22,

  /**
   * This indicated that a texture fetch was not able to be performed.
   * This was previously used for device emulation of texture operations.
   * \deprecated
   * This error return is deprecated as of CUDA 3.1. Device emulation mode was
   * removed with the CUDA 3.1 release.
   */
  cudaErrorTextureFetchFailed           =     23,

  /**
   * This indicated that a texture was not bound for access.
   * This was previously used for device emulation of texture operations.
   * \deprecated
   * This error return is deprecated as of CUDA 3.1. Device emulation mode was
   * removed with the CUDA 3.1 release.
   */
  cudaErrorTextureNotBound              =     24,

  /**
   * This indicated that a synchronization operation had failed.
   * This was previously used for some device emulation functions.
   * \deprecated
   * This error return is deprecated as of CUDA 3.1. Device emulation mode was
   * removed with the CUDA 3.1 release.
   */
  cudaErrorSynchronizationError         =     25,

  /**
   * This indicates that a non-float texture was being accessed with linear
   * filtering. This is not supported by CUDA.
   */
  cudaErrorInvalidFilterSetting         =     26,

  /**
   * This indicates that an attempt was made to read a non-float texture as a
   * normalized float. This is not supported by CUDA.
   */
  cudaErrorInvalidNormSetting           =     27,

  /**
   * Mixing of device and device emulation code was not allowed.
   * \deprecated
   * This error return is deprecated as of CUDA 3.1. Device emulation mode was
   * removed with the CUDA 3.1 release.
   */
  cudaErrorMixedDeviceExecution         =     28,

  /**
   * This indicated an issue with calling API functions during the unload
   * process of the CUDA runtime in prior releases.
   * \deprecated
   * This error return is deprecated as of CUDA 3.2.
   */
  cudaErrorCudartUnloading              =     29,

  /**
   * This indicates that an unknown internal error has occurred.
   */
  cudaErrorUnknown                      =     30,

  /**
   * This indicates that the API call is not yet implemented. Production
   * releases of CUDA will never return this error.
   */
  cudaErrorNotYetImplemented            =     31,

  /**
   * This indicated that an emulated device pointer exceeded the 32-bit address
   * range.
   * \deprecated
   * This error return is deprecated as of CUDA 3.1. Device emulation mode was
   * removed with the CUDA 3.1 release.
   */
  cudaErrorMemoryValueTooLarge          =     32,

  /**
   * This indicates that a resource handle passed to the API call was not
   * valid. Resource handles are opaque types like ::cudaStream_t and
   * ::cudaEvent_t.
   */
  cudaErrorInvalidResourceHandle        =     33,

  /**
   * This indicates that asynchronous operations issued previously have not
   * completed yet. This result is not actually an error, but must be indicated
   * differently than ::cudaSuccess (which indicates completion). Calls that
   * may return this value include ::cudaEventQuery() and ::cudaStreamQuery().
   */
  cudaErrorNotReady                     =     34,

  /**
   * This indicates that the installed NVIDIA CUDA driver is older than the
   * CUDA runtime library. This is not a supported configuration. Users should
   * install an updated NVIDIA display driver to allow the application to run.
   */
  cudaErrorInsufficientDriver           =     35,

  /**
   * This indicates that the user has called ::cudaSetDevice(),
   * ::cudaSetValidDevices(), ::cudaSetDeviceFlags(),
   * ::cudaD3D9SetDirect3DDevice(), ::cudaD3D10SetDirect3DDevice,
   * ::cudaD3D11SetDirect3DDevice(), or ::cudaVDPAUSetVDPAUDevice() after
   * initializing the CUDA runtime by calling non-device management operations
   * (allocating memory and launching kernels are examples of non-device
   * management operations). This error can also be returned if using
   * runtime/driver interoperability and there is an existing ::CUcontext
   * active on the host thread.
   */
  cudaErrorSetOnActiveProcess           =     36,

  /**
   * This indicates that the surface passed to the API call is not a valid
   * surface.
   */
  cudaErrorInvalidSurface               =     37,

  /**
   * This indicates that no CUDA-capable devices were detected by the installed
   * CUDA driver.
   */
  cudaErrorNoDevice                     =     38,

  /**
   * This indicates that an uncorrectable ECC error was detected during
   * execution.
   */
  cudaErrorECCUncorrectable             =     39,

  /**
   * This indicates that a link to a shared object failed to resolve.
   */
  cudaErrorSharedObjectSymbolNotFound   =     40,

  /**
   * This indicates that initialization of a shared object failed.
   */
  cudaErrorSharedObjectInitFailed       =     41,

  /**
   * This indicates that the ::cudaLimit passed to the API call is not
   * supported by the active device.
   */
  cudaErrorUnsupportedLimit             =     42,

  /**
   * This indicates that multiple global or constant variables (across separate
   * CUDA source files in the application) share the same string name.
   */
  cudaErrorDuplicateVariableName        =     43,

  /**
   * This indicates that multiple textures (across separate CUDA source
   * files in the application) share the same string name.
   */
  cudaErrorDuplicateTextureName         =     44,

  /**
   * This indicates that multiple surfaces (across separate CUDA source
   * files in the application) share the same string name.
   */
  cudaErrorDuplicateSurfaceName         =     45,

  /**
   * This indicates that all CUDA devices are busy or unavailable at the current
   * time. Devices are often busy/unavailable due to use of
   * ::cudaComputeModeExclusive or ::cudaComputeModeProhibited. They can also
   * be unavailable due to memory constraints on a device that already has
   * active CUDA work being performed.
   */
  cudaErrorDevicesUnavailable           =     46,

  /**
   * This indicates that the device kernel image is invalid.
   */
  cudaErrorInvalidKernelImage           =     47,

  /**
   * This indicates that there is no kernel image available that is suitable
   * for the device. This can occur when a user specifies code generation
   * options for a particular CUDA source file that do not include the
   * corresponding device configuration.
   */
  cudaErrorNoKernelImageForDevice       =     48,

  /**
   * This indicates that the current context is not compatible with this
   * version of the CUDA Runtime. This can only occur if you are using CUDA
   * Runtime/Driver interoperability and have created an existing Driver
   * context using an older API. Please see \ref CUDART_DRIVER
   * "Interactions with the CUDA Driver API" for more information.
   */
  cudaErrorIncompatibleDriverContext    =     49,

  /**
   * This indicates an internal startup failure in the CUDA runtime.
   */
  cudaErrorStartupFailure               =   0x7f,

  /**
   * Any unhandled CUDA driver error is added to this value and returned via
   * the runtime. Production releases of CUDA should not return such errors.
   */
  cudaErrorApiFailureBase               =  10000
}; };
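A common error-handling idiom built on this enum, sketched under the
assumption that <stdio.h> is available in the translation unit:

    #define CUDA_CHECK(call)                                    \
        do {                                                    \
            cudaError_t e_ = (call);                            \
            if (e_ != cudaSuccess)                              \
                fprintf(stderr, "CUDA error %d: %s\n",          \
                        (int)e_, cudaGetErrorString(e_));       \
        } while (0)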
/** /**
* Channel format kind * Channel format kind
*/ */
/*DEVICE_BUILTIN*/ /*DEVICE_BUILTIN*/
enum cudaChannelFormatKind enum cudaChannelFormatKind
{ {
    cudaChannelFormatKindSigned   = 0, /**< Signed channel format */
    cudaChannelFormatKindUnsigned = 1, /**< Unsigned channel format */
    cudaChannelFormatKindFloat    = 2, /**< Float channel format */
    cudaChannelFormatKindNone     = 3  /**< No channel format */
}; };
/** /**
* CUDA Channel format descriptor * CUDA Channel format descriptor
*/ */
/*DEVICE_BUILTIN*/ /*DEVICE_BUILTIN*/
struct cudaChannelFormatDesc struct cudaChannelFormatDesc
{ {
    int                        x; /**< x */
    int                        y; /**< y */
    int                        z; /**< z */
    int                        w; /**< w */
    enum cudaChannelFormatKind f; /**< Channel format kind */
}; };
/** /**
* CUDA array * CUDA array
*/ */
/*DEVICE_BUILTIN*/ /*DEVICE_BUILTIN*/
struct cudaArray; struct cudaArray;
/** /**
* CUDA memory copy types * CUDA memory copy types
*/ */
/*DEVICE_BUILTIN*/ /*DEVICE_BUILTIN*/
enum cudaMemcpyKind enum cudaMemcpyKind
{ {
    cudaMemcpyHostToHost     = 0, /**< Host   -> Host */
    cudaMemcpyHostToDevice   = 1, /**< Host   -> Device */
    cudaMemcpyDeviceToHost   = 2, /**< Device -> Host */
    cudaMemcpyDeviceToDevice = 3  /**< Device -> Device */
}; };
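The kind value is the last argument of the copy routines, e.g. (a sketch;
devPtr, hostPtr and nbytes are hypothetical names):

    cudaMemcpy(devPtr, hostPtr, nbytes, cudaMemcpyHostToDevice);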
/** /**
* CUDA Pitched memory pointer * CUDA Pitched memory pointer
* \sa ::make_cudaPitchedPtr * \sa ::make_cudaPitchedPtr
*/ */
/*DEVICE_BUILTIN*/ /*DEVICE_BUILTIN*/
struct cudaPitchedPtr struct cudaPitchedPtr
{ {
    void   *ptr;   /**< Pointer to allocated memory */
    size_t  pitch; /**< Pitch of allocated memory in bytes */
    size_t  xsize; /**< Logical width of allocation in elements */
    size_t  ysize; /**< Logical height of allocation in elements */
}; };
/** /**
* CUDA extent * CUDA extent
* \sa ::make_cudaExtent * \sa ::make_cudaExtent
*/ */
/*DEVICE_BUILTIN*/ /*DEVICE_BUILTIN*/
struct cudaExtent struct cudaExtent
{ {
    size_t width;  /**< Width in elements when referring to array memory, in bytes when referring to linear memory */
    size_t height; /**< Height in elements */
    size_t depth;  /**< Depth in elements */
}; };
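Per the width semantics above, the same helper is fed differently for the two
memory kinds (a sketch; w, h and d are hypothetical element counts):

    struct cudaExtent linearExtent = make_cudaExtent(w * sizeof(float), h, d); /* bytes    */
    struct cudaExtent arrayExtent  = make_cudaExtent(w, h, d);                 /* elements */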
/** /**
* CUDA 3D position * CUDA 3D position
* \sa ::make_cudaPos * \sa ::make_cudaPos
*/ */
/*DEVICE_BUILTIN*/ /*DEVICE_BUILTIN*/
struct cudaPos struct cudaPos
{ {
    size_t x; /**< x */
    size_t y; /**< y */
    size_t z; /**< z */
}; };
/** /**
* CUDA 3D memory copying parameters * CUDA 3D memory copying parameters
*/ */
/*DEVICE_BUILTIN*/ /*DEVICE_BUILTIN*/
struct cudaMemcpy3DParms struct cudaMemcpy3DParms
{ {
    struct cudaArray      *srcArray; /**< Source memory address */
    struct cudaPos         srcPos;   /**< Source position offset */
    struct cudaPitchedPtr  srcPtr;   /**< Pitched source memory address */

    struct cudaArray      *dstArray; /**< Destination memory address */
    struct cudaPos         dstPos;   /**< Destination position offset */
    struct cudaPitchedPtr  dstPtr;   /**< Pitched destination memory address */

    struct cudaExtent      extent;   /**< Requested memory copy size */
    enum cudaMemcpyKind    kind;     /**< Type of transfer */
}; };
/** /**
* CUDA graphics interop resource * CUDA graphics interop resource
*/ */
/*DEVICE_BUILTIN*/ /*DEVICE_BUILTIN*/
struct cudaGraphicsResource; struct cudaGraphicsResource;
/** /**
* CUDA graphics interop register flags * CUDA graphics interop register flags
*/ */
/*DEVICE_BUILTIN*/ /*DEVICE_BUILTIN*/
enum cudaGraphicsRegisterFlags enum cudaGraphicsRegisterFlags
{ {
    cudaGraphicsRegisterFlagsNone = 0  /**< Default */
}; };
/** /**
* CUDA graphics interop map flags * CUDA graphics interop map flags
*/ */
/*DEVICE_BUILTIN*/ /*DEVICE_BUILTIN*/
enum cudaGraphicsMapFlags enum cudaGraphicsMapFlags
{ {
    cudaGraphicsMapFlagsNone         = 0, /**< Default; Assume resource can be read/written */
    cudaGraphicsMapFlagsReadOnly     = 1, /**< CUDA will not write to this resource */
    cudaGraphicsMapFlagsWriteDiscard = 2  /**< CUDA will only write to and will not read from this resource */
}; };
/** /**
* CUDA graphics interop array indices for cube maps * CUDA graphics interop array indices for cube maps
*/ */
/*DEVICE_BUILTIN*/ /*DEVICE_BUILTIN*/
enum cudaGraphicsCubeFace { enum cudaGraphicsCubeFace {
    cudaGraphicsCubeFacePositiveX = 0x00, /**< Positive X face of cubemap */
    cudaGraphicsCubeFaceNegativeX = 0x01, /**< Negative X face of cubemap */
    cudaGraphicsCubeFacePositiveY = 0x02, /**< Positive Y face of cubemap */
    cudaGraphicsCubeFaceNegativeY = 0x03, /**< Negative Y face of cubemap */
    cudaGraphicsCubeFacePositiveZ = 0x04, /**< Positive Z face of cubemap */
    cudaGraphicsCubeFaceNegativeZ = 0x05  /**< Negative Z face of cubemap */
}; };
/** /**
* CUDA function attributes * CUDA function attributes
*/ */
/*DEVICE_BUILTIN*/ /*DEVICE_BUILTIN*/
struct cudaFuncAttributes struct cudaFuncAttributes
{ {
   /**
    * The size in bytes of statically-allocated shared memory per block
    * required by this function. This does not include dynamically-allocated
    * shared memory requested by the user at runtime.
    */
   size_t sharedSizeBytes;

   /**
    * The size in bytes of user-allocated constant memory required by this
    * function.
    */
   size_t constSizeBytes;

   /**
    * The size in bytes of local memory used by each thread of this function.
    */
   size_t localSizeBytes;

   /**
    * The maximum number of threads per block, beyond which a launch of the
    * function would fail. This number depends on both the function and the
    * device on which the function is currently loaded.
    */
   int maxThreadsPerBlock;

   /**
    * The number of registers used by each thread of this function.
    */
   int numRegs;

   /**
    * The PTX virtual architecture version for which the function was
    * compiled. This value is the major PTX version * 10 + the minor PTX
    * version, so a PTX version 1.3 function would return the value 13.
    */
   int ptxVersion;

   /**
    * The binary architecture version for which the function was compiled.
    * This value is the major binary version * 10 + the minor binary version,
    * so a binary version 1.3 function would return the value 13.
    */
   int binaryVersion;

   int __cudaReserved[6];
}; };
/** /**
* CUDA function cache configurations * CUDA function cache configurations
*/ */
/*DEVICE_BUILTIN*/ /*DEVICE_BUILTIN*/
enum cudaFuncCache enum cudaFuncCache
{ {
    cudaFuncCachePreferNone   = 0, /**< Default function cache configuration, no preference */
    cudaFuncCachePreferShared = 1, /**< Prefer larger shared memory and smaller L1 cache */
    cudaFuncCachePreferL1     = 2  /**< Prefer larger L1 cache and smaller shared memory */
}; };
/** /**
* CUDA device compute modes * CUDA device compute modes
*/ */
/*DEVICE_BUILTIN*/ /*DEVICE_BUILTIN*/
enum cudaComputeMode enum cudaComputeMode
{ {
    cudaComputeModeDefault    = 0, /**< Default compute mode (Multiple threads can use ::cudaSetDevice() with this device) */
    cudaComputeModeExclusive  = 1, /**< Compute-exclusive mode (Only one thread will be able to use ::cudaSetDevice() with this device) */
    cudaComputeModeProhibited = 2  /**< Compute-prohibited mode (No threads can use ::cudaSetDevice() with this device) */
}; };
/** /**
* CUDA Limits * CUDA Limits
*/ */
/*DEVICE_BUILTIN*/ /*DEVICE_BUILTIN*/
enum cudaLimit enum cudaLimit
{ {
    cudaLimitStackSize      = 0x00, /**< GPU thread stack size */
    cudaLimitPrintfFifoSize = 0x01, /**< GPU printf FIFO size */
    cudaLimitMallocHeapSize = 0x02  /**< GPU malloc heap size */
}; };
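Limits are adjusted per host thread with cudaThreadSetLimit; for example, a
sketch that grows the device-side malloc heap governed by
cudaLimitMallocHeapSize:

    cudaThreadSetLimit(cudaLimitMallocHeapSize, 16 * 1024 * 1024);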
/** /**
* CUDA device properties * CUDA device properties
*/ */
/*DEVICE_BUILTIN*/ /*DEVICE_BUILTIN*/
struct cudaDeviceProp struct cudaDeviceProp
{ {
    char   name[256];                /**< ASCII string identifying device */
    size_t totalGlobalMem;           /**< Global memory available on device in bytes */
    size_t sharedMemPerBlock;        /**< Shared memory available per block in bytes */
    int    regsPerBlock;             /**< 32-bit registers available per block */
    int    warpSize;                 /**< Warp size in threads */
    size_t memPitch;                 /**< Maximum pitch in bytes allowed by memory copies */
    int    maxThreadsPerBlock;       /**< Maximum number of threads per block */
    int    maxThreadsDim[3];         /**< Maximum size of each dimension of a block */
    int    maxGridSize[3];           /**< Maximum size of each dimension of a grid */
    int    clockRate;                /**< Clock frequency in kilohertz */
    size_t totalConstMem;            /**< Constant memory available on device in bytes */
    int    major;                    /**< Major compute capability */
    int    minor;                    /**< Minor compute capability */
    size_t textureAlignment;         /**< Alignment requirement for textures */
    int    deviceOverlap;            /**< Device can concurrently copy memory and execute a kernel */
    int    multiProcessorCount;      /**< Number of multiprocessors on device */
    int    kernelExecTimeoutEnabled; /**< Specified whether there is a run time limit on kernels */
    int    integrated;               /**< Device is integrated as opposed to discrete */
    int    canMapHostMemory;         /**< Device can map host memory with cudaHostAlloc/cudaHostGetDevicePointer */
    int    computeMode;              /**< Compute mode (See ::cudaComputeMode) */
    int    maxTexture1D;             /**< Maximum 1D texture size */
    int    maxTexture2D[2];          /**< Maximum 2D texture dimensions */
    int    maxTexture3D[3];          /**< Maximum 3D texture dimensions */
    int    maxTexture2DArray[3];     /**< Maximum 2D texture array dimensions */
    size_t surfaceAlignment;         /**< Alignment requirements for surfaces */
    int    concurrentKernels;        /**< Device can possibly execute multiple kernels concurrently */
    int    ECCEnabled;               /**< Device has ECC support enabled */
    int    pciBusID;                 /**< PCI bus ID of the device */
    int    pciDeviceID;              /**< PCI device ID of the device */
    int    tccDriver;                /**< 1 if device is a Tesla device using TCC driver, 0 otherwise */
    int    __cudaReserved[21];
}; };
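A minimal enumeration loop over these properties:

#include <cstdio>
#include <cuda_runtime.h>

int main()
{
    int n = 0;
    cudaGetDeviceCount(&n);
    for (int i = 0; i < n; ++i) {
        struct cudaDeviceProp p;
        cudaGetDeviceProperties(&p, i);
        printf("%d: %s, CC %d.%d, %lu MB global, %d SMs, ECC %s\n",
               i, p.name, p.major, p.minor,
               (unsigned long)(p.totalGlobalMem >> 20),
               p.multiProcessorCount, p.ECCEnabled ? "on" : "off");
    }
    return 0;
}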
#define cudaDevicePropDontCare \
        { \
          {'\0'},    /* char name[256];              */ \
          0,         /* size_t totalGlobalMem;       */ \
          0,         /* size_t sharedMemPerBlock;    */ \
          0,         /* int regsPerBlock;            */ \
          0,         /* int warpSize;                */ \
          0,         /* size_t memPitch;             */ \

skipping to change at line 751

          0,         /* int kernelExecTimeoutEnabled */ \
          0,         /* int integrated               */ \
          0,         /* int canMapHostMemory         */ \
          0,         /* int computeMode              */ \
          0,         /* int maxTexture1D             */ \
          {0, 0},    /* int maxTexture2D[2]          */ \
          {0, 0, 0}, /* int maxTexture3D[3]          */ \
          {0, 0, 0}, /* int maxTexture2DArray[3]     */ \
          0,         /* size_t surfaceAlignment      */ \
          0,         /* int concurrentKernels        */ \
          0,         /* int ECCEnabled               */ \
          0,         /* int pciBusID                 */ \
          0,         /* int pciDeviceID              */ \
          0          /* int tccDriver                */ \
        } /**< Empty device properties */
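The macro exists so ::cudaChooseDevice callers can leave most fields unconstrained and pin down only what matters; a sketch:

#include <cuda_runtime.h>

int main()
{
    /* Start from "don't care" and constrain only the compute capability. */
    struct cudaDeviceProp want = cudaDevicePropDontCare;
    want.major = 2;                 /* ask for a Fermi-class part */
    want.minor = 0;

    int dev = 0;
    cudaChooseDevice(&dev, &want);  /* picks the closest match */
    cudaSetDevice(dev);
    return 0;
}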
/*******************************************************************************
*                                                                              *
*  SHORTHAND TYPE DEFINITION USED BY RUNTIME API                               *
*                                                                              *
*******************************************************************************/
/**
 * CUDA Error types
 */

skipping to change at line 782
/*DEVICE_BUILTIN*/
typedef struct CUstream_st *cudaStream_t;

/**
 * CUDA event types
 */
/*DEVICE_BUILTIN*/
typedef struct CUevent_st *cudaEvent_t;
/** /**
* CUDA graphics resource types
*/
/*DEVICE_BUILTIN*/
typedef struct cudaGraphicsResource *cudaGraphicsResource_t;
/**
* CUDA UUID types * CUDA UUID types
*/ */
/*DEVICE_BUILTIN*/ /*DEVICE_BUILTIN*/
typedef struct CUuuid_st cudaUUID_t; typedef struct CUuuid_st cudaUUID_t;
/** @} */
/** @} */ /* END CUDART_TYPES */

#endif /* !__DRIVER_TYPES_H__ */
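The opaque stream and event handles defined above are the basis for asynchronous timing. A small self-contained sketch, with a hypothetical busy() kernel:

#include <cstdio>
#include <cuda_runtime.h>

__global__ void busy(float *p) { p[threadIdx.x] *= 2.0f; }

int main()
{
    float *d = 0;
    cudaMalloc((void**)&d, 256 * sizeof(float));

    cudaStream_t s;
    cudaEvent_t beg, end;
    cudaStreamCreate(&s);
    cudaEventCreate(&beg);
    cudaEventCreate(&end);

    cudaEventRecord(beg, s);           /* bracket the work on stream s */
    busy<<<1, 256, 0, s>>>(d);
    cudaEventRecord(end, s);
    cudaEventSynchronize(end);

    float ms = 0.0f;
    cudaEventElapsedTime(&ms, beg, end);
    printf("kernel: %.3f ms\n", ms);

    cudaEventDestroy(beg);
    cudaEventDestroy(end);
    cudaStreamDestroy(s);
    cudaFree(d);
    return 0;
}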
host_config.h
skipping to change at line 104
#endif /* !NOMINMAX */

#include <crtdefs.h> /* for _CRTIMP */

#define __THROW

#endif /* __APPLE__ */

#endif /* __CUDACC__ */
#if defined(__ICC)

#if __ICC != 1110 || !defined(__GNUC__) || !defined(__LP64__)

#error -- unsupported ICC configuration! Only ICC 11.1 on Linux x86_64 is supported!

#endif /* __ICC != 1110 || !__GNUC__ || !__LP64__ */

#endif /* __ICC */
#endif /* !__HOST_CONFIG_H__ */
host_runtime.h
skipping to change at line 85
        __cudaFatCubinHandle = __cudaRegisterFatBinary((void*)&__fatDeviceText); \
        atexit(__cudaUnregisterBinaryUtil)

#define __cudaRegisterVariable(var, ext, size, constant, global) \
        __cudaRegisterVar(__cudaFatCubinHandle, (char*)&__host##var, (char*)__device##var, __name##var, ext, size, constant, global)

#define __cudaRegisterGlobalTexture(tex, dim, norm, ext) \
        __cudaRegisterTexture(__cudaFatCubinHandle, (const struct textureReference*)&tex, (const void**)__device##tex, __name##tex, dim, norm, ext)

#define __cudaRegisterGlobalSurface(surf, dim, ext) \
        __cudaRegisterSurface(__cudaFatCubinHandle, (const struct surfaceReference*)&surf, (const void**)__device##surf, __name##surf, dim, ext)

#define __cudaRegisterEntry(funptr, fun, thread_limit) \
        __cudaRegisterFunction(__cudaFatCubinHandle, (const char*)funptr, (char*)__device_fun(fun), #fun, -1, (uint3*)0, (uint3*)0, (dim3*)0, (dim3*)0, (int*)0)
#define __cudaSetupArg(arg, offset) \
        if (cudaSetupArgument((void*)(char*)&arg, sizeof(arg), (size_t)&offset->arg) != cudaSuccess) \
          return

#define __cudaLaunch(fun) \
        { volatile static char *__f; __f = fun; (void)cudaLaunch(fun); }
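For reference, the expansion driven by the __cudaSetupArg()/__cudaLaunch() macros boils down to roughly the following hand-written launch sequence. This is a sketch using the CUDA 3.x execution-control API (cudaConfigureCall(), cudaSetupArgument(), cudaLaunch()) with a hypothetical scale() kernel; the real generated code computes offsets from a compiler-generated argument record rather than by hand:

#include <cuda_runtime.h>

__global__ void scale(float *p, float f);   /* defined elsewhere */

void launch_scale(float *p, float f)
{
    /* scale<<<1, 256>>>(p, f), spelled out by hand */
    cudaConfigureCall(dim3(1), dim3(256), 0, 0);
    cudaSetupArgument(&p, sizeof(p), 0);
    cudaSetupArgument(&f, sizeof(f), sizeof(p)); /* offset past the pointer */
    cudaLaunch((const char*)scale);              /* host stub identifies the entry */
}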
extern "C" { extern "C" {
extern void** CUDARTAPI __cudaRegisterFatBinary( extern void** CUDARTAPI __cudaRegisterFatBinary(
void *fatCubin void *fatCubin
skipping to change at line 167 skipping to change at line 165
static void **__cudaFatCubinHandle;

static void __cdecl __cudaUnregisterBinaryUtil(void)
{
  __cudaUnregisterFatBinary(__cudaFatCubinHandle);
}

#include "common_functions.h"
#if defined(__APPLE__)

#pragma options align=natural

#else /* __APPLE__ */

#pragma pack()

#if defined(_WIN32)

#pragma warning(disable: 4099)

#if !defined(_WIN64)

#pragma warning(disable: 4408)

#endif /* !_WIN64 */

#endif /* _WIN32 */

#endif /* __APPLE__ */

#endif /* !__CUDA_INTERNAL_COMPILATION__ */
math_functions_dbl_ptx3.h
skipping to change at line 702
    t = a / t;
    t = -a * t;
    t = __internal_atanh_kernel(a, t);
    return t;
  }
  return log (a + CUDART_ONE);
}
static __forceinline__ double __internal_exp_kernel(double a, int scale)
{
  double t, z;
  int i, j, k;

  /* exp(a) = 2^(rint(a/log(2)) + z) = 2^(i + z) */
  t = rint (a * CUDART_L2E);
  i = (int)t;
  z = __fma_rn (t, -CUDART_LN2_HI, a);
  z = __fma_rn (t, -CUDART_LN2_LO, z);
  t = __internal_expm1_kernel (z);
  k = ((i + scale) << 20) + (1023 << 20);
  if (abs(i) < 1021) {
    z = __hiloint2double (k, 0);
    z = __fma_rn (t, z, z);
  } else {
    j = 0x40000000;
    if (i < 0) {
      k += (55 << 20);
      j -= (55 << 20);
    }
    k = k - (1 << 20);
    z = __hiloint2double (j, 0); /* 2^-54 if a is denormal, 2.0 otherwise */
    t = __fma_rn (t, z, z);
    z = __hiloint2double (k, 0);
    z = t * z;
  }
  return z;
}
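The __hiloint2double() calls above synthesize powers of two directly from IEEE-754 exponent bits instead of calling a scaling routine. A host-side sketch of the same trick, with a hypothetical hiloint2double() stand-in built from memcpy():

#include <cstdio>
#include <cstring>
#include <stdint.h>

/* Hypothetical host-side stand-in for the device intrinsic
   __hiloint2double(hi, lo): glue two 32-bit halves into a double. */
static double hiloint2double(int hi, int lo)
{
    uint64_t bits = ((uint64_t)(uint32_t)hi << 32) | (uint32_t)lo;
    double d;
    memcpy(&d, &bits, sizeof(d));
    return d;
}

int main(void)
{
    /* (e + 1023) << 20 drops the biased exponent into bits 20..30 of the
       high word, i.e. bits 52..62 of the double: the value is exactly 2^e. */
    for (int e = -2; e <= 2; ++e)
        printf("2^%+d = %g\n", e, hiloint2double((e + 1023) << 20, 0));
    return 0;   /* prints 0.25, 0.5, 1, 2, 4 */
}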
static __forceinline__ double exp(double a)
{
  double t;
  int i;

  i = __double2hiint(a);
  if (((unsigned)i < (unsigned)0x40862e43) || ((int)i < (int)0xC0874911)) {
    t = __internal_exp_kernel(a, 0);
    return t;
  }
  t = (i < 0) ? CUDART_ZERO : CUDART_INF;
  if (__isnan(a)) {
    t = a + a;
  }
  return t;
}
static __forceinline__ double exp2(double a)
{
  double t, z;
  int i, j, k;

  i = __double2hiint(a);
  if (((unsigned)i < (unsigned)0x40900000) || ((int)i < (int)0xc090cc00)) {
    t = rint (a);
    z = a - t;
    i = (int)t;
    /* 2^z = exp(log(2)*z) */
    z = __fma_rn (z, CUDART_LN2_HI, z * CUDART_LN2_LO);
    t = __internal_expm1_kernel(z);
    k = (i << 20) + (1023 << 20);
    if (abs(i) < 1023) {
      z = __hiloint2double (k, 0);
      z = __fma_rn (t, z, z);
    } else {
      j = 0x40000000;
      if (i < 0) {
        k += (55 << 20);
        j -= (55 << 20);
      }
      k = k - (1 << 20);
      z = __hiloint2double (j, 0); /* 2^-54 if a is denormal, 2.0 otherwise */
      t = __fma_rn (t, z, z);
      z = __hiloint2double (k, 0);
      z = t * z;
    }
    return z;
  }
  t = (i < 0) ? CUDART_ZERO : CUDART_INF;
  if (__isnan(a)) {
    t = a + a;
  }
  return t;
}
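The else branch exists because a biased exponent assembled as (i << 20) + (1023 << 20) only encodes normal doubles, i.e. scales in -1022 <= i <= 1023; outside that range the scale must be applied as a product of two in-range powers. A host-side sketch of the failure and the staged fix, with a hypothetical pow2() helper:

#include <cstdio>
#include <cstring>
#include <stdint.h>

/* Hypothetical helper: builds 2^e from exponent bits alone, which is
   only valid for normal doubles, i.e. -1022 <= e <= 1023. */
static double pow2(int e)
{
    uint64_t bits = (uint64_t)(unsigned)(e + 1023) << 52;
    double d;
    memcpy(&d, &bits, sizeof(d));
    return d;
}

int main(void)
{
    double t = 0.8125;   /* mantissa to be rescaled         */
    int    e = -1040;    /* below the normal-exponent range */

    double naive  = t * pow2(e);                    /* garbage: e + 1023 < 0 */
    double staged = (t * pow2(e + 54)) * pow2(-54); /* two in-range factors  */

    printf("naive:  %g\nstaged: %g\n", naive, staged);
    return 0;
}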
static __forceinline__ double exp10(double a)
{
  double z;
  double t;
  int i, j, k;

  i = __double2hiint(a);
  if (((unsigned)i < (unsigned)0x40734414) || ((int)i < (int)0xc07439b8)) {
    t = rint (a * CUDART_L2T);
    i = (int)t;
    z = __fma_rn (t, -CUDART_LG2_HI, a);
    z = __fma_rn (t, -CUDART_LG2_LO, z);
    /* 10^z = exp(log(10)*z) */
    z = __fma_rn (z, CUDART_LNT_HI, z * CUDART_LNT_LO);
    t = __internal_expm1_kernel(z);
    k = (i << 20) + (1023 << 20);
    if (abs(i) < 1023) {
      z = __hiloint2double (k, 0);
      z = __fma_rn (t, z, z);
    } else {
      j = 0x40000000;
      if (i < 0) {
        k += (55 << 20);
        j -= (55 << 20);
      }
      k = k - (1 << 20);
      z = __hiloint2double (j, 0); /* 2^-54 if a is denormal, 2.0 otherwise */
      t = __fma_rn (t, z, z);
      z = __hiloint2double (k, 0);
      z = t * z;
    }
    return z;
  }
  t = (i < 0) ? CUDART_ZERO : CUDART_INF;
  if (__isnan(a)) {
    t = a + a;
  }
  return t;
}
static __forceinline__ double expm1(double a)
{
opencl.h
/*******************************************************************************
 * Copyright (c) 2008-2010 The Khronos Group Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and/or associated documentation files (the
 * "Materials"), to deal in the Materials without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sublicense, and/or sell copies of the Materials, and to
 * permit persons to whom the Materials are furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice shall be included

skipping to change at line 24

 *
 * THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
 * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS.
 ******************************************************************************/

/* $Revision: 11708 $ on $Date: 2010-06-14 12:06:24 +0530 (Mon, 14 Jun 2010) $ */
#ifndef __OPENCL_H
#define __OPENCL_H

#ifdef __cplusplus
extern "C" {
#endif

#ifdef __APPLE__
surface_types.h
skipping to change at line 47
#define __SURFACE_TYPES_H__

/*******************************************************************************
*                                                                              *
*                                                                              *
*                                                                              *
*******************************************************************************/

#include "driver_types.h"
/**
 * \addtogroup CUDART_TYPES
 *
 * @{
 */

/*******************************************************************************
*                                                                              *
*                                                                              *
*                                                                              *
*******************************************************************************/
/**
 * CUDA Surface boundary modes
 */
/*DEVICE_BUILTIN*/
enum cudaSurfaceBoundaryMode
{
    cudaBoundaryModeZero  = 0,    /**< Zero boundary mode */
    cudaBoundaryModeClamp = 1,    /**< Clamp boundary mode */
    cudaBoundaryModeTrap  = 2     /**< Trap boundary mode */
};
/**
 * CUDA Surface format modes
 */
/*DEVICE_BUILTIN*/
enum cudaSurfaceFormatMode
{
    cudaFormatModeForced = 0,     /**< Forced format mode */
    cudaFormatModeAuto   = 1      /**< Auto format mode */
};
/**
 * CUDA Surface reference
 */
/*DEVICE_BUILTIN*/
struct surfaceReference
{
    /**
     * Channel descriptor for surface reference
     */
    struct cudaChannelFormatDesc channelDesc;
};
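Surface references are accessed through surf2Dread()/surf2Dwrite(), with the boundary mode above selecting out-of-range behavior. A device-side sketch; it assumes the backing cudaArray was created with cudaArraySurfaceLoadStore and bound on the host with cudaBindSurfaceToArray():

#include <cuda_runtime.h>

surface<void, 2> outputSurf;   /* surface reference at file scope */

__global__ void invert(void)
{
    unsigned int x = blockIdx.x * blockDim.x + threadIdx.x;
    unsigned int y = blockIdx.y * blockDim.y + threadIdx.y;
    uchar4 p;
    /* x is byte-addressed; cudaBoundaryModeTrap faults on out-of-range. */
    surf2Dread(&p, outputSurf, x * sizeof(uchar4), y, cudaBoundaryModeTrap);
    p.x = 255 - p.x; p.y = 255 - p.y; p.z = 255 - p.z;
    surf2Dwrite(p, outputSurf, x * sizeof(uchar4), y, cudaBoundaryModeTrap);
}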
/** @} */
/** @} */ /* END CUDART_TYPES */

#endif /* !__SURFACE_TYPES_H__ */
texture_fetch_functions.h
skipping to change at line 1876
}

static __inline__ __device__ float4 tex3D(texture<ushort4, 3, cudaReadModeNormalizedFloat> t, float x, float y, float z)
{
  uint4 v = __utexfetch(t, make_float4(x, y, z, 0));
  float4 w = make_float4(__int_as_float(v.x), __int_as_float(v.y), __int_as_float(v.z), __int_as_float(v.w));

  return make_float4(w.x, w.y, w.z, w.w);
}
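These overloads are the implementation side of device texture reads; user code sees only the tex3D() call. A device-side usage sketch: the 3D array allocation and the cudaBindTextureToArray() call are assumed to happen on the host.

#include <cuda_runtime.h>

/* Texture reference at file scope; 16-bit channels are returned
   rescaled into [0, 1] because of cudaReadModeNormalizedFloat. */
texture<ushort4, 3, cudaReadModeNormalizedFloat> volumeTex;

__global__ void sample(float4 *out, float x, float y, float z)
{
    out[0] = tex3D(volumeTex, x, y, z);
}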
#define __utexfetchi \
        000 incorrect invocation of builtin __utexfetchi 000
#define __itexfetchi \
        000 incorrect invocation of builtin __itexfetchi 000
#define __ftexfetchi \
        000 incorrect invocation of builtin __ftexfetchi 000
#define __utexfetch \
        000 incorrect invocation of builtin __utexfetch 000
#define __itexfetch \
        000 incorrect invocation of builtin __itexfetch 000
#define __ftexfetch \
        000 incorrect invocation of builtin __ftexfetch 000
#elif defined(__CUDABE__)

extern uint4  __utexfetchi1D(const void*, int4);
extern int4   __itexfetchi1D(const void*, int4);
extern float4 __ftexfetchi1D(const void*, int4);
extern uint4  __utexfetch1D(const void*, float4);
extern int4   __itexfetch1D(const void*, float4);
extern float4 __ftexfetch1D(const void*, float4);
extern uint4  __utexfetch2D(const void*, float4);
extern int4   __itexfetch2D(const void*, float4);

skipping to change at line 1919
        __ftexfetchi1D(t, i)
#define __utexfetch(t, i, d) \
        __utexfetch##d##D(t, i)
#define __itexfetch(t, i, d) \
        __itexfetch##d##D(t, i)
#define __ftexfetch(t, i, d) \
        __ftexfetch##d##D(t, i)

#endif /* __cplusplus && __CUDACC__ */
#if defined(__cplusplus) && defined(__CUDACC__)

#if !defined(__CUDA_ARCH__) || __CUDA_ARCH__ >= 200

/*******************************************************************************
*                                                                              *
*                                                                              *
*                                                                              *
*******************************************************************************/

/*DEVICE_BUILTIN*/
template<int comp, class T> extern __device__ int4   __itex2Dgather(texture<T, 2, cudaReadModeElementType> t, float2 i, int c = comp);
/*DEVICE_BUILTIN*/
template<int comp, class T> extern __device__ uint4  __utex2Dgather(texture<T, 2, cudaReadModeElementType> t, float2 i, int c = comp);
/*DEVICE_BUILTIN*/
template<int comp, class T> extern __device__ float4 __ftex2Dgather(texture<T, 2, cudaReadModeElementType> t, float2 i, int c = comp);

#define __tex2DgatherUtil(T, f, r, c) \
        { T v = f<c>(t, make_float2(x, y)); return r; }

#define __tex2DgatherUtil1(T, f, r) \
        __tex2DgatherUtil(T, f, r, 0)

#define __tex2DgatherUtil2(T, f, r) \
        if (comp == 1) __tex2DgatherUtil(T, f, r, 1) \
        else __tex2DgatherUtil1(T, f, r)

#define __tex2DgatherUtil3(T, f, r) \
        if (comp == 2) __tex2DgatherUtil(T, f, r, 2) \
        else __tex2DgatherUtil2(T, f, r)

#define __tex2DgatherUtil4(T, f, r) \
        if (comp == 3) __tex2DgatherUtil(T, f, r, 3) \
        else __tex2DgatherUtil3(T, f, r)

static __inline__ __device__ char4 tex2Dgather(texture<char, 2, cudaReadModeElementType> t, float x, float y, int comp = 0)
{
  __tex2DgatherUtil1(int4, __itex2Dgather, make_char4(v.x, v.y, v.z, v.w));
}
static __inline__ __device__ char4 tex2Dgather(texture<signed char, 2, cudaReadModeElementType> t, float x, float y, int comp = 0)
{
  __tex2DgatherUtil1(int4, __itex2Dgather, make_char4(v.x, v.y, v.z, v.w));
}
static __inline__ __device__ uchar4 tex2Dgather(texture<unsigned char, 2, cudaReadModeElementType> t, float x, float y, int comp = 0)
{
  __tex2DgatherUtil1(uint4, __utex2Dgather, make_uchar4(v.x, v.y, v.z, v.w));
}
static __inline__ __device__ char4 tex2Dgather(texture<char1, 2, cudaReadModeElementType> t, float x, float y, int comp = 0)
{
  __tex2DgatherUtil1(int4, __itex2Dgather, make_char4(v.x, v.y, v.z, v.w));
}
static __inline__ __device__ uchar4 tex2Dgather(texture<uchar1, 2, cudaReadModeElementType> t, float x, float y, int comp = 0)
{
  __tex2DgatherUtil1(uint4, __utex2Dgather, make_uchar4(v.x, v.y, v.z, v.w));
}
static __inline__ __device__ char4 tex2Dgather(texture<char2, 2, cudaReadModeElementType> t, float x, float y, int comp = 0)
{
  __tex2DgatherUtil2(int4, __itex2Dgather, make_char4(v.x, v.y, v.z, v.w));
}
static __inline__ __device__ uchar4 tex2Dgather(texture<uchar2, 2, cudaReadModeElementType> t, float x, float y, int comp = 0)
{
  __tex2DgatherUtil2(uint4, __utex2Dgather, make_uchar4(v.x, v.y, v.z, v.w));
}
static __inline__ __device__ char4 tex2Dgather(texture<char3, 2, cudaReadModeElementType> t, float x, float y, int comp = 0)
{
  __tex2DgatherUtil3(int4, __itex2Dgather, make_char4(v.x, v.y, v.z, v.w));
}
static __inline__ __device__ uchar4 tex2Dgather(texture<uchar3, 2, cudaReadModeElementType> t, float x, float y, int comp = 0)
{
  __tex2DgatherUtil3(uint4, __utex2Dgather, make_uchar4(v.x, v.y, v.z, v.w));
}
static __inline__ __device__ char4 tex2Dgather(texture<char4, 2, cudaReadModeElementType> t, float x, float y, int comp = 0)
{
  __tex2DgatherUtil4(int4, __itex2Dgather, make_char4(v.x, v.y, v.z, v.w));
}
static __inline__ __device__ uchar4 tex2Dgather(texture<uchar4, 2, cudaReadModeElementType> t, float x, float y, int comp = 0)
{
  __tex2DgatherUtil4(uint4, __utex2Dgather, make_uchar4(v.x, v.y, v.z, v.w));
}
static __inline__ __device__ short4 tex2Dgather(texture<signed short, 2, cudaReadModeElementType> t, float x, float y, int comp = 0)
{
  __tex2DgatherUtil1(int4, __itex2Dgather, make_short4(v.x, v.y, v.z, v.w));
}
static __inline__ __device__ ushort4 tex2Dgather(texture<unsigned short, 2, cudaReadModeElementType> t, float x, float y, int comp = 0)
{
  __tex2DgatherUtil1(uint4, __utex2Dgather, make_ushort4(v.x, v.y, v.z, v.w));
}
static __inline__ __device__ short4 tex2Dgather(texture<short1, 2, cudaReadModeElementType> t, float x, float y, int comp = 0)
{
  __tex2DgatherUtil1(int4, __itex2Dgather, make_short4(v.x, v.y, v.z, v.w));
}
static __inline__ __device__ ushort4 tex2Dgather(texture<ushort1, 2, cudaReadModeElementType> t, float x, float y, int comp = 0)
{
  __tex2DgatherUtil1(uint4, __utex2Dgather, make_ushort4(v.x, v.y, v.z, v.w));
}
static __inline__ __device__ short4 tex2Dgather(texture<short2, 2, cudaReadModeElementType> t, float x, float y, int comp = 0)
{
  __tex2DgatherUtil2(int4, __itex2Dgather, make_short4(v.x, v.y, v.z, v.w));
}
static __inline__ __device__ ushort4 tex2Dgather(texture<ushort2, 2, cudaReadModeElementType> t, float x, float y, int comp = 0)
{
  __tex2DgatherUtil2(uint4, __utex2Dgather, make_ushort4(v.x, v.y, v.z, v.w));
}
static __inline__ __device__ short4 tex2Dgather(texture<short3, 2, cudaReadModeElementType> t, float x, float y, int comp = 0)
{
  __tex2DgatherUtil3(int4, __itex2Dgather, make_short4(v.x, v.y, v.z, v.w));
}
static __inline__ __device__ ushort4 tex2Dgather(texture<ushort3, 2, cudaReadModeElementType> t, float x, float y, int comp = 0)
{
  __tex2DgatherUtil3(uint4, __utex2Dgather, make_ushort4(v.x, v.y, v.z, v.w));
}
static __inline__ __device__ short4 tex2Dgather(texture<short4, 2, cudaReadModeElementType> t, float x, float y, int comp = 0)
{
  __tex2DgatherUtil4(int4, __itex2Dgather, make_short4(v.x, v.y, v.z, v.w));
}
static __inline__ __device__ ushort4 tex2Dgather(texture<ushort4, 2, cudaReadModeElementType> t, float x, float y, int comp = 0)
{
  __tex2DgatherUtil4(uint4, __utex2Dgather, make_ushort4(v.x, v.y, v.z, v.w));
}
static __inline__ __device__ int4 tex2Dgather(texture<signed int, 2, cudaReadModeElementType> t, float x, float y, int comp = 0)
{
  __tex2DgatherUtil1(int4, __itex2Dgather, v);
}
static __inline__ __device__ uint4 tex2Dgather(texture<unsigned int, 2, cudaReadModeElementType> t, float x, float y, int comp = 0)
{
  __tex2DgatherUtil1(uint4, __utex2Dgather, v);
}
static __inline__ __device__ int4 tex2Dgather(texture<int1, 2, cudaReadModeElementType> t, float x, float y, int comp = 0)
{
  __tex2DgatherUtil1(int4, __itex2Dgather, v);
}
static __inline__ __device__ uint4 tex2Dgather(texture<uint1, 2, cudaReadModeElementType> t, float x, float y, int comp = 0)
{
  __tex2DgatherUtil1(uint4, __utex2Dgather, v);
}
static __inline__ __device__ int4 tex2Dgather(texture<int2, 2, cudaReadModeElementType> t, float x, float y, int comp = 0)
{
  __tex2DgatherUtil2(int4, __itex2Dgather, v);
}
static __inline__ __device__ uint4 tex2Dgather(texture<uint2, 2, cudaReadModeElementType> t, float x, float y, int comp = 0)
{
  __tex2DgatherUtil2(uint4, __utex2Dgather, v);
}
static __inline__ __device__ int4 tex2Dgather(texture<int3, 2, cudaReadModeElementType> t, float x, float y, int comp = 0)
{
  __tex2DgatherUtil3(int4, __itex2Dgather, v);
}
static __inline__ __device__ uint4 tex2Dgather(texture<uint3, 2, cudaReadModeElementType> t, float x, float y, int comp = 0)
{
  __tex2DgatherUtil3(uint4, __utex2Dgather, v);
}
static __inline__ __device__ int4 tex2Dgather(texture<int4, 2, cudaReadModeElementType> t, float x, float y, int comp = 0)
{
  __tex2DgatherUtil4(int4, __itex2Dgather, v);
}
static __inline__ __device__ uint4 tex2Dgather(texture<uint4, 2, cudaReadModeElementType> t, float x, float y, int comp = 0)
{
  __tex2DgatherUtil4(uint4, __utex2Dgather, v);
}
static __inline__ __device__ float4 tex2Dgather(texture<float, 2, cudaReadModeElementType> t, float x, float y, int comp = 0)
{
  __tex2DgatherUtil1(float4, __ftex2Dgather, v);
}
static __inline__ __device__ float4 tex2Dgather(texture<float1, 2, cudaReadModeElementType> t, float x, float y, int comp = 0)
{
  __tex2DgatherUtil1(float4, __ftex2Dgather, v);
}
static __inline__ __device__ float4 tex2Dgather(texture<float2, 2, cudaReadModeElementType> t, float x, float y, int comp = 0)
{
  __tex2DgatherUtil2(float4, __ftex2Dgather, v);
}
static __inline__ __device__ float4 tex2Dgather(texture<float3, 2, cudaReadModeElementType> t, float x, float y, int comp = 0)
{
  __tex2DgatherUtil3(float4, __ftex2Dgather, v);
}
static __inline__ __device__ float4 tex2Dgather(texture<float4, 2, cudaReadModeElementType> t, float x, float y, int comp = 0)
{
  __tex2DgatherUtil4(float4, __ftex2Dgather, v);
}

#undef __tex2DgatherUtil
#undef __tex2DgatherUtil1
#undef __tex2DgatherUtil2
#undef __tex2DgatherUtil3
#undef __tex2DgatherUtil4

#define __utex2Dgather \
        000 incorrect invocation of builtin __utex2Dgather 000
#define __itex2Dgather \
        000 incorrect invocation of builtin __itex2Dgather 000
#define __ftex2Dgather \
        000 incorrect invocation of builtin __ftex2Dgather 000

#endif /* !__CUDA_ARCH__ || __CUDA_ARCH__ >= 200 */

#elif defined(__CUDABE__)

extern uint4  __utex2Dgather0(const void*, float2);
extern uint4  __utex2Dgather1(const void*, float2);
extern uint4  __utex2Dgather2(const void*, float2);
extern uint4  __utex2Dgather3(const void*, float2);
extern int4   __itex2Dgather0(const void*, float2);
extern int4   __itex2Dgather1(const void*, float2);
extern int4   __itex2Dgather2(const void*, float2);
extern int4   __itex2Dgather3(const void*, float2);
extern float4 __ftex2Dgather0(const void*, float2);
extern float4 __ftex2Dgather1(const void*, float2);
extern float4 __ftex2Dgather2(const void*, float2);
extern float4 __ftex2Dgather3(const void*, float2);

#define __utex2Dgather(t, i, c) \
        __utex2Dgather##c(t, i)
#define __itex2Dgather(t, i, c) \
        __itex2Dgather##c(t, i)
#define __ftex2Dgather(t, i, c) \
        __ftex2Dgather##c(t, i)

#endif /* __cplusplus && __CUDACC__ */
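tex2Dgather() returns one selected component from each texel of the 2x2 footprint around the coordinate, without filtering; per the guards above it requires compute capability 2.0 and, like all texture reads, a texture bound to a cudaArray. A device-side usage sketch (host-side binding assumed, compile with -arch=sm_20):

#include <cuda_runtime.h>

texture<uchar2, 2, cudaReadModeElementType> gatherTex;

__global__ void gather(uchar4 *out, float u, float v)
{
    /* The x component (comp = 0) of each of the four surrounding texels;
       the uchar2 overload above returns them packed into a uchar4. */
    out[0] = tex2Dgather(gatherTex, u, v, 0);
}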
#endif /* !__TEXTURE_FETCH_FUNCTIONS_H__ */
texture_types.h
skipping to change at line 47
#define __TEXTURE_TYPES_H__

/*******************************************************************************
*                                                                              *
*                                                                              *
*                                                                              *
*******************************************************************************/

#include "driver_types.h"
/**
 * \addtogroup CUDART_TYPES
 *
 * @{
 */

/*******************************************************************************
*                                                                              *
*                                                                              *
*                                                                              *
*******************************************************************************/
/**
 * CUDA texture address modes
 */
/*DEVICE_BUILTIN*/
enum cudaTextureAddressMode
{
    cudaAddressModeWrap   = 0,    /**< Wrapping address mode */
    cudaAddressModeClamp  = 1,    /**< Clamp to edge address mode */
    cudaAddressModeMirror = 2,    /**< Mirror address mode */
    cudaAddressModeBorder = 3     /**< Border address mode */
};
/**
 * CUDA texture filter modes
 */
/*DEVICE_BUILTIN*/
enum cudaTextureFilterMode
{
    cudaFilterModePoint  = 0,     /**< Point filter mode */
    cudaFilterModeLinear = 1      /**< Linear filter mode */
};
/**
 * CUDA texture read modes
 */
/*DEVICE_BUILTIN*/
enum cudaTextureReadMode
{
    cudaReadModeElementType     = 0,  /**< Read texture as specified element type */
    cudaReadModeNormalizedFloat = 1   /**< Read texture as normalized float */
};
/**
 * CUDA texture reference
 */
/*DEVICE_BUILTIN*/
struct textureReference
{
    /**
     * Indicates whether texture reads are normalized or not
     */
    int                          normalized;
    /**
     * Texture filter mode
     */
    enum cudaTextureFilterMode   filterMode;
    /**
     * Texture address mode for up to 3 dimensions
     */
    enum cudaTextureAddressMode  addressMode[3];
    /**
     * Channel descriptor for the texture reference
     */
    struct cudaChannelFormatDesc channelDesc;
    int                          __cudaReserved[16];
};
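The texture<> template used in device code derives from this struct, so the fields can be set directly from host code before binding; a sketch:

#include <cuda_runtime.h>

texture<float, 2, cudaReadModeElementType> tex;

void configure()
{
    tex.normalized     = 1;                     /* coordinates in [0, 1)   */
    tex.filterMode     = cudaFilterModeLinear;  /* bilinear interpolation  */
    tex.addressMode[0] = cudaAddressModeWrap;   /* wrap in x ...           */
    tex.addressMode[1] = cudaAddressModeClamp;  /* ... clamp in y          */
    /* cudaBindTextureToArray(tex, someArray, desc) would follow. */
}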
/** @} */
/** @} */ /* END CUDART_TYPES */

#endif /* !__TEXTURE_TYPES_H__ */