cl.h

/*******************************************************************************
- * Copyright (c) 2008-2009 The Khronos Group Inc.
+ * Copyright (c) 2008-2010 The Khronos Group Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and/or associated documentation files (the
 * "Materials"), to deal in the Materials without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sublicense, and/or sell copies of the Materials, and to
 * permit persons to whom the Materials are furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice shall be included

skipping to change at line 24

 *
 * THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
 * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS.
 ******************************************************************************/

-/* $Revision: 10327 $ on $Date: 2010-02-11 00:24:35 +0530 (Thu, 11 Feb 2010) $ */
+/* $Revision: 11708 $ on $Date: 2010-06-14 12:06:24 +0530 (Mon, 14 Jun 2010) $ */

#ifndef __OPENCL_CL_H
#define __OPENCL_CL_H

#ifdef __APPLE__
#include <OpenCL/cl_platform.h>
#else
#include <CL/cl_platform.h>
#endif

skipping to change at line 56

typedef struct _cl_program *      cl_program;
typedef struct _cl_kernel *       cl_kernel;
typedef struct _cl_event *        cl_event;
typedef struct _cl_sampler *      cl_sampler;

typedef cl_uint     cl_bool;    /* WARNING! Unlike cl_ types in cl_platform.h, cl_bool is not guaranteed to be the same size as the bool in kernels. */
typedef cl_ulong    cl_bitfield;
typedef cl_bitfield cl_device_type;
typedef cl_uint     cl_platform_info;
typedef cl_uint     cl_device_info;
-typedef cl_bitfield cl_device_address_info;
typedef cl_bitfield cl_device_fp_config;
typedef cl_uint     cl_device_mem_cache_type;
typedef cl_uint     cl_device_local_mem_type;
typedef cl_bitfield cl_device_exec_capabilities;
typedef cl_bitfield cl_command_queue_properties;

typedef intptr_t    cl_context_properties;
typedef cl_uint     cl_context_info;
typedef cl_uint     cl_command_queue_info;
typedef cl_uint     cl_channel_order;
typedef cl_uint     cl_channel_type;
typedef cl_bitfield cl_mem_flags;
typedef cl_uint     cl_mem_object_type;
typedef cl_uint     cl_mem_info;
typedef cl_uint     cl_image_info;
+typedef cl_uint     cl_buffer_create_type;
typedef cl_uint     cl_addressing_mode;
typedef cl_uint     cl_filter_mode;
typedef cl_uint     cl_sampler_info;
typedef cl_bitfield cl_map_flags;
typedef cl_uint     cl_program_info;
typedef cl_uint     cl_program_build_info;
typedef cl_int      cl_build_status;
typedef cl_uint     cl_kernel_info;
typedef cl_uint     cl_kernel_work_group_info;
typedef cl_uint     cl_event_info;
typedef cl_uint     cl_command_type;
typedef cl_uint     cl_profiling_info;

typedef struct _cl_image_format {
    cl_channel_order image_channel_order;
    cl_channel_type  image_channel_data_type;
} cl_image_format;

+typedef struct _cl_buffer_region {
+    size_t origin;
+    size_t size;
+} cl_buffer_region;

/******************************************************************************/

/* Error Codes */
#define CL_SUCCESS                                   0
#define CL_DEVICE_NOT_FOUND                          -1
#define CL_DEVICE_NOT_AVAILABLE                      -2
#define CL_COMPILER_NOT_AVAILABLE                    -3
#define CL_MEM_OBJECT_ALLOCATION_FAILURE             -4
#define CL_OUT_OF_RESOURCES                          -5
#define CL_OUT_OF_HOST_MEMORY                        -6
#define CL_PROFILING_INFO_NOT_AVAILABLE              -7
#define CL_MEM_COPY_OVERLAP                          -8
#define CL_IMAGE_FORMAT_MISMATCH                     -9
#define CL_IMAGE_FORMAT_NOT_SUPPORTED                -10
#define CL_BUILD_PROGRAM_FAILURE                     -11
#define CL_MAP_FAILURE                               -12
+#define CL_MISALIGNED_SUB_BUFFER_OFFSET              -13
+#define CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST -14

#define CL_INVALID_VALUE                             -30
#define CL_INVALID_DEVICE_TYPE                       -31
#define CL_INVALID_PLATFORM                          -32
#define CL_INVALID_DEVICE                            -33
#define CL_INVALID_CONTEXT                           -34
#define CL_INVALID_QUEUE_PROPERTIES                  -35
#define CL_INVALID_COMMAND_QUEUE                     -36
#define CL_INVALID_HOST_PTR                          -37
#define CL_INVALID_MEM_OBJECT                        -38

skipping to change at line 144 (old) / line 151 (new)

#define CL_INVALID_EVENT_WAIT_LIST                   -57
#define CL_INVALID_EVENT                             -58
#define CL_INVALID_OPERATION                         -59
#define CL_INVALID_GL_OBJECT                         -60
#define CL_INVALID_BUFFER_SIZE                       -61
#define CL_INVALID_MIP_LEVEL                         -62
#define CL_INVALID_GLOBAL_WORK_SIZE                  -63
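The two codes added in 1.1 (CL_MISALIGNED_SUB_BUFFER_OFFSET and CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST) come back through the same cl_int return path as every 1.0 code, so existing error handling picks them up unchanged. As a minimal host-side sketch (the CHECK_CL macro is illustrative, not part of the header):

    #include <stdio.h>
    #include <stdlib.h>
    #include <CL/cl.h>

    /* Hypothetical helper: report the numeric error code and abort. */
    #define CHECK_CL(err)                                          \
        do {                                                       \
            cl_int _e = (err);                                     \
            if (_e != CL_SUCCESS) {                                \
                fprintf(stderr, "OpenCL error %d at %s:%d\n",      \
                        _e, __FILE__, __LINE__);                   \
                exit(EXIT_FAILURE);                                \
            }                                                      \
        } while (0)

Usage is simply CHECK_CL(clFinish(queue)); around any call returning cl_int.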
/* OpenCL Version */
#define CL_VERSION_1_0                              1
+#define CL_VERSION_1_1                              1

/* cl_bool */
#define CL_FALSE                                    0
#define CL_TRUE                                     1

/* cl_platform_info */
#define CL_PLATFORM_PROFILE                         0x0900
#define CL_PLATFORM_VERSION                         0x0901
#define CL_PLATFORM_NAME                            0x0902
#define CL_PLATFORM_VENDOR                          0x0903

skipping to change at line 216 (old) / line 224 (new)

#define CL_DEVICE_QUEUE_PROPERTIES                  0x102A
#define CL_DEVICE_NAME                              0x102B
#define CL_DEVICE_VENDOR                            0x102C
#define CL_DRIVER_VERSION                           0x102D
#define CL_DEVICE_PROFILE                           0x102E
#define CL_DEVICE_VERSION                           0x102F
#define CL_DEVICE_EXTENSIONS                        0x1030
#define CL_DEVICE_PLATFORM                          0x1031
/* 0x1032 reserved for CL_DEVICE_DOUBLE_FP_CONFIG */
/* 0x1033 reserved for CL_DEVICE_HALF_FP_CONFIG */
+#define CL_DEVICE_PREFERRED_VECTOR_WIDTH_HALF       0x1034
+#define CL_DEVICE_HOST_UNIFIED_MEMORY               0x1035
+#define CL_DEVICE_NATIVE_VECTOR_WIDTH_CHAR          0x1036
+#define CL_DEVICE_NATIVE_VECTOR_WIDTH_SHORT         0x1037
+#define CL_DEVICE_NATIVE_VECTOR_WIDTH_INT           0x1038
+#define CL_DEVICE_NATIVE_VECTOR_WIDTH_LONG          0x1039
+#define CL_DEVICE_NATIVE_VECTOR_WIDTH_FLOAT         0x103A
+#define CL_DEVICE_NATIVE_VECTOR_WIDTH_DOUBLE        0x103B
+#define CL_DEVICE_NATIVE_VECTOR_WIDTH_HALF          0x103C
+#define CL_DEVICE_OPENCL_C_VERSION                  0x103D

/* cl_device_fp_config - bitfield */
#define CL_FP_DENORM                                (1 << 0)
#define CL_FP_INF_NAN                               (1 << 1)
#define CL_FP_ROUND_TO_NEAREST                      (1 << 2)
#define CL_FP_ROUND_TO_ZERO                         (1 << 3)
#define CL_FP_ROUND_TO_INF                          (1 << 4)
#define CL_FP_FMA                                   (1 << 5)
+#define CL_FP_SOFT_FLOAT                            (1 << 6)

/* cl_device_mem_cache_type */
#define CL_NONE                                     0x0
#define CL_READ_ONLY_CACHE                          0x1
#define CL_READ_WRITE_CACHE                         0x2

/* cl_device_local_mem_type */
#define CL_LOCAL                                    0x1
#define CL_GLOBAL                                   0x2

skipping to change at line 246 (old) / line 265 (new)

#define CL_EXEC_NATIVE_KERNEL                       (1 << 1)

/* cl_command_queue_properties - bitfield */
#define CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE      (1 << 0)
#define CL_QUEUE_PROFILING_ENABLE                   (1 << 1)

/* cl_context_info */
#define CL_CONTEXT_REFERENCE_COUNT                  0x1080
#define CL_CONTEXT_DEVICES                          0x1081
#define CL_CONTEXT_PROPERTIES                       0x1082
+#define CL_CONTEXT_NUM_DEVICES                      0x1083

/* cl_context_info + cl_context_properties */
#define CL_CONTEXT_PLATFORM                         0x1084

/* cl_command_queue_info */
#define CL_QUEUE_CONTEXT                            0x1090
#define CL_QUEUE_DEVICE                             0x1091
#define CL_QUEUE_REFERENCE_COUNT                    0x1092
#define CL_QUEUE_PROPERTIES                         0x1093

skipping to change at line 275 (old) / line 295 (new)

#define CL_R                                        0x10B0
#define CL_A                                        0x10B1
#define CL_RG                                       0x10B2
#define CL_RA                                       0x10B3
#define CL_RGB                                      0x10B4
#define CL_RGBA                                     0x10B5
#define CL_BGRA                                     0x10B6
#define CL_ARGB                                     0x10B7
#define CL_INTENSITY                                0x10B8
#define CL_LUMINANCE                                0x10B9
+#define CL_Rx                                       0x10BA
+#define CL_RGx                                      0x10BB
+#define CL_RGBx                                     0x10BC

/* cl_channel_type */
#define CL_SNORM_INT8                               0x10D0
#define CL_SNORM_INT16                              0x10D1
#define CL_UNORM_INT8                               0x10D2
#define CL_UNORM_INT16                              0x10D3
#define CL_UNORM_SHORT_565                          0x10D4
#define CL_UNORM_SHORT_555                          0x10D5
#define CL_UNORM_INT_101010                         0x10D6
#define CL_SIGNED_INT8                              0x10D7

skipping to change at line 306 (old) / line 329 (new)

#define CL_MEM_OBJECT_IMAGE3D                       0x10F2

/* cl_mem_info */
#define CL_MEM_TYPE                                 0x1100
#define CL_MEM_FLAGS                                0x1101
#define CL_MEM_SIZE                                 0x1102
#define CL_MEM_HOST_PTR                             0x1103
#define CL_MEM_MAP_COUNT                            0x1104
#define CL_MEM_REFERENCE_COUNT                      0x1105
#define CL_MEM_CONTEXT                              0x1106
+#define CL_MEM_ASSOCIATED_MEMOBJECT                 0x1107
+#define CL_MEM_OFFSET                               0x1108

/* cl_image_info */
#define CL_IMAGE_FORMAT                             0x1110
#define CL_IMAGE_ELEMENT_SIZE                       0x1111
#define CL_IMAGE_ROW_PITCH                          0x1112
#define CL_IMAGE_SLICE_PITCH                        0x1113
#define CL_IMAGE_WIDTH                              0x1114
#define CL_IMAGE_HEIGHT                             0x1115
#define CL_IMAGE_DEPTH                              0x1116

/* cl_addressing_mode */
#define CL_ADDRESS_NONE                             0x1130
#define CL_ADDRESS_CLAMP_TO_EDGE                    0x1131
#define CL_ADDRESS_CLAMP                            0x1132
#define CL_ADDRESS_REPEAT                           0x1133
+#define CL_ADDRESS_MIRRORED_REPEAT                  0x1134

/* cl_filter_mode */
#define CL_FILTER_NEAREST                           0x1140
#define CL_FILTER_LINEAR                            0x1141

/* cl_sampler_info */
#define CL_SAMPLER_REFERENCE_COUNT                  0x1150
#define CL_SAMPLER_CONTEXT                          0x1151
#define CL_SAMPLER_NORMALIZED_COORDS                0x1152
#define CL_SAMPLER_ADDRESSING_MODE                  0x1153

skipping to change at line 368 (old) / line 394 (new)

#define CL_KERNEL_FUNCTION_NAME                     0x1190
#define CL_KERNEL_NUM_ARGS                          0x1191
#define CL_KERNEL_REFERENCE_COUNT                   0x1192
#define CL_KERNEL_CONTEXT                           0x1193
#define CL_KERNEL_PROGRAM                           0x1194

/* cl_kernel_work_group_info */
#define CL_KERNEL_WORK_GROUP_SIZE                   0x11B0
#define CL_KERNEL_COMPILE_WORK_GROUP_SIZE           0x11B1
#define CL_KERNEL_LOCAL_MEM_SIZE                    0x11B2
+#define CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE 0x11B3
+#define CL_KERNEL_PRIVATE_MEM_SIZE                  0x11B4

/* cl_event_info */
#define CL_EVENT_COMMAND_QUEUE                      0x11D0
#define CL_EVENT_COMMAND_TYPE                       0x11D1
#define CL_EVENT_REFERENCE_COUNT                    0x11D2
#define CL_EVENT_COMMAND_EXECUTION_STATUS           0x11D3
+#define CL_EVENT_CONTEXT                            0x11D4

/* cl_command_type */
#define CL_COMMAND_NDRANGE_KERNEL                   0x11F0
#define CL_COMMAND_TASK                             0x11F1
#define CL_COMMAND_NATIVE_KERNEL                    0x11F2
#define CL_COMMAND_READ_BUFFER                      0x11F3
#define CL_COMMAND_WRITE_BUFFER                     0x11F4
#define CL_COMMAND_COPY_BUFFER                      0x11F5
#define CL_COMMAND_READ_IMAGE                       0x11F6
#define CL_COMMAND_WRITE_IMAGE                      0x11F7
#define CL_COMMAND_COPY_IMAGE                       0x11F8
#define CL_COMMAND_COPY_IMAGE_TO_BUFFER             0x11F9
#define CL_COMMAND_COPY_BUFFER_TO_IMAGE             0x11FA
#define CL_COMMAND_MAP_BUFFER                       0x11FB
#define CL_COMMAND_MAP_IMAGE                        0x11FC
#define CL_COMMAND_UNMAP_MEM_OBJECT                 0x11FD
#define CL_COMMAND_MARKER                           0x11FE
#define CL_COMMAND_ACQUIRE_GL_OBJECTS               0x11FF
#define CL_COMMAND_RELEASE_GL_OBJECTS               0x1200
+#define CL_COMMAND_READ_BUFFER_RECT                 0x1201
+#define CL_COMMAND_WRITE_BUFFER_RECT                0x1202
+#define CL_COMMAND_COPY_BUFFER_RECT                 0x1203
+#define CL_COMMAND_USER                             0x1204

/* command execution status */
#define CL_COMPLETE                                 0x0
#define CL_RUNNING                                  0x1
#define CL_SUBMITTED                                0x2
#define CL_QUEUED                                   0x3

+/* cl_buffer_create_type */
+#define CL_BUFFER_CREATE_TYPE_REGION                0x1220

/* cl_profiling_info */
#define CL_PROFILING_COMMAND_QUEUED                 0x1280
#define CL_PROFILING_COMMAND_SUBMIT                 0x1281
#define CL_PROFILING_COMMAND_START                  0x1282
#define CL_PROFILING_COMMAND_END                    0x1283

/********************************************************************************************************/

/* Platform API */
extern CL_API_ENTRY cl_int CL_API_CALL

skipping to change at line 441 (old) / line 477 (new)

                cl_device_info /* param_name */,
                size_t         /* param_value_size */,
                void *         /* param_value */,
                size_t *       /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0;

/* Context APIs */
extern CL_API_ENTRY cl_context CL_API_CALL
clCreateContext(const cl_context_properties * /* properties */,
                cl_uint              /* num_devices */,
                const cl_device_id * /* devices */,
-               void (*pfn_notify)(const char *, const void *, size_t, void *) /* pfn_notify */,
+               void (CL_CALLBACK * /* pfn_notify */)(const char *, const void *, size_t, void *),
                void *               /* user_data */,
                cl_int *             /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0;

extern CL_API_ENTRY cl_context CL_API_CALL
clCreateContextFromType(const cl_context_properties * /* properties */,
                        cl_device_type /* device_type */,
-                       void (*pfn_notify)(const char *, const void *, size_t, void *) /* pfn_notify */,
+                       void (CL_CALLBACK * /* pfn_notify*/ )(const char *, const void *, size_t, void *),
                        void *         /* user_data */,
                        cl_int *       /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0;
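The only change to the context creation entry points is that the notification callback is now annotated with CL_CALLBACK, so a host callback must carry the same calling convention to match the prototype. A minimal sketch, with error handling elided and the notify_fn name purely illustrative:

    #include <stdio.h>
    #include <CL/cl.h>

    /* Signature must match the header, including the CL_CALLBACK annotation. */
    static void CL_CALLBACK notify_fn(const char *errinfo,
                                      const void *private_info,
                                      size_t cb, void *user_data)
    {
        fprintf(stderr, "OpenCL context error: %s\n", errinfo);
    }

    /* ... later, with a device already obtained:
       cl_int err;
       cl_context ctx = clCreateContext(NULL, 1, &device,
                                        notify_fn, NULL, &err);          */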
extern CL_API_ENTRY cl_int CL_API_CALL
clRetainContext(cl_context /* context */) CL_API_SUFFIX__VERSION_1_0;

extern CL_API_ENTRY cl_int CL_API_CALL
clReleaseContext(cl_context /* context */) CL_API_SUFFIX__VERSION_1_0;

extern CL_API_ENTRY cl_int CL_API_CALL

skipping to change at line 485 (old) / line 521 (new)

extern CL_API_ENTRY cl_int CL_API_CALL
clReleaseCommandQueue(cl_command_queue /* command_queue */) CL_API_SUFFIX__VERSION_1_0;

extern CL_API_ENTRY cl_int CL_API_CALL
clGetCommandQueueInfo(cl_command_queue      /* command_queue */,
                      cl_command_queue_info /* param_name */,
                      size_t                /* param_value_size */,
                      void *                /* param_value */,
                      size_t *              /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0;

+#ifdef CL_USE_DEPRECATED_OPENCL_1_0_APIS
+#warning CL_USE_DEPRECATED_OPENCL_1_0_APIS is defined. These APIs are unsupported and untested in OpenCL 1.1!
+/*
+ *  WARNING:
+ *     This API introduces mutable state into the OpenCL implementation. It has been REMOVED
+ *  to better facilitate thread safety.  The 1.0 API is not thread safe. It is not tested by the
+ *  OpenCL 1.1 conformance test, and consequently may not work or may not work dependably.
+ *  It is likely to be non-performant. Use of this API is not advised. Use at your own risk.
+ *
+ *  Software developers previously relying on this API are instructed to set the command queue
+ *  properties when creating the queue, instead.
+ */
extern CL_API_ENTRY cl_int CL_API_CALL
clSetCommandQueueProperty(cl_command_queue              /* command_queue */,
                          cl_command_queue_properties   /* properties */,
                          cl_bool                       /* enable */,
-                         cl_command_queue_properties * /* old_properties */) CL_API_SUFFIX__VERSION_1_0;
+                         cl_command_queue_properties * /* old_properties */) CL_EXT_SUFFIX__VERSION_1_0_DEPRECATED;
+#endif /* CL_USE_DEPRECATED_OPENCL_1_0_APIS */
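The replacement pattern the deprecation comment points to is simply requesting the properties at queue creation. A sketch, assuming ctx and device from earlier setup:

    /* Instead of toggling properties later via the removed
       clSetCommandQueueProperty, request them when creating the queue: */
    cl_int err;
    cl_command_queue q = clCreateCommandQueue(ctx, device,
                                              CL_QUEUE_PROFILING_ENABLE,
                                              &err);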
/* Memory Object APIs */
extern CL_API_ENTRY cl_mem CL_API_CALL
clCreateBuffer(cl_context   /* context */,
               cl_mem_flags /* flags */,
               size_t       /* size */,
               void *       /* host_ptr */,
               cl_int *     /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0;

extern CL_API_ENTRY cl_mem CL_API_CALL
+clCreateSubBuffer(cl_mem                /* buffer */,
+                  cl_mem_flags          /* flags */,
+                  cl_buffer_create_type /* buffer_create_type */,
+                  const void *          /* buffer_create_info */,
+                  cl_int *              /* errcode_ret */) CL_API_SUFFIX__VERSION_1_1;
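clCreateSubBuffer takes its description through an untyped buffer_create_info pointer; the cl_buffer_region struct added near the top of the header is the layout that goes with CL_BUFFER_CREATE_TYPE_REGION. A sketch carving a byte range out of an existing buffer (buf assumed from earlier; per the 1.1 spec, a misaligned origin is what produces the new CL_MISALIGNED_SUB_BUFFER_OFFSET error):

    cl_buffer_region region;
    region.origin = 0;      /* byte offset into the parent buffer */
    region.size   = 1024;   /* byte length of the sub-buffer      */

    cl_int err;
    cl_mem sub = clCreateSubBuffer(buf, CL_MEM_READ_WRITE,
                                   CL_BUFFER_CREATE_TYPE_REGION,
                                   &region, &err);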
+extern CL_API_ENTRY cl_mem CL_API_CALL
clCreateImage2D(cl_context              /* context */,
                cl_mem_flags            /* flags */,
                const cl_image_format * /* image_format */,
                size_t                  /* image_width */,
                size_t                  /* image_height */,
                size_t                  /* image_row_pitch */,
                void *                  /* host_ptr */,
                cl_int *                /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0;

extern CL_API_ENTRY cl_mem CL_API_CALL

skipping to change at line 549 (old) / line 605 (new)

               void *   /* param_value */,
               size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0;

extern CL_API_ENTRY cl_int CL_API_CALL
clGetImageInfo(cl_mem        /* image */,
               cl_image_info /* param_name */,
               size_t        /* param_value_size */,
               void *        /* param_value */,
               size_t *      /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0;

+extern CL_API_ENTRY cl_int CL_API_CALL
+clSetMemObjectDestructorCallback(cl_mem /* memobj */,
+                                 void (CL_CALLBACK * /*pfn_notify*/)(cl_mem /* memobj */, void* /*user_data*/),
+                                 void * /*user_data */) CL_API_SUFFIX__VERSION_1_1;
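A sketch of the new 1.1 destructor hook, useful when a buffer was created with CL_MEM_USE_HOST_PTR and the host allocation must not be freed while the cl_mem is alive (the callback name is illustrative):

    static void CL_CALLBACK on_mem_destroyed(cl_mem memobj, void *user_data)
    {
        /* Safe point to release the host allocation backing the buffer. */
        free(user_data);
    }

    /* ... after creating buf with CL_MEM_USE_HOST_PTR over host_ptr:
       clSetMemObjectDestructorCallback(buf, on_mem_destroyed, host_ptr);  */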
/* Sampler APIs */
extern CL_API_ENTRY cl_sampler CL_API_CALL
clCreateSampler(cl_context         /* context */,
                cl_bool            /* normalized_coords */,
                cl_addressing_mode /* addressing_mode */,
                cl_filter_mode     /* filter_mode */,
                cl_int *           /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0;

extern CL_API_ENTRY cl_int CL_API_CALL
clRetainSampler(cl_sampler /* sampler */) CL_API_SUFFIX__VERSION_1_0;

skipping to change at line 598 (old) / line 659 (new)

clRetainProgram(cl_program /* program */) CL_API_SUFFIX__VERSION_1_0;

extern CL_API_ENTRY cl_int CL_API_CALL
clReleaseProgram(cl_program /* program */) CL_API_SUFFIX__VERSION_1_0;

extern CL_API_ENTRY cl_int CL_API_CALL
clBuildProgram(cl_program           /* program */,
               cl_uint              /* num_devices */,
               const cl_device_id * /* device_list */,
               const char *         /* options */,
-              void (*pfn_notify)(cl_program /* program */, void * /* user_data */),
+              void (CL_CALLBACK * /* pfn_notify */)(cl_program /* program */, void * /* user_data */),
               void *               /* user_data */) CL_API_SUFFIX__VERSION_1_0;
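A common synchronous pattern passes a NULL callback and fetches the build log on CL_BUILD_PROGRAM_FAILURE. This fragment assumes prog and device from earlier; clGetProgramBuildInfo and CL_PROGRAM_BUILD_LOG are part of the same API, though they fall in a region this diff skips:

    cl_int err = clBuildProgram(prog, 1, &device, "", NULL, NULL);
    if (err == CL_BUILD_PROGRAM_FAILURE) {
        size_t log_size = 0;
        clGetProgramBuildInfo(prog, device, CL_PROGRAM_BUILD_LOG,
                              0, NULL, &log_size);
        char *log = (char *)malloc(log_size);
        clGetProgramBuildInfo(prog, device, CL_PROGRAM_BUILD_LOG,
                              log_size, log, NULL);
        fprintf(stderr, "%s\n", log);
        free(log);
    }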
extern CL_API_ENTRY cl_int CL_API_CALL
clUnloadCompiler(void) CL_API_SUFFIX__VERSION_1_0;

extern CL_API_ENTRY cl_int CL_API_CALL
clGetProgramInfo(cl_program      /* program */,
                 cl_program_info /* param_name */,
                 size_t          /* param_value_size */,
                 void *          /* param_value */,

skipping to change at line 670 (old) / line 731 (new)

clWaitForEvents(cl_uint          /* num_events */,
                const cl_event * /* event_list */) CL_API_SUFFIX__VERSION_1_0;

extern CL_API_ENTRY cl_int CL_API_CALL
clGetEventInfo(cl_event      /* event */,
               cl_event_info /* param_name */,
               size_t        /* param_value_size */,
               void *        /* param_value */,
               size_t *      /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0;

+extern CL_API_ENTRY cl_event CL_API_CALL
+clCreateUserEvent(cl_context /* context */,
+                  cl_int *   /* errcode_ret */) CL_API_SUFFIX__VERSION_1_1;

extern CL_API_ENTRY cl_int CL_API_CALL
clRetainEvent(cl_event /* event */) CL_API_SUFFIX__VERSION_1_0;

extern CL_API_ENTRY cl_int CL_API_CALL
clReleaseEvent(cl_event /* event */) CL_API_SUFFIX__VERSION_1_0;

+extern CL_API_ENTRY cl_int CL_API_CALL
+clSetUserEventStatus(cl_event /* event */,
+                     cl_int   /* execution_status */) CL_API_SUFFIX__VERSION_1_1;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clSetEventCallback(cl_event /* event */,
+                   cl_int   /* command_exec_callback_type */,
+                   void (CL_CALLBACK * /* pfn_notify */)(cl_event, cl_int, void *),
+                   void *   /* user_data */) CL_API_SUFFIX__VERSION_1_1;
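A sketch tying the three new 1.1 event entry points together: a user event gates an enqueued transfer until the host releases it, and a callback fires when the transfer completes. Names (on_complete, q, buf, ptr, size) are illustrative; the enqueue must be non-blocking, or waiting on the not-yet-complete user event would deadlock:

    static void CL_CALLBACK on_complete(cl_event ev, cl_int status, void *ud)
    {
        /* Runs once the dependent command reaches CL_COMPLETE. */
    }

    cl_int err;
    cl_event gate = clCreateUserEvent(ctx, &err);
    cl_event done;

    /* Gate the read on the user event: */
    clEnqueueReadBuffer(q, buf, CL_FALSE, 0, size, ptr, 1, &gate, &done);

    /* Be notified when the gated command finishes: */
    clSetEventCallback(done, CL_COMPLETE, on_complete, NULL);

    /* Host-side trigger: flips the user event to CL_COMPLETE. */
    clSetUserEventStatus(gate, CL_COMPLETE);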
/* Profiling APIs */
extern CL_API_ENTRY cl_int CL_API_CALL
clGetEventProfilingInfo(cl_event          /* event */,
                        cl_profiling_info /* param_name */,
                        size_t            /* param_value_size */,
                        void *            /* param_value */,
                        size_t *          /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0;

/* Flush and Finish APIs */
extern CL_API_ENTRY cl_int CL_API_CALL

skipping to change at line 704 (old) / line 779 (new)

                    cl_mem           /* buffer */,
                    cl_bool          /* blocking_read */,
                    size_t           /* offset */,
                    size_t           /* cb */,
                    void *           /* ptr */,
                    cl_uint          /* num_events_in_wait_list */,
                    const cl_event * /* event_wait_list */,
                    cl_event *       /* event */) CL_API_SUFFIX__VERSION_1_0;

extern CL_API_ENTRY cl_int CL_API_CALL
+clEnqueueReadBufferRect(cl_command_queue /* command_queue */,
+                        cl_mem           /* buffer */,
+                        cl_bool          /* blocking_read */,
+                        const size_t *   /* buffer_offset */,
+                        const size_t *   /* host_offset */,
+                        const size_t *   /* region */,
+                        size_t           /* buffer_row_pitch */,
+                        size_t           /* buffer_slice_pitch */,
+                        size_t           /* host_row_pitch */,
+                        size_t           /* host_slice_pitch */,
+                        void *           /* ptr */,
+                        cl_uint          /* num_events_in_wait_list */,
+                        const cl_event * /* event_wait_list */,
+                        cl_event *       /* event */) CL_API_SUFFIX__VERSION_1_1;
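The new rect variants address a buffer as a 3D array via {x, y, z} triples of byte offsets, rows, and slices. A sketch reading a w-byte by h-row tile out of a larger pitched buffer into a tightly packed host array (q, buf, host_ptr, w, h, buffer_row_pitch assumed from context; a pitch of 0 means "compute from region"):

    size_t buffer_origin[3] = {0, 0, 0};  /* byte x, row y, slice z in buf */
    size_t host_origin[3]   = {0, 0, 0};
    size_t region[3]        = {w, h, 1};  /* w in bytes, h in rows         */

    clEnqueueReadBufferRect(q, buf, CL_TRUE,
                            buffer_origin, host_origin, region,
                            buffer_row_pitch, 0,  /* pitches inside buf    */
                            0, 0,                 /* packed host layout    */
                            host_ptr, 0, NULL, NULL);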
+extern CL_API_ENTRY cl_int CL_API_CALL
clEnqueueWriteBuffer(cl_command_queue /* command_queue */,
                     cl_mem           /* buffer */,
                     cl_bool          /* blocking_write */,
                     size_t           /* offset */,
                     size_t           /* cb */,
                     const void *     /* ptr */,
                     cl_uint          /* num_events_in_wait_list */,
                     const cl_event * /* event_wait_list */,
                     cl_event *       /* event */) CL_API_SUFFIX__VERSION_1_0;

extern CL_API_ENTRY cl_int CL_API_CALL
+clEnqueueWriteBufferRect(cl_command_queue /* command_queue */,
+                         cl_mem           /* buffer */,
+                         cl_bool          /* blocking_read */,
+                         const size_t *   /* buffer_offset */,
+                         const size_t *   /* host_offset */,
+                         const size_t *   /* region */,
+                         size_t           /* buffer_row_pitch */,
+                         size_t           /* buffer_slice_pitch */,
+                         size_t           /* host_row_pitch */,
+                         size_t           /* host_slice_pitch */,
+                         const void *     /* ptr */,
+                         cl_uint          /* num_events_in_wait_list */,
+                         const cl_event * /* event_wait_list */,
+                         cl_event *       /* event */) CL_API_SUFFIX__VERSION_1_1;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
clEnqueueCopyBuffer(cl_command_queue /* command_queue */,
                    cl_mem           /* src_buffer */,
                    cl_mem           /* dst_buffer */,
                    size_t           /* src_offset */,
                    size_t           /* dst_offset */,
                    size_t           /* cb */,
                    cl_uint          /* num_events_in_wait_list */,
                    const cl_event * /* event_wait_list */,
                    cl_event *       /* event */) CL_API_SUFFIX__VERSION_1_0;

extern CL_API_ENTRY cl_int CL_API_CALL
+clEnqueueCopyBufferRect(cl_command_queue /* command_queue */,
+                        cl_mem           /* src_buffer */,
+                        cl_mem           /* dst_buffer */,
+                        const size_t *   /* src_origin */,
+                        const size_t *   /* dst_origin */,
+                        const size_t *   /* region */,
+                        size_t           /* src_row_pitch */,
+                        size_t           /* src_slice_pitch */,
+                        size_t           /* dst_row_pitch */,
+                        size_t           /* dst_slice_pitch */,
+                        cl_uint          /* num_events_in_wait_list */,
+                        const cl_event * /* event_wait_list */,
+                        cl_event *       /* event */) CL_API_SUFFIX__VERSION_1_1;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
clEnqueueReadImage(cl_command_queue /* command_queue */,
                   cl_mem           /* image */,
                   cl_bool          /* blocking_read */,
                   const size_t *   /* origin[3] */,
                   const size_t *   /* region[3] */,
                   size_t           /* row_pitch */,
                   size_t           /* slice_pitch */,
                   void *           /* ptr */,
                   cl_uint          /* num_events_in_wait_list */,
                   const cl_event * /* event_wait_list */,

End of changes. 30 change blocks. 9 lines changed or deleted, 147 lines changed or added.
cl_ext.h

/*******************************************************************************
- * Copyright (c) 2008-2009 The Khronos Group Inc.
+ * Copyright (c) 2008-2010 The Khronos Group Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and/or associated documentation files (the
 * "Materials"), to deal in the Materials without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sublicense, and/or sell copies of the Materials, and to
 * permit persons to whom the Materials are furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice shall be included

skipping to change at line 24

 *
 * THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
 * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS.
 ******************************************************************************/

-/* $Revision$ on $Date$ */
+/* $Revision: 11687 $ on $Date: 2010-06-12 03:47:22 +0530 (Sat, 12 Jun 2010) $ */

+/* cl_ext.h contains OpenCL extensions which don't have external */
+/* (OpenGL, D3D) dependencies.                                    */

#ifndef __CL_EXT_H
#define __CL_EXT_H

#ifdef __cplusplus
extern "C" {
#endif

+#ifdef __APPLE__
+    #include <OpenCL/cl.h>
+    #include <AvailabilityMacros.h>
+#else
+    #include <CL/cl.h>
+#endif

/* cl_khr_fp64 extension - no extension #define since it has no functions */
#define CL_DEVICE_DOUBLE_FP_CONFIG 0x1032

/* cl_khr_fp16 extension - no extension #define since it has no functions */
#define CL_DEVICE_HALF_FP_CONFIG 0x1033

-/* cl_khr_icd extension
- */
+/* Memory object destruction
+ *
+ * Apple extension for use to manage externally allocated buffers used with cl_mem objects with CL_MEM_USE_HOST_PTR
+ *
+ * Registers a user callback function that will be called when the memory object is deleted and its resources
+ * freed. Each call to clSetMemObjectCallbackFn registers the specified user callback function on a callback
+ * stack associated with memobj. The registered user callback functions are called in the reverse order in
+ * which they were registered. The user callback functions are called and then the memory object is deleted
+ * and its resources freed. This provides a mechanism for the application (and libraries) using memobj to be
+ * notified when the memory referenced by host_ptr, specified when the memory object is created and used as
+ * the storage bits for the memory object, can be reused or freed.
+ *
+ * The application may not call CL api's with the cl_mem object passed to the pfn_notify.
+ *
+ * Please check for the "cl_APPLE_SetMemObjectDestructor" extension using clGetDeviceInfo(CL_DEVICE_EXTENSIONS)
+ * before using.
+ */
+#define cl_APPLE_SetMemObjectDestructor 1
+cl_int CL_API_ENTRY clSetMemObjectDestructorAPPLE(cl_mem /* memobj */,
+                                                  void (* /*pfn_notify*/)(cl_mem /* memobj */, void* /*user_data*/),
+                                                  void * /*user_data */) CL_EXT_SUFFIX__VERSION_1_0;
+
+/* Context Logging Functions
+ *
+ * The next three convenience functions are intended to be used as the pfn_notify parameter to clCreateContext().
+ * Please check for the "cl_APPLE_ContextLoggingFunctions" extension using clGetDeviceInfo(CL_DEVICE_EXTENSIONS)
+ * before using.
+ *
+ * clLogMessagesToSystemLog forwards on all log messages to the Apple System Logger
+ */
+#define cl_APPLE_ContextLoggingFunctions 1
+extern void CL_API_ENTRY clLogMessagesToSystemLogAPPLE(const char * /* errstr */,
+                                                       const void * /* private_info */,
+                                                       size_t       /* cb */,
+                                                       void *       /* user_data */) CL_EXT_SUFFIX__VERSION_1_0;
+
+/* clLogMessagesToStdout sends all log messages to the file descriptor stdout */
+extern void CL_API_ENTRY clLogMessagesToStdoutAPPLE(const char * /* errstr */,
+                                                    const void * /* private_info */,
+                                                    size_t       /* cb */,
+                                                    void *       /* user_data */) CL_EXT_SUFFIX__VERSION_1_0;
+
+/* clLogMessagesToStderr sends all log messages to the file descriptor stderr */
+extern void CL_API_ENTRY clLogMessagesToStderrAPPLE(const char * /* errstr */,
+                                                    const void * /* private_info */,
+                                                    size_t       /* cb */,
+                                                    void *       /* user_data */) CL_EXT_SUFFIX__VERSION_1_0;
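Because these loggers match clCreateContext's pfn_notify shape (on Apple platforms CL_CALLBACK expands to nothing), they can be passed directly at context creation. A sketch under that assumption, guarded since the extension is Apple-only:

    #ifdef __APPLE__
        cl_int err;
        cl_context ctx = clCreateContext(NULL, 1, &device,
                                         clLogMessagesToStderrAPPLE,
                                         NULL, &err);
    #endif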
+/************************
+ * cl_khr_icd extension *
+ ************************/
#define cl_khr_icd 1

/* cl_platform_info */
#define CL_PLATFORM_ICD_SUFFIX_KHR        0x0920

/* Additional Error Codes */
#define CL_PLATFORM_NOT_FOUND_KHR         -1001

extern CL_API_ENTRY cl_int CL_API_CALL
clIcdGetPlatformIDsKHR(cl_uint          /* num_entries */,
                       cl_platform_id * /* platforms */,
                       cl_uint *        /* num_platforms */);

+typedef CL_API_ENTRY cl_int (CL_API_CALL *clIcdGetPlatformIDsKHR_fn)(
+    cl_uint          /* num_entries */,
+    cl_platform_id * /* platforms */,
+    cl_uint *        /* num_platforms */);
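The new _fn typedef exists because ICD entry points are normally reached through clGetExtensionFunctionAddress (a core 1.0/1.1 call) rather than linked directly. A sketch:

    clIcdGetPlatformIDsKHR_fn icd_get = (clIcdGetPlatformIDsKHR_fn)
        clGetExtensionFunctionAddress("clIcdGetPlatformIDsKHR");

    if (icd_get) {
        cl_uint n = 0;
        icd_get(0, NULL, &n);   /* count ICD-enumerated platforms */
    }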
+/******************************************
+ * cl_nv_device_attribute_query extension *
+ ******************************************/
/* cl_nv_device_attribute_query extension - no extension #define since it has no functions */
#define CL_DEVICE_COMPUTE_CAPABILITY_MAJOR_NV       0x4000
#define CL_DEVICE_COMPUTE_CAPABILITY_MINOR_NV       0x4001
#define CL_DEVICE_REGISTERS_PER_BLOCK_NV            0x4002
#define CL_DEVICE_WARP_SIZE_NV                      0x4003
#define CL_DEVICE_GPU_OVERLAP_NV                    0x4004
#define CL_DEVICE_KERNEL_EXEC_TIMEOUT_NV            0x4005
#define CL_DEVICE_INTEGRATED_MEMORY_NV              0x4006

+/*********************************
+ * cl_amd_device_attribute_query *
+ *********************************/
+#define CL_DEVICE_PROFILING_TIMER_OFFSET_AMD        0x4036
+
+#ifdef CL_VERSION_1_1
+/***********************************
+ * cl_ext_device_fission extension *
+ ***********************************/
+#define cl_ext_device_fission 1
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clReleaseDeviceEXT(cl_device_id /*device*/) CL_EXT_SUFFIX__VERSION_1_1;
+
+typedef CL_API_ENTRY cl_int
+(CL_API_CALL *clReleaseDeviceEXT_fn)(cl_device_id /*device*/) CL_EXT_SUFFIX__VERSION_1_1;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clRetainDeviceEXT(cl_device_id /*device*/) CL_EXT_SUFFIX__VERSION_1_1;
+
+typedef CL_API_ENTRY cl_int
+(CL_API_CALL *clRetainDeviceEXT_fn)(cl_device_id /*device*/) CL_EXT_SUFFIX__VERSION_1_1;
+
+typedef cl_ulong cl_device_partition_property_ext;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clCreateSubDevicesEXT(cl_device_id /*in_device*/,
+                      const cl_device_partition_property_ext * /* properties */,
+                      cl_uint /*num_entries*/,
+                      cl_device_id * /*out_devices*/,
+                      cl_uint * /*num_devices*/) CL_EXT_SUFFIX__VERSION_1_1;
+
+extern CL_API_ENTRY cl_int
+(CL_API_CALL * clCreateSubDevicesEXT_fn)(cl_device_id /*in_device*/,
+                                         const cl_device_partition_property_ext * /* properties */,
+                                         cl_uint /*num_entries*/,
+                                         cl_device_id * /*out_devices*/,
+                                         cl_uint * /*num_devices*/) CL_EXT_SUFFIX__VERSION_1_1;
+
+/* cl_device_partition_property_ext */
+#define CL_DEVICE_PARTITION_EQUALLY_EXT             0x4050
+#define CL_DEVICE_PARTITION_BY_COUNTS_EXT           0x4051
+#define CL_DEVICE_PARTITION_BY_NAMES_EXT            0x4052
+#define CL_DEVICE_PARTITION_BY_AFFINITY_DOMAIN_EXT  0x4053
+
+/* clDeviceGetInfo selectors */
+#define CL_DEVICE_PARENT_DEVICE_EXT                 0x4054
+#define CL_DEVICE_PARTITION_TYPES_EXT               0x4055
+#define CL_DEVICE_AFFINITY_DOMAINS_EXT              0x4056
+#define CL_DEVICE_REFERENCE_COUNT_EXT               0x4057
+#define CL_DEVICE_PARTITION_STYLE_EXT               0x4058
+
+/* error codes */
+#define CL_DEVICE_PARTITION_FAILED_EXT              -1057
+#define CL_INVALID_PARTITION_COUNT_EXT              -1058
+#define CL_INVALID_PARTITION_NAME_EXT               -1059
+
+/* CL_AFFINITY_DOMAINs */
+#define CL_AFFINITY_DOMAIN_L1_CACHE_EXT             0x1
+#define CL_AFFINITY_DOMAIN_L2_CACHE_EXT             0x2
+#define CL_AFFINITY_DOMAIN_L3_CACHE_EXT             0x3
+#define CL_AFFINITY_DOMAIN_L4_CACHE_EXT             0x4
+#define CL_AFFINITY_DOMAIN_NUMA_EXT                 0x10
+#define CL_AFFINITY_DOMAIN_NEXT_FISSIONABLE_EXT     0x100
+
+/* cl_device_partition_property_ext list terminators */
+#define CL_PROPERTIES_LIST_END_EXT                  ((cl_device_partition_property_ext) 0)
+#define CL_PARTITION_BY_COUNTS_LIST_END_EXT         ((cl_device_partition_property_ext) 0)
+#define CL_PARTITION_BY_NAMES_LIST_END_EXT          ((cl_device_partition_property_ext) 0 - 1)
+#endif /* CL_VERSION_1_1 */
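A sketch of the EXT fission interface, splitting a device with the zero-terminated property-list form the terminators above imply; with CL_DEVICE_PARTITION_EQUALLY_EXT the value that follows is the number of compute units per sub-device (2 here is illustrative):

    cl_device_partition_property_ext props[] = {
        CL_DEVICE_PARTITION_EQUALLY_EXT, 2,   /* 2 compute units each */
        CL_PROPERTIES_LIST_END_EXT
    };

    cl_device_id sub[8];
    cl_uint n_sub = 0;
    cl_int err = clCreateSubDevicesEXT(device, props, 8, sub, &n_sub);
    /* ... use sub[0..n_sub-1], then clReleaseDeviceEXT(sub[i]); */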
#ifdef __cplusplus
}
#endif

#endif /* __CL_EXT_H */

End of changes. 6 change blocks. 4 lines changed or deleted, 178 lines changed or added.

cl_gl.h

/**********************************************************************************
- * Copyright (c) 2008-2009 The Khronos Group Inc.
+ * Copyright (c) 2008-2010 The Khronos Group Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and/or associated documentation files (the
 * "Materials"), to deal in the Materials without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sublicense, and/or sell copies of the Materials, and to
 * permit persons to whom the Materials are furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice shall be included

skipping to change at line 24

 *
 * THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
 * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS.
 **********************************************************************************/

-/* $Revision: 10327 $ on $Date: 2010-02-11 00:24:35 +0530 (Thu, 11 Feb 2010) $ */
+/* $Revision: 11708 $ on $Date: 2010-06-14 12:06:24 +0530 (Mon, 14 Jun 2010) $ */

/*
 * cl_gl.h contains Khronos-approved (KHR) OpenCL extensions which have
 * OpenGL dependencies. The application is responsible for #including
 * OpenGL or OpenGL ES headers before #including cl_gl.h.
 */

#ifndef __OPENCL_CL_GL_H
#define __OPENCL_CL_GL_H

#ifdef __APPLE__
#include <OpenCL/cl.h>
+#include <OpenGL/CGLDevice.h>
#else
#include <CL/cl.h>
#endif

#ifdef __cplusplus
extern "C" {
#endif

typedef cl_uint cl_gl_object_type;
typedef cl_uint cl_gl_texture_info;
typedef cl_uint cl_gl_platform_info;
+typedef struct __GLsync *cl_GLsync;

/* cl_gl_object_type */
#define CL_GL_OBJECT_BUFFER       0x2000
#define CL_GL_OBJECT_TEXTURE2D    0x2001
#define CL_GL_OBJECT_TEXTURE3D    0x2002
#define CL_GL_OBJECT_RENDERBUFFER 0x2003

/* cl_gl_texture_info */
#define CL_GL_TEXTURE_TARGET      0x2004
#define CL_GL_MIPMAP_LEVEL        0x2005

skipping to change at line 142 (old) / line 144 (new)

#define CL_WGL_HDC_KHR            0x200B
#define CL_CGL_SHAREGROUP_KHR     0x200C

extern CL_API_ENTRY cl_int CL_API_CALL
clGetGLContextInfoKHR(const cl_context_properties * /* properties */,
                      cl_gl_context_info            /* param_name */,
                      size_t                        /* param_value_size */,
                      void *                        /* param_value */,
                      size_t *                      /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0;

+typedef CL_API_ENTRY cl_int (CL_API_CALL *clGetGLContextInfoKHR_fn)(
+    const cl_context_properties * properties,
+    cl_gl_context_info            param_name,
+    size_t                        param_value_size,
+    void *                        param_value,
+    size_t *                      param_value_size_ret);
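A sketch of the typical query: find the CL device driving the current GL context. The query token CL_CURRENT_DEVICE_FOR_GL_CONTEXT_KHR and the context-property tokens such as CL_GL_CONTEXT_KHR fall in a region this diff skips, so treat them as assumptions to verify against the full header:

    cl_context_properties props[] = {
        CL_CONTEXT_PLATFORM, (cl_context_properties)platform,
        /* plus CL_GL_CONTEXT_KHR and the window-system entry
           (CL_GLX_DISPLAY_KHR, CL_WGL_HDC_KHR, or CL_CGL_SHAREGROUP_KHR) */
        0
    };

    cl_device_id dev;
    clGetGLContextInfoKHR(props, CL_CURRENT_DEVICE_FOR_GL_CONTEXT_KHR,
                          sizeof(dev), &dev, NULL);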
#ifdef __cplusplus
}
#endif

#endif /* __OPENCL_CL_GL_H */

End of changes. 5 change blocks. 2 lines changed or deleted, 11 lines changed or added.


cl_gl_ext.h

/**********************************************************************************
- * Copyright (c) 2008-2009 The Khronos Group Inc.
+ * Copyright (c) 2008-2010 The Khronos Group Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and/or associated documentation files (the
 * "Materials"), to deal in the Materials without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sublicense, and/or sell copies of the Materials, and to
 * permit persons to whom the Materials are furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice shall be included

skipping to change at line 24

 *
 * THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
 * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS.
 **********************************************************************************/

-/* $Revision: 10327 $ on $Date: 2010-02-11 00:24:35 +0530 (Thu, 11 Feb 2010) $ */
+/* $Revision: 11708 $ on $Date: 2010-06-14 12:06:24 +0530 (Mon, 14 Jun 2010) $ */

/* cl_gl_ext.h contains vendor (non-KHR) OpenCL extensions which have */
/* OpenGL dependencies.                                               */

#ifndef __OPENCL_CL_GL_EXT_H
#define __OPENCL_CL_GL_EXT_H

#ifdef __cplusplus
extern "C" {
#endif

+#ifdef __APPLE__
+#include <OpenCL/cl_gl.h>
+#else
+#include <CL/cl_gl.h>
+#endif

/*
 * For each extension, follow this template
 *  /* cl_VEN_extname extension */
/* #define cl_VEN_extname 1
 * ... define new types, if any
 * ... define new tokens, if any
 * ... define new APIs, if any
 *
 *  If you need GLtypes here, mirror them with a cl_GLtype, rather than including a GL header
 *  This allows us to avoid having to decide whether to include GL headers or GLES here.
 */

+/*
+ *  cl_khr_gl_event extension
+ *  See section 9.9 in the OpenCL 1.1 spec for more information
+ */
+#define CL_COMMAND_GL_FENCE_SYNC_OBJECT_KHR     0x200D
+
+extern CL_API_ENTRY cl_event CL_API_CALL
+clCreateEventFromGLsyncKHR(cl_context /* context */,
+                           cl_GLsync  /* cl_GLsync */,
+                           cl_int *   /* errcode_ret */) CL_EXT_SUFFIX__VERSION_1_1;
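A sketch of the intended flow: wrap a GL fence in a CL event so enqueued CL work can wait on the GL pipeline without a full glFinish. glFenceSync is from GL 3.2 / ARB_sync and must come from the application's GL headers, per the note above; names are illustrative:

    /* GL side: fence after the producing GL commands. */
    GLsync fence = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0);

    /* CL side: wrap the fence as a wait-able event. */
    cl_int err;
    cl_event gl_done = clCreateEventFromGLsyncKHR(ctx, (cl_GLsync)fence, &err);

    /* e.g. clEnqueueAcquireGLObjects(q, 1, &mem, 1, &gl_done, NULL); */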
#ifdef __cplusplus
}
#endif

#endif /* __OPENCL_CL_GL_EXT_H */

End of changes. 4 change blocks. 2 lines changed or deleted, 20 lines changed or added.


common_functions.h

skipping to change at line 68

/*DEVICE_BUILTIN*/
extern __host__ __device__ void*   __cdecl memset(void*, int, size_t) __THROW;
/*DEVICE_BUILTIN*/
extern __host__ __device__ void*   __cdecl memcpy(void*, const void*, size_t) __THROW;

}

#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 200

#include <stdio.h>
+#include <stdlib.h>

extern "C"
{
/*DEVICE_BUILTIN*/
-extern         __host__ __device__ int     __cdecl printf(const char*, ...);
+extern _CRTIMP __host__ __device__ int     __cdecl printf(const char*, ...);
+extern _CRTIMP __host__ __device__ void*   __cdecl malloc(size_t) __THROW;
+extern _CRTIMP __host__ __device__ void    __cdecl free(void*) __THROW;
}

#endif /* __CUDA_ARCH__ && __CUDA_ARCH__ >= 200 */

#endif /* __cplusplus && __CUDACC__ */

/*******************************************************************************
*                                                                              *
*                                                                              *

End of changes. 2 change blocks. 2 lines changed or deleted, 5 lines changed or added.


 cuComplex.h   cuComplex.h 
skipping to change at line 46 skipping to change at line 46
#if !defined(CU_COMPLEX_H_) #if !defined(CU_COMPLEX_H_)
#define CU_COMPLEX_H_ #define CU_COMPLEX_H_
#if defined(__cplusplus) #if defined(__cplusplus)
extern "C" { extern "C" {
#endif /* __cplusplus */ #endif /* __cplusplus */
#include <math.h> /* import fabsf, sqrt */ #include <math.h> /* import fabsf, sqrt */
#include "vector_types.h" #include "vector_types.h"
/* versions for hosts without native support for 'complex' */
#if (!defined(__CUDACC__) && defined(CU_USE_NATIVE_COMPLEX))
#include <complex.h>
/* wrapper functions around C99 native complex support. NOTE: Untested! */
/* -- Single Precision -- */
typedef complex cuFloatComplex;
__host__ __device__ static __inline__ float cuCrealf (cuFloatComplex x) {
    return crealf(x);
}
__host__ __device__ static __inline__ float cuCimagf (cuFloatComplex x) {
    return cimagf(x);
}
__host__ __device__ static __inline__ cuFloatComplex make_cuFloatComplex (float x, float y)
{
    return x + I * y;
}
__host__ __device__ static __inline__ cuFloatComplex cuConjf (cuFloatComplex x)
{
    return conjf (x);
}
__host__ __device__ static __inline__ cuFloatComplex cuCaddf (cuFloatComplex x, cuFloatComplex y)
{
    return x + y;
}
__host__ __device__ static __inline__ cuFloatComplex cuCsubf (cuFloatComplex x, cuFloatComplex y)
{
    return x - y;
}
__host__ __device__ static __inline__ cuFloatComplex cuCmulf (cuFloatComplex x, cuFloatComplex y)
{
    return x * y;
}
__host__ __device__ static __inline__ cuFloatComplex cuCdivf (cuFloatComplex x, cuFloatComplex y)
{
    return x / y;
}
__host__ __device__ static __inline__ float cuCabsf (cuFloatComplex x)
{
    return cabsf (x);
}
/* -- Double Precision -- */
typedef double complex cuDoubleComplex;
__host__ __device__ static __inline__ double cuCreal (cuDoubleComplex x)
{
    return creal(x);
}
__host__ __device__ static __inline__ double cuCimag (cuDoubleComplex x)
{
    return cimag(x);
}
__host__ __device__ static __inline__ cuDoubleComplex make_cuDoubleComplex (double x, double y)
{
    return x + I * y;
}
__host__ __device__ static __inline__ cuDoubleComplex cuConj (cuDoubleComplex x)
{
    return conj (x);
}
__host__ __device__ static __inline__ cuDoubleComplex cuCadd (cuDoubleComplex x, cuDoubleComplex y)
{
    return x + y;
}
__host__ __device__ static __inline__ cuDoubleComplex cuCsub (cuDoubleComplex x, cuDoubleComplex y)
{
    return x - y;
}
__host__ __device__ static __inline__ cuDoubleComplex cuCmul (cuDoubleComplex x, cuDoubleComplex y)
{
    return x * y;
}
__host__ __device__ static __inline__ cuDoubleComplex cuCdiv (cuDoubleComplex x, cuDoubleComplex y)
{
    return x / y;
}
__host__ __device__ static __inline__ double cuCabs (cuDoubleComplex x)
{
    return cabs (x);
}
/* versions for target or hosts without native support for 'complex' */
#else /* (defined(__CUDACC__) || (!(defined(CU_USE_NATIVE_COMPLEX)))) */
typedef float2 cuFloatComplex; typedef float2 cuFloatComplex;
__host__ __device__ static __inline__ float cuCrealf (cuFloatComplex x) __host__ __device__ static __inline__ float cuCrealf (cuFloatComplex x)
{ {
return x.x; return x.x;
} }
__host__ __device__ static __inline__ float cuCimagf (cuFloatComplex x) __host__ __device__ static __inline__ float cuCimagf (cuFloatComplex x)
{ {
return x.y; return x.y;
skipping to change at line 376 skipping to change at line 263
t = w / v; t = w / v;
t = 1.0 + t * t; t = 1.0 + t * t;
t = v * sqrt(t); t = v * sqrt(t);
if ((v == 0.0) || if ((v == 0.0) ||
(v > 1.79769313486231570e+308) || (w > 1.79769313486231570e+308)) { (v > 1.79769313486231570e+308) || (w > 1.79769313486231570e+308)) {
t = v + w; t = v + w;
} }
return t; return t;
} }
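A small host-side sketch of the float2-backed helpers (hedged: cuCmulf comes from the elided middle of this header; values chosen so the results are easy to check by hand, (3+4i)(1-2i) = 11-2i and |3+4i| = 5):

#include "cuComplex.h"
#include <stdio.h>

int main(void)
{
    cuFloatComplex a = make_cuFloatComplex(3.0f, 4.0f);
    cuFloatComplex b = make_cuFloatComplex(1.0f, -2.0f);
    cuFloatComplex p = cuCmulf(a, b);
    printf("p = %f%+fi, |a| = %f\n", cuCrealf(p), cuCimagf(p), cuCabsf(a));
    return 0;
}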
#endif /* (!defined(__CUDACC__) && defined(CU_USE_NATIVE_COMPLEX))) */
#if defined(__cplusplus) #if defined(__cplusplus)
} }
#endif /* __cplusplus */ #endif /* __cplusplus */
/* aliases */ /* aliases */
typedef cuFloatComplex cuComplex; typedef cuFloatComplex cuComplex;
__host__ __device__ static __inline__ cuComplex make_cuComplex (float x, __host__ __device__ static __inline__ cuComplex make_cuComplex (float x,
float y) float y)
{ {
return make_cuFloatComplex (x, y); return make_cuFloatComplex (x, y);
 End of changes. 2 change blocks. 
135 lines changed or deleted 0 lines changed or added


 cuda.h   cuda.h 
skipping to change at line 42 skipping to change at line 42
* include, in the user documentation and internal comments to the code, * include, in the user documentation and internal comments to the code,
* the above Disclaimer and U.S. Government End Users Notice. * the above Disclaimer and U.S. Government End Users Notice.
*/ */
#ifndef __cuda_cuda_h__ #ifndef __cuda_cuda_h__
#define __cuda_cuda_h__ #define __cuda_cuda_h__
#include <stdlib.h> #include <stdlib.h>
/**
 * \file
 * \name Data types used by CUDA driver
 * \author NVIDIA Corporation
 * \brief Data types used by CUDA driver
 */

/**
 * CUDA API versioning support
 */
#if defined(CUDA_FORCE_API_VERSION)
    #if (CUDA_FORCE_API_VERSION == 3010)
        #define __CUDA_API_VERSION 3010
    #else
        #error "Unsupported value of CUDA_FORCE_API_VERSION"
    #endif
#else
    #define __CUDA_API_VERSION 3020
#endif /* CUDA_FORCE_API_VERSION */

#if defined(__CUDA_API_VERSION_INTERNAL) || __CUDA_API_VERSION >= 3020
    #define cuDeviceTotalMem                    cuDeviceTotalMem_v2
    #define cuCtxCreate                         cuCtxCreate_v2
    #define cuModuleGetGlobal                   cuModuleGetGlobal_v2
    #define cuMemGetInfo                        cuMemGetInfo_v2
    #define cuMemAlloc                          cuMemAlloc_v2
    #define cuMemAllocPitch                     cuMemAllocPitch_v2
    #define cuMemFree                           cuMemFree_v2
    #define cuMemGetAddressRange                cuMemGetAddressRange_v2
    #define cuMemAllocHost                      cuMemAllocHost_v2
    #define cuMemHostGetDevicePointer           cuMemHostGetDevicePointer_v2
    #define cuMemcpyHtoD                        cuMemcpyHtoD_v2
    #define cuMemcpyDtoH                        cuMemcpyDtoH_v2
    #define cuMemcpyDtoD                        cuMemcpyDtoD_v2
    #define cuMemcpyDtoA                        cuMemcpyDtoA_v2
    #define cuMemcpyAtoD                        cuMemcpyAtoD_v2
    #define cuMemcpyHtoA                        cuMemcpyHtoA_v2
    #define cuMemcpyAtoH                        cuMemcpyAtoH_v2
    #define cuMemcpyAtoA                        cuMemcpyAtoA_v2
    #define cuMemcpyHtoAAsync                   cuMemcpyHtoAAsync_v2
    #define cuMemcpyAtoHAsync                   cuMemcpyAtoHAsync_v2
    #define cuMemcpy2D                          cuMemcpy2D_v2
    #define cuMemcpy2DUnaligned                 cuMemcpy2DUnaligned_v2
    #define cuMemcpy3D                          cuMemcpy3D_v2
    #define cuMemcpyHtoDAsync                   cuMemcpyHtoDAsync_v2
    #define cuMemcpyDtoHAsync                   cuMemcpyDtoHAsync_v2
    #define cuMemcpyDtoDAsync                   cuMemcpyDtoDAsync_v2
    #define cuMemcpy2DAsync                     cuMemcpy2DAsync_v2
    #define cuMemcpy3DAsync                     cuMemcpy3DAsync_v2
    #define cuMemsetD8                          cuMemsetD8_v2
    #define cuMemsetD16                         cuMemsetD16_v2
    #define cuMemsetD32                         cuMemsetD32_v2
    #define cuMemsetD2D8                        cuMemsetD2D8_v2
    #define cuMemsetD2D16                       cuMemsetD2D16_v2
    #define cuMemsetD2D32                       cuMemsetD2D32_v2
    #define cuArrayCreate                       cuArrayCreate_v2
    #define cuArrayGetDescriptor                cuArrayGetDescriptor_v2
    #define cuArray3DCreate                     cuArray3DCreate_v2
    #define cuArray3DGetDescriptor              cuArray3DGetDescriptor_v2
    #define cuTexRefSetAddress                  cuTexRefSetAddress_v2
    #define cuTexRefSetAddress2D                cuTexRefSetAddress2D_v2
    #define cuTexRefGetAddress                  cuTexRefGetAddress_v2
    #define cuGraphicsResourceGetMappedPointer  cuGraphicsResourceGetMappedPointer_v2
#endif /* __CUDA_API_VERSION_INTERNAL || __CUDA_API_VERSION >= 3020 */

/**
 * \defgroup CUDA_DRIVER CUDA Driver API
 *
 * This section describes the low-level CUDA driver application programming
 * interface.
 *
 * @{
 */
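A hedged sketch of what the versioning block above means for client code: defining CUDA_FORCE_API_VERSION to 3010 before including cuda.h keeps the 3.1 entry points, so cuMemAlloc is not remapped to cuMemAlloc_v2 and CUdeviceptr stays an unsigned int (assumes a context is already current):

#define CUDA_FORCE_API_VERSION 3010
#include <cuda.h>

int alloc_demo(void)
{
    CUdeviceptr p;                       /* unsigned int under API 3010 */
    return (int)cuMemAlloc(&p, 1 << 20); /* binds to the pre-_v2 symbol */
}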
/** /**
* \defgroup CUDA_TYPES Data types used by CUDA driver * \defgroup CUDA_TYPES Data types used by CUDA driver
* \ingroup CUDA_DRIVER
* @{ * @{
*/ */
/** /**
* CUDA API version number * CUDA API version number
*/ */
#define CUDA_VERSION 3010 /* 3.1 */ #define CUDA_VERSION 3020 /* 3.2 */
#ifdef __cplusplus #ifdef __cplusplus
extern "C" { extern "C" {
#endif #endif
typedef unsigned int CUdeviceptr;       ///< CUDA device pointer
typedef int CUdevice;                   ///< CUDA device
typedef struct CUctx_st *CUcontext;     ///< CUDA context
typedef struct CUmod_st *CUmodule;      ///< CUDA module
typedef struct CUfunc_st *CUfunction;   ///< CUDA function
typedef struct CUarray_st *CUarray;     ///< CUDA array
typedef struct CUtexref_st *CUtexref;   ///< CUDA texture reference
typedef struct CUsurfref_st *CUsurfref; ///< CUDA surface reference
typedef struct CUevent_st *CUevent;     ///< CUDA event
typedef struct CUstream_st *CUstream;   ///< CUDA stream
typedef struct CUgraphicsResource_st *CUgraphicsResource; ///< CUDA graphics interop resource

typedef struct CUuuid_st {              ///< CUDA definition of UUID
    char bytes[16];
} CUuuid;

/************************************
 **
 **    Enums
 **
 ***********************************/

/**
 * CUDA device pointer
 */
#if __CUDA_API_VERSION >= 3020

#if defined(__x86_64) || defined(AMD64) || defined(_M_AMD64)
typedef unsigned long long CUdeviceptr;
#else
typedef unsigned int CUdeviceptr;
#endif

#endif /* __CUDA_API_VERSION >= 3020 */

typedef int CUdevice;                                     /**< CUDA device */
typedef struct CUctx_st *CUcontext;                       /**< CUDA context */
typedef struct CUmod_st *CUmodule;                        /**< CUDA module */
typedef struct CUfunc_st *CUfunction;                     /**< CUDA function */
typedef struct CUarray_st *CUarray;                       /**< CUDA array */
typedef struct CUtexref_st *CUtexref;                     /**< CUDA texture reference */
typedef struct CUsurfref_st *CUsurfref;                   /**< CUDA surface reference */
typedef struct CUevent_st *CUevent;                       /**< CUDA event */
typedef struct CUstream_st *CUstream;                     /**< CUDA stream */
typedef struct CUgraphicsResource_st *CUgraphicsResource; /**< CUDA graphics interop resource */

typedef struct CUuuid_st {                                /**< CUDA definition of UUID */
    char bytes[16];
} CUuuid;
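A compile-time check, as a hedged sketch, that the 3.2 definition above really widens CUdeviceptr enough to hold a pointer on 64-bit targets (negative-array-size trick, since this header predates static_assert):

#include <cuda.h>

typedef char cudeviceptr_holds_pointer
    [sizeof(CUdeviceptr) >= sizeof(void *) ? 1 : -1];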
/** /**
* Context creation flags * Context creation flags
*/ */
typedef enum CUctx_flags_enum { typedef enum CUctx_flags_enum {
CU_CTX_SCHED_AUTO = 0, ///< Automatic scheduling CU_CTX_SCHED_AUTO = 0, /**< Automatic scheduling */
CU_CTX_SCHED_SPIN = 1, ///< Set spin as default scheduling CU_CTX_SCHED_SPIN = 1, /**< Set spin as default scheduling */
CU_CTX_SCHED_YIELD = 2, ///< Set yield as default scheduling CU_CTX_SCHED_YIELD = 2, /**< Set yield as default scheduling */
CU_CTX_SCHED_MASK = 0x3, CU_CTX_SCHED_MASK = 0x3,
CU_CTX_BLOCKING_SYNC = 4, ///< Use blocking synchronization CU_CTX_BLOCKING_SYNC = 4, /**< Use blocking synchronization */
    CU_CTX_MAP_HOST = 8, ///< Support mapped pinned allocations    CU_CTX_MAP_HOST = 8, /**< Support mapped pinned allocations */
    CU_CTX_LMEM_RESIZE_TO_MAX = 16, ///< Keep local memory allocation after launch    CU_CTX_LMEM_RESIZE_TO_MAX = 16, /**< Keep local memory allocation after launch */
CU_CTX_FLAGS_MASK = 0x1f CU_CTX_FLAGS_MASK = 0x1f
} CUctx_flags; } CUctx_flags;
/** /**
* Event creation flags * Event creation flags
*/ */
typedef enum CUevent_flags_enum { typedef enum CUevent_flags_enum {
CU_EVENT_DEFAULT = 0, ///< Default event flag CU_EVENT_DEFAULT = 0, /**< Default event flag */
    CU_EVENT_BLOCKING_SYNC = 1 ///< Event uses blocking synchronization    CU_EVENT_BLOCKING_SYNC = 1, /**< Event uses blocking synchronization */
CU_EVENT_DISABLE_TIMING = 2 /**< Event will not record timing data */
} CUevent_flags; } CUevent_flags;
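A hedged timing sketch built on the flags above; CU_EVENT_DEFAULT keeps timestamps, while an event used purely for synchronization could pass the new CU_EVENT_DISABLE_TIMING instead (cuEventCreate, cuEventRecord and cuEventElapsedTime are assumed from the event section of this header, not shown in this diff):

#include <cuda.h>

CUresult time_span(float *ms)
{
    CUevent start, stop;
    CUresult rc;
    if ((rc = cuEventCreate(&start, CU_EVENT_DEFAULT)) != CUDA_SUCCESS)
        return rc;
    if ((rc = cuEventCreate(&stop, CU_EVENT_DEFAULT)) != CUDA_SUCCESS)
        return rc;
    cuEventRecord(start, NULL);
    /* ... enqueue work on the stream here ... */
    cuEventRecord(stop, NULL);
    cuEventSynchronize(stop);
    return cuEventElapsedTime(ms, start, stop);
}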
/** /**
* Array formats * Array formats
*/ */
typedef enum CUarray_format_enum { typedef enum CUarray_format_enum {
CU_AD_FORMAT_UNSIGNED_INT8 = 0x01, ///< Unsigned 8-bit integers CU_AD_FORMAT_UNSIGNED_INT8 = 0x01, /**< Unsigned 8-bit integers */
CU_AD_FORMAT_UNSIGNED_INT16 = 0x02, ///< Unsigned 16-bit integers CU_AD_FORMAT_UNSIGNED_INT16 = 0x02, /**< Unsigned 16-bit integers */
CU_AD_FORMAT_UNSIGNED_INT32 = 0x03, ///< Unsigned 32-bit integers CU_AD_FORMAT_UNSIGNED_INT32 = 0x03, /**< Unsigned 32-bit integers */
CU_AD_FORMAT_SIGNED_INT8 = 0x08, ///< Signed 8-bit integers CU_AD_FORMAT_SIGNED_INT8 = 0x08, /**< Signed 8-bit integers */
CU_AD_FORMAT_SIGNED_INT16 = 0x09, ///< Signed 16-bit integers CU_AD_FORMAT_SIGNED_INT16 = 0x09, /**< Signed 16-bit integers */
CU_AD_FORMAT_SIGNED_INT32 = 0x0a, ///< Signed 32-bit integers CU_AD_FORMAT_SIGNED_INT32 = 0x0a, /**< Signed 32-bit integers */
CU_AD_FORMAT_HALF = 0x10, ///< 16-bit floating point CU_AD_FORMAT_HALF = 0x10, /**< 16-bit floating point */
CU_AD_FORMAT_FLOAT = 0x20 ///< 32-bit floating point CU_AD_FORMAT_FLOAT = 0x20 /**< 32-bit floating point */
} CUarray_format; } CUarray_format;
/** /**
* Texture reference addressing modes * Texture reference addressing modes
*/ */
typedef enum CUaddress_mode_enum { typedef enum CUaddress_mode_enum {
CU_TR_ADDRESS_MODE_WRAP = 0, ///< Wrapping address mode CU_TR_ADDRESS_MODE_WRAP = 0, /**< Wrapping address mode */
CU_TR_ADDRESS_MODE_CLAMP = 1, ///< Clamp to edge address mode CU_TR_ADDRESS_MODE_CLAMP = 1, /**< Clamp to edge address mode */
CU_TR_ADDRESS_MODE_MIRROR = 2 ///< Mirror address mode CU_TR_ADDRESS_MODE_MIRROR = 2, /**< Mirror address mode */
CU_TR_ADDRESS_MODE_BORDER = 3 /**< Border address mode */
} CUaddress_mode; } CUaddress_mode;
/** /**
* Texture reference filtering modes * Texture reference filtering modes
*/ */
typedef enum CUfilter_mode_enum { typedef enum CUfilter_mode_enum {
CU_TR_FILTER_MODE_POINT = 0, ///< Point filter mode CU_TR_FILTER_MODE_POINT = 0, /**< Point filter mode */
CU_TR_FILTER_MODE_LINEAR = 1 ///< Linear filter mode CU_TR_FILTER_MODE_LINEAR = 1 /**< Linear filter mode */
} CUfilter_mode; } CUfilter_mode;
/** /**
* Device properties * Device properties
*/ */
typedef enum CUdevice_attribute_enum { typedef enum CUdevice_attribute_enum {
    CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_BLOCK = 1, ///< Maximum number of threads per block    CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_BLOCK = 1, /**< Maximum number of threads per block */
    CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_X = 2, ///< Maximum block dimension X    CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_X = 2, /**< Maximum block dimension X */
    CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Y = 3, ///< Maximum block dimension Y    CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Y = 3, /**< Maximum block dimension Y */
    CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Z = 4, ///< Maximum block dimension Z    CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Z = 4, /**< Maximum block dimension Z */
    CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_X = 5, ///< Maximum grid dimension X    CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_X = 5, /**< Maximum grid dimension X */
    CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_Y = 6, ///< Maximum grid dimension Y    CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_Y = 6, /**< Maximum grid dimension Y */
    CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_Z = 7, ///< Maximum grid dimension Z    CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_Z = 7, /**< Maximum grid dimension Z */
    CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK = 8, ///< Maximum shared memory available per block in bytes    CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK = 8, /**< Maximum shared memory available per block in bytes */
    CU_DEVICE_ATTRIBUTE_SHARED_MEMORY_PER_BLOCK = 8, ///< Deprecated, use CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK    CU_DEVICE_ATTRIBUTE_SHARED_MEMORY_PER_BLOCK = 8, /**< Deprecated, use CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK */
    CU_DEVICE_ATTRIBUTE_TOTAL_CONSTANT_MEMORY = 9, ///< Memory available on device for __constant__ variables in a CUDA C kernel in bytes    CU_DEVICE_ATTRIBUTE_TOTAL_CONSTANT_MEMORY = 9, /**< Memory available on device for __constant__ variables in a CUDA C kernel in bytes */
    CU_DEVICE_ATTRIBUTE_WARP_SIZE = 10, ///< Warp size in threads    CU_DEVICE_ATTRIBUTE_WARP_SIZE = 10, /**< Warp size in threads */
    CU_DEVICE_ATTRIBUTE_MAX_PITCH = 11, ///< Maximum pitch in bytes allowed by memory copies    CU_DEVICE_ATTRIBUTE_MAX_PITCH = 11, /**< Maximum pitch in bytes allowed by memory copies */
    CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_BLOCK = 12, ///< Maximum number of 32-bit registers available per block    CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_BLOCK = 12, /**< Maximum number of 32-bit registers available per block */
    CU_DEVICE_ATTRIBUTE_REGISTERS_PER_BLOCK = 12, ///< Deprecated, use CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_BLOCK    CU_DEVICE_ATTRIBUTE_REGISTERS_PER_BLOCK = 12, /**< Deprecated, use CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_BLOCK */
    CU_DEVICE_ATTRIBUTE_CLOCK_RATE = 13, ///< Peak clock frequency in kilohertz    CU_DEVICE_ATTRIBUTE_CLOCK_RATE = 13, /**< Peak clock frequency in kilohertz */
    CU_DEVICE_ATTRIBUTE_TEXTURE_ALIGNMENT = 14, ///< Alignment requirement for textures    CU_DEVICE_ATTRIBUTE_TEXTURE_ALIGNMENT = 14, /**< Alignment requirement for textures */
    CU_DEVICE_ATTRIBUTE_GPU_OVERLAP = 15, ///< Device can possibly copy memory and execute a kernel concurrently    CU_DEVICE_ATTRIBUTE_GPU_OVERLAP = 15, /**< Device can possibly copy memory and execute a kernel concurrently */
    CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT = 16, ///< Number of multiprocessors on device    CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT = 16, /**< Number of multiprocessors on device */
    CU_DEVICE_ATTRIBUTE_KERNEL_EXEC_TIMEOUT = 17, ///< Specifies whether there is a run time limit on kernels    CU_DEVICE_ATTRIBUTE_KERNEL_EXEC_TIMEOUT = 17, /**< Specifies whether there is a run time limit on kernels */
    CU_DEVICE_ATTRIBUTE_INTEGRATED = 18, ///< Device is integrated with host memory    CU_DEVICE_ATTRIBUTE_INTEGRATED = 18, /**< Device is integrated with host memory */
    CU_DEVICE_ATTRIBUTE_CAN_MAP_HOST_MEMORY = 19, ///< Device can map host memory into CUDA address space    CU_DEVICE_ATTRIBUTE_CAN_MAP_HOST_MEMORY = 19, /**< Device can map host memory into CUDA address space */
    CU_DEVICE_ATTRIBUTE_COMPUTE_MODE = 20, ///< Compute mode (See ::CUcomputemode for details)    CU_DEVICE_ATTRIBUTE_COMPUTE_MODE = 20, /**< Compute mode (See ::CUcomputemode for details) */
    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_WIDTH = 21, ///< Maximum 1D texture width    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_WIDTH = 21, /**< Maximum 1D texture width */
    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_WIDTH = 22, ///< Maximum 2D texture width    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_WIDTH = 22, /**< Maximum 2D texture width */
    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_HEIGHT = 23, ///< Maximum 2D texture height    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_HEIGHT = 23, /**< Maximum 2D texture height */
    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_WIDTH = 24, ///< Maximum 3D texture width    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_WIDTH = 24, /**< Maximum 3D texture width */
    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_HEIGHT = 25, ///< Maximum 3D texture height    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_HEIGHT = 25, /**< Maximum 3D texture height */
    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_DEPTH = 26, ///< Maximum 3D texture depth    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_DEPTH = 26, /**< Maximum 3D texture depth */
    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_ARRAY_WIDTH = 27, ///< Maximum texture array width    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_ARRAY_WIDTH = 27, /**< Maximum texture array width */
    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_ARRAY_HEIGHT = 28, ///< Maximum texture array height    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_ARRAY_HEIGHT = 28, /**< Maximum texture array height */
    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_ARRAY_NUMSLICES = 29, ///< Maximum slices in a texture array    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_ARRAY_NUMSLICES = 29, /**< Maximum slices in a texture array */
    CU_DEVICE_ATTRIBUTE_SURFACE_ALIGNMENT = 30, ///< Alignment requirement for surfaces    CU_DEVICE_ATTRIBUTE_SURFACE_ALIGNMENT = 30, /**< Alignment requirement for surfaces */
    CU_DEVICE_ATTRIBUTE_CONCURRENT_KERNELS = 31, ///< Device can possibly execute multiple kernels concurrently    CU_DEVICE_ATTRIBUTE_CONCURRENT_KERNELS = 31, /**< Device can possibly execute multiple kernels concurrently */
    CU_DEVICE_ATTRIBUTE_ECC_ENABLED = 32, ///< Device has ECC support enabled    CU_DEVICE_ATTRIBUTE_ECC_ENABLED = 32, /**< Device has ECC support enabled */
    CU_DEVICE_ATTRIBUTE_PCI_BUS_ID = 33, ////< PCI bus ID of the device    CU_DEVICE_ATTRIBUTE_PCI_BUS_ID = 33, /**< PCI bus ID of the device */
    CU_DEVICE_ATTRIBUTE_PCI_DEVICE_ID = 34 ////< PCI device ID of the device    CU_DEVICE_ATTRIBUTE_PCI_DEVICE_ID = 34, /**< PCI device ID of the device */
    CU_DEVICE_ATTRIBUTE_TCC_DRIVER = 35 /**< Device is using TCC driver model */
} CUdevice_attribute; } CUdevice_attribute;
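A hedged query sketch for the attributes above, using cuDeviceGetAttribute as declared later in this header (assumes cuInit(0) has succeeded and dev is a valid handle):

#include <cuda.h>
#include <stdio.h>

void print_limits(CUdevice dev)
{
    int threads = 0, smem = 0, overlap = 0;
    cuDeviceGetAttribute(&threads, CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_BLOCK, dev);
    cuDeviceGetAttribute(&smem, CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK, dev);
    cuDeviceGetAttribute(&overlap, CU_DEVICE_ATTRIBUTE_GPU_OVERLAP, dev);
    printf("max threads/block %d, shared mem/block %d bytes, overlap %d\n",
           threads, smem, overlap);
}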
/** /**
* Legacy device properties * Legacy device properties
*/ */
typedef struct CUdevprop_st { typedef struct CUdevprop_st {
int maxThreadsPerBlock; ///< Maximum number of threads per block int maxThreadsPerBlock; /**< Maximum number of threads per block */
    int maxThreadsDim[3]; ///< Maximum size of each dimension of a block    int maxThreadsDim[3]; /**< Maximum size of each dimension of a block */
    int maxGridSize[3]; ///< Maximum size of each dimension of a grid    int maxGridSize[3]; /**< Maximum size of each dimension of a grid */
    int sharedMemPerBlock; ///< Shared memory available per block in bytes    int sharedMemPerBlock; /**< Shared memory available per block in bytes */
    int totalConstantMemory; ///< Constant memory available on device in bytes    int totalConstantMemory; /**< Constant memory available on device in bytes */
    int SIMDWidth; ///< Warp size in threads    int SIMDWidth; /**< Warp size in threads */
    int memPitch; ///< Maximum pitch in bytes allowed by memory copies    int memPitch; /**< Maximum pitch in bytes allowed by memory copies */
    int regsPerBlock; ///< 32-bit registers available per block    int regsPerBlock; /**< 32-bit registers available per block */
    int clockRate; ///< Clock frequency in kilohertz    int clockRate; /**< Clock frequency in kilohertz */
    int textureAlign; ///< Alignment requirement for textures    int textureAlign; /**< Alignment requirement for textures */
} CUdevprop; } CUdevprop;
/** /**
* Function properties * Function properties
*/ */
typedef enum CUfunction_attribute_enum { typedef enum CUfunction_attribute_enum {
    /**
     * The number of threads beyond which a launch of the function would fail.
     * This number depends on both the function and the device on which the
     * function is currently loaded.
     */
    /**
     * The maximum number of threads per block, beyond which a launch of the
     * function would fail. This number depends on both the function and the
     * device on which the function is currently loaded.
     */
CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK = 0, CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK = 0,
/** /**
* The size in bytes of statically-allocated shared memory required by * The size in bytes of statically-allocated shared memory required by
* this function. This does not include dynamically-allocated shared * this function. This does not include dynamically-allocated shared
* memory requested by the user at runtime. * memory requested by the user at runtime.
*/ */
CU_FUNC_ATTRIBUTE_SHARED_SIZE_BYTES = 1, CU_FUNC_ATTRIBUTE_SHARED_SIZE_BYTES = 1,
/** /**
* The size in bytes of user-allocated constant memory required by this * The size in bytes of user-allocated constant memory required by this
* function. * function.
*/ */
CU_FUNC_ATTRIBUTE_CONST_SIZE_BYTES = 2, CU_FUNC_ATTRIBUTE_CONST_SIZE_BYTES = 2,
/** /**
     * The size in bytes of thread local memory used by this function.    * The size in bytes of local memory used by each thread of this function.
*/ */
CU_FUNC_ATTRIBUTE_LOCAL_SIZE_BYTES = 3, CU_FUNC_ATTRIBUTE_LOCAL_SIZE_BYTES = 3,
/** /**
* The number of registers used by each thread of this function. * The number of registers used by each thread of this function.
*/ */
CU_FUNC_ATTRIBUTE_NUM_REGS = 4, CU_FUNC_ATTRIBUTE_NUM_REGS = 4,
/** /**
     * The PTX virtual architecture version for which the function was compiled.
     * The PTX virtual architecture version for which the function was
     * compiled. This value is the major PTX version * 10 + the minor PTX
     * version, so a PTX version 1.3 function would return the value 13.
     * Note that this may return the undefined value of 0 for cubins
     * compiled prior to CUDA 3.0.
*/ */
CU_FUNC_ATTRIBUTE_PTX_VERSION = 5, CU_FUNC_ATTRIBUTE_PTX_VERSION = 5,
/** /**
* The binary version for which the function was compiled. * The binary architecture version for which the function was compiled.
     * This value is the major binary version * 10 + the minor binary version,
     * so a binary version 1.3 function would return the value 13. Note that
* this will return a value of 10 for legacy cubins that do not have a
* properly-encoded binary architecture version.
*/ */
CU_FUNC_ATTRIBUTE_BINARY_VERSION = 6, CU_FUNC_ATTRIBUTE_BINARY_VERSION = 6,
CU_FUNC_ATTRIBUTE_MAX CU_FUNC_ATTRIBUTE_MAX
} CUfunction_attribute; } CUfunction_attribute;
/** /**
* Function cache configurations * Function cache configurations
*/ */
typedef enum CUfunc_cache_enum { typedef enum CUfunc_cache_enum {
    CU_FUNC_CACHE_PREFER_NONE = 0x00,    CU_FUNC_CACHE_PREFER_NONE = 0x00,   /**< no preference for shared memory or L1 (default) */
    CU_FUNC_CACHE_PREFER_SHARED = 0x01,    CU_FUNC_CACHE_PREFER_SHARED = 0x01, /**< prefer larger shared memory and smaller L1 cache */
    CU_FUNC_CACHE_PREFER_L1 = 0x02    CU_FUNC_CACHE_PREFER_L1 = 0x02      /**< prefer larger L1 cache and smaller shared memory */
} CUfunc_cache; } CUfunc_cache;
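On Fermi-class hardware the same on-chip memory backs both L1 and shared memory, which is what these preferences select between. A hedged one-liner (cuFuncSetCacheConfig is assumed from the execution-control section of this header, not shown in this diff):

#include <cuda.h>

CUresult prefer_shared(CUfunction f)
{
    return cuFuncSetCacheConfig(f, CU_FUNC_CACHE_PREFER_SHARED);
}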
/** /**
* Memory types * Memory types
*/ */
typedef enum CUmemorytype_enum { typedef enum CUmemorytype_enum {
CU_MEMORYTYPE_HOST = 0x01, ///< Host memory CU_MEMORYTYPE_HOST = 0x01, /**< Host memory */
CU_MEMORYTYPE_DEVICE = 0x02, ///< Device memory CU_MEMORYTYPE_DEVICE = 0x02, /**< Device memory */
CU_MEMORYTYPE_ARRAY = 0x03 ///< Array memory CU_MEMORYTYPE_ARRAY = 0x03 /**< Array memory */
} CUmemorytype; } CUmemorytype;
/** /**
* Compute Modes * Compute Modes
*/ */
typedef enum CUcomputemode_enum { typedef enum CUcomputemode_enum {
    CU_COMPUTEMODE_DEFAULT = 0, ///< Default compute mode (Multiple contexts allowed per device)    CU_COMPUTEMODE_DEFAULT = 0, /**< Default compute mode (Multiple contexts allowed per device) */
    CU_COMPUTEMODE_EXCLUSIVE = 1, ///< Compute-exclusive mode (Only one context can be present on this device at a time)    CU_COMPUTEMODE_EXCLUSIVE = 1, /**< Compute-exclusive mode (Only one context can be present on this device at a time) */
    CU_COMPUTEMODE_PROHIBITED = 2 ///< Compute-prohibited mode (No contexts can be created on this device at this time)    CU_COMPUTEMODE_PROHIBITED = 2 /**< Compute-prohibited mode (No contexts can be created on this device at this time) */
} CUcomputemode; } CUcomputemode;
/** /**
* Online compiler options * Online compiler options
*/ */
typedef enum CUjit_option_enum typedef enum CUjit_option_enum
{ {
/** /**
* Max number of registers that a thread may use.\n * Max number of registers that a thread may use.\n
* Option type: unsigned int * Option type: unsigned int
*/ */
CU_JIT_MAX_REGISTERS = 0, CU_JIT_MAX_REGISTERS = 0,
/** /**
     * IN: Specifies minimum number of threads per block to target compilation * IN: Specifies minimum number of threads per block to target compilation
* for\n * for\n
* OUT: Returns the number of threads the compiler actually targeted. * OUT: Returns the number of threads the compiler actually targeted.
     * This restricts the resource utilization of the compiler (e.g. max * This restricts the resource utilization of the compiler (e.g. max
* registers) such that a block with the given number of threads should be * registers) such that a block with the given number of threads should be
* able to launch based on register limitations. Note, this option does not * able to launch based on register limitations. Note, this option does not
* currently take into account any other resource limitations, such as * currently take into account any other resource limitations, such as
* shared memory utilization.\n * shared memory utilization.\n
skipping to change at line 368 skipping to change at line 443
*/ */
CU_JIT_FALLBACK_STRATEGY CU_JIT_FALLBACK_STRATEGY
} CUjit_option; } CUjit_option;
/** /**
* Online compilation targets * Online compilation targets
*/ */
typedef enum CUjit_target_enum typedef enum CUjit_target_enum
{ {
CU_TARGET_COMPUTE_10 = 0, ///< Compute device class 1.0 CU_TARGET_COMPUTE_10 = 0, /**< Compute device class 1.0 */
CU_TARGET_COMPUTE_11, ///< Compute device class 1.1 CU_TARGET_COMPUTE_11, /**< Compute device class 1.1 */
CU_TARGET_COMPUTE_12, ///< Compute device class 1.2 CU_TARGET_COMPUTE_12, /**< Compute device class 1.2 */
CU_TARGET_COMPUTE_13, ///< Compute device class 1.3 CU_TARGET_COMPUTE_13, /**< Compute device class 1.3 */
CU_TARGET_COMPUTE_20 ///< Compute device class 2.0 CU_TARGET_COMPUTE_20, /**< Compute device class 2.0 */
CU_TARGET_COMPUTE_21 /**< Compute device class 2.1 */
} CUjit_target; } CUjit_target;
/** /**
* Cubin matching fallback strategies * Cubin matching fallback strategies
*/ */
typedef enum CUjit_fallback_enum typedef enum CUjit_fallback_enum
{ {
    /** Prefer to compile ptx */
    CU_PREFER_PTX = 0,
    /** Prefer to fall back to compatible binary code */
    CU_PREFER_BINARY

    CU_PREFER_PTX = 0,  /**< Prefer to compile ptx */
    CU_PREFER_BINARY    /**< Prefer to fall back to compatible binary code */
} CUjit_fallback; } CUjit_fallback;
/** /**
* Flags to register a graphics resource * Flags to register a graphics resource
*/ */
typedef enum CUgraphicsRegisterFlags_enum { typedef enum CUgraphicsRegisterFlags_enum {
CU_GRAPHICS_REGISTER_FLAGS_NONE = 0x00 CU_GRAPHICS_REGISTER_FLAGS_NONE = 0x00
} CUgraphicsRegisterFlags; } CUgraphicsRegisterFlags;
skipping to change at line 408 skipping to change at line 482
typedef enum CUgraphicsMapResourceFlags_enum { typedef enum CUgraphicsMapResourceFlags_enum {
CU_GRAPHICS_MAP_RESOURCE_FLAGS_NONE = 0x00, CU_GRAPHICS_MAP_RESOURCE_FLAGS_NONE = 0x00,
CU_GRAPHICS_MAP_RESOURCE_FLAGS_READ_ONLY = 0x01, CU_GRAPHICS_MAP_RESOURCE_FLAGS_READ_ONLY = 0x01,
CU_GRAPHICS_MAP_RESOURCE_FLAGS_WRITE_DISCARD = 0x02 CU_GRAPHICS_MAP_RESOURCE_FLAGS_WRITE_DISCARD = 0x02
} CUgraphicsMapResourceFlags; } CUgraphicsMapResourceFlags;
/** /**
* Array indices for cube faces * Array indices for cube faces
*/ */
typedef enum CUarray_cubemap_face_enum { typedef enum CUarray_cubemap_face_enum {
CU_CUBEMAP_FACE_POSITIVE_X = 0x00, ///< Positive X face of cubemap CU_CUBEMAP_FACE_POSITIVE_X = 0x00, /**< Positive X face of cubemap */
CU_CUBEMAP_FACE_NEGATIVE_X = 0x01, ///< Negative X face of cubemap CU_CUBEMAP_FACE_NEGATIVE_X = 0x01, /**< Negative X face of cubemap */
CU_CUBEMAP_FACE_POSITIVE_Y = 0x02, ///< Positive Y face of cubemap CU_CUBEMAP_FACE_POSITIVE_Y = 0x02, /**< Positive Y face of cubemap */
CU_CUBEMAP_FACE_NEGATIVE_Y = 0x03, ///< Negative Y face of cubemap CU_CUBEMAP_FACE_NEGATIVE_Y = 0x03, /**< Negative Y face of cubemap */
CU_CUBEMAP_FACE_POSITIVE_Z = 0x04, ///< Positive Z face of cubemap CU_CUBEMAP_FACE_POSITIVE_Z = 0x04, /**< Positive Z face of cubemap */
CU_CUBEMAP_FACE_NEGATIVE_Z = 0x05 ///< Negative Z face of cubemap CU_CUBEMAP_FACE_NEGATIVE_Z = 0x05 /**< Negative Z face of cubemap */
} CUarray_cubemap_face; } CUarray_cubemap_face;
/** /**
* Limits * Limits
*/ */
typedef enum CUlimit_enum { typedef enum CUlimit_enum {
CU_LIMIT_STACK_SIZE = 0x00, ///< GPU thread stack size CU_LIMIT_STACK_SIZE = 0x00, /**< GPU thread stack size */
CU_LIMIT_PRINTF_FIFO_SIZE = 0x01 ///< GPU printf FIFO size CU_LIMIT_PRINTF_FIFO_SIZE = 0x01, /**< GPU printf FIFO size */
CU_LIMIT_MALLOC_HEAP_SIZE = 0x02 /**< GPU malloc heap size */
} CUlimit; } CUlimit;
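A hedged sketch of raising the limits above for the current context (cuCtxSetLimit is assumed from the context-management section of this header, not shown in this diff; CU_LIMIT_MALLOC_HEAP_SIZE requires compute capability 2.0):

#include <cuda.h>

void grow_device_heaps(void)
{
    cuCtxSetLimit(CU_LIMIT_PRINTF_FIFO_SIZE, (size_t)8 << 20);  /* 8 MiB */
    cuCtxSetLimit(CU_LIMIT_MALLOC_HEAP_SIZE, (size_t)64 << 20); /* 64 MiB */
}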
/************************************
**
** Error codes
**
***********************************/
/** /**
* Error codes * Error codes
*/ */
typedef enum cudaError_enum { typedef enum cudaError_enum {
    CUDA_SUCCESS                              = 0,   ///< No errors
    CUDA_ERROR_INVALID_VALUE                  = 1,   ///< Invalid value
    CUDA_ERROR_OUT_OF_MEMORY                  = 2,   ///< Out of memory
    CUDA_ERROR_NOT_INITIALIZED                = 3,   ///< Driver not initialized
    CUDA_ERROR_DEINITIALIZED                  = 4,   ///< Driver deinitialized
    CUDA_ERROR_NO_DEVICE                      = 100, ///< No CUDA-capable device available
    CUDA_ERROR_INVALID_DEVICE                 = 101, ///< Invalid device
    CUDA_ERROR_INVALID_IMAGE                  = 200, ///< Invalid kernel image
    CUDA_ERROR_INVALID_CONTEXT                = 201, ///< Invalid context
    CUDA_ERROR_CONTEXT_ALREADY_CURRENT        = 202, ///< Context already current
    CUDA_ERROR_MAP_FAILED                     = 205, ///< Map failed
    CUDA_ERROR_UNMAP_FAILED                   = 206, ///< Unmap failed
    CUDA_ERROR_ARRAY_IS_MAPPED                = 207, ///< Array is mapped
    CUDA_ERROR_ALREADY_MAPPED                 = 208, ///< Already mapped
    CUDA_ERROR_NO_BINARY_FOR_GPU              = 209, ///< No binary for GPU
    CUDA_ERROR_ALREADY_ACQUIRED               = 210, ///< Already acquired
    CUDA_ERROR_NOT_MAPPED                     = 211, ///< Not mapped
    CUDA_ERROR_NOT_MAPPED_AS_ARRAY            = 212, ///< Mapped resource not available for access as an array
    CUDA_ERROR_NOT_MAPPED_AS_POINTER          = 213, ///< Mapped resource not available for access as a pointer
    CUDA_ERROR_ECC_UNCORRECTABLE              = 214, ///< Uncorrectable ECC error detected
    CUDA_ERROR_UNSUPPORTED_LIMIT              = 215, ///< CUlimit not supported by device
    CUDA_ERROR_INVALID_SOURCE                 = 300, ///< Invalid source
    CUDA_ERROR_FILE_NOT_FOUND                 = 301, ///< File not found
    CUDA_ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND = 302, ///< Link to a shared object failed to resolve
    CUDA_ERROR_SHARED_OBJECT_INIT_FAILED      = 303, ///< Shared object initialization failed
    CUDA_ERROR_INVALID_HANDLE                 = 400, ///< Invalid handle
    CUDA_ERROR_NOT_FOUND                      = 500, ///< Not found
    CUDA_ERROR_NOT_READY                      = 600, ///< CUDA not ready
    CUDA_ERROR_LAUNCH_FAILED                  = 700, ///< Launch failed
    CUDA_ERROR_LAUNCH_OUT_OF_RESOURCES        = 701, ///< Launch exceeded resources
    CUDA_ERROR_LAUNCH_TIMEOUT                 = 702, ///< Launch exceeded timeout
    CUDA_ERROR_LAUNCH_INCOMPATIBLE_TEXTURING  = 703, ///< Launch with incompatible texturing
    CUDA_ERROR_POINTER_IS_64BIT               = 800, ///< Attempted to retrieve 64-bit pointer via 32-bit API function
    CUDA_ERROR_SIZE_IS_64BIT                  = 801, ///< Attempted to retrieve 64-bit size via 32-bit API function
    CUDA_ERROR_UNKNOWN                        = 999  ///< Unknown error

    /**
     * The API call returned with no errors. In the case of query calls, this
     * can also mean that the operation being queried is complete (see
     * ::cuEventQuery() and ::cuStreamQuery()).
     */
    CUDA_SUCCESS                              = 0,

    /**
     * This indicates that one or more of the parameters passed to the API call
     * is not within an acceptable range of values.
     */
    CUDA_ERROR_INVALID_VALUE                  = 1,

    /**
     * The API call failed because it was unable to allocate enough memory to
     * perform the requested operation.
     */
    CUDA_ERROR_OUT_OF_MEMORY                  = 2,

    /**
     * This indicates that the CUDA driver has not been initialized with
     * ::cuInit() or that initialization has failed.
     */
    CUDA_ERROR_NOT_INITIALIZED                = 3,

    /**
     * This indicates that the CUDA driver is in the process of shutting down.
     */
    CUDA_ERROR_DEINITIALIZED                  = 4,

    /**
     * This indicates that no CUDA-capable devices were detected by the installed
     * CUDA driver.
     */
    CUDA_ERROR_NO_DEVICE                      = 100,

    /**
     * This indicates that the device ordinal supplied by the user does not
     * correspond to a valid CUDA device.
     */
    CUDA_ERROR_INVALID_DEVICE                 = 101,

    /**
     * This indicates that the device kernel image is invalid. This can also
     * indicate an invalid CUDA module.
     */
    CUDA_ERROR_INVALID_IMAGE                  = 200,

    /**
     * This most frequently indicates that there is no context bound to the
     * current thread. This can also be returned if the context passed to an
     * API call is not a valid handle (such as a context that has had
     * ::cuCtxDestroy() invoked on it). This can also be returned if a user
     * mixes different API versions (i.e. 3010 context with 3020 API calls).
     * See ::cuCtxGetApiVersion() for more details.
     */
    CUDA_ERROR_INVALID_CONTEXT                = 201,

    /**
     * This indicated that the context being supplied as a parameter to the
     * API call was already the active context.
     * \deprecated
     * This error return is deprecated as of CUDA 3.2. It is no longer an
     * error to attempt to push the active context via ::cuCtxPushCurrent().
     */
    CUDA_ERROR_CONTEXT_ALREADY_CURRENT        = 202,

    /**
     * This indicates that a map or register operation has failed.
     */
    CUDA_ERROR_MAP_FAILED                     = 205,

    /**
     * This indicates that an unmap or unregister operation has failed.
     */
    CUDA_ERROR_UNMAP_FAILED                   = 206,

    /**
     * This indicates that the specified array is currently mapped and thus
     * cannot be destroyed.
     */
    CUDA_ERROR_ARRAY_IS_MAPPED                = 207,

    /**
     * This indicates that the resource is already mapped.
     */
    CUDA_ERROR_ALREADY_MAPPED                 = 208,

    /**
     * This indicates that there is no kernel image available that is suitable
     * for the device. This can occur when a user specifies code generation
     * options for a particular CUDA source file that do not include the
     * corresponding device configuration.
     */
    CUDA_ERROR_NO_BINARY_FOR_GPU              = 209,

    /**
     * This indicates that a resource has already been acquired.
     */
    CUDA_ERROR_ALREADY_ACQUIRED               = 210,

    /**
     * This indicates that a resource is not mapped.
     */
    CUDA_ERROR_NOT_MAPPED                     = 211,

    /**
     * This indicates that a mapped resource is not available for access as an
     * array.
     */
    CUDA_ERROR_NOT_MAPPED_AS_ARRAY            = 212,

    /**
     * This indicates that a mapped resource is not available for access as a
     * pointer.
     */
    CUDA_ERROR_NOT_MAPPED_AS_POINTER          = 213,

    /**
     * This indicates that an uncorrectable ECC error was detected during
     * execution.
     */
    CUDA_ERROR_ECC_UNCORRECTABLE              = 214,

    /**
     * This indicates that the ::CUlimit passed to the API call is not
     * supported by the active device.
     */
    CUDA_ERROR_UNSUPPORTED_LIMIT              = 215,

    /**
     * This indicates that the device kernel source is invalid.
     */
    CUDA_ERROR_INVALID_SOURCE                 = 300,

    /**
     * This indicates that the file specified was not found.
     */
    CUDA_ERROR_FILE_NOT_FOUND                 = 301,

    /**
     * This indicates that a link to a shared object failed to resolve.
     */
    CUDA_ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND = 302,

    /**
     * This indicates that initialization of a shared object failed.
     */
    CUDA_ERROR_SHARED_OBJECT_INIT_FAILED      = 303,

    /**
     * This indicates that an OS call failed.
     */
    CUDA_ERROR_OPERATING_SYSTEM               = 304,

    /**
     * This indicates that a resource handle passed to the API call was not
     * valid. Resource handles are opaque types like ::CUstream and ::CUevent.
     */
    CUDA_ERROR_INVALID_HANDLE                 = 400,

    /**
     * This indicates that a named symbol was not found. Examples of symbols
     * are global/constant variable names, texture names, and surface names.
     */
    CUDA_ERROR_NOT_FOUND                      = 500,

    /**
     * This indicates that asynchronous operations issued previously have not
     * completed yet. This result is not actually an error, but must be indicated
     * differently than ::CUDA_SUCCESS (which indicates completion). Calls that
     * may return this value include ::cuEventQuery() and ::cuStreamQuery().
     */
    CUDA_ERROR_NOT_READY                      = 600,

    /**
     * An exception occurred on the device while executing a kernel. Common
     * causes include dereferencing an invalid device pointer and accessing
     * out of bounds shared memory. The context cannot be used, so it must
     * be destroyed (and a new one should be created). All existing device
     * memory allocations from this context are invalid and must be
     * reconstructed if the program is to continue using CUDA.
     */
    CUDA_ERROR_LAUNCH_FAILED                  = 700,

    /**
     * This indicates that a launch did not occur because it did not have
     * appropriate resources. This error usually indicates that the user has
     * attempted to pass too many arguments to the device kernel, or the
     * kernel launch specifies too many threads for the kernel's register
     * count. Passing arguments of the wrong size (i.e. a 64-bit pointer
     * when a 32-bit int is expected) is equivalent to passing too many
     * arguments and can also result in this error.
     */
    CUDA_ERROR_LAUNCH_OUT_OF_RESOURCES        = 701,

    /**
     * This indicates that the device kernel took too long to execute. This can
     * only occur if timeouts are enabled - see the device attribute
     * ::CU_DEVICE_ATTRIBUTE_KERNEL_EXEC_TIMEOUT for more information. The
     * context cannot be used (and must be destroyed similar to
     * ::CUDA_ERROR_LAUNCH_FAILED). All existing device memory allocations from
     * this context are invalid and must be reconstructed if the program is to
     * continue using CUDA.
     */
    CUDA_ERROR_LAUNCH_TIMEOUT                 = 702,

    /**
     * This error indicates a kernel launch that uses an incompatible texturing
     * mode.
     */
    CUDA_ERROR_LAUNCH_INCOMPATIBLE_TEXTURING  = 703,

    /**
     * This indicates that an unknown internal error has occurred.
     */
    CUDA_ERROR_UNKNOWN                        = 999
} CUresult; } CUresult;
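A hedged error-check wrapper over the CUresult codes above (the CU_CHECK name is ours, not part of cuda.h; this driver release has no error-to-string helper, so the numeric code is printed):

#include <cuda.h>
#include <stdio.h>
#include <stdlib.h>

#define CU_CHECK(call)                                      \
    do {                                                    \
        CUresult err_ = (call);                             \
        if (err_ != CUDA_SUCCESS) {                         \
            fprintf(stderr, "%s failed: CUresult %d\n",     \
                    #call, (int)err_);                      \
            exit(EXIT_FAILURE);                             \
        }                                                   \
    } while (0)

/* Usage: CU_CHECK(cuInit(0)); */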
/** /**
* If set, host memory is portable between CUDA contexts. * If set, host memory is portable between CUDA contexts.
* Flag for ::cuMemHostAlloc() * Flag for ::cuMemHostAlloc()
*/ */
#define CU_MEMHOSTALLOC_PORTABLE 0x01 #define CU_MEMHOSTALLOC_PORTABLE 0x01
/** /**
* If set, host memory is mapped into CUDA address space and * If set, host memory is mapped into CUDA address space and
skipping to change at line 502 skipping to change at line 743
#define CU_MEMHOSTALLOC_DEVICEMAP 0x02 #define CU_MEMHOSTALLOC_DEVICEMAP 0x02
/** /**
* If set, host memory is allocated as write-combined - fast to write, * If set, host memory is allocated as write-combined - fast to write,
* faster to DMA, slow to read except via SSE4 streaming load instruction * faster to DMA, slow to read except via SSE4 streaming load instruction
* (MOVNTDQA). * (MOVNTDQA).
* Flag for ::cuMemHostAlloc() * Flag for ::cuMemHostAlloc()
*/ */
#define CU_MEMHOSTALLOC_WRITECOMBINED 0x04 #define CU_MEMHOSTALLOC_WRITECOMBINED 0x04
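A hedged sketch combining the flags above: pinned host memory that is also visible to the device (requires a context created with CU_CTX_MAP_HOST):

#include <cuda.h>

CUresult map_host_buffer(size_t bytes, void **host, CUdeviceptr *dev)
{
    CUresult rc = cuMemHostAlloc(host, bytes,
                                 CU_MEMHOSTALLOC_PORTABLE |
                                 CU_MEMHOSTALLOC_DEVICEMAP);
    if (rc != CUDA_SUCCESS)
        return rc;
    /* Kernels can then address the same allocation through *dev. */
    return cuMemHostGetDevicePointer(dev, *host, 0);
}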
#if __CUDA_API_VERSION >= 3020
/** /**
* 2D memory copy parameters * 2D memory copy parameters
*/ */
typedef struct CUDA_MEMCPY2D_st { typedef struct CUDA_MEMCPY2D_st {
    unsigned int srcXInBytes,   ///< Source X in bytes
                 srcY;          ///< Source Y
    CUmemorytype srcMemoryType; ///< Source memory type (host, device, array)
    const void *srcHost;        ///< Source host pointer
    CUdeviceptr srcDevice;      ///< Source device pointer
    CUarray srcArray;           ///< Source array reference
    unsigned int srcPitch;      ///< Source pitch (ignored when src is array)
    unsigned int dstXInBytes,   ///< Destination X in bytes
                 dstY;          ///< Destination Y
    CUmemorytype dstMemoryType; ///< Destination memory type (host, device, array)
    void *dstHost;              ///< Destination host pointer
    CUdeviceptr dstDevice;      ///< Destination device pointer
    CUarray dstArray;           ///< Destination array reference
    unsigned int dstPitch;      ///< Destination pitch (ignored when dst is array)
    unsigned int WidthInBytes;  ///< Width of 2D memory copy in bytes
    unsigned int Height;        ///< Height of 2D memory copy

    size_t srcXInBytes;         /**< Source X in bytes */
    size_t srcY;                /**< Source Y */
    CUmemorytype srcMemoryType; /**< Source memory type (host, device, array) */
    const void *srcHost;        /**< Source host pointer */
    CUdeviceptr srcDevice;      /**< Source device pointer */
    CUarray srcArray;           /**< Source array reference */
    size_t srcPitch;            /**< Source pitch (ignored when src is array) */
    size_t dstXInBytes;         /**< Destination X in bytes */
    size_t dstY;                /**< Destination Y */
    CUmemorytype dstMemoryType; /**< Destination memory type (host, device, array) */
    void *dstHost;              /**< Destination host pointer */
    CUdeviceptr dstDevice;      /**< Destination device pointer */
    CUarray dstArray;           /**< Destination array reference */
    size_t dstPitch;            /**< Destination pitch (ignored when dst is array) */
    size_t WidthInBytes;        /**< Width of 2D memory copy in bytes */
    size_t Height;              /**< Height of 2D memory copy */
} CUDA_MEMCPY2D; } CUDA_MEMCPY2D;
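A hedged fill-in of the struct above: copying a pitched host image to a pitched device allocation (the memset zeroes the offsets and the unused source/destination fields):

#include <cuda.h>
#include <string.h>

CUresult copy_image(CUdeviceptr dst, size_t dstPitch,
                    const void *src, size_t srcPitch,
                    size_t widthBytes, size_t height)
{
    CUDA_MEMCPY2D c;
    memset(&c, 0, sizeof(c));
    c.srcMemoryType = CU_MEMORYTYPE_HOST;
    c.srcHost       = src;
    c.srcPitch      = srcPitch;
    c.dstMemoryType = CU_MEMORYTYPE_DEVICE;
    c.dstDevice     = dst;
    c.dstPitch      = dstPitch;
    c.WidthInBytes  = widthBytes;
    c.Height        = height;
    return cuMemcpy2D(&c);
}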
/** /**
* 3D memory copy parameters * 3D memory copy parameters
*/ */
typedef struct CUDA_MEMCPY3D_st { typedef struct CUDA_MEMCPY3D_st {
    unsigned int srcXInBytes,   ///< Source X in bytes
                 srcY,          ///< Source Y
                 srcZ;          ///< Source Z
    unsigned int srcLOD;        ///< Source LOD
    CUmemorytype srcMemoryType; ///< Source memory type (host, device, array)
    const void *srcHost;        ///< Source host pointer
    CUdeviceptr srcDevice;      ///< Source device pointer
    CUarray srcArray;           ///< Source array reference
    void *reserved0;            ///< Must be NULL
    unsigned int srcPitch;      ///< Source pitch (ignored when src is array)
    unsigned int srcHeight;     ///< Source height (ignored when src is array; may be 0 if Depth==1)
    unsigned int dstXInBytes,   ///< Destination X in bytes
                 dstY,          ///< Destination Y
                 dstZ;          ///< Destination Z
    unsigned int dstLOD;        ///< Destination LOD
    CUmemorytype dstMemoryType; ///< Destination memory type (host, device, array)
    void *dstHost;              ///< Destination host pointer
    CUdeviceptr dstDevice;      ///< Destination device pointer
    CUarray dstArray;           ///< Destination array reference
    void *reserved1;            ///< Must be NULL
    unsigned int dstPitch;      ///< Destination pitch (ignored when dst is array)
    unsigned int dstHeight;     ///< Destination height (ignored when dst is array; may be 0 if Depth==1)
    unsigned int WidthInBytes;  ///< Width of 3D memory copy in bytes
    unsigned int Height;        ///< Height of 3D memory copy
    unsigned int Depth;         ///< Depth of 3D memory copy

    size_t srcXInBytes;         /**< Source X in bytes */
    size_t srcY;                /**< Source Y */
    size_t srcZ;                /**< Source Z */
    size_t srcLOD;              /**< Source LOD */
    CUmemorytype srcMemoryType; /**< Source memory type (host, device, array) */
    const void *srcHost;        /**< Source host pointer */
    CUdeviceptr srcDevice;      /**< Source device pointer */
    CUarray srcArray;           /**< Source array reference */
    void *reserved0;            /**< Must be NULL */
    size_t srcPitch;            /**< Source pitch (ignored when src is array) */
    size_t srcHeight;           /**< Source height (ignored when src is array; may be 0 if Depth==1) */
    size_t dstXInBytes;         /**< Destination X in bytes */
    size_t dstY;                /**< Destination Y */
    size_t dstZ;                /**< Destination Z */
    size_t dstLOD;              /**< Destination LOD */
    CUmemorytype dstMemoryType; /**< Destination memory type (host, device, array) */
    void *dstHost;              /**< Destination host pointer */
    CUdeviceptr dstDevice;      /**< Destination device pointer */
    CUarray dstArray;           /**< Destination array reference */
    void *reserved1;            /**< Must be NULL */
    size_t dstPitch;            /**< Destination pitch (ignored when dst is array) */
    size_t dstHeight;           /**< Destination height (ignored when dst is array; may be 0 if Depth==1) */
    size_t WidthInBytes;        /**< Width of 3D memory copy in bytes */
    size_t Height;              /**< Height of 3D memory copy */
    size_t Depth;               /**< Depth of 3D memory copy */
} CUDA_MEMCPY3D; } CUDA_MEMCPY3D;
/** /**
* Array descriptor * Array descriptor
*/ */
typedef struct
{
    unsigned int Width;         ///< Width of array
    unsigned int Height;        ///< Height of array
    CUarray_format Format;      ///< Array format
    unsigned int NumChannels;   ///< Channels per array element
} CUDA_ARRAY_DESCRIPTOR;

typedef struct CUDA_ARRAY_DESCRIPTOR_st
{
    size_t Width;             /**< Width of array */
    size_t Height;            /**< Height of array */
    CUarray_format Format;    /**< Array format */
    unsigned int NumChannels; /**< Channels per array element */
} CUDA_ARRAY_DESCRIPTOR;
/** /**
* 3D array descriptor * 3D array descriptor
*/ */
typedef struct
{
    unsigned int Width;         ///< Width of 3D array
    unsigned int Height;        ///< Height of 3D array
    unsigned int Depth;         ///< Depth of 3D array
    CUarray_format Format;      ///< Array format
    unsigned int NumChannels;   ///< Channels per array element
    unsigned int Flags;         ///< Flags
} CUDA_ARRAY3D_DESCRIPTOR;

typedef struct CUDA_ARRAY3D_DESCRIPTOR_st
{
    size_t Width;             /**< Width of 3D array */
    size_t Height;            /**< Height of 3D array */
    size_t Depth;             /**< Depth of 3D array */
    CUarray_format Format;    /**< Array format */
    unsigned int NumChannels; /**< Channels per array element */
    unsigned int Flags;       /**< Flags */
} CUDA_ARRAY3D_DESCRIPTOR;
// if set, the CUDA array contains an array of 2D slices
// and the Depth member of CUDA_ARRAY3D_DESCRIPTOR specifies
// the number of slices, not the depth of a 3D array.

#endif /* __CUDA_API_VERSION >= 3020 */

/**
 * If set, the CUDA array contains an array of 2D slices
 * and the Depth member of CUDA_ARRAY3D_DESCRIPTOR specifies
 * the number of slices, not the depth of a 3D array.
 */
#define CUDA_ARRAY3D_2DARRAY 0x01 #define CUDA_ARRAY3D_2DARRAY 0x01
// this flag must be set in order to bind a surface reference
// to the CUDA array

/**
 * This flag must be set in order to bind a surface reference
 * to the CUDA array
 */
#define CUDA_ARRAY3D_SURFACE_LDST 0x02 #define CUDA_ARRAY3D_SURFACE_LDST 0x02
/** /**
* Override the texref format with a format inferred from the array. * Override the texref format with a format inferred from the array.
* Flag for ::cuTexRefSetArray() * Flag for ::cuTexRefSetArray()
*/ */
#define CU_TRSA_OVERRIDE_FORMAT 0x01 #define CU_TRSA_OVERRIDE_FORMAT 0x01
/** /**
* Read the texture as integers rather than promoting the values to floats * Read the texture as integers rather than promoting the values to floats
skipping to change at line 619 skipping to change at line 865
*/ */
#define CU_TRSF_READ_AS_INTEGER 0x01 #define CU_TRSF_READ_AS_INTEGER 0x01
/** /**
 * Use normalized texture coordinates in the range [0,1) instead of [0,dim). * Use normalized texture coordinates in the range [0,1) instead of [0,dim).
* Flag for ::cuTexRefSetFlags() * Flag for ::cuTexRefSetFlags()
*/ */
#define CU_TRSF_NORMALIZED_COORDINATES 0x02 #define CU_TRSF_NORMALIZED_COORDINATES 0x02
/** /**
* Perform sRGB->linear conversion during texture read.
* Flag for ::cuTexRefSetFlags()
*/
#define CU_TRSF_SRGB 0x10
/**
* For texture references loaded into the module, use default texunit from * For texture references loaded into the module, use default texunit from
* texture reference. * texture reference.
*/ */
#define CU_PARAM_TR_DEFAULT -1 #define CU_PARAM_TR_DEFAULT -1
/** @} */
/** @} */ /* END CUDA_TYPES */ /** @} */ /* END CUDA_TYPES */
#ifdef _WIN32 #ifdef _WIN32
#define CUDAAPI __stdcall #define CUDAAPI __stdcall
#else #else
#define CUDAAPI #define CUDAAPI
#endif #endif
/*********************************
 ** Initialization
 *********************************/
CUresult  CUDAAPI cuInit(unsigned int Flags);

/*********************************
 ** Driver Version Query
 *********************************/
CUresult  CUDAAPI cuDriverGetVersion(int *driverVersion);

/************************************
 **
 **    Device management
 **
 ***********************************/

CUresult  CUDAAPI cuDeviceGet(CUdevice *device, int ordinal);
CUresult  CUDAAPI cuDeviceGetCount(int *count);
CUresult  CUDAAPI cuDeviceGetName(char *name, int len, CUdevice dev);
CUresult  CUDAAPI cuDeviceComputeCapability(int *major, int *minor, CUdevice dev);
CUresult  CUDAAPI cuDeviceTotalMem(unsigned int *bytes, CUdevice dev);
CUresult  CUDAAPI cuDeviceGetProperties(CUdevprop *prop, CUdevice dev);
CUresult  CUDAAPI cuDeviceGetAttribute(int *pi, CUdevice_attribute attrib, CUdevice dev);

/************************************
 **
 **    Context management
 **
 ***********************************/

CUresult  CUDAAPI cuCtxCreate(CUcontext *pctx, unsigned int flags, CUdevice dev );
CUresult  CUDAAPI cuCtxDestroy( CUcontext ctx );
CUresult  CUDAAPI cuCtxAttach(CUcontext *pctx, unsigned int flags);
CUresult  CUDAAPI cuCtxDetach(CUcontext ctx);
CUresult  CUDAAPI cuCtxPushCurrent( CUcontext ctx );
CUresult  CUDAAPI cuCtxPopCurrent( CUcontext *pctx );
CUresult  CUDAAPI cuCtxGetDevice(CUdevice *device);
CUresult  CUDAAPI cuCtxSynchronize(void);

/************************************
 **
 **    Module management
 **
 ***********************************/

CUresult  CUDAAPI cuModuleLoad(CUmodule *module, const char *fname);
CUresult  CUDAAPI cuModuleLoadData(CUmodule *module, const void *image);
CUresult  CUDAAPI cuModuleLoadDataEx(CUmodule *module, const void *image, unsigned int numOptions, CUjit_option *options, void **optionValues);
CUresult  CUDAAPI cuModuleLoadFatBinary(CUmodule *module, const void *fatCubin);
CUresult  CUDAAPI cuModuleUnload(CUmodule hmod);
CUresult  CUDAAPI cuModuleGetFunction(CUfunction *hfunc, CUmodule hmod, const char *name);
CUresult  CUDAAPI cuModuleGetGlobal(CUdeviceptr *dptr, unsigned int *bytes, CUmodule hmod, const char *name);
CUresult  CUDAAPI cuModuleGetTexRef(CUtexref *pTexRef, CUmodule hmod, const char *name);
CUresult  CUDAAPI cuModuleGetSurfRef(CUsurfref *pSurfRef, CUmodule hmod, const char *name);

/************************************
 **
 **    Memory management
 **
 ***********************************/

CUresult CUDAAPI cuMemGetInfo(unsigned int *free, unsigned int *total);

CUresult CUDAAPI cuMemAlloc( CUdeviceptr *dptr, unsigned int bytesize);
CUresult CUDAAPI cuMemAllocPitch( CUdeviceptr *dptr,
                                  unsigned int *pPitch,
                                  unsigned int WidthInBytes,
                                  unsigned int Height,
                                  // size of biggest r/w to be performed by kernels on this memory
                                  // 4, 8 or 16 bytes
                                  unsigned int ElementSizeBytes
                                );
CUresult CUDAAPI cuMemFree(CUdeviceptr dptr);
CUresult CUDAAPI cuMemGetAddressRange( CUdeviceptr *pbase, unsigned int *psize, CUdeviceptr dptr );

CUresult CUDAAPI cuMemAllocHost(void **pp, unsigned int bytesize);
CUresult CUDAAPI cuMemFreeHost(void *p);

CUresult CUDAAPI cuMemHostAlloc(void **pp, size_t bytesize, unsigned int Flags );

CUresult CUDAAPI cuMemHostGetDevicePointer( CUdeviceptr *pdptr, void *p, unsigned int Flags );
CUresult CUDAAPI cuMemHostGetFlags( unsigned int *pFlags, void *p );
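A hedged sketch against the 3.1-style module declarations above ("my_module.cubin", "scale" and "g_data" are hypothetical names; assumes a current context):

#include <cuda.h>

CUresult load_kernel(CUfunction *fn, CUdeviceptr *gptr, unsigned int *gbytes)
{
    CUmodule mod;
    CUresult rc = cuModuleLoad(&mod, "my_module.cubin");
    if (rc != CUDA_SUCCESS)
        return rc;
    rc = cuModuleGetFunction(fn, mod, "scale");
    if (rc != CUDA_SUCCESS)
        return rc;
    /* Look up a __device__ global defined by the module. */
    return cuModuleGetGlobal(gptr, gbytes, mod, "g_data");
}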
/**
 * \defgroup CUDA_INITIALIZE Initialization
 *
 * This section describes the initialization functions of the low-level CUDA
 * driver application programming interface.
 *
 * @{
 */

/**
 * \brief Initialize the CUDA driver API
 *
 * Initializes the driver API and must be called before any other function from
 * the driver API. Currently, the \p Flags parameter must be 0. If ::cuInit()
 * has not been called, any function from the driver API will return
 * ::CUDA_ERROR_NOT_INITIALIZED.
 *
 * \param Flags - Initialization flag for CUDA.
 *
 * \return
 * ::CUDA_SUCCESS,
 * ::CUDA_ERROR_INVALID_VALUE,
 * ::CUDA_ERROR_INVALID_DEVICE
 * \notefnerr
 */
CUresult CUDAAPI cuInit(unsigned int Flags);

/** @} */ /* END CUDA_INITIALIZE */

/**
 * \defgroup CUDA_VERSION Version Management
 *
 * This section describes the version management functions of the low-level
 * CUDA driver application programming interface.
 *
 * @{
 */

/**
 * \brief Returns the CUDA driver version
 *
 * Returns in \p *driverVersion the version number of the installed CUDA
 * driver. This function automatically returns ::CUDA_ERROR_INVALID_VALUE if
 * the \p driverVersion argument is NULL.
 *
 * \param driverVersion - Returns the CUDA driver version
 *
 * \return
 * ::CUDA_SUCCESS,
 * ::CUDA_ERROR_INVALID_VALUE
 * \notefnerr
 */
CUresult CUDAAPI cuDriverGetVersion(int *driverVersion);

/** @} */ /* END CUDA_VERSION */
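A hedged check that the installed driver is new enough for the 3.2 API this header targets (the version is encoded as 1000*major + 10*minor, so 3.2 reports 3020):

#include <cuda.h>

int driver_supports_32(void)
{
    int v = 0;
    return cuDriverGetVersion(&v) == CUDA_SUCCESS && v >= 3020;
}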
/**
 * \defgroup CUDA_DEVICE Device Management
 *
 * This section describes the device management functions of the low-level
 * CUDA driver application programming interface.
 *
 * @{
 */

/**
 * \brief Returns a handle to a compute device
 *
 * Returns in \p *device a device handle given an ordinal in the range <b>[0,
 * ::cuDeviceGetCount()-1]</b>.
 *
 * \param device  - Returned device handle
 * \param ordinal - Device number to get handle for
 *
 * \return
 * ::CUDA_SUCCESS,
 * ::CUDA_ERROR_DEINITIALIZED,
 * ::CUDA_ERROR_NOT_INITIALIZED,
 * ::CUDA_ERROR_INVALID_CONTEXT,
 * ::CUDA_ERROR_INVALID_VALUE,
 * ::CUDA_ERROR_INVALID_DEVICE
 * \notefnerr
 *
 * \sa ::cuDeviceComputeCapability,
 * ::cuDeviceGetAttribute,
 * ::cuDeviceGetCount,
 * ::cuDeviceGetName,
 * ::cuDeviceGetProperties,
 * ::cuDeviceTotalMem
 */
CUresult CUDAAPI cuDeviceGet(CUdevice *device, int ordinal);

/**
 * \brief Returns the number of compute-capable devices
 *
 * Returns in \p *count the number of devices with compute capability greater
 * than or equal to 1.0 that are available for execution. If there is no such
 * device, ::cuDeviceGetCount() returns 0.
 *
 * \param count - Returned number of compute-capable devices
 *
 * \return
 * ::CUDA_SUCCESS,
 * ::CUDA_ERROR_DEINITIALIZED,
 * ::CUDA_ERROR_NOT_INITIALIZED,
 * ::CUDA_ERROR_INVALID_CONTEXT,
 * ::CUDA_ERROR_INVALID_VALUE
 * \notefnerr
 *
 * \sa ::cuDeviceComputeCapability,
 * ::cuDeviceGetAttribute,
 * ::cuDeviceGetName,
 * ::cuDeviceGet,
 * ::cuDeviceGetProperties,
 * ::cuDeviceTotalMem
 */
CUresult CUDAAPI cuDeviceGetCount(int *count);

/**
 * \brief Returns an identifier string for the device
 *
 * Returns an ASCII string identifying the device \p dev in the NULL-terminated
 * string pointed to by \p name. \p len specifies the maximum length of the
 * string that may be returned.
 *
 * \param name - Returned identifier string for the device
 * \param len  - Maximum length of string to store in \p name
 * \param dev  - Device to get identifier string for
 *
 * \return
 * ::CUDA_SUCCESS,
 * ::CUDA_ERROR_DEINITIALIZED,
 * ::CUDA_ERROR_NOT_INITIALIZED,
 * ::CUDA_ERROR_INVALID_CONTEXT,
 * ::CUDA_ERROR_INVALID_VALUE,
 * ::CUDA_ERROR_INVALID_DEVICE
 * \notefnerr
 *
 * \sa ::cuDeviceComputeCapability,
 * ::cuDeviceGetAttribute,
 * ::cuDeviceGetCount,
 * ::cuDeviceGet,
 * ::cuDeviceGetProperties,
 * ::cuDeviceTotalMem
 */
CUresult CUDAAPI cuDeviceGetName(char *name, int len, CUdevice dev);
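A hedged enumeration sketch tying the three calls above together:

#include <cuda.h>
#include <stdio.h>

void list_devices(void)
{
    int n = 0, i;
    char name[256];
    if (cuInit(0) != CUDA_SUCCESS || cuDeviceGetCount(&n) != CUDA_SUCCESS)
        return;
    for (i = 0; i < n; ++i) {
        CUdevice dev;
        if (cuDeviceGet(&dev, i) == CUDA_SUCCESS &&
            cuDeviceGetName(name, sizeof(name), dev) == CUDA_SUCCESS)
            printf("device %d: %s\n", i, name);
    }
}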
/**
 * \brief Returns the compute capability of the device
 *
 * Returns in \p *major and \p *minor the major and minor revision numbers that
 * define the compute capability of the device \p dev.
 *
 * \param major - Major revision number
 * \param minor - Minor revision number
 * \param dev   - Device handle
 *
 * \return
 * ::CUDA_SUCCESS,
 * ::CUDA_ERROR_DEINITIALIZED,
 * ::CUDA_ERROR_NOT_INITIALIZED,
 * ::CUDA_ERROR_INVALID_CONTEXT,
 * ::CUDA_ERROR_INVALID_VALUE,
 * ::CUDA_ERROR_INVALID_DEVICE
 * \notefnerr
 *
 * \sa
 * ::cuDeviceGetAttribute,
 * ::cuDeviceGetCount,
 * ::cuDeviceGetName,
 * ::cuDeviceGet,
 * ::cuDeviceGetProperties,
 * ::cuDeviceTotalMem
 */
CUresult CUDAAPI cuDeviceComputeCapability(int *major, int *minor, CUdevice dev);

#if __CUDA_API_VERSION >= 3020
/**
 * \brief Returns the total amount of memory on the device
 *
 * Returns in \p *bytes the total amount of memory available on the device
 * \p dev in bytes.
 *
 * \param bytes - Returned memory available on device in bytes
 * \param dev   - Device handle
 *
 * \return
 * ::CUDA_SUCCESS,
 * ::CUDA_ERROR_DEINITIALIZED,
 * ::CUDA_ERROR_NOT_INITIALIZED,
 * ::CUDA_ERROR_INVALID_CONTEXT,
 * ::CUDA_ERROR_INVALID_VALUE,
 * ::CUDA_ERROR_INVALID_DEVICE
 * \notefnerr
 *
 * \sa ::cuDeviceComputeCapability,
 * ::cuDeviceGetAttribute,
 * ::cuDeviceGetCount,
 * ::cuDeviceGetName,
 * ::cuDeviceGet,
 * ::cuDeviceGetProperties,
 */
CUresult CUDAAPI cuDeviceTotalMem(size_t *bytes, CUdevice dev);
#endif /* __CUDA_API_VERSION >= 3020 */
/**
 * \brief Returns properties for a selected device
 *
 * Returns in \p *prop the properties of device \p dev. The ::CUdevprop
 * structure is defined as:
 *
 * \code
     typedef struct CUdevprop_st {
        int maxThreadsPerBlock;
        int maxThreadsDim[3];
        int maxGridSize[3];
        int sharedMemPerBlock;
        int totalConstantMemory;
        int SIMDWidth;
        int memPitch;
        int regsPerBlock;
        int clockRate;
        int textureAlign;
     } CUdevprop;
 * \endcode
 * where:
 *
 * - ::maxThreadsPerBlock is the maximum number of threads per block;
 * - ::maxThreadsDim[3] is the maximum size of each dimension of a block;
 * - ::maxGridSize[3] is the maximum size of each dimension of a grid;
 * - ::sharedMemPerBlock is the total amount of shared memory available per
 *   block in bytes;
 * - ::totalConstantMemory is the total amount of constant memory available on
 *   the device in bytes;
 * - ::SIMDWidth is the warp size;
 * - ::memPitch is the maximum pitch allowed by the memory copy functions that
 *   involve memory regions allocated through ::cuMemAllocPitch();
 * - ::regsPerBlock is the total number of registers available per block;
 * - ::clockRate is the clock frequency in kilohertz;
 * - ::textureAlign is the alignment requirement; texture base addresses that
 *   are aligned to ::textureAlign bytes do not need an offset applied to
 *   texture fetches.
 *
 * \param prop - Returned properties of device
 * \param dev - Device to get properties for
 *
 * \return
 * ::CUDA_SUCCESS,
 * ::CUDA_ERROR_DEINITIALIZED,
 * ::CUDA_ERROR_NOT_INITIALIZED,
 * ::CUDA_ERROR_INVALID_CONTEXT,
 * ::CUDA_ERROR_INVALID_VALUE,
 * ::CUDA_ERROR_INVALID_DEVICE
 * \notefnerr
 *
 * \sa ::cuDeviceComputeCapability,
 * ::cuDeviceGetAttribute,
 * ::cuDeviceGetCount,
 * ::cuDeviceGetName,
 * ::cuDeviceGet,
 * ::cuDeviceTotalMem
 */
CUresult CUDAAPI cuDeviceGetProperties(CUdevprop *prop, CUdevice dev);
/**
 * \brief Returns information about the device
 *
 * Returns in \p *pi the integer value of the attribute \p attrib on device
 * \p dev. The supported attributes are:
 * - ::CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_BLOCK: Maximum number of threads per
 *   block;
 * - ::CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_X: Maximum x-dimension of a block;
 * - ::CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Y: Maximum y-dimension of a block;
 * - ::CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Z: Maximum z-dimension of a block;
 * - ::CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_X: Maximum x-dimension of a grid;
 * - ::CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_Y: Maximum y-dimension of a grid;
 * - ::CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_Z: Maximum z-dimension of a grid;
 * - ::CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK: Maximum amount of
 *   shared memory available to a thread block in bytes; this amount is shared
 *   by all thread blocks simultaneously resident on a multiprocessor;
 * - ::CU_DEVICE_ATTRIBUTE_TOTAL_CONSTANT_MEMORY: Memory available on device for
 *   __constant__ variables in a CUDA C kernel in bytes;
 * - ::CU_DEVICE_ATTRIBUTE_WARP_SIZE: Warp size in threads;
 * - ::CU_DEVICE_ATTRIBUTE_MAX_PITCH: Maximum pitch in bytes allowed by the
 *   memory copy functions that involve memory regions allocated through
 *   ::cuMemAllocPitch();
 * - ::CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_BLOCK: Maximum number of 32-bit
 *   registers available to a thread block; this number is shared by all thread
 *   blocks simultaneously resident on a multiprocessor;
 * - ::CU_DEVICE_ATTRIBUTE_CLOCK_RATE: Peak clock frequency in kilohertz;
 * - ::CU_DEVICE_ATTRIBUTE_TEXTURE_ALIGNMENT: Alignment requirement; texture
 *   base addresses aligned to ::textureAlign bytes do not need an offset
 *   applied to texture fetches;
 * - ::CU_DEVICE_ATTRIBUTE_GPU_OVERLAP: 1 if the device can concurrently copy
 *   memory between host and device while executing a kernel, or 0 if not;
 * - ::CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT: Number of multiprocessors on
 *   the device;
 * - ::CU_DEVICE_ATTRIBUTE_KERNEL_EXEC_TIMEOUT: 1 if there is a run time limit
 *   for kernels executed on the device, or 0 if not;
 * - ::CU_DEVICE_ATTRIBUTE_INTEGRATED: 1 if the device is integrated with the
 *   memory subsystem, or 0 if not;
 * - ::CU_DEVICE_ATTRIBUTE_CAN_MAP_HOST_MEMORY: 1 if the device can map host
 *   memory into the CUDA address space, or 0 if not;
 * - ::CU_DEVICE_ATTRIBUTE_COMPUTE_MODE: Compute mode that device is currently
 *   in. Available modes are as follows:
 *   - ::CU_COMPUTEMODE_DEFAULT: Default mode - Device is not restricted and
 *     can have multiple CUDA contexts present at a single time.
 *   - ::CU_COMPUTEMODE_EXCLUSIVE: Compute-exclusive mode - Device can have
 *     only one CUDA context present on it at a time.
 *   - ::CU_COMPUTEMODE_PROHIBITED: Compute-prohibited mode - Device is
 *     prohibited from creating new CUDA contexts.
 * - ::CU_DEVICE_ATTRIBUTE_CONCURRENT_KERNELS: 1 if the device supports
 *   executing multiple kernels within the same context simultaneously, or 0 if
 *   not. It is not guaranteed that multiple kernels will be resident
 *   on the device concurrently so this feature should not be relied upon for
 *   correctness;
 * - ::CU_DEVICE_ATTRIBUTE_ECC_ENABLED: 1 if error correction is enabled on the
 *   device, 0 if error correction is disabled or not supported by the device;
 * - ::CU_DEVICE_ATTRIBUTE_PCI_BUS_ID: PCI bus identifier of the device;
 * - ::CU_DEVICE_ATTRIBUTE_PCI_DEVICE_ID: PCI device (also known as slot)
 *   identifier of the device;
 * - ::CU_DEVICE_ATTRIBUTE_TCC_DRIVER: 1 if the device is using a TCC driver.
 *   TCC is only available on Tesla hardware running Windows Vista or later.
 *
 * \param pi - Returned device attribute value
 * \param attrib - Device attribute to query
 * \param dev - Device handle
 *
 * \return
 * ::CUDA_SUCCESS,
 * ::CUDA_ERROR_DEINITIALIZED,
 * ::CUDA_ERROR_NOT_INITIALIZED,
 * ::CUDA_ERROR_INVALID_CONTEXT,
 * ::CUDA_ERROR_INVALID_VALUE,
 * ::CUDA_ERROR_INVALID_DEVICE
 * \notefnerr
 *
 * \sa ::cuDeviceComputeCapability,
 * ::cuDeviceGetCount,
 * ::cuDeviceGetName,
 * ::cuDeviceGet,
 * ::cuDeviceGetProperties,
 * ::cuDeviceTotalMem
 */
CUresult CUDAAPI cuDeviceGetAttribute(int *pi, CUdevice_attribute attrib, CUdevice dev);
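/*
 * Editor's note: a short sketch of querying individual attributes with
 * ::cuDeviceGetAttribute() (not part of the original header; \p dev is a
 * previously obtained device handle):
 *
 * \code
 *   int warpSize = 0, smCount = 0;
 *   cuDeviceGetAttribute(&warpSize, CU_DEVICE_ATTRIBUTE_WARP_SIZE, dev);
 *   cuDeviceGetAttribute(&smCount,
 *                        CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT, dev);
 * \endcode
 */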
/** @} */ /* END CUDA_DEVICE */

/**
 * \defgroup CUDA_CTX Context Management
 *
 * This section describes the context management functions of the low-level
 * CUDA driver application programming interface.
 *
 * @{
 */
#if __CUDA_API_VERSION >= 3020
/**
 * \brief Create a CUDA context
 *
 * Creates a new CUDA context and associates it with the calling thread. The
 * \p flags parameter is described below. The context is created with a usage
 * count of 1 and the caller of ::cuCtxCreate() must call ::cuCtxDestroy() or
 * ::cuCtxDetach() when done using the context. If a context is already current
 * to the thread, it is supplanted by the newly created context and may be
 * restored by a subsequent call to ::cuCtxPopCurrent().
 *
 * The two LSBs of the \p flags parameter can be used to control how the OS
 * thread, which owns the CUDA context at the time of an API call, interacts
 * with the OS scheduler when waiting for results from the GPU.
 *
 * - ::CU_CTX_SCHED_AUTO: The default value if the \p flags parameter is zero,
 * uses a heuristic based on the number of active CUDA contexts in the
 * process \e C and the number of logical processors in the system \e P. If
 * \e C > \e P, then CUDA will yield to other OS threads when waiting for
 * the GPU, otherwise CUDA will not yield while waiting for results and
 * actively spin on the processor.
 *
 * - ::CU_CTX_SCHED_SPIN: Instruct CUDA to actively spin when waiting for
 * results from the GPU. This can decrease latency when waiting for the GPU,
 * but may lower the performance of CPU threads if they are performing work in
 * parallel with the CUDA thread.
 *
 * - ::CU_CTX_SCHED_YIELD: Instruct CUDA to yield its thread when waiting for
 * results from the GPU. This can increase latency when waiting for the GPU,
 * but can increase the performance of CPU threads performing work in parallel
 * with the GPU.
 *
 * - ::CU_CTX_BLOCKING_SYNC: Instruct CUDA to block the CPU thread on a
 * synchronization primitive when waiting for the GPU to finish work.
 *
 * - ::CU_CTX_MAP_HOST: Instruct CUDA to support mapped pinned allocations.
 * This flag must be set in order to allocate pinned host memory that is
 * accessible to the GPU.
 *
 * - ::CU_CTX_LMEM_RESIZE_TO_MAX: Instruct CUDA to not reduce local memory
 * after resizing local memory for a kernel. This can prevent thrashing by
 * local memory allocations when launching many kernels with high local
 * memory usage at the cost of potentially increased memory usage.
 *
 * <b>Note to Linux users</b>:
 *
 * Context creation will fail with ::CUDA_ERROR_UNKNOWN if the compute mode of
 * the device is ::CU_COMPUTEMODE_PROHIBITED. Similarly, context creation will
 * also fail with ::CUDA_ERROR_UNKNOWN if the compute mode for the device is
 * set to ::CU_COMPUTEMODE_EXCLUSIVE and there is already an active context on
 * the device. The function ::cuDeviceGetAttribute() can be used with
 * ::CU_DEVICE_ATTRIBUTE_COMPUTE_MODE to determine the compute mode of the
 * device. The <i>nvidia-smi</i> tool can be used to set the compute mode for
 * devices. Documentation for <i>nvidia-smi</i> can be obtained by passing a
 * -h option to it.
 *
 * \param pctx - Returned context handle of the new context
 * \param flags - Context creation flags
 * \param dev - Device to create context on
 *
 * \return
 * ::CUDA_SUCCESS,
 * ::CUDA_ERROR_DEINITIALIZED,
 * ::CUDA_ERROR_NOT_INITIALIZED,
 * ::CUDA_ERROR_INVALID_CONTEXT,
 * ::CUDA_ERROR_INVALID_DEVICE,
 * ::CUDA_ERROR_INVALID_VALUE,
 * ::CUDA_ERROR_OUT_OF_MEMORY,
 * ::CUDA_ERROR_UNKNOWN
 * \notefnerr
 *
 * \sa ::cuCtxAttach,
 * ::cuCtxDestroy,
 * ::cuCtxDetach,
 * ::cuCtxGetApiVersion,
 * ::cuCtxGetCacheConfig,
 * ::cuCtxGetDevice,
 * ::cuCtxGetLimit,
 * ::cuCtxPopCurrent,
 * ::cuCtxPushCurrent,
 * ::cuCtxSetCacheConfig,
 * ::cuCtxSetLimit,
 * ::cuCtxSynchronize
 */
CUresult CUDAAPI cuCtxCreate(CUcontext *pctx, unsigned int flags, CUdevice dev);
#endif /* __CUDA_API_VERSION >= 3020 */
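/*
 * Editor's note: typical context setup and teardown (editorial sketch, not
 * part of the original header; \p dev is a previously obtained device
 * handle, and ::CU_CTX_MAP_HOST is included only because mapped pinned
 * allocations later in this header require it):
 *
 * \code
 *   CUcontext ctx;
 *   cuCtxCreate(&ctx, CU_CTX_SCHED_YIELD | CU_CTX_MAP_HOST, dev);
 *   // ... use the context on this thread ...
 *   cuCtxDestroy(ctx);               // usage count is still 1 here
 * \endcode
 */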
/**
 * \brief Destroy the current context or a floating CUDA context
 *
 * Destroys the CUDA context specified by \p ctx. If the context usage count is
 * not equal to 1, or the context is current to any CPU thread other than the
 * current one, this function fails. Floating contexts (detached from a CPU
 * thread via ::cuCtxPopCurrent()) may be destroyed by this function.
 *
 * \param ctx - Context to destroy
 *
 * \return
 * ::CUDA_SUCCESS,
 * ::CUDA_ERROR_DEINITIALIZED,
 * ::CUDA_ERROR_NOT_INITIALIZED,
 * ::CUDA_ERROR_INVALID_CONTEXT,
 * ::CUDA_ERROR_INVALID_VALUE
 * \notefnerr
 *
 * \sa ::cuCtxAttach,
 * ::cuCtxCreate,
 * ::cuCtxDetach,
 * ::cuCtxGetApiVersion,
 * ::cuCtxGetCacheConfig,
 * ::cuCtxGetDevice,
 * ::cuCtxGetLimit,
 * ::cuCtxPopCurrent,
 * ::cuCtxPushCurrent,
 * ::cuCtxSetCacheConfig,
 * ::cuCtxSetLimit,
 * ::cuCtxSynchronize
 */
CUresult CUDAAPI cuCtxDestroy(CUcontext ctx);
/**
 * \brief Increment a context's usage-count
 *
 * Increments the usage count of the context and passes back a context handle
 * in \p *pctx that must be passed to ::cuCtxDetach() when the application is
 * done with the context. ::cuCtxAttach() fails if there is no context current
 * to the thread.
 *
 * Currently, the \p flags parameter must be 0.
 *
 * \param pctx - Returned context handle of the current context
 * \param flags - Context attach flags (must be 0)
 *
 * \return
 * ::CUDA_SUCCESS,
 * ::CUDA_ERROR_DEINITIALIZED,
 * ::CUDA_ERROR_NOT_INITIALIZED,
 * ::CUDA_ERROR_INVALID_CONTEXT,
 * ::CUDA_ERROR_INVALID_VALUE
 * \notefnerr
 *
 * \sa ::cuCtxCreate,
 * ::cuCtxDestroy,
 * ::cuCtxDetach,
 * ::cuCtxGetApiVersion,
 * ::cuCtxGetCacheConfig,
 * ::cuCtxGetDevice,
 * ::cuCtxGetLimit,
 * ::cuCtxPopCurrent,
 * ::cuCtxPushCurrent,
 * ::cuCtxSetCacheConfig,
 * ::cuCtxSetLimit,
 * ::cuCtxSynchronize
 */
CUresult CUDAAPI cuCtxAttach(CUcontext *pctx, unsigned int flags);
/**
 * \brief Decrement a context's usage-count
 *
 * Decrements the usage count of the context \p ctx, and destroys the context
 * if the usage count goes to 0. The context must be a handle that was passed
 * back by ::cuCtxCreate() or ::cuCtxAttach(), and must be current to the
 * calling thread.
 *
 * \param ctx - Context to destroy
 *
 * \return
 * ::CUDA_SUCCESS,
 * ::CUDA_ERROR_DEINITIALIZED,
 * ::CUDA_ERROR_NOT_INITIALIZED,
 * ::CUDA_ERROR_INVALID_CONTEXT
 * \notefnerr
 *
 * \sa ::cuCtxAttach,
 * ::cuCtxCreate,
 * ::cuCtxDestroy,
 * ::cuCtxGetApiVersion,
 * ::cuCtxGetCacheConfig,
 * ::cuCtxGetDevice,
 * ::cuCtxGetLimit,
 * ::cuCtxPopCurrent,
 * ::cuCtxPushCurrent,
 * ::cuCtxSetCacheConfig,
 * ::cuCtxSetLimit,
 * ::cuCtxSynchronize
 */
CUresult CUDAAPI cuCtxDetach(CUcontext ctx);
/**
 * \brief Pushes a floating context on the current CPU thread
 *
 * Pushes the given context \p ctx onto the CPU thread's stack of current
 * contexts. The specified context becomes the CPU thread's current context, so
 * all CUDA functions that operate on the current context are affected.
 *
 * The previous current context may be made current again by calling
 * ::cuCtxDestroy() or ::cuCtxPopCurrent().
 *
 * The context must be "floating," i.e. not attached to any thread. Contexts are
 * made to float by calling ::cuCtxPopCurrent().
 *
 * \param ctx - Floating context to attach
 *
 * \return
 * ::CUDA_SUCCESS,
 * ::CUDA_ERROR_DEINITIALIZED,
 * ::CUDA_ERROR_NOT_INITIALIZED,
 * ::CUDA_ERROR_INVALID_CONTEXT,
 * ::CUDA_ERROR_INVALID_VALUE
 * \notefnerr
 *
 * \sa ::cuCtxAttach,
 * ::cuCtxCreate,
 * ::cuCtxDestroy,
 * ::cuCtxDetach,
 * ::cuCtxGetApiVersion,
 * ::cuCtxGetCacheConfig,
 * ::cuCtxGetDevice,
 * ::cuCtxGetLimit,
 * ::cuCtxPopCurrent,
 * ::cuCtxSetCacheConfig,
 * ::cuCtxSetLimit,
 * ::cuCtxSynchronize
 */
CUresult CUDAAPI cuCtxPushCurrent(CUcontext ctx);
/**
 * \brief Pops the current CUDA context from the current CPU thread
 *
 * Pops the current CUDA context from the CPU thread. The CUDA context must
 * have a usage count of 1. CUDA contexts have a usage count of 1 upon
 * creation; the usage count may be incremented with ::cuCtxAttach() and
 * decremented with ::cuCtxDetach().
 *
 * If successful, ::cuCtxPopCurrent() passes back the old context handle in
 * \p *pctx. That context may then be made current to a different CPU thread
 * by calling ::cuCtxPushCurrent().
 *
 * Floating contexts may be destroyed by calling ::cuCtxDestroy().
 *
 * If a context was current to the CPU thread before ::cuCtxCreate() or
 * ::cuCtxPushCurrent() was called, this function makes that context current to
 * the CPU thread again.
 *
 * \param pctx - Returned new context handle
 *
 * \return
 * ::CUDA_SUCCESS,
 * ::CUDA_ERROR_DEINITIALIZED,
 * ::CUDA_ERROR_NOT_INITIALIZED,
 * ::CUDA_ERROR_INVALID_CONTEXT
 * \notefnerr
 *
 * \sa ::cuCtxAttach,
 * ::cuCtxCreate,
 * ::cuCtxDestroy,
 * ::cuCtxDetach,
 * ::cuCtxGetApiVersion,
 * ::cuCtxGetCacheConfig,
 * ::cuCtxGetDevice,
 * ::cuCtxGetLimit,
 * ::cuCtxPushCurrent,
 * ::cuCtxSetCacheConfig,
 * ::cuCtxSetLimit,
 * ::cuCtxSynchronize
 */
CUresult CUDAAPI cuCtxPopCurrent(CUcontext *pctx);
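/*
 * Editor's note: a sketch of migrating a context between CPU threads with
 * the pop/push pair (not part of the original header):
 *
 * \code
 *   CUcontext ctx;
 *   cuCtxPopCurrent(&ctx);    // thread A: ctx is now "floating"
 *   // hand ctx to thread B by any thread-safe means, then on thread B:
 *   cuCtxPushCurrent(ctx);    // thread B: ctx becomes current here
 * \endcode
 */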
/**
 * \brief Returns the device ID for the current context
 *
 * Returns in \p *device the ordinal of the current context's device.
 *
 * \param device - Returned device ID for the current context
 *
 * \return
 * ::CUDA_SUCCESS,
 * ::CUDA_ERROR_DEINITIALIZED,
 * ::CUDA_ERROR_NOT_INITIALIZED,
 * ::CUDA_ERROR_INVALID_CONTEXT,
 * ::CUDA_ERROR_INVALID_VALUE
 * \notefnerr
 *
 * \sa ::cuCtxAttach,
 * ::cuCtxCreate,
 * ::cuCtxDestroy,
 * ::cuCtxDetach,
 * ::cuCtxGetApiVersion,
 * ::cuCtxGetCacheConfig,
 * ::cuCtxGetLimit,
 * ::cuCtxPopCurrent,
 * ::cuCtxPushCurrent,
 * ::cuCtxSetCacheConfig,
 * ::cuCtxSetLimit,
 * ::cuCtxSynchronize
 */
CUresult CUDAAPI cuCtxGetDevice(CUdevice *device);
/**
 * \brief Block for a context's tasks to complete
 *
 * Blocks until the device has completed all preceding requested tasks.
 * ::cuCtxSynchronize() returns an error if one of the preceding tasks failed.
 * If the context was created with the ::CU_CTX_BLOCKING_SYNC flag, the CPU
 * thread will block until the GPU context has finished its work.
 *
 * \return
 * ::CUDA_SUCCESS,
 * ::CUDA_ERROR_DEINITIALIZED,
 * ::CUDA_ERROR_NOT_INITIALIZED,
 * ::CUDA_ERROR_INVALID_CONTEXT
 * \notefnerr
 *
 * \sa ::cuCtxAttach,
 * ::cuCtxCreate,
 * ::cuCtxDestroy,
 * ::cuCtxDetach,
 * ::cuCtxGetApiVersion,
 * ::cuCtxGetCacheConfig,
 * ::cuCtxGetDevice,
 * ::cuCtxGetLimit,
 * ::cuCtxPopCurrent,
 * ::cuCtxPushCurrent,
 * ::cuCtxSetCacheConfig,
 * ::cuCtxSetLimit
 */
CUresult CUDAAPI cuCtxSynchronize(void);
/**
 * \brief Set resource limits
 *
 * Setting \p limit to \p value is a request by the application to update
 * the current limit maintained by the context. The driver is free to
 * modify the requested value to meet h/w requirements (this could be
 * clamping to minimum or maximum values, rounding up to nearest element
 * size, etc). The application can use ::cuCtxGetLimit() to find out exactly
 * what the limit has been set to.
 *
 * Setting each ::CUlimit has its own specific restrictions, so each is
 * discussed here.
 *
 * - ::CU_LIMIT_STACK_SIZE controls the stack size of each GPU thread.
 *   This limit is only applicable to devices of compute capability
 *   2.0 and higher. Attempting to set this limit on devices of
 *   compute capability less than 2.0 will result in the error
 *   ::CUDA_ERROR_UNSUPPORTED_LIMIT being returned.
 *
 * - ::CU_LIMIT_PRINTF_FIFO_SIZE controls the size of the FIFO used
 *   by the ::printf() device system call. Setting
 *   ::CU_LIMIT_PRINTF_FIFO_SIZE must be performed before launching any
 *   kernel that uses the ::printf() device system call, otherwise
 *   ::CUDA_ERROR_INVALID_VALUE will be returned.
 *   This limit is only applicable to devices of compute capability
 *   2.0 and higher. Attempting to set this limit on devices of
 *   compute capability less than 2.0 will result in the error
 *   ::CUDA_ERROR_UNSUPPORTED_LIMIT being returned.
 *
 * - ::CU_LIMIT_MALLOC_HEAP_SIZE controls the size of the heap used
 *   by the ::malloc() and ::free() device system calls. Setting
 *   ::CU_LIMIT_MALLOC_HEAP_SIZE must be performed before launching
 *   any kernel that uses the ::malloc() or ::free() device system calls,
 *   otherwise ::CUDA_ERROR_INVALID_VALUE will be returned.
 *   This limit is only applicable to devices of compute capability
 *   2.0 and higher. Attempting to set this limit on devices of
 *   compute capability less than 2.0 will result in the error
 *   ::CUDA_ERROR_UNSUPPORTED_LIMIT being returned.
 *
 * \param limit - Limit to set
 * \param value - Size in bytes of limit
 *
 * \return
 * ::CUDA_SUCCESS,
 * ::CUDA_ERROR_INVALID_VALUE,
 * ::CUDA_ERROR_UNSUPPORTED_LIMIT
 * \notefnerr
 *
 * \sa ::cuCtxAttach,
 * ::cuCtxCreate,
 * ::cuCtxDestroy,
 * ::cuCtxDetach,
 * ::cuCtxGetApiVersion,
 * ::cuCtxGetCacheConfig,
 * ::cuCtxGetDevice,
 * ::cuCtxGetLimit,
 * ::cuCtxPopCurrent,
 * ::cuCtxPushCurrent,
 * ::cuCtxSetCacheConfig,
 * ::cuCtxSynchronize
 */
CUresult CUDAAPI cuCtxSetLimit(CUlimit limit, size_t value);
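/*
 * Editor's note: setting and verifying a limit (editorial sketch; only
 * valid on devices of compute capability 2.0 or higher, as documented
 * above, and the driver may round the requested value):
 *
 * \code
 *   size_t fifoSize = 0;
 *   cuCtxSetLimit(CU_LIMIT_PRINTF_FIFO_SIZE, 4 * 1024 * 1024);
 *   cuCtxGetLimit(&fifoSize, CU_LIMIT_PRINTF_FIFO_SIZE);  // actual value
 * \endcode
 */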
/**
 * \brief Returns resource limits
 *
 * Returns in \p *pvalue the current size of \p limit. The supported
 * ::CUlimit values are:
 * - ::CU_LIMIT_STACK_SIZE: stack size of each GPU thread;
 * - ::CU_LIMIT_PRINTF_FIFO_SIZE: size of the FIFO used by the
 *   ::printf() device system call;
 * - ::CU_LIMIT_MALLOC_HEAP_SIZE: size of the heap used by the
 *   ::malloc() and ::free() device system calls.
 *
 * \param limit - Limit to query
 * \param pvalue - Returned size in bytes of limit
 *
 * \return
 * ::CUDA_SUCCESS,
 * ::CUDA_ERROR_INVALID_VALUE,
 * ::CUDA_ERROR_UNSUPPORTED_LIMIT
 * \notefnerr
 *
 * \sa ::cuCtxAttach,
 * ::cuCtxCreate,
 * ::cuCtxDestroy,
 * ::cuCtxDetach,
 * ::cuCtxGetApiVersion,
 * ::cuCtxGetCacheConfig,
 * ::cuCtxGetDevice,
 * ::cuCtxPopCurrent,
 * ::cuCtxPushCurrent,
 * ::cuCtxSetCacheConfig,
 * ::cuCtxSetLimit,
 * ::cuCtxSynchronize
 */
CUresult CUDAAPI cuCtxGetLimit(size_t *pvalue, CUlimit limit);
/**
 * \brief Returns the preferred cache configuration for the current context
 *
 * On devices where the L1 cache and shared memory use the same hardware
 * resources, this returns through \p pconfig the preferred cache configuration
 * for the current context. This is only a preference. The driver will use
 * the requested configuration if possible, but it is free to choose a different
 * configuration if required to execute functions.
 *
 * This will return a \p pconfig of ::CU_FUNC_CACHE_PREFER_NONE on devices
 * where the size of the L1 cache and shared memory are fixed.
 *
 * The supported cache configurations are:
 * - ::CU_FUNC_CACHE_PREFER_NONE: no preference for shared memory or L1 (default)
 * - ::CU_FUNC_CACHE_PREFER_SHARED: prefer larger shared memory and smaller L1 cache
 * - ::CU_FUNC_CACHE_PREFER_L1: prefer larger L1 cache and smaller shared memory
 *
 * \param pconfig - Returned cache configuration
 *
 * \return
 * ::CUDA_SUCCESS,
 * ::CUDA_ERROR_DEINITIALIZED,
 * ::CUDA_ERROR_NOT_INITIALIZED,
 * ::CUDA_ERROR_INVALID_CONTEXT,
 * ::CUDA_ERROR_INVALID_VALUE
 * \notefnerr
 *
 * \sa ::cuCtxAttach,
 * ::cuCtxCreate,
 * ::cuCtxDestroy,
 * ::cuCtxDetach,
 * ::cuCtxGetApiVersion,
 * ::cuCtxGetDevice,
 * ::cuCtxGetLimit,
 * ::cuCtxPopCurrent,
 * ::cuCtxPushCurrent,
 * ::cuCtxSetCacheConfig,
 * ::cuCtxSetLimit,
 * ::cuCtxSynchronize,
 * ::cuFuncSetCacheConfig
 */
CUresult CUDAAPI cuCtxGetCacheConfig(CUfunc_cache *pconfig);
/**
 * \brief Sets the preferred cache configuration for the current context
 *
 * On devices where the L1 cache and shared memory use the same hardware
 * resources, this sets through \p config the preferred cache configuration for
 * the current context. This is only a preference. The driver will use
 * the requested configuration if possible, but it is free to choose a different
 * configuration if required to execute the function. Any function preference
 * set via ::cuFuncSetCacheConfig() will be preferred over this context-wide
 * setting. Setting the context-wide cache configuration to
 * ::CU_FUNC_CACHE_PREFER_NONE will cause subsequent kernel launches to prefer
 * to not change the cache configuration unless required to launch the kernel.
 *
 * This setting does nothing on devices where the size of the L1 cache and
 * shared memory are fixed.
 *
 * Launching a kernel with a different preference than the most recent
 * preference setting may insert a device-side synchronization point.
 *
 * The supported cache configurations are:
 * - ::CU_FUNC_CACHE_PREFER_NONE: no preference for shared memory or L1 (default)
 * - ::CU_FUNC_CACHE_PREFER_SHARED: prefer larger shared memory and smaller L1 cache
 * - ::CU_FUNC_CACHE_PREFER_L1: prefer larger L1 cache and smaller shared memory
 *
 * \param config - Requested cache configuration
 *
 * \return
 * ::CUDA_SUCCESS,
 * ::CUDA_ERROR_DEINITIALIZED,
 * ::CUDA_ERROR_NOT_INITIALIZED,
 * ::CUDA_ERROR_INVALID_CONTEXT,
 * ::CUDA_ERROR_INVALID_VALUE
 * \notefnerr
 *
 * \sa ::cuCtxAttach,
 * ::cuCtxCreate,
 * ::cuCtxDestroy,
 * ::cuCtxDetach,
 * ::cuCtxGetApiVersion,
 * ::cuCtxGetCacheConfig,
 * ::cuCtxGetDevice,
 * ::cuCtxGetLimit,
 * ::cuCtxPopCurrent,
 * ::cuCtxPushCurrent,
 * ::cuCtxSetLimit,
 * ::cuCtxSynchronize,
 * ::cuFuncSetCacheConfig
 */
CUresult CUDAAPI cuCtxSetCacheConfig(CUfunc_cache config);
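/*
 * Editor's note: requesting a context-wide cache preference (editorial
 * sketch; this is a no-op on devices whose L1/shared memory split is
 * fixed, as documented above):
 *
 * \code
 *   cuCtxSetCacheConfig(CU_FUNC_CACHE_PREFER_SHARED);
 * \endcode
 */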
/**
 * \brief Gets the context's API version
 *
 * Returns the API version used to create \p ctx in \p version. If \p ctx
 * is NULL, returns the API version used to create the currently bound
 * context.
 *
 * This will return the API version used to create a context (for example,
 * 3010 or 3020), which library developers can use to direct callers to a
 * specific API version. Note that this API version may not be the same as
 * returned by ::cuDriverGetVersion().
 *
 * \param ctx - Context to check
 * \param version - Pointer to version
 *
 * \return
 * ::CUDA_SUCCESS,
 * ::CUDA_ERROR_DEINITIALIZED,
 * ::CUDA_ERROR_NOT_INITIALIZED,
 * ::CUDA_ERROR_INVALID_CONTEXT,
 * ::CUDA_ERROR_UNKNOWN
 * \notefnerr
 *
 * \sa ::cuCtxAttach,
 * ::cuCtxCreate,
 * ::cuCtxDestroy,
 * ::cuCtxDetach,
 * ::cuCtxGetDevice,
 * ::cuCtxGetLimit,
 * ::cuCtxPopCurrent,
 * ::cuCtxPushCurrent,
 * ::cuCtxSetCacheConfig,
 * ::cuCtxSetLimit,
 * ::cuCtxSynchronize
 */
CUresult CUDAAPI cuCtxGetApiVersion(CUcontext ctx, unsigned int *version);

/** @} */ /* END CUDA_CTX */
/**
 * \defgroup CUDA_MODULE Module Management
 *
 * This section describes the module management functions of the low-level CUDA
 * driver application programming interface.
 *
 * @{
 */
/**
 * \brief Loads a compute module
 *
 * Takes a filename \p fname and loads the corresponding module \p module into
 * the current context. The CUDA driver API does not attempt to lazily
 * allocate the resources needed by a module; if the memory for functions and
 * data (constant and global) needed by the module cannot be allocated,
 * ::cuModuleLoad() fails. The file should be a \e cubin file as output by
 * \b nvcc or a \e PTX file, either as output by \b nvcc or handwritten.
 *
 * \param module - Returned module
 * \param fname - Filename of module to load
 *
 * \return
 * ::CUDA_SUCCESS,
 * ::CUDA_ERROR_DEINITIALIZED,
 * ::CUDA_ERROR_NOT_INITIALIZED,
 * ::CUDA_ERROR_INVALID_CONTEXT,
 * ::CUDA_ERROR_INVALID_VALUE,
 * ::CUDA_ERROR_NOT_FOUND,
 * ::CUDA_ERROR_OUT_OF_MEMORY,
 * ::CUDA_ERROR_FILE_NOT_FOUND,
 * ::CUDA_ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND,
 * ::CUDA_ERROR_SHARED_OBJECT_INIT_FAILED
 * \notefnerr
 *
 * \sa ::cuModuleGetFunction,
 * ::cuModuleGetGlobal,
 * ::cuModuleGetTexRef,
 * ::cuModuleLoadData,
 * ::cuModuleLoadDataEx,
 * ::cuModuleLoadFatBinary,
 * ::cuModuleUnload
 */
CUresult CUDAAPI cuModuleLoad(CUmodule *module, const char *fname);
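/*
 * Editor's note: loading a module from a cubin/PTX file and fetching a
 * kernel handle (editorial sketch; "kernel.ptx" and "mykernel" are
 * placeholder names, error checking omitted):
 *
 * \code
 *   CUmodule mod;
 *   CUfunction fn;
 *   cuModuleLoad(&mod, "kernel.ptx");
 *   cuModuleGetFunction(&fn, mod, "mykernel");
 *   // ... launch fn ...
 *   cuModuleUnload(mod);
 * \endcode
 */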
/**
 * \brief Load a module's data
 *
 * Takes a pointer \p image and loads the corresponding module \p module into
 * the current context. The pointer may be obtained by mapping a \e cubin or
 * \e PTX file, passing a \e cubin or \e PTX file as a NULL-terminated text
 * string, or incorporating a \e cubin object into the executable resources
 * and using operating system calls such as Windows \c FindResource() to
 * obtain the pointer.
 *
 * \param module - Returned module
 * \param image - Module data to load
 *
 * \return
 * ::CUDA_SUCCESS,
 * ::CUDA_ERROR_DEINITIALIZED,
 * ::CUDA_ERROR_NOT_INITIALIZED,
 * ::CUDA_ERROR_INVALID_CONTEXT,
 * ::CUDA_ERROR_INVALID_VALUE,
 * ::CUDA_ERROR_OUT_OF_MEMORY,
 * ::CUDA_ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND,
 * ::CUDA_ERROR_SHARED_OBJECT_INIT_FAILED
 * \notefnerr
 *
 * \sa ::cuModuleGetFunction,
 * ::cuModuleGetGlobal,
 * ::cuModuleGetTexRef,
 * ::cuModuleLoad,
 * ::cuModuleLoadDataEx,
 * ::cuModuleLoadFatBinary,
 * ::cuModuleUnload
 */
CUresult CUDAAPI cuModuleLoadData(CUmodule *module, const void *image);
/**
 * \brief Load a module's data with options
 *
 * Takes a pointer \p image and loads the corresponding module \p module into
 * the current context. The pointer may be obtained by mapping a \e cubin or
 * \e PTX file, passing a \e cubin or \e PTX file as a NULL-terminated text
 * string, or incorporating a \e cubin object into the executable resources
 * and using operating system calls such as Windows \c FindResource() to
 * obtain the pointer. Options are passed as an array via \p options and any
 * corresponding parameters are passed in \p optionValues. The number of total
 * options is supplied via \p numOptions. Any outputs will be returned via
 * \p optionValues. Supported options are (types for the option values are
 * specified in parentheses after the option name):
 *
 * - ::CU_JIT_MAX_REGISTERS: (unsigned int) input specifies the maximum number
 *   of registers per thread;
 * - ::CU_JIT_THREADS_PER_BLOCK: (unsigned int) input specifies number of
 *   threads per block to target compilation for; output returns the number of
 *   threads the compiler actually targeted;
 * - ::CU_JIT_WALL_TIME: (float) output returns the float value of wall clock
 *   time, in milliseconds, spent compiling the \e PTX code;
 * - ::CU_JIT_INFO_LOG_BUFFER: (char*) input is a pointer to a buffer in
 *   which to print any informational log messages from \e PTX assembly (the
 *   buffer size is specified via option ::CU_JIT_INFO_LOG_BUFFER_SIZE_BYTES);
 * - ::CU_JIT_INFO_LOG_BUFFER_SIZE_BYTES: (unsigned int) input is the size in
 *   bytes of the buffer; output is the number of bytes filled with messages;
 * - ::CU_JIT_ERROR_LOG_BUFFER: (char*) input is a pointer to a buffer in
 *   which to print any error log messages from \e PTX assembly (the buffer size
 *   is specified via option ::CU_JIT_ERROR_LOG_BUFFER_SIZE_BYTES);
 * - ::CU_JIT_ERROR_LOG_BUFFER_SIZE_BYTES: (unsigned int) input is the size in
 *   bytes of the buffer; output is the number of bytes filled with messages;
 * - ::CU_JIT_OPTIMIZATION_LEVEL: (unsigned int) input is the level of
 *   optimization to apply to generated code (0 - 4), with 4 being the default
 *   and highest level;
 * - ::CU_JIT_TARGET_FROM_CUCONTEXT: (No option value) causes compilation
 *   target to be determined based on current attached context (default);
 * - ::CU_JIT_TARGET: (unsigned int for enumerated type ::CUjit_target_enum)
 *   input is the compilation target based on supplied ::CUjit_target_enum;
 *   possible values are:
 *   - ::CU_TARGET_COMPUTE_10
 *   - ::CU_TARGET_COMPUTE_11
 *   - ::CU_TARGET_COMPUTE_12
 *   - ::CU_TARGET_COMPUTE_13
 *   - ::CU_TARGET_COMPUTE_20
 * - ::CU_JIT_FALLBACK_STRATEGY: (unsigned int for enumerated type
 *   ::CUjit_fallback_enum) chooses fallback strategy if matching cubin is not
 *   found; possible values are:
 *   - ::CU_PREFER_PTX
 *   - ::CU_PREFER_BINARY
 *
 * \param module - Returned module
 * \param image - Module data to load
 * \param numOptions - Number of options
 * \param options - Options for JIT
 * \param optionValues - Option values for JIT
 *
 * \return
 * ::CUDA_SUCCESS,
 * ::CUDA_ERROR_DEINITIALIZED,
 * ::CUDA_ERROR_NOT_INITIALIZED,
 * ::CUDA_ERROR_INVALID_CONTEXT,
 * ::CUDA_ERROR_INVALID_VALUE,
 * ::CUDA_ERROR_OUT_OF_MEMORY,
 * ::CUDA_ERROR_NO_BINARY_FOR_GPU,
 * ::CUDA_ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND,
 * ::CUDA_ERROR_SHARED_OBJECT_INIT_FAILED
 * \notefnerr
 *
 * \sa ::cuModuleGetFunction,
 * ::cuModuleGetGlobal,
 * ::cuModuleGetTexRef,
 * ::cuModuleLoad,
 * ::cuModuleLoadData,
 * ::cuModuleLoadFatBinary,
 * ::cuModuleUnload
 */
CUresult CUDAAPI cuModuleLoadDataEx(CUmodule *module, const void *image, unsigned int numOptions, CUjit_option *options, void **optionValues);
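/*
 * Editor's note: a sketch of passing JIT options to ::cuModuleLoadDataEx()
 * (not part of the original header; \p ptxImage is a placeholder for a
 * NULL-terminated PTX string, and <stdio.h> is assumed). Scalar option
 * values travel through the void* array by value:
 *
 * \code
 *   char errLog[8192] = { 0 };
 *   CUjit_option opts[] = { CU_JIT_ERROR_LOG_BUFFER,
 *                           CU_JIT_ERROR_LOG_BUFFER_SIZE_BYTES };
 *   void *vals[] = { errLog, (void *)(size_t)sizeof(errLog) };
 *   CUmodule mod;
 *   if (cuModuleLoadDataEx(&mod, ptxImage, 2, opts, vals) != CUDA_SUCCESS)
 *       fprintf(stderr, "JIT failed:\n%s\n", errLog);
 * \endcode
 */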
/**
 * \brief Load a module's data
 *
 * Takes a pointer \p fatCubin and loads the corresponding module \p module
 * into the current context. The pointer represents a <i>fat binary</i> object,
 * which is a collection of different \e cubin files, all representing the same
 * device code, but compiled and optimized for different architectures. There
 * is currently no documented API for constructing and using fat binary objects
 * by programmers, and therefore this function is an internal function in this
 * version of CUDA. More information can be found in the \b nvcc document.
 *
 * \param module - Returned module
 * \param fatCubin - Fat binary to load
 *
 * \return
 * ::CUDA_SUCCESS,
 * ::CUDA_ERROR_DEINITIALIZED,
 * ::CUDA_ERROR_NOT_INITIALIZED,
 * ::CUDA_ERROR_INVALID_CONTEXT,
 * ::CUDA_ERROR_INVALID_VALUE,
 * ::CUDA_ERROR_NOT_FOUND,
 * ::CUDA_ERROR_OUT_OF_MEMORY,
 * ::CUDA_ERROR_NO_BINARY_FOR_GPU,
 * ::CUDA_ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND,
 * ::CUDA_ERROR_SHARED_OBJECT_INIT_FAILED
 * \notefnerr
 *
 * \sa ::cuModuleGetFunction,
 * ::cuModuleGetGlobal,
 * ::cuModuleGetTexRef,
 * ::cuModuleLoad,
 * ::cuModuleLoadData,
 * ::cuModuleLoadDataEx,
 * ::cuModuleUnload
 */
CUresult CUDAAPI cuModuleLoadFatBinary(CUmodule *module, const void *fatCubin);
/**
 * \brief Unloads a module
 *
 * Unloads a module \p hmod from the current context.
 *
 * \param hmod - Module to unload
 *
 * \return
 * ::CUDA_SUCCESS,
 * ::CUDA_ERROR_DEINITIALIZED,
 * ::CUDA_ERROR_NOT_INITIALIZED,
 * ::CUDA_ERROR_INVALID_CONTEXT,
 * ::CUDA_ERROR_INVALID_VALUE
 * \notefnerr
 *
 * \sa ::cuModuleGetFunction,
 * ::cuModuleGetGlobal,
 * ::cuModuleGetTexRef,
 * ::cuModuleLoad,
 * ::cuModuleLoadData,
 * ::cuModuleLoadDataEx,
 * ::cuModuleLoadFatBinary
 */
CUresult CUDAAPI cuModuleUnload(CUmodule hmod);
/**
 * \brief Returns a function handle
 *
 * Returns in \p *hfunc the handle of the function of name \p name located in
 * module \p hmod. If no function of that name exists, ::cuModuleGetFunction()
 * returns ::CUDA_ERROR_NOT_FOUND.
 *
 * \param hfunc - Returned function handle
 * \param hmod - Module to retrieve function from
 * \param name - Name of function to retrieve
 *
 * \return
 * ::CUDA_SUCCESS,
 * ::CUDA_ERROR_DEINITIALIZED,
 * ::CUDA_ERROR_NOT_INITIALIZED,
 * ::CUDA_ERROR_INVALID_CONTEXT,
 * ::CUDA_ERROR_INVALID_VALUE,
 * ::CUDA_ERROR_NOT_FOUND
 * \notefnerr
 *
 * \sa ::cuModuleGetGlobal,
 * ::cuModuleGetTexRef,
 * ::cuModuleLoad,
 * ::cuModuleLoadData,
 * ::cuModuleLoadDataEx,
 * ::cuModuleLoadFatBinary,
 * ::cuModuleUnload
 */
CUresult CUDAAPI cuModuleGetFunction(CUfunction *hfunc, CUmodule hmod, const char *name);
#if __CUDA_API_VERSION >= 3020
/**
 * \brief Returns a global pointer from a module
 *
 * Returns in \p *dptr and \p *bytes the base pointer and size of the
 * global of name \p name located in module \p hmod. If no variable of that name
 * exists, ::cuModuleGetGlobal() returns ::CUDA_ERROR_NOT_FOUND. Both
 * parameters \p dptr and \p bytes are optional. If one of them is
 * NULL, it is ignored.
 *
 * \param dptr - Returned global device pointer
 * \param bytes - Returned global size in bytes
 * \param hmod - Module to retrieve global from
 * \param name - Name of global to retrieve
 *
 * \return
 * ::CUDA_SUCCESS,
 * ::CUDA_ERROR_DEINITIALIZED,
 * ::CUDA_ERROR_NOT_INITIALIZED,
 * ::CUDA_ERROR_INVALID_CONTEXT,
 * ::CUDA_ERROR_INVALID_VALUE,
 * ::CUDA_ERROR_NOT_FOUND
 * \notefnerr
 *
 * \sa ::cuModuleGetFunction,
 * ::cuModuleGetTexRef,
 * ::cuModuleLoad,
 * ::cuModuleLoadData,
 * ::cuModuleLoadDataEx,
 * ::cuModuleLoadFatBinary,
 * ::cuModuleUnload
 */
CUresult CUDAAPI cuModuleGetGlobal(CUdeviceptr *dptr, size_t *bytes, CUmodule hmod, const char *name);
#endif /* __CUDA_API_VERSION >= 3020 */
/**
 * \brief Returns a handle to a texture reference
 *
 * Returns in \p *pTexRef the handle of the texture reference of name \p name
 * in the module \p hmod. If no texture reference of that name exists,
 * ::cuModuleGetTexRef() returns ::CUDA_ERROR_NOT_FOUND. This texture reference
 * handle should not be destroyed, since it will be destroyed when the module
 * is unloaded.
 *
 * \param pTexRef - Returned texture reference
 * \param hmod - Module to retrieve texture reference from
 * \param name - Name of texture reference to retrieve
 *
 * \return
 * ::CUDA_SUCCESS,
 * ::CUDA_ERROR_DEINITIALIZED,
 * ::CUDA_ERROR_NOT_INITIALIZED,
 * ::CUDA_ERROR_INVALID_CONTEXT,
 * ::CUDA_ERROR_INVALID_VALUE,
 * ::CUDA_ERROR_NOT_FOUND
 * \notefnerr
 *
 * \sa ::cuModuleGetFunction,
 * ::cuModuleGetGlobal,
 * ::cuModuleGetSurfRef,
 * ::cuModuleLoad,
 * ::cuModuleLoadData,
 * ::cuModuleLoadDataEx,
 * ::cuModuleLoadFatBinary,
 * ::cuModuleUnload
 */
CUresult CUDAAPI cuModuleGetTexRef(CUtexref *pTexRef, CUmodule hmod, const char *name);
/**
 * \brief Returns a handle to a surface reference
 *
 * Returns in \p *pSurfRef the handle of the surface reference of name \p name
 * in the module \p hmod. If no surface reference of that name exists,
 * ::cuModuleGetSurfRef() returns ::CUDA_ERROR_NOT_FOUND.
 *
 * \param pSurfRef - Returned surface reference
 * \param hmod - Module to retrieve surface reference from
 * \param name - Name of surface reference to retrieve
 *
 * \return
 * ::CUDA_SUCCESS,
 * ::CUDA_ERROR_DEINITIALIZED,
 * ::CUDA_ERROR_NOT_INITIALIZED,
 * ::CUDA_ERROR_INVALID_CONTEXT,
 * ::CUDA_ERROR_INVALID_VALUE,
 * ::CUDA_ERROR_NOT_FOUND
 * \notefnerr
 *
 * \sa ::cuModuleGetFunction,
 * ::cuModuleGetGlobal,
 * ::cuModuleGetTexRef,
 * ::cuModuleLoad,
 * ::cuModuleLoadData,
 * ::cuModuleLoadDataEx,
 * ::cuModuleLoadFatBinary,
 * ::cuModuleUnload
 */
CUresult CUDAAPI cuModuleGetSurfRef(CUsurfref *pSurfRef, CUmodule hmod, const char *name);
/** @} */ /* END CUDA_MODULE */

/**
 * \defgroup CUDA_MEM Memory Management
 *
 * This section describes the memory management functions of the low-level CUDA
 * driver application programming interface.
 *
 * @{
 */
#if __CUDA_API_VERSION >= 3020
/**
 * \brief Gets free and total memory
 *
 * Returns in \p *free and \p *total respectively, the free and total amount of
 * memory available for allocation by the CUDA context, in bytes.
 *
 * \param free - Returned free memory in bytes
 * \param total - Returned total memory in bytes
 *
 * \return
 * ::CUDA_SUCCESS,
 * ::CUDA_ERROR_DEINITIALIZED,
 * ::CUDA_ERROR_NOT_INITIALIZED,
 * ::CUDA_ERROR_INVALID_CONTEXT,
 * ::CUDA_ERROR_INVALID_VALUE
 * \notefnerr
 *
 * \sa ::cuArray3DCreate, ::cuArray3DGetDescriptor, ::cuArrayCreate,
 * ::cuArrayDestroy, ::cuArrayGetDescriptor, ::cuMemAlloc, ::cuMemAllocHost,
 * ::cuMemAllocPitch, ::cuMemcpy2D, ::cuMemcpy2DAsync, ::cuMemcpy2DUnaligned,
 * ::cuMemcpy3D, ::cuMemcpy3DAsync, ::cuMemcpyAtoA, ::cuMemcpyAtoD,
 * ::cuMemcpyAtoH, ::cuMemcpyAtoHAsync, ::cuMemcpyDtoA, ::cuMemcpyDtoD, ::cuMemcpyDtoDAsync,
 * ::cuMemcpyDtoH, ::cuMemcpyDtoHAsync, ::cuMemcpyHtoA, ::cuMemcpyHtoAAsync,
 * ::cuMemcpyHtoD, ::cuMemcpyHtoDAsync, ::cuMemFree, ::cuMemFreeHost,
 * ::cuMemGetAddressRange, ::cuMemHostAlloc,
 * ::cuMemHostGetDevicePointer, ::cuMemsetD2D8, ::cuMemsetD2D16,
 * ::cuMemsetD2D32, ::cuMemsetD8, ::cuMemsetD16, ::cuMemsetD32
 */
CUresult CUDAAPI cuMemGetInfo(size_t *free, size_t *total);
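/*
 * Editor's note: checking memory pressure before a large allocation
 * (editorial sketch):
 *
 * \code
 *   size_t freeB = 0, totalB = 0;
 *   cuMemGetInfo(&freeB, &totalB);   // both sizes are in bytes
 * \endcode
 */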
/**
 * \brief Allocates device memory
 *
 * Allocates \p bytesize bytes of linear memory on the device and returns in
 * \p *dptr a pointer to the allocated memory. The allocated memory is suitably
 * aligned for any kind of variable. The memory is not cleared. If \p bytesize
 * is 0, ::cuMemAlloc() returns ::CUDA_ERROR_INVALID_VALUE.
 *
 * \param dptr - Returned device pointer
 * \param bytesize - Requested allocation size in bytes
 *
 * \return
 * ::CUDA_SUCCESS,
 * ::CUDA_ERROR_DEINITIALIZED,
 * ::CUDA_ERROR_NOT_INITIALIZED,
 * ::CUDA_ERROR_INVALID_CONTEXT,
 * ::CUDA_ERROR_INVALID_VALUE,
 * ::CUDA_ERROR_OUT_OF_MEMORY
 * \notefnerr
 *
 * \sa ::cuArray3DCreate, ::cuArray3DGetDescriptor, ::cuArrayCreate,
 * ::cuArrayDestroy, ::cuArrayGetDescriptor, ::cuMemAllocHost,
 * ::cuMemAllocPitch, ::cuMemcpy2D, ::cuMemcpy2DAsync, ::cuMemcpy2DUnaligned,
 * ::cuMemcpy3D, ::cuMemcpy3DAsync, ::cuMemcpyAtoA, ::cuMemcpyAtoD,
 * ::cuMemcpyAtoH, ::cuMemcpyAtoHAsync, ::cuMemcpyDtoA, ::cuMemcpyDtoD, ::cuMemcpyDtoDAsync,
 * ::cuMemcpyDtoH, ::cuMemcpyDtoHAsync, ::cuMemcpyHtoA, ::cuMemcpyHtoAAsync,
 * ::cuMemcpyHtoD, ::cuMemcpyHtoDAsync, ::cuMemFree, ::cuMemFreeHost,
 * ::cuMemGetAddressRange, ::cuMemGetInfo, ::cuMemHostAlloc,
 * ::cuMemHostGetDevicePointer, ::cuMemsetD2D8, ::cuMemsetD2D16,
 * ::cuMemsetD2D32, ::cuMemsetD8, ::cuMemsetD16, ::cuMemsetD32
 */
CUresult CUDAAPI cuMemAlloc(CUdeviceptr *dptr, size_t bytesize);
/**
 * \brief Allocates pitched device memory
 *
 * Allocates at least \p WidthInBytes * \p Height bytes of linear memory on
 * the device and returns in \p *dptr a pointer to the allocated memory. The
 * function may pad the allocation to ensure that corresponding pointers in
 * any given row will continue to meet the alignment requirements for
 * coalescing as the address is updated from row to row. \p ElementSizeBytes
 * specifies the size of the largest reads and writes that will be performed
 * on the memory range. \p ElementSizeBytes may be 4, 8 or 16 (since coalesced
 * memory transactions are not possible on other data sizes). If
 * \p ElementSizeBytes is smaller than the actual read/write size of a kernel,
 * the kernel will run correctly, but possibly at reduced speed. The pitch
 * returned in \p *pPitch by ::cuMemAllocPitch() is the width in bytes of the
 * allocation. The intended usage of pitch is as a separate parameter of the
 * allocation, used to compute addresses within the 2D array. Given the row
 * and column of an array element of type \b T, the address is computed as:
 * \code
   T* pElement = (T*)((char*)BaseAddress + Row * Pitch) + Column;
 * \endcode
 *
 * The pitch returned by ::cuMemAllocPitch() is guaranteed to work with
 * ::cuMemcpy2D() under all circumstances. For allocations of 2D arrays, it is
 * recommended that programmers consider performing pitch allocations using
 * ::cuMemAllocPitch(). Due to alignment restrictions in the hardware, this is
 * especially true if the application will be performing 2D memory copies
 * between different regions of device memory (whether linear memory or CUDA
 * arrays).
 *
 * The byte alignment of the pitch returned by ::cuMemAllocPitch() is guaranteed
 * to match or exceed the alignment requirement for texture binding with
 * ::cuTexRefSetAddress2D().
 *
 * \param dptr - Returned device pointer
 * \param pPitch - Returned pitch of allocation in bytes
 * \param WidthInBytes - Requested allocation width in bytes
 * \param Height - Requested allocation height in rows
 * \param ElementSizeBytes - Size of largest reads/writes for range
 *
 * \return
 * ::CUDA_SUCCESS,
 * ::CUDA_ERROR_DEINITIALIZED,
 * ::CUDA_ERROR_NOT_INITIALIZED,
 * ::CUDA_ERROR_INVALID_CONTEXT,
 * ::CUDA_ERROR_INVALID_VALUE,
 * ::CUDA_ERROR_OUT_OF_MEMORY
 * \notefnerr
 *
 * \sa ::cuArray3DCreate, ::cuArray3DGetDescriptor, ::cuArrayCreate,
 * ::cuArrayDestroy, ::cuArrayGetDescriptor, ::cuMemAlloc, ::cuMemAllocHost,
 * ::cuMemcpy2D, ::cuMemcpy2DAsync, ::cuMemcpy2DUnaligned,
 * ::cuMemcpy3D, ::cuMemcpy3DAsync, ::cuMemcpyAtoA, ::cuMemcpyAtoD,
 * ::cuMemcpyAtoH, ::cuMemcpyAtoHAsync, ::cuMemcpyDtoA, ::cuMemcpyDtoD, ::cuMemcpyDtoDAsync,
 * ::cuMemcpyDtoH, ::cuMemcpyDtoHAsync, ::cuMemcpyHtoA, ::cuMemcpyHtoAAsync,
 * ::cuMemcpyHtoD, ::cuMemcpyHtoDAsync, ::cuMemFree, ::cuMemFreeHost,
 * ::cuMemGetAddressRange, ::cuMemGetInfo, ::cuMemHostAlloc,
 * ::cuMemHostGetDevicePointer, ::cuMemsetD2D8, ::cuMemsetD2D16,
 * ::cuMemsetD2D32, ::cuMemsetD8, ::cuMemsetD16, ::cuMemsetD32
 */
CUresult CUDAAPI cuMemAllocPitch(CUdeviceptr *dptr, size_t *pPitch, size_t WidthInBytes, size_t Height, unsigned int ElementSizeBytes);
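/*
 * Editor's note: a sketch of a pitched allocation and the row addressing
 * formula documented above (not part of the original header; \p width,
 * \p height and \p r are placeholder values):
 *
 * \code
 *   CUdeviceptr d2d;
 *   size_t pitch = 0;
 *   cuMemAllocPitch(&d2d, &pitch, width * sizeof(float), height,
 *                   sizeof(float));                 // element size is 4
 *   CUdeviceptr row = d2d + (CUdeviceptr)(r * pitch);  // start of row r
 *   cuMemFree(d2d);
 * \endcode
 */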
/**
 * \brief Frees device memory
 *
 * Frees the memory space pointed to by \p dptr, which must have been returned
 * by a previous call to ::cuMemAlloc() or ::cuMemAllocPitch().
 *
 * \param dptr - Pointer to memory to free
 *
 * \return
 * ::CUDA_SUCCESS,
 * ::CUDA_ERROR_DEINITIALIZED,
 * ::CUDA_ERROR_NOT_INITIALIZED,
 * ::CUDA_ERROR_INVALID_CONTEXT,
 * ::CUDA_ERROR_INVALID_VALUE
 * \notefnerr
 *
 * \sa ::cuArray3DCreate, ::cuArray3DGetDescriptor, ::cuArrayCreate,
 * ::cuArrayDestroy, ::cuArrayGetDescriptor, ::cuMemAlloc, ::cuMemAllocHost,
 * ::cuMemAllocPitch, ::cuMemcpy2D, ::cuMemcpy2DAsync, ::cuMemcpy2DUnaligned,
 * ::cuMemcpy3D, ::cuMemcpy3DAsync, ::cuMemcpyAtoA, ::cuMemcpyAtoD,
 * ::cuMemcpyAtoH, ::cuMemcpyAtoHAsync, ::cuMemcpyDtoA, ::cuMemcpyDtoD, ::cuMemcpyDtoDAsync,
 * ::cuMemcpyDtoH, ::cuMemcpyDtoHAsync, ::cuMemcpyHtoA, ::cuMemcpyHtoAAsync,
 * ::cuMemcpyHtoD, ::cuMemcpyHtoDAsync, ::cuMemFreeHost,
 * ::cuMemGetAddressRange, ::cuMemGetInfo, ::cuMemHostAlloc,
 * ::cuMemHostGetDevicePointer, ::cuMemsetD2D8, ::cuMemsetD2D16,
 * ::cuMemsetD2D32, ::cuMemsetD8, ::cuMemsetD16, ::cuMemsetD32
 */
CUresult CUDAAPI cuMemFree(CUdeviceptr dptr);
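/*
 * Editor's note: a minimal allocate/copy/free round trip (editorial
 * sketch; error checking omitted):
 *
 * \code
 *   float host[256] = { 0 };
 *   CUdeviceptr d;
 *   cuMemAlloc(&d, sizeof(host));
 *   cuMemcpyHtoD(d, host, sizeof(host));   // host -> device
 *   cuMemcpyDtoH(host, d, sizeof(host));   // device -> host
 *   cuMemFree(d);
 * \endcode
 */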
/**
 * \brief Get information on memory allocations
 *
 * Returns the base address in \p *pbase and size in \p *psize of the
 * allocation by ::cuMemAlloc() or ::cuMemAllocPitch() that contains the input
 * pointer \p dptr. Both parameters \p pbase and \p psize are optional. If one
 * of them is NULL, it is ignored.
 *
 * \param pbase - Returned base address
 * \param psize - Returned size of device memory allocation
 * \param dptr - Device pointer to query
 *
 * \return
 * ::CUDA_SUCCESS,
 * ::CUDA_ERROR_DEINITIALIZED,
 * ::CUDA_ERROR_NOT_INITIALIZED,
 * ::CUDA_ERROR_INVALID_CONTEXT,
 * ::CUDA_ERROR_INVALID_VALUE
 * \notefnerr
 *
 * \sa ::cuArray3DCreate, ::cuArray3DGetDescriptor, ::cuArrayCreate,
 * ::cuArrayDestroy, ::cuArrayGetDescriptor, ::cuMemAlloc, ::cuMemAllocHost,
 * ::cuMemAllocPitch, ::cuMemcpy2D, ::cuMemcpy2DAsync, ::cuMemcpy2DUnaligned,
 * ::cuMemcpy3D, ::cuMemcpy3DAsync, ::cuMemcpyAtoA, ::cuMemcpyAtoD,
 * ::cuMemcpyAtoH, ::cuMemcpyAtoHAsync, ::cuMemcpyDtoA, ::cuMemcpyDtoD, ::cuMemcpyDtoDAsync,
 * ::cuMemcpyDtoH, ::cuMemcpyDtoHAsync, ::cuMemcpyHtoA, ::cuMemcpyHtoAAsync,
 * ::cuMemcpyHtoD, ::cuMemcpyHtoDAsync, ::cuMemFree, ::cuMemFreeHost,
 * ::cuMemGetInfo, ::cuMemHostAlloc,
 * ::cuMemHostGetDevicePointer, ::cuMemsetD2D8, ::cuMemsetD2D16,
 * ::cuMemsetD2D32, ::cuMemsetD8, ::cuMemsetD16, ::cuMemsetD32
 */
CUresult CUDAAPI cuMemGetAddressRange(CUdeviceptr *pbase, size_t *psize, CUdeviceptr dptr);
/**
 * \brief Allocates page-locked host memory
 *
 * Allocates \p bytesize bytes of host memory that is page-locked and
 * accessible to the device. The driver tracks the virtual memory ranges
 * allocated with this function and automatically accelerates calls to
 * functions such as ::cuMemcpy(). Since the memory can be accessed directly by
 * the device, it can be read or written with much higher bandwidth than
 * pageable memory obtained with functions such as ::malloc(). Allocating
 * excessive amounts of memory with ::cuMemAllocHost() may degrade system
 * performance, since it reduces the amount of memory available to the system
 * for paging. As a result, this function is best used sparingly to allocate
 * staging areas for data exchange between host and device.
 *
 * \param pp - Returned host pointer to page-locked memory
 * \param bytesize - Requested allocation size in bytes
 *
 * \return
 * ::CUDA_SUCCESS,
 * ::CUDA_ERROR_DEINITIALIZED,
 * ::CUDA_ERROR_NOT_INITIALIZED,
 * ::CUDA_ERROR_INVALID_CONTEXT,
 * ::CUDA_ERROR_INVALID_VALUE,
 * ::CUDA_ERROR_OUT_OF_MEMORY
 * \notefnerr
 *
 * \sa ::cuArray3DCreate, ::cuArray3DGetDescriptor, ::cuArrayCreate,
 * ::cuArrayDestroy, ::cuArrayGetDescriptor, ::cuMemAlloc,
 * ::cuMemAllocPitch, ::cuMemcpy2D, ::cuMemcpy2DAsync, ::cuMemcpy2DUnaligned,
 * ::cuMemcpy3D, ::cuMemcpy3DAsync, ::cuMemcpyAtoA, ::cuMemcpyAtoD,
 * ::cuMemcpyAtoH, ::cuMemcpyAtoHAsync, ::cuMemcpyDtoA, ::cuMemcpyDtoD, ::cuMemcpyDtoDAsync,
 * ::cuMemcpyDtoH, ::cuMemcpyDtoHAsync, ::cuMemcpyHtoA, ::cuMemcpyHtoAAsync,
 * ::cuMemcpyHtoD, ::cuMemcpyHtoDAsync, ::cuMemFree, ::cuMemFreeHost,
 * ::cuMemGetAddressRange, ::cuMemGetInfo, ::cuMemHostAlloc,
 * ::cuMemHostGetDevicePointer, ::cuMemsetD2D8, ::cuMemsetD2D16,
 * ::cuMemsetD2D32, ::cuMemsetD8, ::cuMemsetD16, ::cuMemsetD32
 */
CUresult CUDAAPI cuMemAllocHost(void **pp, size_t bytesize);
#endif /* __CUDA_API_VERSION >= 3020 */
/**
 * \brief Frees page-locked host memory
 *
 * Frees the memory space pointed to by \p p, which must have been returned by
 * a previous call to ::cuMemAllocHost().
 *
 * \param p - Pointer to memory to free
 *
 * \return
 * ::CUDA_SUCCESS,
 * ::CUDA_ERROR_DEINITIALIZED,
 * ::CUDA_ERROR_NOT_INITIALIZED,
 * ::CUDA_ERROR_INVALID_CONTEXT,
 * ::CUDA_ERROR_INVALID_VALUE
 * \notefnerr
 *
 * \sa ::cuArray3DCreate, ::cuArray3DGetDescriptor, ::cuArrayCreate,
 * ::cuArrayDestroy, ::cuArrayGetDescriptor, ::cuMemAlloc, ::cuMemAllocHost,
 * ::cuMemAllocPitch, ::cuMemcpy2D, ::cuMemcpy2DAsync, ::cuMemcpy2DUnaligned,
 * ::cuMemcpy3D, ::cuMemcpy3DAsync, ::cuMemcpyAtoA, ::cuMemcpyAtoD,
 * ::cuMemcpyAtoH, ::cuMemcpyAtoHAsync, ::cuMemcpyDtoA, ::cuMemcpyDtoD, ::cuMemcpyDtoDAsync,
 * ::cuMemcpyDtoH, ::cuMemcpyDtoHAsync, ::cuMemcpyHtoA, ::cuMemcpyHtoAAsync,
 * ::cuMemcpyHtoD, ::cuMemcpyHtoDAsync, ::cuMemFree,
 * ::cuMemGetAddressRange, ::cuMemGetInfo, ::cuMemHostAlloc,
 * ::cuMemHostGetDevicePointer, ::cuMemsetD2D8, ::cuMemsetD2D16,
 * ::cuMemsetD2D32, ::cuMemsetD8, ::cuMemsetD16, ::cuMemsetD32
 */
CUresult CUDAAPI cuMemFreeHost(void *p);
/************************************ /**
** * \brief Allocates page-locked host memory
** Export tables *
** * Allocates \p bytesize bytes of host memory that is page-locked and acces
***********************************/ sible
CUresult CUDAAPI cuGetExportTable( const void **ppExportTable, const CU * to the device. The driver tracks the virtual memory ranges allocated wit
uuid *pExportTableId ); h
* this function and automatically accelerates calls to functions such as
* ::cuMemcpyHtoD(). Since the memory can be accessed directly by the devic
e,
* it can be read or written with much higher bandwidth than pageable memor
y
* obtained with functions such as ::malloc(). Allocating excessive amounts
of
* pinned memory may degrade system performance, since it reduces the amoun
t
* of memory available to the system for paging. As a result, this function
is
* best used sparingly to allocate staging areas for data exchange between
* host and device.
*
* The \p Flags parameter enables different options to be specified that
* affect the allocation, as follows.
*
 * - ::CU_MEMHOSTALLOC_PORTABLE: The memory returned by this call will be
 *   considered as pinned memory by all CUDA contexts, not just the one that
 *   performed the allocation.
 *
 * - ::CU_MEMHOSTALLOC_DEVICEMAP: Maps the allocation into the CUDA address
 *   space. The device pointer to the memory may be obtained by calling
 *   ::cuMemHostGetDevicePointer(). This feature is available only on GPUs
 *   with compute capability greater than or equal to 1.1.
 *
 * - ::CU_MEMHOSTALLOC_WRITECOMBINED: Allocates the memory as write-combined
 *   (WC). WC memory can be transferred across the PCI Express bus more
 *   quickly on some system configurations, but cannot be read efficiently by
 *   most CPUs. WC memory is a good option for buffers that will be written by
 *   the CPU and read by the GPU via mapped pinned memory or host->device
 *   transfers.
 *
 * All of these flags are orthogonal to one another: a developer may allocate
 * memory that is portable, mapped and/or write-combined with no restrictions.
 *
 * The CUDA context must have been created with the ::CU_CTX_MAP_HOST flag in
 * order for the ::CU_MEMHOSTALLOC_DEVICEMAP flag to have any effect.
 *
 * The ::CU_MEMHOSTALLOC_DEVICEMAP flag may be specified on CUDA contexts for
 * devices that do not support mapped pinned memory. The failure is deferred
 * to ::cuMemHostGetDevicePointer() because the memory may be mapped into
 * other CUDA contexts via the ::CU_MEMHOSTALLOC_PORTABLE flag.
 *
 * The memory allocated by this function must be freed with ::cuMemFreeHost().
*
* \param pp - Returned host pointer to page-locked memory
* \param bytesize - Requested allocation size in bytes
* \param Flags - Flags for allocation request
*
* \return
* ::CUDA_SUCCESS,
* ::CUDA_ERROR_DEINITIALIZED,
* ::CUDA_ERROR_NOT_INITIALIZED,
* ::CUDA_ERROR_INVALID_CONTEXT,
* ::CUDA_ERROR_INVALID_VALUE,
* ::CUDA_ERROR_OUT_OF_MEMORY
* \notefnerr
*
 * \sa ::cuArray3DCreate, ::cuArray3DGetDescriptor, ::cuArrayCreate,
 * ::cuArrayDestroy, ::cuArrayGetDescriptor, ::cuMemAlloc, ::cuMemAllocHost,
 * ::cuMemAllocPitch, ::cuMemcpy2D, ::cuMemcpy2DAsync, ::cuMemcpy2DUnaligned,
 * ::cuMemcpy3D, ::cuMemcpy3DAsync, ::cuMemcpyAtoA, ::cuMemcpyAtoD,
 * ::cuMemcpyAtoH, ::cuMemcpyAtoHAsync, ::cuMemcpyDtoA, ::cuMemcpyDtoD, ::cuMemcpyDtoDAsync,
 * ::cuMemcpyDtoH, ::cuMemcpyDtoHAsync, ::cuMemcpyHtoA, ::cuMemcpyHtoAAsync,
 * ::cuMemcpyHtoD, ::cuMemcpyHtoDAsync, ::cuMemFree, ::cuMemFreeHost,
 * ::cuMemGetAddressRange, ::cuMemGetInfo,
 * ::cuMemHostGetDevicePointer, ::cuMemsetD2D8, ::cuMemsetD2D16,
 * ::cuMemsetD2D32, ::cuMemsetD8, ::cuMemsetD16, ::cuMemsetD32
 */
CUresult CUDAAPI cuMemHostAlloc(void **pp, size_t bytesize, unsigned int Flags);
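/*
 * A minimal usage sketch (illustrative only, not part of the original header):
 * allocating portable, mapped pinned memory and retrieving its device pointer.
 * It assumes an initialized driver and a current context created with
 * ::CU_CTX_MAP_HOST; error handling is abbreviated.
 *
 * \code
   #include <cuda.h>

   void *hostPtr = NULL;
   CUdeviceptr devPtr = 0;

   // 1 MiB of pinned memory, pinned for all contexts and mapped into the
   // CUDA address space.
   if (cuMemHostAlloc(&hostPtr, 1 << 20,
                      CU_MEMHOSTALLOC_PORTABLE | CU_MEMHOSTALLOC_DEVICEMAP) == CUDA_SUCCESS) {
       // Flags must be 0 for now, as documented below.
       if (cuMemHostGetDevicePointer(&devPtr, hostPtr, 0) == CUDA_SUCCESS) {
           // devPtr aliases hostPtr and may be passed to kernels.
       }
       cuMemFreeHost(hostPtr);   // pinned memory is freed with cuMemFreeHost()
   }
 * \endcode
 */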
#if __CUDA_API_VERSION >= 3020
/**
 * \brief Passes back device pointer of mapped pinned memory
 *
 * Passes back the device pointer \p pdptr corresponding to the mapped, pinned
 * host buffer \p p allocated by ::cuMemHostAlloc.
 *
 * ::cuMemHostGetDevicePointer() will fail if the ::CU_MEMHOSTALLOC_DEVICEMAP
 * flag was not specified at the time the memory was allocated, or if the
 * function is called on a GPU that does not support mapped pinned memory.
 *
 * \p Flags provides for future releases. For now, it must be set to 0.
 *
 * \param pdptr - Returned device pointer
 * \param p     - Host pointer
 * \param Flags - Options (must be 0)
 *
 * \return
 * ::CUDA_SUCCESS,
 * ::CUDA_ERROR_DEINITIALIZED,
 * ::CUDA_ERROR_NOT_INITIALIZED,
 * ::CUDA_ERROR_INVALID_CONTEXT,
 * ::CUDA_ERROR_INVALID_VALUE
 * \notefnerr
 *
 * \sa ::cuArray3DCreate, ::cuArray3DGetDescriptor, ::cuArrayCreate,
 * ::cuArrayDestroy, ::cuArrayGetDescriptor, ::cuMemAlloc, ::cuMemAllocHost,
 * ::cuMemAllocPitch, ::cuMemcpy2D, ::cuMemcpy2DAsync, ::cuMemcpy2DUnaligned,
 * ::cuMemcpy3D, ::cuMemcpy3DAsync, ::cuMemcpyAtoA, ::cuMemcpyAtoD,
 * ::cuMemcpyAtoH, ::cuMemcpyAtoHAsync, ::cuMemcpyDtoA, ::cuMemcpyDtoD, ::cuMemcpyDtoDAsync,
 * ::cuMemcpyDtoH, ::cuMemcpyDtoHAsync, ::cuMemcpyHtoA, ::cuMemcpyHtoAAsync,
 * ::cuMemcpyHtoD, ::cuMemcpyHtoDAsync, ::cuMemFree, ::cuMemFreeHost,
 * ::cuMemGetAddressRange, ::cuMemGetInfo, ::cuMemHostAlloc,
 * ::cuMemsetD2D8, ::cuMemsetD2D16,
 * ::cuMemsetD2D32, ::cuMemsetD8, ::cuMemsetD16, ::cuMemsetD32
 */
CUresult CUDAAPI cuMemHostGetDevicePointer(CUdeviceptr *pdptr, void *p, unsigned int Flags);
#endif /* __CUDA_API_VERSION >= 3020 */
/**
 * \brief Passes back flags that were used for a pinned allocation
*
* Passes back the flags \p pFlags that were specified when allocating
* the pinned host buffer \p p allocated by ::cuMemHostAlloc.
*
* ::cuMemHostGetFlags() will fail if the pointer does not reside in
* an allocation performed by ::cuMemAllocHost() or ::cuMemHostAlloc().
*
* \param pFlags - Returned flags word
* \param p - Host pointer
*
* \return
* ::CUDA_SUCCESS,
* ::CUDA_ERROR_DEINITIALIZED,
* ::CUDA_ERROR_NOT_INITIALIZED,
* ::CUDA_ERROR_INVALID_CONTEXT,
* ::CUDA_ERROR_INVALID_VALUE
* \notefnerr
*
* \sa ::cuMemAllocHost, ::cuMemHostAlloc
*/
CUresult CUDAAPI cuMemHostGetFlags(unsigned int *pFlags, void *p);
#if __CUDA_API_VERSION >= 3020
/**
* \brief Copies memory from Host to Device
*
 * Copies from host memory to device memory. \p dstDevice and \p srcHost are
 * the base addresses of the destination and source, respectively. \p ByteCount
 * specifies the number of bytes to copy. Note that this function is
 * synchronous.
*
* \param dstDevice - Destination device pointer
* \param srcHost - Source host pointer
* \param ByteCount - Size of memory copy in bytes
*
* \return
* ::CUDA_SUCCESS,
* ::CUDA_ERROR_DEINITIALIZED,
* ::CUDA_ERROR_NOT_INITIALIZED,
* ::CUDA_ERROR_INVALID_CONTEXT,
* ::CUDA_ERROR_INVALID_VALUE
* \notefnerr
*
 * \sa ::cuArray3DCreate, ::cuArray3DGetDescriptor, ::cuArrayCreate,
 * ::cuArrayDestroy, ::cuArrayGetDescriptor, ::cuMemAlloc, ::cuMemAllocHost,
 * ::cuMemAllocPitch, ::cuMemcpy2D, ::cuMemcpy2DAsync, ::cuMemcpy2DUnaligned,
 * ::cuMemcpy3D, ::cuMemcpy3DAsync, ::cuMemcpyAtoA, ::cuMemcpyAtoD,
 * ::cuMemcpyAtoH, ::cuMemcpyAtoHAsync, ::cuMemcpyDtoA, ::cuMemcpyDtoD, ::cuMemcpyDtoDAsync,
 * ::cuMemcpyDtoH, ::cuMemcpyDtoHAsync, ::cuMemcpyHtoA, ::cuMemcpyHtoAAsync,
 * ::cuMemcpyHtoDAsync, ::cuMemFree, ::cuMemFreeHost,
 * ::cuMemGetAddressRange, ::cuMemGetInfo, ::cuMemHostAlloc,
 * ::cuMemHostGetDevicePointer, ::cuMemsetD2D8, ::cuMemsetD2D16,
 * ::cuMemsetD2D32, ::cuMemsetD8, ::cuMemsetD16, ::cuMemsetD32
 */
CUresult CUDAAPI cuMemcpyHtoD(CUdeviceptr dstDevice, const void *srcHost, size_t ByteCount);
/**
* \brief Copies memory from Device to Host
*
 * Copies from device to host memory. \p dstHost and \p srcDevice specify the
 * base pointers of the destination and source, respectively. \p ByteCount
 * specifies the number of bytes to copy. Note that this function is
 * synchronous.
*
* \param dstHost - Destination host pointer
* \param srcDevice - Source device pointer
* \param ByteCount - Size of memory copy in bytes
*
* \return
* ::CUDA_SUCCESS,
* ::CUDA_ERROR_DEINITIALIZED,
* ::CUDA_ERROR_NOT_INITIALIZED,
* ::CUDA_ERROR_INVALID_CONTEXT,
* ::CUDA_ERROR_INVALID_VALUE
* \notefnerr
*
 * \sa ::cuArray3DCreate, ::cuArray3DGetDescriptor, ::cuArrayCreate,
 * ::cuArrayDestroy, ::cuArrayGetDescriptor, ::cuMemAlloc, ::cuMemAllocHost,
 * ::cuMemAllocPitch, ::cuMemcpy2D, ::cuMemcpy2DAsync, ::cuMemcpy2DUnaligned,
 * ::cuMemcpy3D, ::cuMemcpy3DAsync, ::cuMemcpyAtoA, ::cuMemcpyAtoD,
 * ::cuMemcpyAtoH, ::cuMemcpyAtoHAsync, ::cuMemcpyDtoA, ::cuMemcpyDtoD, ::cuMemcpyDtoDAsync,
 * ::cuMemcpyDtoHAsync, ::cuMemcpyHtoA, ::cuMemcpyHtoAAsync,
 * ::cuMemcpyHtoD, ::cuMemcpyHtoDAsync, ::cuMemFree, ::cuMemFreeHost,
 * ::cuMemGetAddressRange, ::cuMemGetInfo, ::cuMemHostAlloc,
 * ::cuMemHostGetDevicePointer, ::cuMemsetD2D8, ::cuMemsetD2D16,
 * ::cuMemsetD2D32, ::cuMemsetD8, ::cuMemsetD16, ::cuMemsetD32
 */
CUresult CUDAAPI cuMemcpyDtoH(void *dstHost, CUdeviceptr srcDevice, size_t ByteCount);
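/*
 * A minimal usage sketch (illustrative only, not part of the original header)
 * of the two synchronous copies above: a host -> device -> host round trip
 * through linear device memory. Context setup and most error checks omitted.
 *
 * \code
   #include <cuda.h>

   float src[256] = {0}, dst[256];
   CUdeviceptr d = 0;

   if (cuMemAlloc(&d, sizeof src) == CUDA_SUCCESS) {
       cuMemcpyHtoD(d, src, sizeof src);   // blocks until the copy completes
       cuMemcpyDtoH(dst, d, sizeof dst);   // also synchronous
       cuMemFree(d);
   }
 * \endcode
 */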
/**
* \brief Copies memory from Device to Device
*
 * Copies from device memory to device memory. \p dstDevice and \p srcDevice
 * are the base pointers of the destination and source, respectively.
 * \p ByteCount specifies the number of bytes to copy. Note that this function
 * is asynchronous.
*
* \param dstDevice - Destination device pointer
* \param srcDevice - Source device pointer
* \param ByteCount - Size of memory copy in bytes
*
* \return
* ::CUDA_SUCCESS,
* ::CUDA_ERROR_DEINITIALIZED,
* ::CUDA_ERROR_NOT_INITIALIZED,
* ::CUDA_ERROR_INVALID_CONTEXT,
* ::CUDA_ERROR_INVALID_VALUE
* \notefnerr
*
 * \sa ::cuArray3DCreate, ::cuArray3DGetDescriptor, ::cuArrayCreate,
 * ::cuArrayDestroy, ::cuArrayGetDescriptor, ::cuMemAlloc, ::cuMemAllocHost,
 * ::cuMemAllocPitch, ::cuMemcpy2D, ::cuMemcpy2DAsync, ::cuMemcpy2DUnaligned,
 * ::cuMemcpy3D, ::cuMemcpy3DAsync, ::cuMemcpyAtoA, ::cuMemcpyAtoD,
 * ::cuMemcpyAtoH, ::cuMemcpyAtoHAsync, ::cuMemcpyDtoA,
 * ::cuMemcpyDtoH, ::cuMemcpyDtoHAsync, ::cuMemcpyHtoA, ::cuMemcpyHtoAAsync,
 * ::cuMemcpyHtoD, ::cuMemcpyHtoDAsync, ::cuMemFree, ::cuMemFreeHost,
 * ::cuMemGetAddressRange, ::cuMemGetInfo, ::cuMemHostAlloc,
 * ::cuMemHostGetDevicePointer, ::cuMemsetD2D8, ::cuMemsetD2D16,
 * ::cuMemsetD2D32, ::cuMemsetD8, ::cuMemsetD16, ::cuMemsetD32
 */
CUresult CUDAAPI cuMemcpyDtoD(CUdeviceptr dstDevice, CUdeviceptr srcDevice, size_t ByteCount);
/**
* \brief Copies memory from Device to Array
*
 * Copies from device memory to a 1D CUDA array. \p dstArray and \p dstOffset
 * specify the CUDA array handle and starting index of the destination data.
 * \p srcDevice specifies the base pointer of the source. \p ByteCount
 * specifies the number of bytes to copy.
*
* \param dstArray - Destination array
* \param dstOffset - Offset in bytes of destination array
* \param srcDevice - Source device pointer
* \param ByteCount - Size of memory copy in bytes
*
* \return
* ::CUDA_SUCCESS,
* ::CUDA_ERROR_DEINITIALIZED,
* ::CUDA_ERROR_NOT_INITIALIZED,
* ::CUDA_ERROR_INVALID_CONTEXT,
* ::CUDA_ERROR_INVALID_VALUE
* \notefnerr
*
 * \sa ::cuArray3DCreate, ::cuArray3DGetDescriptor, ::cuArrayCreate,
 * ::cuArrayDestroy, ::cuArrayGetDescriptor, ::cuMemAlloc, ::cuMemAllocHost,
 * ::cuMemAllocPitch, ::cuMemcpy2D, ::cuMemcpy2DAsync, ::cuMemcpy2DUnaligned,
 * ::cuMemcpy3D, ::cuMemcpy3DAsync, ::cuMemcpyAtoA, ::cuMemcpyAtoD,
 * ::cuMemcpyAtoH, ::cuMemcpyAtoHAsync, ::cuMemcpyDtoD, ::cuMemcpyDtoDAsync,
 * ::cuMemcpyDtoH, ::cuMemcpyDtoHAsync, ::cuMemcpyHtoA, ::cuMemcpyHtoAAsync,
 * ::cuMemcpyHtoD, ::cuMemcpyHtoDAsync, ::cuMemFree, ::cuMemFreeHost,
 * ::cuMemGetAddressRange, ::cuMemGetInfo, ::cuMemHostAlloc,
 * ::cuMemHostGetDevicePointer, ::cuMemsetD2D8, ::cuMemsetD2D16,
 * ::cuMemsetD2D32, ::cuMemsetD8, ::cuMemsetD16, ::cuMemsetD32
 */
CUresult CUDAAPI cuMemcpyDtoA(CUarray dstArray, size_t dstOffset, CUdeviceptr srcDevice, size_t ByteCount);
/**
* \brief Copies memory from Array to Device
*
 * Copies from one 1D CUDA array to device memory. \p dstDevice specifies the
 * base pointer of the destination and must be naturally aligned with the CUDA
 * array elements. \p srcArray and \p srcOffset specify the CUDA array handle
 * and the offset in bytes into the array where the copy is to begin.
 * \p ByteCount specifies the number of bytes to copy and must be evenly
 * divisible by the array element size.
*
* \param dstDevice - Destination device pointer
* \param srcArray - Source array
* \param srcOffset - Offset in bytes of source array
* \param ByteCount - Size of memory copy in bytes
*
* \return
* ::CUDA_SUCCESS,
* ::CUDA_ERROR_DEINITIALIZED,
* ::CUDA_ERROR_NOT_INITIALIZED,
* ::CUDA_ERROR_INVALID_CONTEXT,
* ::CUDA_ERROR_INVALID_VALUE
* \notefnerr
*
 * \sa ::cuArray3DCreate, ::cuArray3DGetDescriptor, ::cuArrayCreate,
 * ::cuArrayDestroy, ::cuArrayGetDescriptor, ::cuMemAlloc, ::cuMemAllocHost,
 * ::cuMemAllocPitch, ::cuMemcpy2D, ::cuMemcpy2DAsync, ::cuMemcpy2DUnaligned,
 * ::cuMemcpy3D, ::cuMemcpy3DAsync, ::cuMemcpyAtoA,
 * ::cuMemcpyAtoH, ::cuMemcpyAtoHAsync, ::cuMemcpyDtoA, ::cuMemcpyDtoD, ::cuMemcpyDtoDAsync,
 * ::cuMemcpyDtoH, ::cuMemcpyDtoHAsync, ::cuMemcpyHtoA, ::cuMemcpyHtoAAsync,
 * ::cuMemcpyHtoD, ::cuMemcpyHtoDAsync, ::cuMemFree, ::cuMemFreeHost,
 * ::cuMemGetAddressRange, ::cuMemGetInfo, ::cuMemHostAlloc,
 * ::cuMemHostGetDevicePointer, ::cuMemsetD2D8, ::cuMemsetD2D16,
 * ::cuMemsetD2D32, ::cuMemsetD8, ::cuMemsetD16, ::cuMemsetD32
 */
CUresult CUDAAPI cuMemcpyAtoD(CUdeviceptr dstDevice, CUarray srcArray, size_t srcOffset, size_t ByteCount);
/**
* \brief Copies memory from Host to Array
*
 * Copies from host memory to a 1D CUDA array. \p dstArray and \p dstOffset
 * specify the CUDA array handle and starting offset in bytes of the
 * destination data. \p srcHost specifies the base address of the source.
 * \p ByteCount specifies the number of bytes to copy.
*
* \param dstArray - Destination array
* \param dstOffset - Offset in bytes of destination array
* \param srcHost - Source host pointer
* \param ByteCount - Size of memory copy in bytes
*
* \return
* ::CUDA_SUCCESS,
* ::CUDA_ERROR_DEINITIALIZED,
* ::CUDA_ERROR_NOT_INITIALIZED,
* ::CUDA_ERROR_INVALID_CONTEXT,
* ::CUDA_ERROR_INVALID_VALUE
* \notefnerr
*
 * \sa ::cuArray3DCreate, ::cuArray3DGetDescriptor, ::cuArrayCreate,
 * ::cuArrayDestroy, ::cuArrayGetDescriptor, ::cuMemAlloc, ::cuMemAllocHost,
 * ::cuMemAllocPitch, ::cuMemcpy2D, ::cuMemcpy2DAsync, ::cuMemcpy2DUnaligned,
 * ::cuMemcpy3D, ::cuMemcpy3DAsync, ::cuMemcpyAtoA, ::cuMemcpyAtoD,
 * ::cuMemcpyAtoH, ::cuMemcpyAtoHAsync, ::cuMemcpyDtoA, ::cuMemcpyDtoD, ::cuMemcpyDtoDAsync,
 * ::cuMemcpyDtoH, ::cuMemcpyDtoHAsync, ::cuMemcpyHtoAAsync,
 * ::cuMemcpyHtoD, ::cuMemcpyHtoDAsync, ::cuMemFree, ::cuMemFreeHost,
 * ::cuMemGetAddressRange, ::cuMemGetInfo, ::cuMemHostAlloc,
 * ::cuMemHostGetDevicePointer, ::cuMemsetD2D8, ::cuMemsetD2D16,
 * ::cuMemsetD2D32, ::cuMemsetD8, ::cuMemsetD16, ::cuMemsetD32
 */
CUresult CUDAAPI cuMemcpyHtoA(CUarray dstArray, size_t dstOffset, const void *srcHost, size_t ByteCount);
/**
* \brief Copies memory from Array to Host
*
 * Copies from one 1D CUDA array to host memory. \p dstHost specifies the base
 * pointer of the destination. \p srcArray and \p srcOffset specify the CUDA
 * array handle and starting offset in bytes of the source data.
 * \p ByteCount specifies the number of bytes to copy.
*
 * \param dstHost   - Destination host pointer
* \param srcArray - Source array
* \param srcOffset - Offset in bytes of source array
* \param ByteCount - Size of memory copy in bytes
*
* \return
* ::CUDA_SUCCESS,
* ::CUDA_ERROR_DEINITIALIZED,
* ::CUDA_ERROR_NOT_INITIALIZED,
* ::CUDA_ERROR_INVALID_CONTEXT,
* ::CUDA_ERROR_INVALID_VALUE
* \notefnerr
*
 * \sa ::cuArray3DCreate, ::cuArray3DGetDescriptor, ::cuArrayCreate,
 * ::cuArrayDestroy, ::cuArrayGetDescriptor, ::cuMemAlloc, ::cuMemAllocHost,
 * ::cuMemAllocPitch, ::cuMemcpy2D, ::cuMemcpy2DAsync, ::cuMemcpy2DUnaligned,
 * ::cuMemcpy3D, ::cuMemcpy3DAsync, ::cuMemcpyAtoA, ::cuMemcpyAtoD,
 * ::cuMemcpyAtoHAsync, ::cuMemcpyDtoA, ::cuMemcpyDtoD, ::cuMemcpyDtoDAsync,
 * ::cuMemcpyDtoH, ::cuMemcpyDtoHAsync, ::cuMemcpyHtoA, ::cuMemcpyHtoAAsync,
 * ::cuMemcpyHtoD, ::cuMemcpyHtoDAsync, ::cuMemFree, ::cuMemFreeHost,
 * ::cuMemGetAddressRange, ::cuMemGetInfo, ::cuMemHostAlloc,
 * ::cuMemHostGetDevicePointer, ::cuMemsetD2D8, ::cuMemsetD2D16,
 * ::cuMemsetD2D32, ::cuMemsetD8, ::cuMemsetD16, ::cuMemsetD32
 */
CUresult CUDAAPI cuMemcpyAtoH(void *dstHost, CUarray srcArray, size_t srcOffset, size_t ByteCount);
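/*
 * A minimal usage sketch (illustrative only, not part of the original header):
 * staging 1024 floats through a 1D CUDA array with ::cuMemcpyHtoA() and
 * ::cuMemcpyAtoH(). The float format and element count are arbitrary choices
 * for the example; error checks are abbreviated.
 *
 * \code
   #include <cuda.h>

   CUDA_ARRAY_DESCRIPTOR desc = {0};
   CUarray arr;
   float in[1024] = {0}, out[1024];

   desc.Format      = CU_AD_FORMAT_FLOAT;
   desc.NumChannels = 1;
   desc.Width       = 1024;
   desc.Height      = 0;                      // 0 height selects a 1D array

   if (cuArrayCreate(&arr, &desc) == CUDA_SUCCESS) {
       cuMemcpyHtoA(arr, 0, in, sizeof in);   // offsets are in bytes
       cuMemcpyAtoH(out, arr, 0, sizeof out);
       cuArrayDestroy(arr);
   }
 * \endcode
 */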
/**
* \brief Copies memory from Array to Array
*
 * Copies from one 1D CUDA array to another. \p dstArray and \p srcArray
 * specify the handles of the destination and source CUDA arrays for the copy,
 * respectively. \p dstOffset and \p srcOffset specify the destination and
 * source offsets in bytes into the CUDA arrays. \p ByteCount is the number of
 * bytes to be copied. The elements of the CUDA arrays need not be the same
 * format, but they must be the same size, and \p ByteCount must be evenly
 * divisible by that size.
*
* \param dstArray - Destination array
* \param dstOffset - Offset in bytes of destination array
* \param srcArray - Source array
* \param srcOffset - Offset in bytes of source array
* \param ByteCount - Size of memory copy in bytes
*
* \return
* ::CUDA_SUCCESS,
* ::CUDA_ERROR_DEINITIALIZED,
* ::CUDA_ERROR_NOT_INITIALIZED,
* ::CUDA_ERROR_INVALID_CONTEXT,
* ::CUDA_ERROR_INVALID_VALUE
* \notefnerr
*
 * \sa ::cuArray3DCreate, ::cuArray3DGetDescriptor, ::cuArrayCreate,
 * ::cuArrayDestroy, ::cuArrayGetDescriptor, ::cuMemAlloc, ::cuMemAllocHost,
 * ::cuMemAllocPitch, ::cuMemcpy2D, ::cuMemcpy2DAsync, ::cuMemcpy2DUnaligned,
 * ::cuMemcpy3D, ::cuMemcpy3DAsync, ::cuMemcpyAtoD,
 * ::cuMemcpyAtoH, ::cuMemcpyAtoHAsync, ::cuMemcpyDtoA, ::cuMemcpyDtoD, ::cuMemcpyDtoDAsync,
 * ::cuMemcpyDtoH, ::cuMemcpyDtoHAsync, ::cuMemcpyHtoA, ::cuMemcpyHtoAAsync,
 * ::cuMemcpyHtoD, ::cuMemcpyHtoDAsync, ::cuMemFree, ::cuMemFreeHost,
 * ::cuMemGetAddressRange, ::cuMemGetInfo, ::cuMemHostAlloc,
 * ::cuMemHostGetDevicePointer, ::cuMemsetD2D8, ::cuMemsetD2D16,
 * ::cuMemsetD2D32, ::cuMemsetD8, ::cuMemsetD16, ::cuMemsetD32
 */
CUresult CUDAAPI cuMemcpyAtoA(CUarray dstArray, size_t dstOffset, CUarray srcArray, size_t srcOffset, size_t ByteCount);
/**
* \brief Copies memory for 2D arrays
*
 * Perform a 2D memory copy according to the parameters specified in \p pCopy.
* The ::CUDA_MEMCPY2D structure is defined as:
*
* \code
typedef struct CUDA_MEMCPY2D_st {
unsigned int srcXInBytes, srcY;
CUmemorytype srcMemoryType;
const void *srcHost;
CUdeviceptr srcDevice;
CUarray srcArray;
unsigned int srcPitch;
unsigned int dstXInBytes, dstY;
CUmemorytype dstMemoryType;
void *dstHost;
CUdeviceptr dstDevice;
CUarray dstArray;
unsigned int dstPitch;
unsigned int WidthInBytes;
unsigned int Height;
} CUDA_MEMCPY2D;
* \endcode
 * where:
 * - ::srcMemoryType and ::dstMemoryType specify the type of memory of the
 *   source and destination, respectively; ::CUmemorytype_enum is defined as:
 *
 * \code
   typedef enum CUmemorytype_enum {
      CU_MEMORYTYPE_HOST = 0x01,
      CU_MEMORYTYPE_DEVICE = 0x02,
      CU_MEMORYTYPE_ARRAY = 0x03
   } CUmemorytype;
 * \endcode
 *
 * \par
 * If ::srcMemoryType is ::CU_MEMORYTYPE_HOST, ::srcHost and ::srcPitch
 * specify the (host) base address of the source data and the bytes per row to
 * apply. ::srcArray is ignored.
 *
 * \par
 * If ::srcMemoryType is ::CU_MEMORYTYPE_DEVICE, ::srcDevice and ::srcPitch
 * specify the (device) base address of the source data and the bytes per row
 * to apply. ::srcArray is ignored.
 *
 * \par
 * If ::srcMemoryType is ::CU_MEMORYTYPE_ARRAY, ::srcArray specifies the
 * handle of the source data. ::srcHost, ::srcDevice and ::srcPitch are
 * ignored.
 *
 * \par
 * If ::dstMemoryType is ::CU_MEMORYTYPE_HOST, ::dstHost and ::dstPitch
 * specify the (host) base address of the destination data and the bytes per
 * row to apply. ::dstArray is ignored.
 *
 * \par
 * If ::dstMemoryType is ::CU_MEMORYTYPE_DEVICE, ::dstDevice and ::dstPitch
 * specify the (device) base address of the destination data and the bytes per
 * row to apply. ::dstArray is ignored.
 *
 * \par
 * If ::dstMemoryType is ::CU_MEMORYTYPE_ARRAY, ::dstArray specifies the
 * handle of the destination data. ::dstHost, ::dstDevice and ::dstPitch are
 * ignored.
 *
 * - ::srcXInBytes and ::srcY specify the base address of the source data for
 *   the copy.
 *
 * \par
 * For host pointers, the starting address is
 * \code
   void* Start = (void*)((char*)srcHost+srcY*srcPitch + srcXInBytes);
 * \endcode
 *
 * \par
 * For device pointers, the starting address is
 * \code
   CUdeviceptr Start = srcDevice+srcY*srcPitch+srcXInBytes;
 * \endcode
 *
 * \par
 * For CUDA arrays, ::srcXInBytes must be evenly divisible by the array
 * element size.
 *
 * - ::dstXInBytes and ::dstY specify the base address of the destination data
 *   for the copy.
 *
 * \par
 * For host pointers, the base address is
 * \code
   void* dstStart = (void*)((char*)dstHost+dstY*dstPitch + dstXInBytes);
 * \endcode
 *
 * \par
 * For device pointers, the starting address is
 * \code
   CUdeviceptr dstStart = dstDevice+dstY*dstPitch+dstXInBytes;
 * \endcode
 *
 * \par
 * For CUDA arrays, ::dstXInBytes must be evenly divisible by the array
 * element size.
 *
 * - ::WidthInBytes and ::Height specify the width (in bytes) and height of
 *   the 2D copy being performed.
 * - If specified, ::srcPitch must be greater than or equal to ::WidthInBytes +
 *   ::srcXInBytes, and ::dstPitch must be greater than or equal to
 *   ::WidthInBytes + ::dstXInBytes.
 *
 * \par
 * ::cuMemcpy2D() returns an error if any pitch is greater than the maximum
 * allowed (::CU_DEVICE_ATTRIBUTE_MAX_PITCH). ::cuMemAllocPitch() passes back
 * pitches that always work with ::cuMemcpy2D(). On intra-device memory copies
 * (device ↔ device, CUDA array ↔ device, CUDA array ↔ CUDA array),
 * ::cuMemcpy2D() may fail for pitches not computed by ::cuMemAllocPitch().
 * ::cuMemcpy2DUnaligned() does not have this restriction, but may run
 * significantly slower in the cases where ::cuMemcpy2D() would have returned
 * an error code.
 *
 * \param pCopy - Parameters for the memory copy
 *
 * \return
 * ::CUDA_SUCCESS,
 * ::CUDA_ERROR_DEINITIALIZED,
 * ::CUDA_ERROR_NOT_INITIALIZED,
 * ::CUDA_ERROR_INVALID_CONTEXT,
 * ::CUDA_ERROR_INVALID_VALUE
 * \notefnerr
 *
 * \sa ::cuArray3DCreate, ::cuArray3DGetDescriptor, ::cuArrayCreate,
 * ::cuArrayDestroy, ::cuArrayGetDescriptor, ::cuMemAlloc, ::cuMemAllocHost,
 * ::cuMemAllocPitch, ::cuMemcpy2DAsync, ::cuMemcpy2DUnaligned,
 * ::cuMemcpy3D, ::cuMemcpy3DAsync, ::cuMemcpyAtoA, ::cuMemcpyAtoD,
 * ::cuMemcpyAtoH, ::cuMemcpyAtoHAsync, ::cuMemcpyDtoA, ::cuMemcpyDtoD, ::cuMemcpyDtoDAsync,
 * ::cuMemcpyDtoH, ::cuMemcpyDtoHAsync, ::cuMemcpyHtoA, ::cuMemcpyHtoAAsync,
 * ::cuMemcpyHtoD, ::cuMemcpyHtoDAsync, ::cuMemFree, ::cuMemFreeHost,
 * ::cuMemGetAddressRange, ::cuMemGetInfo, ::cuMemHostAlloc,
 * ::cuMemHostGetDevicePointer, ::cuMemsetD2D8, ::cuMemsetD2D16,
 * ::cuMemsetD2D32, ::cuMemsetD8, ::cuMemsetD16, ::cuMemsetD32
 */
CUresult CUDAAPI cuMemcpy2D(const CUDA_MEMCPY2D *pCopy);
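/*
 * A minimal usage sketch (illustrative only, not part of the original header):
 * filling in ::CUDA_MEMCPY2D to copy a tightly packed host image into a
 * pitched device allocation. The 640x480 dimensions are arbitrary; error
 * checks are abbreviated.
 *
 * \code
   #include <cuda.h>
   #include <string.h>

   unsigned char hostImg[480][640];           // 640 x 480 bytes, packed rows
   CUdeviceptr dImg = 0;
   size_t pitch = 0;
   CUDA_MEMCPY2D cp;

   if (cuMemAllocPitch(&dImg, &pitch, 640, 480, 4) == CUDA_SUCCESS) {
       memset(&cp, 0, sizeof cp);             // leave unused members zeroed
       cp.srcMemoryType = CU_MEMORYTYPE_HOST;
       cp.srcHost       = hostImg;
       cp.srcPitch      = 640;                // packed source rows
       cp.dstMemoryType = CU_MEMORYTYPE_DEVICE;
       cp.dstDevice     = dImg;
       cp.dstPitch      = pitch;              // pitch from cuMemAllocPitch always works
       cp.WidthInBytes  = 640;
       cp.Height        = 480;
       cuMemcpy2D(&cp);
       cuMemFree(dImg);
   }
 * \endcode
 */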
/**
* \brief Copies memory for 2D arrays
*
 * Perform a 2D memory copy according to the parameters specified in \p pCopy.
* The ::CUDA_MEMCPY2D structure is defined as:
*
* \code
typedef struct CUDA_MEMCPY2D_st {
unsigned int srcXInBytes, srcY;
CUmemorytype srcMemoryType;
const void *srcHost;
CUdeviceptr srcDevice;
CUarray srcArray;
unsigned int srcPitch;
unsigned int dstXInBytes, dstY;
CUmemorytype dstMemoryType;
void *dstHost;
CUdeviceptr dstDevice;
CUarray dstArray;
unsigned int dstPitch;
unsigned int WidthInBytes;
unsigned int Height;
} CUDA_MEMCPY2D;
* \endcode
 * where:
 * - ::srcMemoryType and ::dstMemoryType specify the type of memory of the
 *   source and destination, respectively; ::CUmemorytype_enum is defined as:
 *
 * \code
   typedef enum CUmemorytype_enum {
      CU_MEMORYTYPE_HOST = 0x01,
      CU_MEMORYTYPE_DEVICE = 0x02,
      CU_MEMORYTYPE_ARRAY = 0x03
   } CUmemorytype;
 * \endcode
 *
 * \par
 * If ::srcMemoryType is ::CU_MEMORYTYPE_HOST, ::srcHost and ::srcPitch
 * specify the (host) base address of the source data and the bytes per row to
 * apply. ::srcArray is ignored.
 *
 * \par
 * If ::srcMemoryType is ::CU_MEMORYTYPE_DEVICE, ::srcDevice and ::srcPitch
 * specify the (device) base address of the source data and the bytes per row
 * to apply. ::srcArray is ignored.
 *
 * \par
 * If ::srcMemoryType is ::CU_MEMORYTYPE_ARRAY, ::srcArray specifies the
 * handle of the source data. ::srcHost, ::srcDevice and ::srcPitch are
 * ignored.
 *
 * \par
 * If ::dstMemoryType is ::CU_MEMORYTYPE_HOST, ::dstHost and ::dstPitch
 * specify the (host) base address of the destination data and the bytes per
 * row to apply. ::dstArray is ignored.
 *
 * \par
 * If ::dstMemoryType is ::CU_MEMORYTYPE_DEVICE, ::dstDevice and ::dstPitch
 * specify the (device) base address of the destination data and the bytes per
 * row to apply. ::dstArray is ignored.
 *
 * \par
 * If ::dstMemoryType is ::CU_MEMORYTYPE_ARRAY, ::dstArray specifies the
 * handle of the destination data. ::dstHost, ::dstDevice and ::dstPitch are
 * ignored.
 *
 * - ::srcXInBytes and ::srcY specify the base address of the source data for
 *   the copy.
 *
 * \par
 * For host pointers, the starting address is
 * \code
   void* Start = (void*)((char*)srcHost+srcY*srcPitch + srcXInBytes);
 * \endcode
 *
 * \par
 * For device pointers, the starting address is
 * \code
   CUdeviceptr Start = srcDevice+srcY*srcPitch+srcXInBytes;
 * \endcode
 *
 * \par
 * For CUDA arrays, ::srcXInBytes must be evenly divisible by the array
 * element size.
 *
 * - ::dstXInBytes and ::dstY specify the base address of the destination data
 *   for the copy.
 *
 * \par
 * For host pointers, the base address is
 * \code
   void* dstStart = (void*)((char*)dstHost+dstY*dstPitch + dstXInBytes);
 * \endcode
 *
 * \par
 * For device pointers, the starting address is
 * \code
   CUdeviceptr dstStart = dstDevice+dstY*dstPitch+dstXInBytes;
 * \endcode
 *
 * \par
 * For CUDA arrays, ::dstXInBytes must be evenly divisible by the array
 * element size.
 *
 * - ::WidthInBytes and ::Height specify the width (in bytes) and height of
 *   the 2D copy being performed.
 * - If specified, ::srcPitch must be greater than or equal to ::WidthInBytes +
 *   ::srcXInBytes, and ::dstPitch must be greater than or equal to
 *   ::WidthInBytes + ::dstXInBytes.
 *
 * \par
 * ::cuMemcpy2D() returns an error if any pitch is greater than the maximum
 * allowed (::CU_DEVICE_ATTRIBUTE_MAX_PITCH). ::cuMemAllocPitch() passes back
 * pitches that always work with ::cuMemcpy2D(). On intra-device memory copies
 * (device ↔ device, CUDA array ↔ device, CUDA array ↔ CUDA array),
 * ::cuMemcpy2D() may fail for pitches not computed by ::cuMemAllocPitch().
 * ::cuMemcpy2DUnaligned() does not have this restriction, but may run
 * significantly slower in the cases where ::cuMemcpy2D() would have returned
 * an error code.
 *
 * \param pCopy - Parameters for the memory copy
 *
 * \return
 * ::CUDA_SUCCESS,
 * ::CUDA_ERROR_DEINITIALIZED,
 * ::CUDA_ERROR_NOT_INITIALIZED,
 * ::CUDA_ERROR_INVALID_CONTEXT,
 * ::CUDA_ERROR_INVALID_VALUE
 * \notefnerr
 *
 * \sa ::cuArray3DCreate, ::cuArray3DGetDescriptor, ::cuArrayCreate,
 * ::cuArrayDestroy, ::cuArrayGetDescriptor, ::cuMemAlloc, ::cuMemAllocHost,
 * ::cuMemAllocPitch, ::cuMemcpy2D, ::cuMemcpy2DAsync,
 * ::cuMemcpy3D, ::cuMemcpy3DAsync, ::cuMemcpyAtoA, ::cuMemcpyAtoD,
 * ::cuMemcpyAtoH, ::cuMemcpyAtoHAsync, ::cuMemcpyDtoA, ::cuMemcpyDtoD, ::cuMemcpyDtoDAsync,
 * ::cuMemcpyDtoH, ::cuMemcpyDtoHAsync, ::cuMemcpyHtoA, ::cuMemcpyHtoAAsync,
 * ::cuMemcpyHtoD, ::cuMemcpyHtoDAsync, ::cuMemFree, ::cuMemFreeHost,
 * ::cuMemGetAddressRange, ::cuMemGetInfo, ::cuMemHostAlloc,
 * ::cuMemHostGetDevicePointer, ::cuMemsetD2D8, ::cuMemsetD2D16,
 * ::cuMemsetD2D32, ::cuMemsetD8, ::cuMemsetD16, ::cuMemsetD32
 */
CUresult CUDAAPI cuMemcpy2DUnaligned(const CUDA_MEMCPY2D *pCopy);
/**
* \brief Copies memory for 3D arrays
*
* Perform a 3D memory copy according to the parameters specified in
* \p pCopy. The ::CUDA_MEMCPY3D structure is defined as:
*
* \code
   typedef struct CUDA_MEMCPY3D_st {
      unsigned int srcXInBytes, srcY, srcZ;
      unsigned int srcLOD;
      CUmemorytype srcMemoryType;
      const void *srcHost;
      CUdeviceptr srcDevice;
      CUarray srcArray;
      unsigned int srcPitch;  // ignored when src is array
      unsigned int srcHeight; // ignored when src is array; may be 0 if Depth==1
      unsigned int dstXInBytes, dstY, dstZ;
      unsigned int dstLOD;
      CUmemorytype dstMemoryType;
      void *dstHost;
      CUdeviceptr dstDevice;
      CUarray dstArray;
      unsigned int dstPitch;  // ignored when dst is array
      unsigned int dstHeight; // ignored when dst is array; may be 0 if Depth==1
      unsigned int WidthInBytes;
      unsigned int Height;
      unsigned int Depth;
   } CUDA_MEMCPY3D;
 * \endcode
 * where:
 * - ::srcMemoryType and ::dstMemoryType specify the type of memory of the
 *   source and destination, respectively; ::CUmemorytype_enum is defined as:
 *
 * \code
   typedef enum CUmemorytype_enum {
      CU_MEMORYTYPE_HOST = 0x01,
      CU_MEMORYTYPE_DEVICE = 0x02,
      CU_MEMORYTYPE_ARRAY = 0x03
   } CUmemorytype;
 * \endcode
 *
 * \par
 * If ::srcMemoryType is ::CU_MEMORYTYPE_HOST, ::srcHost, ::srcPitch and
 * ::srcHeight specify the (host) base address of the source data, the bytes
 * per row, and the height of each 2D slice of the 3D array. ::srcArray is
 * ignored.
 *
 * \par
 * If ::srcMemoryType is ::CU_MEMORYTYPE_DEVICE, ::srcDevice, ::srcPitch and
 * ::srcHeight specify the (device) base address of the source data, the bytes
 * per row, and the height of each 2D slice of the 3D array. ::srcArray is
 * ignored.
 *
 * \par
 * If ::srcMemoryType is ::CU_MEMORYTYPE_ARRAY, ::srcArray specifies the
 * handle of the source data. ::srcHost, ::srcDevice, ::srcPitch and
 * ::srcHeight are ignored.
 *
 * \par
 * If ::dstMemoryType is ::CU_MEMORYTYPE_HOST, ::dstHost, ::dstPitch and
 * ::dstHeight specify the (host) base address of the destination data, the
 * bytes per row, and the height of each 2D slice of the 3D array. ::dstArray
 * is ignored.
 *
 * \par
 * If ::dstMemoryType is ::CU_MEMORYTYPE_DEVICE, ::dstDevice, ::dstPitch and
 * ::dstHeight specify the (device) base address of the destination data, the
 * bytes per row, and the height of each 2D slice of the 3D array. ::dstArray
 * is ignored.
 *
 * \par
 * If ::dstMemoryType is ::CU_MEMORYTYPE_ARRAY, ::dstArray specifies the
 * handle of the destination data. ::dstHost, ::dstDevice, ::dstPitch and
 * ::dstHeight are ignored.
 *
 * - ::srcXInBytes, ::srcY and ::srcZ specify the base address of the source
 *   data for the copy.
 *
 * \par
 * For host pointers, the starting address is
 * \code
   void* Start = (void*)((char*)srcHost+(srcZ*srcHeight+srcY)*srcPitch + srcXInBytes);
 * \endcode
 *
 * \par
 * For device pointers, the starting address is
 * \code
   CUdeviceptr Start = srcDevice+(srcZ*srcHeight+srcY)*srcPitch+srcXInBytes;
 * \endcode
 *
 * \par
 * For CUDA arrays, ::srcXInBytes must be evenly divisible by the array
 * element size.
 *
 * - ::dstXInBytes, ::dstY and ::dstZ specify the base address of the
 *   destination data for the copy.
 *
 * \par
 * For host pointers, the base address is
 * \code
   void* dstStart = (void*)((char*)dstHost+(dstZ*dstHeight+dstY)*dstPitch + dstXInBytes);
 * \endcode
 *
 * \par
 * For device pointers, the starting address is
 * \code
   CUdeviceptr dstStart = dstDevice+(dstZ*dstHeight+dstY)*dstPitch+dstXInBytes;
 * \endcode
 *
 * \par
 * For CUDA arrays, ::dstXInBytes must be evenly divisible by the array
 * element size.
 *
 * - ::WidthInBytes, ::Height and ::Depth specify the width (in bytes), height
 *   and depth of the 3D copy being performed.
 * - If specified, ::srcPitch must be greater than or equal to ::WidthInBytes +
 *   ::srcXInBytes, and ::dstPitch must be greater than or equal to
 *   ::WidthInBytes + ::dstXInBytes.
 * - If specified, ::srcHeight must be greater than or equal to ::Height +
 *   ::srcY, and ::dstHeight must be greater than or equal to ::Height + ::dstY.
 *
 * \par
 * ::cuMemcpy3D() returns an error if any pitch is greater than the maximum
 * allowed (::CU_DEVICE_ATTRIBUTE_MAX_PITCH).
 *
 * The ::srcLOD and ::dstLOD members of the ::CUDA_MEMCPY3D structure must be
 * set to 0.
 *
 * \param pCopy - Parameters for the memory copy
 *
 * \return
 * ::CUDA_SUCCESS,
 * ::CUDA_ERROR_DEINITIALIZED,
 * ::CUDA_ERROR_NOT_INITIALIZED,
 * ::CUDA_ERROR_INVALID_CONTEXT,
 * ::CUDA_ERROR_INVALID_VALUE
 * \notefnerr
 *
 * \sa ::cuArray3DCreate, ::cuArray3DGetDescriptor, ::cuArrayCreate,
 * ::cuArrayDestroy, ::cuArrayGetDescriptor, ::cuMemAlloc, ::cuMemAllocHost,
 * ::cuMemAllocPitch, ::cuMemcpy2D, ::cuMemcpy2DAsync, ::cuMemcpy2DUnaligned,
 * ::cuMemcpy3DAsync, ::cuMemcpyAtoA, ::cuMemcpyAtoD,
 * ::cuMemcpyAtoH, ::cuMemcpyAtoHAsync, ::cuMemcpyDtoA, ::cuMemcpyDtoD, ::cuMemcpyDtoDAsync,
 * ::cuMemcpyDtoH, ::cuMemcpyDtoHAsync, ::cuMemcpyHtoA, ::cuMemcpyHtoAAsync,
 * ::cuMemcpyHtoD, ::cuMemcpyHtoDAsync, ::cuMemFree, ::cuMemFreeHost,
 * ::cuMemGetAddressRange, ::cuMemGetInfo, ::cuMemHostAlloc,
 * ::cuMemHostGetDevicePointer, ::cuMemsetD2D8, ::cuMemsetD2D16,
 * ::cuMemsetD2D32, ::cuMemsetD8, ::cuMemsetD16, ::cuMemsetD32
 */
CUresult CUDAAPI cuMemcpy3D(const CUDA_MEMCPY3D *pCopy);
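/*
 * A minimal usage sketch (illustrative only, not part of the original header):
 * a host -> device 3D copy of a W x H x D byte volume into linear device
 * memory. Zero-initializing the descriptor keeps ::srcLOD and ::dstLOD at 0,
 * as required above; the dimensions are arbitrary and error checks abbreviated.
 *
 * \code
   #include <cuda.h>
   #include <string.h>

   enum { W = 64, H = 64, D = 16 };
   unsigned char hostVol[W * H * D];
   CUdeviceptr devVol = 0;
   CUDA_MEMCPY3D cp;

   if (cuMemAlloc(&devVol, sizeof hostVol) == CUDA_SUCCESS) {
       memset(&cp, 0, sizeof cp);
       cp.srcMemoryType = CU_MEMORYTYPE_HOST;
       cp.srcHost       = hostVol;
       cp.srcPitch      = W;                  // bytes per row
       cp.srcHeight     = H;                  // rows per 2D slice
       cp.dstMemoryType = CU_MEMORYTYPE_DEVICE;
       cp.dstDevice     = devVol;
       cp.dstPitch      = W;
       cp.dstHeight     = H;
       cp.WidthInBytes  = W;
       cp.Height        = H;
       cp.Depth         = D;
       cuMemcpy3D(&cp);
       cuMemFree(devVol);
   }
 * \endcode
 */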
/**
* \brief Copies memory from Host to Device
*
 * Copies from host memory to device memory. \p dstDevice and \p srcHost are
 * the base addresses of the destination and source, respectively. \p ByteCount
 * specifies the number of bytes to copy.
 *
 * ::cuMemcpyHtoDAsync() is asynchronous and can optionally be associated to a
 * stream by passing a non-zero \p hStream argument. It only works on
 * page-locked memory and returns an error if a pointer to pageable memory is
 * passed as input.
*
* \param dstDevice - Destination device pointer
* \param srcHost - Source host pointer
* \param ByteCount - Size of memory copy in bytes
* \param hStream - Stream identifier
*
* \return
* ::CUDA_SUCCESS,
* ::CUDA_ERROR_DEINITIALIZED,
* ::CUDA_ERROR_NOT_INITIALIZED,
* ::CUDA_ERROR_INVALID_CONTEXT,
* ::CUDA_ERROR_INVALID_VALUE
* \notefnerr
*
 * \sa ::cuArray3DCreate, ::cuArray3DGetDescriptor, ::cuArrayCreate,
 * ::cuArrayDestroy, ::cuArrayGetDescriptor, ::cuMemAlloc, ::cuMemAllocHost,
 * ::cuMemAllocPitch, ::cuMemcpy2D, ::cuMemcpy2DAsync, ::cuMemcpy2DUnaligned,
 * ::cuMemcpy3D, ::cuMemcpy3DAsync, ::cuMemcpyAtoA, ::cuMemcpyAtoD,
 * ::cuMemcpyAtoH, ::cuMemcpyAtoHAsync, ::cuMemcpyDtoA, ::cuMemcpyDtoD, ::cuMemcpyDtoDAsync,
 * ::cuMemcpyDtoH, ::cuMemcpyDtoHAsync, ::cuMemcpyHtoA, ::cuMemcpyHtoAAsync,
 * ::cuMemcpyHtoD, ::cuMemFree, ::cuMemFreeHost,
 * ::cuMemGetAddressRange, ::cuMemGetInfo, ::cuMemHostAlloc,
 * ::cuMemHostGetDevicePointer, ::cuMemsetD2D8, ::cuMemsetD2D8Async,
 * ::cuMemsetD2D16, ::cuMemsetD2D16Async, ::cuMemsetD2D32, ::cuMemsetD2D32Async,
 * ::cuMemsetD8, ::cuMemsetD8Async, ::cuMemsetD16, ::cuMemsetD16Async,
 * ::cuMemsetD32, ::cuMemsetD32Async
 */
CUresult CUDAAPI cuMemcpyHtoDAsync(CUdeviceptr dstDevice, const void *srcHost, size_t ByteCount, CUstream hStream);
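/*
 * A minimal usage sketch (illustrative only, not part of the original header):
 * overlapping an asynchronous host -> device copy with host work. As
 * documented above, the source buffer must be page-locked, so it comes from
 * ::cuMemHostAlloc(); error checks are abbreviated.
 *
 * \code
   #include <cuda.h>

   void *pinned = NULL;
   CUdeviceptr dDst = 0;
   CUstream stream;
   size_t bytes = 1 << 20;

   cuMemAlloc(&dDst, bytes);
   cuMemHostAlloc(&pinned, bytes, 0);
   cuStreamCreate(&stream, 0);

   // ... fill the pinned buffer ...
   cuMemcpyHtoDAsync(dDst, pinned, bytes, stream);

   // The host may do unrelated work here while the copy proceeds.

   cuStreamSynchronize(stream);   // wait before touching the buffer again
   cuStreamDestroy(stream);
   cuMemFreeHost(pinned);
   cuMemFree(dDst);
 * \endcode
 */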
/**
* \brief Copies memory from Device to Host
*
 * Copies from device to host memory. \p dstHost and \p srcDevice specify the
 * base pointers of the destination and source, respectively. \p ByteCount
 * specifies the number of bytes to copy.
 *
 * ::cuMemcpyDtoHAsync() is asynchronous and can optionally be associated to a
 * stream by passing a non-zero \p hStream argument. It only works on
 * page-locked memory and returns an error if a pointer to pageable memory is
 * passed as input.
*
* \param dstHost - Destination host pointer
* \param srcDevice - Source device pointer
* \param ByteCount - Size of memory copy in bytes
* \param hStream - Stream identifier
*
* \return
* ::CUDA_SUCCESS,
* ::CUDA_ERROR_DEINITIALIZED,
* ::CUDA_ERROR_NOT_INITIALIZED,
* ::CUDA_ERROR_INVALID_CONTEXT,
* ::CUDA_ERROR_INVALID_VALUE
* \notefnerr
*
 * \sa ::cuArray3DCreate, ::cuArray3DGetDescriptor, ::cuArrayCreate,
 * ::cuArrayDestroy, ::cuArrayGetDescriptor, ::cuMemAlloc, ::cuMemAllocHost,
 * ::cuMemAllocPitch, ::cuMemcpy2D, ::cuMemcpy2DAsync, ::cuMemcpy2DUnaligned,
 * ::cuMemcpy3D, ::cuMemcpy3DAsync, ::cuMemcpyAtoA, ::cuMemcpyAtoD,
 * ::cuMemcpyAtoH, ::cuMemcpyAtoHAsync, ::cuMemcpyDtoA, ::cuMemcpyDtoD, ::cuMemcpyDtoDAsync,
 * ::cuMemcpyDtoH, ::cuMemcpyHtoA, ::cuMemcpyHtoAAsync,
 * ::cuMemcpyHtoD, ::cuMemcpyHtoDAsync, ::cuMemFree, ::cuMemFreeHost,
 * ::cuMemGetAddressRange, ::cuMemGetInfo, ::cuMemHostAlloc,
 * ::cuMemHostGetDevicePointer, ::cuMemsetD2D8, ::cuMemsetD2D8Async,
 * ::cuMemsetD2D16, ::cuMemsetD2D16Async, ::cuMemsetD2D32, ::cuMemsetD2D32Async,
 * ::cuMemsetD8, ::cuMemsetD8Async, ::cuMemsetD16, ::cuMemsetD16Async,
 * ::cuMemsetD32, ::cuMemsetD32Async
 */
CUresult CUDAAPI cuMemcpyDtoHAsync(void *dstHost, CUdeviceptr srcDevice, size_t ByteCount, CUstream hStream);
/**
* \brief Copies memory from Device to Device
*
 * Copies from device memory to device memory. \p dstDevice and \p srcDevice
 * are the base pointers of the destination and source, respectively.
 * \p ByteCount specifies the number of bytes to copy. Note that this function
 * is asynchronous and can optionally be associated to a stream by passing a
 * non-zero \p hStream argument.
*
* \param dstDevice - Destination device pointer
* \param srcDevice - Source device pointer
* \param ByteCount - Size of memory copy in bytes
* \param hStream - Stream identifier
*
* \return
* ::CUDA_SUCCESS,
* ::CUDA_ERROR_DEINITIALIZED,
* ::CUDA_ERROR_NOT_INITIALIZED,
* ::CUDA_ERROR_INVALID_CONTEXT,
* ::CUDA_ERROR_INVALID_VALUE
* \notefnerr
*
 * \sa ::cuArray3DCreate, ::cuArray3DGetDescriptor, ::cuArrayCreate,
 * ::cuArrayDestroy, ::cuArrayGetDescriptor, ::cuMemAlloc, ::cuMemAllocHost,
 * ::cuMemAllocPitch, ::cuMemcpy2D, ::cuMemcpy2DAsync, ::cuMemcpy2DUnaligned,
 * ::cuMemcpy3D, ::cuMemcpy3DAsync, ::cuMemcpyAtoA, ::cuMemcpyAtoD,
 * ::cuMemcpyAtoH, ::cuMemcpyAtoHAsync, ::cuMemcpyDtoA, ::cuMemcpyDtoD,
 * ::cuMemcpyDtoH, ::cuMemcpyDtoHAsync, ::cuMemcpyHtoA, ::cuMemcpyHtoAAsync,
 * ::cuMemcpyHtoD, ::cuMemcpyHtoDAsync, ::cuMemFree, ::cuMemFreeHost,
 * ::cuMemGetAddressRange, ::cuMemGetInfo, ::cuMemHostAlloc,
 * ::cuMemHostGetDevicePointer, ::cuMemsetD2D8, ::cuMemsetD2D8Async,
 * ::cuMemsetD2D16, ::cuMemsetD2D16Async, ::cuMemsetD2D32, ::cuMemsetD2D32Async,
 * ::cuMemsetD8, ::cuMemsetD8Async, ::cuMemsetD16, ::cuMemsetD16Async,
 * ::cuMemsetD32, ::cuMemsetD32Async
 */
CUresult CUDAAPI cuMemcpyDtoDAsync(CUdeviceptr dstDevice, CUdeviceptr srcDevice, size_t ByteCount, CUstream hStream);
/**
* \brief Copies memory from Host to Array
*
* Copies from host memory to a 1D CUDA array. \p dstArray and \p dstOffset
* specify the CUDA array handle and starting offset in bytes of the
* destination data. \p srcHost specifies the base address of the source.
* \p ByteCount specifies the number of bytes to copy.
*
 * ::cuMemcpyHtoAAsync() is asynchronous and can optionally be associated to a
 * stream by passing a non-zero \p hStream argument. It only works on
 * page-locked memory and returns an error if a pointer to pageable memory is
 * passed as input.
*
* \param dstArray - Destination array
* \param dstOffset - Offset in bytes of destination array
* \param srcHost - Source host pointer
* \param ByteCount - Size of memory copy in bytes
* \param hStream - Stream identifier
*
* \return
* ::CUDA_SUCCESS,
* ::CUDA_ERROR_DEINITIALIZED,
* ::CUDA_ERROR_NOT_INITIALIZED,
* ::CUDA_ERROR_INVALID_CONTEXT,
* ::CUDA_ERROR_INVALID_VALUE
* \notefnerr
*
 * \sa ::cuArray3DCreate, ::cuArray3DGetDescriptor, ::cuArrayCreate,
 * ::cuArrayDestroy, ::cuArrayGetDescriptor, ::cuMemAlloc, ::cuMemAllocHost,
 * ::cuMemAllocPitch, ::cuMemcpy2D, ::cuMemcpy2DAsync, ::cuMemcpy2DUnaligned,
 * ::cuMemcpy3D, ::cuMemcpy3DAsync, ::cuMemcpyAtoA, ::cuMemcpyAtoD,
 * ::cuMemcpyAtoH, ::cuMemcpyAtoHAsync, ::cuMemcpyDtoA, ::cuMemcpyDtoD, ::cuMemcpyDtoDAsync,
 * ::cuMemcpyDtoH, ::cuMemcpyDtoHAsync, ::cuMemcpyHtoA,
 * ::cuMemcpyHtoD, ::cuMemcpyHtoDAsync, ::cuMemFree, ::cuMemFreeHost,
 * ::cuMemGetAddressRange, ::cuMemGetInfo, ::cuMemHostAlloc,
 * ::cuMemHostGetDevicePointer, ::cuMemsetD2D8, ::cuMemsetD2D8Async,
 * ::cuMemsetD2D16, ::cuMemsetD2D16Async, ::cuMemsetD2D32, ::cuMemsetD2D32Async,
 * ::cuMemsetD8, ::cuMemsetD8Async, ::cuMemsetD16, ::cuMemsetD16Async,
 * ::cuMemsetD32, ::cuMemsetD32Async
 */
CUresult CUDAAPI cuMemcpyHtoAAsync(CUarray dstArray, size_t dstOffset, const void *srcHost, size_t ByteCount, CUstream hStream);
/**
* \brief Copies memory from Array to Host
*
 * Copies from one 1D CUDA array to host memory. \p dstHost specifies the base
 * pointer of the destination. \p srcArray and \p srcOffset specify the CUDA
 * array handle and starting offset in bytes of the source data.
 * \p ByteCount specifies the number of bytes to copy.
 *
 * ::cuMemcpyAtoHAsync() is asynchronous and can optionally be associated to a
 * stream by passing a non-zero \p hStream argument. It only works on
 * page-locked host memory and returns an error if a pointer to pageable
 * memory is passed as input.
*
* \param dstHost - Destination pointer
* \param srcArray - Source array
* \param srcOffset - Offset in bytes of source array
* \param ByteCount - Size of memory copy in bytes
* \param hStream - Stream identifier
*
* \return
* ::CUDA_SUCCESS,
* ::CUDA_ERROR_DEINITIALIZED,
* ::CUDA_ERROR_NOT_INITIALIZED,
* ::CUDA_ERROR_INVALID_CONTEXT,
* ::CUDA_ERROR_INVALID_VALUE
* \notefnerr
*
 * \sa ::cuArray3DCreate, ::cuArray3DGetDescriptor, ::cuArrayCreate,
 * ::cuArrayDestroy, ::cuArrayGetDescriptor, ::cuMemAlloc, ::cuMemAllocHost,
 * ::cuMemAllocPitch, ::cuMemcpy2D, ::cuMemcpy2DAsync, ::cuMemcpy2DUnaligned,
 * ::cuMemcpy3D, ::cuMemcpy3DAsync, ::cuMemcpyAtoA, ::cuMemcpyAtoD,
 * ::cuMemcpyAtoH, ::cuMemcpyDtoA, ::cuMemcpyDtoD, ::cuMemcpyDtoDAsync,
 * ::cuMemcpyDtoH, ::cuMemcpyDtoHAsync, ::cuMemcpyHtoA, ::cuMemcpyHtoAAsync,
 * ::cuMemcpyHtoD, ::cuMemcpyHtoDAsync, ::cuMemFree, ::cuMemFreeHost,
 * ::cuMemGetAddressRange, ::cuMemGetInfo, ::cuMemHostAlloc,
 * ::cuMemHostGetDevicePointer, ::cuMemsetD2D8, ::cuMemsetD2D8Async,
 * ::cuMemsetD2D16, ::cuMemsetD2D16Async, ::cuMemsetD2D32, ::cuMemsetD2D32Async,
 * ::cuMemsetD8, ::cuMemsetD8Async, ::cuMemsetD16, ::cuMemsetD16Async,
 * ::cuMemsetD32, ::cuMemsetD32Async
 */
CUresult CUDAAPI cuMemcpyAtoHAsync(void *dstHost, CUarray srcArray, size_t srcOffset, size_t ByteCount, CUstream hStream);
/**
* \brief Copies memory for 2D arrays
*
 * Perform a 2D memory copy according to the parameters specified in \p pCopy.
* The ::CUDA_MEMCPY2D structure is defined as:
*
* \code
typedef struct CUDA_MEMCPY2D_st {
unsigned int srcXInBytes, srcY;
CUmemorytype srcMemoryType;
const void *srcHost;
CUdeviceptr srcDevice;
CUarray srcArray;
unsigned int srcPitch;
unsigned int dstXInBytes, dstY;
CUmemorytype dstMemoryType;
void *dstHost;
CUdeviceptr dstDevice;
CUarray dstArray;
unsigned int dstPitch;
unsigned int WidthInBytes;
unsigned int Height;
} CUDA_MEMCPY2D;
* \endcode
 * where:
 * - ::srcMemoryType and ::dstMemoryType specify the type of memory of the
 *   source and destination, respectively; ::CUmemorytype_enum is defined as:
 *
 * \code
   typedef enum CUmemorytype_enum {
      CU_MEMORYTYPE_HOST = 0x01,
      CU_MEMORYTYPE_DEVICE = 0x02,
      CU_MEMORYTYPE_ARRAY = 0x03
   } CUmemorytype;
 * \endcode
 *
 * \par
 * If ::srcMemoryType is ::CU_MEMORYTYPE_HOST, ::srcHost and ::srcPitch
 * specify the (host) base address of the source data and the bytes per row to
 * apply. ::srcArray is ignored.
 *
 * \par
 * If ::srcMemoryType is ::CU_MEMORYTYPE_DEVICE, ::srcDevice and ::srcPitch
 * specify the (device) base address of the source data and the bytes per row
 * to apply. ::srcArray is ignored.
 *
 * \par
 * If ::srcMemoryType is ::CU_MEMORYTYPE_ARRAY, ::srcArray specifies the
 * handle of the source data. ::srcHost, ::srcDevice and ::srcPitch are
 * ignored.
 *
 * \par
 * If ::dstMemoryType is ::CU_MEMORYTYPE_HOST, ::dstHost and ::dstPitch
 * specify the (host) base address of the destination data and the bytes per
 * row to apply. ::dstArray is ignored.
 *
 * \par
 * If ::dstMemoryType is ::CU_MEMORYTYPE_DEVICE, ::dstDevice and ::dstPitch
 * specify the (device) base address of the destination data and the bytes per
 * row to apply. ::dstArray is ignored.
 *
 * \par
 * If ::dstMemoryType is ::CU_MEMORYTYPE_ARRAY, ::dstArray specifies the
 * handle of the destination data. ::dstHost, ::dstDevice and ::dstPitch are
 * ignored.
 *
 * - ::srcXInBytes and ::srcY specify the base address of the source data for
 *   the copy.
 *
 * \par
 * For host pointers, the starting address is
 * \code
   void* Start = (void*)((char*)srcHost+srcY*srcPitch + srcXInBytes);
 * \endcode
 *
 * \par
 * For device pointers, the starting address is
 * \code
   CUdeviceptr Start = srcDevice+srcY*srcPitch+srcXInBytes;
 * \endcode
 *
 * \par
 * For CUDA arrays, ::srcXInBytes must be evenly divisible by the array
 * element size.
 *
 * - ::dstXInBytes and ::dstY specify the base address of the destination data
 *   for the copy.
 *
 * \par
 * For host pointers, the base address is
 * \code
   void* dstStart = (void*)((char*)dstHost+dstY*dstPitch + dstXInBytes);
 * \endcode
 *
 * \par
 * For device pointers, the starting address is
 * \code
   CUdeviceptr dstStart = dstDevice+dstY*dstPitch+dstXInBytes;
 * \endcode
 *
 * \par
 * For CUDA arrays, ::dstXInBytes must be evenly divisible by the array
 * element size.
 *
 * - ::WidthInBytes and ::Height specify the width (in bytes) and height of
 *   the 2D copy being performed.
 * - If specified, ::srcPitch must be greater than or equal to ::WidthInBytes +
 *   ::srcXInBytes, and ::dstPitch must be greater than or equal to
 *   ::WidthInBytes + ::dstXInBytes.
 *
 * \par
 * ::cuMemcpy2D() returns an error if any pitch is greater than the maximum
 * allowed (::CU_DEVICE_ATTRIBUTE_MAX_PITCH). ::cuMemAllocPitch() passes back
 * pitches that always work with ::cuMemcpy2D(). On intra-device memory copies
 * (device ↔ device, CUDA array ↔ device, CUDA array ↔ CUDA array),
 * ::cuMemcpy2D() may fail for pitches not computed by ::cuMemAllocPitch().
 * ::cuMemcpy2DUnaligned() does not have this restriction, but may run
 * significantly slower in the cases where ::cuMemcpy2D() would have returned
 * an error code.
 *
 * ::cuMemcpy2DAsync() is asynchronous and can optionally be associated to a
 * stream by passing a non-zero \p hStream argument. It only works on
 * page-locked host memory and returns an error if a pointer to pageable
 * memory is passed as input.
 *
 * \param pCopy   - Parameters for the memory copy
 * \param hStream - Stream identifier
 *
 * \return
 * ::CUDA_SUCCESS,
 * ::CUDA_ERROR_DEINITIALIZED,
 * ::CUDA_ERROR_NOT_INITIALIZED,
 * ::CUDA_ERROR_INVALID_CONTEXT,
 * ::CUDA_ERROR_INVALID_VALUE
 * \notefnerr
 *
 * \sa ::cuArray3DCreate, ::cuArray3DGetDescriptor, ::cuArrayCreate,
 * ::cuArrayDestroy, ::cuArrayGetDescriptor, ::cuMemAlloc, ::cuMemAllocHost,
 * ::cuMemAllocPitch, ::cuMemcpy2D, ::cuMemcpy2DUnaligned,
 * ::cuMemcpy3D, ::cuMemcpy3DAsync, ::cuMemcpyAtoA, ::cuMemcpyAtoD,
 * ::cuMemcpyAtoH, ::cuMemcpyAtoHAsync, ::cuMemcpyDtoA, ::cuMemcpyDtoD, ::cuMemcpyDtoDAsync,
 * ::cuMemcpyDtoH, ::cuMemcpyDtoHAsync, ::cuMemcpyHtoA, ::cuMemcpyHtoAAsync,
 * ::cuMemcpyHtoD, ::cuMemcpyHtoDAsync, ::cuMemFree, ::cuMemFreeHost,
 * ::cuMemGetAddressRange, ::cuMemGetInfo, ::cuMemHostAlloc,
 * ::cuMemHostGetDevicePointer, ::cuMemsetD2D8, ::cuMemsetD2D8Async,
 * ::cuMemsetD2D16, ::cuMemsetD2D16Async, ::cuMemsetD2D32, ::cuMemsetD2D32Async,
 * ::cuMemsetD8, ::cuMemsetD8Async, ::cuMemsetD16, ::cuMemsetD16Async,
 * ::cuMemsetD32, ::cuMemsetD32Async
 */
CUresult CUDAAPI cuMemcpy2DAsync(const CUDA_MEMCPY2D *pCopy, CUstream hStream);
/**
* \brief Copies memory for 3D arrays
*
* Perform a 3D memory copy according to the parameters specified in
* \p pCopy. The ::CUDA_MEMCPY3D structure is defined as:
*
* \code
   typedef struct CUDA_MEMCPY3D_st {
      unsigned int srcXInBytes, srcY, srcZ;
      unsigned int srcLOD;
      CUmemorytype srcMemoryType;
      const void *srcHost;
      CUdeviceptr srcDevice;
      CUarray srcArray;
      unsigned int srcPitch;  // ignored when src is array
      unsigned int srcHeight; // ignored when src is array; may be 0 if Depth==1
      unsigned int dstXInBytes, dstY, dstZ;
      unsigned int dstLOD;
      CUmemorytype dstMemoryType;
      void *dstHost;
      CUdeviceptr dstDevice;
      CUarray dstArray;
      unsigned int dstPitch;  // ignored when dst is array
      unsigned int dstHeight; // ignored when dst is array; may be 0 if Depth==1
      unsigned int WidthInBytes;
      unsigned int Height;
      unsigned int Depth;
   } CUDA_MEMCPY3D;
 * \endcode
 * where:
 * - ::srcMemoryType and ::dstMemoryType specify the type of memory of the
 *   source and destination, respectively; ::CUmemorytype_enum is defined as:
 *
 * \code
   typedef enum CUmemorytype_enum {
      CU_MEMORYTYPE_HOST = 0x01,
      CU_MEMORYTYPE_DEVICE = 0x02,
      CU_MEMORYTYPE_ARRAY = 0x03
   } CUmemorytype;
 * \endcode
 *
 * \par
 * If ::srcMemoryType is ::CU_MEMORYTYPE_HOST, ::srcHost, ::srcPitch and
 * ::srcHeight specify the (host) base address of the source data, the bytes
 * per row, and the height of each 2D slice of the 3D array. ::srcArray is
 * ignored.
 *
 * \par
 * If ::srcMemoryType is ::CU_MEMORYTYPE_DEVICE, ::srcDevice, ::srcPitch and
 * ::srcHeight specify the (device) base address of the source data, the bytes
 * per row, and the height of each 2D slice of the 3D array. ::srcArray is
 * ignored.
 *
 * \par
 * If ::srcMemoryType is ::CU_MEMORYTYPE_ARRAY, ::srcArray specifies the
 * handle of the source data. ::srcHost, ::srcDevice, ::srcPitch and
 * ::srcHeight are ignored.
 *
 * \par
 * If ::dstMemoryType is ::CU_MEMORYTYPE_HOST, ::dstHost, ::dstPitch and
 * ::dstHeight specify the (host) base address of the destination data, the
 * bytes per row, and the height of each 2D slice of the 3D array. ::dstArray
 * is ignored.
 *
 * \par
 * If ::dstMemoryType is ::CU_MEMORYTYPE_DEVICE, ::dstDevice, ::dstPitch and
 * ::dstHeight specify the (device) base address of the destination data, the
 * bytes per row, and the height of each 2D slice of the 3D array. ::dstArray
 * is ignored.
 *
 * \par
 * If ::dstMemoryType is ::CU_MEMORYTYPE_ARRAY, ::dstArray specifies the
 * handle of the destination data. ::dstHost, ::dstDevice, ::dstPitch and
 * ::dstHeight are ignored.
 *
 * - ::srcXInBytes, ::srcY and ::srcZ specify the base address of the source
 *   data for the copy.
 *
 * \par
 * For host pointers, the starting address is
 * \code
   void* Start = (void*)((char*)srcHost+(srcZ*srcHeight+srcY)*srcPitch + srcXInBytes);
 * \endcode
 *
 * \par
 * For device pointers, the starting address is
 * \code
   CUdeviceptr Start = srcDevice+(srcZ*srcHeight+srcY)*srcPitch+srcXInBytes;
 * \endcode
 *
 * \par
 * For CUDA arrays, ::srcXInBytes must be evenly divisible by the array
 * element size.
 *
 * - ::dstXInBytes, ::dstY and ::dstZ specify the base address of the
 *   destination data for the copy.
 *
 * \par
 * For host pointers, the base address is
 * \code
   void* dstStart = (void*)((char*)dstHost+(dstZ*dstHeight+dstY)*dstPitch + dstXInBytes);
 * \endcode
 *
 * \par
 * For device pointers, the starting address is
 * \code
   CUdeviceptr dstStart = dstDevice+(dstZ*dstHeight+dstY)*dstPitch+dstXInBytes;
 * \endcode
 *
 * \par
 * For CUDA arrays, ::dstXInBytes must be evenly divisible by the array
 * element size.
 *
 * - ::WidthInBytes, ::Height and ::Depth specify the width (in bytes), height
 *   and depth of the 3D copy being performed.
 * - If specified, ::srcPitch must be greater than or equal to ::WidthInBytes +
 *   ::srcXInBytes, and ::dstPitch must be greater than or equal to
 *   ::WidthInBytes + ::dstXInBytes.
 * - If specified, ::srcHeight must be greater than or equal to ::Height +
 *   ::srcY, and ::dstHeight must be greater than or equal to ::Height + ::dstY.
 *
 * \par
 * ::cuMemcpy3D() returns an error if any pitch is greater than the maximum
 * allowed (::CU_DEVICE_ATTRIBUTE_MAX_PITCH).
 *
 * ::cuMemcpy3DAsync() is asynchronous and can optionally be associated to a
 * stream by passing a non-zero \p hStream argument. It only works on
 * page-locked host memory and returns an error if a pointer to pageable
 * memory is passed as input.
 *
 * The ::srcLOD and ::dstLOD members of the ::CUDA_MEMCPY3D structure must be
 * set to 0.
 *
 * \param pCopy   - Parameters for the memory copy
 * \param hStream - Stream identifier
 *
 * \return
 * ::CUDA_SUCCESS,
 * ::CUDA_ERROR_DEINITIALIZED,
 * ::CUDA_ERROR_NOT_INITIALIZED,
 * ::CUDA_ERROR_INVALID_CONTEXT,
 * ::CUDA_ERROR_INVALID_VALUE
 * \notefnerr
 *
 * \sa ::cuArray3DCreate, ::cuArray3DGetDescriptor, ::cuArrayCreate,
 * ::cuArrayDestroy, ::cuArrayGetDescriptor, ::cuMemAlloc, ::cuMemAllocHost,
 * ::cuMemAllocPitch, ::cuMemcpy2D, ::cuMemcpy2DAsync, ::cuMemcpy2DUnaligned,
 * ::cuMemcpy3D, ::cuMemcpyAtoA, ::cuMemcpyAtoD,
 * ::cuMemcpyAtoH, ::cuMemcpyAtoHAsync, ::cuMemcpyDtoA, ::cuMemcpyDtoD, ::cuMemcpyDtoDAsync,
 * ::cuMemcpyDtoH, ::cuMemcpyDtoHAsync, ::cuMemcpyHtoA, ::cuMemcpyHtoAAsync,
 * ::cuMemcpyHtoD, ::cuMemcpyHtoDAsync, ::cuMemFree, ::cuMemFreeHost,
 * ::cuMemGetAddressRange, ::cuMemGetInfo, ::cuMemHostAlloc,
 * ::cuMemHostGetDevicePointer, ::cuMemsetD2D8, ::cuMemsetD2D8Async,
 * ::cuMemsetD2D16, ::cuMemsetD2D16Async, ::cuMemsetD2D32, ::cuMemsetD2D32Async,
 * ::cuMemsetD8, ::cuMemsetD8Async, ::cuMemsetD16, ::cuMemsetD16Async,
 * ::cuMemsetD32, ::cuMemsetD32Async
 */
CUresult CUDAAPI cuMemcpy3DAsync(const CUDA_MEMCPY3D *pCopy, CUstream hStream);
/**
* \brief Initializes device memory
*
* Sets the memory range of \p N 8-bit values to the specified value
* \p uc.
*
* \param dstDevice - Destination device pointer
* \param uc - Value to set
* \param N - Number of elements
*
* \return
* ::CUDA_SUCCESS,
* ::CUDA_ERROR_DEINITIALIZED,
* ::CUDA_ERROR_NOT_INITIALIZED,
* ::CUDA_ERROR_INVALID_CONTEXT,
* ::CUDA_ERROR_INVALID_VALUE
* \notefnerr
*
 * \sa ::cuArray3DCreate, ::cuArray3DGetDescriptor, ::cuArrayCreate,
 * ::cuArrayDestroy, ::cuArrayGetDescriptor, ::cuMemAlloc, ::cuMemAllocHost,
 * ::cuMemAllocPitch, ::cuMemcpy2D, ::cuMemcpy2DAsync, ::cuMemcpy2DUnaligned,
 * ::cuMemcpy3D, ::cuMemcpy3DAsync, ::cuMemcpyAtoA, ::cuMemcpyAtoD,
 * ::cuMemcpyAtoH, ::cuMemcpyAtoHAsync, ::cuMemcpyDtoA, ::cuMemcpyDtoD, ::cuMemcpyDtoDAsync,
 * ::cuMemcpyDtoH, ::cuMemcpyDtoHAsync, ::cuMemcpyHtoA, ::cuMemcpyHtoAAsync,
 * ::cuMemcpyHtoD, ::cuMemcpyHtoDAsync, ::cuMemFree, ::cuMemFreeHost,
 * ::cuMemGetAddressRange, ::cuMemGetInfo, ::cuMemHostAlloc,
 * ::cuMemHostGetDevicePointer, ::cuMemsetD2D8, ::cuMemsetD2D8Async,
 * ::cuMemsetD2D16, ::cuMemsetD2D16Async, ::cuMemsetD2D32, ::cuMemsetD2D32Async,
 * ::cuMemsetD8Async, ::cuMemsetD16, ::cuMemsetD16Async,
 * ::cuMemsetD32, ::cuMemsetD32Async
 */
CUresult CUDAAPI cuMemsetD8(CUdeviceptr dstDevice, unsigned char uc, size_t N);
/**
* \brief Initializes device memory
*
* Sets the memory range of \p N 16-bit values to the specified value
* \p us.
*
* \param dstDevice - Destination device pointer
* \param us - Value to set
* \param N - Number of elements
*
* \return
* ::CUDA_SUCCESS,
* ::CUDA_ERROR_DEINITIALIZED,
* ::CUDA_ERROR_NOT_INITIALIZED,
* ::CUDA_ERROR_INVALID_CONTEXT,
* ::CUDA_ERROR_INVALID_VALUE
* \notefnerr
*
 * \sa ::cuArray3DCreate, ::cuArray3DGetDescriptor, ::cuArrayCreate,
 * ::cuArrayDestroy, ::cuArrayGetDescriptor, ::cuMemAlloc, ::cuMemAllocHost,
 * ::cuMemAllocPitch, ::cuMemcpy2D, ::cuMemcpy2DAsync, ::cuMemcpy2DUnaligned,
 * ::cuMemcpy3D, ::cuMemcpy3DAsync, ::cuMemcpyAtoA, ::cuMemcpyAtoD,
 * ::cuMemcpyAtoH, ::cuMemcpyAtoHAsync, ::cuMemcpyDtoA, ::cuMemcpyDtoD, ::cuMemcpyDtoDAsync,
 * ::cuMemcpyDtoH, ::cuMemcpyDtoHAsync, ::cuMemcpyHtoA, ::cuMemcpyHtoAAsync,
 * ::cuMemcpyHtoD, ::cuMemcpyHtoDAsync, ::cuMemFree, ::cuMemFreeHost,
 * ::cuMemGetAddressRange, ::cuMemGetInfo, ::cuMemHostAlloc,
 * ::cuMemHostGetDevicePointer, ::cuMemsetD2D8, ::cuMemsetD2D8Async,
 * ::cuMemsetD2D16, ::cuMemsetD2D16Async, ::cuMemsetD2D32, ::cuMemsetD2D32Async,
 * ::cuMemsetD8, ::cuMemsetD8Async, ::cuMemsetD16Async,
 * ::cuMemsetD32, ::cuMemsetD32Async
 */
CUresult CUDAAPI cuMemsetD16(CUdeviceptr dstDevice, unsigned short us, size_t N);
/**
* \brief Initializes device memory
*
* Sets the memory range of \p N 32-bit values to the specified value
* \p ui.
*
* \param dstDevice - Destination device pointer
* \param ui - Value to set
* \param N - Number of elements
*
* \return
* ::CUDA_SUCCESS,
* ::CUDA_ERROR_DEINITIALIZED,
* ::CUDA_ERROR_NOT_INITIALIZED,
* ::CUDA_ERROR_INVALID_CONTEXT,
* ::CUDA_ERROR_INVALID_VALUE
* \notefnerr
*
 * \sa ::cuArray3DCreate, ::cuArray3DGetDescriptor, ::cuArrayCreate,
 * ::cuArrayDestroy, ::cuArrayGetDescriptor, ::cuMemAlloc, ::cuMemAllocHost,
 * ::cuMemAllocPitch, ::cuMemcpy2D, ::cuMemcpy2DAsync, ::cuMemcpy2DUnaligned,
 * ::cuMemcpy3D, ::cuMemcpy3DAsync, ::cuMemcpyAtoA, ::cuMemcpyAtoD,
 * ::cuMemcpyAtoH, ::cuMemcpyAtoHAsync, ::cuMemcpyDtoA, ::cuMemcpyDtoD, ::cuMemcpyDtoDAsync,
 * ::cuMemcpyDtoH, ::cuMemcpyDtoHAsync, ::cuMemcpyHtoA, ::cuMemcpyHtoAAsync,
 * ::cuMemcpyHtoD, ::cuMemcpyHtoDAsync, ::cuMemFree, ::cuMemFreeHost,
 * ::cuMemGetAddressRange, ::cuMemGetInfo, ::cuMemHostAlloc,
 * ::cuMemHostGetDevicePointer, ::cuMemsetD2D8, ::cuMemsetD2D8Async,
 * ::cuMemsetD2D16, ::cuMemsetD2D16Async, ::cuMemsetD2D32, ::cuMemsetD2D32Async,
 * ::cuMemsetD8, ::cuMemsetD8Async, ::cuMemsetD16, ::cuMemsetD16Async,
 * ::cuMemsetD32Async
 */
CUresult CUDAAPI cuMemsetD32(CUdeviceptr dstDevice, unsigned int ui, size_t N);
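/*
 * A minimal usage sketch (illustrative only, not part of the original header).
 * Note that \p N counts elements, not bytes; writing the IEEE-754 bit pattern
 * of 1.0f is an arbitrary example choice.
 *
 * \code
   #include <cuda.h>

   CUdeviceptr dBuf = 0;
   size_t numWords = 4096;

   if (cuMemAlloc(&dBuf, numWords * 4) == CUDA_SUCCESS) {
       cuMemsetD32(dBuf, 0x3F800000u, numWords);   // every word becomes 1.0f
       cuMemFree(dBuf);
   }
 * \endcode
 */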
/**
* \brief Initializes device memory
*
* Sets the 2D memory range of \p Width 8-bit values to the specified value
* \p uc. \p Height specifies the number of rows to set, and \p dstPitch
* specifies the number of bytes between each row. This function performs
* fastest when the pitch is one that has been passed back by
* ::cuMemAllocPitch().
*
* \param dstDevice - Destination device pointer
* \param dstPitch - Pitch of destination device pointer
* \param uc - Value to set
* \param Width - Width of row
* \param Height - Number of rows
*
* \return
* ::CUDA_SUCCESS,
* ::CUDA_ERROR_DEINITIALIZED,
* ::CUDA_ERROR_NOT_INITIALIZED,
* ::CUDA_ERROR_INVALID_CONTEXT,
* ::CUDA_ERROR_INVALID_VALUE
* \notefnerr
*
* \sa ::cuArray3DCreate, ::cuArray3DGetDescriptor, ::cuArrayCreate,
* ::cuArrayDestroy, ::cuArrayGetDescriptor, ::cuMemAlloc, ::cuMemAllocHost,
* ::cuMemAllocPitch, ::cuMemcpy2D, ::cuMemcpy2DAsync, ::cuMemcpy2DUnaligned,
* ::cuMemcpy3D, ::cuMemcpy3DAsync, ::cuMemcpyAtoA, ::cuMemcpyAtoD,
* ::cuMemcpyAtoH, ::cuMemcpyAtoHAsync, ::cuMemcpyDtoA, ::cuMemcpyDtoD, ::cuMemcpyDtoDAsync,
* ::cuMemcpyDtoH, ::cuMemcpyDtoHAsync, ::cuMemcpyHtoA, ::cuMemcpyHtoAAsync,
* ::cuMemcpyHtoD, ::cuMemcpyHtoDAsync, ::cuMemFree, ::cuMemFreeHost,
* ::cuMemGetAddressRange, ::cuMemGetInfo, ::cuMemHostAlloc,
* ::cuMemHostGetDevicePointer, ::cuMemsetD2D8Async,
* ::cuMemsetD2D16, ::cuMemsetD2D16Async, ::cuMemsetD2D32, ::cuMemsetD2D32Async,
* ::cuMemsetD8, ::cuMemsetD8Async, ::cuMemsetD16, ::cuMemsetD16Async,
* ::cuMemsetD32, ::cuMemsetD32Async
*/
CUresult CUDAAPI cuMemsetD2D8(CUdeviceptr dstDevice, size_t dstPitch, unsigned char uc, size_t Width, size_t Height);
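/*
 * Usage sketch (illustrative, not part of the original header): allocating a
 * pitched 2D buffer and clearing it with ::cuMemsetD2D8(). Using the pitch
 * returned by ::cuMemAllocPitch() as \p dstPitch takes the fast path described
 * above. Error checking is omitted for brevity; the dimensions are
 * hypothetical caller-chosen values.
 * \code
    CUdeviceptr d;
    size_t pitch;
    size_t width = 1024, height = 768;           // 8-bit elements per row
    cuMemAllocPitch(&d, &pitch, width, height, 4);
    cuMemsetD2D8(d, pitch, 0x00, width, height); // zero every row
    cuMemFree(d);
 * \endcode
 */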
/**
* \brief Initializes device memory
*
* Sets the 2D memory range of \p Width 16-bit values to the specified value
* \p us. \p Height specifies the number of rows to set, and \p dstPitch
* specifies the number of bytes between each row. This function performs
* fastest when the pitch is one that has been passed back by
* ::cuMemAllocPitch().
*
* \param dstDevice - Destination device pointer
* \param dstPitch - Pitch of destination device pointer
* \param us - Value to set
* \param Width - Width of row
* \param Height - Number of rows
*
* \return
* ::CUDA_SUCCESS,
* ::CUDA_ERROR_DEINITIALIZED,
* ::CUDA_ERROR_NOT_INITIALIZED,
* ::CUDA_ERROR_INVALID_CONTEXT,
* ::CUDA_ERROR_INVALID_VALUE
* \notefnerr
*
* \sa ::cuArray3DCreate, ::cuArray3DGetDescriptor, ::cuArrayCreate,
* ::cuArrayDestroy, ::cuArrayGetDescriptor, ::cuMemAlloc, ::cuMemAllocHost,
* ::cuMemAllocPitch, ::cuMemcpy2D, ::cuMemcpy2DAsync, ::cuMemcpy2DUnaligned,
* ::cuMemcpy3D, ::cuMemcpy3DAsync, ::cuMemcpyAtoA, ::cuMemcpyAtoD,
* ::cuMemcpyAtoH, ::cuMemcpyAtoHAsync, ::cuMemcpyDtoA, ::cuMemcpyDtoD, ::cuMemcpyDtoDAsync,
* ::cuMemcpyDtoH, ::cuMemcpyDtoHAsync, ::cuMemcpyHtoA, ::cuMemcpyHtoAAsync,
* ::cuMemcpyHtoD, ::cuMemcpyHtoDAsync, ::cuMemFree, ::cuMemFreeHost,
* ::cuMemGetAddressRange, ::cuMemGetInfo, ::cuMemHostAlloc,
* ::cuMemHostGetDevicePointer, ::cuMemsetD2D8, ::cuMemsetD2D8Async,
* ::cuMemsetD2D16Async, ::cuMemsetD2D32, ::cuMemsetD2D32Async,
* ::cuMemsetD8, ::cuMemsetD8Async, ::cuMemsetD16, ::cuMemsetD16Async,
* ::cuMemsetD32, ::cuMemsetD32Async
*/
CUresult CUDAAPI cuMemsetD2D16(CUdeviceptr dstDevice, size_t dstPitch, unsigned short us, size_t Width, size_t Height);
/**
* \brief Initializes device memory
*
* Sets the 2D memory range of \p Width 32-bit values to the specified value
* \p ui. \p Height specifies the number of rows to set, and \p dstPitch
* specifies the number of bytes between each row. This function performs
* fastest when the pitch is one that has been passed back by
* ::cuMemAllocPitch().
*
* \param dstDevice - Destination device pointer
* \param dstPitch - Pitch of destination device pointer
* \param ui - Value to set
* \param Width - Width of row
* \param Height - Number of rows
*
* \return
* ::CUDA_SUCCESS,
* ::CUDA_ERROR_DEINITIALIZED,
* ::CUDA_ERROR_NOT_INITIALIZED,
* ::CUDA_ERROR_INVALID_CONTEXT,
* ::CUDA_ERROR_INVALID_VALUE
* \notefnerr
*
* \sa ::cuArray3DCreate, ::cuArray3DGetDescriptor, ::cuArrayCreate,
* ::cuArrayDestroy, ::cuArrayGetDescriptor, ::cuMemAlloc, ::cuMemAllocHost,
* ::cuMemAllocPitch, ::cuMemcpy2D, ::cuMemcpy2DAsync, ::cuMemcpy2DUnaligned,
* ::cuMemcpy3D, ::cuMemcpy3DAsync, ::cuMemcpyAtoA, ::cuMemcpyAtoD,
* ::cuMemcpyAtoH, ::cuMemcpyAtoHAsync, ::cuMemcpyDtoA, ::cuMemcpyDtoD, ::cuMemcpyDtoDAsync,
* ::cuMemcpyDtoH, ::cuMemcpyDtoHAsync, ::cuMemcpyHtoA, ::cuMemcpyHtoAAsync,
* ::cuMemcpyHtoD, ::cuMemcpyHtoDAsync, ::cuMemFree, ::cuMemFreeHost,
* ::cuMemGetAddressRange, ::cuMemGetInfo, ::cuMemHostAlloc,
* ::cuMemHostGetDevicePointer, ::cuMemsetD2D8, ::cuMemsetD2D8Async,
* ::cuMemsetD2D16, ::cuMemsetD2D16Async, ::cuMemsetD2D32Async,
* ::cuMemsetD8, ::cuMemsetD8Async, ::cuMemsetD16, ::cuMemsetD16Async,
* ::cuMemsetD32, ::cuMemsetD32Async
*/
CUresult CUDAAPI cuMemsetD2D32(CUdeviceptr dstDevice, size_t dstPitch, unsigned int ui, size_t Width, size_t Height);
/**
* \brief Sets device memory
*
* Sets the memory range of \p N 8-bit values to the specified value
* \p uc.
*
* ::cuMemsetD8Async() is asynchronous and can optionally be associated to a
* stream by passing a non-zero \p hStream argument.
*
* \param dstDevice - Destination device pointer
* \param uc - Value to set
* \param N - Number of elements
* \param hStream - Stream identifier
*
* \return
* ::CUDA_SUCCESS,
* ::CUDA_ERROR_DEINITIALIZED,
* ::CUDA_ERROR_NOT_INITIALIZED,
* ::CUDA_ERROR_INVALID_CONTEXT,
* ::CUDA_ERROR_INVALID_VALUE
* \notefnerr
*
* \sa ::cuArray3DCreate, ::cuArray3DGetDescriptor, ::cuArrayCreate,
* ::cuArrayDestroy, ::cuArrayGetDescriptor, ::cuMemAlloc, ::cuMemAllocHost,
* ::cuMemAllocPitch, ::cuMemcpy2D, ::cuMemcpy2DAsync, ::cuMemcpy2DUnaligned,
* ::cuMemcpy3D, ::cuMemcpy3DAsync, ::cuMemcpyAtoA, ::cuMemcpyAtoD,
* ::cuMemcpyAtoH, ::cuMemcpyAtoHAsync, ::cuMemcpyDtoA, ::cuMemcpyDtoD, ::cuMemcpyDtoDAsync,
* ::cuMemcpyDtoH, ::cuMemcpyDtoHAsync, ::cuMemcpyHtoA, ::cuMemcpyHtoAAsync,
* ::cuMemcpyHtoD, ::cuMemcpyHtoDAsync, ::cuMemFree, ::cuMemFreeHost,
* ::cuMemGetAddressRange, ::cuMemGetInfo, ::cuMemHostAlloc,
* ::cuMemHostGetDevicePointer, ::cuMemsetD2D8, ::cuMemsetD2D8Async,
* ::cuMemsetD2D16, ::cuMemsetD2D16Async, ::cuMemsetD2D32, ::cuMemsetD2D32Async,
* ::cuMemsetD8, ::cuMemsetD16, ::cuMemsetD16Async,
* ::cuMemsetD32, ::cuMemsetD32Async
*/
CUresult CUDAAPI cuMemsetD8Async(CUdeviceptr dstDevice, unsigned char uc, size_t N, CUstream hStream);
/**
* \brief Sets device memory
*
* Sets the memory range of \p N 16-bit values to the specified value
* \p us.
*
* ::cuMemsetD16Async() is asynchronous and can optionally be associated to a
* stream by passing a non-zero \p hStream argument.
*
* \param dstDevice - Destination device pointer
* \param us - Value to set
* \param N - Number of elements
* \param hStream - Stream identifier
*
* \return
* ::CUDA_SUCCESS,
* ::CUDA_ERROR_DEINITIALIZED,
* ::CUDA_ERROR_NOT_INITIALIZED,
* ::CUDA_ERROR_INVALID_CONTEXT,
* ::CUDA_ERROR_INVALID_VALUE
* \notefnerr
*
* \sa ::cuArray3DCreate, ::cuArray3DGetDescriptor, ::cuArrayCreate,
* ::cuArrayDestroy, ::cuArrayGetDescriptor, ::cuMemAlloc, ::cuMemAllocHost,
* ::cuMemAllocPitch, ::cuMemcpy2D, ::cuMemcpy2DAsync, ::cuMemcpy2DUnaligned,
* ::cuMemcpy3D, ::cuMemcpy3DAsync, ::cuMemcpyAtoA, ::cuMemcpyAtoD,
* ::cuMemcpyAtoH, ::cuMemcpyAtoHAsync, ::cuMemcpyDtoA, ::cuMemcpyDtoD, ::cuMemcpyDtoDAsync,
* ::cuMemcpyDtoH, ::cuMemcpyDtoHAsync, ::cuMemcpyHtoA, ::cuMemcpyHtoAAsync,
* ::cuMemcpyHtoD, ::cuMemcpyHtoDAsync, ::cuMemFree, ::cuMemFreeHost,
* ::cuMemGetAddressRange, ::cuMemGetInfo, ::cuMemHostAlloc,
* ::cuMemHostGetDevicePointer, ::cuMemsetD2D8, ::cuMemsetD2D8Async,
* ::cuMemsetD2D16, ::cuMemsetD2D16Async, ::cuMemsetD2D32, ::cuMemsetD2D32Async,
* ::cuMemsetD8, ::cuMemsetD8Async, ::cuMemsetD16,
* ::cuMemsetD32, ::cuMemsetD32Async
*/
CUresult CUDAAPI cuMemsetD16Async(CUdeviceptr dstDevice, unsigned short us, size_t N, CUstream hStream);
/**
* \brief Sets device memory
*
* Sets the memory range of \p N 32-bit values to the specified value
* \p ui.
*
* ::cuMemsetD32Async() is asynchronous and can optionally be associated to a
* stream by passing a non-zero \p hStream argument.
*
* \param dstDevice - Destination device pointer
* \param ui - Value to set
* \param N - Number of elements
* \param hStream - Stream identifier
*
* \return
* ::CUDA_SUCCESS,
* ::CUDA_ERROR_DEINITIALIZED,
* ::CUDA_ERROR_NOT_INITIALIZED,
* ::CUDA_ERROR_INVALID_CONTEXT,
* ::CUDA_ERROR_INVALID_VALUE
* \notefnerr
*
* \sa ::cuArray3DCreate, ::cuArray3DGetDescriptor, ::cuArrayCreate,
* ::cuArrayDestroy, ::cuArrayGetDescriptor, ::cuMemAlloc, ::cuMemAllocHost,
* ::cuMemAllocPitch, ::cuMemcpy2D, ::cuMemcpy2DAsync, ::cuMemcpy2DUnaligned,
* ::cuMemcpy3D, ::cuMemcpy3DAsync, ::cuMemcpyAtoA, ::cuMemcpyAtoD,
* ::cuMemcpyAtoH, ::cuMemcpyAtoHAsync, ::cuMemcpyDtoA, ::cuMemcpyDtoD, ::cuMemcpyDtoDAsync,
* ::cuMemcpyDtoH, ::cuMemcpyDtoHAsync, ::cuMemcpyHtoA, ::cuMemcpyHtoAAsync,
* ::cuMemcpyHtoD, ::cuMemcpyHtoDAsync, ::cuMemFree, ::cuMemFreeHost,
* ::cuMemGetAddressRange, ::cuMemGetInfo, ::cuMemHostAlloc,
* ::cuMemHostGetDevicePointer, ::cuMemsetD2D8, ::cuMemsetD2D8Async,
* ::cuMemsetD2D16, ::cuMemsetD2D16Async, ::cuMemsetD2D32, ::cuMemsetD2D32Async,
* ::cuMemsetD8, ::cuMemsetD8Async, ::cuMemsetD16, ::cuMemsetD16Async, ::cuMemsetD32
*/
CUresult CUDAAPI cuMemsetD32Async(CUdeviceptr dstDevice, unsigned int ui, size_t N, CUstream hStream);
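/*
 * Usage sketch (illustrative, not part of the original header): an
 * asynchronous fill issued into a user-created stream. The host call returns
 * immediately; ::cuStreamSynchronize() is what guarantees completion here.
 * Error checking is omitted and the element count is a hypothetical value.
 * \code
    CUdeviceptr d;
    CUstream stream;
    size_t N = 1 << 20;                           // number of 32-bit elements
    cuMemAlloc(&d, N * sizeof(unsigned int));
    cuStreamCreate(&stream, 0);
    cuMemsetD32Async(d, 0xDEADBEEF, N, stream);   // queued, not yet complete
    cuStreamSynchronize(stream);                  // wait for the fill to finish
    cuStreamDestroy(stream);
    cuMemFree(d);
 * \endcode
 */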
/**
* \brief Sets device memory
*
* Sets the 2D memory range of \p Width 8-bit values to the specified value
* \p uc. \p Height specifies the number of rows to set, and \p dstPitch
* specifies the number of bytes between each row. This function performs
* fastest when the pitch is one that has been passed back by
* ::cuMemAllocPitch().
*
* ::cuMemsetD2D8Async() is asynchronous and can optionally be associated to a
* stream by passing a non-zero \p hStream argument.
*
* \param dstDevice - Destination device pointer
* \param dstPitch - Pitch of destination device pointer
* \param uc - Value to set
* \param Width - Width of row
* \param Height - Number of rows
* \param hStream - Stream identifier
*
* \return
* ::CUDA_SUCCESS,
* ::CUDA_ERROR_DEINITIALIZED,
* ::CUDA_ERROR_NOT_INITIALIZED,
* ::CUDA_ERROR_INVALID_CONTEXT,
* ::CUDA_ERROR_INVALID_VALUE
* \notefnerr
*
* \sa ::cuArray3DCreate, ::cuArray3DGetDescriptor, ::cuArrayCreate,
* ::cuArrayDestroy, ::cuArrayGetDescriptor, ::cuMemAlloc, ::cuMemAllocHost,
* ::cuMemAllocPitch, ::cuMemcpy2D, ::cuMemcpy2DAsync, ::cuMemcpy2DUnaligned,
* ::cuMemcpy3D, ::cuMemcpy3DAsync, ::cuMemcpyAtoA, ::cuMemcpyAtoD,
* ::cuMemcpyAtoH, ::cuMemcpyAtoHAsync, ::cuMemcpyDtoA, ::cuMemcpyDtoD, ::cuMemcpyDtoDAsync,
* ::cuMemcpyDtoH, ::cuMemcpyDtoHAsync, ::cuMemcpyHtoA, ::cuMemcpyHtoAAsync,
* ::cuMemcpyHtoD, ::cuMemcpyHtoDAsync, ::cuMemFree, ::cuMemFreeHost,
* ::cuMemGetAddressRange, ::cuMemGetInfo, ::cuMemHostAlloc,
* ::cuMemHostGetDevicePointer, ::cuMemsetD2D8,
* ::cuMemsetD2D16, ::cuMemsetD2D16Async, ::cuMemsetD2D32, ::cuMemsetD2D32Async,
* ::cuMemsetD8, ::cuMemsetD8Async, ::cuMemsetD16, ::cuMemsetD16Async,
* ::cuMemsetD32, ::cuMemsetD32Async
*/
CUresult CUDAAPI cuMemsetD2D8Async(CUdeviceptr dstDevice, size_t dstPitch, unsigned char uc, size_t Width, size_t Height, CUstream hStream);
/**
* \brief Sets device memory
*
* Sets the 2D memory range of \p Width 16-bit values to the specified value
* \p us. \p Height specifies the number of rows to set, and \p dstPitch
* specifies the number of bytes between each row. This function performs
* fastest when the pitch is one that has been passed back by
* ::cuMemAllocPitch().
*
* ::cuMemsetD2D16Async() is asynchronous and can optionally be associated to a
* stream by passing a non-zero \p hStream argument.
*
* \param dstDevice - Destination device pointer
* \param dstPitch - Pitch of destination device pointer
* \param us - Value to set
* \param Width - Width of row
* \param Height - Number of rows
* \param hStream - Stream identifier
*
* \return
* ::CUDA_SUCCESS,
* ::CUDA_ERROR_DEINITIALIZED,
* ::CUDA_ERROR_NOT_INITIALIZED,
* ::CUDA_ERROR_INVALID_CONTEXT,
* ::CUDA_ERROR_INVALID_VALUE
* \notefnerr
*
* \sa ::cuArray3DCreate, ::cuArray3DGetDescriptor, ::cuArrayCreate,
* ::cuArrayDestroy, ::cuArrayGetDescriptor, ::cuMemAlloc, ::cuMemAllocHost,
* ::cuMemAllocPitch, ::cuMemcpy2D, ::cuMemcpy2DAsync, ::cuMemcpy2DUnaligned,
* ::cuMemcpy3D, ::cuMemcpy3DAsync, ::cuMemcpyAtoA, ::cuMemcpyAtoD,
* ::cuMemcpyAtoH, ::cuMemcpyAtoHAsync, ::cuMemcpyDtoA, ::cuMemcpyDtoD, ::cuMemcpyDtoDAsync,
* ::cuMemcpyDtoH, ::cuMemcpyDtoHAsync, ::cuMemcpyHtoA, ::cuMemcpyHtoAAsync,
* ::cuMemcpyHtoD, ::cuMemcpyHtoDAsync, ::cuMemFree, ::cuMemFreeHost,
* ::cuMemGetAddressRange, ::cuMemGetInfo, ::cuMemHostAlloc,
* ::cuMemHostGetDevicePointer, ::cuMemsetD2D8, ::cuMemsetD2D8Async,
* ::cuMemsetD2D16, ::cuMemsetD2D32, ::cuMemsetD2D32Async,
* ::cuMemsetD8, ::cuMemsetD8Async, ::cuMemsetD16, ::cuMemsetD16Async,
* ::cuMemsetD32, ::cuMemsetD32Async
*/
CUresult CUDAAPI cuMemsetD2D16Async(CUdeviceptr dstDevice, size_t dstPitch, unsigned short us, size_t Width, size_t Height, CUstream hStream);
/**
* \brief Sets device memory
*
* Sets the 2D memory range of \p Width 32-bit values to the specified value
* \p ui. \p Height specifies the number of rows to set, and \p dstPitch
* specifies the number of bytes between each row. This function performs
* fastest when the pitch is one that has been passed back by
* ::cuMemAllocPitch().
*
* ::cuMemsetD2D32Async() is asynchronous and can optionally be associated to a
* stream by passing a non-zero \p hStream argument.
*
* \param dstDevice - Destination device pointer
* \param dstPitch - Pitch of destination device pointer
* \param ui - Value to set
* \param Width - Width of row
* \param Height - Number of rows
* \param hStream - Stream identifier
*
* \return
* ::CUDA_SUCCESS,
* ::CUDA_ERROR_DEINITIALIZED,
* ::CUDA_ERROR_NOT_INITIALIZED,
* ::CUDA_ERROR_INVALID_CONTEXT,
* ::CUDA_ERROR_INVALID_VALUE
* \notefnerr
*
* \sa ::cuArray3DCreate, ::cuArray3DGetDescriptor, ::cuArrayCreate,
* ::cuArrayDestroy, ::cuArrayGetDescriptor, ::cuMemAlloc, ::cuMemAllocHost,
* ::cuMemAllocPitch, ::cuMemcpy2D, ::cuMemcpy2DAsync, ::cuMemcpy2DUnaligned,
* ::cuMemcpy3D, ::cuMemcpy3DAsync, ::cuMemcpyAtoA, ::cuMemcpyAtoD,
* ::cuMemcpyAtoH, ::cuMemcpyAtoHAsync, ::cuMemcpyDtoA, ::cuMemcpyDtoD, ::cuMemcpyDtoDAsync,
* ::cuMemcpyDtoH, ::cuMemcpyDtoHAsync, ::cuMemcpyHtoA, ::cuMemcpyHtoAAsync,
* ::cuMemcpyHtoD, ::cuMemcpyHtoDAsync, ::cuMemFree, ::cuMemFreeHost,
* ::cuMemGetAddressRange, ::cuMemGetInfo, ::cuMemHostAlloc,
* ::cuMemHostGetDevicePointer, ::cuMemsetD2D8, ::cuMemsetD2D8Async,
* ::cuMemsetD2D16, ::cuMemsetD2D16Async, ::cuMemsetD2D32,
* ::cuMemsetD8, ::cuMemsetD8Async, ::cuMemsetD16, ::cuMemsetD16Async,
* ::cuMemsetD32, ::cuMemsetD32Async
*/
CUresult CUDAAPI cuMemsetD2D32Async(CUdeviceptr dstDevice, size_t dstPitch, unsigned int ui, size_t Width, size_t Height, CUstream hStream);
/**
* \brief Creates a 1D or 2D CUDA array
*
* Creates a CUDA array according to the ::CUDA_ARRAY_DESCRIPTOR structure
* \p pAllocateArray and returns a handle to the new CUDA array in \p *pHandle.
* The ::CUDA_ARRAY_DESCRIPTOR is defined as:
*
* \code
typedef struct {
unsigned int Width;
unsigned int Height;
CUarray_format Format;
unsigned int NumChannels;
} CUDA_ARRAY_DESCRIPTOR;
* \endcode
* where:
*
* - \p Width and \p Height are the width and height of the CUDA array (in
* elements); the CUDA array is one-dimensional if height is 0, two-dimensional
* otherwise;
* - ::Format specifies the format of the elements; ::CUarray_format is
* defined as:
* \code
typedef enum CUarray_format_enum {
CU_AD_FORMAT_UNSIGNED_INT8 = 0x01,
CU_AD_FORMAT_UNSIGNED_INT16 = 0x02,
CU_AD_FORMAT_UNSIGNED_INT32 = 0x03,
CU_AD_FORMAT_SIGNED_INT8 = 0x08,
CU_AD_FORMAT_SIGNED_INT16 = 0x09,
CU_AD_FORMAT_SIGNED_INT32 = 0x0a,
CU_AD_FORMAT_HALF = 0x10,
CU_AD_FORMAT_FLOAT = 0x20
} CUarray_format;
* \endcode
* - \p NumChannels specifies the number of packed components per CUDA array
* element; it may be 1, 2, or 4;
*
* Here are examples of CUDA array descriptions:
*
* Description for a CUDA array of 2048 floats:
* \code
CUDA_ARRAY_DESCRIPTOR desc;
desc.Format = CU_AD_FORMAT_FLOAT;
desc.NumChannels = 1;
desc.Width = 2048;
desc.Height = 1;
* \endcode
*
* Description for a 64 x 64 CUDA array of floats:
* \code
CUDA_ARRAY_DESCRIPTOR desc;
desc.Format = CU_AD_FORMAT_FLOAT;
desc.NumChannels = 1;
desc.Width = 64;
desc.Height = 64;
* \endcode
*
* Description for a \p width x \p height CUDA array of 64-bit, 4x16-bit
* float16's:
* \code
CUDA_ARRAY_DESCRIPTOR desc;
desc.Format = CU_AD_FORMAT_HALF;
desc.NumChannels = 4;
desc.Width = width;
desc.Height = height;
* \endcode
*
* Description for a \p width x \p height CUDA array of 16-bit elements, each
* of which is two 8-bit unsigned chars:
* \code
CUDA_ARRAY_DESCRIPTOR desc;
desc.Format = CU_AD_FORMAT_UNSIGNED_INT8;
desc.NumChannels = 2;
desc.Width = width;
desc.Height = height;
* \endcode
*
* \param pHandle - Returned array
* \param pAllocateArray - Array descriptor
*
* \return
* ::CUDA_SUCCESS,
* ::CUDA_ERROR_DEINITIALIZED,
* ::CUDA_ERROR_NOT_INITIALIZED,
* ::CUDA_ERROR_INVALID_CONTEXT,
* ::CUDA_ERROR_INVALID_VALUE,
* ::CUDA_ERROR_OUT_OF_MEMORY,
* ::CUDA_ERROR_UNKNOWN
* \notefnerr
*
* \sa ::cuArray3DCreate, ::cuArray3DGetDescriptor,
* ::cuArrayDestroy, ::cuArrayGetDescriptor, ::cuMemAlloc, ::cuMemAllocHost,
* ::cuMemAllocPitch, ::cuMemcpy2D, ::cuMemcpy2DAsync, ::cuMemcpy2DUnaligned,
* ::cuMemcpy3D, ::cuMemcpy3DAsync, ::cuMemcpyAtoA, ::cuMemcpyAtoD,
* ::cuMemcpyAtoH, ::cuMemcpyAtoHAsync, ::cuMemcpyDtoA, ::cuMemcpyDtoD, ::cuMemcpyDtoDAsync,
* ::cuMemcpyDtoH, ::cuMemcpyDtoHAsync, ::cuMemcpyHtoA, ::cuMemcpyHtoAAsync,
* ::cuMemcpyHtoD, ::cuMemcpyHtoDAsync, ::cuMemFree, ::cuMemFreeHost,
* ::cuMemGetAddressRange, ::cuMemGetInfo, ::cuMemHostAlloc,
* ::cuMemHostGetDevicePointer, ::cuMemsetD2D8, ::cuMemsetD2D16,
* ::cuMemsetD2D32, ::cuMemsetD8, ::cuMemsetD16, ::cuMemsetD32
*/
CUresult CUDAAPI cuArrayCreate(CUarray *pHandle, const CUDA_ARRAY_DESCRIPTOR *pAllocateArray);
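/*
 * Usage sketch (illustrative, not part of the original header): creating and
 * destroying a 64 x 64 CUDA array of floats using the descriptor shown above.
 * Error checking is omitted for brevity.
 * \code
    CUDA_ARRAY_DESCRIPTOR desc;
    CUarray hArray;
    desc.Format = CU_AD_FORMAT_FLOAT;
    desc.NumChannels = 1;
    desc.Width = 64;
    desc.Height = 64;
    cuArrayCreate(&hArray, &desc);
    // ... fill the array with cuMemcpy2D(), bind it to a texref, etc. ...
    cuArrayDestroy(hArray);
 * \endcode
 */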
/**
* \brief Get a 1D or 2D CUDA array descriptor
*
* Returns in \p *pArrayDescriptor a descriptor containing information on the
* format and dimensions of the CUDA array \p hArray. It is useful for
* subroutines that have been passed a CUDA array, but need to know the CUDA
* array parameters for validation or other purposes.
*
* \param pArrayDescriptor - Returned array descriptor
* \param hArray - Array to get descriptor of
*
* \return
* ::CUDA_SUCCESS,
* ::CUDA_ERROR_DEINITIALIZED,
* ::CUDA_ERROR_NOT_INITIALIZED,
* ::CUDA_ERROR_INVALID_CONTEXT,
* ::CUDA_ERROR_INVALID_VALUE,
* ::CUDA_ERROR_INVALID_HANDLE
* \notefnerr
*
* \sa ::cuArray3DCreate, ::cuArray3DGetDescriptor, ::cuArrayCreate,
* ::cuArrayDestroy, ::cuMemAlloc, ::cuMemAllocHost,
* ::cuMemAllocPitch, ::cuMemcpy2D, ::cuMemcpy2DAsync, ::cuMemcpy2DUnaligned,
* ::cuMemcpy3D, ::cuMemcpy3DAsync, ::cuMemcpyAtoA, ::cuMemcpyAtoD,
* ::cuMemcpyAtoH, ::cuMemcpyAtoHAsync, ::cuMemcpyDtoA, ::cuMemcpyDtoD, ::cuMemcpyDtoDAsync,
* ::cuMemcpyDtoH, ::cuMemcpyDtoHAsync, ::cuMemcpyHtoA, ::cuMemcpyHtoAAsync,
* ::cuMemcpyHtoD, ::cuMemcpyHtoDAsync, ::cuMemFree, ::cuMemFreeHost,
* ::cuMemGetAddressRange, ::cuMemGetInfo, ::cuMemHostAlloc,
* ::cuMemHostGetDevicePointer, ::cuMemsetD2D8, ::cuMemsetD2D16,
* ::cuMemsetD2D32, ::cuMemsetD8, ::cuMemsetD16, ::cuMemsetD32
*/
CUresult CUDAAPI cuArrayGetDescriptor(CUDA_ARRAY_DESCRIPTOR *pArrayDescriptor, CUarray hArray);
#endif /* __CUDA_API_VERSION >= 3020 */
/**
* \brief Destroys a CUDA array
*
* Destroys the CUDA array \p hArray.
*
* \param hArray - Array to destroy
*
* \return
* ::CUDA_SUCCESS,
* ::CUDA_ERROR_DEINITIALIZED,
* ::CUDA_ERROR_NOT_INITIALIZED,
* ::CUDA_ERROR_INVALID_CONTEXT,
* ::CUDA_ERROR_INVALID_HANDLE,
* ::CUDA_ERROR_ARRAY_IS_MAPPED
* \notefnerr
*
* \sa ::cuArray3DCreate, ::cuArray3DGetDescriptor, ::cuArrayCreate,
* ::cuArrayGetDescriptor, ::cuMemAlloc, ::cuMemAllocHost,
* ::cuMemAllocPitch, ::cuMemcpy2D, ::cuMemcpy2DAsync, ::cuMemcpy2DUnaligned,
* ::cuMemcpy3D, ::cuMemcpy3DAsync, ::cuMemcpyAtoA, ::cuMemcpyAtoD,
* ::cuMemcpyAtoH, ::cuMemcpyAtoHAsync, ::cuMemcpyDtoA, ::cuMemcpyDtoD, ::cuMemcpyDtoDAsync,
* ::cuMemcpyDtoH, ::cuMemcpyDtoHAsync, ::cuMemcpyHtoA, ::cuMemcpyHtoAAsync,
* ::cuMemcpyHtoD, ::cuMemcpyHtoDAsync, ::cuMemFree, ::cuMemFreeHost,
* ::cuMemGetAddressRange, ::cuMemGetInfo, ::cuMemHostAlloc,
* ::cuMemHostGetDevicePointer, ::cuMemsetD2D8, ::cuMemsetD2D16,
* ::cuMemsetD2D32, ::cuMemsetD8, ::cuMemsetD16, ::cuMemsetD32
*/
CUresult CUDAAPI cuArrayDestroy(CUarray hArray);
#if __CUDA_API_VERSION >= 3020
/**
* \brief Creates a 3D CUDA array
*
* Creates a CUDA array according to the ::CUDA_ARRAY3D_DESCRIPTOR structure
* \p pAllocateArray and returns a handle to the new CUDA array in \p *pHandle.
* The ::CUDA_ARRAY3D_DESCRIPTOR is defined as:
*
* \code
typedef struct {
unsigned int Width;
unsigned int Height;
unsigned int Depth;
CUarray_format Format;
unsigned int NumChannels;
unsigned int Flags;
} CUDA_ARRAY3D_DESCRIPTOR;
* \endcode
* where:
*
* - \p Width, \p Height, and \p Depth are the width, height, and depth of the
* CUDA array (in elements); the CUDA array is one-dimensional if height and
* depth are 0, two-dimensional if depth is 0, and three-dimensional otherwise;
* - ::Format specifies the format of the elements; ::CUarray_format is
* defined as:
* \code
typedef enum CUarray_format_enum {
CU_AD_FORMAT_UNSIGNED_INT8 = 0x01,
CU_AD_FORMAT_UNSIGNED_INT16 = 0x02,
CU_AD_FORMAT_UNSIGNED_INT32 = 0x03,
CU_AD_FORMAT_SIGNED_INT8 = 0x08,
CU_AD_FORMAT_SIGNED_INT16 = 0x09,
CU_AD_FORMAT_SIGNED_INT32 = 0x0a,
CU_AD_FORMAT_HALF = 0x10,
CU_AD_FORMAT_FLOAT = 0x20
} CUarray_format;
* \endcode
* - \p NumChannels specifies the number of packed components per CUDA array
* element; it may be 1, 2, or 4;
* - ::Flags may be set to ::CUDA_ARRAY3D_SURFACE_LDST to enable surface references
* to be bound to the CUDA array. If this flag is not set, ::cuSurfRefSetArray
* will fail when attempting to bind the CUDA array to a surface reference.
*
* Here are examples of CUDA array descriptions:
*
* Description for a CUDA array of 2048 floats:
* \code
CUDA_ARRAY3D_DESCRIPTOR desc;
desc.Format = CU_AD_FORMAT_FLOAT;
desc.NumChannels = 1;
desc.Width = 2048;
desc.Height = 0;
desc.Depth = 0;
* \endcode
*
* Description for a 64 x 64 CUDA array of floats:
* \code
CUDA_ARRAY3D_DESCRIPTOR desc;
desc.Format = CU_AD_FORMAT_FLOAT;
desc.NumChannels = 1;
desc.Width = 64;
desc.Height = 64;
desc.Depth = 0;
* \endcode
*
* Description for a \p width x \p height x \p depth CUDA array of 64-bit,
* 4x16-bit float16's:
* \code
CUDA_ARRAY3D_DESCRIPTOR desc;
desc.Format = CU_AD_FORMAT_HALF;
desc.NumChannels = 4;
desc.Width = width;
desc.Height = height;
desc.Depth = depth;
* \endcode
*
* \param pHandle - Returned array
* \param pAllocateArray - 3D array descriptor
*
* \return
* ::CUDA_SUCCESS,
* ::CUDA_ERROR_DEINITIALIZED,
* ::CUDA_ERROR_NOT_INITIALIZED,
* ::CUDA_ERROR_INVALID_CONTEXT,
* ::CUDA_ERROR_INVALID_VALUE,
* ::CUDA_ERROR_OUT_OF_MEMORY,
* ::CUDA_ERROR_UNKNOWN
* \notefnerr
*
* \sa ::cuArray3DGetDescriptor, ::cuArrayCreate,
* ::cuArrayDestroy, ::cuArrayGetDescriptor, ::cuMemAlloc, ::cuMemAllocHost,
* ::cuMemAllocPitch, ::cuMemcpy2D, ::cuMemcpy2DAsync, ::cuMemcpy2DUnaligned,
* ::cuMemcpy3D, ::cuMemcpy3DAsync, ::cuMemcpyAtoA, ::cuMemcpyAtoD,
* ::cuMemcpyAtoH, ::cuMemcpyAtoHAsync, ::cuMemcpyDtoA, ::cuMemcpyDtoD, ::cuMemcpyDtoDAsync,
* ::cuMemcpyDtoH, ::cuMemcpyDtoHAsync, ::cuMemcpyHtoA, ::cuMemcpyHtoAAsync,
* ::cuMemcpyHtoD, ::cuMemcpyHtoDAsync, ::cuMemFree, ::cuMemFreeHost,
* ::cuMemGetAddressRange, ::cuMemGetInfo, ::cuMemHostAlloc,
* ::cuMemHostGetDevicePointer, ::cuMemsetD2D8, ::cuMemsetD2D16,
* ::cuMemsetD2D32, ::cuMemsetD8, ::cuMemsetD16, ::cuMemsetD32
*/
CUresult CUDAAPI cuArray3DCreate(CUarray *pHandle, const CUDA_ARRAY3D_DESCRIPTOR *pAllocateArray);
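/*
 * Usage sketch (illustrative, not part of the original header): creating a
 * width x height x depth array of floats. Flags of 0 gives a plain array;
 * ::CUDA_ARRAY3D_SURFACE_LDST would be needed only for surface binding.
 * `width`, `height`, and `depth` are hypothetical caller-chosen extents.
 * \code
    CUDA_ARRAY3D_DESCRIPTOR desc;
    CUarray hArray;
    desc.Format = CU_AD_FORMAT_FLOAT;
    desc.NumChannels = 1;
    desc.Width = width;
    desc.Height = height;
    desc.Depth = depth;
    desc.Flags = 0;                 // no surface load/store needed
    cuArray3DCreate(&hArray, &desc);
 * \endcode
 */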
/**
* \brief Get a 3D CUDA array descriptor
*
* Returns in \p *pArrayDescriptor a descriptor containing information on the
* format and dimensions of the CUDA array \p hArray. It is useful for
* subroutines that have been passed a CUDA array, but need to know the CUDA
* array parameters for validation or other purposes.
*
* This function may be called on 1D and 2D arrays, in which case the \p Height
* and/or \p Depth members of the descriptor struct will be set to 0.
*
* \param pArrayDescriptor - Returned 3D array descriptor
* \param hArray - 3D array to get descriptor of
*
* \return
* ::CUDA_SUCCESS,
* ::CUDA_ERROR_DEINITIALIZED,
* ::CUDA_ERROR_NOT_INITIALIZED,
* ::CUDA_ERROR_INVALID_CONTEXT,
* ::CUDA_ERROR_INVALID_VALUE,
* ::CUDA_ERROR_INVALID_HANDLE
* \notefnerr
*
* \sa ::cuArray3DCreate, ::cuArrayCreate,
* ::cuArrayDestroy, ::cuArrayGetDescriptor, ::cuMemAlloc, ::cuMemAllocHost,
* ::cuMemAllocPitch, ::cuMemcpy2D, ::cuMemcpy2DAsync, ::cuMemcpy2DUnaligned,
* ::cuMemcpy3D, ::cuMemcpy3DAsync, ::cuMemcpyAtoA, ::cuMemcpyAtoD,
* ::cuMemcpyAtoH, ::cuMemcpyAtoHAsync, ::cuMemcpyDtoA, ::cuMemcpyDtoD, ::cuMemcpyDtoDAsync,
* ::cuMemcpyDtoH, ::cuMemcpyDtoHAsync, ::cuMemcpyHtoA, ::cuMemcpyHtoAAsync,
* ::cuMemcpyHtoD, ::cuMemcpyHtoDAsync, ::cuMemFree, ::cuMemFreeHost,
* ::cuMemGetAddressRange, ::cuMemGetInfo, ::cuMemHostAlloc,
* ::cuMemHostGetDevicePointer, ::cuMemsetD2D8, ::cuMemsetD2D16,
* ::cuMemsetD2D32, ::cuMemsetD8, ::cuMemsetD16, ::cuMemsetD32
*/
CUresult CUDAAPI cuArray3DGetDescriptor(CUDA_ARRAY3D_DESCRIPTOR *pArrayDescriptor, CUarray hArray);
#endif /* __CUDA_API_VERSION >= 3020 */
/** @} */ /* END CUDA_MEM */
/**
* \defgroup CUDA_STREAM Stream Management
*
* This section describes the stream management functions of the low-level CUDA
* driver application programming interface.
*
* @{
*/
/**
* \brief Create a stream
*
* Creates a stream and returns a handle in \p phStream. \p Flags is required
* to be 0.
*
* \param phStream - Returned newly created stream
* \param Flags - Parameters for stream creation (must be 0)
*
* \return
* ::CUDA_SUCCESS,
* ::CUDA_ERROR_DEINITIALIZED,
* ::CUDA_ERROR_NOT_INITIALIZED,
* ::CUDA_ERROR_INVALID_CONTEXT,
* ::CUDA_ERROR_INVALID_VALUE,
* ::CUDA_ERROR_OUT_OF_MEMORY
* \notefnerr
*
* \sa ::cuStreamDestroy,
* ::cuStreamWaitEvent,
* ::cuStreamQuery,
* ::cuStreamSynchronize
*/
CUresult CUDAAPI cuStreamCreate(CUstream *phStream, unsigned int Flags);
/**
* \brief Make a compute stream wait on an event
*
* Makes all future work submitted to \p hStream wait until \p hEvent
* reports completion before beginning execution. This synchronization
* will be performed efficiently on the device.
*
* The stream \p hStream will wait only for the completion of the most recent
* host call to ::cuEventRecord() on \p hEvent. Once this call has returned,
* any functions (including ::cuEventRecord() and ::cuEventDestroy()) may be
* called on \p hEvent again, and the subsequent calls will not have any
* effect on \p hStream.
*
* If \p hStream is 0 (the NULL stream) any future work submitted in any stream
* will wait for \p hEvent to complete before beginning execution. This
* effectively creates a barrier for all future work submitted to the context.
*
* If ::cuEventRecord() has not been called on \p hEvent, this call acts as if
* the record has already completed, and so is a functional no-op.
*
* \param hStream - Stream to wait
* \param hEvent - Event to wait on (may not be NULL)
* \param Flags - Parameters for the operation (must be 0)
*
* \return
* ::CUDA_SUCCESS,
* ::CUDA_ERROR_DEINITIALIZED,
* ::CUDA_ERROR_NOT_INITIALIZED,
* ::CUDA_ERROR_INVALID_CONTEXT,
* ::CUDA_ERROR_INVALID_HANDLE,
* \notefnerr
*
* \sa ::cuStreamCreate,
* ::cuEventRecord,
* ::cuStreamQuery,
* ::cuStreamSynchronize,
* ::cuStreamDestroy
*/
CUresult CUDAAPI cuStreamWaitEvent(CUstream hStream, CUevent hEvent, unsigned int Flags);
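/*
 * Usage sketch (illustrative, not part of the original header): making work in
 * one stream wait for work previously recorded in another, without blocking
 * the host. The memsets stand in for arbitrary device work; error checking is
 * omitted.
 * \code
    CUstream producer, consumer;
    CUevent ev;
    CUdeviceptr d;
    size_t N = 4096;
    cuMemAlloc(&d, N);
    cuStreamCreate(&producer, 0);
    cuStreamCreate(&consumer, 0);
    cuEventCreate(&ev, CU_EVENT_DISABLE_TIMING);
    cuMemsetD8Async(d, 1, N, producer);   // work in the producer stream
    cuEventRecord(ev, producer);          // mark its completion point
    cuStreamWaitEvent(consumer, ev, 0);   // device-side wait; host not blocked
    cuMemsetD8Async(d, 2, N, consumer);   // runs only after the event fires
 * \endcode
 */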
/**
* \brief Determine status of a compute stream
*
* Returns ::CUDA_SUCCESS if all operations in the stream specified by
* \p hStream have completed, or ::CUDA_ERROR_NOT_READY if not.
*
* \param hStream - Stream to query status of
*
* \return
* ::CUDA_SUCCESS,
* ::CUDA_ERROR_DEINITIALIZED,
* ::CUDA_ERROR_NOT_INITIALIZED,
* ::CUDA_ERROR_INVALID_CONTEXT,
* ::CUDA_ERROR_INVALID_HANDLE,
* ::CUDA_ERROR_NOT_READY
* \notefnerr
*
* \sa ::cuStreamCreate,
* ::cuStreamWaitEvent,
* ::cuStreamDestroy,
* ::cuStreamSynchronize
*/
CUresult CUDAAPI cuStreamQuery(CUstream hStream);
/**
* \brief Wait until a stream's tasks are completed
*
* Waits until the device has completed all operations in the stream specified
* by \p hStream. If the context was created with the ::CU_CTX_BLOCKING_SYNC
* flag, the CPU thread will block until the stream is finished with all of
* its tasks.
*
* \param hStream - Stream to wait for
*
* \return
* ::CUDA_SUCCESS,
* ::CUDA_ERROR_DEINITIALIZED,
* ::CUDA_ERROR_NOT_INITIALIZED,
* ::CUDA_ERROR_INVALID_CONTEXT,
* ::CUDA_ERROR_INVALID_HANDLE
* \notefnerr
*
* \sa ::cuStreamCreate,
* ::cuStreamDestroy,
* ::cuStreamWaitEvent,
* ::cuStreamQuery
*/
CUresult CUDAAPI cuStreamSynchronize(CUstream hStream);
/**
* \brief Destroys a stream
*
* Destroys the stream specified by \p hStream.
*
* \param hStream - Stream to destroy
*
* \return
* ::CUDA_SUCCESS,
* ::CUDA_ERROR_DEINITIALIZED,
* ::CUDA_ERROR_NOT_INITIALIZED,
* ::CUDA_ERROR_INVALID_CONTEXT,
* ::CUDA_ERROR_INVALID_VALUE
* \notefnerr
*
* \sa ::cuStreamCreate,
* ::cuStreamWaitEvent,
* ::cuStreamQuery,
* ::cuStreamSynchronize
*/
CUresult CUDAAPI cuStreamDestroy(CUstream hStream);
/** @} */ /* END CUDA_STREAM */
/**
* \defgroup CUDA_EVENT Event Management
*
* This section describes the event management functions of the low-level CUDA
* driver application programming interface.
*
* @{
*/
/**
* \brief Creates an event
*
* Creates an event \p *phEvent with the flags specified via \p Flags. Valid flags
* include:
* - ::CU_EVENT_DEFAULT: Default event creation flag.
* - ::CU_EVENT_BLOCKING_SYNC: Specifies that the created event should use blocking
* synchronization. A CPU thread that uses ::cuEventSynchronize() to wait on
* an event created with this flag will block until the event has actually
* been recorded.
* - ::CU_EVENT_DISABLE_TIMING: Specifies that the created event does not need
* to record timing data. Events created with this flag specified and
* the ::CU_EVENT_BLOCKING_SYNC flag not specified will provide the best
* performance when used with ::cuStreamWaitEvent() and ::cuEventQuery().
*
* \param phEvent - Returns newly created event
* \param Flags - Event creation flags
*
* \return
* ::CUDA_SUCCESS,
* ::CUDA_ERROR_DEINITIALIZED,
* ::CUDA_ERROR_NOT_INITIALIZED,
* ::CUDA_ERROR_INVALID_CONTEXT,
* ::CUDA_ERROR_INVALID_VALUE,
* ::CUDA_ERROR_OUT_OF_MEMORY
* \notefnerr
*
* \sa
* ::cuEventRecord,
* ::cuEventQuery,
* ::cuEventSynchronize,
* ::cuEventDestroy,
* ::cuEventElapsedTime
*/
CUresult CUDAAPI cuEventCreate(CUevent *phEvent, unsigned int Flags);
/**
* \brief Records an event
*
* Records an event. If \p hStream is non-zero, the event is recorded after all
* preceding operations in \p hStream have been completed; otherwise, it is
* recorded after all preceding operations in the CUDA context have been
* completed. Since this operation is asynchronous, ::cuEventQuery() and/or
* ::cuEventSynchronize() must be used to determine when the event has actually
* been recorded.
*
* If ::cuEventRecord() has previously been called on \p hEvent, then this
* call will overwrite any existing state in \p hEvent. Any subsequent calls
* which examine the status of \p hEvent will only examine the completion of
* this most recent call to ::cuEventRecord().
*
* \param hEvent - Event to record
* \param hStream - Stream to record event for
*
* \return
* ::CUDA_SUCCESS,
* ::CUDA_ERROR_DEINITIALIZED,
* ::CUDA_ERROR_NOT_INITIALIZED,
* ::CUDA_ERROR_INVALID_CONTEXT,
* ::CUDA_ERROR_INVALID_HANDLE,
* ::CUDA_ERROR_INVALID_VALUE
* \notefnerr
*
* \sa ::cuEventCreate,
* ::cuEventQuery,
* ::cuEventSynchronize,
* ::cuStreamWaitEvent,
* ::cuEventDestroy,
* ::cuEventElapsedTime
*/
CUresult CUDAAPI cuEventRecord(CUevent hEvent, CUstream hStream);
/**
* \brief Queries an event's status
*
* Query the status of all device work preceding the most recent
* call to ::cuEventRecord() (in the appropriate compute streams,
* as specified by the arguments to ::cuEventRecord()).
*
* If this work has successfully been completed by the device, or if
* ::cuEventRecord() has not been called on \p hEvent, then ::CUDA_SUCCESS is
* returned. If this work has not yet been completed by the device then
* ::CUDA_ERROR_NOT_READY is returned.
*
* \param hEvent - Event to query
*
* \return
* ::CUDA_SUCCESS,
* ::CUDA_ERROR_DEINITIALIZED,
* ::CUDA_ERROR_NOT_INITIALIZED,
* ::CUDA_ERROR_INVALID_CONTEXT,
* ::CUDA_ERROR_INVALID_HANDLE,
* ::CUDA_ERROR_INVALID_VALUE,
* ::CUDA_ERROR_NOT_READY
* \notefnerr
*
* \sa ::cuEventCreate,
* ::cuEventRecord,
* ::cuEventSynchronize,
* ::cuEventDestroy,
* ::cuEventElapsedTime
*/
CUresult CUDAAPI cuEventQuery(CUevent hEvent);
/**
* \brief Waits for an event to complete
*
* Wait until the completion of all device work preceding the most recent
* call to ::cuEventRecord() (in the appropriate compute streams, as specified
* by the arguments to ::cuEventRecord()).
*
* If ::cuEventRecord() has not been called on \p hEvent, ::CUDA_SUCCESS is
* returned immediately.
*
* Waiting for an event that was created with the ::CU_EVENT_BLOCKING_SYNC
* flag will cause the calling CPU thread to block until the event has
* been completed by the device. If the ::CU_EVENT_BLOCKING_SYNC flag has
* not been set, then the CPU thread will busy-wait until the event has
* been completed by the device.
*
* \param hEvent - Event to wait for
*
* \return
* ::CUDA_SUCCESS,
* ::CUDA_ERROR_DEINITIALIZED,
* ::CUDA_ERROR_NOT_INITIALIZED,
* ::CUDA_ERROR_INVALID_CONTEXT,
* ::CUDA_ERROR_INVALID_HANDLE
* \notefnerr
*
* \sa ::cuEventCreate,
* ::cuEventRecord,
* ::cuEventQuery,
* ::cuEventDestroy,
* ::cuEventElapsedTime
*/
CUresult CUDAAPI cuEventSynchronize(CUevent hEvent);
/**
* \brief Destroys an event
*
* Destroys the event specified by \p hEvent.
*
* \param hEvent - Event to destroy
*
* \return
* ::CUDA_SUCCESS,
* ::CUDA_ERROR_DEINITIALIZED,
* ::CUDA_ERROR_NOT_INITIALIZED,
* ::CUDA_ERROR_INVALID_CONTEXT,
* ::CUDA_ERROR_INVALID_HANDLE
* \notefnerr
*
* \sa ::cuEventCreate,
* ::cuEventRecord,
* ::cuEventQuery,
* ::cuEventSynchronize,
* ::cuEventElapsedTime
*/
CUresult CUDAAPI cuEventDestroy(CUevent hEvent);
/**
* \brief Computes the elapsed time between two events
*
* Computes the elapsed time between two events (in milliseconds with a
* resolution of around 0.5 microseconds).
*
* If either event was last recorded in a non-NULL stream, the resulting time
* may be greater than expected (even if both used the same stream handle). This
* happens because the ::cuEventRecord() operation takes place asynchronously
* and there is no guarantee that the measured latency is actually just between
* the two events. Any number of other different stream operations could execute
* in between the two measured events, thus altering the timing in a significant
* way.
*
* If ::cuEventRecord() has not been called on either event then
* ::CUDA_ERROR_INVALID_HANDLE is returned. If ::cuEventRecord() has been called
* on both events but one or both of them has not yet been completed (that is,
* ::cuEventQuery() would return ::CUDA_ERROR_NOT_READY on at least one of the
* events), ::CUDA_ERROR_NOT_READY is returned. If either event was created with
* the ::CU_EVENT_DISABLE_TIMING flag, then this function will return
* ::CUDA_ERROR_INVALID_HANDLE.
*
* \param pMilliseconds - Time between \p hStart and \p hEnd in ms
* \param hStart - Starting event
* \param hEnd - Ending event
*
* \return
* ::CUDA_SUCCESS,
* ::CUDA_ERROR_DEINITIALIZED,
* ::CUDA_ERROR_NOT_INITIALIZED,
* ::CUDA_ERROR_INVALID_CONTEXT,
* ::CUDA_ERROR_INVALID_HANDLE,
* ::CUDA_ERROR_NOT_READY
* \notefnerr
*
* \sa ::cuEventCreate,
* ::cuEventRecord,
* ::cuEventQuery,
* ::cuEventSynchronize,
* ::cuEventDestroy
*/
CUresult CUDAAPI cuEventElapsedTime(float *pMilliseconds, CUevent hStart, CUevent hEnd);
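/*
 * Usage sketch (illustrative, not part of the original header): timing a span
 * of device work with a pair of events. Both events are recorded into the
 * NULL stream so the measured interval brackets exactly the work issued
 * between them, per the caveats above. Error checking is omitted.
 * \code
    CUevent start, stop;
    float ms;
    cuEventCreate(&start, CU_EVENT_DEFAULT);
    cuEventCreate(&stop, CU_EVENT_DEFAULT);
    cuEventRecord(start, 0);
    // ... launch kernels or issue memcpys here ...
    cuEventRecord(stop, 0);
    cuEventSynchronize(stop);               // wait until 'stop' has occurred
    cuEventElapsedTime(&ms, start, stop);   // elapsed time in milliseconds
    cuEventDestroy(start);
    cuEventDestroy(stop);
 * \endcode
 */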
/** @} */ /* END CUDA_EVENT */
/**
* \defgroup CUDA_EXEC Execution Control
*
* This section describes the execution control functions of the low-level CUDA
* driver application programming interface.
*
* @{
*/
/**
* \brief Sets the block-dimensions for the function
*
* Specifies the \p x, \p y, and \p z dimensions of the thread blocks that are
* created when the kernel given by \p hfunc is launched.
*
* \param hfunc - Kernel to specify dimensions of
* \param x - X dimension
* \param y - Y dimension
* \param z - Z dimension
*
* \return
* ::CUDA_SUCCESS,
* ::CUDA_ERROR_DEINITIALIZED,
* ::CUDA_ERROR_NOT_INITIALIZED,
* ::CUDA_ERROR_INVALID_CONTEXT,
* ::CUDA_ERROR_INVALID_HANDLE,
* ::CUDA_ERROR_INVALID_VALUE
* \notefnerr
*
* \sa ::cuFuncSetSharedSize,
* ::cuFuncSetCacheConfig,
* ::cuFuncGetAttribute,
* ::cuParamSetSize,
* ::cuParamSeti,
* ::cuParamSetf,
* ::cuParamSetv,
* ::cuLaunch,
* ::cuLaunchGrid,
* ::cuLaunchGridAsync
*/
CUresult CUDAAPI cuFuncSetBlockShape(CUfunction hfunc, int x, int y, int z);
/**
* \brief Sets the dynamic shared-memory size for the function
*
* Sets through \p bytes the amount of dynamic shared memory that will be
* available to each thread block when the kernel given by \p hfunc is launched.
*
* \param hfunc - Kernel to specify dynamic shared-memory size for
* \param bytes - Dynamic shared-memory size per thread block in bytes
*
* \return
* ::CUDA_SUCCESS,
* ::CUDA_ERROR_DEINITIALIZED,
* ::CUDA_ERROR_NOT_INITIALIZED,
* ::CUDA_ERROR_INVALID_CONTEXT,
* ::CUDA_ERROR_INVALID_HANDLE,
* ::CUDA_ERROR_INVALID_VALUE
* \notefnerr
*
* \sa ::cuFuncSetBlockShape,
* ::cuFuncSetCacheConfig,
* ::cuFuncGetAttribute,
* ::cuParamSetSize,
* ::cuParamSeti,
* ::cuParamSetf,
* ::cuParamSetv,
* ::cuLaunch,
* ::cuLaunchGrid,
* ::cuLaunchGridAsync
*/
CUresult CUDAAPI cuFuncSetSharedSize(CUfunction hfunc, unsigned int bytes);
/**
* \brief Returns information about a function
*
* Returns in \p *pi the integer value of the attribute \p attrib on the kernel
* given by \p hfunc. The supported attributes are:
* - ::CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK: The maximum number of threads
* per block, beyond which a launch of the function would fail. This number
* depends on both the function and the device on which the function is
* currently loaded.
* - ::CU_FUNC_ATTRIBUTE_SHARED_SIZE_BYTES: The size in bytes of
* statically-allocated shared memory per block required by this function.
* This does not include dynamically-allocated shared memory requested by
* the user at runtime.
* - ::CU_FUNC_ATTRIBUTE_CONST_SIZE_BYTES: The size in bytes of user-allocated
* constant memory required by this function.
* - ::CU_FUNC_ATTRIBUTE_LOCAL_SIZE_BYTES: The size in bytes of local memory
* used by each thread of this function.
* - ::CU_FUNC_ATTRIBUTE_NUM_REGS: The number of registers used by each thread
* of this function.
* - ::CU_FUNC_ATTRIBUTE_PTX_VERSION: The PTX virtual architecture version for
* which the function was compiled. This value is the major PTX version * 10
* + the minor PTX version, so a PTX version 1.3 function would return the
* value 13. Note that this may return the undefined value of 0 for cubins
* compiled prior to CUDA 3.0.
* - ::CU_FUNC_ATTRIBUTE_BINARY_VERSION: The binary architecture version for
* which the function was compiled. This value is the major binary
* version * 10 + the minor binary version, so a binary version 1.3 function
* would return the value 13. Note that this will return a value of 10 for
* legacy cubins that do not have a properly-encoded binary architecture
* version.
*
* \param pi - Returned attribute value
* \param attrib - Attribute requested
* \param hfunc - Function to query attribute of
*
* \return
* ::CUDA_SUCCESS,
* ::CUDA_ERROR_DEINITIALIZED,
* ::CUDA_ERROR_NOT_INITIALIZED,
* ::CUDA_ERROR_INVALID_CONTEXT,
* ::CUDA_ERROR_INVALID_HANDLE,
* ::CUDA_ERROR_INVALID_VALUE
* \notefnerr
*
* \sa ::cuFuncSetBlockShape,
* ::cuFuncSetSharedSize,
* ::cuFuncSetCacheConfig,
* ::cuParamSetSize,
* ::cuParamSeti,
* ::cuParamSetf,
* ::cuParamSetv,
* ::cuLaunch,
* ::cuLaunchGrid,
* ::cuLaunchGridAsync
*/
CUresult CUDAAPI cuFuncGetAttribute(int *pi, CUfunction_attribute attrib, CUfunction hfunc);
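/*
 * Usage sketch (illustrative, not part of the original header): querying a
 * kernel's resource usage before choosing a block shape. `hfunc` is assumed
 * to have been obtained via ::cuModuleGetFunction(). Error checking is
 * omitted.
 * \code
    int maxThreads, numRegs;
    cuFuncGetAttribute(&maxThreads, CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK, hfunc);
    cuFuncGetAttribute(&numRegs, CU_FUNC_ATTRIBUTE_NUM_REGS, hfunc);
    // choose a block size no larger than maxThreads
 * \endcode
 */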
/**
* \brief Sets the preferred cache configuration for a device function
*
* On devices where the L1 cache and shared memory use the same hardware
* resources, this sets through \p config the preferred cache configuration for
* the device function \p hfunc. This is only a preference. The driver will use
* the requested configuration if possible, but it is free to choose a different
* configuration if required to execute \p hfunc. Any context-wide preference
* set via ::cuCtxSetCacheConfig() will be overridden by this per-function
* setting unless the per-function setting is ::CU_FUNC_CACHE_PREFER_NONE. In
* that case, the current context-wide setting will be used.
*
* This setting does nothing on devices where the size of the L1 cache and
* shared memory are fixed.
*
* Launching a kernel with a different preference than the most recent
* preference setting may insert a device-side synchronization point.
*
*
* The supported cache configurations are:
* - ::CU_FUNC_CACHE_PREFER_NONE: no preference for shared memory or L1 (default)
* - ::CU_FUNC_CACHE_PREFER_SHARED: prefer larger shared memory and smaller L1 cache
* - ::CU_FUNC_CACHE_PREFER_L1: prefer larger L1 cache and smaller shared memory
*
* \param hfunc - Kernel to configure cache for
* \param config - Requested cache configuration
*
* \return
* ::CUDA_SUCCESS,
* ::CUDA_ERROR_DEINITIALIZED,
* ::CUDA_ERROR_NOT_INITIALIZED,
* ::CUDA_ERROR_INVALID_CONTEXT
* \notefnerr
*
* \sa ::cuCtxGetCacheConfig,
* ::cuCtxSetCacheConfig,
* ::cuFuncSetBlockShape,
* ::cuFuncGetAttribute,
* ::cuParamSetSize,
* ::cuParamSeti,
* ::cuParamSetf,
* ::cuParamSetv,
* ::cuLaunch,
* ::cuLaunchGrid,
* ::cuLaunchGridAsync
*/
CUresult CUDAAPI cuFuncSetCacheConfig(CUfunction hfunc, CUfunc_cache config);
/**
* \brief Sets the parameter size for the function
*
* Sets through \p numbytes the total size in bytes needed by the function
* parameters of the kernel corresponding to \p hfunc.
*
* \param hfunc - Kernel to set parameter size for
* \param numbytes - Size of parameter list in bytes
*
* \return
* ::CUDA_SUCCESS,
* ::CUDA_ERROR_DEINITIALIZED,
* ::CUDA_ERROR_NOT_INITIALIZED,
* ::CUDA_ERROR_INVALID_CONTEXT,
* ::CUDA_ERROR_INVALID_VALUE
* \notefnerr
*
* \sa ::cuFuncSetBlockShape,
* ::cuFuncSetSharedSize,
* ::cuFuncGetAttribute,
* ::cuParamSetf,
* ::cuParamSeti,
* ::cuParamSetv,
* ::cuLaunch,
* ::cuLaunchGrid,
* ::cuLaunchGridAsync
*/
CUresult CUDAAPI cuParamSetSize(CUfunction hfunc, unsigned int numbytes);
/**
* \brief Adds an integer parameter to the function's argument list
*
* Sets an integer parameter that will be specified the next time the
* kernel corresponding to \p hfunc will be invoked. \p offset is a byte offset.
*
* \param hfunc - Kernel to add parameter to
* \param offset - Offset to add parameter to argument list
* \param value - Value of parameter
*
* \return
* ::CUDA_SUCCESS,
* ::CUDA_ERROR_DEINITIALIZED,
* ::CUDA_ERROR_NOT_INITIALIZED,
* ::CUDA_ERROR_INVALID_CONTEXT,
* ::CUDA_ERROR_INVALID_VALUE
* \notefnerr
*
* \sa ::cuFuncSetBlockShape,
* ::cuFuncSetSharedSize,
* ::cuFuncGetAttribute,
* ::cuParamSetSize,
* ::cuParamSetf,
* ::cuParamSetv,
* ::cuLaunch,
* ::cuLaunchGrid,
* ::cuLaunchGridAsync
*/
CUresult CUDAAPI cuParamSeti(CUfunction hfunc, int offset, unsigned int value);
/**
* \brief Adds a floating-point parameter to the function's argument list
*
* Sets a floating-point parameter that will be specified the next time the
* kernel corresponding to \p hfunc will be invoked. \p offset is a byte offset.
*
* \param hfunc - Kernel to add parameter to
* \param offset - Offset to add parameter to argument list
* \param value - Value of parameter
*
* \return
* ::CUDA_SUCCESS,
* ::CUDA_ERROR_DEINITIALIZED,
* ::CUDA_ERROR_NOT_INITIALIZED,
* ::CUDA_ERROR_INVALID_CONTEXT,
* ::CUDA_ERROR_INVALID_VALUE
* \notefnerr
*
* \sa ::cuFuncSetBlockShape,
* ::cuFuncSetSharedSize,
* ::cuFuncGetAttribute,
* ::cuParamSetSize,
* ::cuParamSeti,
* ::cuParamSetv,
* ::cuLaunch,
* ::cuLaunchGrid,
* ::cuLaunchGridAsync
*/
CUresult CUDAAPI cuParamSetf(CUfunction hfunc, int offset, float value);
/**
* \brief Adds arbitrary data to the function's argument list
*
* Copies an arbitrary amount of data (specified in \p numbytes) from \p ptr
* into the parameter space of the kernel corresponding to \p hfunc. \p offset
* is a byte offset.
*
* \param hfunc - Kernel to add data to
* \param offset - Offset to add data to argument list
* \param ptr - Pointer to arbitrary data
* \param numbytes - Size of data to copy in bytes
*
* \return
* ::CUDA_SUCCESS,
* ::CUDA_ERROR_DEINITIALIZED,
* ::CUDA_ERROR_NOT_INITIALIZED,
* ::CUDA_ERROR_INVALID_CONTEXT,
* ::CUDA_ERROR_INVALID_VALUE
* \notefnerr
*
* \sa ::cuFuncSetBlockShape,
* ::cuFuncSetSharedSize,
* ::cuFuncGetAttribute,
* ::cuParamSetSize,
* ::cuParamSetf,
* ::cuParamSeti,
* ::cuLaunch,
* ::cuLaunchGrid,
* ::cuLaunchGridAsync
*/
CUresult CUDAAPI cuParamSetv(CUfunction hfunc, int offset, void *ptr, unsigned int numbytes);
/**
* \brief Launches a CUDA function
*
* Invokes the kernel \p f on a 1 x 1 x 1 grid of blocks. The block
* contains the number of threads specified by a previous call to
* ::cuFuncSetBlockShape().
*
* \param f - Kernel to launch
*
* \return
* ::CUDA_SUCCESS,
* ::CUDA_ERROR_DEINITIALIZED,
* ::CUDA_ERROR_NOT_INITIALIZED,
* ::CUDA_ERROR_INVALID_CONTEXT,
* ::CUDA_ERROR_INVALID_VALUE,
* ::CUDA_ERROR_LAUNCH_FAILED,
* ::CUDA_ERROR_LAUNCH_OUT_OF_RESOURCES,
* ::CUDA_ERROR_LAUNCH_TIMEOUT,
* ::CUDA_ERROR_LAUNCH_INCOMPATIBLE_TEXTURING,
* ::CUDA_ERROR_SHARED_OBJECT_INIT_FAILED
* \notefnerr
*
* \sa ::cuFuncSetBlockShape,
* ::cuFuncSetSharedSize,
* ::cuFuncGetAttribute,
* ::cuParamSetSize,
* ::cuParamSetf,
* ::cuParamSeti,
* ::cuParamSetv,
* ::cuLaunchGrid,
* ::cuLaunchGridAsync
*/
CUresult CUDAAPI cuLaunch(CUfunction f);
/**
* \brief Launches a CUDA function
*
* Invokes the kernel \p f on a \p grid_width x \p grid_height grid of
* blocks. Each block contains the number of threads specified by a previous
* call to ::cuFuncSetBlockShape().
*
* \param f - Kernel to launch
* \param grid_width - Width of grid in blocks
* \param grid_height - Height of grid in blocks
*
* \return
* ::CUDA_SUCCESS,
* ::CUDA_ERROR_DEINITIALIZED,
* ::CUDA_ERROR_NOT_INITIALIZED,
* ::CUDA_ERROR_INVALID_CONTEXT,
* ::CUDA_ERROR_INVALID_VALUE,
* ::CUDA_ERROR_LAUNCH_FAILED,
* ::CUDA_ERROR_LAUNCH_OUT_OF_RESOURCES,
* ::CUDA_ERROR_LAUNCH_TIMEOUT,
* ::CUDA_ERROR_LAUNCH_INCOMPATIBLE_TEXTURING,
* ::CUDA_ERROR_SHARED_OBJECT_INIT_FAILED
* \notefnerr
*
* \sa ::cuFuncSetBlockShape,
* ::cuFuncSetSharedSize,
* ::cuFuncGetAttribute,
* ::cuParamSetSize,
* ::cuParamSetf,
* ::cuParamSeti,
* ::cuParamSetv,
* ::cuLaunch,
* ::cuLaunchGridAsync
*/
CUresult CUDAAPI cuLaunchGrid(CUfunction f, int grid_width, int grid_height);
/**
* \brief Launches a CUDA function
*
* Invokes the kernel \p f on a \p grid_width x \p grid_height grid of
* blocks. Each block contains the number of threads specified by a previous
* call to ::cuFuncSetBlockShape().
*
* ::cuLaunchGridAsync() can optionally be associated to a stream by passing a
* non-zero \p hStream argument.
*
* \param f - Kernel to launch
* \param grid_width - Width of grid in blocks
* \param grid_height - Height of grid in blocks
* \param hStream - Stream identifier
*
* \return
* ::CUDA_SUCCESS,
* ::CUDA_ERROR_DEINITIALIZED,
* ::CUDA_ERROR_NOT_INITIALIZED,
* ::CUDA_ERROR_INVALID_CONTEXT,
* ::CUDA_ERROR_INVALID_VALUE,
* ::CUDA_ERROR_LAUNCH_FAILED,
* ::CUDA_ERROR_LAUNCH_OUT_OF_RESOURCES,
* ::CUDA_ERROR_LAUNCH_TIMEOUT,
* ::CUDA_ERROR_LAUNCH_INCOMPATIBLE_TEXTURING,
* ::CUDA_ERROR_SHARED_OBJECT_INIT_FAILED
* \notefnerr
*
* \sa ::cuFuncSetBlockShape,
* ::cuFuncSetSharedSize,
* ::cuFuncGetAttribute,
* ::cuParamSetSize,
* ::cuParamSetf,
* ::cuParamSeti,
* ::cuParamSetv,
* ::cuLaunch,
* ::cuLaunchGrid
*/
CUresult CUDAAPI cuLaunchGridAsync(CUfunction f, int grid_width, int grid_height, CUstream hStream);
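/*
 * Usage sketch (illustrative, not part of the original header): the full
 * launch sequence for a hypothetical kernel taking (CUdeviceptr, int)
 * parameters. `hfunc`, `d`, and `n` are assumed to exist; the manual offset
 * bookkeeping is simplified (a real marshaller would align each offset to the
 * argument's natural alignment).
 * \code
    int offset = 0;
    void *ptr = (void *)(size_t)d;              // device pointer argument
    cuFuncSetBlockShape(hfunc, 256, 1, 1);      // 256 threads per block
    cuParamSetv(hfunc, offset, &ptr, sizeof(ptr));
    offset += sizeof(ptr);
    cuParamSeti(hfunc, offset, n);              // integer argument
    offset += sizeof(int);
    cuParamSetSize(hfunc, offset);              // total parameter size
    cuLaunchGrid(hfunc, (n + 255) / 256, 1);    // enough blocks to cover n
 * \endcode
 */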
/**
* \defgroup CUDA_EXEC_DEPRECATED Execution Control [DEPRECATED]
*
* This section describes the deprecated execution control functions of the
* low-level CUDA driver application programming interface.
*
* @{
*/
/**
* \brief Adds a texture-reference to the function's argument list
*
* \deprecated
*
* Makes the CUDA array or linear memory bound to the texture reference
* \p hTexRef available to a device program as a texture. In this version of
* CUDA, the texture-reference must be obtained via ::cuModuleGetTexRef() and
* the \p texunit parameter must be set to ::CU_PARAM_TR_DEFAULT.
*
* \param hfunc - Kernel to add texture-reference to
* \param texunit - Texture unit (must be ::CU_PARAM_TR_DEFAULT)
* \param hTexRef - Texture-reference to add to argument list
*
* \return
* ::CUDA_SUCCESS,
* ::CUDA_ERROR_DEINITIALIZED,
* ::CUDA_ERROR_NOT_INITIALIZED,
* ::CUDA_ERROR_INVALID_CONTEXT,
* ::CUDA_ERROR_INVALID_VALUE
* \notefnerr
*/
CUresult CUDAAPI cuParamSetTexRef(CUfunction hfunc, int texunit, CUtexref hTexRef);
/** @} */ /* END CUDA_EXEC_DEPRECATED */
/** @} */ /* END CUDA_EXEC */
/**
* \defgroup CUDA_TEXREF Texture Reference Management
*
* This section describes the texture reference management functions of the
* low-level CUDA driver application programming interface.
*
* @{
*/
/**
* \brief Binds an array as a texture reference
*
* Binds the CUDA array \p hArray to the texture reference \p hTexRef. Any
* previous address or CUDA array state associated with the texture reference
* is superseded by this function. \p Flags must be set to
* ::CU_TRSA_OVERRIDE_FORMAT. Any CUDA array previously bound to \p hTexRef is
* unbound.
*
* \param hTexRef - Texture reference to bind
* \param hArray - Array to bind
* \param Flags - Options (must be ::CU_TRSA_OVERRIDE_FORMAT)
*
* \return
* ::CUDA_SUCCESS,
* ::CUDA_ERROR_DEINITIALIZED,
* ::CUDA_ERROR_NOT_INITIALIZED,
* ::CUDA_ERROR_INVALID_CONTEXT,
* ::CUDA_ERROR_INVALID_VALUE
*
* \sa ::cuTexRefSetAddress,
* ::cuTexRefSetAddress2D, ::cuTexRefSetAddressMode,
* ::cuTexRefSetFilterMode, ::cuTexRefSetFlags, ::cuTexRefSetFormat,
* ::cuTexRefGetAddress, ::cuTexRefGetAddressMode, ::cuTexRefGetArray,
* ::cuTexRefGetFilterMode, ::cuTexRefGetFlags, ::cuTexRefGetFormat
*/
CUresult CUDAAPI cuTexRefSetArray(CUtexref hTexRef, CUarray hArray, unsigned int Flags);
#if __CUDA_API_VERSION >= 3020
/**
* \brief Binds an address as a texture reference
*
* Binds a linear address range to the texture reference \p hTexRef. Any
* previous address or CUDA array state associated with the texture reference
* is superseded by this function. Any memory previously bound to \p hTexRef
* is unbound.
*
* Since the hardware enforces an alignment requirement on texture base
* addresses, ::cuTexRefSetAddress() passes back a byte offset in
* \p *ByteOffset that must be applied to texture fetches in order to read from
* the desired memory. This offset must be divided by the texel size and
* passed to kernels that read from the texture so they can be applied to the
* ::tex1Dfetch() function.
*
* If the device memory pointer was returned from ::cuMemAlloc(), the offset
* is guaranteed to be 0 and NULL may be passed as the \p ByteOffset parameter.
*
* \param ByteOffset - Returned byte offset
* \param hTexRef - Texture reference to bind
* \param dptr - Device pointer to bind
* \param bytes - Size of memory to bind in bytes
*
* \return
* ::CUDA_SUCCESS,
* ::CUDA_ERROR_DEINITIALIZED,
* ::CUDA_ERROR_NOT_INITIALIZED,
* ::CUDA_ERROR_INVALID_CONTEXT,
* ::CUDA_ERROR_INVALID_VALUE
*
* \sa ::cuTexRefSetAddress2D, ::cuTexRefSetAddressMode, ::cuTexRefSetArray
,
* ::cuTexRefSetFilterMode, ::cuTexRefSetFlags, ::cuTexRefSetFormat,
* ::cuTexRefGetAddress, ::cuTexRefGetAddressMode, ::cuTexRefGetArray,
* ::cuTexRefGetFilterMode, ::cuTexRefGetFlags, ::cuTexRefGetFormat
*/
CUresult CUDAAPI cuTexRefSetAddress(size_t *ByteOffset, CUtexref hTexRef, CUdeviceptr dptr, size_t bytes);
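/*
 * Usage sketch (illustrative, not part of the original header): binding memory
 * from ::cuMemAlloc() to a texture reference. Because the pointer came from
 * ::cuMemAlloc(), the byte offset is guaranteed to be 0 and NULL can be passed
 * for \p ByteOffset. `texref` is assumed to come from ::cuModuleGetTexRef().
 * \code
    CUdeviceptr d;
    size_t bytes = 4096 * sizeof(float);
    cuMemAlloc(&d, bytes);
    cuTexRefSetAddress(NULL, texref, d, bytes);  // offset known to be 0
 * \endcode
 */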
/**
* \brief Binds an address as a 2D texture reference
*
* Binds a linear address range to the texture reference \p hTexRef. Any
* previous address or CUDA array state associated with the texture reference
* is superseded by this function. Any memory previously bound to \p hTexRef
* is unbound.
*
* Using a ::tex2D() function inside a kernel requires a call to either
* ::cuTexRefSetArray() to bind the corresponding texture reference to an
* array, or ::cuTexRefSetAddress2D() to bind the texture reference to linear
* memory.
*
* Function calls to ::cuTexRefSetFormat() cannot follow calls to
* ::cuTexRefSetAddress2D() for the same texture reference.
*
* It is required that \p dptr be aligned to the appropriate hardware-specific
* texture alignment. You can query this value using the device attribute
* ::CU_DEVICE_ATTRIBUTE_TEXTURE_ALIGNMENT. If an unaligned \p dptr is
* supplied, ::CUDA_ERROR_INVALID_VALUE is returned.
*
* \param hTexRef - Texture reference to bind
* \param desc - Descriptor of CUDA array
* \param dptr - Device pointer to bind
* \param Pitch - Line pitch in bytes
*
* \return
* ::CUDA_SUCCESS,
* ::CUDA_ERROR_DEINITIALIZED,
* ::CUDA_ERROR_NOT_INITIALIZED,
* ::CUDA_ERROR_INVALID_CONTEXT,
* ::CUDA_ERROR_INVALID_VALUE
*
* \sa ::cuTexRefSetAddress,
* ::cuTexRefSetAddressMode, ::cuTexRefSetArray,
* ::cuTexRefSetFilterMode, ::cuTexRefSetFlags, ::cuTexRefSetFormat,
* ::cuTexRefGetAddress, ::cuTexRefGetAddressMode, ::cuTexRefGetArray,
* ::cuTexRefGetFilterMode, ::cuTexRefGetFlags, ::cuTexRefGetFormat
*/
CUresult CUDAAPI cuTexRefSetAddress2D(CUtexref hTexRef, const CUDA_ARRAY_DESCRIPTOR *desc, CUdeviceptr dptr, size_t Pitch);
#endif /* __CUDA_API_VERSION >= 3020 */
/**
* \brief Sets the format for a texture reference
*
* Specifies the format of the data to be read by the texture reference
* \p hTexRef. \p fmt and \p NumPackedComponents are exactly analogous to the
* ::Format and ::NumChannels members of the ::CUDA_ARRAY_DESCRIPTOR structure:
* They specify the format of each component and the number of components per
* array element.
*
* \param hTexRef - Texture reference
* \param fmt - Format to set
* \param NumPackedComponents - Number of components per array element
*
* \return
* ::CUDA_SUCCESS,
* ::CUDA_ERROR_DEINITIALIZED,
* ::CUDA_ERROR_NOT_INITIALIZED,
* ::CUDA_ERROR_INVALID_CONTEXT,
* ::CUDA_ERROR_INVALID_VALUE
*
* \sa ::cuTexRefSetAddress,
* ::cuTexRefSetAddress2D, ::cuTexRefSetAddressMode, ::cuTexRefSetArray,
* ::cuTexRefSetFilterMode, ::cuTexRefSetFlags,
* ::cuTexRefGetAddress, ::cuTexRefGetAddressMode, ::cuTexRefGetArray,
* ::cuTexRefGetFilterMode, ::cuTexRefGetFlags, ::cuTexRefGetFormat
*/
CUresult CUDAAPI cuTexRefSetFormat(CUtexref hTexRef, CUarray_format fmt, int NumPackedComponents);
/**
* \brief Sets the addressing mode for a texture reference
*
* Specifies the addressing mode \p am for the given dimension \p dim of the
* texture reference \p hTexRef. If \p dim is zero, the addressing mode is
* applied to the first parameter of the functions used to fetch from the
* texture; if \p dim is 1, the second, and so on. ::CUaddress_mode is defined
* as:
* \code
typedef enum CUaddress_mode_enum {
CU_TR_ADDRESS_MODE_WRAP = 0,
CU_TR_ADDRESS_MODE_CLAMP = 1,
CU_TR_ADDRESS_MODE_MIRROR = 2,
CU_TR_ADDRESS_MODE_BORDER = 3
} CUaddress_mode;
* \endcode
*
* Note that this call has no effect if \p hTexRef is bound to linear memory.
*
* \param hTexRef - Texture reference
* \param dim - Dimension
* \param am - Addressing mode to set
*
* \return
* ::CUDA_SUCCESS,
* ::CUDA_ERROR_DEINITIALIZED,
* ::CUDA_ERROR_NOT_INITIALIZED,
* ::CUDA_ERROR_INVALID_CONTEXT,
* ::CUDA_ERROR_INVALID_VALUE
*
* \sa ::cuTexRefSetAddress,
* ::cuTexRefSetAddress2D, ::cuTexRefSetArray,
* ::cuTexRefSetFilterMode, ::cuTexRefSetFlags, ::cuTexRefSetFormat,
* ::cuTexRefGetAddress, ::cuTexRefGetAddressMode, ::cuTexRefGetArray,
* ::cuTexRefGetFilterMode, ::cuTexRefGetFlags, ::cuTexRefGetFormat
*/
CUresult CUDAAPI cuTexRefSetAddressMode(CUtexref hTexRef, int dim, CUaddress_mode am);
/**
* \brief Sets the filtering mode for a texture reference
*
* Specifies the filtering mode \p fm to be used when reading memory through
* the texture reference \p hTexRef. ::CUfilter_mode_enum is defined as:
*
* \code
typedef enum CUfilter_mode_enum {
CU_TR_FILTER_MODE_POINT = 0,
CU_TR_FILTER_MODE_LINEAR = 1
} CUfilter_mode;
* \endcode
*
* Note that this call has no effect if \p hTexRef is bound to linear memory.
*
* \param hTexRef - Texture reference
* \param fm - Filtering mode to set
*
* \return
* ::CUDA_SUCCESS,
* ::CUDA_ERROR_DEINITIALIZED,
* ::CUDA_ERROR_NOT_INITIALIZED,
* ::CUDA_ERROR_INVALID_CONTEXT,
* ::CUDA_ERROR_INVALID_VALUE
*
* \sa ::cuTexRefSetAddress,
* ::cuTexRefSetAddress2D, ::cuTexRefSetAddressMode, ::cuTexRefSetArray,
* ::cuTexRefSetFlags, ::cuTexRefSetFormat,
* ::cuTexRefGetAddress, ::cuTexRefGetAddressMode, ::cuTexRefGetArray,
* ::cuTexRefGetFilterMode, ::cuTexRefGetFlags, ::cuTexRefGetFormat
*/
CUresult CUDAAPI cuTexRefSetFilterMode(CUtexref hTexRef, CUfilter_mode fm);
/**
* \brief Sets the flags for a texture reference
*
* Specifies optional flags via \p Flags to specify the behavior of data
* returned through the texture reference \p hTexRef. The valid flags are:
*
* - ::CU_TRSF_READ_AS_INTEGER, which suppresses the default behavior of
* having the texture promote integer data to floating point data in the
* range [0, 1];
* - ::CU_TRSF_NORMALIZED_COORDINATES, which suppresses the default behavior
* of having the texture coordinates range from [0, Dim) where Dim is the
* width or height of the CUDA array. Instead, the texture coordinates
* [0, 1.0) reference the entire breadth of the array dimension;
*
* \param hTexRef - Texture reference
* \param Flags - Optional flags to set
*
* \return
* ::CUDA_SUCCESS,
* ::CUDA_ERROR_DEINITIALIZED,
* ::CUDA_ERROR_NOT_INITIALIZED,
* ::CUDA_ERROR_INVALID_CONTEXT,
* ::CUDA_ERROR_INVALID_VALUE
*
* \sa ::cuTexRefSetAddress,
* ::cuTexRefSetAddress2D, ::cuTexRefSetAddressMode, ::cuTexRefSetArray,
* ::cuTexRefSetFilterMode, ::cuTexRefSetFormat,
* ::cuTexRefGetAddress, ::cuTexRefGetAddressMode, ::cuTexRefGetArray,
* ::cuTexRefGetFilterMode, ::cuTexRefGetFlags, ::cuTexRefGetFormat
*/
CUresult CUDAAPI cuTexRefSetFlags(CUtexref hTexRef, unsigned int Flags);
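/*
 * Illustrative sketch (not part of the original header): the typical
 * configuration sequence for a texture reference fetched from a loaded
 * module. The module handle and the texref name "texData" are assumptions
 * made up for this example; <cuda.h> is assumed to be included.
 */
static CUresult configureTexRef(CUmodule hModule)
{
    CUtexref hTexRef;
    CUresult rc = cuModuleGetTexRef(&hTexRef, hModule, "texData"); /* hypothetical name */
    if (rc != CUDA_SUCCESS) return rc;
    /* One float component per element. */
    rc = cuTexRefSetFormat(hTexRef, CU_AD_FORMAT_FLOAT, 1);
    if (rc != CUDA_SUCCESS) return rc;
    /* Clamp out-of-range coordinates in both dimensions. */
    rc = cuTexRefSetAddressMode(hTexRef, 0, CU_TR_ADDRESS_MODE_CLAMP);
    if (rc != CUDA_SUCCESS) return rc;
    rc = cuTexRefSetAddressMode(hTexRef, 1, CU_TR_ADDRESS_MODE_CLAMP);
    if (rc != CUDA_SUCCESS) return rc;
    /* Interpolate between texels and fetch with [0, 1) coordinates. */
    rc = cuTexRefSetFilterMode(hTexRef, CU_TR_FILTER_MODE_LINEAR);
    if (rc != CUDA_SUCCESS) return rc;
    return cuTexRefSetFlags(hTexRef, CU_TRSF_NORMALIZED_COORDINATES);
}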
#if __CUDA_API_VERSION >= 3020
/**
* \brief Gets the address associated with a texture reference
*
* Returns in \p *pdptr the base address bound to the texture reference
* \p hTexRef, or returns ::CUDA_ERROR_INVALID_VALUE if the texture reference
* is not bound to any device memory range.
*
* \param pdptr - Returned device address
* \param hTexRef - Texture reference
*
* \return
* ::CUDA_SUCCESS,
* ::CUDA_ERROR_DEINITIALIZED,
* ::CUDA_ERROR_NOT_INITIALIZED,
* ::CUDA_ERROR_INVALID_CONTEXT,
* ::CUDA_ERROR_INVALID_VALUE
*
* \sa ::cuTexRefSetAddress,
* ::cuTexRefSetAddress2D, ::cuTexRefSetAddressMode, ::cuTexRefSetArray,
* ::cuTexRefSetFilterMode, ::cuTexRefSetFlags, ::cuTexRefSetFormat,
* ::cuTexRefGetAddressMode, ::cuTexRefGetArray,
* ::cuTexRefGetFilterMode, ::cuTexRefGetFlags, ::cuTexRefGetFormat
*/
CUresult CUDAAPI cuTexRefGetAddress(CUdeviceptr *pdptr, CUtexref hTexRef);
#endif /* __CUDA_API_VERSION >= 3020 */
/**
* \brief Gets the array bound to a texture reference
*
* Returns in \p *phArray the CUDA array bound to the texture reference
* \p hTexRef, or returns ::CUDA_ERROR_INVALID_VALUE if the texture reference
* is not bound to any CUDA array.
*
* \param phArray - Returned array
* \param hTexRef - Texture reference
*
* \return
* ::CUDA_SUCCESS,
* ::CUDA_ERROR_DEINITIALIZED,
* ::CUDA_ERROR_NOT_INITIALIZED,
* ::CUDA_ERROR_INVALID_CONTEXT,
* ::CUDA_ERROR_INVALID_VALUE
*
* \sa ::cuTexRefSetAddress,
* ::cuTexRefSetAddress2D, ::cuTexRefSetAddressMode, ::cuTexRefSetArray,
* ::cuTexRefSetFilterMode, ::cuTexRefSetFlags, ::cuTexRefSetFormat,
* ::cuTexRefGetAddress, ::cuTexRefGetAddressMode,
* ::cuTexRefGetFilterMode, ::cuTexRefGetFlags, ::cuTexRefGetFormat
*/
CUresult CUDAAPI cuTexRefGetArray(CUarray *phArray, CUtexref hTexRef);
/**
* \brief Gets the addressing mode used by a texture reference
*
* Returns in \p *pam the addressing mode corresponding to the
* dimension \p dim of the texture reference \p hTexRef. Currently, the only
* valid values for \p dim are 0 and 1.
*
* \param pam - Returned addressing mode
* \param hTexRef - Texture reference
* \param dim - Dimension
*
* \return
* ::CUDA_SUCCESS,
* ::CUDA_ERROR_DEINITIALIZED,
* ::CUDA_ERROR_NOT_INITIALIZED,
* ::CUDA_ERROR_INVALID_CONTEXT,
* ::CUDA_ERROR_INVALID_VALUE
*
* \sa ::cuTexRefSetAddress,
* ::cuTexRefSetAddress2D, ::cuTexRefSetAddressMode, ::cuTexRefSetArray,
* ::cuTexRefSetFilterMode, ::cuTexRefSetFlags, ::cuTexRefSetFormat,
* ::cuTexRefGetAddress, ::cuTexRefGetArray,
* ::cuTexRefGetFilterMode, ::cuTexRefGetFlags, ::cuTexRefGetFormat
*/
CUresult CUDAAPI cuTexRefGetAddressMode(CUaddress_mode *pam, CUtexref hTexRef, int dim);
/**
* \brief Gets the filter-mode used by a texture reference
*
* Returns in \p *pfm the filtering mode of the texture reference
* \p hTexRef.
*
* \param pfm - Returned filtering mode
* \param hTexRef - Texture reference
*
* \return
* ::CUDA_SUCCESS,
* ::CUDA_ERROR_DEINITIALIZED,
* ::CUDA_ERROR_NOT_INITIALIZED,
* ::CUDA_ERROR_INVALID_CONTEXT,
* ::CUDA_ERROR_INVALID_VALUE
*
* \sa ::cuTexRefSetAddress,
* ::cuTexRefSetAddress2D, ::cuTexRefSetAddressMode, ::cuTexRefSetArray,
* ::cuTexRefSetFilterMode, ::cuTexRefSetFlags, ::cuTexRefSetFormat,
* ::cuTexRefGetAddress, ::cuTexRefGetAddressMode, ::cuTexRefGetArray,
* ::cuTexRefGetFlags, ::cuTexRefGetFormat
*/
CUresult CUDAAPI cuTexRefGetFilterMode(CUfilter_mode *pfm, CUtexref hTexRef);
/**
* \brief Gets the format used by a texture reference
*
* Returns in \p *pFormat and \p *pNumChannels the format and number
* of components of the CUDA array bound to the texture reference \p hTexRef.
* If \p pFormat or \p pNumChannels is NULL, it will be ignored.
*
* \param pFormat - Returned format
* \param pNumChannels - Returned number of components
* \param hTexRef - Texture reference
*
* \return
* ::CUDA_SUCCESS,
* ::CUDA_ERROR_DEINITIALIZED,
* ::CUDA_ERROR_NOT_INITIALIZED,
* ::CUDA_ERROR_INVALID_CONTEXT,
* ::CUDA_ERROR_INVALID_VALUE
*
* \sa ::cuTexRefSetAddress,
* ::cuTexRefSetAddress2D, ::cuTexRefSetAddressMode, ::cuTexRefSetArray,
* ::cuTexRefSetFilterMode, ::cuTexRefSetFlags, ::cuTexRefSetFormat,
* ::cuTexRefGetAddress, ::cuTexRefGetAddressMode, ::cuTexRefGetArray,
* ::cuTexRefGetFilterMode, ::cuTexRefGetFlags
*/
CUresult CUDAAPI cuTexRefGetFormat(CUarray_format *pFormat, int *pNumChannels, CUtexref hTexRef);
/**
* \brief Gets the flags used by a texture reference
*
* Returns in \p *pFlags the flags of the texture reference \p hTexRef.
*
* \param pFlags - Returned flags
* \param hTexRef - Texture reference
*
* \return
* ::CUDA_SUCCESS,
* ::CUDA_ERROR_DEINITIALIZED,
* ::CUDA_ERROR_NOT_INITIALIZED,
* ::CUDA_ERROR_INVALID_CONTEXT,
* ::CUDA_ERROR_INVALID_VALUE
*
* \sa ::cuTexRefSetAddress,
* ::cuTexRefSetAddress2D, ::cuTexRefSetAddressMode, ::cuTexRefSetArray,
* ::cuTexRefSetFilterMode, ::cuTexRefSetFlags, ::cuTexRefSetFormat,
* ::cuTexRefGetAddress, ::cuTexRefGetAddressMode, ::cuTexRefGetArray,
* ::cuTexRefGetFilterMode, ::cuTexRefGetFormat
*/
CUresult CUDAAPI cuTexRefGetFlags(unsigned int *pFlags, CUtexref hTexRef);
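/*
 * Illustrative sketch: reading back the current state of a texture
 * reference with the getters above. All of them follow the same
 * out-parameter convention; <stdio.h> is assumed for the printf calls.
 */
static void dumpTexRefState(CUtexref hTexRef)
{
    CUarray_format fmt;
    int channels;
    CUaddress_mode am;
    CUfilter_mode fm;
    unsigned int flags;

    if (cuTexRefGetFormat(&fmt, &channels, hTexRef) == CUDA_SUCCESS)
        printf("format=%d channels=%d\n", (int)fmt, channels);
    if (cuTexRefGetAddressMode(&am, hTexRef, 0) == CUDA_SUCCESS)
        printf("dim 0 address mode=%d\n", (int)am);
    if (cuTexRefGetFilterMode(&fm, hTexRef) == CUDA_SUCCESS)
        printf("filter mode=%d\n", (int)fm);
    if (cuTexRefGetFlags(&flags, hTexRef) == CUDA_SUCCESS)
        printf("flags=0x%x\n", flags);
}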
/**
* \defgroup CUDA_TEXREF_DEPRECATED Texture Reference Management [DEPRECATED]
*
* This section describes the deprecated texture reference management
* functions of the low-level CUDA driver application programming interface.
*
* @{
*/
/**
* \brief Creates a texture reference
*
* \deprecated
*
* Creates a texture reference and returns its handle in \p *pTexRef. Once
* created, the application must call ::cuTexRefSetArray() or
* ::cuTexRefSetAddress() to associate the reference with allocated memory.
* Other texture reference functions are used to specify the format and
* interpretation (addressing, filtering, etc.) to be used when the memory is
* read through this texture reference.
*
* \param pTexRef - Returned texture reference
*
* \return
* ::CUDA_SUCCESS,
* ::CUDA_ERROR_DEINITIALIZED,
* ::CUDA_ERROR_NOT_INITIALIZED,
* ::CUDA_ERROR_INVALID_CONTEXT,
* ::CUDA_ERROR_INVALID_VALUE
*
* \sa ::cuTexRefDestroy
*/
CUresult CUDAAPI cuTexRefCreate(CUtexref *pTexRef);
/**
* \brief Destroys a texture reference
*
* \deprecated
*
* Destroys the texture reference specified by \p hTexRef.
*
* \param hTexRef - Texture reference to destroy
*
* \return
* ::CUDA_SUCCESS,
* ::CUDA_ERROR_DEINITIALIZED,
* ::CUDA_ERROR_NOT_INITIALIZED,
* ::CUDA_ERROR_INVALID_CONTEXT,
* ::CUDA_ERROR_INVALID_VALUE
*
* \sa ::cuTexRefCreate
*/
CUresult CUDAAPI cuTexRefDestroy(CUtexref hTexRef);
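/*
 * Illustrative sketch of the deprecated create/bind/destroy flow. New code
 * should obtain module-scoped references via cuModuleGetTexRef instead.
 * "hArray" is assumed to be an existing CUDA array.
 */
static CUresult legacyTexRefUse(CUarray hArray)
{
    CUtexref ref;
    CUresult rc = cuTexRefCreate(&ref);
    if (rc != CUDA_SUCCESS) return rc;
    rc = cuTexRefSetArray(ref, hArray, CU_TRSA_OVERRIDE_FORMAT);
    /* ... configure and fetch through the reference ... */
    cuTexRefDestroy(ref);
    return rc;
}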
/** @} */ /* END CUDA_TEXREF_DEPRECATED */
/** @} */ /* END CUDA_TEXREF */
/**
* \defgroup CUDA_SURFREF Surface Reference Management
*
* This section describes the surface reference management functions of the
* low-level CUDA driver application programming interface.
*
* @{
*/
/**
* \brief Sets the CUDA array for a surface reference.
*
* Sets the CUDA array \p hArray to be read and written by the surface reference
* \p hSurfRef. Any previous CUDA array state associated with the surface
* reference is superseded by this function. \p Flags must be set to 0.
* The ::CUDA_ARRAY3D_SURFACE_LDST flag must have been set for the CUDA array.
* Any CUDA array previously bound to \p hSurfRef is unbound.
*
* \param hSurfRef - Surface reference handle
* \param hArray - CUDA array handle
* \param Flags - set to 0
*
* \return
* ::CUDA_SUCCESS,
* ::CUDA_ERROR_DEINITIALIZED,
* ::CUDA_ERROR_NOT_INITIALIZED,
* ::CUDA_ERROR_INVALID_CONTEXT,
* ::CUDA_ERROR_INVALID_VALUE
*
* \sa ::cuModuleGetSurfRef, ::cuSurfRefGetArray
*/
CUresult CUDAAPI cuSurfRefSetArray(CUsurfref hSurfRef, CUarray hArray, unsigned int Flags);
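/*
 * Illustrative sketch: an array is only eligible for surface load/store if
 * it was created with ::CUDA_ARRAY3D_SURFACE_LDST. "hSurfRef" is assumed to
 * come from cuModuleGetSurfRef on a module that declares a surface.
 */
static CUresult bindSurfaceArray(CUsurfref hSurfRef, CUarray *phArrayOut)
{
    CUDA_ARRAY3D_DESCRIPTOR desc;
    CUresult rc;

    desc.Width       = 512;
    desc.Height      = 512;
    desc.Depth       = 0;                         /* 2D array */
    desc.Format      = CU_AD_FORMAT_UNSIGNED_INT8;
    desc.NumChannels = 4;
    desc.Flags       = CUDA_ARRAY3D_SURFACE_LDST; /* required for surface binding */

    rc = cuArray3DCreate(phArrayOut, &desc);
    if (rc != CUDA_SUCCESS) return rc;
    return cuSurfRefSetArray(hSurfRef, *phArrayOut, 0);
}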
/**
* \brief Passes back the CUDA array bound to a surface reference.
*
* Returns in \p *phArray the CUDA array bound to the surface reference
* \p hSurfRef, or returns ::CUDA_ERROR_INVALID_VALUE if the surface reference
* is not bound to any CUDA array.
* \param phArray - Returned CUDA array
* \param hSurfRef - Surface reference handle
*
* \return
* ::CUDA_SUCCESS,
* ::CUDA_ERROR_DEINITIALIZED,
* ::CUDA_ERROR_NOT_INITIALIZED,
* ::CUDA_ERROR_INVALID_CONTEXT,
* ::CUDA_ERROR_INVALID_VALUE
*
* \sa ::cuModuleGetSurfRef, ::cuSurfRefSetArray
*/
CUresult CUDAAPI cuSurfRefGetArray(CUarray *phArray, CUsurfref hSurfRef);
/** @} */ /* END CUDA_SURFREF */
/**
* \defgroup CUDA_GRAPHICS Graphics Interoperability
*
* This section describes the graphics interoperability functions of the
* low-level CUDA driver application programming interface.
*
* @{
*/
/**
* \brief Unregisters a graphics resource for access by CUDA
*
* Unregisters the graphics resource \p resource so it is not accessible by
* CUDA unless registered again.
*
* If \p resource is invalid then ::CUDA_ERROR_INVALID_HANDLE is
* returned.
*
* \param resource - Resource to unregister
*
* \return
* ::CUDA_SUCCESS,
* ::CUDA_ERROR_DEINITIALIZED,
* ::CUDA_ERROR_NOT_INITIALIZED,
* ::CUDA_ERROR_INVALID_CONTEXT,
* ::CUDA_ERROR_INVALID_HANDLE,
* ::CUDA_ERROR_UNKNOWN
* \notefnerr
*
* \sa
* ::cuGraphicsD3D9RegisterResource,
* ::cuGraphicsD3D10RegisterResource,
* ::cuGraphicsD3D11RegisterResource,
* ::cuGraphicsGLRegisterBuffer,
* ::cuGraphicsGLRegisterImage
*/
CUresult CUDAAPI cuGraphicsUnregisterResource(CUgraphicsResource resource);
/**
* \brief Get an array through which to access a subresource of a mapped graphics resource.
*
* Returns in \p *pArray an array through which the subresource of the mapped
* graphics resource \p resource which corresponds to array index \p arrayIndex
* and mipmap level \p mipLevel may be accessed. The value set in \p *pArray may
* change every time that \p resource is mapped.
*
* If \p resource is not a texture then it cannot be accessed via an array and
* ::CUDA_ERROR_NOT_MAPPED_AS_ARRAY is returned.
* If \p arrayIndex is not a valid array index for \p resource then
* ::CUDA_ERROR_INVALID_VALUE is returned.
* If \p mipLevel is not a valid mipmap level for \p resource then
* ::CUDA_ERROR_INVALID_VALUE is returned.
* If \p resource is not mapped then ::CUDA_ERROR_NOT_MAPPED is returned.
*
* \param pArray - Returned array through which a subresource of \p resource may be accessed
* \param resource - Mapped resource to access
* \param arrayIndex - Array index for array textures or cubemap face
* index as defined by ::CUarray_cubemap_face for
* cubemap textures for the subresource to access
* \param mipLevel - Mipmap level for the subresource to access
*
* \return
* ::CUDA_SUCCESS,
* ::CUDA_ERROR_DEINITIALIZED,
* ::CUDA_ERROR_NOT_INITIALIZED,
* ::CUDA_ERROR_INVALID_CONTEXT,
* ::CUDA_ERROR_INVALID_VALUE,
* ::CUDA_ERROR_INVALID_HANDLE,
* ::CUDA_ERROR_NOT_MAPPED,
* ::CUDA_ERROR_NOT_MAPPED_AS_ARRAY
* \notefnerr
*
* \sa ::cuGraphicsResourceGetMappedPointer
*/
CUresult CUDAAPI cuGraphicsSubResourceGetMappedArray(CUarray *pArray, CUgraphicsResource resource, unsigned int arrayIndex, unsigned int mipLevel);
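/*
 * Illustrative sketch: map a registered resource and fetch the CUarray
 * behind mipmap level 0 of its first subresource. The returned handle is
 * only valid while the resource remains mapped; the caller is expected to
 * call cuGraphicsUnmapResources() when done with the array.
 */
static CUresult getLevel0Array(CUgraphicsResource res, CUstream hStream, CUarray *pOut)
{
    CUresult rc = cuGraphicsMapResources(1, &res, hStream);
    if (rc != CUDA_SUCCESS) return rc;
    rc = cuGraphicsSubResourceGetMappedArray(pOut, res, 0, 0);
    if (rc != CUDA_SUCCESS)
        cuGraphicsUnmapResources(1, &res, hStream); /* undo the map on failure */
    return rc;
}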
#if __CUDA_API_VERSION >= 3020
/**
* \brief Get a device pointer through which to access a mapped graphics resource.
*
* Returns in \p *pDevPtr a pointer through which the mapped graphics resource
* \p resource may be accessed.
* Returns in \p pSize the size of the memory in bytes which may be accessed from that pointer.
* The value set in \p *pDevPtr may change every time that \p resource is mapped.
*
* If \p resource is not a buffer then it cannot be accessed via a pointer and
* ::CUDA_ERROR_NOT_MAPPED_AS_POINTER is returned.
* If \p resource is not mapped then ::CUDA_ERROR_NOT_MAPPED is returned.
*
* \param pDevPtr - Returned pointer through which \p resource may be accessed
* \param pSize - Returned size of the buffer accessible starting at \p *pDevPtr
* \param resource - Mapped resource to access
*
* \return
* ::CUDA_SUCCESS,
* ::CUDA_ERROR_DEINITIALIZED,
* ::CUDA_ERROR_NOT_INITIALIZED,
* ::CUDA_ERROR_INVALID_CONTEXT,
* ::CUDA_ERROR_INVALID_VALUE,
* ::CUDA_ERROR_INVALID_HANDLE,
* ::CUDA_ERROR_NOT_MAPPED,
* ::CUDA_ERROR_NOT_MAPPED_AS_POINTER
* \notefnerr
*
* \sa
* ::cuGraphicsMapResources,
* ::cuGraphicsSubResourceGetMappedArray
*/
CUresult CUDAAPI cuGraphicsResourceGetMappedPointer(CUdeviceptr *pDevPtr, size_t *pSize, CUgraphicsResource resource);
#endif /* __CUDA_API_VERSION >= 3020 */
/**
* \brief Set usage flags for mapping a graphics resource
*
* Set \p flags for mapping the graphics resource \p resource.
*
* Changes to \p flags will take effect the next time \p resource is mapped.
* The \p flags argument may be any of the following:
* - ::CU_GRAPHICS_MAP_RESOURCE_FLAGS_NONE: Specifies no hints about how this
* resource will be used. It is therefore assumed that this resource will be
* read from and written to by CUDA kernels. This is the default value.
* - ::CU_GRAPHICS_MAP_RESOURCE_FLAGS_READONLY: Specifies that CUDA kernels which
* access this resource will not write to this resource.
* - ::CU_GRAPHICS_MAP_RESOURCE_FLAGS_WRITEDISCARD: Specifies that CUDA kernels
* which access this resource will not read from this resource and will
* write over the entire contents of the resource, so none of the data
* previously stored in the resource will be preserved.
*
* If \p resource is presently mapped for access by CUDA then
* ::CUDA_ERROR_ALREADY_MAPPED is returned.
* If \p flags is not one of the above values then ::CUDA_ERROR_INVALID_VALUE is returned.
*
* \param resource - Registered resource to set flags for
* \param flags - Parameters for resource mapping
*
* \return
* ::CUDA_SUCCESS,
* ::CUDA_ERROR_DEINITIALIZED,
* ::CUDA_ERROR_NOT_INITIALIZED,
* ::CUDA_ERROR_INVALID_CONTEXT,
* ::CUDA_ERROR_INVALID_VALUE,
* ::CUDA_ERROR_INVALID_HANDLE,
* ::CUDA_ERROR_ALREADY_MAPPED
* \notefnerr
*
* \sa
* ::cuGraphicsMapResources
*/
CUresult CUDAAPI cuGraphicsResourceSetMapFlags(CUgraphicsResource resource, unsigned int flags);
/**
* \brief Map graphics resources for access by CUDA
*
* Maps the \p count graphics resources in \p resources for access by CUDA.
*
* The resources in \p resources may be accessed by CUDA until they
* are unmapped. The graphics API from which \p resources were registered
* should not access any resources while they are mapped by CUDA. If an
* application does so, the results are undefined.
*
* This function provides the synchronization guarantee that any graphics calls
* issued before ::cuGraphicsMapResources() will complete before any subsequent
* CUDA work issued in \p stream begins.
*
* If \p resources includes any duplicate entries then ::CUDA_ERROR_INVALID_HANDLE is returned.
* If any of \p resources are presently mapped for access by CUDA then ::CUDA_ERROR_ALREADY_MAPPED is returned.
*
* \param count - Number of resources to map
* \param resources - Resources to map for CUDA usage
* \param hStream - Stream with which to synchronize
*
* \return
* ::CUDA_SUCCESS,
* ::CUDA_ERROR_DEINITIALIZED,
* ::CUDA_ERROR_NOT_INITIALIZED,
* ::CUDA_ERROR_INVALID_CONTEXT,
* ::CUDA_ERROR_INVALID_HANDLE,
* ::CUDA_ERROR_ALREADY_MAPPED,
* ::CUDA_ERROR_UNKNOWN
* \notefnerr
*
* \sa
* ::cuGraphicsResourceGetMappedPointer,
* ::cuGraphicsSubResourceGetMappedArray,
* ::cuGraphicsUnmapResources
*/
CUresult CUDAAPI cuGraphicsMapResources(unsigned int count, CUgraphicsResource *resources, CUstream hStream);
/**
* \brief Unmap graphics resources.
*
* Unmaps the \p count graphics resources in \p resources.
*
* Once unmapped, the resources in \p resources may not be accessed by CUDA
* until they are mapped again.
*
* This function provides the synchronization guarantee that any CUDA work issued
* in \p stream before ::cuGraphicsUnmapResources() will complete before any
* subsequently issued graphics work begins.
*
* If \p resources includes any duplicate entries then ::CUDA_ERROR_INVALID_HANDLE is returned.
* If any of \p resources are not presently mapped for access by CUDA then ::CUDA_ERROR_NOT_MAPPED is returned.
*
* \param count - Number of resources to unmap
* \param resources - Resources to unmap
* \param hStream - Stream with which to synchronize
*
* \return
* ::CUDA_SUCCESS,
* ::CUDA_ERROR_DEINITIALIZED,
* ::CUDA_ERROR_NOT_INITIALIZED,
* ::CUDA_ERROR_INVALID_CONTEXT,
* ::CUDA_ERROR_INVALID_HANDLE,
* ::CUDA_ERROR_NOT_MAPPED,
* ::CUDA_ERROR_UNKNOWN
* \notefnerr
*
* \sa
* ::cuGraphicsMapResources
*/
CUresult CUDAAPI cuGraphicsUnmapResources(unsigned int count, CUgraphicsResource *resources, CUstream hStream);
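/*
 * Illustrative sketch of the full set-flags/map/use/unmap cycle for a
 * buffer-like resource. Declaring the resource write-discard lets the
 * driver skip preserving its previous contents; the memset stands in for a
 * real kernel launch. Note the enum constant is spelled
 * CU_GRAPHICS_MAP_RESOURCE_FLAGS_WRITE_DISCARD in CUgraphicsMapResourceFlags,
 * even though the prose above omits the underscore.
 */
static CUresult fillRegisteredBuffer(CUgraphicsResource res, CUstream hStream)
{
    CUdeviceptr dptr;
    size_t size;
    CUresult rc;

    rc = cuGraphicsResourceSetMapFlags(res, CU_GRAPHICS_MAP_RESOURCE_FLAGS_WRITE_DISCARD);
    if (rc != CUDA_SUCCESS) return rc;
    rc = cuGraphicsMapResources(1, &res, hStream);
    if (rc != CUDA_SUCCESS) return rc;
    rc = cuGraphicsResourceGetMappedPointer(&dptr, &size, res);
    if (rc == CUDA_SUCCESS)
        rc = cuMemsetD8(dptr, 0, size);          /* stand-in for a kernel launch */
    cuGraphicsUnmapResources(1, &res, hStream);  /* always release the mapping */
    return rc;
}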
/** @} */ /* END CUDA_GRAPHICS */
CUresult CUDAAPI cuGetExportTable(const void **ppExportTable, const CUuuid *pExportTableId);
/** @} */ /* END CUDA_DRIVER */
/**
* CUDA API versioning support
*/
#if defined(__CUDA_API_VERSION_INTERNAL)
#undef cuDeviceTotalMem
#undef cuCtxCreate
#undef cuModuleGetGlobal
#undef cuMemGetInfo
#undef cuMemAlloc
#undef cuMemAllocPitch
#undef cuMemFree
#undef cuMemGetAddressRange
#undef cuMemAllocHost
#undef cuMemHostGetDevicePointer
#undef cuMemcpyHtoD
#undef cuMemcpyDtoH
#undef cuMemcpyDtoD
#undef cuMemcpyDtoA
#undef cuMemcpyAtoD
#undef cuMemcpyHtoA
#undef cuMemcpyAtoH
#undef cuMemcpyAtoA
#undef cuMemcpyHtoAAsync
#undef cuMemcpyAtoHAsync
#undef cuMemcpy2D
#undef cuMemcpy2DUnaligned
#undef cuMemcpy3D
#undef cuMemcpyHtoDAsync
#undef cuMemcpyDtoHAsync
#undef cuMemcpyDtoDAsync
#undef cuMemcpy2DAsync
#undef cuMemcpy3DAsync
#undef cuMemsetD8
#undef cuMemsetD16
#undef cuMemsetD32
#undef cuMemsetD2D8
#undef cuMemsetD2D16
#undef cuMemsetD2D32
#undef cuArrayCreate
#undef cuArrayGetDescriptor
#undef cuArray3DCreate
#undef cuArray3DGetDescriptor
#undef cuTexRefSetAddress
#undef cuTexRefSetAddress2D
#undef cuTexRefGetAddress
#undef cuGraphicsResourceGetMappedPointer
#endif /* __CUDA_API_VERSION_INTERNAL */
/**
* CUDA API made obsolete at API version 3020
*/
#if defined(__CUDA_API_VERSION_INTERNAL)
#define CUdeviceptr CUdeviceptr_v1
#define CUDA_MEMCPY2D_st CUDA_MEMCPY2D_v1_st
#define CUDA_MEMCPY2D CUDA_MEMCPY2D_v1
#define CUDA_MEMCPY3D_st CUDA_MEMCPY3D_v1_st
#define CUDA_MEMCPY3D CUDA_MEMCPY3D_v1
#define CUDA_ARRAY_DESCRIPTOR_st CUDA_ARRAY_DESCRIPTOR_v1_st
#define CUDA_ARRAY_DESCRIPTOR CUDA_ARRAY_DESCRIPTOR_v1
#define CUDA_ARRAY3D_DESCRIPTOR_st CUDA_ARRAY3D_DESCRIPTOR_v1_st
#define CUDA_ARRAY3D_DESCRIPTOR CUDA_ARRAY3D_DESCRIPTOR_v1
#endif /* __CUDA_API_VERSION_INTERNAL */
#if defined(__CUDA_API_VERSION_INTERNAL) || __CUDA_API_VERSION < 3020
typedef unsigned int CUdeviceptr;
typedef struct CUDA_MEMCPY2D_st
{
unsigned int srcXInBytes; /**< Source X in bytes */
unsigned int srcY; /**< Source Y */
CUmemorytype srcMemoryType; /**< Source memory type (host, device, array) */
const void *srcHost; /**< Source host pointer */
CUdeviceptr srcDevice; /**< Source device pointer */
CUarray srcArray; /**< Source array reference */
unsigned int srcPitch; /**< Source pitch (ignored when src is array) */
unsigned int dstXInBytes; /**< Destination X in bytes */
unsigned int dstY; /**< Destination Y */
CUmemorytype dstMemoryType; /**< Destination memory type (host, device, array) */
void *dstHost; /**< Destination host pointer */
CUdeviceptr dstDevice; /**< Destination device pointer */
CUarray dstArray; /**< Destination array reference */
unsigned int dstPitch; /**< Destination pitch (ignored when dst is array) */
unsigned int WidthInBytes; /**< Width of 2D memory copy in bytes */
unsigned int Height; /**< Height of 2D memory copy */
} CUDA_MEMCPY2D;
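/*
 * Illustrative sketch: filling the legacy (pre-3.2, unsigned int) 2D copy
 * descriptor for a pitched host-to-device copy. Fields that do not apply
 * to the chosen memory types are zeroed via memset (<string.h> assumed);
 * the parameter names are made up for this example.
 */
static CUresult copyHostToPitched(const void *hostBuf, unsigned int widthBytes,
                                  unsigned int height,
                                  CUdeviceptr devPtr, unsigned int devPitch)
{
    CUDA_MEMCPY2D cp;
    memset(&cp, 0, sizeof(cp));          /* unused fields must be zero */
    cp.srcMemoryType = CU_MEMORYTYPE_HOST;
    cp.srcHost       = hostBuf;
    cp.srcPitch      = widthBytes;
    cp.dstMemoryType = CU_MEMORYTYPE_DEVICE;
    cp.dstDevice     = devPtr;
    cp.dstPitch      = devPitch;
    cp.WidthInBytes  = widthBytes;
    cp.Height        = height;
    return cuMemcpy2D(&cp);
}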
typedef struct CUDA_MEMCPY3D_st
{
unsigned int srcXInBytes; /**< Source X in bytes */
unsigned int srcY; /**< Source Y */
unsigned int srcZ; /**< Source Z */
unsigned int srcLOD; /**< Source LOD */
CUmemorytype srcMemoryType; /**< Source memory type (host, device, array) */
const void *srcHost; /**< Source host pointer */
CUdeviceptr srcDevice; /**< Source device pointer */
CUarray srcArray; /**< Source array reference */
void *reserved0; /**< Must be NULL */
unsigned int srcPitch; /**< Source pitch (ignored when src is array) */
unsigned int srcHeight; /**< Source height (ignored when src is array; may be 0 if Depth==1) */
unsigned int dstXInBytes; /**< Destination X in bytes */
unsigned int dstY; /**< Destination Y */
unsigned int dstZ; /**< Destination Z */
unsigned int dstLOD; /**< Destination LOD */
CUmemorytype dstMemoryType; /**< Destination memory type (host, device, array) */
void *dstHost; /**< Destination host pointer */
CUdeviceptr dstDevice; /**< Destination device pointer */
CUarray dstArray; /**< Destination array reference */
void *reserved1; /**< Must be NULL */
unsigned int dstPitch; /**< Destination pitch (ignored when dst is array) */
unsigned int dstHeight; /**< Destination height (ignored when dst is array; may be 0 if Depth==1) */
unsigned int WidthInBytes; /**< Width of 3D memory copy in bytes */
unsigned int Height; /**< Height of 3D memory copy */
unsigned int Depth; /**< Depth of 3D memory copy */
} CUDA_MEMCPY3D;
typedef struct CUDA_ARRAY_DESCRIPTOR_st
{
unsigned int Width; /**< Width of array */
unsigned int Height; /**< Height of array */
CUarray_format Format; /**< Array format */
unsigned int NumChannels; /**< Channels per array element */
} CUDA_ARRAY_DESCRIPTOR;
typedef struct CUDA_ARRAY3D_DESCRIPTOR_st
{
unsigned int Width; /**< Width of 3D array */
unsigned int Height; /**< Height of 3D array */
unsigned int Depth; /**< Depth of 3D array */
CUarray_format Format; /**< Array format */
unsigned int NumChannels; /**< Channels per array element */
unsigned int Flags; /**< Flags */
} CUDA_ARRAY3D_DESCRIPTOR;
CUresult CUDAAPI cuDeviceTotalMem(unsigned int *bytes, CUdevice dev);
CUresult CUDAAPI cuCtxCreate(CUcontext *pctx, unsigned int flags, CUdevice dev);
CUresult CUDAAPI cuModuleGetGlobal(CUdeviceptr *dptr, unsigned int *bytes, CUmodule hmod, const char *name);
CUresult CUDAAPI cuMemGetInfo(unsigned int *free, unsigned int *total);
CUresult CUDAAPI cuMemAlloc(CUdeviceptr *dptr, unsigned int bytesize);
CUresult CUDAAPI cuMemAllocPitch(CUdeviceptr *dptr, unsigned int *pPitch, unsigned int WidthInBytes, unsigned int Height, unsigned int ElementSizeBytes);
CUresult CUDAAPI cuMemFree(CUdeviceptr dptr);
CUresult CUDAAPI cuMemGetAddressRange(CUdeviceptr *pbase, unsigned int *psize, CUdeviceptr dptr);
CUresult CUDAAPI cuMemAllocHost(void **pp, unsigned int bytesize);
CUresult CUDAAPI cuMemHostGetDevicePointer(CUdeviceptr *pdptr, void *p, unsigned int Flags);
CUresult CUDAAPI cuMemcpyHtoD(CUdeviceptr dstDevice, const void *srcHost, unsigned int ByteCount);
CUresult CUDAAPI cuMemcpyDtoH(void *dstHost, CUdeviceptr srcDevice, unsigned int ByteCount);
CUresult CUDAAPI cuMemcpyDtoD(CUdeviceptr dstDevice, CUdeviceptr srcDevice, unsigned int ByteCount);
CUresult CUDAAPI cuMemcpyDtoA(CUarray dstArray, unsigned int dstOffset, CUdeviceptr srcDevice, unsigned int ByteCount);
CUresult CUDAAPI cuMemcpyAtoD(CUdeviceptr dstDevice, CUarray srcArray, unsigned int srcOffset, unsigned int ByteCount);
CUresult CUDAAPI cuMemcpyHtoA(CUarray dstArray, unsigned int dstOffset, const void *srcHost, unsigned int ByteCount);
CUresult CUDAAPI cuMemcpyAtoH(void *dstHost, CUarray srcArray, unsigned int srcOffset, unsigned int ByteCount);
CUresult CUDAAPI cuMemcpyAtoA(CUarray dstArray, unsigned int dstOffset, CUarray srcArray, unsigned int srcOffset, unsigned int ByteCount);
CUresult CUDAAPI cuMemcpyHtoAAsync(CUarray dstArray, unsigned int dstOffset, const void *srcHost, unsigned int ByteCount, CUstream hStream);
CUresult CUDAAPI cuMemcpyAtoHAsync(void *dstHost, CUarray srcArray, unsigned int srcOffset, unsigned int ByteCount, CUstream hStream);
CUresult CUDAAPI cuMemcpy2D(const CUDA_MEMCPY2D *pCopy);
CUresult CUDAAPI cuMemcpy2DUnaligned(const CUDA_MEMCPY2D *pCopy);
CUresult CUDAAPI cuMemcpy3D(const CUDA_MEMCPY3D *pCopy);
CUresult CUDAAPI cuMemcpyHtoDAsync(CUdeviceptr dstDevice, const void *srcHost, unsigned int ByteCount, CUstream hStream);
CUresult CUDAAPI cuMemcpyDtoHAsync(void *dstHost, CUdeviceptr srcDevice, unsigned int ByteCount, CUstream hStream);
CUresult CUDAAPI cuMemcpyDtoDAsync(CUdeviceptr dstDevice, CUdeviceptr srcDevice, unsigned int ByteCount, CUstream hStream);
CUresult CUDAAPI cuMemcpy2DAsync(const CUDA_MEMCPY2D *pCopy, CUstream hStream);
CUresult CUDAAPI cuMemcpy3DAsync(const CUDA_MEMCPY3D *pCopy, CUstream hStream);
CUresult CUDAAPI cuMemsetD8(CUdeviceptr dstDevice, unsigned char uc, unsigned int N);
CUresult CUDAAPI cuMemsetD16(CUdeviceptr dstDevice, unsigned short us, unsigned int N);
CUresult CUDAAPI cuMemsetD32(CUdeviceptr dstDevice, unsigned int ui, unsigned int N);
CUresult CUDAAPI cuMemsetD2D8(CUdeviceptr dstDevice, unsigned int dstPitch, unsigned char uc, unsigned int Width, unsigned int Height);
CUresult CUDAAPI cuMemsetD2D16(CUdeviceptr dstDevice, unsigned int dstPitch, unsigned short us, unsigned int Width, unsigned int Height);
CUresult CUDAAPI cuMemsetD2D32(CUdeviceptr dstDevice, unsigned int dstPitch, unsigned int ui, unsigned int Width, unsigned int Height);
CUresult CUDAAPI cuArrayCreate(CUarray *pHandle, const CUDA_ARRAY_DESCRIPTOR *pAllocateArray);
CUresult CUDAAPI cuArrayGetDescriptor(CUDA_ARRAY_DESCRIPTOR *pArrayDescriptor, CUarray hArray);
CUresult CUDAAPI cuArray3DCreate(CUarray *pHandle, const CUDA_ARRAY3D_DESCRIPTOR *pAllocateArray);
CUresult CUDAAPI cuArray3DGetDescriptor(CUDA_ARRAY3D_DESCRIPTOR *pArrayDescriptor, CUarray hArray);
CUresult CUDAAPI cuTexRefSetAddress(unsigned int *ByteOffset, CUtexref hTexRef, CUdeviceptr dptr, unsigned int bytes);
CUresult CUDAAPI cuTexRefSetAddress2D(CUtexref hTexRef, const CUDA_ARRAY_DESCRIPTOR *desc, CUdeviceptr dptr, unsigned int Pitch);
CUresult CUDAAPI cuTexRefGetAddress(CUdeviceptr *pdptr, CUtexref hTexRef);
CUresult CUDAAPI cuGraphicsResourceGetMappedPointer(CUdeviceptr *pDevPtr, unsigned int *pSize, CUgraphicsResource resource);
#endif /* __CUDA_API_VERSION_INTERNAL || __CUDA_API_VERSION < 3020 */
#if defined(__CUDA_API_VERSION_INTERNAL)
#undef CUdeviceptr
#undef CUDA_MEMCPY2D_st
#undef CUDA_MEMCPY2D
#undef CUDA_MEMCPY3D_st
#undef CUDA_MEMCPY3D
#undef CUDA_ARRAY_DESCRIPTOR_st
#undef CUDA_ARRAY_DESCRIPTOR
#undef CUDA_ARRAY3D_DESCRIPTOR_st
#undef CUDA_ARRAY3D_DESCRIPTOR
#endif /* __CUDA_API_VERSION_INTERNAL */
#ifdef __cplusplus
}
#endif
#undef __CUDA_API_VERSION
#endif /* __cuda_cuda_h__ */
cudaGL.h
* source code with only those rights set forth herein.
*
* Any use of this source code in individual and commercial software must
* include, in the user documentation and internal comments to the code,
* the above Disclaimer and U.S. Government End Users Notice.
*/
#ifndef CUDAGL_H
#define CUDAGL_H
/**
* CUDA API versioning support
*/
#if defined(CUDA_FORCE_API_VERSION)
#if (CUDA_FORCE_API_VERSION == 3010)
#define __CUDA_API_VERSION 3010
#else
#error "Unsupported value of CUDA_FORCE_API_VERSION"
#endif
#else
#define __CUDA_API_VERSION 3020
#endif /* CUDA_FORCE_API_VERSION */
#if defined(__CUDA_API_VERSION_INTERNAL) || __CUDA_API_VERSION >= 3020
#define cuGLCtxCreate cuGLCtxCreate_v2
#define cuGLMapBufferObject cuGLMapBufferObject_v2
#define cuGLMapBufferObjectAsync cuGLMapBufferObjectAsync_v2
#endif /* __CUDA_API_VERSION_INTERNAL || __CUDA_API_VERSION >= 3020 */
#ifdef __cplusplus
extern "C" {
#endif
/**
* \defgroup CUDA_GL OpenGL Interoperability
* \ingroup CUDA_DRIVER
*
* This section describes the OpenGL interoperability functions of the
* low-level CUDA driver application programming interface.
*
* @{
*/
#if defined(_WIN32)
#if !defined(WGL_NV_gpu_affinity)
typedef void* HGPUNV;
#endif
#endif /* _WIN32 */
#if __CUDA_API_VERSION >= 3020
/**
* \brief Create a CUDA context for interoperability with OpenGL
*
* Creates a new CUDA context, initializes OpenGL interoperability, and
* associates the CUDA context with the calling thread. It must be called
* before performing any other OpenGL interoperability operations. It may fail
* if the needed OpenGL driver facilities are not available. For usage of the
* \p Flags parameter, see ::cuCtxCreate().
*
* \param pCtx - Returned CUDA context
* \param Flags - Options for CUDA context creation
* \param device - Device on which to create the context
*
* \return
* ::CUDA_SUCCESS,
* ::CUDA_ERROR_DEINITIALIZED,
* ::CUDA_ERROR_NOT_INITIALIZED,
* ::CUDA_ERROR_INVALID_CONTEXT,
* ::CUDA_ERROR_INVALID_VALUE,
* ::CUDA_ERROR_OUT_OF_MEMORY
* \notefnerr
*
* \sa ::cuCtxCreate, ::cuGLInit, ::cuGLMapBufferObject,
* ::cuGLRegisterBufferObject, ::cuGLUnmapBufferObject,
* ::cuGLUnregisterBufferObject, ::cuGLMapBufferObjectAsync,
* ::cuGLUnmapBufferObjectAsync, ::cuGLSetBufferObjectMapFlags,
* ::cuWGLGetDevice
*/
CUresult CUDAAPI cuGLCtxCreate(CUcontext *pCtx, unsigned int Flags, CUdevice device);
#endif /* __CUDA_API_VERSION >= 3020 */
/**
* \brief Registers an OpenGL buffer object
*
* Registers the buffer object specified by \p buffer for access by
* CUDA. A handle to the registered object is returned as \p
* pCudaResource. The map flags \p Flags specify the intended usage,
* as follows:
*
* - ::CU_GRAPHICS_MAP_RESOURCE_FLAGS_NONE: Specifies no hints about how this
* resource will be used. It is therefore assumed that this resource will be
* read from and written to by CUDA. This is the default value.
* - ::CU_GRAPHICS_MAP_RESOURCE_FLAGS_READ_ONLY: Specifies that CUDA
* will not write to this resource.
* - ::CU_GRAPHICS_MAP_RESOURCE_FLAGS_WRITE_DISCARD: Specifies that
* CUDA will not read from this resource and will write over the
* entire contents of the resource, so none of the data previously
* stored in the resource will be preserved.
*
* \param pCudaResource - Pointer to the returned object handle
* \param buffer - name of buffer object to be registered
* \param Flags - Map flags
*
* \return
* ::CUDA_SUCCESS,
* ::CUDA_ERROR_INVALID_HANDLE,
* ::CUDA_ERROR_ALREADY_MAPPED,
* ::CUDA_ERROR_INVALID_CONTEXT,
* \notefnerr
*
* \sa
* ::cuGLCtxCreate,
* ::cuGraphicsUnregisterResource,
* ::cuGraphicsMapResources,
* ::cuGraphicsResourceGetMappedPointer
*/
CUresult CUDAAPI cuGraphicsGLRegisterBuffer(CUgraphicsResource *pCudaResource, GLuint buffer, unsigned int Flags);
/**
* \brief Register an OpenGL texture or renderbuffer object
*
* Registers the texture or renderbuffer object specified by \p image for access by CUDA.
* \p target must match the type of the object.
* A handle to the registered object is returned as \p pCudaResource.
* The map flags \p Flags specify the intended usage, as follows:
*
* - ::CU_GRAPHICS_MAP_RESOURCE_FLAGS_NONE: Specifies no hints about how this
* resource will be used. It is therefore assumed that this resource will be
* read from and written to by CUDA. This is the default value.
* - ::CU_GRAPHICS_MAP_RESOURCE_FLAGS_READ_ONLY: Specifies that CUDA
* will not write to this resource.
* - ::CU_GRAPHICS_MAP_RESOURCE_FLAGS_WRITE_DISCARD: Specifies that
* CUDA will not read from this resource and will write over the
* entire contents of the resource, so none of the data previously
* stored in the resource will be preserved.
*
* The following image classes are currently disallowed:
* - Textures with borders
* - Multisampled renderbuffers
*
* \param pCudaResource - Pointer to the returned object handle
* \param image - name of texture or renderbuffer object to be registered
* \param target - Identifies the type of object specified by \p image, and must be one of
* ::GL_TEXTURE_2D,
* ::GL_TEXTURE_RECTANGLE,
* ::GL_TEXTURE_CUBE_MAP,
* ::GL_TEXTURE_3D,
* ::GL_TEXTURE_2D_ARRAY, or
* ::GL_RENDERBUFFER.
* \param Flags - Map flags
*
* \return
* ::CUDA_SUCCESS,
* ::CUDA_ERROR_INVALID_HANDLE,
* ::CUDA_ERROR_ALREADY_MAPPED,
* ::CUDA_ERROR_INVALID_CONTEXT,
* \notefnerr
*
* \sa
* ::cuGLCtxCreate,
* ::cuGraphicsUnregisterResource,
* ::cuGraphicsMapResources,
* ::cuGraphicsSubResourceGetMappedArray
*/
CUresult CUDAAPI cuGraphicsGLRegisterImage(CUgraphicsResource *pCudaResource, GLuint image, GLenum target, unsigned int Flags);
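/*
 * Illustrative sketch: register a GL texture for CUDA access and pull out
 * its level-0 array while mapped. "tex" is assumed to be a valid
 * GL_TEXTURE_2D name created by the application; unregistration and
 * unmapping are left to the caller.
 */
static CUresult registerAndMapGLTexture(GLuint tex, CUstream hStream, CUarray *pArray)
{
    CUgraphicsResource res;
    CUresult rc = cuGraphicsGLRegisterImage(&res, tex, GL_TEXTURE_2D,
                                            CU_GRAPHICS_MAP_RESOURCE_FLAGS_NONE);
    if (rc != CUDA_SUCCESS) return rc;
    rc = cuGraphicsMapResources(1, &res, hStream);
    if (rc != CUDA_SUCCESS) return rc;
    return cuGraphicsSubResourceGetMappedArray(pArray, res, 0, 0);
}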
#ifdef _WIN32
/**
* \brief Gets the CUDA device associated with hGpu
*
* Returns in \p *pDevice the CUDA device associated with a \p hGpu, if
* applicable.
*
* \param pDevice - Device associated with hGpu
* \param hGpu - Handle to a GPU, as queried via ::WGL_NV_gpu_affinity()
*
* \return
* ::CUDA_SUCCESS,
* ::CUDA_ERROR_DEINITIALIZED,
* ::CUDA_ERROR_NOT_INITIALIZED,
* ::CUDA_ERROR_INVALID_CONTEXT,
* ::CUDA_ERROR_INVALID_VALUE
* \notefnerr
*
* \sa ::cuGLCtxCreate, ::cuGLInit, ::cuGLMapBufferObject,
* ::cuGLRegisterBufferObject, ::cuGLUnmapBufferObject,
* ::cuGLUnregisterBufferObject, ::cuGLUnmapBufferObjectAsync,
* ::cuGLSetBufferObjectMapFlags
*/
CUresult CUDAAPI cuWGLGetDevice(CUdevice *pDevice, HGPUNV hGpu);
#endif /* _WIN32 */
/**
* \defgroup CUDA_GL_DEPRECATED OpenGL Interoperability [DEPRECATED]
* This section describes deprecated OpenGL interoperability functionality.
*
* @{
*/
/** Flags to map or unmap a resource */
typedef enum CUGLmap_flags_enum { typedef enum CUGLmap_flags_enum {
CU_GL_MAP_RESOURCE_FLAGS_NONE = 0x00, CU_GL_MAP_RESOURCE_FLAGS_NONE = 0x00,
CU_GL_MAP_RESOURCE_FLAGS_READ_ONLY = 0x01, CU_GL_MAP_RESOURCE_FLAGS_READ_ONLY = 0x01,
CU_GL_MAP_RESOURCE_FLAGS_WRITE_DISCARD = 0x02, CU_GL_MAP_RESOURCE_FLAGS_WRITE_DISCARD = 0x02,
} CUGLmap_flags; } CUGLmap_flags;
/**
* \brief Initializes OpenGL interoperability
*
* \deprecated This function is deprecated as of Cuda 3.0.
*
* Initializes OpenGL interoperability. This function is deprecated
* and calling it is no longer required. It may fail if the needed
* OpenGL driver facilities are not available.
*
* \return
* ::CUDA_SUCCESS,
* ::CUDA_ERROR_DEINITIALIZED,
* ::CUDA_ERROR_NOT_INITIALIZED,
* ::CUDA_ERROR_INVALID_CONTEXT,
* ::CUDA_ERROR_UNKNOWN
* \notefnerr
*
* \sa ::cuGLCtxCreate, ::cuGLMapBufferObject,
* ::cuGLRegisterBufferObject, ::cuGLUnmapBufferObject,
* ::cuGLUnregisterBufferObject, ::cuGLMapBufferObjectAsync,
* ::cuGLUnmapBufferObjectAsync, ::cuGLSetBufferObjectMapFlags,
* ::cuWGLGetDevice
*/
CUresult CUDAAPI cuGLInit(void);
/**
* \brief Registers an OpenGL buffer object
*
* \deprecated This function is deprecated as of Cuda 3.0.
*
* Registers the buffer object specified by \p buffer for access by
* CUDA. This function must be called before CUDA can map the buffer
* object. There must be a valid OpenGL context bound to the current
* thread when this function is called, and the buffer name is
* resolved by that context.
*
* \param buffer - The name of the buffer object to register.
*
* \return
* ::CUDA_SUCCESS,
* ::CUDA_ERROR_DEINITIALIZED,
* ::CUDA_ERROR_NOT_INITIALIZED,
* ::CUDA_ERROR_INVALID_CONTEXT,
* ::CUDA_ERROR_ALREADY_MAPPED
* \notefnerr
*
* \sa ::cuGraphicsGLRegisterBuffer
*/
CUresult CUDAAPI cuGLRegisterBufferObject(GLuint buffer);
#if __CUDA_API_VERSION >= 3020
/**
* \brief Maps an OpenGL buffer object
*
* \deprecated This function is deprecated as of Cuda 3.0.
*
* Maps the buffer object specified by \p buffer into the address space of the
* current CUDA context and returns in \p *dptr and \p *size the base pointer
* and size of the resulting mapping.
*
* There must be a valid OpenGL context bound to the current thread
* when this function is called. This must be the same context, or a
* member of the same shareGroup, as the context that was bound when
* the buffer was registered.
*
* All streams in the current CUDA context are synchronized with the
* current GL context.
*
* \param dptr - Returned mapped base pointer
* \param size - Returned size of mapping
* \param buffer - The name of the buffer object to map
*
* \return
* ::CUDA_SUCCESS,
* ::CUDA_ERROR_DEINITIALIZED,
* ::CUDA_ERROR_NOT_INITIALIZED,
* ::CUDA_ERROR_INVALID_CONTEXT,
* ::CUDA_ERROR_INVALID_VALUE,
* ::CUDA_ERROR_MAP_FAILED
* \notefnerr
*
* \sa ::cuGraphicsMapResources
*/
CUresult CUDAAPI cuGLMapBufferObject(CUdeviceptr *dptr, size_t *size, GLuint buffer);
#endif /* __CUDA_API_VERSION >= 3020 */
/**
* \brief Unmaps an OpenGL buffer object
*
* \deprecated This function is deprecated as of Cuda 3.0.
*
* Unmaps the buffer object specified by \p buffer for access by CUDA.
*
* There must be a valid OpenGL context bound to the current thread
* when this function is called. This must be the same context, or a
* member of the same shareGroup, as the context that was bound when
* the buffer was registered.
*
* All streams in the current CUDA context are synchronized with the
* current GL context.
*
* \param buffer - Buffer object to unmap
*
* \return
* ::CUDA_SUCCESS,
* ::CUDA_ERROR_DEINITIALIZED,
* ::CUDA_ERROR_NOT_INITIALIZED,
* ::CUDA_ERROR_INVALID_CONTEXT,
* ::CUDA_ERROR_INVALID_VALUE
* \notefnerr
*
* \sa ::cuGraphicsUnmapResources
*/
CUresult CUDAAPI cuGLUnmapBufferObject(GLuint buffer);
/**
* \brief Unregister an OpenGL buffer object
*
* \deprecated This function is deprecated as of Cuda 3.0.
*
* Unregisters the buffer object specified by \p buffer. This
* releases any resources associated with the registered buffer.
* After this call, the buffer may no longer be mapped for access by
* CUDA.
*
* There must be a valid OpenGL context bound to the current thread
* when this function is called. This must be the same context, or a
* member of the same shareGroup, as the context that was bound when
* the buffer was registered.
*
* \param buffer - Name of the buffer object to unregister
*
* \return
* ::CUDA_SUCCESS,
* ::CUDA_ERROR_DEINITIALIZED,
* ::CUDA_ERROR_NOT_INITIALIZED,
* ::CUDA_ERROR_INVALID_CONTEXT,
* ::CUDA_ERROR_INVALID_VALUE
* \notefnerr
*
* \sa ::cuGraphicsUnregisterResource
*/
CUresult CUDAAPI cuGLUnregisterBufferObject(GLuint buffer);
/**
* \brief Set the map flags for an OpenGL buffer object
*
* \deprecated This function is deprecated as of Cuda 3.0.
*
* Sets the map flags for the buffer object specified by \p buffer.
*
* Changes to \p Flags will take effect the next time \p buffer is mapped.
* The \p Flags argument may be any of the following:
* - ::CU_GL_MAP_RESOURCE_FLAGS_NONE: Specifies no hints about how this
* resource will be used. It is therefore assumed that this resource will be
* read from and written to by CUDA kernels. This is the default value.
* - ::CU_GL_MAP_RESOURCE_FLAGS_READ_ONLY: Specifies that CUDA kernels which
* access this resource will not write to this resource.
* - ::CU_GL_MAP_RESOURCE_FLAGS_WRITE_DISCARD: Specifies that CUDA kernels
* which access this resource will not read from this resource and will
* write over the entire contents of the resource, so none of the data
* previously stored in the resource will be preserved.
*
* If \p buffer has not been registered for use with CUDA, then
* ::CUDA_ERROR_INVALID_HANDLE is returned. If \p buffer is presently
* mapped for access by CUDA, then ::CUDA_ERROR_ALREADY_MAPPED is returned.
*
* There must be a valid OpenGL context bound to the current thread
* when this function is called. This must be the same context, or a
* member of the same shareGroup, as the context that was bound when
* the buffer was registered.
*
* \param buffer - Registered buffer object to set map flags for
* \param Flags - Map flags
*
* \return
* ::CUDA_SUCCESS,
* ::CUDA_ERROR_NOT_INITIALIZED,
* ::CUDA_ERROR_INVALID_HANDLE,
* ::CUDA_ERROR_ALREADY_MAPPED,
* ::CUDA_ERROR_INVALID_CONTEXT,
* \notefnerr
*
* \sa ::cuGraphicsResourceSetMapFlags
*/
CUresult CUDAAPI cuGLSetBufferObjectMapFlags(GLuint buffer, unsigned int Flags);
#if __CUDA_API_VERSION >= 3020
/**
* \brief Maps an OpenGL buffer object
*
* \deprecated This function is deprecated as of Cuda 3.0.
*
* Maps the buffer object specified by \p buffer into the address space of the
* current CUDA context and returns in \p *dptr and \p *size the base pointer
* and size of the resulting mapping.
*
* There must be a valid OpenGL context bound to the current thread
* when this function is called. This must be the same context, or a
* member of the same shareGroup, as the context that was bound when
* the buffer was registered.
*
* Stream \p hStream in the current CUDA context is synchronized with
* the current GL context.
*
* \param dptr - Returned mapped base pointer
* \param size - Returned size of mapping
* \param buffer - The name of the buffer object to map
* \param hStream - Stream to synchronize
*
* \return
* ::CUDA_SUCCESS,
* ::CUDA_ERROR_DEINITIALIZED,
* ::CUDA_ERROR_NOT_INITIALIZED,
* ::CUDA_ERROR_INVALID_CONTEXT,
* ::CUDA_ERROR_INVALID_VALUE,
* ::CUDA_ERROR_MAP_FAILED
* \notefnerr
*
* \sa ::cuGraphicsMapResources
*/
CUresult CUDAAPI cuGLMapBufferObjectAsync(CUdeviceptr *dptr, size_t *size, GLuint buffer, CUstream hStream);
#endif /* __CUDA_API_VERSION >= 3020 */
/**
* \brief Unmaps an OpenGL buffer object
*
* \deprecated This function is deprecated as of Cuda 3.0.
*
* Unmaps the buffer object specified by \p buffer for access by CUDA.
*
* There must be a valid OpenGL context bound to the current thread
* when this function is called. This must be the same context, or a
* member of the same shareGroup, as the context that was bound when
* the buffer was registered.
*
* Stream \p hStream in the current CUDA context is synchronized with
* the current GL context.
*
* \param buffer - Name of the buffer object to unmap
* \param hStream - Stream to synchronize
*
* \return
* ::CUDA_SUCCESS,
* ::CUDA_ERROR_DEINITIALIZED,
* ::CUDA_ERROR_NOT_INITIALIZED,
* ::CUDA_ERROR_INVALID_CONTEXT,
* ::CUDA_ERROR_INVALID_VALUE
* \notefnerr
*
* \sa ::cuGraphicsUnmapResources
*/
CUresult CUDAAPI cuGLUnmapBufferObjectAsync(GLuint buffer, CUstream hStream);
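/*
 * Illustrative sketch: the deprecated register/map pair in one place.
 * "vbo" is assumed to be a GL buffer object created by the application;
 * new code should use cuGraphicsGLRegisterBuffer and cuGraphicsMapResources
 * instead.
 */
static CUresult oldStyleMap(GLuint vbo, CUdeviceptr *pd, size_t *psz)
{
    CUresult rc = cuGLRegisterBufferObject(vbo);   /* deprecated */
    if (rc != CUDA_SUCCESS) return rc;
    return cuGLMapBufferObject(pd, psz, vbo);      /* deprecated */
}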
/** @} */ /* END CUDA_GL_DEPRECATED */
/** @} */ /* END CUDA_GL */
/**
* CUDA API versioning support
*/
#if defined(__CUDA_API_VERSION_INTERNAL)
#undef cuGLCtxCreate
#undef cuGLMapBufferObject
#undef cuGLMapBufferObjectAsync
#endif /* __CUDA_API_VERSION_INTERNAL */
/**
* CUDA API made obsolete at API version 3020
*/
#if defined(__CUDA_API_VERSION_INTERNAL)
#define CUdeviceptr CUdeviceptr_v1
#endif /* __CUDA_API_VERSION_INTERNAL */
#if defined(__CUDA_API_VERSION_INTERNAL) || __CUDA_API_VERSION < 3020
CUresult CUDAAPI cuGLCtxCreate(CUcontext *pCtx, unsigned int Flags, CUdevice device);
CUresult CUDAAPI cuGLMapBufferObject(CUdeviceptr *dptr, unsigned int *size, GLuint buffer);
CUresult CUDAAPI cuGLMapBufferObjectAsync(CUdeviceptr *dptr, unsigned int *size, GLuint buffer, CUstream hStream);
#endif /* __CUDA_API_VERSION_INTERNAL || __CUDA_API_VERSION < 3020 */
#if defined(__CUDA_API_VERSION_INTERNAL)
#undef CUdeviceptr
#endif /* __CUDA_API_VERSION_INTERNAL */
#ifdef __cplusplus
};
#endif
#undef __CUDA_API_VERSION
#endif
cudaVDPAU.h
* source code with only those rights set forth herein.
*
* Any use of this source code in individual and commercial software must
* include, in the user documentation and internal comments to the code,
* the above Disclaimer and U.S. Government End Users Notice.
*/
#ifndef CUDAVDPAU_H
#define CUDAVDPAU_H
/**
* CUDA API versioning support
*/
#if defined(CUDA_FORCE_API_VERSION)
#if (CUDA_FORCE_API_VERSION == 3010)
#define __CUDA_API_VERSION 3010
#else
#error "Unsupported value of CUDA_FORCE_API_VERSION"
#endif
#else
#define __CUDA_API_VERSION 3020
#endif /* CUDA_FORCE_API_VERSION */
#if defined(__CUDA_API_VERSION_INTERNAL) || __CUDA_API_VERSION >= 3020
#define cuVDPAUCtxCreate cuVDPAUCtxCreate_v2
#endif /* __CUDA_API_VERSION_INTERNAL || __CUDA_API_VERSION >= 3020 */
#ifdef __cplusplus
extern "C" {
#endif
/**
* \defgroup CUDA_VDPAU VDPAU Interoperability
* \ingroup CUDA_DRIVER
*
* This section describes the VDPAU interoperability functions of the
* low-level CUDA driver application programming interface.
*
* @{
*/
/**
* \brief Gets the CUDA device associated with a VDPAU device
*
* Returns in \p *pDevice the CUDA device associated with a \p vdpDevice, if
* applicable.
*
* \param pDevice - Device associated with vdpDevice
* \param vdpDevice - A VdpDevice handle
* \param vdpGetProcAddress - VDPAU's VdpGetProcAddress function pointer
*
* \return
* ::CUDA_SUCCESS,
* ::CUDA_ERROR_DEINITIALIZED,
* ::CUDA_ERROR_NOT_INITIALIZED,
* ::CUDA_ERROR_INVALID_CONTEXT,
* ::CUDA_ERROR_INVALID_VALUE
* \notefnerr
*
* \sa ::cuCtxCreate, ::cuVDPAUCtxCreate, ::cuGraphicsVDPAURegisterVideoSurface,
* ::cuGraphicsVDPAURegisterOutputSurface, ::cuGraphicsUnregisterResource,
* ::cuGraphicsResourceSetMapFlags, ::cuGraphicsMapResources,
* ::cuGraphicsUnmapResources, ::cuGraphicsSubResourceGetMappedArray
*/
CUresult CUDAAPI cuVDPAUGetDevice(CUdevice *pDevice, VdpDevice vdpDevice, VdpGetProcAddress *vdpGetProcAddress);
#if __CUDA_API_VERSION >= 3020
/**
* \brief Create a CUDA context for interoperability with VDPAU
*
* Creates a new CUDA context, initializes VDPAU interoperability, and
* associates the CUDA context with the calling thread. It must be called
* before performing any other VDPAU interoperability operations. It may fail
* if the needed VDPAU driver facilities are not available. For usage of the
* \p flags parameter, see ::cuCtxCreate().
*
* \param pCtx - Returned CUDA context
* \param flags - Options for CUDA context creation
* \param device - Device on which to create the context
* \param vdpDevice - The VdpDevice to interop with
* \param vdpGetProcAddress - VDPAU's VdpGetProcAddress function pointer
*
* \return
* ::CUDA_SUCCESS,
* ::CUDA_ERROR_DEINITIALIZED,
* ::CUDA_ERROR_NOT_INITIALIZED,
* ::CUDA_ERROR_INVALID_CONTEXT,
* ::CUDA_ERROR_INVALID_VALUE,
* ::CUDA_ERROR_OUT_OF_MEMORY
* \notefnerr
*
* \sa ::cuCtxCreate, ::cuGraphicsVDPAURegisterVideoSurface,
* ::cuGraphicsVDPAURegisterOutputSurface, ::cuGraphicsUnregisterResource,
* ::cuGraphicsResourceSetMapFlags, ::cuGraphicsMapResources,
* ::cuGraphicsUnmapResources, ::cuGraphicsSubResourceGetMappedArray,
* ::cuVDPAUGetDevice
*/
CUresult CUDAAPI cuVDPAUCtxCreate(CUcontext *pCtx, unsigned int flags, CUdevice device, VdpDevice vdpDevice, VdpGetProcAddress *vdpGetProcAddress);
#endif /* __CUDA_API_VERSION >= 3020 */
/**
* \brief Registers a VDPAU VdpVideoSurface object
*
* Registers the VdpVideoSurface specified by \p vdpSurface for access by
* CUDA. A handle to the registered object is returned as \p pCudaResource.
* The surface's intended usage is specified using \p flags, as follows:
*
* - ::CU_GRAPHICS_MAP_RESOURCE_FLAGS_NONE: Specifies no hints about how this
* resource will be used. It is therefore assumed that this resource will be
* read from and written to by CUDA. This is the default value.
* - ::CU_GRAPHICS_MAP_RESOURCE_FLAGS_READ_ONLY: Specifies that CUDA
* will not write to this resource.
* - ::CU_GRAPHICS_MAP_RESOURCE_FLAGS_WRITE_DISCARD: Specifies that
* CUDA will not read from this resource and will write over the
* entire contents of the resource, so none of the data previously
* stored in the resource will be preserved.
*
* The VdpVideoSurface is presented as an array of subresources that may be
* accessed using pointers returned by ::cuGraphicsSubResourceGetMappedArray.
* The exact number of valid \p arrayIndex values depends on the VDPAU surface
* format. The mapping is shown in the table below. \p mipLevel must be 0.
*
* \htmlonly
* <table>
* <tr><th>VdpChromaType</th><th>arrayIndex</th><th>Size</th><th>Format</th><th>Content</th></tr>
* <tr><td rowspan="4" valign="top">VDP_CHROMA_TYPE_420</td><td>0</td><td>w x h/2</td><td>R8</td><td>Top-field luma</td></tr>
* <tr><td>1</td><td>w x h/2</td><td>R8</td><td>Bottom-field luma</td></tr>
* <tr><td>2</td><td>w/2 x h/4</td><td>R8G8</td><td>Top-field chroma</td></tr>
* <tr><td>3</td><td>w/2 x h/4</td><td>R8G8</td><td>Bottom-field chroma</td></tr>
* <tr><td rowspan="4" valign="top">VDP_CHROMA_TYPE_422</td><td>0</td><td>w x h/2</td><td>R8</td><td>Top-field luma</td></tr>
* <tr><td>1</td><td>w x h/2</td><td>R8</td><td>Bottom-field luma</td></tr>
* <tr><td>2</td><td>w/2 x h/2</td><td>R8G8</td><td>Top-field chroma</td></tr>
* <tr><td>3</td><td>w/2 x h/2</td><td>R8G8</td><td>Bottom-field chroma</td></tr>
* </table>
* \endhtmlonly
*
* \latexonly
* \begin{tabular}{|l|l|l|l|l|}
* \hline
* VdpChromaType & arrayIndex & Size & Format & Content \\
* \hline
* VDP\_CHROMA\_TYPE\_420 & 0 & w x h/2 & R8 & Top-field luma \\
* & 1 & w x h/2 & R8 & Bottom-field luma \\
* & 2 & w/2 x h/4 & R8G8 & Top-field chroma \\
* & 3 & w/2 x h/4 & R8G8 & Bottom-field chroma \\
* \hline
* VDP\_CHROMA\_TYPE\_422 & 0 & w x h/2 & R8 & Top-field luma \\
* & 1 & w x h/2 & R8 & Bottom-field luma \\
* & 2 & w/2 x h/2 & R8G8 & Top-field chroma \\
* & 3 & w/2 x h/2 & R8G8 & Bottom-field chroma \\
* \hline
* \end{tabular}
* \endlatexonly
*
* \param pCudaResource - Pointer to the returned object handle
* \param vdpSurface - The VdpVideoSurface to be registered
* \param flags - Map flags
*
* \return
* ::CUDA_SUCCESS,
* ::CUDA_ERROR_INVALID_HANDLE,
* ::CUDA_ERROR_ALREADY_MAPPED,
* ::CUDA_ERROR_INVALID_CONTEXT,
* \notefnerr
*
* \sa ::cuCtxCreate, ::cuVDPAUCtxCreate,
* ::cuGraphicsVDPAURegisterOutputSurface, ::cuGraphicsUnregisterResource,
* ::cuGraphicsResourceSetMapFlags, ::cuGraphicsMapResources,
* ::cuGraphicsUnmapResources, ::cuGraphicsSubResourceGetMappedArray,
* ::cuVDPAUGetDevice
*/
CUresult CUDAAPI cuGraphicsVDPAURegisterVideoSurface(CUgraphicsResource *pCudaResource, VdpVideoSurface vdpSurface, unsigned int flags);
/**
* \brief Registers a VDPAU VdpOutputSurface object
*
* Registers the VdpOutputSurface specified by \p vdpSurface for access by
* CUDA. A handle to the registered object is returned as \p pCudaResource.
* The surface's intended usage is specified using \p flags, as follows:
*
* - ::CU_GRAPHICS_MAP_RESOURCE_FLAGS_NONE: Specifies no hints about how this
* resource will be used. It is therefore assumed that this resource will be
* read from and written to by CUDA. This is the default value.
* - ::CU_GRAPHICS_MAP_RESOURCE_FLAGS_READ_ONLY: Specifies that CUDA
* will not write to this resource.
* - ::CU_GRAPHICS_MAP_RESOURCE_FLAGS_WRITE_DISCARD: Specifies that
* CUDA will not read from this resource and will write over the
* entire contents of the resource, so none of the data previously
* stored in the resource will be preserved.
*
* The VdpOutputSurface is presented as an array of subresources that may be
* accessed using pointers returned by ::cuGraphicsSubResourceGetMappedArray.
* The exact number of valid \p arrayIndex values depends on the VDPAU surface
* format. The mapping is shown in the table below. \p mipLevel must be 0.
*
* \htmlonly
* <table>
* <tr><th>VdpRGBAFormat</th><th>arrayIndex</th><th>Size</th><th>Format</th><th>Content</th></tr>
* <tr><td>VDP_RGBA_FORMAT_B8G8R8A8</td><td>0</td><td>w x h</td><td>ARGB8</td><td>Entire surface</td></tr>
* <tr><td>VDP_RGBA_FORMAT_R10G10B10A2</td><td>0</td><td>w x h</td><td>A2BGR10</td><td>Entire surface</td></tr>
* </table>
* \endhtmlonly
*
* \latexonly
* \begin{tabular}{|l|l|l|l|l|}
* \hline
* VdpRGBAFormat & arrayIndex & Size & Format & Content \\
* \hline
* VDP\_RGBA\_FORMAT\_B8G8R8A8 & 0 & w x h & ARGB8 & Entire surface \\
* VDP\_RGBA\_FORMAT\_R10G10B10A2 & 0 & w x h & A2BGR10 & Entire surface \\
* \hline
* \end{tabular}
* \endlatexonly
*
* \param pCudaResource - Pointer to the returned object handle
* \param vdpSurface - The VdpOutputSurface to be registered
* \param flags - Map flags
*
* \return
* ::CUDA_SUCCESS,
* ::CUDA_ERROR_INVALID_HANDLE,
* ::CUDA_ERROR_ALREADY_MAPPED,
* ::CUDA_ERROR_INVALID_CONTEXT,
* \notefnerr
*
* \sa ::cuCtxCreate, ::cuVDPAUCtxCreate,
* ::cuGraphicsVDPAURegisterVideoSurface, ::cuGraphicsUnregisterResource,
* ::cuGraphicsResourceSetMapFlags, ::cuGraphicsMapResources,
* ::cuGraphicsUnmapResources, ::cuGraphicsSubResourceGetMappedArray,
* ::cuVDPAUGetDevice
*/
CUresult CUDAAPI cuGraphicsVDPAURegisterOutputSurface(CUgraphicsResource *pCudaResource, VdpOutputSurface vdpSurface, unsigned int flags);
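/*
 * Illustrative sketch: register a VDPAU output surface read-only and fetch
 * the mapped array for its single subresource. "outSurf" is assumed to come
 * from VdpOutputSurfaceCreate; unmapping and unregistering are left to the
 * caller.
 */
static CUresult mapVdpauOutput(VdpOutputSurface outSurf, CUstream hStream, CUarray *pArray)
{
    CUgraphicsResource res;
    CUresult rc = cuGraphicsVDPAURegisterOutputSurface(&res, outSurf,
                                                       CU_GRAPHICS_MAP_RESOURCE_FLAGS_READ_ONLY);
    if (rc != CUDA_SUCCESS) return rc;
    rc = cuGraphicsMapResources(1, &res, hStream);
    if (rc != CUDA_SUCCESS) return rc;
    return cuGraphicsSubResourceGetMappedArray(pArray, res, 0, 0);
}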
/** @} */ /* END CUDA_VDPAU */
/**
* CUDA API versioning support
*/
#if defined(__CUDA_API_VERSION_INTERNAL)
#undef cuVDPAUCtxCreate
#endif /* __CUDA_API_VERSION_INTERNAL */
/**
* CUDA API made obsolete at API version 3020
*/
#if defined(__CUDA_API_VERSION_INTERNAL) || __CUDA_API_VERSION < 3020
CUresult CUDAAPI cuVDPAUCtxCreate(CUcontext *pCtx, unsigned int flags, CUdevice device, VdpDevice vdpDevice, VdpGetProcAddress *vdpGetProcAddress);
#endif /* __CUDA_API_VERSION_INTERNAL || __CUDA_API_VERSION < 3020 */
#ifdef __cplusplus
};
#endif
#undef __CUDA_API_VERSION
#endif
cuda_gl_interop.h
* source code with only those rights set forth herein.
*
* Any use of this source code in individual and commercial software must
* include, in the user documentation and internal comments to the code,
* the above Disclaimer and U.S. Government End Users Notice.
*/
#if !defined(__CUDA_GL_INTEROP_H__)
#define __CUDA_GL_INTEROP_H__
#include "builtin_types.h" #include "builtin_types.h"
#include "host_defines.h" #include "host_defines.h"
#if defined(__APPLE__) #if defined(__APPLE__)
#include <OpenGL/gl.h> #include <OpenGL/gl.h>
#else /* __APPLE__ */ #else /* __APPLE__ */
#include <GL/gl.h> #include <GL/gl.h>
#endif /* __APPLE__ */ #endif /* __APPLE__ */
#if defined(__cplusplus) #if defined(__cplusplus)
extern "C" { extern "C" {
#endif /* __cplusplus */ #endif /* __cplusplus */
/**
* \addtogroup CUDART_OPENGL OpenGL Interoperability
* This section describes the OpenGL interoperability functions of the CUDA
* runtime application programming interface.
*
* @{
*/
/**
* \brief Sets the CUDA device for use with OpenGL interoperability
*
* Records \p device as the device on which the active host thread executes
* the device code. Records the thread as using OpenGL interoperability.
* If the host thread has already initialized the CUDA runtime by
 * calling non-device management runtime functions or if there exists a CUDA
 * driver context active on the host thread, then this call returns
* ::cudaErrorSetOnActiveProcess.
*
* \param device - Device to use for OpenGL interoperability
*
* \return
* ::cudaSuccess,
* ::cudaErrorInvalidDevice,
* ::cudaErrorSetOnActiveProcess
* \notefnerr
*
* \sa ::cudaGLRegisterBufferObject, ::cudaGLMapBufferObject,
* ::cudaGLUnmapBufferObject, ::cudaGLUnregisterBufferObject,
* ::cudaGLMapBufferObjectAsync, ::cudaGLUnmapBufferObjectAsync
*/
extern __host__ cudaError_t CUDARTAPI cudaGLSetGLDevice(int device); extern __host__ cudaError_t CUDARTAPI cudaGLSetGLDevice(int device);
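A sketch of typical use (device 0 is an assumption): the GL context must already be current on the calling thread, and no other CUDA runtime calls may have been issued yet.

cudaError_t err = cudaGLSetGLDevice(0);   /* device 0 assumed to drive the GL context */
if (err == cudaErrorSetOnActiveProcess) {
    /* the runtime was already initialized; the interop device can
     * no longer be selected on this thread */
}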
extern __host__ cudaError_t CUDARTAPI cudaGraphicsGLRegisterImage(struct cudaGraphicsResource **resource, GLuint image, GLenum target, unsigned int Flags); /**
 * \brief Register an OpenGL texture or renderbuffer object
extern __host__ cudaError_t CUDARTAPI cudaGraphicsGLRegisterBuffer(struct cudaGraphicsResource **resource, GLuint buffer, unsigned int Flags); *
 * Registers the texture or renderbuffer object specified by \p image for access by CUDA.
* \p target must match the type of the object.
* A handle to the registered object is returned as \p resource.
* The map flags \p flags specify the intended usage, as follows:
*
* - ::cudaGraphicsMapFlagsNone: Specifies no hints about how this
 * resource will be used. It is therefore assumed that this resource will be
 * read from and written to by CUDA. This is the default value.
* - ::cudaGraphicsMapFlagsReadOnly: Specifies that CUDA
* will not write to this resource.
* - ::cudaGraphicsMapFlagsWriteDiscard: Specifies that
* CUDA will not read from this resource and will write over the
* entire contents of the resource, so none of the data previously
* stored in the resource will be preserved.
*
* The following image classes are currently disallowed:
* - Textures with borders
* - Multisampled renderbuffers
*
* \param resource - Pointer to the returned object handle
 * \param image - name of texture or renderbuffer object to be registered
 * \param target - Identifies the type of object specified by \p image, and must be one of
* ::GL_TEXTURE_2D,
* ::GL_TEXTURE_RECTANGLE,
* ::GL_TEXTURE_CUBE_MAP,
* ::GL_TEXTURE_3D,
* ::GL_TEXTURE_2D_ARRAY, or
* ::GL_RENDERBUFFER.
* \param flags - Map flags
*
* \return
* ::cudaSuccess,
* ::cudaErrorInvalidDevice,
* ::cudaErrorInvalidValue,
* ::cudaErrorInvalidResourceHandle,
* ::cudaErrorUnknown
* \notefnerr
*
* \sa
* ::cudaGLSetGLDevice
* ::cudaGraphicsUnregisterResource,
* ::cudaGraphicsMapResources,
* ::cudaGraphicsSubResourceGetMappedArray
*/
extern __host__ cudaError_t CUDARTAPI cudaGraphicsGLRegisterImage(struct cudaGraphicsResource **resource, GLuint image, GLenum target, unsigned int flags);
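A sketch of the register/map/use/unmap cycle for a texture; tex is a hypothetical GL texture id created elsewhere with glTexImage2D:

struct cudaGraphicsResource *texRes;
struct cudaArray *texArray;

cudaGraphicsGLRegisterImage(&texRes, tex, GL_TEXTURE_2D,
                            cudaGraphicsMapFlagsReadOnly);
cudaGraphicsMapResources(1, &texRes, 0);
cudaGraphicsSubResourceGetMappedArray(&texArray, texRes, 0, 0);
/* ... bind texArray to a texture reference and launch kernels ... */
cudaGraphicsUnmapResources(1, &texRes, 0);
cudaGraphicsUnregisterResource(texRes);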
/**
* \brief Registers an OpenGL buffer object
*
* Registers the buffer object specified by \p buffer for access by
* CUDA. A handle to the registered object is returned as \p
* resource. The map flags \p flags specify the intended usage,
* as follows:
*
* - ::cudaGraphicsMapFlagsNone: Specifies no hints about how this
 * resource will be used. It is therefore assumed that this resource will be
 * read from and written to by CUDA. This is the default value.
* - ::cudaGraphicsMapFlagsReadOnly: Specifies that CUDA
* will not write to this resource.
* - ::cudaGraphicsMapFlagsWriteDiscard: Specifies that
* CUDA will not read from this resource and will write over the
* entire contents of the resource, so none of the data previously
* stored in the resource will be preserved.
*
* \param resource - Pointer to the returned object handle
* \param buffer - name of buffer object to be registered
* \param flags - Map flags
*
* \return
* ::cudaSuccess,
* ::cudaErrorInvalidDevice,
* ::cudaErrorInvalidValue,
* ::cudaErrorInvalidResourceHandle,
* ::cudaErrorUnknown
* \notefnerr
*
* \sa
* ::cudaGLCtxCreate,
* ::cudaGraphicsUnregisterResource,
* ::cudaGraphicsMapResources,
* ::cudaGraphicsResourceGetMappedPointer
*/
extern __host__ cudaError_t CUDARTAPI cudaGraphicsGLRegisterBuffer(struct cudaGraphicsResource **resource, GLuint buffer, unsigned int flags);
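Likewise for a buffer object; vbo is a hypothetical GL buffer id, and the write-discard flag matches a kernel that overwrites the whole buffer:

struct cudaGraphicsResource *vboRes;
float *dPtr;
size_t nBytes;

cudaGraphicsGLRegisterBuffer(&vboRes, vbo, cudaGraphicsMapFlagsWriteDiscard);
cudaGraphicsMapResources(1, &vboRes, 0);
cudaGraphicsResourceGetMappedPointer((void **)&dPtr, &nBytes, vboRes);
/* ... launch a kernel that fills dPtr ... */
cudaGraphicsUnmapResources(1, &vboRes, 0);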
#ifdef _WIN32 #ifdef _WIN32
#ifndef WGL_NV_gpu_affinity #ifndef WGL_NV_gpu_affinity
typedef void* HGPUNV; typedef void* HGPUNV;
#endif #endif
/**
* \brief Gets the CUDA device associated with hGpu
*
* Returns the CUDA device associated with a hGpu, if applicable.
*
 * \param device - Returns the device associated with hGpu, or -1 if hGpu is
* not a compute device.
* \param hGpu - Handle to a GPU, as queried via WGL_NV_gpu_affinity()
*
* \return
* ::cudaSuccess
* \notefnerr
*
* \sa WGL_NV_gpu_affinity, ::cudaGLSetGLDevice
*/
extern __host__ cudaError_t CUDARTAPI cudaWGLGetDevice(int *device, HGPUNV hGpu); extern __host__ cudaError_t CUDARTAPI cudaWGLGetDevice(int *device, HGPUNV hGpu);
#endif #endif
/** /**
* CUDA GL Map Flags * CUDA GL Map Flags
*/ */
enum cudaGLMapFlags enum cudaGLMapFlags
{ {
cudaGLMapFlagsNone = 0, ///< Default; Assume resource can be rea cudaGLMapFlagsNone = 0, /**< Default; Assume resource can be rea
d/written d/written */
cudaGLMapFlagsReadOnly = 1, ///< CUDA kernels will not write to this cudaGLMapFlagsReadOnly = 1, /**< CUDA kernels will not write to this
resource resource */
cudaGLMapFlagsWriteDiscard = 2, ///< CUDA kernels will only write to and cudaGLMapFlagsWriteDiscard = 2, /**< CUDA kernels will only write to and
will not read from this resource will not read from this resource */
}; };
/**
* \defgroup CUDART_OPENGL_DEPRECATED OpenGL Interoperability [DEPRECATED]
* This section describes deprecated OpenGL interoperability functionality.
*
* @{
*/
/**
* \brief Registers a buffer object for access by CUDA
*
 * \deprecated This function is deprecated as of CUDA 3.0.
*
* Registers the buffer object of ID \p bufObj for access by
* CUDA. This function must be called before CUDA can map the buffer
* object. The OpenGL context used to create the buffer, or another
* context from the same share group, must be bound to the current
* thread when this is called.
*
* \param bufObj - Buffer object ID to register
*
* \return
* ::cudaSuccess,
* ::cudaErrorInitializationError
* \notefnerr
*
* \sa ::cudaGraphicsGLRegisterBuffer
*/
extern __host__ cudaError_t CUDARTAPI cudaGLRegisterBufferObject(GLuint buf Obj); extern __host__ cudaError_t CUDARTAPI cudaGLRegisterBufferObject(GLuint buf Obj);
/**
* \brief Maps a buffer object for access by CUDA
*
 * \deprecated This function is deprecated as of CUDA 3.0.
*
* Maps the buffer object of ID \p bufObj into the address space of
* CUDA and returns in \p *devPtr the base pointer of the resulting
* mapping. The buffer must have previously been registered by
* calling ::cudaGLRegisterBufferObject(). While a buffer is mapped
* by CUDA, any OpenGL operation which references the buffer will
* result in undefined behavior. The OpenGL context used to create
* the buffer, or another context from the same share group, must be
* bound to the current thread when this is called.
*
* All streams in the current thread are synchronized with the current
* GL context.
*
* \param devPtr - Returned device pointer to CUDA object
* \param bufObj - Buffer object ID to map
*
* \return
* ::cudaSuccess,
* ::cudaErrorMapBufferObjectFailed
* \notefnerr
*
* \sa ::cudaGraphicsMapResources
*/
extern __host__ cudaError_t CUDARTAPI cudaGLMapBufferObject(void **devPtr, GLuint bufObj); extern __host__ cudaError_t CUDARTAPI cudaGLMapBufferObject(void **devPtr, GLuint bufObj);
/**
* \brief Unmaps a buffer object for access by CUDA
*
 * \deprecated This function is deprecated as of CUDA 3.0.
*
* Unmaps the buffer object of ID \p bufObj for access by CUDA. When
* a buffer is unmapped, the base address returned by
* ::cudaGLMapBufferObject() is invalid and subsequent references to
* the address result in undefined behavior. The OpenGL context used
* to create the buffer, or another context from the same share group,
* must be bound to the current thread when this is called.
*
* All streams in the current thread are synchronized with the current
* GL context.
*
* \param bufObj - Buffer object to unmap
*
* \return
* ::cudaSuccess,
* ::cudaErrorInvalidDevicePointer,
* ::cudaErrorUnmapBufferObjectFailed
* \notefnerr
*
* \sa ::cudaGraphicsUnmapResources
*/
extern __host__ cudaError_t CUDARTAPI cudaGLUnmapBufferObject(GLuint bufObj ); extern __host__ cudaError_t CUDARTAPI cudaGLUnmapBufferObject(GLuint bufObj );
/**
* \brief Unregisters a buffer object for access by CUDA
*
 * \deprecated This function is deprecated as of CUDA 3.0.
*
* Unregisters the buffer object of ID \p bufObj for access by CUDA
* and releases any CUDA resources associated with the buffer. Once a
* buffer is unregistered, it may no longer be mapped by CUDA. The GL
* context used to create the buffer, or another context from the
* same share group, must be bound to the current thread when this is
* called.
*
* \param bufObj - Buffer object to unregister
*
* \return
* ::cudaSuccess
* \notefnerr
*
* \sa ::cudaGraphicsUnregisterResource
*/
extern __host__ cudaError_t CUDARTAPI cudaGLUnregisterBufferObject(GLuint b ufObj); extern __host__ cudaError_t CUDARTAPI cudaGLUnregisterBufferObject(GLuint b ufObj);
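For contrast with the cudaGraphics* path above, a sketch of the full deprecated flow (register, map, use, unmap, unregister) around a hypothetical buffer id:

static void deprecatedInteropFlow(GLuint bufObj)   /* sketch only */
{
    void *devPtr;

    cudaGLRegisterBufferObject(bufObj);
    cudaGLMapBufferObject(&devPtr, bufObj);
    /* ... kernels may read and write devPtr here ... */
    cudaGLUnmapBufferObject(bufObj);
    cudaGLUnregisterBufferObject(bufObj);
}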
/**
* \brief Set usage flags for mapping an OpenGL buffer
*
 * \deprecated This function is deprecated as of CUDA 3.0.
*
* Set flags for mapping the OpenGL buffer \p bufObj
*
* Changes to flags will take effect the next time \p bufObj is mapped.
* The \p flags argument may be any of the following:
*
* - ::cudaGLMapFlagsNone: Specifies no hints about how this buffer will
* be used. It is therefore assumed that this buffer will be read from and
* written to by CUDA kernels. This is the default value.
 * - ::cudaGLMapFlagsReadOnly: Specifies that CUDA kernels which access this
* buffer will not write to the buffer.
* - ::cudaGLMapFlagsWriteDiscard: Specifies that CUDA kernels which access
* this buffer will not read from the buffer and will write over the
* entire contents of the buffer, so none of the data previously stored in
* the buffer will be preserved.
*
* If \p bufObj has not been registered for use with CUDA, then
* ::cudaErrorInvalidResourceHandle is returned. If \p bufObj is presently
* mapped for access by CUDA, then ::cudaErrorUnknown is returned.
*
* \param bufObj - Registered buffer object to set flags for
* \param flags - Parameters for buffer mapping
*
* \return
* ::cudaSuccess,
* ::cudaErrorInvalidValue,
* ::cudaErrorInvalidResourceHandle,
* ::cudaErrorUnknown
* \notefnerr
*
* \sa ::cudaGraphicsResourceSetMapFlags
*/
extern __host__ cudaError_t CUDARTAPI cudaGLSetBufferObjectMapFlags(GLuint bufObj, unsigned int flags); extern __host__ cudaError_t CUDARTAPI cudaGLSetBufferObjectMapFlags(GLuint bufObj, unsigned int flags);
/**
* \brief Maps a buffer object for access by CUDA
*
 * \deprecated This function is deprecated as of CUDA 3.0.
*
* Maps the buffer object of ID \p bufObj into the address space of
* CUDA and returns in \p *devPtr the base pointer of the resulting
* mapping. The buffer must have previously been registered by
* calling ::cudaGLRegisterBufferObject(). While a buffer is mapped
* by CUDA, any OpenGL operation which references the buffer will
* result in undefined behavior. The OpenGL context used to create
* the buffer, or another context from the same share group, must be
* bound to the current thread when this is called.
*
 * Stream \p stream is synchronized with the current GL context.
*
* \param devPtr - Returned device pointer to CUDA object
* \param bufObj - Buffer object ID to map
* \param stream - Stream to synchronize
*
* \return
* ::cudaSuccess,
* ::cudaErrorMapBufferObjectFailed
* \notefnerr
*
* \sa ::cudaGraphicsMapResources
*/
extern __host__ cudaError_t CUDARTAPI cudaGLMapBufferObjectAsync(void **dev Ptr, GLuint bufObj, cudaStream_t stream); extern __host__ cudaError_t CUDARTAPI cudaGLMapBufferObjectAsync(void **dev Ptr, GLuint bufObj, cudaStream_t stream);
/**
* \brief Unmaps a buffer object for access by CUDA
*
 * \deprecated This function is deprecated as of CUDA 3.0.
*
* Unmaps the buffer object of ID \p bufObj for access by CUDA. When
* a buffer is unmapped, the base address returned by
* ::cudaGLMapBufferObject() is invalid and subsequent references to
* the address result in undefined behavior. The OpenGL context used
* to create the buffer, or another context from the same share group,
* must be bound to the current thread when this is called.
*
 * Stream \p stream is synchronized with the current GL context.
*
* \param bufObj - Buffer object to unmap
* \param stream - Stream to synchronize
*
* \return
* ::cudaSuccess,
* ::cudaErrorInvalidDevicePointer,
* ::cudaErrorUnmapBufferObjectFailed
* \notefnerr
*
* \sa ::cudaGraphicsUnmapResources
*/
extern __host__ cudaError_t CUDARTAPI cudaGLUnmapBufferObjectAsync(GLuint b ufObj, cudaStream_t stream); extern __host__ cudaError_t CUDARTAPI cudaGLUnmapBufferObjectAsync(GLuint b ufObj, cudaStream_t stream);
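The async variants confine the GL synchronization to one stream instead of every stream on the thread; a sketch, assuming bufObj was registered as above:

static void asyncMapSketch(GLuint bufObj, cudaStream_t stream)   /* sketch only */
{
    void *devPtr;

    cudaGLMapBufferObjectAsync(&devPtr, bufObj, stream);
    /* ... enqueue kernels that use devPtr into the same stream ... */
    cudaGLUnmapBufferObjectAsync(bufObj, stream);
}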
/** @} */ /* END CUDART_OPENGL_DEPRECATED */
/** @} */ /* END CUDART_OPENGL */
#if defined(__cplusplus) #if defined(__cplusplus)
} }
#endif /* __cplusplus */ #endif /* __cplusplus */
#endif /* __CUDA_GL_INTEROP_H__ */ #endif /* __CUDA_GL_INTEROP_H__ */
 End of changes. 14 change blocks. 
32 lines changed or deleted 345 lines changed or added


 cuda_runtime.h   cuda_runtime.h 
skipping to change at line 79 skipping to change at line 79
#if defined(__cplusplus) #if defined(__cplusplus)
/************************************************************************** ***** /************************************************************************** *****
* * * *
* * * *
* * * *
*************************************************************************** ****/ *************************************************************************** ****/
/** /**
* \ingroup CUDART_HIGHLEVEL * \addtogroup CUDART_HIGHLEVEL
* @{
*/
/**
* \brief \hl Configure a device launch * \brief \hl Configure a device launch
* *
* Pushes \p size bytes of the argument pointed to by \p arg at \p offset * Pushes \p size bytes of the argument pointed to by \p arg at \p offset
* bytes from the start of the parameter passing area, which starts at * bytes from the start of the parameter passing area, which starts at
* offset 0. The arguments are stored in the top of the execution stack. * offset 0. The arguments are stored in the top of the execution stack.
* \ref ::cudaSetupArgument(T,size_t) "cudaSetupArgument()" must be precede d * \ref ::cudaSetupArgument(T, size_t) "cudaSetupArgument()" must be preced ed
* by a call to ::cudaConfigureCall(). * by a call to ::cudaConfigureCall().
* *
* \param arg - Argument to push for a kernel launch * \param arg - Argument to push for a kernel launch
* \param offset - Offset in argument stack to push new arg * \param offset - Offset in argument stack to push new arg
* *
* \return * \return
* ::cudaSuccess * ::cudaSuccess
* \notefnerr * \notefnerr
* *
* \sa ::cudaConfigureCall, * \sa ::cudaConfigureCall,
skipping to change at line 111 skipping to change at line 115
*/ */
template<class T> template<class T>
__inline__ __host__ cudaError_t cudaSetupArgument( __inline__ __host__ cudaError_t cudaSetupArgument(
T arg, T arg,
size_t offset size_t offset
) )
{ {
return cudaSetupArgument((const void*)&arg, sizeof(T), offset); return cudaSetupArgument((const void*)&arg, sizeof(T), offset);
} }
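A sketch of the manual launch sequence that the <<<...>>> syntax expands to; myKernel and its extern "C" linkage are assumptions made for illustration:

static void manualLaunch(float *devPtr, int n)   /* sketch only */
{
    cudaConfigureCall(dim3(64), dim3(256));
    cudaSetupArgument(devPtr, 0);             /* first argument at offset 0 */
    cudaSetupArgument(n, sizeof(devPtr));     /* packed after the pointer */
    cudaLaunch("myKernel");                   /* name as registered with the runtime */
}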
/**
* \brief \hl Creates an event object with the specified flags
*
* Creates an event object with the specified flags. Valid flags include:
* - ::cudaEventDefault: Default event creation flag.
* - ::cudaEventBlockingSync: Specifies that event should use blocking
 * synchronization. A host thread that uses ::cudaEventSynchronize() to wait
 * on an event created with this flag will block until the event actually
 * completes.
 * - ::cudaEventDisableTiming: Specifies that the created event does not need
 * to record timing data. Events created with this flag specified and
 * the ::cudaEventBlockingSync flag not specified will provide the best
 * performance when used with ::cudaStreamWaitEvent() and ::cudaEventQuery().
*
* \param event - Newly created event
* \param flags - Flags for new event
*
* \return
* ::cudaSuccess,
* ::cudaErrorInitializationError,
* ::cudaErrorInvalidValue,
* ::cudaErrorLaunchFailure,
* ::cudaErrorMemoryAllocation
* \notefnerr
*
* \sa \ref ::cudaEventCreate(cudaEvent_t*) "cudaEventCreate (C API)",
* ::cudaEventCreateWithFlags, ::cudaEventRecord, ::cudaEventQuery,
* ::cudaEventSynchronize, ::cudaEventDestroy, ::cudaEventElapsedTime,
* ::cudaStreamWaitEvent
*/
static __inline__ __host__ cudaError_t cudaEventCreate(
cudaEvent_t *event,
unsigned int flags
)
{
  return cudaEventCreateWithFlags(event, flags); /* forward the caller's flags */
}
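A sketch of timing GPU work with this overload; blocking-sync events keep the waiting host thread off the CPU while the work drains:

cudaEvent_t start, stop;
float ms;

cudaEventCreate(&start, cudaEventBlockingSync);
cudaEventCreate(&stop,  cudaEventBlockingSync);
cudaEventRecord(start, 0);
/* ... asynchronous kernels and copies ... */
cudaEventRecord(stop, 0);
cudaEventSynchronize(stop);
cudaEventElapsedTime(&ms, start, stop);
cudaEventDestroy(start);
cudaEventDestroy(stop);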
/**
* \brief \hl Allocates page-locked memory on the host
*
 * Allocates \p size bytes of host memory that is page-locked and accessible
 * to the device. The driver tracks the virtual memory ranges allocated with
 * this function and automatically accelerates calls to functions such as
 * ::cudaMemcpy(). Since the memory can be accessed directly by the device, it
 * can be read or written with much higher bandwidth than pageable memory
 * obtained with functions such as ::malloc(). Allocating excessive amounts of
 * pinned memory may degrade system performance, since it reduces the amount
 * of memory available to the system for paging. As a result, this function is
 * best used sparingly to allocate staging areas for data exchange between host
 * and device.
 *
 * The \p flags parameter enables different options to be specified that affect
 * the allocation, as follows.
 * - ::cudaHostAllocDefault: This flag's value is defined to be 0.
 * - ::cudaHostAllocPortable: The memory returned by this call will be
 * considered as pinned memory by all CUDA contexts, not just the one that
 * performed the allocation.
 * - ::cudaHostAllocMapped: Maps the allocation into the CUDA address space.
 * The device pointer to the memory may be obtained by calling
 * ::cudaHostGetDevicePointer().
 * - ::cudaHostAllocWriteCombined: Allocates the memory as write-combined (WC).
 * WC memory can be transferred across the PCI Express bus more quickly on some
 * system configurations, but cannot be read efficiently by most CPUs. WC
 * memory is a good option for buffers that will be written by the CPU and read
 * by the device via mapped pinned memory or host->device transfers.
 *
 * All of these flags are orthogonal to one another: a developer may allocate
 * memory that is portable, mapped and/or write-combined with no restrictions.
 *
 * ::cudaSetDeviceFlags() must have been called with the ::cudaDeviceMapHost
 * flag in order for the ::cudaHostAllocMapped flag to have any effect.
 *
 * The ::cudaHostAllocMapped flag may be specified on CUDA contexts for devices
* that do not support mapped pinned memory. The failure is deferred to
* ::cudaHostGetDevicePointer() because the memory may be mapped into other
* CUDA contexts via the ::cudaHostAllocPortable flag.
*
* Memory allocated by this function must be freed with ::cudaFreeHost().
*
* \param ptr - Device pointer to allocated memory
* \param size - Requested allocation size in bytes
* \param flags - Requested properties of allocated memory
*
* \return
* ::cudaSuccess,
* ::cudaErrorMemoryAllocation
* \notefnerr
*
* \sa ::cudaSetDeviceFlags,
* \ref ::cudaMallocHost(void**, size_t) "cudaMallocHost (C API)",
* ::cudaFreeHost, ::cudaHostAlloc
*/
static __inline__ __host__ cudaError_t cudaMallocHost(
void **ptr,
size_t size,
unsigned int flags
)
{
return cudaHostAlloc(ptr, size, flags);
}
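A sketch of a zero-copy allocation through this overload; the buffer size is arbitrary, and ::cudaSetDeviceFlags() must precede any other runtime activity:

float *hPtr, *dPtr;
size_t bytes = 1 << 20;   /* arbitrary 1 MiB example */

cudaSetDeviceFlags(cudaDeviceMapHost);
cudaMallocHost((void **)&hPtr, bytes, cudaHostAllocMapped);
cudaHostGetDevicePointer((void **)&dPtr, hPtr, 0);
/* ... kernels may dereference dPtr directly ... */
cudaFreeHost(hPtr);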
template<class T> template<class T>
__inline__ __host__ cudaError_t cudaHostAlloc( __inline__ __host__ cudaError_t cudaHostAlloc(
T **ptr, T **ptr,
size_t size, size_t size,
unsigned int flags unsigned int flags
) )
{ {
return cudaHostAlloc((void**)(void*)ptr, size, flags); return cudaHostAlloc((void**)(void*)ptr, size, flags);
} }
skipping to change at line 142 skipping to change at line 248
__inline__ __host__ cudaError_t cudaMalloc( __inline__ __host__ cudaError_t cudaMalloc(
T **devPtr, T **devPtr,
size_t size size_t size
) )
{ {
return cudaMalloc((void**)(void*)devPtr, size); return cudaMalloc((void**)(void*)devPtr, size);
} }
template<class T> template<class T>
__inline__ __host__ cudaError_t cudaMallocHost( __inline__ __host__ cudaError_t cudaMallocHost(
T **ptr, T **ptr,
size_t size size_t size,
unsigned int flags = 0
) )
{ {
return cudaMallocHost((void**)(void*)ptr, size); return cudaMallocHost((void**)(void*)ptr, size, flags);
} }
template<class T> template<class T>
__inline__ __host__ cudaError_t cudaMallocPitch( __inline__ __host__ cudaError_t cudaMallocPitch(
T **devPtr, T **devPtr,
size_t *pitch, size_t *pitch,
size_t width, size_t width,
size_t height size_t height
) )
{ {
skipping to change at line 168 skipping to change at line 275
} }
#if defined(__CUDACC__) #if defined(__CUDACC__)
/************************************************************************** ***** /************************************************************************** *****
* * * *
* * * *
* * * *
*************************************************************************** ****/ *************************************************************************** ****/
/**
* \addtogroup CUDART_HIGHLEVEL
* @{
*/
static __inline__ __host__ cudaError_t cudaMemcpyToSymbol( static __inline__ __host__ cudaError_t cudaMemcpyToSymbol(
char *symbol, char *symbol,
const void *src, const void *src,
size_t count, size_t count,
size_t offset = 0, size_t offset = 0,
enum cudaMemcpyKind kind = cudaMemcpyHostToDevice enum cudaMemcpyKind kind = cudaMemcpyHostToDevice
) )
{ {
return cudaMemcpyToSymbol((const char*)symbol, src, count, offset, kind); return cudaMemcpyToSymbol((const char*)symbol, src, count, offset, kind);
} }
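A sketch of use with a hypothetical __constant__ array; passing the symbol itself resolves to the sibling overload that takes the symbol by reference (compare ::cudaGetSymbolSize below), with offset and kind defaulting as above:

__constant__ float coeffs[16];   /* hypothetical constant-memory table */

static void uploadCoeffs(const float *hostCoeffs)   /* 16 host floats */
{
    cudaMemcpyToSymbol(coeffs, hostCoeffs, 16 * sizeof(float));
}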
skipping to change at line 365 skipping to change at line 467
*/ */
template<class T> template<class T>
__inline__ __host__ cudaError_t cudaGetSymbolSize( __inline__ __host__ cudaError_t cudaGetSymbolSize(
size_t *size, size_t *size,
const T &symbol const T &symbol
) )
{ {
return cudaGetSymbolSize(size, (const char*)&symbol); return cudaGetSymbolSize(size, (const char*)&symbol);
} }
/** @} */ /* END CUDART_HIGHLEVEL */
/************************************************************************** ***** /************************************************************************** *****
* * * *
* * * *
* * * *
*************************************************************************** ****/ *************************************************************************** ****/
/** /**
* \addtogroup CUDART_HIGHLEVEL
*
* @{
*/
/**
* \brief \hl Binds a memory area to a texture * \brief \hl Binds a memory area to a texture
* *
* Binds \p size bytes of the memory area pointed to by \p devPtr to textur e * Binds \p size bytes of the memory area pointed to by \p devPtr to textur e
* reference \p tex. \p desc describes how the memory is interpreted when * reference \p tex. \p desc describes how the memory is interpreted when
* fetching values from the texture. The \p offset parameter is an optional * fetching values from the texture. The \p offset parameter is an optional
* byte offset as with the low-level * byte offset as with the low-level
* \ref ::cudaBindTexture(size_t*, const struct textureReference*, const vo id*, const struct cudaChannelFormatDesc*, size_t) "cudaBindTexture()" * \ref ::cudaBindTexture(size_t*, const struct textureReference*, const vo id*, const struct cudaChannelFormatDesc*, size_t) "cudaBindTexture()"
* function. Any memory previously bound to \p tex is unbound. * function. Any memory previously bound to \p tex is unbound.
* *
* \param offset - Offset in bytes * \param offset - Offset in bytes
skipping to change at line 405 skipping to change at line 499
* \return * \return
* ::cudaSuccess, * ::cudaSuccess,
* ::cudaErrorInvalidValue, * ::cudaErrorInvalidValue,
* ::cudaErrorInvalidDevicePointer, * ::cudaErrorInvalidDevicePointer,
* ::cudaErrorInvalidTexture * ::cudaErrorInvalidTexture
* \notefnerr * \notefnerr
* *
* \sa \ref ::cudaCreateChannelDesc(void) "cudaCreateChannelDesc (C++ API)" , * \sa \ref ::cudaCreateChannelDesc(void) "cudaCreateChannelDesc (C++ API)" ,
* ::cudaGetChannelDesc, ::cudaGetTextureReference, * ::cudaGetChannelDesc, ::cudaGetTextureReference,
* \ref ::cudaBindTexture(size_t*, const struct textureReference*, const vo id*, const struct cudaChannelFormatDesc*, size_t) "cudaBindTexture (C API)" , * \ref ::cudaBindTexture(size_t*, const struct textureReference*, const vo id*, const struct cudaChannelFormatDesc*, size_t) "cudaBindTexture (C API)" ,
* \ref ::cudaBindTexture(size_t*, const struct texture< T, dim, readMode>& * \ref ::cudaBindTexture(size_t*, const struct texture<T, dim, readMode>&,
, const void*, size_t) "cudaBindTexture (C++ API, inherited channel descrip const void*, size_t) "cudaBindTexture (C++ API, inherited channel descript
tor)", or)",
* \ref ::cudaBindTexture2D(size_t*, const struct texture< T, dim, readMode * \ref ::cudaBindTexture2D(size_t*, const struct texture<T, dim, readMode>
>&, const void*, const struct cudaChannelFormatDesc&, size_t, size_t, size_ &, const void*, const struct cudaChannelFormatDesc&, size_t, size_t, size_t
t) "cudaBindTexture2D (C++ API)", ) "cudaBindTexture2D (C++ API)",
* \ref ::cudaBindTextureToArray(const struct texture< T, dim, readMode>&, * \ref ::cudaBindTexture2D(size_t*, const struct texture<T, dim, readMode>
const struct cudaArray*, const struct cudaChannelFormatDesc&) "cudaBindText &, const void*, size_t, size_t, size_t) "cudaBindTexture2D (C++ API, inheri
ureToArray (C++ API)", ted channel descriptor)",
* \ref ::cudaBindTextureToArray(const struct texture< T, dim, readMode>&, * \ref ::cudaBindTextureToArray(const struct texture<T, dim, readMode>&, c
const struct cudaArray*) "cudaBindTextureToArray (C++ API, inherited channe onst struct cudaArray*, const struct cudaChannelFormatDesc&) "cudaBindTextu
l descriptor)", reToArray (C++ API)",
* \ref ::cudaUnbindTexture(const struct texture< T, dim, readMode>&) "cuda * \ref ::cudaBindTextureToArray(const struct texture<T, dim, readMode>&, c
UnbindTexture (C++ API)", onst struct cudaArray*) "cudaBindTextureToArray (C++ API, inherited channel
* \ref ::cudaGetTextureAlignmentOffset(size_t*, const struct texture< T, d descriptor)",
im, readMode >&) "cudaGetTextureAlignmentOffset (C++ API)" * \ref ::cudaUnbindTexture(const struct texture<T, dim, readMode>&) "cudaU
nbindTexture (C++ API)",
* \ref ::cudaGetTextureAlignmentOffset(size_t*, const struct texture<T, di
m, readMode>&) "cudaGetTextureAlignmentOffset (C++ API)"
*/ */
template<class T, int dim, enum cudaTextureReadMode readMode> template<class T, int dim, enum cudaTextureReadMode readMode>
__inline__ __host__ cudaError_t cudaBindTexture( __inline__ __host__ cudaError_t cudaBindTexture(
size_t *offset, size_t *offset,
const struct texture<T, dim, readMode> &tex, const struct texture<T, dim, readMode> &tex,
const void *devPtr, const void *devPtr,
const struct cudaChannelFormatDesc &desc, const struct cudaChannelFormatDesc &desc,
size_t size = UINT_MAX size_t size = UINT_MAX
) )
{ {
skipping to change at line 446 skipping to change at line 541
* \param devPtr - Memory area on device * \param devPtr - Memory area on device
* \param size - Size of the memory area pointed to by devPtr * \param size - Size of the memory area pointed to by devPtr
* *
* \return * \return
* ::cudaSuccess, * ::cudaSuccess,
* ::cudaErrorInvalidValue, * ::cudaErrorInvalidValue,
* ::cudaErrorInvalidDevicePointer, * ::cudaErrorInvalidDevicePointer,
* ::cudaErrorInvalidTexture * ::cudaErrorInvalidTexture
* \notefnerr * \notefnerr
* *
* \sa \ref ::cudaCreateChannelDesc(void) "cudaCreateChannelDesc (C++ API), * \sa \ref ::cudaCreateChannelDesc(void) "cudaCreateChannelDesc (C++ API)" ,
* ::cudaGetChannelDesc, ::cudaGetTextureReference, * ::cudaGetChannelDesc, ::cudaGetTextureReference,
* \ref ::cudaBindTexture(size_t*, const struct textureReference*, const vo id*, const struct cudaChannelFormatDesc*, size_t) "cudaBindTexture (C API)" , * \ref ::cudaBindTexture(size_t*, const struct textureReference*, const vo id*, const struct cudaChannelFormatDesc*, size_t) "cudaBindTexture (C API)" ,
* \ref ::cudaBindTexture(size_t*, const struct texture< T, dim, readMode>& * \ref ::cudaBindTexture(size_t*, const struct texture<T, dim, readMode>&,
, const void*, size_t) "cudaBindTexture (C++ API, inherited channel descrip const void*, const struct cudaChannelFormatDesc&, size_t) "cudaBindTexture
tor)", (C++ API)",
* \ref ::cudaBindTexture2D(size_t*, const struct texture< T, dim, readMode * \ref ::cudaBindTexture2D(size_t*, const struct texture<T, dim, readMode>
>&, const void*, const struct cudaChannelFormatDesc&, size_t, size_t, size_ &, const void*, const struct cudaChannelFormatDesc&, size_t, size_t, size_t
t) "cudaBindTexture2D (C++ API)", ) "cudaBindTexture2D (C++ API)",
* \ref ::cudaBindTextureToArray(const struct texture< T, dim, readMode>&, * \ref ::cudaBindTexture2D(size_t*, const struct texture<T, dim, readMode>
const struct cudaArray*, const struct cudaChannelFormatDesc&) "cudaBindText &, const void*, size_t, size_t, size_t) "cudaBindTexture2D (C++ API, inheri
ureToArray (C++ API)", ted channel descriptor)",
* \ref ::cudaBindTextureToArray(const struct texture< T, dim, readMode>&, * \ref ::cudaBindTextureToArray(const struct texture<T, dim, readMode>&, c
const struct cudaArray*) "cudaBindTextureToArray (C++ API, inherited channe onst struct cudaArray*, const struct cudaChannelFormatDesc&) "cudaBindTextu
l descriptor), reToArray (C++ API)",
* \ref ::cudaBindTextureToArray(const struct texture<T, dim, readMode>&, c
onst struct cudaArray*) "cudaBindTextureToArray (C++ API, inherited channel
descriptor)",
* \ref ::cudaUnbindTexture(const struct texture<T, dim, readMode>&) "cudaU nbindTexture (C++ API)", * \ref ::cudaUnbindTexture(const struct texture<T, dim, readMode>&) "cudaU nbindTexture (C++ API)",
* \ref ::cudaGetTextureAlignmentOffset(size_t*, const struct texture< T, d im, readMode>&) "cudaGetTextureAlignmentOffset (C++ API)" * \ref ::cudaGetTextureAlignmentOffset(size_t*, const struct texture<T, di m, readMode>&) "cudaGetTextureAlignmentOffset (C++ API)"
*/ */
template<class T, int dim, enum cudaTextureReadMode readMode> template<class T, int dim, enum cudaTextureReadMode readMode>
__inline__ __host__ cudaError_t cudaBindTexture( __inline__ __host__ cudaError_t cudaBindTexture(
size_t *offset, size_t *offset,
const struct texture<T, dim, readMode> &tex, const struct texture<T, dim, readMode> &tex,
const void *devPtr, const void *devPtr,
size_t size = UINT_MAX size_t size = UINT_MAX
) )
{ {
return cudaBindTexture(offset, tex, devPtr, tex.channelDesc, size); return cudaBindTexture(offset, tex, devPtr, tex.channelDesc, size);
skipping to change at line 478 skipping to change at line 574
* \brief \hl Binds a 2D memory area to a texture * \brief \hl Binds a 2D memory area to a texture
* *
* Binds the 2D memory area pointed to by \p devPtr to the * Binds the 2D memory area pointed to by \p devPtr to the
* texture reference \p tex. The size of the area is constrained by * texture reference \p tex. The size of the area is constrained by
* \p width in texel units, \p height in texel units, and \p pitch in byte * \p width in texel units, \p height in texel units, and \p pitch in byte
* units. \p desc describes how the memory is interpreted when fetching val ues * units. \p desc describes how the memory is interpreted when fetching val ues
* from the texture. Any memory previously bound to \p tex is unbound. * from the texture. Any memory previously bound to \p tex is unbound.
* *
* Since the hardware enforces an alignment requirement on texture base * Since the hardware enforces an alignment requirement on texture base
* addresses, * addresses,
* \ref ::cudaBindTexture2D(size_t*, const struct texture< T, dim, readMode >&, const void*, const struct cudaChannelFormatDesc&, size_t, size_t, size_ t) "cudaBindTexture2D()" * \ref ::cudaBindTexture2D(size_t*, const struct texture<T, dim, readMode> &, const void*, const struct cudaChannelFormatDesc&, size_t, size_t, size_t ) "cudaBindTexture2D()"
* returns in \p *offset a byte offset that * returns in \p *offset a byte offset that
* must be applied to texture fetches in order to read from the desired mem ory. * must be applied to texture fetches in order to read from the desired mem ory.
* This offset must be divided by the texel size and passed to kernels that * This offset must be divided by the texel size and passed to kernels that
* read from the texture so they can be applied to the ::tex2D() function. * read from the texture so they can be applied to the ::tex2D() function.
* If the device memory pointer was returned from ::cudaMalloc(), the offse t is * If the device memory pointer was returned from ::cudaMalloc(), the offse t is
* guaranteed to be 0 and NULL may be passed as the \p offset parameter. * guaranteed to be 0 and NULL may be passed as the \p offset parameter.
* *
* \param offset - Offset in bytes * \param offset - Offset in bytes
* \param tex - Texture reference to bind * \param tex - Texture reference to bind
* \param devPtr - 2D memory area on device * \param devPtr - 2D memory area on device
skipping to change at line 501 skipping to change at line 597
* \param height - Height in texel units * \param height - Height in texel units
* \param pitch - Pitch in bytes * \param pitch - Pitch in bytes
* *
* \return * \return
* ::cudaSuccess, * ::cudaSuccess,
* ::cudaErrorInvalidValue, * ::cudaErrorInvalidValue,
* ::cudaErrorInvalidDevicePointer, * ::cudaErrorInvalidDevicePointer,
* ::cudaErrorInvalidTexture * ::cudaErrorInvalidTexture
* \notefnerr * \notefnerr
* *
* \sa \ref ::cudaCreateChannelDesc(void) "cudaCreateChannelDesc (C++ API), * \sa \ref ::cudaCreateChannelDesc(void) "cudaCreateChannelDesc (C++ API)" ,
* ::cudaGetChannelDesc, ::cudaGetTextureReference, * ::cudaGetChannelDesc, ::cudaGetTextureReference,
* \ref ::cudaBindTexture(size_t*, const struct texture< T, dim, readMode>& * \ref ::cudaBindTexture(size_t*, const struct texture<T, dim, readMode>&,
, const void*, const struct cudaChannelFormatDesc&, size_t) "cudaBindTextur const void*, const struct cudaChannelFormatDesc&, size_t) "cudaBindTexture
e (C++ API), (C++ API)",
* \ref ::cudaBindTexture(size_t*, const struct texture< T, dim, readMode>& * \ref ::cudaBindTexture(size_t*, const struct texture<T, dim, readMode>&,
, const void*, size_t) "cudaBindTexture (C++ API, inherited channel descrip const void*, size_t) "cudaBindTexture (C++ API, inherited channel descript
tor)", or)",
* \ref ::cudaBindTexture2D(size_t*, const struct textureReference*, const void*, const struct cudaChannelFormatDesc*, size_t, size_t, size_t) "cudaBi ndTexture2D (C API)", * \ref ::cudaBindTexture2D(size_t*, const struct textureReference*, const void*, const struct cudaChannelFormatDesc*, size_t, size_t, size_t) "cudaBi ndTexture2D (C API)",
* \ref ::cudaBindTextureToArray(const struct texture< T, dim, readMode>&, * \ref ::cudaBindTexture2D(size_t*, const struct texture<T, dim, readMode>
const struct cudaArray*, const struct cudaChannelFormatDesc&) "cudaBindText &, const void*, size_t, size_t, size_t) "cudaBindTexture2D (C++ API, inheri
ureToArray (C++ API)", ted channel descriptor)",
* \ref ::cudaBindTextureToArray(const struct texture< T, dim, readMode>&, * \ref ::cudaBindTextureToArray(const struct texture<T, dim, readMode>&, c
const struct cudaArray*) "cudaBindTextureToArray (C++ API, inherited channe onst struct cudaArray*, const struct cudaChannelFormatDesc&) "cudaBindTextu
l descriptor), reToArray (C++ API)",
* \ref ::cudaBindTextureToArray(const struct texture<T, dim, readMode>&, c
onst struct cudaArray*) "cudaBindTextureToArray (C++ API, inherited channel
descriptor)",
* \ref ::cudaUnbindTexture(const struct texture<T, dim, readMode>&) "cudaU nbindTexture (C++ API)", * \ref ::cudaUnbindTexture(const struct texture<T, dim, readMode>&) "cudaU nbindTexture (C++ API)",
* \ref ::cudaGetTextureAlignmentOffset(size_t*, const struct texture< T, d im, readMode>&) "cudaGetTextureAlignmentOffset (C++ API)" * \ref ::cudaGetTextureAlignmentOffset(size_t*, const struct texture<T, di m, readMode>&) "cudaGetTextureAlignmentOffset (C++ API)"
*/ */
template<class T, int dim, enum cudaTextureReadMode readMode> template<class T, int dim, enum cudaTextureReadMode readMode>
__inline__ __host__ cudaError_t cudaBindTexture2D( __inline__ __host__ cudaError_t cudaBindTexture2D(
size_t *offset, size_t *offset,
const struct texture<T, dim, readMode> &tex, const struct texture<T, dim, readMode> &tex,
const void *devPtr, const void *devPtr,
const struct cudaChannelFormatDesc &desc, const struct cudaChannelFormatDesc &desc,
size_t width, size_t width,
size_t height, size_t height,
size_t pitch size_t pitch
) )
{ {
return cudaBindTexture2D( offset, &tex, devPtr, &desc, width, height, pit return cudaBindTexture2D(offset, &tex, devPtr, &desc, width, height, pitc
ch); h);
}
/**
* \brief \hl Binds a 2D memory area to a texture
*
* Binds the 2D memory area pointed to by \p devPtr to the
* texture reference \p tex. The size of the area is constrained by
* \p width in texel units, \p height in texel units, and \p pitch in byte
* units. The channel descriptor is inherited from the texture reference
* type. Any memory previously bound to \p tex is unbound.
*
* Since the hardware enforces an alignment requirement on texture base
* addresses,
 * \ref ::cudaBindTexture2D(size_t*, const struct texture<T, dim, readMode>&, const void*, size_t, size_t, size_t) "cudaBindTexture2D()"
 * returns in \p *offset a byte offset that
 * must be applied to texture fetches in order to read from the desired memory.
 * This offset must be divided by the texel size and passed to kernels that
 * read from the texture so they can be applied to the ::tex2D() function.
 * If the device memory pointer was returned from ::cudaMalloc(), the offset is
 * guaranteed to be 0 and NULL may be passed as the \p offset parameter.
*
* \param offset - Offset in bytes
* \param tex - Texture reference to bind
* \param devPtr - 2D memory area on device
* \param width - Width in texel units
* \param height - Height in texel units
* \param pitch - Pitch in bytes
*
* \return
* ::cudaSuccess,
* ::cudaErrorInvalidValue,
* ::cudaErrorInvalidDevicePointer,
* ::cudaErrorInvalidTexture
* \notefnerr
*
 * \sa \ref ::cudaCreateChannelDesc(void) "cudaCreateChannelDesc (C++ API)",
 * ::cudaGetChannelDesc, ::cudaGetTextureReference,
 * \ref ::cudaBindTexture(size_t*, const struct texture<T, dim, readMode>&, const void*, const struct cudaChannelFormatDesc&, size_t) "cudaBindTexture (C++ API)",
 * \ref ::cudaBindTexture(size_t*, const struct texture<T, dim, readMode>&, const void*, size_t) "cudaBindTexture (C++ API, inherited channel descriptor)",
 * \ref ::cudaBindTexture2D(size_t*, const struct textureReference*, const void*, const struct cudaChannelFormatDesc*, size_t, size_t, size_t) "cudaBindTexture2D (C API)",
 * \ref ::cudaBindTexture2D(size_t*, const struct texture<T, dim, readMode>&, const void*, const struct cudaChannelFormatDesc&, size_t, size_t, size_t) "cudaBindTexture2D (C++ API)",
 * \ref ::cudaBindTextureToArray(const struct texture<T, dim, readMode>&, const struct cudaArray*, const struct cudaChannelFormatDesc&) "cudaBindTextureToArray (C++ API)",
 * \ref ::cudaBindTextureToArray(const struct texture<T, dim, readMode>&, const struct cudaArray*) "cudaBindTextureToArray (C++ API, inherited channel descriptor)",
 * \ref ::cudaUnbindTexture(const struct texture<T, dim, readMode>&) "cudaUnbindTexture (C++ API)",
 * \ref ::cudaGetTextureAlignmentOffset(size_t*, const struct texture<T, dim, readMode>&) "cudaGetTextureAlignmentOffset (C++ API)"
*/
template<class T, int dim, enum cudaTextureReadMode readMode>
__inline__ __host__ cudaError_t cudaBindTexture2D(
size_t *offset,
const struct texture<T, dim, readMode> &tex,
const void *devPtr,
size_t width,
size_t height,
size_t pitch
)
{
  return cudaBindTexture2D(offset, &tex, devPtr, &tex.channelDesc, width, height, pitch);
} }
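A sketch of this inherited-descriptor overload with a hypothetical file-scope texture reference:

texture<float, 2, cudaReadModeElementType> texRef;   /* hypothetical */

static void bindPitched2D(float *devPtr, size_t width, size_t height, size_t pitch)
{
    size_t offset = 0;
    cudaBindTexture2D(&offset, texRef, devPtr, width, height, pitch);
    /* a non-zero offset must be folded into the tex2D() coordinates */
}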
/** /**
* \brief \hl Binds an array to a texture * \brief \hl Binds an array to a texture
* *
* Binds the CUDA array \p array to the texture reference \p tex. * Binds the CUDA array \p array to the texture reference \p tex.
* \p desc describes how the memory is interpreted when fetching values fro m * \p desc describes how the memory is interpreted when fetching values fro m
* the texture. Any CUDA array previously bound to \p tex is unbound. * the texture. Any CUDA array previously bound to \p tex is unbound.
* *
* \param tex - Texture to bind * \param tex - Texture to bind
skipping to change at line 545 skipping to change at line 699
* *
* \return * \return
* ::cudaSuccess, * ::cudaSuccess,
* ::cudaErrorInvalidValue, * ::cudaErrorInvalidValue,
* ::cudaErrorInvalidDevicePointer, * ::cudaErrorInvalidDevicePointer,
* ::cudaErrorInvalidTexture * ::cudaErrorInvalidTexture
* \notefnerr * \notefnerr
* *
* \sa \ref ::cudaCreateChannelDesc(void) "cudaCreateChannelDesc (C++ API)" , * \sa \ref ::cudaCreateChannelDesc(void) "cudaCreateChannelDesc (C++ API)" ,
* ::cudaGetChannelDesc, ::cudaGetTextureReference, * ::cudaGetChannelDesc, ::cudaGetTextureReference,
* \ref ::cudaBindTexture(size_t*, const struct texture< T, dim, readMode>& * \ref ::cudaBindTexture(size_t*, const struct texture<T, dim, readMode>&,
, const void*, const struct cudaChannelFormatDesc&, size_t) "cudaBindTextur const void*, const struct cudaChannelFormatDesc&, size_t) "cudaBindTexture
e (C++ API)", (C++ API)",
* \ref ::cudaBindTexture(size_t*, const struct texture< T, dim, readMode>& * \ref ::cudaBindTexture(size_t*, const struct texture<T, dim, readMode>&,
, const void*, size_t) "cudaBindTexture (C++ API, inherited channel descrip const void*, size_t) "cudaBindTexture (C++ API, inherited channel descript
tor)", or)",
* \ref ::cudaBindTexture2D(size_t*, const struct texture< T, dim, readMode * \ref ::cudaBindTexture2D(size_t*, const struct texture<T, dim, readMode>
>&, const void*, const struct cudaChannelFormatDesc&, size_t, size_t, size_ &, const void*, const struct cudaChannelFormatDesc&, size_t, size_t, size_t
t) "cudaBindTexture2D (C++ API)", ) "cudaBindTexture2D (C++ API)",
* \ref ::cudaBindTexture2D(size_t*, const struct texture<T, dim, readMode>
&, const void*, size_t, size_t, size_t) "cudaBindTexture2D (C++ API, inheri
ted channel descriptor)",
* \ref ::cudaBindTextureToArray(const struct textureReference*, const stru ct cudaArray*, const struct cudaChannelFormatDesc*) "cudaBindTextureToArray (C API)", * \ref ::cudaBindTextureToArray(const struct textureReference*, const stru ct cudaArray*, const struct cudaChannelFormatDesc*) "cudaBindTextureToArray (C API)",
* \ref ::cudaBindTextureToArray(const struct texture< T, dim, readMode>&, * \ref ::cudaBindTextureToArray(const struct texture<T, dim, readMode>&, c
const struct cudaArray*) "cudaBindTextureToArray (C++ API, inherited channe onst struct cudaArray*) "cudaBindTextureToArray (C++ API, inherited channel
l descriptor)", descriptor)",
* \ref ::cudaUnbindTexture(const struct texture< T, dim, readMode>&) "cuda * \ref ::cudaUnbindTexture(const struct texture<T, dim, readMode>&) "cudaU
UnbindTexture (C++ API)", nbindTexture (C++ API)",
* \ref ::cudaGetTextureAlignmentOffset(size_t*, const struct texture< T, d * \ref ::cudaGetTextureAlignmentOffset(size_t*, const struct texture<T, di
im, readMode >&) "cudaGetTextureAlignmentOffset (C++ API)" m, readMode >&) "cudaGetTextureAlignmentOffset (C++ API)"
*/ */
template<class T, int dim, enum cudaTextureReadMode readMode> template<class T, int dim, enum cudaTextureReadMode readMode>
__inline__ __host__ cudaError_t cudaBindTextureToArray( __inline__ __host__ cudaError_t cudaBindTextureToArray(
const struct texture<T, dim, readMode> &tex, const struct texture<T, dim, readMode> &tex,
const struct cudaArray *array, const struct cudaArray *array,
const struct cudaChannelFormatDesc &desc const struct cudaChannelFormatDesc &desc
) )
{ {
return cudaBindTextureToArray(&tex, array, &desc); return cudaBindTextureToArray(&tex, array, &desc);
} }
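A sketch pairing this overload with an explicitly built channel descriptor and array (all names are illustrative):

texture<uchar4, 2, cudaReadModeNormalizedFloat> imgTex;   /* hypothetical */

static void bindImage(const uchar4 *hostPixels, size_t w, size_t h)
{
    struct cudaChannelFormatDesc desc = cudaCreateChannelDesc<uchar4>();
    struct cudaArray *arr;

    cudaMallocArray(&arr, &desc, w, h);
    cudaMemcpyToArray(arr, 0, 0, hostPixels, w * h * sizeof(uchar4),
                      cudaMemcpyHostToDevice);
    cudaBindTextureToArray(imgTex, arr, desc);
}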
skipping to change at line 582 skipping to change at line 737
* *
* \return * \return
* ::cudaSuccess, * ::cudaSuccess,
* ::cudaErrorInvalidValue, * ::cudaErrorInvalidValue,
* ::cudaErrorInvalidDevicePointer, * ::cudaErrorInvalidDevicePointer,
* ::cudaErrorInvalidTexture * ::cudaErrorInvalidTexture
* \notefnerr * \notefnerr
* *
* \sa \ref ::cudaCreateChannelDesc(void) "cudaCreateChannelDesc (C++ API)" , * \sa \ref ::cudaCreateChannelDesc(void) "cudaCreateChannelDesc (C++ API)" ,
* ::cudaGetChannelDesc, ::cudaGetTextureReference, * ::cudaGetChannelDesc, ::cudaGetTextureReference,
* \ref ::cudaBindTexture(size_t*, const struct texture< T, dim, readMode>& * \ref ::cudaBindTexture(size_t*, const struct texture<T, dim, readMode>&,
, const void*, const struct cudaChannelFormatDesc&, size_t) "cudaBindTextur const void*, const struct cudaChannelFormatDesc&, size_t) "cudaBindTexture
e (C++ API)", (C++ API)",
* \ref ::cudaBindTexture(size_t*, const struct texture< T, dim, readMode>& * \ref ::cudaBindTexture(size_t*, const struct texture<T, dim, readMode>&,
, const void*, size_t) "cudaBindTexture (C++ API, inherited channel descrip const void*, size_t) "cudaBindTexture (C++ API, inherited channel descript
tor)", or)",
* \ref ::cudaBindTexture2D(size_t*, const struct texture< T, dim, readMode * \ref ::cudaBindTexture2D(size_t*, const struct texture<T, dim, readMode>
>&, const void*, const struct cudaChannelFormatDesc&, size_t, size_t, size_ &, const void*, const struct cudaChannelFormatDesc&, size_t, size_t, size_t
t) "cudaBindTexture2D (C++ API)", ) "cudaBindTexture2D (C++ API)",
* \ref ::cudaBindTexture2D(size_t*, const struct texture<T, dim, readMode>
&, const void*, size_t, size_t, size_t) "cudaBindTexture2D (C++ API, inheri
ted channel descriptor)",
* \ref ::cudaBindTextureToArray(const struct textureReference*, const stru ct cudaArray*, const struct cudaChannelFormatDesc*) "cudaBindTextureToArray (C API)", * \ref ::cudaBindTextureToArray(const struct textureReference*, const stru ct cudaArray*, const struct cudaChannelFormatDesc*) "cudaBindTextureToArray (C API)",
* \ref ::cudaBindTextureToArray(const struct texture< T, dim, readMode>&, * \ref ::cudaBindTextureToArray(const struct texture<T, dim, readMode>&, c
const struct cudaArray*, const struct cudaChannelFormatDesc&) "cudaBindText onst struct cudaArray*, const struct cudaChannelFormatDesc&) "cudaBindTextu
ureToArray (C++ API)", reToArray (C++ API)",
* \ref ::cudaUnbindTexture(const struct texture< T, dim, readMode>&) "cuda * \ref ::cudaUnbindTexture(const struct texture<T, dim, readMode>&) "cudaU
UnbindTexture (C++ API)", nbindTexture (C++ API)",
* \ref ::cudaGetTextureAlignmentOffset(size_t*, const struct texture< T, d * \ref ::cudaGetTextureAlignmentOffset(size_t*, const struct texture<T, di
im, readMode >&) "cudaGetTextureAlignmentOffset (C++ API)" m, readMode >&) "cudaGetTextureAlignmentOffset (C++ API)"
*/ */
template<class T, int dim, enum cudaTextureReadMode readMode> template<class T, int dim, enum cudaTextureReadMode readMode>
__inline__ __host__ cudaError_t cudaBindTextureToArray( __inline__ __host__ cudaError_t cudaBindTextureToArray(
const struct texture<T, dim, readMode> &tex, const struct texture<T, dim, readMode> &tex,
const struct cudaArray *array const struct cudaArray *array
) )
{ {
struct cudaChannelFormatDesc desc; struct cudaChannelFormatDesc desc;
cudaError_t err = cudaGetChannelDesc(&desc, array); cudaError_t err = cudaGetChannelDesc(&desc, array);
skipping to change at line 620 skipping to change at line 776
* *
* Unbinds the texture bound to \p tex. * Unbinds the texture bound to \p tex.
* *
* \param tex - Texture to unbind * \param tex - Texture to unbind
* *
* \return ::cudaSuccess * \return ::cudaSuccess
* \notefnerr * \notefnerr
* *
* \sa \ref ::cudaCreateChannelDesc(void) "cudaCreateChannelDesc (C++ API)" , * \sa \ref ::cudaCreateChannelDesc(void) "cudaCreateChannelDesc (C++ API)" ,
* ::cudaGetChannelDesc, ::cudaGetTextureReference, * ::cudaGetChannelDesc, ::cudaGetTextureReference,
* \ref ::cudaBindTexture(size_t*, const struct texture< T, dim, readMode>& * \ref ::cudaBindTexture(size_t*, const struct texture<T, dim, readMode>&,
, const void*, const struct cudaChannelFormatDesc&, size_t) "cudaBindTextur const void*, const struct cudaChannelFormatDesc&, size_t) "cudaBindTexture
e (C++ API)", (C++ API)",
* \ref ::cudaBindTexture(size_t*, const struct texture< T, dim, readMode>& * \ref ::cudaBindTexture(size_t*, const struct texture<T, dim, readMode>&,
, const void*, size_t) "cudaBindTexture (C++ API, inherited channel descrip const void*, size_t) "cudaBindTexture (C++ API, inherited channel descript
tor)", or)",
* \ref ::cudaBindTexture2D(size_t*, const struct texture< T, dim, readMode * \ref ::cudaBindTexture2D(size_t*, const struct texture<T, dim, readMode>
>&, const void*, const struct cudaChannelFormatDesc&, size_t, size_t, size_ &, const void*, const struct cudaChannelFormatDesc&, size_t, size_t, size_t
t) "cudaBindTexture2D (C++ API)", ) "cudaBindTexture2D (C++ API)",
* \ref ::cudaBindTextureToArray(const struct texture< T, dim, readMode>&, * \ref ::cudaBindTexture2D(size_t*, const struct texture<T, dim, readMode>
const struct cudaArray*, const struct cudaChannelFormatDesc&) "cudaBindText &, const void*, size_t, size_t, size_t) "cudaBindTexture2D (C++ API, inheri
ureToArray (C++ API)", ted channel descriptor)",
* \ref ::cudaBindTextureToArray(const struct texture< T, dim, readMode>&, * \ref ::cudaBindTextureToArray(const struct texture<T, dim, readMode>&, c
const struct cudaArray*) "cudaBindTextureToArray (C++ API, inherited channe onst struct cudaArray*, const struct cudaChannelFormatDesc&) "cudaBindTextu
l descriptor)", reToArray (C++ API)",
* \ref ::cudaBindTextureToArray(const struct texture<T, dim, readMode>&, c
onst struct cudaArray*) "cudaBindTextureToArray (C++ API, inherited channel
descriptor)",
* \ref ::cudaUnbindTexture(const struct textureReference*) "cudaUnbindText ure (C API)", * \ref ::cudaUnbindTexture(const struct textureReference*) "cudaUnbindText ure (C API)",
* \ref ::cudaGetTextureAlignmentOffset(size_t*, const struct texture< T, d im, readMode >&) "cudaGetTextureAlignmentOffset (C++ API)" * \ref ::cudaGetTextureAlignmentOffset(size_t*, const struct texture<T, di m, readMode >&) "cudaGetTextureAlignmentOffset (C++ API)"
*/ */
template<class T, int dim, enum cudaTextureReadMode readMode> template<class T, int dim, enum cudaTextureReadMode readMode>
__inline__ __host__ cudaError_t cudaUnbindTexture( __inline__ __host__ cudaError_t cudaUnbindTexture(
const struct texture<T, dim, readMode> &tex const struct texture<T, dim, readMode> &tex
) )
{ {
return cudaUnbindTexture(&tex); return cudaUnbindTexture(&tex);
} }
/************************************************************************** ***** /************************************************************************** *****
skipping to change at line 659 skipping to change at line 816
* \param tex - Texture to get offset of * \param tex - Texture to get offset of
* *
* \return * \return
* ::cudaSuccess, * ::cudaSuccess,
* ::cudaErrorInvalidTexture, * ::cudaErrorInvalidTexture,
* ::cudaErrorInvalidTextureBinding * ::cudaErrorInvalidTextureBinding
* \notefnerr * \notefnerr
* *
* \sa \ref ::cudaCreateChannelDesc(void) "cudaCreateChannelDesc (C++ API)" , * \sa \ref ::cudaCreateChannelDesc(void) "cudaCreateChannelDesc (C++ API)" ,
* ::cudaGetChannelDesc, ::cudaGetTextureReference, * ::cudaGetChannelDesc, ::cudaGetTextureReference,
* \ref ::cudaBindTexture(size_t*, const struct texture< T, dim, readMode>& * \ref ::cudaBindTexture(size_t*, const struct texture<T, dim, readMode>&,
, const void*, const struct cudaChannelFormatDesc&, size_t) "cudaBindTextur const void*, const struct cudaChannelFormatDesc&, size_t) "cudaBindTexture
e (C++ API)", (C++ API)",
* \ref ::cudaBindTexture(size_t*, const struct texture< T, dim, readMode>& * \ref ::cudaBindTexture(size_t*, const struct texture<T, dim, readMode>&,
, const void*, size_t) "cudaBindTexture (C++ API, inherited channel descrip const void*, size_t) "cudaBindTexture (C++ API, inherited channel descript
tor)", or)",
* \ref ::cudaBindTexture2D(size_t*, const struct texture< T, dim, readMode * \ref ::cudaBindTexture2D(size_t*, const struct texture<T, dim, readMode>
>&, const void*, const struct cudaChannelFormatDesc&, size_t, size_t, size_ &, const void*, const struct cudaChannelFormatDesc&, size_t, size_t, size_t
t) "cudaBindTexture2D (C++ API)", ) "cudaBindTexture2D (C++ API)",
* \ref ::cudaBindTextureToArray(const struct texture< T, dim, readMode>&, * \ref ::cudaBindTexture2D(size_t*, const struct texture<T, dim, readMode>
const struct cudaArray*, const struct cudaChannelFormatDesc&) "cudaBindText &, const void*, size_t, size_t, size_t) "cudaBindTexture2D (C++ API, inheri
ureToArray (C++ API)", ted channel descriptor)",
* \ref ::cudaBindTextureToArray(const struct texture< T, dim, readMode>&, * \ref ::cudaBindTextureToArray(const struct texture<T, dim, readMode>&, c
const struct cudaArray*) "cudaBindTextureToArray (C++ API, inherited channe onst struct cudaArray*, const struct cudaChannelFormatDesc&) "cudaBindTextu
l descriptor)", reToArray (C++ API)",
* \ref ::cudaUnbindTexture(const struct texture< T, dim, readMode>&) "cuda * \ref ::cudaBindTextureToArray(const struct texture<T, dim, readMode>&, c
UnbindTexture (C++ API)", onst struct cudaArray*) "cudaBindTextureToArray (C++ API, inherited channel
descriptor)",
* \ref ::cudaUnbindTexture(const struct texture<T, dim, readMode>&) "cudaU
nbindTexture (C++ API)",
* \ref ::cudaGetTextureAlignmentOffset(size_t*, const struct textureRefere nce*) "cudaGetTextureAlignmentOffset (C API)" * \ref ::cudaGetTextureAlignmentOffset(size_t*, const struct textureRefere nce*) "cudaGetTextureAlignmentOffset (C API)"
*/ */
template<class T, int dim, enum cudaTextureReadMode readMode> template<class T, int dim, enum cudaTextureReadMode readMode>
__inline__ __host__ cudaError_t cudaGetTextureAlignmentOffset( __inline__ __host__ cudaError_t cudaGetTextureAlignmentOffset(
size_t *offset, size_t *offset,
const struct texture<T, dim, readMode> &tex const struct texture<T, dim, readMode> &tex
) )
{ {
return cudaGetTextureAlignmentOffset(offset, &tex); return cudaGetTextureAlignmentOffset(offset, &tex);
} }
/** @} */ /* END CUDART_HIGHLEVEL */
/************************************************************************** ***** /************************************************************************** *****
* * * *
* * * *
* * * *
*************************************************************************** ****/ *************************************************************************** ****/
/** /**
* \ingroup CUDART_HIGHLEVEL
* \brief Sets the preferred cache configuration for a device function * \brief Sets the preferred cache configuration for a device function
* *
* On devices where the L1 cache and shared memory use the same hardware * On devices where the L1 cache and shared memory use the same hardware
* resources, this sets through \p cacheConfig the preferred cache configur ation * resources, this sets through \p cacheConfig the preferred cache configur ation
* for the function specified via \p func. This is only a preference. The * for the function specified via \p func. This is only a preference. The
* runtime will use the requested configuration if possible, but it is free to * runtime will use the requested configuration if possible, but it is free to
* choose a different configuration if required to execute \p func. * choose a different configuration if required to execute \p func.
* *
* \p func can either be a pointer to a function that executes * \p func can either be a pointer to a function that executes
* on the device, or it can be a character string specifying the * on the device, or it can be a character string specifying the
* fully-decorated (C++) name for a function that executes on the device. * fully-decorated (C++) name for a function that executes on the device.
* The parameter specified by \p func must be declared as a \p __global__ * The parameter specified by \p func must be declared as a \p __global__
* function. If the specified function does not exist, * function. If the specified function does not exist,
* then ::cudaErrorInvalidDeviceFunction is returned. * then ::cudaErrorInvalidDeviceFunction is returned.
* *
* This setting does nothing on devices where the size of the L1 cache and * This setting does nothing on devices where the size of the L1 cache and
* shared memory are fixed. * shared memory are fixed.
* *
 * Launching a kernel with a different preference than the most recent
 * preference setting may insert a device-side synchronization point.
 *
 * The supported cache configurations are:
 * - ::cudaFuncCachePreferNone: no preference for shared memory or L1 (default)
 * - ::cudaFuncCachePreferShared: prefer larger shared memory and smaller L1 cache
 * - ::cudaFuncCachePreferL1: prefer larger L1 cache and smaller shared memory
 *
 * \param func        - Char string naming device function
 * \param cacheConfig - Requested cache configuration
* *
* \return * \return
* ::cudaSuccess, * ::cudaSuccess,
* ::cudaErrorInitializationError, * ::cudaErrorInitializationError,
* ::cudaErrorInvalidDeviceFunction * ::cudaErrorInvalidDeviceFunction
* \notefnerr * \notefnerr
* *
* \sa ::cudaConfigureCall, * \sa ::cudaConfigureCall,
* \ref ::cudaFuncSetCacheConfig(const char*, enum cudaFuncCache) "cudaFunc SetCacheConfig (C API)", * \ref ::cudaFuncSetCacheConfig(const char*, enum cudaFuncCache) "cudaFunc SetCacheConfig (C API)",
* \ref ::cudaFuncGetAttributes(struct cudaFuncAttributes*, T*) "cudaFuncGe tAttributes (C++ API)", * \ref ::cudaFuncGetAttributes(struct cudaFuncAttributes*, T*) "cudaFuncGe tAttributes (C++ API)",
* \ref ::cudaLaunch(const char*) "cudaLaunch (C API)", * \ref ::cudaLaunch(const char*) "cudaLaunch (C API)",
* ::cudaSetDoubleForDevice, * ::cudaSetDoubleForDevice,
* ::cudaSetDoubleForHost, * ::cudaSetDoubleForHost,
* \ref ::cudaSetupArgument(T,size_t) "cudaSetupArgument (C++ API)" * \ref ::cudaSetupArgument(T, size_t) "cudaSetupArgument (C++ API)",
* ::cudaThreadGetCacheConfig,
* ::cudaThreadSetCacheConfig
*/ */
template<class T> template<class T>
__inline__ __host__ cudaError_t cudaFuncSetCacheConfig( __inline__ __host__ cudaError_t cudaFuncSetCacheConfig(
T *func, T *func,
enum cudaFuncCache cacheConfig enum cudaFuncCache cacheConfig
) )
{ {
return cudaFuncSetCacheConfig((const char*)func, cacheConfig); return cudaFuncSetCacheConfig((const char*)func, cacheConfig);
} }
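As a concrete illustration of the template above, a minimal usage sketch
(MyKernel is a hypothetical __global__ function, not part of this header):

    __global__ void MyKernel(float *data) { /* ... */ }

    void configureCache(void)
    {
        /* Hint that MyKernel prefers a larger L1 cache; the runtime may
           still choose another configuration if required. */
        cudaError_t err = cudaFuncSetCacheConfig(MyKernel, cudaFuncCachePreferL1);
        if (err != cudaSuccess) {
            /* e.g. cudaErrorInvalidDeviceFunction if MyKernel is unknown */
        }
    }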
/** /**
* \ingroup CUDART_HIGHLEVEL
* \brief \hl Launches a device function * \brief \hl Launches a device function
* *
* Launches the function \p entry on the device. The parameter \p entry can * Launches the function \p entry on the device. The parameter \p entry can
* either be a function that executes on the device, or it can be a charact er * either be a function that executes on the device, or it can be a charact er
* string, naming a function that executes on the device. The parameter * string, naming a function that executes on the device. The parameter
* specified by \p entry must be declared as a \p __global__ function. * specified by \p entry must be declared as a \p __global__ function.
* \ref ::cudaLaunch(T*) "cudaLaunch()" must be preceded by a call to * \ref ::cudaLaunch(T*) "cudaLaunch()" must be preceded by a call to
* ::cudaConfigureCall() since it pops the data that was pushed by * ::cudaConfigureCall() since it pops the data that was pushed by
* ::cudaConfigureCall() from the execution stack. * ::cudaConfigureCall() from the execution stack.
* *
* \param entry - Device function pointer or char string naming device func tion * \param entry - Device function pointer or char string naming device func tion
* to execute * to execute
* *
* \return * \return
* ::cudaSuccess, * ::cudaSuccess,
* ::cudaErrorInvalidDeviceFunction, * ::cudaErrorInvalidDeviceFunction,
* ::cudaErrorInvalidConfiguration, * ::cudaErrorInvalidConfiguration,
* ::cudaErrorLaunchFailure, * ::cudaErrorLaunchFailure,
* ::cudaErrorPriorLaunchFailure,
* ::cudaErrorLaunchTimeout, * ::cudaErrorLaunchTimeout,
* ::cudaErrorLaunchOutOfResources, * ::cudaErrorLaunchOutOfResources,
* ::cudaErrorSharedObjectSymbolNotFound, * ::cudaErrorSharedObjectSymbolNotFound,
* ::cudaErrorSharedObjectInitFailed * ::cudaErrorSharedObjectInitFailed
* \notefnerr * \notefnerr
* *
* \sa ::cudaConfigureCall, * \sa ::cudaConfigureCall,
* \ref ::cudaFuncSetCacheConfig(T*, enum cudaFuncCache) "cudaFuncSetCacheC onfig (C++ API)", * \ref ::cudaFuncSetCacheConfig(T*, enum cudaFuncCache) "cudaFuncSetCacheC onfig (C++ API)",
* \ref ::cudaFuncGetAttributes(struct cudaFuncAttributes*, T*) "cudaFuncGe tAttributes (C++ API)", * \ref ::cudaFuncGetAttributes(struct cudaFuncAttributes*, T*) "cudaFuncGe tAttributes (C++ API)",
* \ref ::cudaLaunch(const char*) "cudaLaunch (C API)", * \ref ::cudaLaunch(const char*) "cudaLaunch (C API)",
* ::cudaSetDoubleForDevice, * ::cudaSetDoubleForDevice,
* ::cudaSetDoubleForHost, * ::cudaSetDoubleForHost,
* \ref ::cudaSetupArgument(T,size_t) "cudaSetupArgument (C++ API)" * \ref ::cudaSetupArgument(T, size_t) "cudaSetupArgument (C++ API)",
* ::cudaThreadGetCacheConfig,
* ::cudaThreadSetCacheConfig
*/ */
template<class T> template<class T>
__inline__ __host__ cudaError_t cudaLaunch( __inline__ __host__ cudaError_t cudaLaunch(
T *entry T *entry
) )
{ {
return cudaLaunch((const char*)entry); return cudaLaunch((const char*)entry);
} }
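For orientation, a hedged sketch of the launch sequence these calls implement
(kernel and arg are hypothetical names; the <<<...>>> execution configuration
syntax expands to essentially this sequence):

    __global__ void kernel(int *arg);

    void launchKernel(int *arg)
    {
        dim3 grid(16), block(256);
        cudaConfigureCall(grid, block);  /* push the launch configuration    */
        cudaSetupArgument(arg, 0);       /* push the argument at offset 0    */
        cudaLaunch(kernel);              /* pop configuration + args, launch */
    }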
/** /**
* \ingroup CUDART_HIGHLEVEL
* \brief \hl Find out attributes for a given function * \brief \hl Find out attributes for a given function
* *
* This function obtains the attributes of a function specified via \p entr y. * This function obtains the attributes of a function specified via \p entr y.
* The parameter \p entry can either be a pointer to a function that execut es * The parameter \p entry can either be a pointer to a function that execut es
* on the device, or it can be a character string specifying the * on the device, or it can be a character string specifying the
* fully-decorated (C++) name of a function that executes on the device. Th e * fully-decorated (C++) name of a function that executes on the device. Th e
* parameter specified by \p entry must be declared as a \p __global__ * parameter specified by \p entry must be declared as a \p __global__
* function. The fetched attributes are placed in \p attr. If the specified * function. The fetched attributes are placed in \p attr. If the specified
* function does not exist, then ::cudaErrorInvalidDeviceFunction is return ed. * function does not exist, then ::cudaErrorInvalidDeviceFunction is return ed.
* *
* Note that some function attributes such as
* \ref ::cudaFuncAttributes::maxThreadsPerBlock "maxThreadsPerBlock"
* may vary based on the device that is currently being used.
*
* \param attr - Return pointer to function's attributes * \param attr - Return pointer to function's attributes
* \param entry - Function to get attributes of * \param entry - Function to get attributes of
* *
* \return * \return
* ::cudaSuccess, * ::cudaSuccess,
* ::cudaErrorInitializationError, * ::cudaErrorInitializationError,
* ::cudaErrorInvalidDeviceFunction * ::cudaErrorInvalidDeviceFunction
* \notefnerr * \notefnerr
* *
* \sa ::cudaConfigureCall, * \sa ::cudaConfigureCall,
* \ref ::cudaFuncSetCacheConfig(T*, enum cudaFuncCache) "cudaFuncSetCacheC onfig (C++ API)", * \ref ::cudaFuncSetCacheConfig(T*, enum cudaFuncCache) "cudaFuncSetCacheC onfig (C++ API)",
* \ref ::cudaFuncGetAttributes(struct cudaFuncAttributes*, const char*) "c udaFuncGetAttributes (C API)", * \ref ::cudaFuncGetAttributes(struct cudaFuncAttributes*, const char*) "c udaFuncGetAttributes (C API)",
* \ref ::cudaLaunch(T*) "cudaLaunch (C++ API)", * \ref ::cudaLaunch(T*) "cudaLaunch (C++ API)",
* ::cudaSetDoubleForDevice, * ::cudaSetDoubleForDevice,
* ::cudaSetDoubleForHost, * ::cudaSetDoubleForHost,
* \ref ::cudaSetupArgument(T,size_t) "cudaSetupArgument (C++ API)" * \ref ::cudaSetupArgument(T, size_t) "cudaSetupArgument (C++ API)"
*/ */
template<class T> template<class T>
__inline__ __host__ cudaError_t cudaFuncGetAttributes( __inline__ __host__ cudaError_t cudaFuncGetAttributes(
struct cudaFuncAttributes *attr, struct cudaFuncAttributes *attr,
T *entry T *entry
) )
{ {
return cudaFuncGetAttributes(attr, (const char*)entry); return cudaFuncGetAttributes(attr, (const char*)entry);
} }
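A short usage sketch (MyKernel again stands in for any __global__ function);
note that maxThreadsPerBlock reflects the currently selected device:

    struct cudaFuncAttributes attr;
    if (cudaFuncGetAttributes(&attr, MyKernel) == cudaSuccess) {
        /* Clamp the launch to what this kernel can actually support. */
        int threadsPerBlock = attr.maxThreadsPerBlock;
        (void)threadsPerBlock;
    }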
/** /**
* \ingroup CUDART_HIGHLEVEL
* \brief \hl Binds an array to a surface * \brief \hl Binds an array to a surface
* *
* Binds the CUDA array \p array to the surface reference \p surf. * Binds the CUDA array \p array to the surface reference \p surf.
* \p desc describes how the memory is interpreted when dealing with * \p desc describes how the memory is interpreted when dealing with
* the surface. Any CUDA array previously bound to \p surf is unbound. * the surface. Any CUDA array previously bound to \p surf is unbound.
* *
* \param surf - Surface to bind * \param surf - Surface to bind
* \param array - Memory array on device * \param array - Memory array on device
* \param desc - Channel format * \param desc - Channel format
* *
* \return * \return
* ::cudaSuccess, * ::cudaSuccess,
* ::cudaErrorInvalidValue, * ::cudaErrorInvalidValue,
* ::cudaErrorInvalidSurface * ::cudaErrorInvalidSurface
* \notefnerr * \notefnerr
* *
* \sa \ref ::cudaBindSurfaceToArray(const struct surfaceReference*, const struct cudaArray*, const struct cudaChannelFormatDesc*) "cudaBindSurfaceToA rray (C API)", * \sa \ref ::cudaBindSurfaceToArray(const struct surfaceReference*, const struct cudaArray*, const struct cudaChannelFormatDesc*) "cudaBindSurfaceToA rray (C API)",
 * \ref ::cudaBindSurfaceToArray(const struct surface<T, dim>&, const struct cudaArray*) "cudaBindSurfaceToArray (C++ API, inherited channel descriptor)"
*/ */
template<class T, int dim> template<class T, int dim>
__inline__ __host__ cudaError_t cudaBindSurfaceToArray( __inline__ __host__ cudaError_t cudaBindSurfaceToArray(
const struct surface<T, dim> &surf, const struct surface<T, dim> &surf,
const struct cudaArray *array, const struct cudaArray *array,
const struct cudaChannelFormatDesc &desc const struct cudaChannelFormatDesc &desc
) )
{ {
return cudaBindSurfaceToArray(&surf, array, &desc); return cudaBindSurfaceToArray(&surf, array, &desc);
} }
/** /**
* \ingroup CUDART_HIGHLEVEL
* \brief \hl Binds an array to a surface * \brief \hl Binds an array to a surface
* *
* Binds the CUDA array \p array to the surface reference \p surf. * Binds the CUDA array \p array to the surface reference \p surf.
* The channel descriptor is inherited from the CUDA array. Any CUDA array * The channel descriptor is inherited from the CUDA array. Any CUDA array
* previously bound to \p surf is unbound. * previously bound to \p surf is unbound.
* *
* \param surf - Surface to bind * \param surf - Surface to bind
* \param array - Memory array on device * \param array - Memory array on device
* *
* \return * \return
* ::cudaSuccess, * ::cudaSuccess,
* ::cudaErrorInvalidValue, * ::cudaErrorInvalidValue,
* ::cudaErrorInvalidSurface * ::cudaErrorInvalidSurface
* \notefnerr * \notefnerr
* *
* \sa \ref ::cudaBindSurfaceToArray(const struct surfaceReference*, const struct cudaArray*, const struct cudaChannelFormatDesc*) "cudaBindSurfaceToA rray (C API)", * \sa \ref ::cudaBindSurfaceToArray(const struct surfaceReference*, const struct cudaArray*, const struct cudaChannelFormatDesc*) "cudaBindSurfaceToA rray (C API)",
 * \ref ::cudaBindSurfaceToArray(const struct surface<T, dim>&, const struct cudaArray*, const struct cudaChannelFormatDesc&) "cudaBindSurfaceToArray (C++ API)"
*/ */
template<class T, int dim> template<class T, int dim>
__inline__ __host__ cudaError_t cudaBindSurfaceToArray( __inline__ __host__ cudaError_t cudaBindSurfaceToArray(
const struct surface<T, dim> &surf, const struct surface<T, dim> &surf,
const struct cudaArray *array const struct cudaArray *array
) )
{ {
struct cudaChannelFormatDesc desc; struct cudaChannelFormatDesc desc;
cudaError_t err = cudaGetChannelDesc(&desc, array); cudaError_t err = cudaGetChannelDesc(&desc, array);
return err == cudaSuccess ? cudaBindSurfaceToArray(surf, array, desc) : e rr; return err == cudaSuccess ? cudaBindSurfaceToArray(surf, array, desc) : e rr;
} }
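A hedged sketch of the inherited-descriptor overload (outputSurface is a
hypothetical file-scope surface reference, arr a previously created array):

    surface<void, 2> outputSurface;

    void bindOutput(struct cudaArray *arr)
    {
        /* The channel descriptor is taken from arr itself. */
        cudaError_t err = cudaBindSurfaceToArray(outputSurface, arr);
        if (err != cudaSuccess) {
            /* e.g. cudaErrorInvalidSurface */
        }
    }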
#endif /* __CUDACC__ */ #endif /* __CUDACC__ */
/** @} */ /* END CUDART_HIGHLEVEL */
#endif /* __cplusplus */ #endif /* __cplusplus */
#endif /* !__CUDA_RUNTIME_H__ */ #endif /* !__CUDA_RUNTIME_H__ */
 End of changes. 41 change blocks. 
147 lines changed or deleted 367 lines changed or added


 cuda_vdpau_interop.h   cuda_vdpau_interop.h 
skipping to change at line 39 skipping to change at line 39
* source code with only those rights set forth herein. * source code with only those rights set forth herein.
* *
* Any use of this source code in individual and commercial software must * Any use of this source code in individual and commercial software must
* include, in the user documentation and internal comments to the code, * include, in the user documentation and internal comments to the code,
* the above Disclaimer and U.S. Government End Users Notice. * the above Disclaimer and U.S. Government End Users Notice.
*/ */
#if !defined(__CUDA_VDPAU_INTEROP_H__) #if !defined(__CUDA_VDPAU_INTEROP_H__)
#define __CUDA_VDPAU_INTEROP_H__ #define __CUDA_VDPAU_INTEROP_H__
/*******************************************************************************
*
*
*
*
*
*
*******************************************************************************/
#include "builtin_types.h" #include "builtin_types.h"
#include "host_defines.h" #include "host_defines.h"
#include <vdpau/vdpau.h> #include <vdpau/vdpau.h>
#if defined(__cplusplus) #if defined(__cplusplus)
extern "C" { extern "C" {
#endif /* __cplusplus */ #endif /* __cplusplus */
/**
 * \addtogroup CUDART_VDPAU VDPAU Interoperability
 * This section describes the VDPAU interoperability functions of the CUDA
 * runtime application programming interface.
 *
 * @{
 */
/**
* \brief Gets the CUDA device associated with a VdpDevice.
*
* Returns the CUDA device associated with a VdpDevice, if applicable.
*
* \param device - Returns the device associated with vdpDevice, or -1 if
* the device associated with vdpDevice is not a compute device.
* \param vdpDevice - A VdpDevice handle
* \param vdpGetProcAddress - VDPAU's VdpGetProcAddress function pointer
*
* \return
* ::cudaSuccess
* \notefnerr
*
* \sa ::cudaVDPAUSetVDPAUDevice
*/
extern __host__ cudaError_t CUDARTAPI cudaVDPAUGetDevice(int *device, VdpDe vice vdpDevice, VdpGetProcAddress *vdpGetProcAddress); extern __host__ cudaError_t CUDARTAPI cudaVDPAUGetDevice(int *device, VdpDe vice vdpDevice, VdpGetProcAddress *vdpGetProcAddress);
/**
* \brief Sets the CUDA device for use with VDPAU interoperability
*
* Records \p device as the device on which the active host thread executes
* the device code. Records the thread as using VDPAU interoperability.
* If the host thread has already initialized the CUDA runtime by
* calling non-device management runtime functions or if there exists a CUD
A
* driver context active on the host thread, then this call returns
* ::cudaErrorSetOnActiveProcess.
*
* \param device - Device to use for VDPAU interoperability
* \param vdpDevice - The VdpDevice to interoperate with
* \param vdpGetProcAddress - VDPAU's VdpGetProcAddress function pointer
*
* \return
* ::cudaSuccess,
* ::cudaErrorInvalidDevice,
* ::cudaErrorSetOnActiveProcess
* \notefnerr
*
* \sa ::cudaGraphicsVDPAURegisterVideoSurface,
* ::cudaGraphicsVDPAURegisterOutputSurface
*/
extern __host__ cudaError_t CUDARTAPI cudaVDPAUSetVDPAUDevice(int device, V dpDevice vdpDevice, VdpGetProcAddress *vdpGetProcAddress); extern __host__ cudaError_t CUDARTAPI cudaVDPAUSetVDPAUDevice(int device, V dpDevice vdpDevice, VdpGetProcAddress *vdpGetProcAddress);
/**
* \brief Register a VdpVideoSurface object
*
* Registers the VdpVideoSurface specified by \p vdpSurface for access by C
UDA.
* A handle to the registered object is returned as \p resource.
* The surface's intended usage is specified using \p flags, as follows:
*
* - ::cudaGraphicsMapFlagsNone: Specifies no hints about how this
* resource will be used. It is therefore assumed that this resource will
be
* read from and written to by CUDA. This is the default value.
* - ::cudaGraphicsMapFlagsReadOnly: Specifies that CUDA
* will not write to this resource.
* - ::cudaGraphicsMapFlagsWriteDiscard: Specifies that
* CUDA will not read from this resource and will write over the
* entire contents of the resource, so none of the data previously
* stored in the resource will be preserved.
*
* \param resource - Pointer to the returned object handle
* \param vdpSurface - VDPAU object to be registered
* \param flags - Map flags
*
* \return
* ::cudaSuccess,
* ::cudaErrorInvalidDevice,
* ::cudaErrorInvalidValue,
* ::cudaErrorInvalidResourceHandle,
* ::cudaErrorUnknown
* \notefnerr
*
* \sa
* ::cudaVDPAUSetVDPAUDevice
* ::cudaGraphicsUnregisterResource,
* ::cudaGraphicsSubResourceGetMappedArray
*/
extern __host__ cudaError_t CUDARTAPI cudaGraphicsVDPAURegisterVideoSurface (struct cudaGraphicsResource **resource, VdpVideoSurface vdpSurface, unsign ed int flags); extern __host__ cudaError_t CUDARTAPI cudaGraphicsVDPAURegisterVideoSurface (struct cudaGraphicsResource **resource, VdpVideoSurface vdpSurface, unsign ed int flags);
/**
* \brief Register a VdpOutputSurface object
*
* Registers the VdpOutputSurface specified by \p vdpSurface for access by
CUDA.
* A handle to the registered object is returned as \p resource.
* The surface's intended usage is specified using \p flags, as follows:
*
* - ::cudaGraphicsMapFlagsNone: Specifies no hints about how this
* resource will be used. It is therefore assumed that this resource will
be
* read from and written to by CUDA. This is the default value.
* - ::cudaGraphicsMapFlagsReadOnly: Specifies that CUDA
* will not write to this resource.
* - ::cudaGraphicsMapFlagsWriteDiscard: Specifies that
* CUDA will not read from this resource and will write over the
* entire contents of the resource, so none of the data previously
* stored in the resource will be preserved.
*
* \param resource - Pointer to the returned object handle
* \param vdpSurface - VDPAU object to be registered
* \param flags - Map flags
*
* \return
* ::cudaSuccess,
* ::cudaErrorInvalidDevice,
* ::cudaErrorInvalidValue,
* ::cudaErrorInvalidResourceHandle,
* ::cudaErrorUnknown
* \notefnerr
*
* \sa
* ::cudaVDPAUSetVDPAUDevice
* ::cudaGraphicsUnregisterResource,
* ::cudaGraphicsSubResourceGetMappedArray
*/
extern __host__ cudaError_t CUDARTAPI cudaGraphicsVDPAURegisterOutputSurfac e(struct cudaGraphicsResource **resource, VdpOutputSurface vdpSurface, unsi gned int flags); extern __host__ cudaError_t CUDARTAPI cudaGraphicsVDPAURegisterOutputSurfac e(struct cudaGraphicsResource **resource, VdpOutputSurface vdpSurface, unsi gned int flags);
/** @} */ /* END CUDART_VDPAU */
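Taken together, a hedged sketch of the intended call order (vdpDevice,
getProcAddress and videoSurface are hypothetical handles obtained from VDPAU):

    int dev;
    cudaVDPAUGetDevice(&dev, vdpDevice, getProcAddress);
    cudaVDPAUSetVDPAUDevice(dev, vdpDevice, getProcAddress); /* before other runtime calls */

    struct cudaGraphicsResource *res;
    cudaGraphicsVDPAURegisterVideoSurface(&res, videoSurface,
                                          cudaGraphicsMapFlagsReadOnly);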
#if defined(__cplusplus) #if defined(__cplusplus)
} }
#endif /* __cplusplus */ #endif /* __cplusplus */
#endif /* __CUDA_VDPAU_INTEROP_H__ */ #endif /* __CUDA_VDPAU_INTEROP_H__ */
 End of changes. 7 change blocks. 
21 lines changed or deleted 124 lines changed or added


 cufft.h   cufft.h 
skipping to change at line 69 skipping to change at line 69
// CUFFT API function return values // CUFFT API function return values
typedef enum cufftResult_t { typedef enum cufftResult_t {
CUFFT_SUCCESS = 0x0, CUFFT_SUCCESS = 0x0,
CUFFT_INVALID_PLAN = 0x1, CUFFT_INVALID_PLAN = 0x1,
CUFFT_ALLOC_FAILED = 0x2, CUFFT_ALLOC_FAILED = 0x2,
CUFFT_INVALID_TYPE = 0x3, CUFFT_INVALID_TYPE = 0x3,
CUFFT_INVALID_VALUE = 0x4, CUFFT_INVALID_VALUE = 0x4,
CUFFT_INTERNAL_ERROR = 0x5, CUFFT_INTERNAL_ERROR = 0x5,
CUFFT_EXEC_FAILED = 0x6, CUFFT_EXEC_FAILED = 0x6,
CUFFT_SETUP_FAILED = 0x7, CUFFT_SETUP_FAILED = 0x7,
CUFFT_INVALID_SIZE = 0x8 CUFFT_INVALID_SIZE = 0x8,
CUFFT_UNALIGNED_DATA = 0x9
} cufftResult; } cufftResult;
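Every CUFFT entry point returns one of these codes; a minimal checking sketch
(cufftPlan1d and CUFFT_C2C are declared further down in this header):

    cufftHandle plan;
    cufftResult r = cufftPlan1d(&plan, 1024, CUFFT_C2C, 1);
    if (r != CUFFT_SUCCESS) { /* e.g. CUFFT_ALLOC_FAILED or CUFFT_INVALID_SIZE */ }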
// CUFFT defines and supports the following data types // CUFFT defines and supports the following data types
// cufftHandle is a handle type used to store and access CUFFT plans. // cufftHandle is a handle type used to store and access CUFFT plans.
typedef unsigned int cufftHandle; typedef unsigned int cufftHandle;
// cufftReal is a single-precision, floating-point real data type. // cufftReal is a single-precision, floating-point real data type.
// cufftDoubleReal is a double-precision, real data type. // cufftDoubleReal is a double-precision, real data type.
typedef float cufftReal; typedef float cufftReal;
 End of changes. 1 change blocks. 
1 lines changed or deleted 2 lines changed or added


 device_functions.h   device_functions.h 
skipping to change at line 76 skipping to change at line 76
extern __device__ long long int __mul64hi(long long int, long long int); extern __device__ long long int __mul64hi(long long int, long long int);
/*DEVICE_BUILTIN*/ /*DEVICE_BUILTIN*/
extern __device__ unsigned long long int __umul64hi(unsigned long long int, unsigned long long int); extern __device__ unsigned long long int __umul64hi(unsigned long long int, unsigned long long int);
/*DEVICE_BUILTIN*/ /*DEVICE_BUILTIN*/
extern __device__ float __int_as_float(int); extern __device__ float __int_as_float(int);
/*DEVICE_BUILTIN*/ /*DEVICE_BUILTIN*/
extern __device__ int __float_as_int(float); extern __device__ int __float_as_int(float);
/*DEVICE_BUILTIN*/ /*DEVICE_BUILTIN*/
extern __device__ void __synchronous_start(int);
/*DEVICE_BUILTIN*/
extern __device__ void __synchronous_end(void);
/*DEVICE_BUILTIN*/
extern __device__ void __syncthreads(void); extern __device__ void __syncthreads(void);
/*DEVICE_BUILTIN*/ /*DEVICE_BUILTIN*/
extern __device__ void __prof_trigger(int); extern __device__ void __prof_trigger(int);
/*DEVICE_BUILTIN*/ /*DEVICE_BUILTIN*/
extern __device__ void __threadfence(void); extern __device__ void __threadfence(void);
/*DEVICE_BUILTIN*/ /*DEVICE_BUILTIN*/
extern __device__ void __threadfence_block(void); extern __device__ void __threadfence_block(void);
/*DEVICE_BUILTIN*/ /*DEVICE_BUILTIN*/
extern __device__ void __trap(void); extern __device__ void __trap(void);
/*DEVICE_BUILTIN*/ /*DEVICE_BUILTIN*/
 End of changes. 1 change blocks. 
4 lines changed or deleted 0 lines changed or added
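The __synchronous_start/__synchronous_end builtins dropped above appear to
belong to the retired device-emulation path; in-kernel barrier semantics are
expressed with __syncthreads(), as in this hedged sketch (reverseBlock is a
hypothetical kernel):

    __global__ void reverseBlock(int *d, int n)
    {
        __shared__ int staged[256];
        int t = threadIdx.x;
        if (t < n) staged[t] = d[t];
        __syncthreads();            /* every thread sees the filled buffer */
        if (t < n) d[t] = staged[n - 1 - t];
    }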


 driver_functions.h   driver_functions.h 
skipping to change at line 39 skipping to change at line 39
* source code with only those rights set forth herein. * source code with only those rights set forth herein.
* *
* Any use of this source code in individual and commercial software must * Any use of this source code in individual and commercial software must
* include, in the user documentation and internal comments to the code, * include, in the user documentation and internal comments to the code,
* the above Disclaimer and U.S. Government End Users Notice. * the above Disclaimer and U.S. Government End Users Notice.
*/ */
#if !defined(__DRIVER_FUNCTIONS_H__) #if !defined(__DRIVER_FUNCTIONS_H__)
#define __DRIVER_FUNCTIONS_H__ #define __DRIVER_FUNCTIONS_H__
/*******************************************************************************
*
*
*
*
*
*
*******************************************************************************/
#include "builtin_types.h" #include "builtin_types.h"
#include "host_defines.h" #include "host_defines.h"
#include "driver_types.h" #include "driver_types.h"
/**
 * \addtogroup CUDART_MEMORY
 *
 * @{
 */
/**
* \brief Returns a ::cudaPitchedPtr based on input parameters
*
* Returns a ::cudaPitchedPtr based on the specified input parameters \p d,
* \p p, \p xsz, and \p ysz.
*
* \param d - Pointer to allocated memory
* \param p - Pitch of allocated memory in bytes
* \param xsz - Logical width of allocation in elements
* \param ysz - Logical height of allocation in elements
*
* \return
* ::cudaPitchedPtr specified by \p d, \p p, \p xsz, and \p ysz
*
* \sa make_cudaExtent, make_cudaPos
*/
static __inline__ __host__ struct cudaPitchedPtr make_cudaPitchedPtr(void * d, size_t p, size_t xsz, size_t ysz) static __inline__ __host__ struct cudaPitchedPtr make_cudaPitchedPtr(void * d, size_t p, size_t xsz, size_t ysz)
{ {
struct cudaPitchedPtr s; struct cudaPitchedPtr s;
s.ptr = d; s.ptr = d;
s.pitch = p; s.pitch = p;
s.xsize = xsz; s.xsize = xsz;
s.ysize = ysz; s.ysize = ysz;
return s; return s;
} }
/**
* \brief Returns a ::cudaPos based on input parameters
*
* Returns a ::cudaPos based on the specified input parameters \p x,
* \p y, and \p z.
*
* \param x - X position
* \param y - Y position
* \param z - Z position
*
* \return
* ::cudaPos specified by \p x, \p y, and \p z
*
* \sa make_cudaExtent, make_cudaPitchedPtr
*/
static __inline__ __host__ struct cudaPos make_cudaPos(size_t x, size_t y, size_t z) static __inline__ __host__ struct cudaPos make_cudaPos(size_t x, size_t y, size_t z)
{ {
struct cudaPos p; struct cudaPos p;
p.x = x; p.x = x;
p.y = y; p.y = y;
p.z = z; p.z = z;
return p; return p;
} }
/**
* \brief Returns a ::cudaExtent based on input parameters
*
* Returns a ::cudaExtent based on the specified input parameters \p w,
* \p h, and \p d.
*
* \param w - Width in bytes
* \param h - Height in elements
* \param d - Depth in elements
*
* \return
* ::cudaExtent specified by \p w, \p h, and \p d
*
* \sa make_cudaPitchedPtr, make_cudaPos
*/
static __inline__ __host__ struct cudaExtent make_cudaExtent(size_t w, size _t h, size_t d) static __inline__ __host__ struct cudaExtent make_cudaExtent(size_t w, size _t h, size_t d)
{ {
struct cudaExtent e; struct cudaExtent e;
e.width = w; e.width = w;
e.height = h; e.height = h;
e.depth = d; e.depth = d;
return e; return e;
} }
/** @} */ /* END CUDART_MEMORY */
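A hedged sketch of how these helpers are typically combined with cudaMalloc3D
(width is given in bytes here because the allocation is linear, pitched memory):

    struct cudaExtent extent = make_cudaExtent(64 * sizeof(float), 64, 64);
    struct cudaPitchedPtr devPitchedPtr;
    cudaMalloc3D(&devPitchedPtr, extent);  /* devPitchedPtr.pitch >= row width */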
#endif /* !__DRIVER_FUNCTIONS_H__ */ #endif /* !__DRIVER_FUNCTIONS_H__ */
 End of changes. 6 change blocks. 
21 lines changed or deleted 53 lines changed or added


 driver_types.h   driver_types.h 
skipping to change at line 63 skipping to change at line 63
* * * *
* TYPE DEFINITIONS USED BY RUNTIME API * * TYPE DEFINITIONS USED BY RUNTIME API *
* * * *
*************************************************************************** ****/ *************************************************************************** ****/
#if !defined(__CUDA_INTERNAL_COMPILATION__) #if !defined(__CUDA_INTERNAL_COMPILATION__)
#include <limits.h> #include <limits.h>
#include <stddef.h> #include <stddef.h>
#define cudaHostAllocDefault        0    /**< Default page-locked allocation flag */
#define cudaHostAllocPortable       1    /**< Pinned memory accessible by all CUDA contexts */
#define cudaHostAllocMapped         2    /**< Map allocation into device space */
#define cudaHostAllocWriteCombined  4    /**< Write-combined memory */

#define cudaEventDefault            0    /**< Default event flag */
#define cudaEventBlockingSync       1    /**< Event uses blocking synchronization */
#define cudaEventDisableTiming      2    /**< Event will not record timing data */

#define cudaDeviceScheduleAuto      0    /**< Device flag - Automatic scheduling */
#define cudaDeviceScheduleSpin      1    /**< Device flag - Spin default scheduling */
#define cudaDeviceScheduleYield     2    /**< Device flag - Yield default scheduling */
#define cudaDeviceBlockingSync      4    /**< Device flag - Use blocking synchronization */
#define cudaDeviceMapHost           8    /**< Device flag - Support mapped pinned allocations */
#define cudaDeviceLmemResizeToMax   16   /**< Device flag - Keep local memory allocation after launch */
#define cudaDeviceMask              0x1f /**< Device flags mask */

#define cudaArrayDefault            0x00 /**< Default CUDA array allocation flag */
#define cudaArraySurfaceLoadStore   0x02 /**< Must be set in cudaMallocArray in order to bind surfaces to the CUDA array */
#endif /* !__CUDA_INTERNAL_COMPILATION__ */ #endif /* !__CUDA_INTERNAL_COMPILATION__ */
/************************************************************************** ***** /************************************************************************** *****
* * * *
* * * *
* * * *
*************************************************************************** ****/ *************************************************************************** ****/
/** /**
* CUDA error types * CUDA error types
*/ */
/*DEVICE_BUILTIN*/ /*DEVICE_BUILTIN*/
enum cudaError enum cudaError
{ {
  /**
   * The API call returned with no errors. In the case of query calls, this
   * can also mean that the operation being queried is complete (see
   * ::cudaEventQuery() and ::cudaStreamQuery()).
   */
  cudaSuccess                           =      0,

  /**
   * The device function being invoked (usually via ::cudaLaunch()) was not
   * previously configured via the ::cudaConfigureCall() function.
   */
  cudaErrorMissingConfiguration         =      1,

  /**
   * The API call failed because it was unable to allocate enough memory to
   * perform the requested operation.
   */
  cudaErrorMemoryAllocation             =      2,

  /**
   * The API call failed because the CUDA driver and runtime could not be
   * initialized.
   */
  cudaErrorInitializationError          =      3,

  /**
   * An exception occurred on the device while executing a kernel. Common
   * causes include dereferencing an invalid device pointer and accessing
   * out of bounds shared memory. The device cannot be used until
   * ::cudaThreadExit() is called. All existing device memory allocations
   * are invalid and must be reconstructed if the program is to continue
   * using CUDA.
   */
  cudaErrorLaunchFailure                =      4,

  /**
   * This indicated that a previous kernel launch failed. This was previously
   * used for device emulation of kernel launches.
   * \deprecated
   * This error return is deprecated as of CUDA 3.1. Device emulation mode was
   * removed with the CUDA 3.1 release.
   */
  cudaErrorPriorLaunchFailure           =      5,

  /**
   * This indicates that the device kernel took too long to execute. This can
   * only occur if timeouts are enabled - see the device property
   * \ref ::cudaDeviceProp::kernelExecTimeoutEnabled "kernelExecTimeoutEnabled"
   * for more information. The device cannot be used until ::cudaThreadExit()
   * is called. All existing device memory allocations are invalid and must be
   * reconstructed if the program is to continue using CUDA.
   */
  cudaErrorLaunchTimeout                =      6,

  /**
   * This indicates that a launch did not occur because it did not have
   * appropriate resources. Although this error is similar to
   * ::cudaErrorInvalidConfiguration, this error usually indicates that the
   * user has attempted to pass too many arguments to the device kernel, or the
   * kernel launch specifies too many threads for the kernel's register count.
   */
  cudaErrorLaunchOutOfResources         =      7,

  /**
   * The requested device function does not exist or is not compiled for the
   * proper device architecture.
   */
  cudaErrorInvalidDeviceFunction        =      8,

  /**
   * This indicates that a kernel launch is requesting resources that can
   * never be satisfied by the current device. Requesting more shared memory
   * per block than the device supports will trigger this error, as will
   * requesting too many threads or blocks. See ::cudaDeviceProp for more
   * device limitations.
   */
  cudaErrorInvalidConfiguration         =      9,

  /**
   * This indicates that the device ordinal supplied by the user does not
   * correspond to a valid CUDA device.
   */
  cudaErrorInvalidDevice                =     10,

  /**
   * This indicates that one or more of the parameters passed to the API call
   * is not within an acceptable range of values.
   */
  cudaErrorInvalidValue                 =     11,

  /**
   * This indicates that one or more of the pitch-related parameters passed
   * to the API call is not within the acceptable range for pitch.
   */
  cudaErrorInvalidPitchValue            =     12,

  /**
   * This indicates that the symbol name/identifier passed to the API call
   * is not a valid name or identifier.
   */
  cudaErrorInvalidSymbol                =     13,

  /**
   * This indicates that the buffer object could not be mapped.
   */
  cudaErrorMapBufferObjectFailed        =     14,

  /**
   * This indicates that the buffer object could not be unmapped.
   */
  cudaErrorUnmapBufferObjectFailed      =     15,

  /**
   * This indicates that at least one host pointer passed to the API call is
   * not a valid host pointer.
   */
  cudaErrorInvalidHostPointer           =     16,

  /**
   * This indicates that at least one device pointer passed to the API call is
   * not a valid device pointer.
   */
  cudaErrorInvalidDevicePointer         =     17,

  /**
   * This indicates that the texture passed to the API call is not a valid
   * texture.
   */
  cudaErrorInvalidTexture               =     18,

  /**
   * This indicates that the texture binding is not valid. This occurs if you
   * call ::cudaGetTextureAlignmentOffset() with an unbound texture.
   */
  cudaErrorInvalidTextureBinding        =     19,

  /**
   * This indicates that the channel descriptor passed to the API call is not
   * valid. This occurs if the format is not one of the formats specified by
   * ::cudaChannelFormatKind, or if one of the dimensions is invalid.
   */
  cudaErrorInvalidChannelDescriptor     =     20,

  /**
   * This indicates that the direction of the memcpy passed to the API call is
   * not one of the types specified by ::cudaMemcpyKind.
   */
  cudaErrorInvalidMemcpyDirection       =     21,

  /**
   * This indicated that the user has taken the address of a constant variable,
   * which was forbidden up until the CUDA 3.1 release.
   * \deprecated
   * This error return is deprecated as of CUDA 3.1. Variables in constant
   * memory may now have their address taken by the runtime via
   * ::cudaGetSymbolAddress().
   */
  cudaErrorAddressOfConstant            =     22,

  /**
   * This indicated that a texture fetch was not able to be performed.
   * This was previously used for device emulation of texture operations.
   * \deprecated
   * This error return is deprecated as of CUDA 3.1. Device emulation mode was
   * removed with the CUDA 3.1 release.
   */
  cudaErrorTextureFetchFailed           =     23,

  /**
   * This indicated that a texture was not bound for access.
   * This was previously used for device emulation of texture operations.
   * \deprecated
   * This error return is deprecated as of CUDA 3.1. Device emulation mode was
   * removed with the CUDA 3.1 release.
   */
  cudaErrorTextureNotBound              =     24,

  /**
   * This indicated that a synchronization operation had failed.
   * This was previously used for some device emulation functions.
   * \deprecated
   * This error return is deprecated as of CUDA 3.1. Device emulation mode was
   * removed with the CUDA 3.1 release.
   */
  cudaErrorSynchronizationError         =     25,

  /**
   * This indicates that a non-float texture was being accessed with linear
   * filtering. This is not supported by CUDA.
   */
  cudaErrorInvalidFilterSetting         =     26,

  /**
   * This indicates that an attempt was made to read a non-float texture as a
   * normalized float. This is not supported by CUDA.
   */
  cudaErrorInvalidNormSetting           =     27,

  /**
   * Mixing of device and device emulation code was not allowed.
   * \deprecated
   * This error return is deprecated as of CUDA 3.1. Device emulation mode was
   * removed with the CUDA 3.1 release.
   */
  cudaErrorMixedDeviceExecution         =     28,

  /**
   * This indicated an issue with calling API functions during the unload
   * process of the CUDA runtime in prior releases.
   * \deprecated
   * This error return is deprecated as of CUDA 3.2.
   */
  cudaErrorCudartUnloading              =     29,

  /**
   * This indicates that an unknown internal error has occurred.
   */
  cudaErrorUnknown                      =     30,

  /**
   * This indicates that the API call is not yet implemented. Production
   * releases of CUDA will never return this error.
   */
  cudaErrorNotYetImplemented            =     31,

  /**
   * This indicated that an emulated device pointer exceeded the 32-bit address
   * range.
   * \deprecated
   * This error return is deprecated as of CUDA 3.1. Device emulation mode was
   * removed with the CUDA 3.1 release.
   */
  cudaErrorMemoryValueTooLarge          =     32,

  /**
   * This indicates that a resource handle passed to the API call was not
   * valid. Resource handles are opaque types like ::cudaStream_t and
   * ::cudaEvent_t.
   */
  cudaErrorInvalidResourceHandle        =     33,

  /**
   * This indicates that asynchronous operations issued previously have not
   * completed yet. This result is not actually an error, but must be indicated
   * differently than ::cudaSuccess (which indicates completion). Calls that
   * may return this value include ::cudaEventQuery() and ::cudaStreamQuery().
   */
  cudaErrorNotReady                     =     34,

  /**
   * This indicates that the installed NVIDIA CUDA driver is older than the
   * CUDA runtime library. This is not a supported configuration. Users should
   * install an updated NVIDIA display driver to allow the application to run.
   */
  cudaErrorInsufficientDriver           =     35,

  /**
   * This indicates that the user has called ::cudaSetDevice(),
   * ::cudaSetValidDevices(), ::cudaSetDeviceFlags(),
   * ::cudaD3D9SetDirect3DDevice(), ::cudaD3D10SetDirect3DDevice,
   * ::cudaD3D11SetDirect3DDevice(), or ::cudaVDPAUSetVDPAUDevice() after
   * initializing the CUDA runtime by calling non-device management operations
   * (allocating memory and launching kernels are examples of non-device
   * management operations). This error can also be returned if using
   * runtime/driver interoperability and there is an existing ::CUcontext
   * active on the host thread.
   */
  cudaErrorSetOnActiveProcess           =     36,

  /**
   * This indicates that the surface passed to the API call is not a valid
   * surface.
   */
  cudaErrorInvalidSurface               =     37,

  /**
   * This indicates that no CUDA-capable devices were detected by the installed
   * CUDA driver.
   */
  cudaErrorNoDevice                     =     38,

  /**
   * This indicates that an uncorrectable ECC error was detected during
   * execution.
   */
  cudaErrorECCUncorrectable             =     39,

  /**
   * This indicates that a link to a shared object failed to resolve.
   */
  cudaErrorSharedObjectSymbolNotFound   =     40,

  /**
   * This indicates that initialization of a shared object failed.
   */
  cudaErrorSharedObjectInitFailed       =     41,

  /**
   * This indicates that the ::cudaLimit passed to the API call is not
   * supported by the active device.
   */
  cudaErrorUnsupportedLimit             =     42,

  /**
   * This indicates that multiple global or constant variables (across separate
   * CUDA source files in the application) share the same string name.
   */
  cudaErrorDuplicateVariableName        =     43,

  /**
   * This indicates that multiple textures (across separate CUDA source
   * files in the application) share the same string name.
   */
  cudaErrorDuplicateTextureName         =     44,

  /**
   * This indicates that multiple surfaces (across separate CUDA source
   * files in the application) share the same string name.
   */
  cudaErrorDuplicateSurfaceName         =     45,

  /**
   * This indicates that all CUDA devices are busy or unavailable at the current
   * time. Devices are often busy/unavailable due to use of
   * ::cudaComputeModeExclusive or ::cudaComputeModeProhibited. They can also
   * be unavailable due to memory constraints on a device that already has
   * active CUDA work being performed.
   */
  cudaErrorDevicesUnavailable           =     46,

  /**
   * This indicates that the device kernel image is invalid.
   */
  cudaErrorInvalidKernelImage           =     47,

  /**
   * This indicates that there is no kernel image available that is suitable
   * for the device. This can occur when a user specifies code generation
   * options for a particular CUDA source file that do not include the
   * corresponding device configuration.
   */
  cudaErrorNoKernelImageForDevice       =     48,

  /**
   * This indicates that the current context is not compatible with this
   * version of the CUDA Runtime. This can only occur if you are using CUDA
   * Runtime/Driver interoperability and have created an existing Driver
   * context using an older API. Please see \ref CUDART_DRIVER
   * "Interactions with the CUDA Driver API" for more information.
   */
  cudaErrorIncompatibleDriverContext    =     49,

  /**
   * This indicates an internal startup failure in the CUDA runtime.
   */
  cudaErrorStartupFailure               =   0x7f,

  /**
   * Any unhandled CUDA driver error is added to this value and returned via
   * the runtime. Production releases of CUDA should not return such errors.
   */
  cudaErrorApiFailureBase               =  10000
}; };
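A common error-handling idiom built on this enum, sketched under the
assumption that <stdio.h> is available in the translation unit:

    #define CUDA_CHECK(call)                                    \
        do {                                                    \
            cudaError_t e_ = (call);                            \
            if (e_ != cudaSuccess)                              \
                fprintf(stderr, "CUDA error %d: %s\n",          \
                        (int)e_, cudaGetErrorString(e_));       \
        } while (0)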
/** /**
* Channel format kind * Channel format kind
*/ */
/*DEVICE_BUILTIN*/ /*DEVICE_BUILTIN*/
enum cudaChannelFormatKind enum cudaChannelFormatKind
{ {
    cudaChannelFormatKindSigned   = 0, /**< Signed channel format */
    cudaChannelFormatKindUnsigned = 1, /**< Unsigned channel format */
    cudaChannelFormatKindFloat    = 2, /**< Float channel format */
    cudaChannelFormatKindNone     = 3  /**< No channel format */
}; };
/** /**
* CUDA Channel format descriptor * CUDA Channel format descriptor
*/ */
/*DEVICE_BUILTIN*/ /*DEVICE_BUILTIN*/
struct cudaChannelFormatDesc struct cudaChannelFormatDesc
{ {
    int                        x; /**< x */
    int                        y; /**< y */
    int                        z; /**< z */
    int                        w; /**< w */
    enum cudaChannelFormatKind f; /**< Channel format kind */
}; };
/** /**
* CUDA array * CUDA array
*/ */
/*DEVICE_BUILTIN*/ /*DEVICE_BUILTIN*/
struct cudaArray; struct cudaArray;
/** /**
* CUDA memory copy types * CUDA memory copy types
*/ */
/*DEVICE_BUILTIN*/ /*DEVICE_BUILTIN*/
enum cudaMemcpyKind enum cudaMemcpyKind
{ {
    cudaMemcpyHostToHost     = 0, /**< Host   -> Host */
    cudaMemcpyHostToDevice   = 1, /**< Host   -> Device */
    cudaMemcpyDeviceToHost   = 2, /**< Device -> Host */
    cudaMemcpyDeviceToDevice = 3  /**< Device -> Device */
}; };
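The kind value is the last argument of the copy routines, e.g. (a sketch;
devPtr, hostPtr and nbytes are hypothetical names):

    cudaMemcpy(devPtr, hostPtr, nbytes, cudaMemcpyHostToDevice);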
/** /**
* CUDA Pitched memory pointer * CUDA Pitched memory pointer
* \sa ::make_cudaPitchedPtr * \sa ::make_cudaPitchedPtr
*/ */
/*DEVICE_BUILTIN*/ /*DEVICE_BUILTIN*/
struct cudaPitchedPtr struct cudaPitchedPtr
{ {
    void   *ptr;   /**< Pointer to allocated memory */
    size_t  pitch; /**< Pitch of allocated memory in bytes */
    size_t  xsize; /**< Logical width of allocation in elements */
    size_t  ysize; /**< Logical height of allocation in elements */
}; };
/** /**
* CUDA extent * CUDA extent
* \sa ::make_cudaExtent * \sa ::make_cudaExtent
*/ */
/*DEVICE_BUILTIN*/ /*DEVICE_BUILTIN*/
struct cudaExtent struct cudaExtent
{ {
    size_t width;  /**< Width in elements when referring to array memory, in bytes when referring to linear memory */
    size_t height; /**< Height in elements */
    size_t depth;  /**< Depth in elements */
}; };
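Per the width semantics above, the same helper is fed differently for the two
memory kinds (a sketch; w, h and d are hypothetical element counts):

    struct cudaExtent linearExtent = make_cudaExtent(w * sizeof(float), h, d); /* bytes    */
    struct cudaExtent arrayExtent  = make_cudaExtent(w, h, d);                 /* elements */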
/** /**
* CUDA 3D position * CUDA 3D position
* \sa ::make_cudaPos * \sa ::make_cudaPos
*/ */
/*DEVICE_BUILTIN*/ /*DEVICE_BUILTIN*/
struct cudaPos struct cudaPos
{ {
    size_t x; /**< x */
    size_t y; /**< y */
    size_t z; /**< z */
}; };
/** /**
* CUDA 3D memory copying parameters * CUDA 3D memory copying parameters
*/ */
/*DEVICE_BUILTIN*/ /*DEVICE_BUILTIN*/
struct cudaMemcpy3DParms struct cudaMemcpy3DParms
{ {
    struct cudaArray      *srcArray; /**< Source memory address */
    struct cudaPos         srcPos;   /**< Source position offset */
    struct cudaPitchedPtr  srcPtr;   /**< Pitched source memory address */

    struct cudaArray      *dstArray; /**< Destination memory address */
    struct cudaPos         dstPos;   /**< Destination position offset */
    struct cudaPitchedPtr  dstPtr;   /**< Pitched destination memory address */

    struct cudaExtent      extent;   /**< Requested memory copy size */
    enum cudaMemcpyKind    kind;     /**< Type of transfer */
}; };
/** /**
* CUDA graphics interop resource * CUDA graphics interop resource
*/ */
/*DEVICE_BUILTIN*/ /*DEVICE_BUILTIN*/
struct cudaGraphicsResource; struct cudaGraphicsResource;
/** /**
* CUDA graphics interop register flags * CUDA graphics interop register flags
*/ */
/*DEVICE_BUILTIN*/ /*DEVICE_BUILTIN*/
enum cudaGraphicsRegisterFlags enum cudaGraphicsRegisterFlags
{ {
    cudaGraphicsRegisterFlagsNone = 0  /**< Default */
}; };
/** /**
* CUDA graphics interop map flags * CUDA graphics interop map flags
*/ */
/*DEVICE_BUILTIN*/ /*DEVICE_BUILTIN*/
enum cudaGraphicsMapFlags enum cudaGraphicsMapFlags
{ {
    cudaGraphicsMapFlagsNone         = 0, /**< Default; Assume resource can be read/written */
    cudaGraphicsMapFlagsReadOnly     = 1, /**< CUDA will not write to this resource */
    cudaGraphicsMapFlagsWriteDiscard = 2  /**< CUDA will only write to and will not read from this resource */
}; };
/** /**
* CUDA graphics interop array indices for cube maps * CUDA graphics interop array indices for cube maps
*/ */
/*DEVICE_BUILTIN*/ /*DEVICE_BUILTIN*/
enum cudaGraphicsCubeFace { enum cudaGraphicsCubeFace {
    cudaGraphicsCubeFacePositiveX = 0x00, /**< Positive X face of cubemap */
    cudaGraphicsCubeFaceNegativeX = 0x01, /**< Negative X face of cubemap */
    cudaGraphicsCubeFacePositiveY = 0x02, /**< Positive Y face of cubemap */
    cudaGraphicsCubeFaceNegativeY = 0x03, /**< Negative Y face of cubemap */
    cudaGraphicsCubeFacePositiveZ = 0x04, /**< Positive Z face of cubemap */
    cudaGraphicsCubeFaceNegativeZ = 0x05  /**< Negative Z face of cubemap */
}; };
/** /**
* CUDA function attributes * CUDA function attributes
*/ */
/*DEVICE_BUILTIN*/ /*DEVICE_BUILTIN*/
struct cudaFuncAttributes struct cudaFuncAttributes
{ {
   /**
    * The size in bytes of statically-allocated shared memory per block
    * required by this function. This does not include dynamically-allocated
    * shared memory requested by the user at runtime.
    */
   size_t sharedSizeBytes;

   /**
    * The size in bytes of user-allocated constant memory required by this
    * function.
    */
   size_t constSizeBytes;

   /**
    * The size in bytes of local memory used by each thread of this function.
    */
   size_t localSizeBytes;

   /**
    * The maximum number of threads per block, beyond which a launch of the
    * function would fail. This number depends on both the function and the
    * device on which the function is currently loaded.
    */
   int maxThreadsPerBlock;

   /**
    * The number of registers used by each thread of this function.
    */
   int numRegs;

   /**
    * The PTX virtual architecture version for which the function was
    * compiled. This value is the major PTX version * 10 + the minor PTX
    * version, so a PTX version 1.3 function would return the value 13.
    */
   int ptxVersion;

   /**
    * The binary architecture version for which the function was compiled.
    * This value is the major binary version * 10 + the minor binary version,
    * so a binary version 1.3 function would return the value 13.
    */
   int binaryVersion;

   int __cudaReserved[6];
}; };
/** /**
* CUDA function cache configurations * CUDA function cache configurations
*/ */
/*DEVICE_BUILTIN*/ /*DEVICE_BUILTIN*/
enum cudaFuncCache enum cudaFuncCache
{ {
    cudaFuncCachePreferNone   = 0, /**< Default function cache configuration, no preference */
    cudaFuncCachePreferShared = 1, /**< Prefer larger shared memory and smaller L1 cache */
    cudaFuncCachePreferL1     = 2  /**< Prefer larger L1 cache and smaller shared memory */
}; };
/** /**
* CUDA device compute modes * CUDA device compute modes
*/ */
/*DEVICE_BUILTIN*/ /*DEVICE_BUILTIN*/
enum cudaComputeMode enum cudaComputeMode
{ {
    cudaComputeModeDefault    = 0, /**< Default compute mode (Multiple threads can use ::cudaSetDevice() with this device) */
    cudaComputeModeExclusive  = 1, /**< Compute-exclusive mode (Only one thread will be able to use ::cudaSetDevice() with this device) */
    cudaComputeModeProhibited = 2  /**< Compute-prohibited mode (No threads can use ::cudaSetDevice() with this device) */
}; };
/** /**
* CUDA Limits * CUDA Limits
*/ */
/*DEVICE_BUILTIN*/ /*DEVICE_BUILTIN*/
enum cudaLimit enum cudaLimit
{ {
    cudaLimitStackSize      = 0x00, /**< GPU thread stack size */
    cudaLimitPrintfFifoSize = 0x01, /**< GPU printf FIFO size */
    cudaLimitMallocHeapSize = 0x02  /**< GPU malloc heap size */
}; };
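Limits are adjusted per host thread with cudaThreadSetLimit; for example, a
sketch that grows the device-side malloc heap governed by
cudaLimitMallocHeapSize:

    cudaThreadSetLimit(cudaLimitMallocHeapSize, 16 * 1024 * 1024);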
/** /**
* CUDA device properties * CUDA device properties
*/ */
/*DEVICE_BUILTIN*/ /*DEVICE_BUILTIN*/
struct cudaDeviceProp struct cudaDeviceProp
{ {
    char   name[256];                /**< ASCII string identifying device */
    size_t totalGlobalMem;           /**< Global memory available on device in bytes */
    size_t sharedMemPerBlock;        /**< Shared memory available per block in bytes */
    int    regsPerBlock;             /**< 32-bit registers available per block */
    int    warpSize;                 /**< Warp size in threads */
    size_t memPitch;                 /**< Maximum pitch in bytes allowed by memory copies */
    int    maxThreadsPerBlock;       /**< Maximum number of threads per block */
    int    maxThreadsDim[3];         /**< Maximum size of each dimension of a block */
    int    maxGridSize[3];           /**< Maximum size of each dimension of a grid */
    int    clockRate;                /**< Clock frequency in kilohertz */
    size_t totalConstMem;            /**< Constant memory available on device in bytes */
    int    major;                    /**< Major compute capability */
    int    minor;                    /**< Minor compute capability */
    size_t textureAlignment;         /**< Alignment requirement for textures */
    int    deviceOverlap;            /**< Device can concurrently copy memory and execute a kernel */
    int    multiProcessorCount;      /**< Number of multiprocessors on device */
    int    kernelExecTimeoutEnabled; /**< Specified whether there is a run time limit on kernels */
    int    integrated;               /**< Device is integrated as opposed to discrete */
    int    canMapHostMemory;         /**< Device can map host memory with cudaHostAlloc/cudaHostGetDevicePointer */
    int    computeMode;              /**< Compute mode (See ::cudaComputeMode) */
    int    maxTexture1D;             /**< Maximum 1D texture size */
    int    maxTexture2D[2];          /**< Maximum 2D texture dimensions */
    int    maxTexture3D[3];          /**< Maximum 3D texture dimensions */
    int    maxTexture2DArray[3];     /**< Maximum 2D texture array dimensions */
    size_t surfaceAlignment;         /**< Alignment requirements for surfaces */
    int    concurrentKernels;        /**< Device can possibly execute multiple kernels concurrently */
    int    ECCEnabled;               /**< Device has ECC support enabled */
    int    pciBusID;                 /**< PCI bus ID of the device */
    int    pciDeviceID;              /**< PCI device ID of the device */
    int    tccDriver;                /**< 1 if device is a Tesla device using TCC driver, 0 otherwise */
    int    __cudaReserved[21];
}; };
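A minimal enumeration loop over these properties:

#include <cstdio>
#include <cuda_runtime.h>

int main()
{
    int n = 0;
    cudaGetDeviceCount(&n);
    for (int i = 0; i < n; ++i) {
        struct cudaDeviceProp p;
        cudaGetDeviceProperties(&p, i);
        printf("%d: %s, CC %d.%d, %lu MB global, %d SMs, ECC %s\n",
               i, p.name, p.major, p.minor,
               (unsigned long)(p.totalGlobalMem >> 20),
               p.multiProcessorCount, p.ECCEnabled ? "on" : "off");
    }
    return 0;
}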
#define cudaDevicePropDontCare \
        { \
          {'\0'},    /* char name[256];              */ \
          0,         /* size_t totalGlobalMem;       */ \
          0,         /* size_t sharedMemPerBlock;    */ \
          0,         /* int regsPerBlock;            */ \
          0,         /* int warpSize;                */ \
          0,         /* size_t memPitch;             */ \

skipping to change at line 751

          0,         /* int kernelExecTimeoutEnabled */ \
          0,         /* int integrated               */ \
          0,         /* int canMapHostMemory         */ \
          0,         /* int computeMode              */ \
          0,         /* int maxTexture1D             */ \
          {0, 0},    /* int maxTexture2D[2]          */ \
          {0, 0, 0}, /* int maxTexture3D[3]          */ \
          {0, 0, 0}, /* int maxTexture2DArray[3]     */ \
          0,         /* size_t surfaceAlignment      */ \
          0,         /* int concurrentKernels        */ \
          0,         /* int ECCEnabled               */ \
          0,         /* int pciBusID                 */ \
          0,         /* int pciDeviceID              */ \
          0          /* int tccDriver                */ \
        } /**< Empty device properties */
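The macro exists so ::cudaChooseDevice callers can leave most fields unconstrained and pin down only what matters; a sketch:

#include <cuda_runtime.h>

int main()
{
    /* Start from "don't care" and constrain only the compute capability. */
    struct cudaDeviceProp want = cudaDevicePropDontCare;
    want.major = 2;                 /* ask for a Fermi-class part */
    want.minor = 0;

    int dev = 0;
    cudaChooseDevice(&dev, &want);  /* picks the closest match */
    cudaSetDevice(dev);
    return 0;
}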
/*******************************************************************************
*                                                                              *
*  SHORTHAND TYPE DEFINITION USED BY RUNTIME API                               *
*                                                                              *
*******************************************************************************/
/**
 * CUDA Error types
 */

skipping to change at line 782
/*DEVICE_BUILTIN*/
typedef struct CUstream_st *cudaStream_t;

/**
 * CUDA event types
 */
/*DEVICE_BUILTIN*/
typedef struct CUevent_st *cudaEvent_t;
/** /**
* CUDA graphics resource types
*/
/*DEVICE_BUILTIN*/
typedef struct cudaGraphicsResource *cudaGraphicsResource_t;
/**
* CUDA UUID types * CUDA UUID types
*/ */
/*DEVICE_BUILTIN*/ /*DEVICE_BUILTIN*/
typedef struct CUuuid_st cudaUUID_t; typedef struct CUuuid_st cudaUUID_t;
/** @} */
/** @} */ /* END CUDART_TYPES */

#endif /* !__DRIVER_TYPES_H__ */
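The opaque stream and event handles defined above are the basis for asynchronous timing. A small self-contained sketch, with a hypothetical busy() kernel:

#include <cstdio>
#include <cuda_runtime.h>

__global__ void busy(float *p) { p[threadIdx.x] *= 2.0f; }

int main()
{
    float *d = 0;
    cudaMalloc((void**)&d, 256 * sizeof(float));

    cudaStream_t s;
    cudaEvent_t beg, end;
    cudaStreamCreate(&s);
    cudaEventCreate(&beg);
    cudaEventCreate(&end);

    cudaEventRecord(beg, s);           /* bracket the work on stream s */
    busy<<<1, 256, 0, s>>>(d);
    cudaEventRecord(end, s);
    cudaEventSynchronize(end);

    float ms = 0.0f;
    cudaEventElapsedTime(&ms, beg, end);
    printf("kernel: %.3f ms\n", ms);

    cudaEventDestroy(beg);
    cudaEventDestroy(end);
    cudaStreamDestroy(s);
    cudaFree(d);
    return 0;
}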
host_config.h
skipping to change at line 104
#endif /* !NOMINMAX */

#include <crtdefs.h> /* for _CRTIMP */

#define __THROW

#endif /* __APPLE__ */

#endif /* __CUDACC__ */
#if defined(__ICC)

#if __ICC != 1110 || !defined(__GNUC__) || !defined(__LP64__)

#error -- unsupported ICC configuration! Only ICC 11.1 on Linux x86_64 is supported!

#endif /* __ICC != 1110 || !__GNUC__ || !__LP64__ */

#endif /* __ICC */
#endif /* !__HOST_CONFIG_H__ */
host_runtime.h
skipping to change at line 85
        __cudaFatCubinHandle = __cudaRegisterFatBinary((void*)&__fatDeviceText); \
        atexit(__cudaUnregisterBinaryUtil)

#define __cudaRegisterVariable(var, ext, size, constant, global) \
        __cudaRegisterVar(__cudaFatCubinHandle, (char*)&__host##var, (char*)__device##var, __name##var, ext, size, constant, global)

#define __cudaRegisterGlobalTexture(tex, dim, norm, ext) \
        __cudaRegisterTexture(__cudaFatCubinHandle, (const struct textureReference*)&tex, (const void**)__device##tex, __name##tex, dim, norm, ext)

#define __cudaRegisterGlobalSurface(surf, dim, ext) \
        __cudaRegisterSurface(__cudaFatCubinHandle, (const struct surfaceReference*)&surf, (const void**)__device##surf, __name##surf, dim, ext)

#define __cudaRegisterEntry(funptr, fun, thread_limit) \
        __cudaRegisterFunction(__cudaFatCubinHandle, (const char*)funptr, (char*)__device_fun(fun), #fun, -1, (uint3*)0, (uint3*)0, (dim3*)0, (dim3*)0, (int*)0)
#define __cudaSetupArg(arg, offset) \
        if (cudaSetupArgument((void*)(char*)&arg, sizeof(arg), (size_t)&offset->arg) != cudaSuccess) \
          return

#define __cudaLaunch(fun) \
        { volatile static char *__f; __f = fun; (void)cudaLaunch(fun); }
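For reference, the expansion driven by the __cudaSetupArg()/__cudaLaunch() macros boils down to roughly the following hand-written launch sequence. This is a sketch using the CUDA 3.x execution-control API (cudaConfigureCall(), cudaSetupArgument(), cudaLaunch()) with a hypothetical scale() kernel; the real generated code computes offsets from a compiler-generated argument record rather than by hand:

#include <cuda_runtime.h>

__global__ void scale(float *p, float f);   /* defined elsewhere */

void launch_scale(float *p, float f)
{
    /* scale<<<1, 256>>>(p, f), spelled out by hand */
    cudaConfigureCall(dim3(1), dim3(256), 0, 0);
    cudaSetupArgument(&p, sizeof(p), 0);
    cudaSetupArgument(&f, sizeof(f), sizeof(p)); /* offset past the pointer */
    cudaLaunch((const char*)scale);              /* host stub identifies the entry */
}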
extern "C" { extern "C" {
extern void** CUDARTAPI __cudaRegisterFatBinary( extern void** CUDARTAPI __cudaRegisterFatBinary(
void *fatCubin void *fatCubin
skipping to change at line 167 skipping to change at line 165
static void **__cudaFatCubinHandle;

static void __cdecl __cudaUnregisterBinaryUtil(void)
{
  __cudaUnregisterFatBinary(__cudaFatCubinHandle);
}

#include "common_functions.h"
#if defined(__APPLE__)

#pragma options align=natural

#else /* __APPLE__ */

#pragma pack()

#if defined(_WIN32)

#pragma warning(disable: 4099)

#if !defined(_WIN64)

#pragma warning(disable: 4408)

#endif /* !_WIN64 */

#endif /* _WIN32 */

#endif /* __APPLE__ */

#endif /* !__CUDA_INTERNAL_COMPILATION__ */
math_functions_dbl_ptx3.h
skipping to change at line 702
    t = a / t;
    t = -a * t;
    t = __internal_atanh_kernel(a, t);
    return t;
  }
  return log (a + CUDART_ONE);
}
static __forceinline__ double __internal_exp_kernel(double a, int scale)
{
  double t, z;
  int i, j, k;

  /* exp(a) = 2^(rint(a/log(2)) + z) = 2^(i + z) */
  t = rint (a * CUDART_L2E);
  i = (int)t;
  z = __fma_rn (t, -CUDART_LN2_HI, a);
  z = __fma_rn (t, -CUDART_LN2_LO, z);
  t = __internal_expm1_kernel (z);
  k = ((i + scale) << 20) + (1023 << 20);
  if (abs(i) < 1021) {
    z = __hiloint2double (k, 0);
    z = __fma_rn (t, z, z);
  } else {
    j = 0x40000000;
    if (i < 0) {
      k += (55 << 20);
      j -= (55 << 20);
    }
    k = k - (1 << 20);
    z = __hiloint2double (j, 0); /* 2^-54 if a is denormal, 2.0 otherwise */
    t = __fma_rn (t, z, z);
    z = __hiloint2double (k, 0);
    z = t * z;
  }
  return z;
}
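The __hiloint2double() calls above synthesize powers of two directly from IEEE-754 exponent bits instead of calling a scaling routine. A host-side sketch of the same trick, with a hypothetical hiloint2double() stand-in built from memcpy():

#include <cstdio>
#include <cstring>
#include <stdint.h>

/* Hypothetical host-side stand-in for the device intrinsic
   __hiloint2double(hi, lo): glue two 32-bit halves into a double. */
static double hiloint2double(int hi, int lo)
{
    uint64_t bits = ((uint64_t)(uint32_t)hi << 32) | (uint32_t)lo;
    double d;
    memcpy(&d, &bits, sizeof(d));
    return d;
}

int main(void)
{
    /* (e + 1023) << 20 drops the biased exponent into bits 20..30 of the
       high word, i.e. bits 52..62 of the double: the value is exactly 2^e. */
    for (int e = -2; e <= 2; ++e)
        printf("2^%+d = %g\n", e, hiloint2double((e + 1023) << 20, 0));
    return 0;   /* prints 0.25, 0.5, 1, 2, 4 */
}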
static __forceinline__ double exp(double a)
{
  double t;
  int i;

  i = __double2hiint(a);
  if (((unsigned)i < (unsigned)0x40862e43) || ((int)i < (int)0xC0874911)) {
    t = __internal_exp_kernel(a, 0);
    return t;
  }
  t = (i < 0) ? CUDART_ZERO : CUDART_INF;
  if (__isnan(a)) {
    t = a + a;
  }
  return t;
}
static __forceinline__ double exp2(double a)
{
  double t, z;
  int i, j, k;

  i = __double2hiint(a);
  if (((unsigned)i < (unsigned)0x40900000) || ((int)i < (int)0xc090cc00)) {
    t = rint (a);
    z = a - t;
    i = (int)t;
    /* 2^z = exp(log(2)*z) */
    z = __fma_rn (z, CUDART_LN2_HI, z * CUDART_LN2_LO);
    t = __internal_expm1_kernel(z);
    k = (i << 20) + (1023 << 20);
    if (abs(i) < 1023) {
      z = __hiloint2double (k, 0);
      z = __fma_rn (t, z, z);
    } else {
      j = 0x40000000;
      if (i < 0) {
        k += (55 << 20);
        j -= (55 << 20);
      }
      k = k - (1 << 20);
      z = __hiloint2double (j, 0); /* 2^-54 if a is denormal, 2.0 otherwise */
      t = __fma_rn (t, z, z);
      z = __hiloint2double (k, 0);
      z = t * z;
    }
    return z;
  }
  t = (i < 0) ? CUDART_ZERO : CUDART_INF;
  if (__isnan(a)) {
    t = a + a;
  }
  return t;
}
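The else branch exists because a biased exponent assembled as (i << 20) + (1023 << 20) only encodes normal doubles, i.e. scales in -1022 <= i <= 1023; outside that range the scale must be applied as a product of two in-range powers. A host-side sketch of the failure and the staged fix, with a hypothetical pow2() helper:

#include <cstdio>
#include <cstring>
#include <stdint.h>

/* Hypothetical helper: builds 2^e from exponent bits alone, which is
   only valid for normal doubles, i.e. -1022 <= e <= 1023. */
static double pow2(int e)
{
    uint64_t bits = (uint64_t)(unsigned)(e + 1023) << 52;
    double d;
    memcpy(&d, &bits, sizeof(d));
    return d;
}

int main(void)
{
    double t = 0.8125;   /* mantissa to be rescaled         */
    int    e = -1040;    /* below the normal-exponent range */

    double naive  = t * pow2(e);                    /* garbage: e + 1023 < 0 */
    double staged = (t * pow2(e + 54)) * pow2(-54); /* two in-range factors  */

    printf("naive:  %g\nstaged: %g\n", naive, staged);
    return 0;
}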
static __forceinline__ double exp10(double a)
{
  double z;
  double t;
  int i, j, k;

  i = __double2hiint(a);
  if (((unsigned)i < (unsigned)0x40734414) || ((int)i < (int)0xc07439b8)) {
    t = rint (a * CUDART_L2T);
    i = (int)t;
    z = __fma_rn (t, -CUDART_LG2_HI, a);
    z = __fma_rn (t, -CUDART_LG2_LO, z);
    /* 10^z = exp(log(10)*z) */
    z = __fma_rn (z, CUDART_LNT_HI, z * CUDART_LNT_LO);
    t = __internal_expm1_kernel(z);
    k = (i << 20) + (1023 << 20);
    if (abs(i) < 1023) {
      z = __hiloint2double (k, 0);
      z = __fma_rn (t, z, z);
    } else {
      j = 0x40000000;
      if (i < 0) {
        k += (55 << 20);
        j -= (55 << 20);
      }
      k = k - (1 << 20);
      z = __hiloint2double (j, 0); /* 2^-54 if a is denormal, 2.0 otherwise */
      t = __fma_rn (t, z, z);
      z = __hiloint2double (k, 0);
      z = t * z;
    }
    return z;
  }
  t = (i < 0) ? CUDART_ZERO : CUDART_INF;
  if (__isnan(a)) {
    t = a + a;
  }
  return t;
}
static __forceinline__ double expm1(double a)
{
opencl.h
/*******************************************************************************
 * Copyright (c) 2008-2010 The Khronos Group Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and/or associated documentation files (the
 * "Materials"), to deal in the Materials without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sublicense, and/or sell copies of the Materials, and to
 * permit persons to whom the Materials are furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice shall be included

skipping to change at line 24

 *
 * THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
 * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS.
 ******************************************************************************/

/* $Revision: 11708 $ on $Date: 2010-06-14 12:06:24 +0530 (Mon, 14 Jun 2010) $ */
#ifndef __OPENCL_H
#define __OPENCL_H

#ifdef __cplusplus
extern "C" {
#endif

#ifdef __APPLE__
surface_types.h
skipping to change at line 47
#define __SURFACE_TYPES_H__

/*******************************************************************************
*                                                                              *
*                                                                              *
*                                                                              *
*******************************************************************************/

#include "driver_types.h"
/**
 * \addtogroup CUDART_TYPES
 *
 * @{
 */

/*******************************************************************************
*                                                                              *
*                                                                              *
*                                                                              *
*******************************************************************************/
/**
 * CUDA Surface boundary modes
 */
/*DEVICE_BUILTIN*/
enum cudaSurfaceBoundaryMode
{
    cudaBoundaryModeZero  = 0,    /**< Zero boundary mode */
    cudaBoundaryModeClamp = 1,    /**< Clamp boundary mode */
    cudaBoundaryModeTrap  = 2     /**< Trap boundary mode */
};
/**
 * CUDA Surface format modes
 */
/*DEVICE_BUILTIN*/
enum cudaSurfaceFormatMode
{
    cudaFormatModeForced = 0,     /**< Forced format mode */
    cudaFormatModeAuto   = 1      /**< Auto format mode */
};
/**
 * CUDA Surface reference
 */
/*DEVICE_BUILTIN*/
struct surfaceReference
{
    /**
     * Channel descriptor for surface reference
     */
    struct cudaChannelFormatDesc channelDesc;
};
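Surface references are accessed through surf2Dread()/surf2Dwrite(), with the boundary mode above selecting out-of-range behavior. A device-side sketch; it assumes the backing cudaArray was created with cudaArraySurfaceLoadStore and bound on the host with cudaBindSurfaceToArray():

#include <cuda_runtime.h>

surface<void, 2> outputSurf;   /* surface reference at file scope */

__global__ void invert(void)
{
    unsigned int x = blockIdx.x * blockDim.x + threadIdx.x;
    unsigned int y = blockIdx.y * blockDim.y + threadIdx.y;
    uchar4 p;
    /* x is byte-addressed; cudaBoundaryModeTrap faults on out-of-range. */
    surf2Dread(&p, outputSurf, x * sizeof(uchar4), y, cudaBoundaryModeTrap);
    p.x = 255 - p.x; p.y = 255 - p.y; p.z = 255 - p.z;
    surf2Dwrite(p, outputSurf, x * sizeof(uchar4), y, cudaBoundaryModeTrap);
}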
/** @} */
/** @} */ /* END CUDART_TYPES */

#endif /* !__SURFACE_TYPES_H__ */
texture_fetch_functions.h
skipping to change at line 1876
}

static __inline__ __device__ float4 tex3D(texture<ushort4, 3, cudaReadModeNormalizedFloat> t, float x, float y, float z)
{
  uint4 v = __utexfetch(t, make_float4(x, y, z, 0));
  float4 w = make_float4(__int_as_float(v.x), __int_as_float(v.y), __int_as_float(v.z), __int_as_float(v.w));

  return make_float4(w.x, w.y, w.z, w.w);
}
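These overloads are the implementation side of device texture reads; user code sees only the tex3D() call. A device-side usage sketch: the 3D array allocation and the cudaBindTextureToArray() call are assumed to happen on the host.

#include <cuda_runtime.h>

/* Texture reference at file scope; 16-bit channels are returned
   rescaled into [0, 1] because of cudaReadModeNormalizedFloat. */
texture<ushort4, 3, cudaReadModeNormalizedFloat> volumeTex;

__global__ void sample(float4 *out, float x, float y, float z)
{
    out[0] = tex3D(volumeTex, x, y, z);
}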
#define __utexfetchi \
        000 incorrect invocation of builtin __utexfetchi 000
#define __itexfetchi \
        000 incorrect invocation of builtin __itexfetchi 000
#define __ftexfetchi \
        000 incorrect invocation of builtin __ftexfetchi 000
#define __utexfetch \
        000 incorrect invocation of builtin __utexfetch 000
#define __itexfetch \
        000 incorrect invocation of builtin __itexfetch 000
#define __ftexfetch \
        000 incorrect invocation of builtin __ftexfetch 000
#elif defined(__CUDABE__)

extern uint4  __utexfetchi1D(const void*, int4);
extern int4   __itexfetchi1D(const void*, int4);
extern float4 __ftexfetchi1D(const void*, int4);
extern uint4  __utexfetch1D(const void*, float4);
extern int4   __itexfetch1D(const void*, float4);
extern float4 __ftexfetch1D(const void*, float4);
extern uint4  __utexfetch2D(const void*, float4);
extern int4   __itexfetch2D(const void*, float4);

skipping to change at line 1919
        __ftexfetchi1D(t, i)
#define __utexfetch(t, i, d) \
        __utexfetch##d##D(t, i)
#define __itexfetch(t, i, d) \
        __itexfetch##d##D(t, i)
#define __ftexfetch(t, i, d) \
        __ftexfetch##d##D(t, i)

#endif /* __cplusplus && __CUDACC__ */
#if defined(__cplusplus) && defined(__CUDACC__)

#if !defined(__CUDA_ARCH__) || __CUDA_ARCH__ >= 200

/*******************************************************************************
*                                                                              *
*                                                                              *
*                                                                              *
*******************************************************************************/

/*DEVICE_BUILTIN*/
template<int comp, class T> extern __device__ int4   __itex2Dgather(texture<T, 2, cudaReadModeElementType> t, float2 i, int c = comp);
/*DEVICE_BUILTIN*/
template<int comp, class T> extern __device__ uint4  __utex2Dgather(texture<T, 2, cudaReadModeElementType> t, float2 i, int c = comp);
/*DEVICE_BUILTIN*/
template<int comp, class T> extern __device__ float4 __ftex2Dgather(texture<T, 2, cudaReadModeElementType> t, float2 i, int c = comp);

#define __tex2DgatherUtil(T, f, r, c) \
        { T v = f<c>(t, make_float2(x, y)); return r; }

#define __tex2DgatherUtil1(T, f, r) \
        __tex2DgatherUtil(T, f, r, 0)

#define __tex2DgatherUtil2(T, f, r) \
        if (comp == 1) __tex2DgatherUtil(T, f, r, 1) \
        else __tex2DgatherUtil1(T, f, r)

#define __tex2DgatherUtil3(T, f, r) \
        if (comp == 2) __tex2DgatherUtil(T, f, r, 2) \
        else __tex2DgatherUtil2(T, f, r)

#define __tex2DgatherUtil4(T, f, r) \
        if (comp == 3) __tex2DgatherUtil(T, f, r, 3) \
        else __tex2DgatherUtil3(T, f, r)

static __inline__ __device__ char4 tex2Dgather(texture<char, 2, cudaReadModeElementType> t, float x, float y, int comp = 0)
{
  __tex2DgatherUtil1(int4, __itex2Dgather, make_char4(v.x, v.y, v.z, v.w));
}
static __inline__ __device__ char4 tex2Dgather(texture<signed char, 2, cudaReadModeElementType> t, float x, float y, int comp = 0)
{
  __tex2DgatherUtil1(int4, __itex2Dgather, make_char4(v.x, v.y, v.z, v.w));
}
static __inline__ __device__ uchar4 tex2Dgather(texture<unsigned char, 2, cudaReadModeElementType> t, float x, float y, int comp = 0)
{
  __tex2DgatherUtil1(uint4, __utex2Dgather, make_uchar4(v.x, v.y, v.z, v.w));
}
static __inline__ __device__ char4 tex2Dgather(texture<char1, 2, cudaReadModeElementType> t, float x, float y, int comp = 0)
{
  __tex2DgatherUtil1(int4, __itex2Dgather, make_char4(v.x, v.y, v.z, v.w));
}
static __inline__ __device__ uchar4 tex2Dgather(texture<uchar1, 2, cudaReadModeElementType> t, float x, float y, int comp = 0)
{
  __tex2DgatherUtil1(uint4, __utex2Dgather, make_uchar4(v.x, v.y, v.z, v.w));
}
static __inline__ __device__ char4 tex2Dgather(texture<char2, 2, cudaReadModeElementType> t, float x, float y, int comp = 0)
{
  __tex2DgatherUtil2(int4, __itex2Dgather, make_char4(v.x, v.y, v.z, v.w));
}
static __inline__ __device__ uchar4 tex2Dgather(texture<uchar2, 2, cudaReadModeElementType> t, float x, float y, int comp = 0)
{
  __tex2DgatherUtil2(uint4, __utex2Dgather, make_uchar4(v.x, v.y, v.z, v.w));
}
static __inline__ __device__ char4 tex2Dgather(texture<char3, 2, cudaReadModeElementType> t, float x, float y, int comp = 0)
{
  __tex2DgatherUtil3(int4, __itex2Dgather, make_char4(v.x, v.y, v.z, v.w));
}
static __inline__ __device__ uchar4 tex2Dgather(texture<uchar3, 2, cudaReadModeElementType> t, float x, float y, int comp = 0)
{
  __tex2DgatherUtil3(uint4, __utex2Dgather, make_uchar4(v.x, v.y, v.z, v.w));
}
static __inline__ __device__ char4 tex2Dgather(texture<char4, 2, cudaReadModeElementType> t, float x, float y, int comp = 0)
{
  __tex2DgatherUtil4(int4, __itex2Dgather, make_char4(v.x, v.y, v.z, v.w));
}
static __inline__ __device__ uchar4 tex2Dgather(texture<uchar4, 2, cudaReadModeElementType> t, float x, float y, int comp = 0)
{
  __tex2DgatherUtil4(uint4, __utex2Dgather, make_uchar4(v.x, v.y, v.z, v.w));
}
static __inline__ __device__ short4 tex2Dgather(texture<signed short, 2, cudaReadModeElementType> t, float x, float y, int comp = 0)
{
  __tex2DgatherUtil1(int4, __itex2Dgather, make_short4(v.x, v.y, v.z, v.w));
}
static __inline__ __device__ ushort4 tex2Dgather(texture<unsigned short, 2, cudaReadModeElementType> t, float x, float y, int comp = 0)
{
  __tex2DgatherUtil1(uint4, __utex2Dgather, make_ushort4(v.x, v.y, v.z, v.w));
}
static __inline__ __device__ short4 tex2Dgather(texture<short1, 2, cudaReadModeElementType> t, float x, float y, int comp = 0)
{
  __tex2DgatherUtil1(int4, __itex2Dgather, make_short4(v.x, v.y, v.z, v.w));
}
static __inline__ __device__ ushort4 tex2Dgather(texture<ushort1, 2, cudaReadModeElementType> t, float x, float y, int comp = 0)
{
  __tex2DgatherUtil1(uint4, __utex2Dgather, make_ushort4(v.x, v.y, v.z, v.w));
}
static __inline__ __device__ short4 tex2Dgather(texture<short2, 2, cudaReadModeElementType> t, float x, float y, int comp = 0)
{
  __tex2DgatherUtil2(int4, __itex2Dgather, make_short4(v.x, v.y, v.z, v.w));
}
static __inline__ __device__ ushort4 tex2Dgather(texture<ushort2, 2, cudaReadModeElementType> t, float x, float y, int comp = 0)
{
  __tex2DgatherUtil2(uint4, __utex2Dgather, make_ushort4(v.x, v.y, v.z, v.w));
}
static __inline__ __device__ short4 tex2Dgather(texture<short3, 2, cudaReadModeElementType> t, float x, float y, int comp = 0)
{
  __tex2DgatherUtil3(int4, __itex2Dgather, make_short4(v.x, v.y, v.z, v.w));
}
static __inline__ __device__ ushort4 tex2Dgather(texture<ushort3, 2, cudaReadModeElementType> t, float x, float y, int comp = 0)
{
  __tex2DgatherUtil3(uint4, __utex2Dgather, make_ushort4(v.x, v.y, v.z, v.w));
}
static __inline__ __device__ short4 tex2Dgather(texture<short4, 2, cudaReadModeElementType> t, float x, float y, int comp = 0)
{
  __tex2DgatherUtil4(int4, __itex2Dgather, make_short4(v.x, v.y, v.z, v.w));
}
static __inline__ __device__ ushort4 tex2Dgather(texture<ushort4, 2, cudaReadModeElementType> t, float x, float y, int comp = 0)
{
  __tex2DgatherUtil4(uint4, __utex2Dgather, make_ushort4(v.x, v.y, v.z, v.w));
}
static __inline__ __device__ int4 tex2Dgather(texture<signed int, 2, cudaReadModeElementType> t, float x, float y, int comp = 0)
{
  __tex2DgatherUtil1(int4, __itex2Dgather, v);
}
static __inline__ __device__ uint4 tex2Dgather(texture<unsigned int, 2, cudaReadModeElementType> t, float x, float y, int comp = 0)
{
  __tex2DgatherUtil1(uint4, __utex2Dgather, v);
}
static __inline__ __device__ int4 tex2Dgather(texture<int1, 2, cudaReadModeElementType> t, float x, float y, int comp = 0)
{
  __tex2DgatherUtil1(int4, __itex2Dgather, v);
}
static __inline__ __device__ uint4 tex2Dgather(texture<uint1, 2, cudaReadModeElementType> t, float x, float y, int comp = 0)
{
  __tex2DgatherUtil1(uint4, __utex2Dgather, v);
}
static __inline__ __device__ int4 tex2Dgather(texture<int2, 2, cudaReadModeElementType> t, float x, float y, int comp = 0)
{
  __tex2DgatherUtil2(int4, __itex2Dgather, v);
}
static __inline__ __device__ uint4 tex2Dgather(texture<uint2, 2, cudaReadModeElementType> t, float x, float y, int comp = 0)
{
  __tex2DgatherUtil2(uint4, __utex2Dgather, v);
}
static __inline__ __device__ int4 tex2Dgather(texture<int3, 2, cudaReadModeElementType> t, float x, float y, int comp = 0)
{
  __tex2DgatherUtil3(int4, __itex2Dgather, v);
}
static __inline__ __device__ uint4 tex2Dgather(texture<uint3, 2, cudaReadModeElementType> t, float x, float y, int comp = 0)
{
  __tex2DgatherUtil3(uint4, __utex2Dgather, v);
}
static __inline__ __device__ int4 tex2Dgather(texture<int4, 2, cudaReadModeElementType> t, float x, float y, int comp = 0)
{
  __tex2DgatherUtil4(int4, __itex2Dgather, v);
}
static __inline__ __device__ uint4 tex2Dgather(texture<uint4, 2, cudaReadModeElementType> t, float x, float y, int comp = 0)
{
  __tex2DgatherUtil4(uint4, __utex2Dgather, v);
}
static __inline__ __device__ float4 tex2Dgather(texture<float, 2, cudaReadModeElementType> t, float x, float y, int comp = 0)
{
  __tex2DgatherUtil1(float4, __ftex2Dgather, v);
}
static __inline__ __device__ float4 tex2Dgather(texture<float1, 2, cudaReadModeElementType> t, float x, float y, int comp = 0)
{
  __tex2DgatherUtil1(float4, __ftex2Dgather, v);
}
static __inline__ __device__ float4 tex2Dgather(texture<float2, 2, cudaReadModeElementType> t, float x, float y, int comp = 0)
{
  __tex2DgatherUtil2(float4, __ftex2Dgather, v);
}
static __inline__ __device__ float4 tex2Dgather(texture<float3, 2, cudaReadModeElementType> t, float x, float y, int comp = 0)
{
  __tex2DgatherUtil3(float4, __ftex2Dgather, v);
}
static __inline__ __device__ float4 tex2Dgather(texture<float4, 2, cudaReadModeElementType> t, float x, float y, int comp = 0)
{
  __tex2DgatherUtil4(float4, __ftex2Dgather, v);
}

#undef __tex2DgatherUtil
#undef __tex2DgatherUtil1
#undef __tex2DgatherUtil2
#undef __tex2DgatherUtil3
#undef __tex2DgatherUtil4

#define __utex2Dgather \
        000 incorrect invocation of builtin __utex2Dgather 000
#define __itex2Dgather \
        000 incorrect invocation of builtin __itex2Dgather 000
#define __ftex2Dgather \
        000 incorrect invocation of builtin __ftex2Dgather 000

#endif /* !__CUDA_ARCH__ || __CUDA_ARCH__ >= 200 */

#elif defined(__CUDABE__)

extern uint4  __utex2Dgather0(const void*, float2);
extern uint4  __utex2Dgather1(const void*, float2);
extern uint4  __utex2Dgather2(const void*, float2);
extern uint4  __utex2Dgather3(const void*, float2);
extern int4   __itex2Dgather0(const void*, float2);
extern int4   __itex2Dgather1(const void*, float2);
extern int4   __itex2Dgather2(const void*, float2);
extern int4   __itex2Dgather3(const void*, float2);
extern float4 __ftex2Dgather0(const void*, float2);
extern float4 __ftex2Dgather1(const void*, float2);
extern float4 __ftex2Dgather2(const void*, float2);
extern float4 __ftex2Dgather3(const void*, float2);

#define __utex2Dgather(t, i, c) \
        __utex2Dgather##c(t, i)
#define __itex2Dgather(t, i, c) \
        __itex2Dgather##c(t, i)
#define __ftex2Dgather(t, i, c) \
        __ftex2Dgather##c(t, i)

#endif /* __cplusplus && __CUDACC__ */
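tex2Dgather() returns one selected component from each texel of the 2x2 footprint around the coordinate, without filtering; per the guards above it requires compute capability 2.0 and, like all texture reads, a texture bound to a cudaArray. A device-side usage sketch (host-side binding assumed, compile with -arch=sm_20):

#include <cuda_runtime.h>

texture<uchar2, 2, cudaReadModeElementType> gatherTex;

__global__ void gather(uchar4 *out, float u, float v)
{
    /* The x component (comp = 0) of each of the four surrounding texels;
       the uchar2 overload above returns them packed into a uchar4. */
    out[0] = tex2Dgather(gatherTex, u, v, 0);
}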
#endif /* !__TEXTURE_FETCH_FUNCTIONS_H__ */
texture_types.h
skipping to change at line 47
#define __TEXTURE_TYPES_H__

/*******************************************************************************
*                                                                              *
*                                                                              *
*                                                                              *
*******************************************************************************/

#include "driver_types.h"
/**
 * \addtogroup CUDART_TYPES
 *
 * @{
 */

/*******************************************************************************
*                                                                              *
*                                                                              *
*                                                                              *
*******************************************************************************/
/**
 * CUDA texture address modes
 */
/*DEVICE_BUILTIN*/
enum cudaTextureAddressMode
{
    cudaAddressModeWrap   = 0,    /**< Wrapping address mode */
    cudaAddressModeClamp  = 1,    /**< Clamp to edge address mode */
    cudaAddressModeMirror = 2,    /**< Mirror address mode */
    cudaAddressModeBorder = 3     /**< Border address mode */
};
/**
 * CUDA texture filter modes
 */
/*DEVICE_BUILTIN*/
enum cudaTextureFilterMode
{
    cudaFilterModePoint  = 0,     /**< Point filter mode */
    cudaFilterModeLinear = 1      /**< Linear filter mode */
};
/**
 * CUDA texture read modes
 */
/*DEVICE_BUILTIN*/
enum cudaTextureReadMode
{
    cudaReadModeElementType     = 0,  /**< Read texture as specified element type */
    cudaReadModeNormalizedFloat = 1   /**< Read texture as normalized float */
};
/**
 * CUDA texture reference
 */
/*DEVICE_BUILTIN*/
struct textureReference
{
    /**
     * Indicates whether texture reads are normalized or not
     */
    int                          normalized;
    /**
     * Texture filter mode
     */
    enum cudaTextureFilterMode   filterMode;
    /**
     * Texture address mode for up to 3 dimensions
     */
    enum cudaTextureAddressMode  addressMode[3];
    /**
     * Channel descriptor for the texture reference
     */
    struct cudaChannelFormatDesc channelDesc;
    int                          __cudaReserved[16];
};
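The texture<> template used in device code derives from this struct, so the fields can be set directly from host code before binding; a sketch:

#include <cuda_runtime.h>

texture<float, 2, cudaReadModeElementType> tex;

void configure()
{
    tex.normalized     = 1;                     /* coordinates in [0, 1)   */
    tex.filterMode     = cudaFilterModeLinear;  /* bilinear interpolation  */
    tex.addressMode[0] = cudaAddressModeWrap;   /* wrap in x ...           */
    tex.addressMode[1] = cudaAddressModeClamp;  /* ... clamp in y          */
    /* cudaBindTextureToArray(tex, someArray, desc) would follow. */
}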
/** @} */
/** @} */ /* END CUDART_TYPES */

#endif /* !__TEXTURE_TYPES_H__ */