cuda相关函数

内存相关

函数 形参 返回值 意义
__host__ ​ __device__ ​cudaError_t cudaMalloc ( void** devPtr, size_t size ) devPtr:指向分配内存
size:需要分配的大小
cudaSuccess = 0:成功
cudaErrorInvalidValue = 11:形参不对
cudaErrorMemoryAllocation = 2:内存分配失败
在设备上分配内存
__host__ ​ __device__ ​cudaError_t cudaFree ( void* devPtr ) devPtr:需要释放的设备内存指针 cudaSuccess = 0:成功
cudaErrorInvalidDevicePointer = 17:不是有效的设备指针
释放设备内存
__host__ ​cudaError_t cudaMemcpy ( void* dst, const void* src, size_t count, cudaMemcpyKind kind ) dst:目的地址
src:源地址
count:字节数
kind:方向,包括: cudaMemcpyHostToHost, cudaMemcpyHostToDevice, cudaMemcpyDeviceToHost, cudaMemcpyDeviceToDevice, cudaMemcpyDefault
cudaSuccess = 0:成功
cudaErrorInvalidValue = 11:形参不对
cudaErrorInvalidMemcpyDirection = 21:kind参数错误
从src拷贝count个字节到dst

设备属性相关

函数 形参 返回值 意义
__host__ ​ __device__ ​cudaError_t cudaGetDevice ( int* device ) device:返回正在运行的设备 cudaSuccess = 0:成功
cudaErrorInvalidValue = 11:形参不对
获取正在运行的设备
__host__ ​ __device__ ​cudaError_t cudaGetDeviceCount ( int* count ) count:返回可用设备的个数 cudaSuccess = 0:成功 获取设备个数
__host__ ​cudaError_t cudaGetDeviceProperties ( cudaDeviceProp* prop, int device ) prop:设备属性
device:设备号
cudaSuccess = 0:成功
cudaErrorInvalidDevice = 10:不是有效设备号
获取设备的性质
__host__ ​cudaError_t cudaChooseDevice ( int* device, const cudaDeviceProp* prop ) device:返回最符合的设备号
prop:设备属性
cudaSuccess = 0:成功
cudaErrorInvalidValue = 11:形参不对
选择最符合的设备号
__host__ ​cudaError_t cudaMemcpyToSymbol ( const void* symbol, const void* src, size_t count, size_t offset = 0, cudaMemcpyKind kind = cudaMemcpyHostToDevice ) symbol:目的常量地址
src:源地址
count:拷贝字节数
offset:symbol起始地址的偏移
kind:传输方向
stream:执行拷贝的流(仅异步版本 cudaMemcpyToSymbolAsync 有此参数,上面的同步函数签名中没有)
cudaSuccess = 0:成功
cudaErrorInvalidValue = 11:形参不对
cudaErrorInvalidSymbol = 13:不是有效识别符
cudaErrorInvalidMemcpyDirection = 21:拷贝方向不对
cudaErrorNoKernelImageForDevice = 48:没有可用的设备映像
拷贝数据到常量内存

event相关

函数 形参 返回值 意义
__host__ ​cudaError_t cudaEventCreate ( cudaEvent_t* event ) event:返回创建的event obj cudaSuccess = 0:成功
cudaErrorInvalidValue = 11:形参不对
cudaErrorLaunchFailure = 4:执行kernel过程中发生故障,常见的有无效指针、访问越界等
cudaErrorMemoryAllocation = 2:没有足够多的内存可分配
创建event
__host__ __device__ cudaError_t cudaEventDestroy ( cudaEvent_t event ) event:销毁的event cudaSuccess = 0:成功
cudaErrorInvalidValue = 11:形参不对
cudaErrorLaunchFailure = 4:执行kernel过程中发生故障
销毁event
__host__ ​cudaError_t cudaEventElapsedTime ( float* ms, cudaEvent_t start, cudaEvent_t end ) ms:返回的时间差
start:开始事件
end:结束事件
cudaSuccess = 0:成功
cudaErrorInvalidValue = 11:形参不对
cudaErrorInvalidResourceHandle = 33:传给API的资源句柄不对,常见类型有 cudaStream_t 和 cudaEvent_t
cudaErrorLaunchFailure = 4:执行kernel过程中发生故障
计算两个事件的时间差
__host__ __device__ cudaError_t cudaEventRecord ( cudaEvent_t event, cudaStream_t stream = 0 ) event:需要记录的事件
stream:需要记录事件的流
cudaSuccess = 0:成功
cudaErrorInvalidValue = 11:形参不对
cudaErrorInvalidResourceHandle = 33:传给API的资源句柄不对
cudaErrorLaunchFailure = 4:执行kernel过程中发生故障
记录某个事件
__host__ cudaError_t cudaEventSynchronize ( cudaEvent_t event ) event:等待完成的事件 cudaSuccess = 0:成功
cudaErrorInvalidValue = 11:形参不对
cudaErrorInvalidResourceHandle = 33:传给API的资源句柄不对
cudaErrorLaunchFailure = 4:执行kernel过程中发生故障
等待事件完成

texture

函数 形参 返回值 意义
template < class T, int dim, enum cudaTextureReadMode readMode >
__host__ ​cudaError_t cudaBindTexture ( size_t* offset, const texture < T, dim, readMode > & tex, const void* devPtr, size_t size = UINT_MAX )
offset:偏移,一般为NULL
tex:绑定的texture名
devPtr:被绑定的内存地址
size:绑定的大小
cudaSuccess = 0:成功
cudaErrorInvalidValue = 11:形参不对
cudaErrorInvalidTexture = 18:绑定的texture不对
将devPtr绑定为texture内存
template < class T, int dim, enum cudaTextureReadMode readMode >
__host__ ​cudaError_t cudaBindTexture2D ( size_t* offset, const texture < T, dim, readMode > & tex, const void* devPtr, size_t width, size_t height, size_t pitch )
offset:偏移
tex:绑定的texture引用
devPtr:需要绑定的地址
width:宽度(以texel为单位)
height:高度(以texel为单位)
pitch:行间距(以字节为单位)
cudaSuccess = 0:成功
cudaErrorInvalidValue = 11:形参不对
cudaErrorInvalidTexture = 18:绑定的texture不对
将devPtr绑定为texture2D内存
template < class T, int dim, enum cudaTextureReadMode readMode >
__host__ ​cudaError_t cudaBindTexture2D ( size_t* offset, const texture < T, dim, readMode > & tex, const void* devPtr, const cudaChannelFormatDesc& desc, size_t width, size_t height, size_t pitch )
template < class T, int dim, enum cudaTextureReadMode readMode >
__host__ ​cudaError_t cudaBindTexture ( size_t* offset, const texture < T, dim, readMode > & tex, const void* devPtr, const cudaChannelFormatDesc& desc, size_t size = UINT_MAX )
template < class T, int dim, enum cudaTextureReadMode readMode >
__host__ ​cudaError_t cudaUnbindTexture ( const texture < T, dim, readMode > & tex )
取消绑定
template<class T>
T tex1Dfetch(cudaTextureObject_t texObj, int x)
x:使用非标准化坐标 取数据
template<class T>
T tex1D(cudaTextureObject_t texObj, float x)
使用texture坐标
template<class T>
T tex2D(cudaTextureObject_t texObj, float x, float y)
使用texture坐标

参考链接:https://docs.nvidia.com/cuda/cuda-runtime-api/group__CUDART__MEMORY.html#group__CUDART__MEMORY_1g37d37965bfb4803b6d4e59ff26856356

你可能感兴趣的:(C)