1.获取指定系统上可用的计算平台,使用函数:
cl_int clGetPlatformIDs(
cl_uint num_entries,
cl_platform_id *platforms,
cl_uint *num_platforms)
eg:
cl_int err;
cl_uint nPlatform;
cl_platform_id *listPlatform;
cl_device_id *listDevice;
err = clGetPlatformIDs(0, NULL, &nPlatform);
if(err < 0) {
perror("Couldn't find any platforms.");
exit(1);
}
listPlatform = (cl_platform_id*)malloc(nPlatform * sizeof(cl_platform_id));
clGetPlatformIDs(nPlatform, listPlatform, NULL);
clGetPlatformIDs 由应用程序调用两次。
第一次调用:将NULL 和 cl_uint 指针(&nPlatform)分别传递给形参 *platforms 和 *num_platforms,获取platform 数量;以便分配空间来存储平台信息。
第二次调用:将已分配好空间的 cl_platform_id 指针(listPlatform)传递给形参 platforms,并把平台数量 nPlatform 传给 num_entries,由 OpenCL 实现把各平台的 ID 填入该数组。
2.获取某平台上设备的名称、类型、设备厂商等相关信息,使用函数:
cl_int clGetDeviceIDs(
cl_platform_id platform,
cl_device_type device_type,
cl_uint num_entries,
cl_device_id *devices,
cl_uint *num_devices)
eg:
err = clGetDeviceIDs(listPlatform[i], CL_DEVICE_TYPE_ALL, 0, NULL, &nDevice);
if(err < 0) {
perror("Couldn't find any devices");
exit(1);
}
listDevice = (cl_device_id*)malloc(nDevice * sizeof(cl_device_id));
clGetDeviceIDs(listPlatform[i], CL_DEVICE_TYPE_ALL, nDevice, listDevice, NULL);
clGetDeviceIDs由应用程序调用两次,同 clGetPlatformIDs。
3.上下文(Context ),使用函数:
cl_context clCreateContext(
const cl_context_properties *properties,
cl_uint num_devices,
const cl_device_id *devices,
void (CL_CALLBACK *pfn_notify)(const char *errinfo, const void *private_info, size_t cb, void *user_data),
void *user_data,
cl_int *errcode_ret)
eg:
context = clCreateContext(properties,1,&device,NULL,NULL,&error);
if(error != CL_SUCCESS){
LOGD("create context fail !\n");
exit(1);
}
作用:
1)协调主机同设备之间的交互机制;
2)管理设备上可用的内存对象;
3)跟踪针对每个设备新建的kernel 和程序;
4.命令队列(Command Queue),使用函数:
cl_command_queue clCreateCommandQueue(
cl_context context,
cl_device_id device,
cl_command_queue_properties properties,
cl_int *errcode_ret)
eg:
queue = clCreateCommandQueue(context,device,CL_QUEUE_PROFILING_ENABLE,&error);
if(error != CL_SUCCESS){
LOGD("create command queue fail !\n");
exit(1);
}
作用:
1)通过提交命令到命令队列开始与设备进行通信;
2)一种主机端向设备端发送请求的机制;
3)每个命令队列只关联一个设备,即需要为每个要使用的设备分别创建命令队列(同一设备也可以创建多个命令队列);
5.事件(Event),任何以clEnqueue 开头的API 调用,都会产生一个事件:
作用:
1)表示依赖;
2)提供程序剖析机制;
6.内存(buffer)对象,使用函数:
cl_mem clCreateBuffer (
cl_context context,
cl_mem_flags flags,
size_t size,
void *host_ptr,
cl_int *errcode_ret)
eg:
cl_mem inbuf_a ;
cl_mem inbuf_b ;
cl_mem outbuf_r ;
inbuf_a = clCreateBuffer(context,CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR,mem_size,vect_a,&error);
if(error!=CL_SUCCESS){
LOGD("create buffer inbuf_a fail !\n");
exit(1);
}
inbuf_b = clCreateBuffer(context,CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR,mem_size,vect_b,&error);
if(error!=CL_SUCCESS){
LOGD("create buffer inbuf_b fail !\n");
exit(1);
}
outbuf_r = clCreateBuffer(context,CL_MEM_WRITE_ONLY,mem_size,NULL,&error);
if(error!=CL_SUCCESS){
LOGD("create buffer outbuf_r fail !\n");
exit(1);
}
作用:
1)新建 buffer,用于存放与上下文相关联的设备的输入、输出数据;
7.OpenCL C代码,称为kernel 的函数的集合,kernel是在设备上执行的程序:
eg:
const char *kernelSourceCode2 = KERNEL(
__kernel void warp(__global uint *buffer)
{
size_t gidx = get_global_id(0);
size_t gidy = get_global_id(1);
buffer[gidx + 4 * gidy] = (1 << gidx) | (0x10 << gidy);
}
);
或者:
const char* program_src = ""
"__kernel void vector_add_gpu (__global const float* src_a,\n"
" __global const float* src_b,\n"
" __global float* res,\n"
" const int num)\n"
"{\n"
" int idx = get_global_id(0);\n"
" if(idx < num)\n"
" res[idx] = src_a[idx] + src_b[idx];\n"
"}\n";
步骤如下:
1)源代码以字符串形式存储;
2)通过调用函数 clCreateProgramWithSource 将其转换成一个cl_program对象;
cl_program clCreateProgramWithSource (
cl_context context,
cl_uint count,
const char **strings,
const size_t *lengths,
cl_int *errcode_ret)
eg:
cl_program program = clCreateProgramWithSource(context,1,&program_src,&program_len,&error);
if(error!=CL_SUCCESS){
LOGD("create program fail !\n");
exit(1);
}
3)通过调用函数 clBuildProgram 在多个支持OpenCL 的设备上编译程序对象;
cl_int clBuildProgram (
cl_program program,
cl_uint num_devices,
const cl_device_id *device_list,
const char *options,
void (*pfn_notify)(cl_program, void *user_data),
void *user_data)
eg:
error = clBuildProgram(program,1,&device,NULL,NULL,NULL);
if(error!=CL_SUCCESS){
LOGD("build program fail !\n");
clGetProgramBuildInfo(program,device,CL_PROGRAM_BUILD_LOG,1024,build_log,&log_size);
LOGD("build_log : %s\n",build_log);
exit(1);
}
4)通过调用函数 clSetKernelArg 为 kernel 的每个参数设置取值(内存对象或标量),即指定 kernel 执行时所使用的数据;
cl_int clSetKernelArg (
cl_kernel kernel,
cl_uint arg_index,
size_t arg_size,
const void *arg_value)
eg:
error = clSetKernelArg(vector_add_kernel,0,sizeof(cl_mem),&inbuf_a);
error |= clSetKernelArg(vector_add_kernel,1,sizeof(cl_mem),&inbuf_b);
error |= clSetKernelArg(vector_add_kernel,2,sizeof(cl_mem),&outbuf_r);
error |= clSetKernelArg(vector_add_kernel,3,sizeof(cl_int),&size);
if(error!=CL_SUCCESS){
LOGD("set kernel arg fail !\n");
exit(1);
}
5)通过调用 clEnqueueNDRangeKernel 开始执行kernel 程序;
cl_int clEnqueueNDRangeKernel (
cl_command_queue command_queue,
cl_kernel kernel,
cl_uint work_dim,
const size_t *global_work_offset,
const size_t *global_work_size,
const size_t *local_work_size,
cl_uint num_events_in_wait_list,
const cl_event *event_wait_list,
cl_event *event)
eg:
local_ws = 256;
nMaxWorkItemSizes[0]=256
global_ws = shrRoundUp(local_ws,size);
LOGD("local_ws=%d,global_ws=%d\n",local_ws,global_ws);
error = clEnqueueNDRangeKernel(queue,vector_add_kernel,1,NULL,&global_ws,&local_ws,0,NULL,NULL);
if(error!=CL_SUCCESS){
LOGD("enqueue kernel fail !\n");
exit(1);
}
参数解释:
1)cl_command_queue command_queue :kernel 要加入的命令队列
2)cl_kernel kernel :kernel句柄
3)cl_uint work_dim :项目维度(类似于for语句的几重循环)
4)const size_t *global_work_offset :获取初始索引号,(类似于for语句的循环从哪里开始)
5)const size_t *global_work_size :总的项目数(类似于for语句总的执行次数)
6)const size_t *local_work_size :每个组内的项目数(总的项目数必须能被它整除)
7)cl_uint num_events_in_wait_list:等待事件数
8)const cl_event *event_wait_list:等待事件
9)cl_event *event:事件
8.将内存对象的结果回读到Host,使用函数:
cl_int clEnqueueReadBuffer (
cl_command_queue command_queue,
cl_mem buffer,
cl_bool blocking_read,
size_t offset,
size_t cb,
void *ptr,
cl_uint num_events_in_wait_list,
const cl_event *event_wait_list,
cl_event *event)
eg:
error = clEnqueueReadBuffer(queue,outbuf_r,CL_TRUE,0,mem_size,vect_c,0,NULL,NULL);
if (error != CL_SUCCESS) {
LOGD("Error: Read buffer queue\n");
exit(1);
}
9.资源回收,使用函数:
status = clReleaseKernel(kernel);
status = clReleaseProgram(program);
status = clReleaseMemObject(outputBuffer);
status = clReleaseCommandQueue(commandQueue);
status = clReleaseContext(context);
10.基本API: