最近负责的几个项目需要使用OpenCL进行编程,因此对其进行了学习,并将学习过程中编写的主要Demo代码记录下来,供大家入门参考。
OpenCL的介绍、原理等这里就不赘述了,百度一下有很多资料,下面直接切入主题。
这个demo实现两个数组的相加操作。
1.进行平台的初始化相关操作
/**
 * Initialise the OpenCL objects the demo runs on.
 *
 * Picks the first platform reported by the runtime and the first GPU device
 * on it, then creates a context and a command queue for that device. All
 * four handles are stored in tplatformObj.
 *
 * @param tplatformObj  receives platform, device, context and queue.
 * @return 0 on success; otherwise the OpenCL error code of the failing call
 *         (CL_INVALID_VALUE for a NULL argument, CL_INVALID_PLATFORM when
 *         the runtime reports zero platforms).
 */
int initPlatform(TPlatformObject* tplatformObj)
{
    cl_int err = CL_SUCCESS;
    cl_uint num_platforms = 0;
    char platformInfo[100] = {0};
    char deviceInfo[100] = {0};

    printInfo("InitPlatform\n");
    if(tplatformObj == NULL){
        printErr("initPlatform: tplatformObj is NULL\n");
        return CL_INVALID_VALUE;
    }

    /* Query how many platforms exist before asking for one. */
    err = clGetPlatformIDs(0,NULL,&num_platforms);
    if(err != CL_SUCCESS){
        printErr("initPlatform_clGetPlatformIDS0 failed!err:%d",err);
        return err;
    }
    if(num_platforms == 0){
        /* A successful call can still report no platform at all. */
        printErr("initPlatform: no OpenCL platform available\n");
        return CL_INVALID_PLATFORM;
    }

    /* Take the first platform. */
    err = clGetPlatformIDs(1,&tplatformObj->platform,NULL);
    if(err != CL_SUCCESS){
        printErr("initPlatform_clGetPlatformIDS1 failed!err:%d",err);
        return err;
    }
    err = clGetPlatformInfo(tplatformObj->platform,CL_PLATFORM_VENDOR,
                            sizeof platformInfo,platformInfo,NULL);
    if(err != CL_SUCCESS){
        printErr("initPlatform_clGetPlatforminfo failed!err:%d",err);
        return err;
    }
    printInfo("CL_PLATFORM_VENDOR:%s\n",platformInfo);

    /* Take the first GPU device of that platform. */
    err = clGetDeviceIDs(tplatformObj->platform,CL_DEVICE_TYPE_GPU,1,&tplatformObj->device,NULL);
    if(err != CL_SUCCESS){
        printErr("initPlatform_clGetDeviceIDs failed!err:%d",err);
        return err;
    }

    /* The version string is informational only: a failure here is reported
       but does not abort initialisation, and the buffer is only printed
       when the query actually filled it. */
    err = clGetDeviceInfo(tplatformObj->device,CL_DEVICE_VERSION,
                          sizeof deviceInfo,deviceInfo,NULL);
    if(err == CL_SUCCESS){
        printInfo("initPlatform_CL_DRIVER_VERSION:%s\n",deviceInfo);
    }else{
        printErr("initPlatform_clGetDeviceInfo failed!err:%d",err);
    }

    /* Create the context. */
    tplatformObj->context = clCreateContext(NULL,1,&tplatformObj->device,NULL,NULL,&err);
    if(err != CL_SUCCESS){
        printErr("initPlatform_clCreateContext:%d",err);
        return err;
    }

    /* Create the command queue. */
    tplatformObj->queue = clCreateCommandQueue(tplatformObj->context,tplatformObj->device, 0, &err);
    if(err != CL_SUCCESS){
        printErr("initPlatform_clCreateCommandQueue:%d",err);
        /* Do not leave a half-initialised object behind: drop the context
           that was created just above. */
        clReleaseContext(tplatformObj->context);
        tplatformObj->context = NULL;
        return err;
    }
    return 0;
}
2.加载kernel函数并执行(这里直接把整个文件拷贝过来了)
#include "Rotate.h"
/**
 * Load "Operation.cl" from the current directory, create an OpenCL program
 * object from its text and build it.
 *
 * On success the built program is stored in tplatformObj->program. On a
 * build failure the compiler log is printed, the program object is released
 * and tplatformObj->program is set to NULL.
 *
 * @param tplatformObj  supplies the context (and device, for the build log)
 *                      and receives the program handle.
 * @return 0 on success, -1 on any failure (file I/O, allocation, program
 *         creation or build).
 */
int initProgram(TPlatformObject* tplatformObj)
{
    FILE* fp = NULL;              /* handle on the *.cl kernel source file */
    char* program_buffer = NULL;  /* in-memory copy of the kernel source */
    long file_len = 0;            /* size reported by ftell */
    size_t program_size = 0;      /* number of source bytes actually read */
    cl_int errCode = CL_SUCCESS;

    /* Binary mode so that the size from ftell matches what fread delivers. */
    fp = fopen("Operation.cl","rb");
    if(fp == NULL){
        printErr("initProgram:fp is NULL\n");
        return -1;
    }

    /* Measure the file: seek to the end, read the offset, seek back. */
    if(fseek(fp,0,SEEK_END) != 0 || (file_len = ftell(fp)) < 0){
        printErr("initProgram: cannot determine size of Operation.cl\n");
        fclose(fp);
        return -1;
    }
    rewind(fp);

    program_buffer = (char*)malloc((size_t)file_len + 1);
    if(program_buffer == NULL){
        printErr("initProgram: out of memory reading Operation.cl\n");
        fclose(fp);
        return -1;
    }

    program_size = fread(program_buffer,sizeof(char),(size_t)file_len,fp);
    fclose(fp);
    if(program_size != (size_t)file_len){
        printErr("initProgram: short read on Operation.cl\n");
        free(program_buffer);
        return -1;
    }
    program_buffer[program_size] = '\0';  /* terminate for safety */

    /* Create the program object from the source text. */
    tplatformObj->program = clCreateProgramWithSource(tplatformObj->context,
        1,(const char**)&program_buffer,&program_size,&errCode);
    /* The source text is not used again in this function, on either path. */
    free(program_buffer);
    program_buffer = NULL;
    if(errCode != CL_SUCCESS){
        printErr("initProgram-Couldn't create the program,errcode=%d\n",errCode);
        return -1;
    }

    /* Build the program executable. */
    errCode = clBuildProgram(tplatformObj->program,0,NULL,NULL,NULL,NULL);
    if(errCode != CL_SUCCESS){
        size_t log_size = 0;

        printErr("initProgram_clBuildProgram failed!errcode is :%d\n",errCode);

        /* Best effort: show the compiler output so the kernel can be fixed. */
        if(clGetProgramBuildInfo(tplatformObj->program,tplatformObj->device,
                                 CL_PROGRAM_BUILD_LOG,0,NULL,&log_size) == CL_SUCCESS
           && log_size > 0){
            char* build_log = (char*)malloc(log_size + 1);
            if(build_log != NULL){
                if(clGetProgramBuildInfo(tplatformObj->program,tplatformObj->device,
                                         CL_PROGRAM_BUILD_LOG,log_size,build_log,NULL) == CL_SUCCESS){
                    build_log[log_size] = '\0';
                    printErr("initProgram build log:\n%s\n",build_log);
                }
                free(build_log);
            }
        }

        /* An unbuilt program is of no use to the caller; do not leak it. */
        clReleaseProgram(tplatformObj->program);
        tplatformObj->program = NULL;
        return -1;
    }
    return 0;
}
/**
 * Look up the "vecadd" kernel in the program held by tplatformObj and store
 * its handle in tRotObj->VecAdd.
 *
 * @return 0 when the kernel object was created, -1 otherwise.
 */
int CreateKernel(TPlatformObject* tplatformObj,TRotObj* tRotObj)
{
    cl_int status = CL_SUCCESS;

    tRotObj->VecAdd = clCreateKernel(tplatformObj->program,"vecadd",&status);
    if(CL_SUCCESS == status){
        return 0;
    }

    printErr("CreateKernel failed! errCode:%d\n",status);
    return -1;
}
/**
 * Create the three device buffers used by the kernel, each holding 1024
 * floats and initialised from the matching host array in tRotObj
 * (A -> cl_A, B -> cl_B, C -> cl_C).
 *
 * cl_A and cl_B are read-only for the kernel. cl_C is created read-write:
 * the host reads results back from it, so the kernel must be allowed to
 * write to it (the kernel source is not part of this file; read-write also
 * keeps any read access it may do valid).
 *
 * @return 0 when all three buffers exist; -1 on the first failure, in which
 *         case any buffer created earlier in this call is released and all
 *         three handles are left NULL.
 */
int CreateBuffer(TPlatformObject* tplatformObj,TRotObj* tRotObj)
{
    const size_t bufBytes = 1024*sizeof(float);
    cl_int errCode = CL_SUCCESS;

    /* Known state, so the failure path can tell which buffers exist. */
    tRotObj->cl_A = NULL;
    tRotObj->cl_B = NULL;
    tRotObj->cl_C = NULL;

    tRotObj->cl_A = clCreateBuffer(tplatformObj->context,CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
        bufBytes,(void*)tRotObj->A,&errCode);
    if(errCode != CL_SUCCESS){
        printErr("clCreateBuffer cl_A failed!!");
        goto fail;
    }

    tRotObj->cl_B = clCreateBuffer(tplatformObj->context,CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
        bufBytes,(void*)tRotObj->B,&errCode);
    if(errCode != CL_SUCCESS){
        printErr("clCreateBuffer cl_B failed!!");
        goto fail;
    }

    tRotObj->cl_C = clCreateBuffer(tplatformObj->context,CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
        bufBytes,(void*)tRotObj->C,&errCode);
    if(errCode != CL_SUCCESS){
        printErr("clCreateBuffer cl_C failed!!");
        goto fail;
    }
    return 0;

fail:
    /* clCreateBuffer yields NULL on failure, so only real handles are released. */
    if(tRotObj->cl_A != NULL){
        clReleaseMemObject(tRotObj->cl_A);
        tRotObj->cl_A = NULL;
    }
    if(tRotObj->cl_B != NULL){
        clReleaseMemObject(tRotObj->cl_B);
        tRotObj->cl_B = NULL;
    }
    tRotObj->cl_C = NULL;
    return -1;
}
/**
 * Bind the three device buffers to the kernel's arguments:
 * index 0 = cl_A, index 1 = cl_B, index 2 = cl_C.
 *
 * @return CL_SUCCESS (0) when every argument was set; otherwise the OpenCL
 *         error code of the first argument that failed. Later arguments are
 *         not attempted, so an early failure can no longer be masked by a
 *         later success.
 */
int SetKernelArgs(TRotObj* tRotObj)
{
    cl_mem* const kernelArgs[] = { &tRotObj->cl_A, &tRotObj->cl_B, &tRotObj->cl_C };
    const cl_uint argCount = (cl_uint)(sizeof kernelArgs / sizeof kernelArgs[0]);
    cl_uint argIdx;

    for(argIdx = 0; argIdx < argCount; argIdx++){
        cl_int errCode = clSetKernelArg(tRotObj->VecAdd,argIdx,sizeof(cl_mem),kernelArgs[argIdx]);
        if(errCode != CL_SUCCESS){
            printErr("SetKernelArgs arg %u failed! errCode:%d\n",(unsigned)argIdx,errCode);
            return errCode;
        }
    }
    return CL_SUCCESS;
}
/**
 * Run the whole vector-add demo on an already initialised platform object.
 *
 * Steps: build the program, create the kernel, allocate and fill three
 * 1024-float host arrays (A[i] = B[i] = i, C = 0), create the device
 * buffers, bind the kernel arguments, launch 1024 work-items in one
 * dimension, read cl_C back into C and print it 16 values per line.
 *
 * Resource handling:
 *  - Everything acquired by this function (program, kernel, buffers, host
 *    arrays) is released on every path; the tRotObj host pointers are set
 *    to NULL afterwards.
 *  - Once the kernel has been enqueued, the command queue, context and
 *    device in tplatformObj are released as well. On earlier failures they
 *    are left untouched so their creator can still release them.
 *
 * @return 0 on success; -1 when a step up to and including the kernel
 *         launch fails; the OpenCL error code if reading the result fails.
 */
int RotateOpen(TPlatformObject* tplatformObj,TRotObj* tRotObj)
{
    enum { VEC_LEN = 1024 };
    const size_t vecBytes = VEC_LEN*sizeof(float);
    const cl_uint work_dim = 1;               /* number of work-item dimensions */
    const size_t global_work_size = VEC_LEN;
    float* A = NULL;
    float* B = NULL;
    float* C = NULL;
    int releasePlatform = 0;
    int ret = -1;
    int i = 0;

    if(initProgram(tplatformObj) != 0){
        printErr("RotateOpen initProgram failed!!");
        return -1;  /* nothing acquired by this function yet */
    }
    printInfo("initProgram done!!\n");

    /* Known state, so the cleanup code can tell which handles exist. */
    tRotObj->VecAdd = NULL;
    tRotObj->cl_A = NULL;
    tRotObj->cl_B = NULL;
    tRotObj->cl_C = NULL;
    tRotObj->A = NULL;
    tRotObj->B = NULL;
    tRotObj->C = NULL;

    /* Create the kernel object. */
    if(CreateKernel(tplatformObj,tRotObj) != CL_SUCCESS){
        printErr("RotateOpen CreateKernel failed!!");
        ret = -1;
        goto cleanup;
    }

    /* Host-side test arrays; calloc leaves C zero-filled. */
    A = (float*)calloc(VEC_LEN,sizeof(float));
    B = (float*)calloc(VEC_LEN,sizeof(float));
    C = (float*)calloc(VEC_LEN,sizeof(float));
    if(A == NULL || B == NULL || C == NULL){
        printErr("RotateOpen host allocation failed!!");
        ret = -1;
        goto cleanup;
    }
    for(i = 0; i < VEC_LEN; i++){
        A[i] = (float)i;
        B[i] = (float)i;
    }
    tRotObj->A = A;
    tRotObj->B = B;
    tRotObj->C = C;

    /* Create the device buffers. */
    if(CreateBuffer(tplatformObj,tRotObj) != 0){
        printErr("RotateOpen CreateBuffer failed!!");
        ret = -1;
        goto cleanup;
    }

    /* Bind the kernel arguments. */
    if(SetKernelArgs(tRotObj)){
        printErr("RotateOpen SetKernelArgs failed!!");
        ret = -1;
        goto cleanup;
    }

    /* Launch the kernel. */
    ret = clEnqueueNDRangeKernel(tplatformObj->queue,tRotObj->VecAdd,work_dim,
        NULL,&global_work_size,NULL,0,NULL,NULL);
    if(ret != CL_SUCCESS){
        printErr("RotateOpen clEnqueueNDRangeKernel failed!!");
        ret = -1;
        goto cleanup;
    }

    /* From this point on the demo is finished with the platform objects. */
    releasePlatform = 1;

    /* Wait for the kernel to complete. */
    if(clFinish(tplatformObj->queue) != CL_SUCCESS){
        printErr("RotateOpen clFinish failed!!");
    }

    /* Blocking read of the result into the host array C. */
    ret = clEnqueueReadBuffer(tplatformObj->queue,tRotObj->cl_C,CL_TRUE,0,vecBytes,C,0,NULL,NULL);
    if(ret != CL_SUCCESS){
        printErr("RotateOpen clEnqueueReadBuffer failed!!");
    }else{
        /* Print the result, 16 values per line.
           NOTE(review): the loop header and per-element printf were lost in
           the published listing; the bound and "%f " format are a
           reconstruction - confirm against the original source. */
        for(i = 0; i < VEC_LEN; i++){
            printf("%f ",C[i]);
            if((i+1)%16==0){
                printf("\n");
            }
        }
    }

cleanup:
    /* Release only handles that were actually created. */
    if(tRotObj->VecAdd != NULL){
        clReleaseKernel(tRotObj->VecAdd);
        tRotObj->VecAdd = NULL;
    }
    if(tRotObj->cl_A != NULL){
        clReleaseMemObject(tRotObj->cl_A);
        tRotObj->cl_A = NULL;
    }
    if(tRotObj->cl_B != NULL){
        clReleaseMemObject(tRotObj->cl_B);
        tRotObj->cl_B = NULL;
    }
    if(tRotObj->cl_C != NULL){
        clReleaseMemObject(tRotObj->cl_C);
        tRotObj->cl_C = NULL;
    }
    /* The program was created by the initProgram call at the top. */
    clReleaseProgram(tplatformObj->program);
    tplatformObj->program = NULL;

    if(releasePlatform){
        clReleaseCommandQueue(tplatformObj->queue);
        clReleaseContext(tplatformObj->context);
        clReleaseDevice(tplatformObj->device);
    }

    free(A);
    free(B);
    free(C);
    tRotObj->A = NULL;
    tRotObj->B = NULL;
    tRotObj->C = NULL;
    return ret;
}