引用:http://bbs.csdn.net/topics/390899158(概念问题)
http://blog.csdn.net/hermittt/article/details/50668850(平台搭建及示例)
硬件:intel(R) HD Graphics 4400
win7_x64+vs2010+opencl_sdk1.2
前提:Gpu或者cpu支持OpenCL
方法:GPU_Caps_Viewer
1. 去Intel官网下载OpenCL SDK(OpenCL是跨平台的标准,而非跨平台的工具)。AMD、Intel、NVIDIA都有各自的SDK,但接口都遵循Khronos发布的接口标准
2.安装intel版的sdk,然后提取其中的include和lib
3.vs2010,新建项目,在属性中->vc++目录->包含目录中添加Include的路径,链接器->附加库目录->添加x86_lib的目录(x64不能用,还没有找到原因),
链接器->输入->附加依赖项中添加OpenCL.lib
4.添加如下代码
// newTutorial1.cpp : Defines the entrypoint for the console application.
//
//#include "stdafx.h"(Visual Studio的预编译头文件,并非MFC专用;此处不需要)
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <fstream>
#include <iostream>
#include <string>
#include <vector>
#include <CL/cl.h>
using namespace std;
// Number of float elements in each host buffer and device buffer; also used
// as the global work size when the kernel is enqueued.
#define NWITEMS 62144
// MSVC-specific directive asking the linker to pull in OpenCL.lib.
#pragma comment(lib,"OpenCL.lib")
// Reads the entire contents of a file into a string.
//
// The file is opened in binary mode, so its bytes are stored unmodified.
//
// filename: path of the file to read; must not be NULL.
// s:        receives the file contents on success; left untouched on failure.
//
// Returns 0 on success and 1 on any failure (NULL path, file cannot be
// opened, its size cannot be determined, or it cannot be read completely).
// A diagnostic is printed to stdout on failure.
int convertToString(const char *filename, std::string& s)
{
    if (filename == NULL)
    {
        printf("Error: No file name given\n");
        return 1;
    }

    std::ifstream f(filename, std::ios::in | std::ios::binary);
    if (!f.is_open())
    {
        printf("Error: Failed to open file %s\n", filename);
        return 1;
    }

    // Seek to the end to learn the size; tellg() yields -1 when it fails, so
    // the value is checked before it is used as a length.
    f.seekg(0, std::ios::end);
    const std::streamoff length = static_cast<std::streamoff>(f.tellg());
    if (length < 0)
    {
        printf("Error: Failed to determine size of file %s\n", filename);
        return 1;
    }
    f.seekg(0, std::ios::beg);

    // Read into a temporary so that s is only modified once the whole file
    // has been read successfully.
    std::string contents(static_cast<size_t>(length), '\0');
    if (length > 0)
    {
        f.read(&contents[0], static_cast<std::streamsize>(length));
        if (!f)
        {
            printf("Error: Failed to read file %s\n", filename);
            return 1;
        }
    }

    s.swap(contents);
    return 0;
}
int main(int argc, char* argv[])
{
//在host内存中创建三个缓冲区
float*buf1 = 0;
float*buf2 = 0;
float*buf = 0;
buf1=(float *)malloc(NWITEMS * sizeof(float));
buf2=(float *)malloc(NWITEMS * sizeof(float));
buf=(float *)malloc(NWITEMS * sizeof(float));
//初始化buf1和buf2的内容
inti;
srand((unsigned)time( NULL ) );
for(i = 0; i < NWITEMS; i++)
buf1[i]= rand()%65535;
srand((unsigned)time( NULL ) +1000);
for(i = 0; i < NWITEMS; i++)
buf2[i] = rand()%65535;
for(i= 0; i < NWITEMS; i++)
buf[i] = buf1[i] + buf2[i];
cl_intstatus;
cl_uintnumPlatforms;
cl_platform_idplatform;
cl_device_id device;
cl_contextcontext;
cl_command_queuequeue;
const char * filename ="mul.cl";
std::string sourceStr;
status = convertToString(filename, sourceStr);
const char * source =sourceStr.c_str();
size_t sourceSize[] = {strlen(source) };
status= clGetPlatformIDs( 1, &platform, NULL);//创建平台对象
status = clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, 1, &device,NULL);//创建GPU设备
context = clCreateContext( NULL, 1, &device, NULL, NULL,&status);//创建context
queue = clCreateCommandQueue( context, device, CL_QUEUE_PROFILING_ENABLE,&status);//创建命令队列
cl_program program = clCreateProgramWithSource(context, 1,&source,sourceSize, NULL);//创建程序对象
status = clBuildProgram( program, 1, &device, NULL, NULL, NULL );//编译程序对象
if(status!= 0)
{
printf("clBuildfailed:%d\n", status);
chartbuf[0x10000];
clGetProgramBuildInfo(program,device, CL_PROGRAM_BUILD_LOG, 0x10000, tbuf, NULL);
printf("\n%s\n",tbuf);
return-1;
}
//创建三个OpenCL内存对象,并把buf1的内容通过隐式拷贝的方式
//拷贝到clbuf1,buf2的内容通过显示拷贝的方式拷贝到clbuf2
cl_mem clbuf1 = clCreateBuffer(context,
CL_MEM_READ_ONLY| CL_MEM_COPY_HOST_PTR,
NWITEMS*sizeof(cl_float),buf1,
NULL);
cl_memclbuf2 = clCreateBuffer(context,
CL_MEM_READ_ONLY,
NWITEMS*sizeof(cl_float),NULL,
NULL);
status = clEnqueueWriteBuffer(queue, clbuf2, 1,
0,NWITEMS*sizeof(cl_float), buf2, 0, 0, 0);
cl_membuffer = clCreateBuffer( context,
CL_MEM_WRITE_ONLY,
NWITEMS* sizeof(cl_float),
NULL,NULL );
//创建Kernel对象
cl_kernelkernel = clCreateKernel( program, "dp_mul", NULL );
//设置Kernel参数
cl_intclnum = NWITEMS;
clSetKernelArg(kernel,0, sizeof(cl_mem), (void*) &clbuf1);
clSetKernelArg(kernel,1, sizeof(cl_mem), (void*) &clbuf2);
clSetKernelArg(kernel,2, sizeof(cl_mem), (void*) &buffer);
//执行kernel
cl_eventev;
size_tglobal_work_size = NWITEMS;
clEnqueueNDRangeKernel(queue,
kernel,
1,
NULL,
&global_work_size,
NULL,0, NULL, &ev);
clFinish(queue );
//数据拷回host内存
cl_float*ptr;
ptr= (cl_float *) clEnqueueMapBuffer( queue,
buffer,
CL_TRUE,
CL_MAP_READ,
0,
NWITEMS* sizeof(cl_float),
0,NULL, NULL, NULL );
//结果验证,和cpu计算的结果比较
if(!memcmp(buf,ptr, NWITEMS))
printf("Verifypassed\n");
elseprintf("verify failed");
if(buf)
free(buf);
if(buf1)
free(buf1);
if(buf2)
free(buf2);
//删除OpenCL资源对象
clReleaseMemObject(clbuf1);
clReleaseMemObject(clbuf2);
clReleaseMemObject(buffer);
clReleaseProgram(program);
clReleaseCommandQueue(queue);
clReleaseContext(context);
return0;
}
5.添加kernel文件到项目中
代码如下:mul.cl
/*
 * Element-wise product: each work-item writes C[i] = A[i] * B[i], where i is
 * the work-item's global id in dimension 0.
 *
 * A, B: input buffers (read only).
 * C:    output buffer.
 *
 * No bounds check is performed, so the global work size must not exceed the
 * number of elements in the buffers.
 */
__kernel void dp_mul(__global const float* A, __global const float* B, __global float* C)
{
    /* get_global_id returns size_t; keep that type instead of narrowing to int. */
    const size_t gid = get_global_id(0);
    C[gid] = A[gid] * B[gid];
}
/* Empty helper: takes no arguments, does nothing, and is not called by dp_mul. */
inline void sfs()
{}
6.编译
error:出现如此错误提示“clCreateCommandQueue 被声明为已否决”
原因:Intel的OpenCL SDK已经是2.0版本,而clCreateCommandQueue是1.2版本的接口,在2.0中已被弃用
解决:方案1:在包含CL/cl.h之前添加宏定义:
#define CL_USE_DEPRECATED_OPENCL_1_2_APIS
方案2:使用2.0的接口
clCreateCommandQueueWithProperties