OpenCL环境搭建及示例

引用:http://bbs.csdn.net/topics/390899158(概念问题)

http://blog.csdn.net/hermittt/article/details/50668850(平台搭建及示例)


硬件:Intel(R) HD Graphics 4400

软件环境:Win7 x64 + VS2010 + OpenCL SDK 1.2

前提:GPU或者CPU支持OpenCL

检测方法:使用GPU Caps Viewer查看设备是否支持OpenCL

1. 去Intel官网下载OpenCL SDK(OpenCL是跨平台的标准,而非跨平台的工具)。AMD、Intel、NVIDIA都有各自的SDK,但接口都遵循Khronos发布的接口标准。

2. 安装Intel版的SDK,然后提取其中的include和lib目录。

3. 在VS2010中新建项目,然后在项目属性中做如下设置:VC++目录->包含目录中添加include的路径;链接器->附加库目录中添加x86 lib的目录(x64的库不能用,原因尚未确认;常见原因是项目平台为默认的Win32,库的位数需要与项目平台一致);

链接器->输入->附加依赖项中添加OpenCL.lib。

4.添加如下代码

// newTutorial1.cpp : Defines the entry point for the console application.

//

//#include "stdafx.h"  (Visual Studio precompiled header; not needed here)

// OpenCL host API (cl_* types and cl* functions).
#include <CL/cl.h>

// C runtime: printf, malloc/free/rand/srand, strlen/memcmp, time.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>

// C++ standard library: std::fstream, std::string.
#include <fstream>
#include <iostream>
#include <string>

 

// Brings all std names into scope; the code below still spells out std:: explicitly.
using namespace std;

// Number of float elements in each host/device buffer; also used as the
// one-dimensional global work size when the kernel is enqueued.
#define NWITEMS 62144

 

// MSVC linker directive: link against OpenCL.lib without listing it in the
// project settings.
#pragma comment(lib,"OpenCL.lib")

 

// Reads the entire contents of a file into a std::string.
//
// filename: path of the file to read; the file is opened in binary mode so
//           its bytes are copied unchanged (embedded NUL bytes included).
// s:        receives the file contents on success; left untouched on failure.
//
// Returns 0 on success and 1 on failure (NULL file name, file cannot be
// opened, its size cannot be determined, or fewer bytes than expected were
// read). A diagnostic is printed to stdout on every failure.
int convertToString(const char *filename,std::string& s)
{
    if(!filename)
    {
        printf("Error: Failed to open file (no file name given)\n");
        return 1;
    }

    std::fstream f(filename, (std::fstream::in | std::fstream::binary));
    if(!f.is_open())
    {
        printf("Error: Failed to open file %s\n", filename);
        return 1;
    }

    // Determine the size by seeking to the end. tellg() yields -1 on failure,
    // so it must be checked before it is converted to an unsigned size.
    f.seekg(0, std::fstream::end);
    const std::streamoff endPos = f.tellg();
    f.seekg(0, std::fstream::beg);
    if(!f || endPos < 0)
    {
        printf("Error: Failed to determine size of file %s\n", filename);
        return 1;
    }

    // Read into a local string first so that `s` is only modified once the
    // whole file has been read successfully.
    std::string contents((size_t)endPos, '\0');
    if(!contents.empty())
    {
        const std::streamsize wanted = (std::streamsize)contents.size();
        f.read(&contents[0], wanted);
        if(f.gcount() != wanted)
        {
            printf("Error: Failed to read file %s\n", filename);
            return 1;
        }
    }

    s.swap(contents);
    return 0;   // the stream is closed by its destructor
}

 

int main(int argc, char* argv[])

{

         //在host内存中创建三个缓冲区

         float*buf1 = 0;

         float*buf2 = 0;

         float*buf = 0;

   

         buf1=(float *)malloc(NWITEMS * sizeof(float));

         buf2=(float *)malloc(NWITEMS * sizeof(float));

         buf=(float *)malloc(NWITEMS * sizeof(float));

 

         //初始化buf1和buf2的内容

         inti;

         srand((unsigned)time( NULL ) );

   for(i = 0; i < NWITEMS; i++)

                   buf1[i]= rand()%65535;

 

         srand((unsigned)time( NULL ) +1000);

   for(i = 0; i < NWITEMS; i++)

           buf2[i] = rand()%65535;

 

         for(i= 0; i < NWITEMS; i++)

           buf[i] = buf1[i] + buf2[i];

        

         cl_intstatus;

         cl_uintnumPlatforms;

         cl_platform_idplatform;

   cl_device_id device;

         cl_contextcontext;

         cl_command_queuequeue;

   const char * filename  ="mul.cl";

   std::string  sourceStr;

   status = convertToString(filename, sourceStr);

   const char * source    =sourceStr.c_str();

   size_t sourceSize[]    = {strlen(source) };

 

         status= clGetPlatformIDs( 1, &platform, NULL);//创建平台对象

   status = clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, 1, &device,NULL);//创建GPU设备

   context = clCreateContext( NULL, 1, &device, NULL, NULL,&status);//创建context

   queue = clCreateCommandQueue( context, device, CL_QUEUE_PROFILING_ENABLE,&status);//创建命令队列

   

   cl_program program = clCreateProgramWithSource(context, 1,&source,sourceSize, NULL);//创建程序对象

   status = clBuildProgram( program, 1, &device, NULL, NULL, NULL );//编译程序对象

        

         if(status!= 0)

         {

                   printf("clBuildfailed:%d\n", status);

                   chartbuf[0x10000];

                   clGetProgramBuildInfo(program,device, CL_PROGRAM_BUILD_LOG, 0x10000, tbuf, NULL);

                   printf("\n%s\n",tbuf);

                   return-1;

         }

         //创建三个OpenCL内存对象,并把buf1的内容通过隐式拷贝的方式

         //拷贝到clbuf1,buf2的内容通过显示拷贝的方式拷贝到clbuf2

   cl_mem clbuf1 = clCreateBuffer(context,

                   CL_MEM_READ_ONLY| CL_MEM_COPY_HOST_PTR,

                   NWITEMS*sizeof(cl_float),buf1,

                   NULL);

 

         cl_memclbuf2 = clCreateBuffer(context,

                   CL_MEM_READ_ONLY,

                   NWITEMS*sizeof(cl_float),NULL,

                   NULL);

 

   status = clEnqueueWriteBuffer(queue, clbuf2, 1,

                                  0,NWITEMS*sizeof(cl_float), buf2, 0, 0, 0);

 

         cl_membuffer = clCreateBuffer( context,

                   CL_MEM_WRITE_ONLY,

                   NWITEMS* sizeof(cl_float),

                   NULL,NULL );

         //创建Kernel对象

         cl_kernelkernel = clCreateKernel( program, "dp_mul", NULL );

         //设置Kernel参数

         cl_intclnum = NWITEMS;

         clSetKernelArg(kernel,0, sizeof(cl_mem), (void*) &clbuf1);

         clSetKernelArg(kernel,1, sizeof(cl_mem), (void*) &clbuf2);

         clSetKernelArg(kernel,2, sizeof(cl_mem), (void*) &buffer);

 

         //执行kernel

         cl_eventev;

         size_tglobal_work_size = NWITEMS;

         clEnqueueNDRangeKernel(queue,

                   kernel,

                   1,

                   NULL,

                   &global_work_size,

                   NULL,0, NULL, &ev);

         clFinish(queue );

 

         //数据拷回host内存

         cl_float*ptr;

         ptr= (cl_float *) clEnqueueMapBuffer( queue,

                   buffer,

                   CL_TRUE,

                   CL_MAP_READ,

                   0,

                   NWITEMS* sizeof(cl_float),

                   0,NULL, NULL, NULL );

        

   //结果验证,和cpu计算的结果比较

         if(!memcmp(buf,ptr, NWITEMS))

                   printf("Verifypassed\n");

         elseprintf("verify failed");

 

         if(buf)

                   free(buf);

         if(buf1)

                   free(buf1);

         if(buf2)

                   free(buf2);

 

         //删除OpenCL资源对象

         clReleaseMemObject(clbuf1);

         clReleaseMemObject(clbuf2);

   clReleaseMemObject(buffer);

         clReleaseProgram(program);

         clReleaseCommandQueue(queue);

         clReleaseContext(context);

         return0;

}

5. 添加kernel文件到项目中

文件名:mul.cl(程序运行时按相对路径"mul.cl"打开该文件,因此它需要位于程序的工作目录下),代码如下:

/*
 * Element-wise product of two float arrays: C[i] = A[i] * B[i].
 *
 * Each work-item processes the single element selected by its global ID in
 * dimension 0. No bounds check is performed, so the global work size must
 * not exceed the number of elements in A, B and C.
 */
__kernel void dp_mul(__global const float* A, __global const float* B, __global float* C)
{
    const int gid = get_global_id(0);
    const float lhs = A[gid];
    const float rhs = B[gid];

    C[gid] = lhs * rhs;
}
/* Empty helper: takes no arguments and has no body. dp_mul does not call it. */
inline void sfs()
{
}

6.编译

错误:编译时出现提示“clCreateCommandQueue 被声明为已否决”。

原因:Intel的OpenCL SDK已经是2.0版本,而clCreateCommandQueue是1.2版本的接口,在2.0中已被弃用。

解决:

方案1:在包含CL/cl.h之前添加宏定义:

#define CL_USE_DEPRECATED_OPENCL_1_2_APIS

方案2:改用2.0的接口:

clCreateCommandQueueWithProperties


你可能感兴趣的:(OpenGL)