OpenCL学习(二)

看了一些结构介绍后,开始跟着别人的足迹写代码。先写大家入门时常用的简单一维数组相加的OpenCL代码,以此熟悉OpenCL程序的结构。此代码是在Freescale的i.MX6Q上运行的:使用交叉编译工具链,并加上OpenCL库和头文件等编译选项进行编译。

#include <CL/cl.h>
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <iostream>
#include <fstream>
 
using namespace std;

int main(int argc, char**argv)
{
    cl_platform_id platform;
    cl_context context = 0;
     cl_command_queue queue = 0;
    cl_device_id device = 0;
    cl_kernel kernel = 0;
    cl_int errNum = 0;
     cl_uint numPlatforms = 0;
 
     //Platform
     errNum = clGetPlatformIDs( 1, &platform, &numPlatforms );
    if( errNum != CL_SUCCESS )
    {
         cout << "Error getting platform id:" <<endl;
        return 1;
     }
 
     //Device
	 errNum = clGetDeviceIDs( platform, CL_DEVICE_TYPE_GPU, 1, &device, NULL);
     if( errNum != CL_SUCCESS )
     {
         cout << "Error getting device ids:"  <<endl;
         return 1;
      }

     //Context
      context = clCreateContext(0, 1, &device, NULL, NULL, &errNum);
     if( errNum != CL_SUCCESS )
     {
         cout << "Error creating context:" <<endl;
         return 1;
     }
 
     //Command-queue
     queue = clCreateCommandQueue( context, device, 0, &errNum );
     if( errNum != CL_SUCCESS )
     {
         cout << "Error creating command queue:" <<endl;
         return 1;
     }

	//create memory objects that will be used as arguements to 
	//kernel. First create host memory arrays that will be 
	//used to store the arguments to the kernel
	const int ARRAY_SIZE = 1024;	
	float result[ARRAY_SIZE];
	float a[ARRAY_SIZE];
	float b[ARRAY_SIZE];
	for(int i=0; i<ARRAY_SIZE; i++)
	{
		a[i] = i*1.0;
		b[i] = i*2.0;
	}
	cl_mem memObjects[3] = { 0, 0, 0 };
	memObjects[0] = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, ARRAY_SIZE*sizeof(float), a, &errNum);
	memObjects[1] = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, ARRAY_SIZE*sizeof(float), b, &errNum);
	memObjects[2] = clCreateBuffer(context, CL_MEM_READ_WRITE, ARRAY_SIZE*sizeof(float), NULL, &errNum);
	if( memObjects[0] == NULL || memObjects[1] == NULL || memObjects[2] == NULL)
	{
		cout << "Error creating memory object." << endl;
		return 1;
	}
	
	const char* source = "__kernel void helloworld(__global const float *a, \
						 __global const float *b, \
						 __global float *result) \
						{\
							int gid = get_global_id(0);\
							result[gid] = a[gid] + b[gid];\
						}";
	cl_program program = clCreateProgramWithSource( context, 1, &source, NULL, NULL );
	//assert( errNum == CL_SUCCESS );
	if( errNum != CL_SUCCESS )
	{
		cout << "Error creating program!" <<endl;
		return 1;
	}

	//builds the program
	errNum = clBuildProgram( program, 1, &device, NULL, NULL, NULL );
	//assert( errNum == CL_SUCESS );
	if( errNum != CL_SUCCESS )
	{
		cout << "Error building program!" <<endl;
		return 1;
	}

	//Shows the log
	char* build_log;
	size_t log_size;
	//first call to know the proper size
	clGetProgramBuildInfo( program, device, CL_PROGRAM_BUILD_LOG, 0, NULL, &log_size );
	build_log = new char[ log_size+1 ];
	//second call to know the proper size
	clGetProgramBuildInfo( program, device, CL_PROGRAM_BUILD_LOG, log_size, build_log, NULL );
	build_log[log_size] = '\0';
	cout << build_log << endl;
	delete[] build_log;
	
	//extracting the kernel
	kernel = clCreateKernel( program, "helloworld", &errNum);	
	if( errNum != CL_SUCCESS )
	{
		switch(errNum){
			case CL_INVALID_PROGRAM:printf("1");break;		
			case CL_INVALID_PROGRAM_EXECUTABLE:printf("2");break;		
			case CL_INVALID_KERNEL_NAME:printf("3");break;
			case CL_INVALID_KERNEL_DEFINITION:printf("4");break;		
			case CL_INVALID_VALUE:printf("5");break;		
			case CL_OUT_OF_RESOURCES:printf("6");break;		
			case CL_OUT_OF_HOST_MEMORY:printf("7");break;		
						}
		cout << "Error create kernel." <<endl;
		return 1;
	}
	

	//set the kernel argument (result, a, b)
	errNum = clSetKernelArg(kernel, 0, 
									sizeof(cl_mem), &memObjects[0]);
	if( errNum != CL_SUCCESS )
	{
		
		cout << "Error setting kernel arguments 1." <<endl;
		return 1;
	}

	errNum |= clSetKernelArg(kernel, 1, 
									sizeof(cl_mem), &memObjects[1]);
	if( errNum != CL_SUCCESS )
	{
		cout << "Error setting kernel arguments 2." <<endl;
		return 1;
	}

	errNum |= clSetKernelArg(kernel, 2, 
									sizeof(cl_mem), &memObjects[2]);

	if( errNum != CL_SUCCESS )
	{
		cout << "Error setting kernel arguments 3." <<endl;
		return 1;
	}

	size_t globalWorkSize[1] = { ARRAY_SIZE };
	size_t localWorkSize[1] = { 1 };

	//queue the kernel up for execution across the array
	errNum = clEnqueueNDRangeKernel( queue, kernel, 1, NULL,
									globalWorkSize, localWorkSize,
									0, NULL, NULL);
	if( errNum != CL_SUCCESS )
	{
		cout << "Error queuing kernel for execution." <<endl;
		return 1;
	}

	
	//read the output buffer back to the Host
	errNum = clEnqueueReadBuffer(queue, memObjects[2],
								CL_TRUE, 0, ARRAY_SIZE * sizeof(float),
								result, 0, NULL, NULL);
	if( errNum != CL_SUCCESS )
	{
					
		switch(errNum){
			case CL_INVALID_COMMAND_QUEUE:printf("1");break;		
			case CL_INVALID_CONTEXT:printf("2");break;		
			case CL_INVALID_MEM_OBJECT:printf("3");break;
			case CL_INVALID_VALUE:printf("4");break;		
			case CL_INVALID_EVENT_WAIT_LIST:printf("5");break;		
			case CL_MISALIGNED_SUB_BUFFER_OFFSET:printf("6");break;		
			case CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST:printf("7");break;		
			case CL_MEM_OBJECT_ALLOCATION_FAILURE:printf("8");break;		
			case CL_OUT_OF_RESOURCES:printf("9");break;		
			case CL_OUT_OF_HOST_MEMORY:printf("10");break;		
						}
		printf("\n");
		cout << "Error reading result buffer." <<endl;
		return 1;
	}

printf("after output\n");

	//output the result buffer
	for( int i = 0; i < ARRAY_SIZE; i++)
	{
		cout << result[i] << " ";
	}
	
	cout << endl;
	cout << "executed program successfully." << endl;
	
	delete[] a;
	delete[] b;
	delete[] result;
	clReleaseKernel(kernel);
	clReleaseCommandQueue(queue);
	clReleaseContext(context);
	clReleaseMemObject(memObjects[0]);
	clReleaseMemObject(memObjects[1]);
	clReleaseMemObject(memObjects[2]);
	return 0;

}


你可能感兴趣的:(OpenCL)