在网上找了一些例子,测试后发现都无法运行。下面给出一个在msm8953上测试通过、可以运行的OpenCL实例。当然,这个例子也是在别人的基础之上修改的,具体代码如下:
1.Android.mk
代码路径
:vendor/qcom/proprietary/mm-camera/mm-camera2/opencl/Android.mk
# Builds the `opencl` command-line executable from main.cpp and links it
# against the OpenCL library.
LOCAL_PATH := $(call my-dir)

include $(CLEAR_VARS)

LOCAL_MODULE      := opencl
LOCAL_MODULE_TAGS := optional
LOCAL_SRC_FILES   := main.cpp

# Header search path: this directory plus the Adreno include directory under
# the target's intermediates output (main.cpp includes "CL/cl.h").
LOCAL_C_INCLUDES  := \
    $(LOCAL_PATH) \
    $(TARGET_OUT_INTERMEDIATES)/include/adreno/

# Locations noted by the author for the OpenCL header and library on msm8953_64:
#./vendor/qcom/proprietary/prebuilt_HY11/target/product/msm8953_64/obj/include/adreno/CL/cl.h
#./vendor/qcom/proprietary/prebuilt_HY11/target/product/msm8953_64/system/vendor/lib64/libOpenCL.so
LOCAL_LDFLAGS     += -lOpenCL

include $(BUILD_EXECUTABLE)
2.cl_kernel2.cl
代码路径
:vendor/qcom/proprietary/mm-camera/mm-camera2/opencl/cl_kernel2.cl
/*
 * Element-wise integer addition.
 *
 * Each work-item handles the single element selected by its global id in
 * dimension 0 and stores src1[id] + src2[id] into dst[id].
 */
__kernel void cl_add(__global int *dst, __global int *src1, __global int *src2)
{
    const int gid = get_global_id(0);
    const int lhs = src1[gid];
    const int rhs = src2[gid];

    dst[gid] = lhs + rhs;
}
3.main.cpp
代码路径
:vendor/qcom/proprietary/mm-camera/mm-camera2/opencl/main.cpp
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <iostream>
#include "readyuv.h"
#include "CL/cl.h"
//#include "CL/cl_platform.h"
using namespace std;
int main(void)
{
const int array_size = 1024;
cl_uint numPlatforms = 0;
cl_platform_id platform = nullptr;
cl_context context = nullptr;
cl_command_queue commandQueue = nullptr;
cl_program program = nullptr;
cl_mem input1MemObj = nullptr;
cl_mem input2MemObj = nullptr;
cl_mem outputMemObj = nullptr;
cl_kernel kernel = nullptr;
//step1. 查询OpenCL平台集合
/*
* 这个函数一般被调用两次:
* 第一次调用这个函数是获得可用平台的数目, 然后为平台对象分配内存空间
* 第二次调用用来获取平台对象
*/
cl_int status = clGetPlatformIDs(0, NULL, &numPlatforms);
if (status != CL_SUCCESS)
{
cout << "Error: Getting platforms!" << endl;
return -1;
}
if (numPlatforms > 0)
{
cl_platform_id* platforms = (cl_platform_id*)malloc(numPlatforms* sizeof(cl_platform_id));
status = clGetPlatformIDs(numPlatforms, platforms, NULL);
platform = platforms[0];
}
else
{
puts("Your system does not have any OpenCL platform!");
return -1;
}
cl_uint numDevices = 0;
cl_device_id *devices;
cl_int errcode_ret ;
//setp2.获取一个平台上的可用设备清单
status = clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, 0, NULL, &numDevices);
if (numDevices == 0) //no GPU available.
{
cout << "No GPU device available." << endl;
cout << "Choose CPU as default device." << endl;
status = clGetDeviceIDs(platform, CL_DEVICE_TYPE_CPU, 0, NULL, &numDevices);
devices = (cl_device_id*)malloc(numDevices * sizeof(cl_device_id));
status = clGetDeviceIDs(platform, CL_DEVICE_TYPE_CPU, numDevices, devices, NULL);
}
else
{
devices = (cl_device_id*)malloc(numDevices * sizeof(cl_device_id));
status = clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, numDevices, devices, NULL);
cout << "The number of devices: " << numDevices << endl;
}
//step3.创建一个OpenCL上下文
/*
*上下文是 OpenCL runtime 用来管理像命令队列、内存队列、程序对象和内核对象,并且在上下文中指定的一个或多个设备上执行内核函数。
*/
context = clCreateContext(NULL, 1, devices, NULL, NULL, &errcode_ret);
if(errcode_ret == CL_SUCCESS)
cout << "craet OpenCL runtime sucess." << endl;
else
return -1;
//step4. 创建命令队列
commandQueue = clCreateCommandQueue(context, devices[0], 0, &errcode_ret);
if(errcode_ret == CL_SUCCESS)
cout << "craet CommandQueue sucess." << endl;
else
return -1;
char *kernelCodeBuffer = nullptr;
char *Buffer = nullptr;
const char *aSource = nullptr;
size_t kernelLength = 0;
size_t global_work_size[1] = { array_size };
// Read the kernel code to the buffer
FILE *fp = fopen("cl_kernel2.cl", "rb");
if (fp == nullptr)
{
puts("The kernel file not found!");
goto RELEASE_RESOURCES;
}
fseek(fp, 0, SEEK_END);
kernelLength = ftell(fp);
cout << "kernelLength : " << kernelLength << endl;
fseek(fp, 0, SEEK_SET);
kernelCodeBuffer = (char*)malloc(kernelLength + 1);
Buffer = (char*)malloc(kernelLength + 1);
fread(kernelCodeBuffer, 1, kernelLength, fp);
kernelCodeBuffer[kernelLength] = '\0';
fclose(fp);
aSource = kernelCodeBuffer;
strcpy(Buffer,aSource);
cout << "kernelCodeBuffer : " << Buffer << endl;
//step5. 创建程序对象
program = clCreateProgramWithSource(context, 1, &aSource, &kernelLength, NULL);
//step6. 编译程序对象
status = clBuildProgram(program, 1, devices, NULL, NULL, NULL);
// Do initialization
int i;
int input1Buffer[array_size];
int input2Buffer[array_size];
int outputBuffer[array_size];
for (i = 0; i < array_size; i++)
input1Buffer[i] = input2Buffer[i] = i + 1;
memset(outputBuffer, 0, sizeof(outputBuffer));
// Create mmory object
input1MemObj = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, array_size * sizeof(int), input1Buffer, nullptr);
input2MemObj = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, array_size * sizeof(int), input2Buffer, nullptr);
outputMemObj = clCreateBuffer(context, CL_MEM_WRITE_ONLY, array_size * sizeof(int), NULL, NULL);
//step7.创建内核对象
kernel = clCreateKernel(program, "cl_add", NULL);
//step8.设置内核参数,想要执行内核,就必须设置内核参数
status = clSetKernelArg(kernel, 0, sizeof(cl_mem), (void *)&outputMemObj);
status = clSetKernelArg(kernel, 1, sizeof(cl_mem), (void *)&input1MemObj);
status = clSetKernelArg(kernel, 2, sizeof(cl_mem), (void *)&input2MemObj);
status = clEnqueueNDRangeKernel(commandQueue, kernel, 1, NULL, global_work_size, NULL, 0, NULL, NULL);
/*
* clEnqueueNDRangeKernel(
*
* cl_command_queue queue,
*
* cl_kernel kernel,
*
* cl_uint work_dims,
* //if you deal with image object, you should probably set work_dims equal 2 or 3. But for buffer objects, you can set whatever dimensionality you think best.
* //For a buffer object containing a two-dimensional matrix, you might set work-dims equal 2.
*
* const size_t *global_work_offset,
* //the global ID offset in each dimension
*
* const size_t *global_work_size,
* //the number of work items in each dimension
* const size_t *local_work_size,
* //the number of work_items in a work_group,in each dimension
* cl_uint num_events,
*
* const cl_event *wait_list,
*
* cl_event *event)
*/
/*
* __kernel void cl_add(__global int *dst, __global int *src1, __global int *src2)
* {
* int index = get_global_id(0);//get_global_id获取线程索引,其取值范围是0 到 global_work_size - 1
* dst[index] = src1[index] + src2[index];
* }
*/
clFinish(commandQueue);
//GPU中执行完成后需要Copy结果到内存
status = clEnqueueReadBuffer(commandQueue, outputMemObj, CL_TRUE, 0, global_work_size[0] * sizeof(int), outputBuffer, 0, NULL, NULL);
printf("Veryfy the rsults... ");
for (i = 0; i < array_size; i++)
{
if (outputBuffer[i] != (i + 1) * 2)
{
puts("Results not correct!");
break;
}
}
if (i == array_size)
puts("Correct!");
RELEASE_RESOURCES:
status = clReleaseKernel(kernel);//*Release kernel.
status = clReleaseProgram(program); //Release the program object.
status = clReleaseMemObject(input1MemObj);//Release mem object.
status = clReleaseMemObject(input2MemObj);
status = clReleaseMemObject(outputMemObj);
status = clReleaseCommandQueue(commandQueue);//Release Command queue.
status = clReleaseContext(context);//Release context.
free(devices);
getchar();
return 0;
}
/*
* clGetPlatformIDs---------------------------获取平台ID
*
* clGetDeviceIDs-----------------------------获取设备ID
*
* clCreateContext----------------------------创建上下文
*
* clCreateCommandQueue-----------------------创建命令队列
*
* clCreateBuffer-----------------------------创建设备内存
*
* clCreateProgramWithSource------------------创建程序
*
* clBuildProgram-----------------------------编译程序
*
* clGetProgramBuildInfo----------------------获取编译信息
*
* clCreateKernel-----------------------------创建核
*
* clSetKernelArg-----------------------------设置核参数
*
* clEnqueueNDRangeKernel---------------------执行核
*
* clEnqueueReadBuffer------------------------读取设备内存
*
* clReleaseMemObject-------------------------释放内存对象
*
* clReleaseKernel----------------------------释放核
*
* clReleaseCommandQueue----------------------释放命令队列
*
* clReleaseContext---------------------------释放上下文
*/
执行编译命令后验证方法如下
一、把文件推送到平台端
1.adb root
2.adb remount
3.adb push out\target\product\msm8953_64\system\bin\opencl data
4.adb push vendor\qcom\proprietary\mm-camera\mm-camera2\opencl\cl_kernel2.cl data
二、执行命令
1 ) chmod 777 data/opencl
2 ) cd data/ (若不执行,会报错误如下The kernel file not found!)
3 ) ./opencl
执行完后可得成功log如下
The number of devices: 1
craet OpenCL runtime sucess.
craet CommandQueue sucess.
kernelLength : 162
kernelCodeBuffer : __kernel void cl_add(__global int *dst, __global int *src1, __global int *src2)
{
int index = get_global_id(0);
dst[index] = src1[index] + src2[index];
}
Veryfy the rsults... Correct!