(四) OpenCL 测试

1、编写在 GPU 上运行的 OpenCL 测试代码

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <iostream>
#if defined(__APPLE__) || defined(__MACOSX)
#include <OpenCL/cl.h>
#else
#include <CL/cl.h>
#endif
using namespace std;

// Turns its argument list into a string literal, so the OpenCL C kernel below
// can be written as ordinary code instead of a hand-quoted string.
// Limitation: preprocessor directives cannot appear inside the argument list.
#define KERNEL(...)#__VA_ARGS__

// OpenCL C source of the `hellocl` kernel; the host compiles it at run time.
// The kernel expects a 2-D NDRange. The work-item at (gidx, gidy) stores
// (1 << gidx) | (0x10 << gidy) at row-major index gidx + width * gidy, where
// `width` is the global size of dimension 0, so `buffer` must hold at least
// global_size(0) * global_size(1) uints.
const char *kernelSourceCode = KERNEL(__kernel void hellocl(__global uint *buffer)
{
    size_t gidx = get_global_id(0);
    size_t gidy = get_global_id(1);
    size_t width = get_global_size(0);
    buffer[gidx + width * gidy] = (1u << gidx) | (0x10u << gidy);
});
 
int main(int argc, char const *argv[])
{
    printf("hello OpenCL\n");
    cl_int status = 0;
    size_t deviceListSize;
 
    // 当前服务器上配置的仅有NVIDIA Tesla C2050 的GPU
    cl_platform_id platform = NULL;
    status = clGetPlatformIDs(1, &platform, NULL);
 
    if (status != CL_SUCCESS) {
        printf("ERROR: Getting Platforms.(clGetPlatformIDs)\n");
        return EXIT_FAILURE;
    }
 
    // 如果我们能找到相应平台,就使用它,否则返回NULL
    cl_context_properties cps[3] = {CL_CONTEXT_PLATFORM,(cl_context_properties)platform,0};
    cl_context_properties *cprops = (NULL == platform) ? NULL : cps;
 
    // 生成 context
    cl_context context = clCreateContextFromType(cprops,CL_DEVICE_TYPE_GPU,NULL,NULL,&status);
    if (status != CL_SUCCESS) {
        printf("Error: Creating Context.(clCreateContexFromType)\n");
        return EXIT_FAILURE;
    }
 
    // 寻找OpenCL设备
 
    // 首先得到设备列表的长度
    status = clGetContextInfo(context,CL_CONTEXT_DEVICES,0,NULL,&deviceListSize);
    if (status != CL_SUCCESS) {
        printf("Error: Getting Context Info device list size, clGetContextInfo)\n");
        return EXIT_FAILURE;
    }
    cl_device_id *devices = (cl_device_id *)malloc(deviceListSize);
    if (devices == 0) {
        printf("Error: No devices found.\n");
        return EXIT_FAILURE;
    }
 
    // 现在得到设备列表
    status = clGetContextInfo(context,CL_CONTEXT_DEVICES,deviceListSize,devices,NULL);
    if (status != CL_SUCCESS) {
        printf("Error: Getting Context Info (device list, clGetContextInfo)\n");
        return EXIT_FAILURE;
    }
 
    // 装载内核程序,编译CL program ,生成CL内核实例
    size_t sourceSize[] = {strlen(kernelSourceCode)};
    cl_program program = clCreateProgramWithSource(context,1,&kernelSourceCode,sourceSize,&status);
    if (status != CL_SUCCESS) {
        printf("Error: Loading Binary into cl_program (clCreateProgramWithBinary)\n");
        return EXIT_FAILURE;
    }
 
    // 为指定的设备编译CL program.
    status = clBuildProgram(program, 1, devices, NULL, NULL, NULL);
    if (status != CL_SUCCESS) {
        printf("Error: Building Program (clBuildingProgram)\n");
        return EXIT_FAILURE;
    }
 
    // 得到指定名字的内核实例的句柄
    cl_kernel kernel = clCreateKernel(program, "hellocl", &status);
    if (status != CL_SUCCESS) {
        printf("Error: Creating Kernel from program.(clCreateKernel)\n");
        return EXIT_FAILURE;
    }
 
    // 创建 OpenCL buffer 对象
    unsigned int *outbuffer = new unsigned int [4 * 4];
    memset(outbuffer, 0, 4 * 4 * 4);
    cl_mem outputBuffer = clCreateBuffer(context, CL_MEM_ALLOC_HOST_PTR, 4 * 4 * 4, NULL, &status);
 
    if (status != CL_SUCCESS) {
        printf("Error: Create Buffer, outputBuffer. (clCreateBuffer)\n");
        return EXIT_FAILURE;
    }
 
 
    //  为内核程序设置参数
    status = clSetKernelArg(kernel, 0, sizeof(cl_mem), (void *)&outputBuffer);
    if (status != CL_SUCCESS) {
        printf("Error: Setting kernel argument. (clSetKernelArg)\n");
        return EXIT_FAILURE;
    }
 
    // 创建一个OpenCL command queue
    cl_command_queue commandQueue = clCreateCommandQueue(context,devices[0],0,&status);
    if (status != CL_SUCCESS) {
        printf("Error: Create Command Queue. (clCreateCommandQueue)\n");
        return EXIT_FAILURE;
    }
 
 
    // 将一个kernel 放入 command queue
    size_t globalThreads[] = {4, 4};
    size_t localThreads[] = {2, 2};
    status = clEnqueueNDRangeKernel(commandQueue, kernel,2, NULL, globalThreads,localThreads, 0,NULL, NULL);
    if (status != CL_SUCCESS) {
        printf("Error: Enqueueing kernel\n");
        return EXIT_FAILURE;
    }
 
    // 确认 command queue 中所有命令都执行完毕
    status = clFinish(commandQueue);
    if (status != CL_SUCCESS) {
        printf("Error: Finish command queue\n");
        return EXIT_FAILURE;
    }
 
    // 将内存对象中的结果读回Host
    status = clEnqueueReadBuffer(commandQueue,outputBuffer, CL_TRUE, 0,4 * 4 * 4, outbuffer, 0, NULL, NULL);
    if (status != CL_SUCCESS) {
        printf("Error: Read buffer queue\n");
        return EXIT_FAILURE;
    }
 
    // Host端打印结果
    printf("out:\n");
    for (int i = 0; i < 16; ++i) {
        printf("%x ", outbuffer[i]);
        if ((i + 1) % 4 == 0)
            printf("\n");
    }
 
    // 资源回收
    status = clReleaseKernel(kernel);
    status = clReleaseProgram(program);
    status = clReleaseMemObject(outputBuffer);
    status = clReleaseCommandQueue(commandQueue);
    status = clReleaseContext(context);
 
    free(devices);
    delete outbuffer;
    return 0;
}

2、使用交叉编译工具链进行编译（CMakeLists.txt）

cmake_minimum_required(VERSION 3.5.1)

# Locations of the cross toolchain and of the SDK that ships the OpenCL headers
# and the Mali libraries. The defaults match the original build machine;
# override them with -DCL_TEST_TOOLCHAIN_ROOT=... / -DCL_TEST_SDK_ROOT=...
set(CL_TEST_TOOLCHAIN_ROOT
    "/gzy_mnt/gcc-linaro-7.4.1-2019.02-x86_64_aarch64-linux-gnu"
    CACHE PATH "Root of the aarch64 Linaro GCC cross toolchain")
set(CL_TEST_SDK_ROOT
    "/gzy_mnt/SDK"
    CACHE PATH "Root of the SDK providing OpenCL headers and Mali libraries")

# Cross-compilation settings must be in place BEFORE project(): project() is
# what detects the compilers, so setting them afterwards is too late.
# Skipped when the user supplies a toolchain file of their own.
if(NOT CMAKE_TOOLCHAIN_FILE)
  set(CMAKE_SYSTEM_NAME Linux)
  # The toolchain targets 64-bit ARM, so the processor is aarch64, not arm.
  set(CMAKE_SYSTEM_PROCESSOR aarch64)
  set(CMAKE_C_COMPILER "${CL_TEST_TOOLCHAIN_ROOT}/bin/aarch64-linux-gnu-gcc")
  set(CMAKE_CXX_COMPILER "${CL_TEST_TOOLCHAIN_ROOT}/bin/aarch64-linux-gnu-g++")
endif()

project(cl_test LANGUAGES C CXX)

# Default to a Release build, but only for single-config generators and only
# when the user has not chosen a build type.
if(NOT CMAKE_BUILD_TYPE AND NOT CMAKE_CONFIGURATION_TYPES)
  set(CMAKE_BUILD_TYPE "Release" CACHE STRING "Build type" FORCE)
endif()

set(CL_TEST_SDK_LIB_DIR "${CL_TEST_SDK_ROOT}/aarch64-linux-gnu-7.4.1/lib64")

add_executable(${PROJECT_NAME} cl-test.cpp)

# Keep the original behaviour of placing the executable next to the sources.
set_target_properties(${PROJECT_NAME} PROPERTIES
  RUNTIME_OUTPUT_DIRECTORY "${PROJECT_SOURCE_DIR}"
)

# Both the SDK include root (for <CL/cl.h>) and its CL/ subdirectory (the path
# the original script used) are searched.
target_include_directories(${PROJECT_NAME} PRIVATE
  "${CL_TEST_SDK_ROOT}/include"
  "${CL_TEST_SDK_ROOT}/include/CL"
)

target_link_libraries(${PROJECT_NAME} PRIVATE
  "${CL_TEST_SDK_LIB_DIR}/libmali.so.0"
  "${CL_TEST_SDK_LIB_DIR}/libOpenCL.so.2"
)

你可能感兴趣的:(算法优化汇总,蓝桥杯,c++,职场和发展)