Android AARCH64 平台的 OpenCL 配置

原文地址:Android AARCH64 平台的 OpenCL 配置

Android AARCH64 平台的 OpenCL 配置

开发环境

IDE: Android Studio 3.4.1

Android: 7.1

minSdkVersion: 25

targetSdkVersion: 26

JNI CMake: 3.4.1

ABI: arm64-v8a

OpenCL: 1.2

配置 OpenCL 使用项目中的 so 库

这里以编译 openclTest.cpp 为 libopenclTest.so 并导入 OpenCL 的动态库为例

注1: 下列 so 库需要从开发板上 pull 到项目中,其中 libGLES_mali.so 用于驱动 OpenCL,其他库为依赖库

注2: 不同平台的驱动库位于不同位置,可以下载 OpenCL-Z 查看

注3: set_target_properties 中第一项参数设置了生成库的名称,例如设置为 openclTest,则生成 libopenclTest.so 库,这里使用了 lib_* 作为前缀,则生成liblib_*.so 库,实际开发中要避免这种情况

查看依赖库

objdump -x libGLES_mali.so | grep NEEDED

目录结构

opencltest
├─ app
│  ├─ build
│  ├─ libs
│  └─ src
│      ├─ androidTest
│      ├─ main
│      │  ├─ java
│      │  │  └─ com
│      │  │      └─ example
│      │  │          └─ opencltest
│      │  │                  MainActivity.java
│      │  ├─ jni    #C/C++ 源码目录
│      │  │  └─ openclTest.cpp
│      │  ├─ jniLibs    #JNI 需要调用的运行库
│      │  │  └─ arm64-v8a   #对应 ABI 版本建立文件夹
│      │  │      ├─ libbinder.so
│      │  │      ├─ libc++.so
│      │  │      ├─ libc.so
│      │  │      ├─ libcrypto.so
│      │  │      ├─ libcutils.so
│      │  │      ├─ libdl.so
│      │  │      ├─ libGLES_mali.so
│      │  │      ├─ libhardware.so
│      │  │      ├─ liblog.so
│      │  │      ├─ libm.so
│      │  │      ├─ libui.so
│      │  │      ├─ libutils.so
│      │  │      └─ libz.so
│      │  └─ res
│      └─ test
└─ gradle

CMakeLists.txt 增加配置

add_library(openclTest
        SHARED
        src/main/jni/openclTest.cpp )

add_library(lib_opencl SHARED IMPORTED)

set_target_properties(lib_opencl
        PROPERTIES
        IMPORTED_LOCATION ${PROJECT_SOURCE_DIR}/src/main/jniLibs/${ANDROID_ABI}/libGLES_mali.so )

add_library(lib_z SHARED IMPORTED)
set_target_properties(lib_z
        PROPERTIES
        IMPORTED_LOCATION ${PROJECT_SOURCE_DIR}/src/main/jniLibs/${ANDROID_ABI}/libz.so )

add_library(lib_log SHARED IMPORTED)
set_target_properties(lib_log
        PROPERTIES
        IMPORTED_LOCATION ${PROJECT_SOURCE_DIR}/src/main/jniLibs/${ANDROID_ABI}/liblog.so )

add_library(lib_utils SHARED IMPORTED)
set_target_properties(lib_utils
        PROPERTIES
        IMPORTED_LOCATION ${PROJECT_SOURCE_DIR}/src/main/jniLibs/${ANDROID_ABI}/libutils.so )

add_library(lib_ui SHARED IMPORTED)
set_target_properties(lib_ui
        PROPERTIES
        IMPORTED_LOCATION ${PROJECT_SOURCE_DIR}/src/main/jniLibs/${ANDROID_ABI}/libui.so )

add_library(lib_cutils SHARED IMPORTED)
set_target_properties(lib_cutils
        PROPERTIES
        IMPORTED_LOCATION ${PROJECT_SOURCE_DIR}/src/main/jniLibs/${ANDROID_ABI}/libcutils.so )

add_library(lib_binder SHARED IMPORTED)
set_target_properties(lib_binder
        PROPERTIES
        IMPORTED_LOCATION ${PROJECT_SOURCE_DIR}/src/main/jniLibs/${ANDROID_ABI}/libbinder.so )

add_library(lib_crypto SHARED IMPORTED)
set_target_properties(lib_crypto
        PROPERTIES
        IMPORTED_LOCATION ${PROJECT_SOURCE_DIR}/src/main/jniLibs/${ANDROID_ABI}/libcrypto.so )

add_library(lib_dl SHARED IMPORTED)
set_target_properties(lib_dl
        PROPERTIES
        IMPORTED_LOCATION ${PROJECT_SOURCE_DIR}/src/main/jniLibs/${ANDROID_ABI}/libdl.so )

add_library(lib_hardware SHARED IMPORTED)
set_target_properties(lib_hardware
        PROPERTIES
        IMPORTED_LOCATION ${PROJECT_SOURCE_DIR}/src/main/jniLibs/${ANDROID_ABI}/libhardware.so )

add_library(lib_c++ SHARED IMPORTED)
set_target_properties(lib_c++
        PROPERTIES
        IMPORTED_LOCATION ${PROJECT_SOURCE_DIR}/src/main/jniLibs/${ANDROID_ABI}/libc++.so )

add_library(lib_c SHARED IMPORTED)
set_target_properties(lib_c
        PROPERTIES
        IMPORTED_LOCATION ${PROJECT_SOURCE_DIR}/src/main/jniLibs/${ANDROID_ABI}/libc.so )

add_library(lib_m SHARED IMPORTED)
set_target_properties(lib_m
        PROPERTIES
        IMPORTED_LOCATION ${PROJECT_SOURCE_DIR}/src/main/jniLibs/${ANDROID_ABI}/libm.so )

target_link_libraries(openclTest
        ${log-lib}
        lib_opencl )

配置 OpenCL 使用 dlopen 打开开发板上的运行库

这种配置方法需要从 Github - KhronosGroup/OpenCL-Headers 仓库下载 OpenCL 的头文件,并且需要自行实现 OpenCL 的函数

注: 需要在源码中定义 CL_TARGET_OPENCL_VERSION 120,否则会报找不到版本号的错误

目录结构

opencltest
├─ app
│  ├─ build
│  ├─ libs
│  └─ src
│      ├─ androidTest
│      ├─ main
│      │  ├─ java
│      │  │  └─ com
│      │  │      └─ example
│      │  │          └─ opencltest
│      │  │                  MainActivity.java
│      │  ├─ jni    #C/C++ 源码目录
│      │  │  ├─ dlopencl.cpp    #dlopen 打开运行库,实现 OpenCL 中的函数
│      │  │  ├─ openclTest.cpp  
│      │  │  └─ include     #JNI 调用的头文件
│      │  │      ├─ dlopencl.h  #定义 OpenCL 中的函数
│      │  │      └─ CL      #OpenCL 需要使用的头文件
│      │  │          ├─ cl.h
│      │  │          ├─ cl_ext.h
│      │  │          ├─ cl_gl.h
│      │  │          ├─ cl_gl_ext.h
│      │  │          ├─ cl_platform.h
│      │  │          ├─ cl_version.h
│      │  │          └─ opencl.h
│      │  └─ res
│      └─ test
└─ gradle

CMakeLists.txt 增加配置

include_directories(${PROJECT_SOURCE_DIR}/src/main/jni/include)

add_library(openclTest
        SHARED
        src/main/jni/openclTest.cpp )

add_library(lib_dlopencl
        SHARED
        src/main/jni/dlopencl.cpp )

target_link_libraries(openclTest
        ${log-lib}
        lib_dlopencl )

需要导入的 dlopencl.h 和 dlopencl.cpp 写在文末

OpenCL 使用

以下均以使用 dlopen 导入运行库的方式为例

  1. 定义 OpenCL 版本
#define CL_TARGET_OPENCL_VERSION 120
  1. 包含头文件
#include 
#include "dlopencl.h"
  1. 使用 Logcat 打印日志

由于 C 语言工作在 JNI 层,无法获取控制台,导致了 printf() 函数失效,这里使用 __android_log_print 方法打印日志到 Logcat

#include 

#define DEBUG

#ifdef DEBUG
#define LOG    "LOG-TAG"
#define LOGD(...)  __android_log_print(ANDROID_LOG_DEBUG, LOG, __VA_ARGS__)
#define LOGI(...)  __android_log_print(ANDROID_LOG_INFO, LOG, __VA_ARGS__)
#define LOGW(...)  __android_log_print(ANDROID_LOG_WARN, LOG, __VA_ARGS__)
#define LOGE(...)  __android_log_print(ANDROID_LOG_ERROR, LOG, __VA_ARGS__)
#define LOGF(...)  __android_log_print(ANDROID_LOG_FATAL, LOG, __VA_ARGS__)
#else
#define LOG
#define LOGD(...)
#define LOGI(...)
#define LOGW(...)
#define LOGE(...)
#define LOGF(...)
#endif

使用方法同 printf(),例:

LOGI("Device ID: %d", device_id);
  1. 创建用于初始化 OpenCL 的 JNI 接口

建议在同一个文件中编写多个操作 OpenCL 的函数,这里的初始化实际上是将操作 OpenCL 的变量建立为全局变量,通过 JNI 一次调用后其他函数再使用这些被初始化过的变量

定义变量

cl_uint num_device;
cl_uint num_platform;
cl_platform_id *platform;
cl_device_id *devices;
cl_int err;
cl_context context;
extern "C"
JNIEXPORT jint JNICALL
Java_com_example_opencltest_MainActivity_initOpencl(JNIEnv *env, jobject instance) {
    initFns();
    LOGI("getPlatformNum");
    // 获取可用平台数量
    err = clGetPlatformIDs(0, 0, &num_platform);
    platform = (cl_platform_id*)malloc(sizeof(cl_platform_id)*num_platform);
    LOGI("getPlatformIDs");
    // 获取平台 ID
    err = clGetPlatformIDs(num_platform, platform, nullptr);
    if(err < 0) {
        LOGE("clGetPlatformIDs failed");
        return -1;
    }

    LOGI("getDeviceNum");
    // 获取可用设备数量
    err = clGetDeviceIDs(platform[0], CL_DEVICE_TYPE_GPU, 0, nullptr, &num_device);
    devices = (cl_device_id*)malloc(sizeof(cl_device_id)*num_device);
    LOGI("getDeviceIDs");
    // 获取设备 ID
    err = clGetDeviceIDs(platform[0], CL_DEVICE_TYPE_GPU, num_device, devices, nullptr);
    if (err < 0) {
        LOGE("clGetDeviceIDs failed");
        return -1;
    }

    return 0;
}

使用 OpenCL 执行 Kernel 函数

通常编写 OpenCL 中的 Kernel 函数要求单独写在 *.cl 文件中,调用时使用 fopen 打开,将里面的代码作为字符串读入之后再进行 runtime 编译,比较麻烦,这里直接将整个 Kernel 函数写成字符串

本例中因为测试原因没有释放掉创建的资源

可以使用允许正则替换的编辑器例如 VSCode,先写好 Kernel 函数,然后使用正则表达式 (.*) 选择所有行并将每一行作为一个参数($1),替换为 "$1\\n",例如替换前: int a = 0;,替换后: "int a = 0;\n"

注1: 经测试发现 OpenCL 在 PC 端允许每次调用任意数量的矢量数据,而在开发板上只允许一次调用 1.2.3.4.8.16 个

PC 端允许的矢量调用:

Integralgraph.s0
Integralgraph.s01
Integralgraph.s012
Integralgraph.s0123
Integralgraph.s01234
Integralgraph.s0123456
Integralgraph.s01234567
Integralgraph.s012345678
Integralgraph.s0123456789
Integralgraph.s0123456789a
Integralgraph.s0123456789ab
Integralgraph.s0123456789abc
Integralgraph.s0123456789abcd
Integralgraph.s0123456789abcde
Integralgraph.s0123456789abcdef

开发板允许的矢量调用:

Integralgraph.s0
Integralgraph.s01
Integralgraph.s012
Integralgraph.s0123
Integralgraph.s01234567
Integralgraph.s0123456789abcdef

所以在编写运行于 AARCH64 架构的 OpenCL Kernel 函数时需要将被调用的矢量数组分组相加

例如在 PC 端为

TableInteg1 = (int16)(Integralgraph[index1].sf*flagx0, Integralgraph[index4].s0123456789abcde);

在开发板中需要改为

TableInteg1 = (int16)(Integralgraph[index1].sf*flagx0, Integralgraph[index4].s01234567, Integralgraph[index4].s89ab, Integralgraph[index4].scde);

注2: 测试中发现 PC 端的 OpenCL 遇到数组越界问题会直接跳过不予处理,但在 AARCH64 中会导致执行失败

注3: AARCH64 平台上 OpenCL 能申请到的可调用内存远小于 PC 端,如果出现输入或输出的数据超出 OpenCL 申请到的内存可能会导致数据输出不完整,读取数据时错误代码返回 -14 等问题

先在 Activity 中调用 initOpencl(),初始化 OpenCL 及其平台和设备

// 定义积分图宽度
#define CLL_IMAGE_W (400)
// 定义积分图高度
#define CLL_IMAGE_H (80)

// 构建 Kernel 函数字符串
const char *clkernel[] = {
        "__kernel void kernel_Integralgraph_45int(__global int16 * grayImage,\n"
        "    __global int16 * Integralgraph,\n"
        "    __global unsigned * const p_height) {\n"
        "    int x = get_global_id(0);\n"
        "    int height = *p_height;\n"
        "    int width = get_global_size(0);\n"
        "    __local int index, index1, index2, index3, index4, index5;\n"
        "    __local bool flagx0, flagxw, flagy1, flagy2;\n"
        "    __local int16 TableInteg1, TableInteg2, TableInteg3;\n"
        "    flagx0 = min(0, -x);\n"
        "    flagxw = min(0, x - width + 1);\n"
        "    for (int j = 0; j < height; j++) {\n"
        "        flagy1 = min(0, -j);\n"
        "        flagy2 = min(0, 1 - j);\n"
        "        index = j * width + x;\n"
        "        if(j==0) {\n"
        "            Integralgraph[index] = (int16)grayImage[index];\n"
        "        } else if(j==1) {\n"
        "            index1 = (j - 1)*width + x - 1;\n"
        "            index3 = (j - 1)*width + x + 1;\n"
        "            index4 = (j - 1)*width + x;\n"
        "            TableInteg1 = (int16)(Integralgraph[index1].sf*flagx0, Integralgraph[index4].s01234567, Integralgraph[index4].s89ab, Integralgraph[index4].scde);\n"
        "            TableInteg3 = (int16)(Integralgraph[index4].s12345678, Integralgraph[index4].s9abc, Integralgraph[index4].sdef, Integralgraph[index3].s0*flagxw);\n"
        "            Integralgraph[index] = (int16)((int16)TableInteg1*flagy1 + (int16)TableInteg3*flagy1 + (int16)grayImage[index4] * flagy1 + (int16)grayImage[index]);\n"
        "        } else {\n"
        "            index1 = (j - 1)*width + x - 1;\n"
        "            index2 = (j - 2)*width + x;\n"
        "            index3 = (j - 1)*width + x + 1;\n"
        "            index4 = (j - 1)*width + x;\n"
        "            TableInteg1 = (int16)(Integralgraph[index1].sf*flagx0, Integralgraph[index4].s01234567, Integralgraph[index4].s89ab, Integralgraph[index4].scde);\n"
        "            TableInteg2 = (int16)(Integralgraph[index2].s0*flagx0, Integralgraph[index2].s12345678, Integralgraph[index2].s9ab, Integralgraph[index2].scde, Integralgraph[index2].sf*flagxw);\n"
        "            TableInteg3 = (int16)(Integralgraph[index4].s12345678, Integralgraph[index4].s9abc, Integralgraph[index4].sdef, Integralgraph[index3].s0*flagxw);\n"
        "            Integralgraph[index] = (int16)((int16)TableInteg1*flagy1 + (int16)TableInteg3*flagy1 - (int16)TableInteg2 * flagy2 + (int16)grayImage[index4] * flagy1 + (int16)grayImage[index]);\n"
        "        }\n"
        "    }\n"
        "}\n"
};

// 创建 Context
context = clCreateContext(nullptr, 1, devices, nullptr, nullptr, &err);
if(err < 0) {
    LOGE("Create context failed, error code: [%d]", err);
    return -1;
}

int inputData[CLL_IMAGE_W * CLL_IMAGE_H];
int outputData[CLL_IMAGE_W * CLL_IMAGE_H];
int width = CLL_IMAGE_W;
int height = CLL_IMAGE_H;
// 初始化输入输出数据
for (int i = 0; i < height; i++) {
    for (int j = 0; j < width; j++) {
        inputData[i * width + j] = 1;
        outputData[i * width + j] = 0;
    }
}

clock_t time_start;
clock_t time_finish;
double total_time;

char *program_log;
cl_command_queue queue;
cl_kernel kernel;
size_t log_size;
cl_program program;

// 创建命令队列
queue = clCreateCommandQueue(context, devices[0], 0, &err);
if (err < 0) {
    LOGE("Create command queue failed, error code: [%d]", err);
}

// 创建程序
program = clCreateProgramWithSource(context, sizeof(clkernel) / sizeof(clkernel[0]), clkernel, nullptr, nullptr);

// 构建/编译程序
err = clBuildProgram(program, 0, nullptr, nullptr, nullptr, nullptr);
if (err < 0) {
    clGetProgramBuildInfo(program, devices[0], CL_PROGRAM_BUILD_LOG, 0, nullptr, &log_size);
    program_log = (char*)malloc(sizeof(log_size));
    // 查询构建/编译过程中的 log
    clGetProgramBuildInfo(program, devices[0], CL_PROGRAM_BUILD_LOG, log_size, program_log, nullptr);
    LOGE("program_build_info: \n[%s]\n", program_log);
    free(program_log);
}

// 创建 Kernel
kernel = clCreateKernel(program, "kernel_Integralgraph_45int", &err);
if (err < 0) {
    LOGE("Create kernel failed, error code: [%d]", err);
}

// 创建用于输入参数、输出数据的内存空间
cl_mem meminput_buffer = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, sizeof(int) * height * width, inputData, &err);
cl_mem memoutput_buffer = clCreateBuffer(context, CL_MEM_WRITE_ONLY, sizeof(int) * height * width, nullptr, &err);
cl_mem memHeight_buffer = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, sizeof(int), &height, &err);
// 向 Kernel 传递参数
err = clSetKernelArg(kernel, 0, sizeof(cl_mem), &meminput_buffer);
err = clSetKernelArg(kernel, 1, sizeof(cl_mem), &memoutput_buffer);
err = clSetKernelArg(kernel, 2, sizeof(cl_mem), &memHeight_buffer);

size_t global_work_offset[2] = {0, 0};
size_t localThreads[2] = {1, 1};
size_t globalThreads[2] = {(size_t)(width / 16), 1};

// 获得程序开始执行的时间戳
time_start = clock();

// 排布工作组与工作项并执行
err = clEnqueueNDRangeKernel(queue, kernel, 2, global_work_offset, globalThreads, localThreads, 0, nullptr, nullptr);
if (err < 0) {
    LOGE("Run Kernel failed, error code: [%d]", err);
}

// 获得程序执行完成的时间戳
time_finish = clock();
// 计算运行时间
total_time = (double)(time_finish - time_start) / CLOCKS_PER_SEC;
LOGI("Total time: [%f]s", total_time);

// 从输出的内存空间中读取数据
err = clEnqueueReadBuffer(queue, memoutput_buffer, CL_TRUE, 0, sizeof(int) * height * width, outputData, 0, nullptr, nullptr);
if (err < 0) {
    LOGE("Read buffer failed, error code: [%d]", err);
}

// 输出数据,积分图中的每一行拼接为一条 LOG
char outputDataTemp[2048];
for (int i = 0; i < height ; i++) {
    // 清空字符串,拼接下一行
    memset(outputDataTemp, 0x00, 2048);
    for (int j = 0; j < width ; j++) {
        // 将一行数据拼接在一个字符串中
        sprintf(outputDataTemp, "%s %d", outputDataTemp, outputData[i * width + j]);
    }
    // LOG打印
    LOGI("line [%d]\n%s", i, outputDataTemp);
}

附加

dlopencl.h

#ifndef __AOPENCL_CL_H
#define __AOPENCL_CL_H

#ifdef __APPLE__
#include 
#else
#include 
#endif

#ifdef __cplusplus
extern "C" {
#endif

#define IAH()
//#define IAH() printf("File:%s, Line:%d\n",__FILE__, __LINE__);

void initFns();

/* Platform API */
#define clGetPlatformIDs aclGetPlatformIDs
cl_int
(*aclGetPlatformIDs)(cl_uint          /* num_entries */,
                     cl_platform_id * /* platforms */,
                     cl_uint *        /* num_platforms */);

#define clGetPlatformInfo aclGetPlatformInfo
cl_int
(*aclGetPlatformInfo)(cl_platform_id   /* platform */,
                      cl_platform_info /* param_name */,
                      size_t           /* param_value_size */,
                      void *           /* param_value */,
                      size_t *         /* param_value_size_ret */);

/* Device APIs */
#define clGetDeviceIDs aclGetDeviceIDs
cl_int
(*aclGetDeviceIDs)(cl_platform_id   /* platform */,
                   cl_device_type   /* device_type */,
                   cl_uint          /* num_entries */,
                   cl_device_id *   /* devices */,
                   cl_uint *        /* num_devices */);

#define clGetDeviceInfo aclGetDeviceInfo
cl_int
(*aclGetDeviceInfo)(cl_device_id    /* device */,
                    cl_device_info  /* param_name */,
                    size_t          /* param_value_size */,
                    void *          /* param_value */,
                    size_t *        /* param_value_size_ret */);




/* Context APIs  */
#define clCreateContext aclCreateContext
cl_context
(*aclCreateContext)(const cl_context_properties * /* properties */,
                    cl_uint                 /* num_devices */,
                    const cl_device_id *    /* devices */,
                    void (CL_CALLBACK * /* pfn_notify */)(const char *, const void *, size_t, void *),
                    void *                  /* user_data */,
                    cl_int *                /* errcode_ret */);

#define clCreateContextFromType aclCreateContextFromType
cl_context
(*aclCreateContextFromType)(const cl_context_properties * /* properties */,
                            cl_device_type          /* device_type */,
                            void (CL_CALLBACK *     /* pfn_notify*/ )(const char *, const void *, size_t, void *),
                            void *                  /* user_data */,
                            cl_int *                /* errcode_ret */);

#define clRetainContext aclRetainContext
cl_int
(*aclRetainContext)(cl_context /* context */);

#define clReleaseContext aclReleaseContext
cl_int
(*aclReleaseContext)(cl_context /* context */);

#define clGetContextInfo aclGetContextInfo
cl_int
(*aclGetContextInfo)(cl_context         /* context */,
                     cl_context_info    /* param_name */,
                     size_t             /* param_value_size */,
                     void *             /* param_value */,
                     size_t *           /* param_value_size_ret */);

/* Command Queue APIs */
#define clCreateCommandQueue aclCreateCommandQueue
cl_command_queue
(*aclCreateCommandQueue)(cl_context                     /* context */,
                         cl_device_id                   /* device */,
                         cl_command_queue_properties    /* properties */,
                         cl_int *                       /* errcode_ret */);

#define clRetainCommandQueue aclRetainCommandQueue
cl_int
(*aclRetainCommandQueue)(cl_command_queue /* command_queue */);

#define clReleaseCommandQueue aclReleaseCommandQueue
cl_int
(*aclReleaseCommandQueue)(cl_command_queue /* command_queue */);

#define clGetCommandQueueInfo aclGetCommandQueueInfo
cl_int
(*aclGetCommandQueueInfo)(cl_command_queue      /* command_queue */,
                          cl_command_queue_info /* param_name */,
                          size_t                /* param_value_size */,
                          void *                /* param_value */,
                          size_t *              /* param_value_size_ret */);

/* Memory Object APIs */
#define clCreateBuffer aclCreateBuffer
cl_mem
(*aclCreateBuffer)(cl_context   /* context */,
                   cl_mem_flags /* flags */,
                   size_t       /* size */,
                   void *       /* host_ptr */,
                   cl_int *     /* errcode_ret */);

#define clCreateSubBuffer aclCreateSubBuffer
cl_mem
(*aclCreateSubBuffer)(cl_mem                   /* buffer */,
                      cl_mem_flags             /* flags */,
                      cl_buffer_create_type    /* buffer_create_type */,
                      const void *             /* buffer_create_info */,
                      cl_int *                 /* errcode_ret */);


#define clRetainMemObject aclRetainMemObject
cl_int
(*aclRetainMemObject)(cl_mem /* memobj */);

#define clReleaseMemObject aclReleaseMemObject
cl_int
(*aclReleaseMemObject)(cl_mem /* memobj */);

#define clGetSupportedImageFormats aclGetSupportedImageFormats
cl_int
(*aclGetSupportedImageFormats)(cl_context           /* context */,
                               cl_mem_flags         /* flags */,
                               cl_mem_object_type   /* image_type */,
                               cl_uint              /* num_entries */,
                               cl_image_format *    /* image_formats */,
                               cl_uint *            /* num_image_formats */);

#define clGetMemObjectInfo aclGetMemObjectInfo
cl_int
(*aclGetMemObjectInfo)(cl_mem           /* memobj */,
                       cl_mem_info      /* param_name */,
                       size_t           /* param_value_size */,
                       void *           /* param_value */,
                       size_t *         /* param_value_size_ret */);

#define clGetImageInfo aclGetImageInfo
cl_int
(*aclGetImageInfo)(cl_mem           /* image */,
                   cl_image_info    /* param_name */,
                   size_t           /* param_value_size */,
                   void *           /* param_value */,
                   size_t *         /* param_value_size_ret */);

#define clSetMemObjectDestructorCallback aclSetMemObjectDestructorCallback
cl_int
(*aclSetMemObjectDestructorCallback)(  cl_mem /* memobj */,
                                       void (CL_CALLBACK * /*pfn_notify*/)( cl_mem /* memobj */, void* /*user_data*/),
                                       void * /*user_data */ );

/* Sampler APIs */
#define clCreateSampler aclCreateSampler
cl_sampler
(*aclCreateSampler)(cl_context          /* context */,
                    cl_bool             /* normalized_coords */,
                    cl_addressing_mode  /* addressing_mode */,
                    cl_filter_mode      /* filter_mode */,
                    cl_int *            /* errcode_ret */);

#define clRetainSampler aclRetainSampler
cl_int
(*aclRetainSampler)(cl_sampler /* sampler */);

#define clReleaseSampler aclReleaseSampler
cl_int
(*aclReleaseSampler)(cl_sampler /* sampler */);

#define clGetSamplerInfo aclGetSamplerInfo
cl_int
(*aclGetSamplerInfo)(cl_sampler         /* sampler */,
                     cl_sampler_info    /* param_name */,
                     size_t             /* param_value_size */,
                     void *             /* param_value */,
                     size_t *           /* param_value_size_ret */);

/* Program Object APIs  */
#define clCreateProgramWithSource aclCreateProgramWithSource
cl_program
(*aclCreateProgramWithSource)(cl_context        /* context */,
                              cl_uint           /* count */,
                              const char **     /* strings */,
                              const size_t *    /* lengths */,
                              cl_int *          /* errcode_ret */);

#define clCreateProgramWithBinary aclCreateProgramWithBinary
cl_program
(*aclCreateProgramWithBinary)(cl_context                     /* context */,
                              cl_uint                        /* num_devices */,
                              const cl_device_id *           /* device_list */,
                              const size_t *                 /* lengths */,
                              const unsigned char **         /* binaries */,
                              cl_int *                       /* binary_status */,
                              cl_int *                       /* errcode_ret */);


#define clRetainProgram aclRetainProgram
cl_int
(*aclRetainProgram)(cl_program /* program */);

#define clReleaseProgram aclReleaseProgram
cl_int
(*aclReleaseProgram)(cl_program /* program */);

#define clBuildProgram aclBuildProgram
cl_int
(*aclBuildProgram)(cl_program           /* program */,
                   cl_uint              /* num_devices */,
                   const cl_device_id * /* device_list */,
                   const char *         /* options */,
                   void (CL_CALLBACK *  /* pfn_notify */)(cl_program /* program */, void * /* user_data */),
                   void *               /* user_data */);





#define clGetProgramInfo aclGetProgramInfo
cl_int
(*aclGetProgramInfo)(cl_program         /* program */,
                     cl_program_info    /* param_name */,
                     size_t             /* param_value_size */,
                     void *             /* param_value */,
                     size_t *           /* param_value_size_ret */);

#define clGetProgramBuildInfo aclGetProgramBuildInfo
cl_int
(*aclGetProgramBuildInfo)(cl_program            /* program */,
                          cl_device_id          /* device */,
                          cl_program_build_info /* param_name */,
                          size_t                /* param_value_size */,
                          void *                /* param_value */,
                          size_t *              /* param_value_size_ret */);

/* Kernel Object APIs */
#define clCreateKernel aclCreateKernel
cl_kernel
(*aclCreateKernel)(cl_program      /* program */,
                   const char *    /* kernel_name */,
                   cl_int *        /* errcode_ret */);

#define clCreateKernelsInProgram aclCreateKernelsInProgram
cl_int
(*aclCreateKernelsInProgram)(cl_program     /* program */,
                             cl_uint        /* num_kernels */,
                             cl_kernel *    /* kernels */,
                             cl_uint *      /* num_kernels_ret */);

#define clRetainKernel aclRetainKernel
cl_int
(*aclRetainKernel)(cl_kernel    /* kernel */);

#define clReleaseKernel aclReleaseKernel
cl_int
(*aclReleaseKernel)(cl_kernel   /* kernel */);

#define clSetKernelArg aclSetKernelArg
cl_int
(*aclSetKernelArg)(cl_kernel    /* kernel */,
                   cl_uint      /* arg_index */,
                   size_t       /* arg_size */,
                   const void * /* arg_value */);

#define clGetKernelInfo aclGetKernelInfo
cl_int
(*aclGetKernelInfo)(cl_kernel       /* kernel */,
                    cl_kernel_info  /* param_name */,
                    size_t          /* param_value_size */,
                    void *          /* param_value */,
                    size_t *        /* param_value_size_ret */);


#define clGetKernelWorkGroupInfo aclGetKernelWorkGroupInfo
cl_int
(*aclGetKernelWorkGroupInfo)(cl_kernel                  /* kernel */,
                             cl_device_id               /* device */,
                             cl_kernel_work_group_info  /* param_name */,
                             size_t                     /* param_value_size */,
                             void *                     /* param_value */,
                             size_t *                   /* param_value_size_ret */);

/* Event Object APIs */
#define clWaitForEvents aclWaitForEvents
cl_int
(*aclWaitForEvents)(cl_uint             /* num_events */,
                    const cl_event *    /* event_list */);

#define clGetEventInfo aclGetEventInfo
cl_int
(*aclGetEventInfo)(cl_event         /* event */,
                   cl_event_info    /* param_name */,
                   size_t           /* param_value_size */,
                   void *           /* param_value */,
                   size_t *         /* param_value_size_ret */);

#define clCreateUserEvent aclCreateUserEvent
cl_event
(*aclCreateUserEvent)(cl_context    /* context */,
                      cl_int *      /* errcode_ret */);

#define clRetainEvent aclRetainEvent
cl_int
(*aclRetainEvent)(cl_event /* event */);

#define clReleaseEvent aclReleaseEvent
cl_int
(*aclReleaseEvent)(cl_event /* event */);

#define clSetUserEventStatus aclSetUserEventStatus
cl_int
(*aclSetUserEventStatus)(cl_event   /* event */,
                         cl_int     /* execution_status */);

#define clSetEventCallback aclSetEventCallback
cl_int
(*aclSetEventCallback)( cl_event    /* event */,
                        cl_int      /* command_exec_callback_type */,
                        void (CL_CALLBACK * /* pfn_notify */)(cl_event, cl_int, void *),
                        void *      /* user_data */);

/* Profiling APIs */
#define clGetEventProfilingInfo aclGetEventProfilingInfo
cl_int
(*aclGetEventProfilingInfo)(cl_event            /* event */,
                            cl_profiling_info   /* param_name */,
                            size_t              /* param_value_size */,
                            void *              /* param_value */,
                            size_t *            /* param_value_size_ret */);

/* Flush and Finish APIs */
#define clFlush aclFlush
cl_int
(*aclFlush)(cl_command_queue /* command_queue */);

#define clFinish aclFinish
cl_int
(*aclFinish)(cl_command_queue /* command_queue */);

/* Enqueued Commands APIs */
#define clEnqueueReadBuffer aclEnqueueReadBuffer
cl_int
(*aclEnqueueReadBuffer)(cl_command_queue    /* command_queue */,
                        cl_mem              /* buffer */,
                        cl_bool             /* blocking_read */,
                        size_t              /* offset */,
                        size_t              /* size */,
                        void *              /* ptr */,
                        cl_uint             /* num_events_in_wait_list */,
                        const cl_event *    /* event_wait_list */,
                        cl_event *          /* event */);

#define clEnqueueReadBufferRect aclEnqueueReadBufferRect
cl_int
(*aclEnqueueReadBufferRect)(cl_command_queue    /* command_queue */,
                            cl_mem              /* buffer */,
                            cl_bool             /* blocking_read */,
                            const size_t *      /* buffer_offset */,
                            const size_t *      /* host_offset */,
                            const size_t *      /* region */,
                            size_t              /* buffer_row_pitch */,
                            size_t              /* buffer_slice_pitch */,
                            size_t              /* host_row_pitch */,
                            size_t              /* host_slice_pitch */,
                            void *              /* ptr */,
                            cl_uint             /* num_events_in_wait_list */,
                            const cl_event *    /* event_wait_list */,
                            cl_event *          /* event */);

#define clEnqueueWriteBuffer aclEnqueueWriteBuffer
cl_int
(*aclEnqueueWriteBuffer)(cl_command_queue   /* command_queue */,
                         cl_mem             /* buffer */,
                         cl_bool            /* blocking_write */,
                         size_t             /* offset */,
                         size_t             /* size */,
                         const void *       /* ptr */,
                         cl_uint            /* num_events_in_wait_list */,
                         const cl_event *   /* event_wait_list */,
                         cl_event *         /* event */);

#define clEnqueueWriteBufferRect aclEnqueueWriteBufferRect
cl_int
(*aclEnqueueWriteBufferRect)(cl_command_queue    /* command_queue */,
                             cl_mem              /* buffer */,
                             cl_bool             /* blocking_write */,
                             const size_t *      /* buffer_offset */,
                             const size_t *      /* host_offset */,
                             const size_t *      /* region */,
                             size_t              /* buffer_row_pitch */,
                             size_t              /* buffer_slice_pitch */,
                             size_t              /* host_row_pitch */,
                             size_t              /* host_slice_pitch */,
                             const void *        /* ptr */,
                             cl_uint             /* num_events_in_wait_list */,
                             const cl_event *    /* event_wait_list */,
                             cl_event *          /* event */);


#define clEnqueueCopyBuffer aclEnqueueCopyBuffer
cl_int
(*aclEnqueueCopyBuffer)(cl_command_queue    /* command_queue */,
                        cl_mem              /* src_buffer */,
                        cl_mem              /* dst_buffer */,
                        size_t              /* src_offset */,
                        size_t              /* dst_offset */,
                        size_t              /* size */,
                        cl_uint             /* num_events_in_wait_list */,
                        const cl_event *    /* event_wait_list */,
                        cl_event *          /* event */);

#define clEnqueueCopyBufferRect aclEnqueueCopyBufferRect
cl_int
(*aclEnqueueCopyBufferRect)(cl_command_queue    /* command_queue */,
                            cl_mem              /* src_buffer */,
                            cl_mem              /* dst_buffer */,
                            const size_t *      /* src_origin */,
                            const size_t *      /* dst_origin */,
                            const size_t *      /* region */,
                            size_t              /* src_row_pitch */,
                            size_t              /* src_slice_pitch */,
                            size_t              /* dst_row_pitch */,
                            size_t              /* dst_slice_pitch */,
                            cl_uint             /* num_events_in_wait_list */,
                            const cl_event *    /* event_wait_list */,
                            cl_event *          /* event */);

#define clEnqueueReadImage aclEnqueueReadImage
cl_int
(*aclEnqueueReadImage)(cl_command_queue     /* command_queue */,
                       cl_mem               /* image */,
                       cl_bool              /* blocking_read */,
                       const size_t *       /* origin[3] */,
                       const size_t *       /* region[3] */,
                       size_t               /* row_pitch */,
                       size_t               /* slice_pitch */,
                       void *               /* ptr */,
                       cl_uint              /* num_events_in_wait_list */,
                       const cl_event *     /* event_wait_list */,
                       cl_event *           /* event */);

#define clEnqueueWriteImage aclEnqueueWriteImage
cl_int
(*aclEnqueueWriteImage)(cl_command_queue    /* command_queue */,
                        cl_mem              /* image */,
                        cl_bool             /* blocking_write */,
                        const size_t *      /* origin[3] */,
                        const size_t *      /* region[3] */,
                        size_t              /* input_row_pitch */,
                        size_t              /* input_slice_pitch */,
                        const void *        /* ptr */,
                        cl_uint             /* num_events_in_wait_list */,
                        const cl_event *    /* event_wait_list */,
                        cl_event *          /* event */);


#define clEnqueueCopyImage aclEnqueueCopyImage
cl_int
(*aclEnqueueCopyImage)(cl_command_queue     /* command_queue */,
                       cl_mem               /* src_image */,
                       cl_mem               /* dst_image */,
                       const size_t *       /* src_origin[3] */,
                       const size_t *       /* dst_origin[3] */,
                       const size_t *       /* region[3] */,
                       cl_uint              /* num_events_in_wait_list */,
                       const cl_event *     /* event_wait_list */,
                       cl_event *           /* event */);

#define clEnqueueCopyImageToBuffer aclEnqueueCopyImageToBuffer
cl_int
(*aclEnqueueCopyImageToBuffer)(cl_command_queue /* command_queue */,
                               cl_mem           /* src_image */,
                               cl_mem           /* dst_buffer */,
                               const size_t *   /* src_origin[3] */,
                               const size_t *   /* region[3] */,
                               size_t           /* dst_offset */,
                               cl_uint          /* num_events_in_wait_list */,
                               const cl_event * /* event_wait_list */,
                               cl_event *       /* event */);

#define clEnqueueCopyBufferToImage aclEnqueueCopyBufferToImage
cl_int
(*aclEnqueueCopyBufferToImage)(cl_command_queue /* command_queue */,
                               cl_mem           /* src_buffer */,
                               cl_mem           /* dst_image */,
                               size_t           /* src_offset */,
                               const size_t *   /* dst_origin[3] */,
                               const size_t *   /* region[3] */,
                               cl_uint          /* num_events_in_wait_list */,
                               const cl_event * /* event_wait_list */,
                               cl_event *       /* event */);

void *
(*aclEnqueueMapBuffer)(cl_command_queue /* command_queue */,
                       cl_mem           /* buffer */,
                       cl_bool          /* blocking_map */,
                       cl_map_flags     /* map_flags */,
                       size_t           /* offset */,
                       size_t           /* size */,
                       cl_uint          /* num_events_in_wait_list */,
                       const cl_event * /* event_wait_list */,
                       cl_event *       /* event */,
                       cl_int *         /* errcode_ret */);

void *
(*aclEnqueueMapImage)(cl_command_queue  /* command_queue */,
                      cl_mem            /* image */,
                      cl_bool           /* blocking_map */,
                      cl_map_flags      /* map_flags */,
                      const size_t *    /* origin[3] */,
                      const size_t *    /* region[3] */,
                      size_t *          /* image_row_pitch */,
                      size_t *          /* image_slice_pitch */,
                      cl_uint           /* num_events_in_wait_list */,
                      const cl_event *  /* event_wait_list */,
                      cl_event *        /* event */,
                      cl_int *          /* errcode_ret */);

#define clEnqueueUnmapMemObject aclEnqueueUnmapMemObject
cl_int
(*aclEnqueueUnmapMemObject)(cl_command_queue /* command_queue */,
                            cl_mem           /* memobj */,
                            void *           /* mapped_ptr */,
                            cl_uint          /* num_events_in_wait_list */,
                            const cl_event *  /* event_wait_list */,
                            cl_event *        /* event */);

#define clEnqueueNDRangeKernel aclEnqueueNDRangeKernel
cl_int
(*aclEnqueueNDRangeKernel)(cl_command_queue /* command_queue */,
                           cl_kernel        /* kernel */,
                           cl_uint          /* work_dim */,
                           const size_t *   /* global_work_offset */,
                           const size_t *   /* global_work_size */,
                           const size_t *   /* local_work_size */,
                           cl_uint          /* num_events_in_wait_list */,
                           const cl_event * /* event_wait_list */,
                           cl_event *       /* event */);

#define clEnqueueTask aclEnqueueTask
cl_int
(*aclEnqueueTask)(cl_command_queue  /* command_queue */,
                  cl_kernel         /* kernel */,
                  cl_uint           /* num_events_in_wait_list */,
                  const cl_event *  /* event_wait_list */,
                  cl_event *        /* event */);

#define clEnqueueNativeKernel aclEnqueueNativeKernel
cl_int
(*aclEnqueueNativeKernel)(cl_command_queue  /* command_queue */,
                          void (CL_CALLBACK * /*user_func*/)(void *),
                          void *            /* args */,
                          size_t            /* cb_args */,
                          cl_uint           /* num_mem_objects */,
                          const cl_mem *    /* mem_list */,
                          const void **     /* args_mem_loc */,
                          cl_uint           /* num_events_in_wait_list */,
                          const cl_event *  /* event_wait_list */,
                          cl_event *        /* event */);





#ifdef CL_USE_DEPRECATED_OPENCL_1_0_APIS
//#warning CL_USE_DEPRECATED_OPENCL_1_0_APIS is defined. These APIs are unsupported and untested in OpenCL 1.1!
    /*
     *  WARNING:
     *     This API introduces mutable state into the OpenCL implementation. It has been REMOVED
     *  to better facilitate thread safety.  The 1.0 API is not thread safe. It is not tested by the
     *  OpenCL 1.1 conformance test, and consequently may not work or may not work dependably.
     *  It is likely to be non-performant. Use of this API is not advised. Use at your own risk.
     *
     *  Software developers previously relying on this API are instructed to set the command queue
     *  properties when creating the queue, instead.
     */
#define clSetCommandQueueProperty aclSetCommandQueueProperty
 cl_int
(*aclSetCommandQueueProperty)(cl_command_queue              /* command_queue */,
                              cl_command_queue_properties   /* properties */,
                              cl_bool                        /* enable */,
                              cl_command_queue_properties * /* old_properties */);
#endif /* CL_USE_DEPRECATED_OPENCL_1_0_APIS */


#ifdef CL_USE_DEPRECATED_OPENCL_1_1_APIS
#define clCreateImage2D aclCreateImage2D
 cl_mem
(*aclCreateImage2D)(cl_context              /* context */,
                    cl_mem_flags            /* flags */,
                    const cl_image_format * /* image_format */,
                    size_t                  /* image_width */,
                    size_t                  /* image_height */,
                    size_t                  /* image_row_pitch */,
                    void *                  /* host_ptr */,
                    cl_int *                /* errcode_ret */);

#define clCreateImage3D aclCreateImage3D
 cl_mem
(*aclCreateImage3D)(cl_context              /* context */,
                    cl_mem_flags            /* flags */,
                    const cl_image_format * /* image_format */,
                    size_t                  /* image_width */,
                    size_t                  /* image_height */,
                    size_t                  /* image_depth */,
                    size_t                  /* image_row_pitch */,
                    size_t                  /* image_slice_pitch */,
                    void *                  /* host_ptr */,
                    cl_int *                /* errcode_ret */);

#define clEnqueueMarker aclEnqueueMarker
 cl_int
(*aclEnqueueMarker)(cl_command_queue    /* command_queue */,
                    cl_event *          /* event */);

#define clEnqueueWaitForEvents aclEnqueueWaitForEvents
 cl_int
(*aclEnqueueWaitForEvents)(cl_command_queue /* command_queue */,
                           cl_uint          /* num_events */,
                           const cl_event * /* event_list */);

#define clEnqueueBarrier aclEnqueueBarrier
 cl_int
(*aclEnqueueBarrier)(cl_command_queue /* command_queue */);

#define clUnloadCompiler aclUnloadCompiler
 cl_int
(*aclUnloadCompiler)(void);

void *
(*aclGetExtensionFunctionAddress)(const char * /* func_name */);
#endif

#ifdef __cplusplus
}
#endif

#endif  /* __AOPENCL_CL_H */

dlopencl.cpp

根据实际情况自行修改 so_path 的路径

#define CL_TARGET_OPENCL_VERSION 120
#include "CL/cl.h"
#include 
#include 
#include 
#include "dlopencl.h"

int loadedCL;

void *getCLHandle() {
    LOGD("get_handle");
    void *res = nullptr;
    char *so_path = (char*)"/system/vendor/lib64/egl/libGLES_mali.so";
    res = dlopen(so_path, RTLD_LAZY);

    if (res == nullptr) {
        LOGD("Open library failed");
    } else {
        LOGD("Loaded library name: [%s]", so_path);
    }

    return res;
}

void initFns() {
    loadedCL = 0;
    void *handle = getCLHandle();
    if (handle == nullptr) return;

/* Platform API */
    IAH();
    aclGetPlatformIDs = (cl_int  (*)(cl_uint          /* num_entries */,
                                     cl_platform_id * /* platforms */,
                                     cl_uint *        /* num_platforms */)) dlsym(handle, "clGetPlatformIDs");

    IAH();
    aclGetPlatformInfo = (cl_int   (*)(cl_platform_id   /* platform */,
                                       cl_platform_info /* param_name */,
                                       size_t           /* param_value_size */,
                                       void *           /* param_value */,
                                       size_t *         /* param_value_size_ret */)) dlsym(handle, "clGetPlatformInfo");

/* Device APIs */
    IAH();
    aclGetDeviceIDs = (cl_int  (*)(cl_platform_id   /* platform */,
                                   cl_device_type   /* device_type */,
                                   cl_uint          /* num_entries */,
                                   cl_device_id *   /* devices */,
                                   cl_uint *        /* num_devices */)) dlsym(handle, "clGetDeviceIDs");

    IAH();
    aclGetDeviceInfo = (cl_int  (*)(cl_device_id    /* device */,
                                    cl_device_info  /* param_name */,
                                    size_t          /* param_value_size */,
                                    void *          /* param_value */,
                                    size_t *        /* param_value_size_ret */)) dlsym(handle, "clGetDeviceInfo");

/* Context APIs  */
    IAH();
    aclCreateContext = (cl_context  (*)(const cl_context_properties * /* properties */,
    cl_uint                 /* num_devices */,
    const cl_device_id *    /* devices */,
    void (CL_CALLBACK *     /* pfn_notify */)(const char *, const void *, size_t, void *),
    void *                  /* user_data */,
    cl_int *                /* errcode_ret */)) dlsym(handle, "clCreateContext");

    IAH();
    aclCreateContextFromType = (cl_context  (*)(const cl_context_properties * /* properties */,
    cl_device_type          /* device_type */,
    void (CL_CALLBACK *     /* pfn_notify*/ )(const char *, const void *, size_t, void *),
    void *                  /* user_data */,
    cl_int *                /* errcode_ret */)) dlsym(handle, "clCreateContextFromType");

    IAH();
    aclRetainContext = (cl_int  (*)(cl_context /* context */)) dlsym(handle, "clRetainContext");

    IAH();
    aclReleaseContext = (cl_int  (*)(cl_context /* context */)) dlsym(handle, "clReleaseContext");

    IAH();
    aclGetContextInfo = (cl_int  (*)(cl_context         /* context */,
                                     cl_context_info    /* param_name */,
                                     size_t             /* param_value_size */,
                                     void *             /* param_value */,
                                     size_t *           /* param_value_size_ret */)) dlsym(handle, "clGetContextInfo");

/* Command Queue APIs */
    IAH();
    aclCreateCommandQueue = (cl_command_queue  (*)(cl_context                     /* context */,
                                                   cl_device_id                   /* device */,
                                                   cl_command_queue_properties    /* properties */,
                                                   cl_int *                       /* errcode_ret */)) dlsym(handle, "clCreateCommandQueue");

    IAH();
    aclRetainCommandQueue = (cl_int  (*)(cl_command_queue /* command_queue */)) dlsym(handle, "clRetainCommandQueue");

    IAH();
    aclReleaseCommandQueue = (cl_int  (*)(cl_command_queue /* command_queue */)) dlsym(handle, "clReleaseCommandQueue");

    IAH();
    aclGetCommandQueueInfo = (cl_int  (*)(cl_command_queue      /* command_queue */,
                                          cl_command_queue_info /* param_name */,
                                          size_t                /* param_value_size */,
                                          void *                /* param_value */,
                                          size_t *              /* param_value_size_ret */)) dlsym(handle, "clGetCommandQueueInfo");

/* Memory Object APIs */
    IAH();
    aclCreateBuffer = (cl_mem  (*)(cl_context   /* context */,
                                   cl_mem_flags /* flags */,
                                   size_t       /* size */,
                                   void *       /* host_ptr */,
                                   cl_int *     /* errcode_ret */)) dlsym(handle, "clCreateBuffer");

    IAH();
    aclCreateSubBuffer = (cl_mem  (*)(cl_mem                   /* buffer */,
                                      cl_mem_flags             /* flags */,
                                      cl_buffer_create_type    /* buffer_create_type */,
                                      const void *             /* buffer_create_info */,
                                      cl_int *                 /* errcode_ret */)) dlsym(handle, "clCreateSubBuffer");

    IAH();
    aclRetainMemObject = (cl_int  (*)(cl_mem /* memobj */)) dlsym(handle, "clRetainMemObject");

    IAH();
    aclReleaseMemObject = (cl_int  (*)(cl_mem /* memobj */)) dlsym(handle, "clReleaseMemObject");

    IAH();
    aclGetSupportedImageFormats = (cl_int  (*)(cl_context           /* context */,
                                               cl_mem_flags         /* flags */,
                                               cl_mem_object_type   /* image_type */,
                                               cl_uint              /* num_entries */,
                                               cl_image_format *    /* image_formats */,
                                               cl_uint *            /* num_image_formats */)) dlsym(handle, "clGetSupportedImageFormats");

    IAH();
    aclGetMemObjectInfo = (cl_int  (*)(cl_mem           /* memobj */,
                                       cl_mem_info      /* param_name */,
                                       size_t           /* param_value_size */,
                                       void *           /* param_value */,
                                       size_t *         /* param_value_size_ret */)) dlsym(handle, "clGetMemObjectInfo");

    IAH();
    aclGetImageInfo = (cl_int  (*)(cl_mem           /* image */,
                                   cl_image_info    /* param_name */,
                                   size_t           /* param_value_size */,
                                   void *           /* param_value */,
                                   size_t *         /* param_value_size_ret */)) dlsym(handle, "clGetImageInfo");

    IAH();
    aclSetMemObjectDestructorCallback = (cl_int  (*)(  cl_mem /* memobj */,
                                                       void (CL_CALLBACK * /*pfn_notify*/)( cl_mem /* memobj */, void* /*user_data*/),
            void * /*user_data */ )) dlsym(handle, "clSetMemObjectDestructorCallback");

/* Sampler APIs */
    IAH();
    aclCreateSampler = (cl_sampler  (*)(cl_context          /* context */,
                                        cl_bool             /* normalized_coords */,
                                        cl_addressing_mode  /* addressing_mode */,
                                        cl_filter_mode      /* filter_mode */,
                                        cl_int *            /* errcode_ret */)) dlsym(handle, "clCreateSampler");

    IAH();
    aclRetainSampler = (cl_int  (*)(cl_sampler /* sampler */)) dlsym(handle, "clRetainSampler");

    IAH();
    aclReleaseSampler = (cl_int  (*)(cl_sampler /* sampler */)) dlsym(handle, "clReleaseSampler");

    IAH();
    aclGetSamplerInfo = (cl_int  (*)(cl_sampler         /* sampler */,
                                     cl_sampler_info    /* param_name */,
                                     size_t             /* param_value_size */,
                                     void *             /* param_value */,
                                     size_t *           /* param_value_size_ret */)) dlsym(handle, "clGetSamplerInfo");

/* Program Object APIs  */
    IAH();
    aclCreateProgramWithSource = (cl_program  (*)(cl_context        /* context */,
                                                  cl_uint           /* count */,
                                                  const char **     /* strings */,
                                                  const size_t *    /* lengths */,
                                                  cl_int *          /* errcode_ret */)) dlsym(handle, "clCreateProgramWithSource");

    IAH();
    aclCreateProgramWithBinary = (cl_program  (*)(cl_context                /* context */,
                                                  cl_uint                   /* num_devices */,
                                                  const cl_device_id *      /* device_list */,
                                                  const size_t *            /* lengths */,
                                                  const unsigned char **    /* binaries */,
                                                  cl_int *                  /* binary_status */,
                                                  cl_int *                  /* errcode_ret */)) dlsym(handle, "clCreateProgramWithBinary");

    IAH();
    aclRetainProgram = (cl_int  (*)(cl_program /* program */)) dlsym(handle, "clRetainProgram");

    IAH();
    aclReleaseProgram = (cl_int  (*)(cl_program /* program */)) dlsym(handle, "clReleaseProgram");

    IAH();
    aclBuildProgram = (cl_int  (*)(cl_program           /* program */,
                                   cl_uint              /* num_devices */,
    const cl_device_id * /* device_list */,
    const char *         /* options */,
    void (CL_CALLBACK *  /* pfn_notify */)(cl_program /* program */, void * /* user_data */),
    void *               /* user_data */)) dlsym(handle, "clBuildProgram");

    IAH();
    aclGetProgramInfo = (cl_int  (*)(cl_program         /* program */,
                                     cl_program_info    /* param_name */,
                                     size_t             /* param_value_size */,
                                     void *             /* param_value */,
                                     size_t *           /* param_value_size_ret */)) dlsym(handle, "clGetProgramInfo");

    IAH();
    aclGetProgramBuildInfo = (cl_int  (*)(cl_program            /* program */,
                                          cl_device_id          /* device */,
                                          cl_program_build_info /* param_name */,
                                          size_t                /* param_value_size */,
                                          void *                /* param_value */,
                                          size_t *              /* param_value_size_ret */)) dlsym(handle, "clGetProgramBuildInfo");

/* Kernel Object APIs */
    IAH();
    aclCreateKernel = (cl_kernel  (*)(cl_program      /* program */,
                                      const char *    /* kernel_name */,
                                      cl_int *        /* errcode_ret */)) dlsym(handle, "clCreateKernel");

    IAH();
    aclCreateKernelsInProgram = (cl_int  (*)(cl_program     /* program */,
                                             cl_uint        /* num_kernels */,
                                             cl_kernel *    /* kernels */,
                                             cl_uint *      /* num_kernels_ret */)) dlsym(handle, "clCreateKernelsInProgram");

    IAH();
    aclRetainKernel = (cl_int  (*)(cl_kernel    /* kernel */)) dlsym(handle, "clRetainKernel");

    IAH();
    aclReleaseKernel = (cl_int  (*)(cl_kernel   /* kernel */)) dlsym(handle, "clReleaseKernel");

    IAH();
    aclSetKernelArg = (cl_int  (*)(cl_kernel    /* kernel */,
                                   cl_uint      /* arg_index */,
                                   size_t       /* arg_size */,
                                   const void * /* arg_value */)) dlsym(handle, "clSetKernelArg");

    IAH();
    aclGetKernelInfo = (cl_int  (*)(cl_kernel       /* kernel */,
                                    cl_kernel_info  /* param_name */,
                                    size_t          /* param_value_size */,
                                    void *          /* param_value */,
                                    size_t *        /* param_value_size_ret */)) dlsym(handle, "clGetKernelInfo");

    IAH();
    aclGetKernelWorkGroupInfo = (cl_int  (*)(cl_kernel                  /* kernel */,
                                             cl_device_id               /* device */,
                                             cl_kernel_work_group_info  /* param_name */,
                                             size_t                     /* param_value_size */,
                                             void *                     /* param_value */,
                                             size_t *                   /* param_value_size_ret */)) dlsym(handle, "clGetKernelWorkGroupInfo");

/* Event Object APIs */
    IAH();
    aclWaitForEvents = (cl_int  (*)(cl_uint             /* num_events */,
                                    const cl_event *    /* event_list */)) dlsym(handle, "clWaitForEvents");

    IAH();
    aclGetEventInfo = (cl_int  (*)(cl_event         /* event */,
                                   cl_event_info    /* param_name */,
                                   size_t           /* param_value_size */,
                                   void *           /* param_value */,
                                   size_t *         /* param_value_size_ret */)) dlsym(handle, "clGetEventInfo");

    IAH();
    aclCreateUserEvent = (cl_event  (*)(cl_context    /* context */,
                                        cl_int *      /* errcode_ret */)) dlsym(handle, "clCreateUserEvent");

    IAH();
    aclRetainEvent = (cl_int  (*)(cl_event /* event */)) dlsym(handle, "clRetainEvent");

    IAH();
    aclReleaseEvent = (cl_int  (*)(cl_event /* event */)) dlsym(handle, "clReleaseEvent");

    IAH();
    aclSetUserEventStatus = (cl_int  (*)(cl_event   /* event */,
                                         cl_int     /* execution_status */)) dlsym(handle, "clSetUserEventStatus");

    IAH();
    aclSetEventCallback = (cl_int  (*)( cl_event    /* event */,
                                        cl_int      /* command_exec_callback_type */,
                                        void (CL_CALLBACK * /* pfn_notify */)(cl_event, cl_int, void *),
            void *      /* user_data */)) dlsym(handle, "clSetEventCallback");

/* Profiling APIs */
    IAH();
    aclGetEventProfilingInfo = (cl_int  (*)(cl_event            /* event */,
                                            cl_profiling_info   /* param_name */,
                                            size_t              /* param_value_size */,
                                            void *              /* param_value */,
                                            size_t *            /* param_value_size_ret */)) dlsym(handle, "clGetEventProfilingInfo");

/* Flush and Finish APIs */
    IAH();
    aclFlush = (cl_int  (*)(cl_command_queue /* command_queue */)) dlsym(handle, "clFlush");

    IAH();
    aclFinish = (cl_int  (*)(cl_command_queue /* command_queue */)) dlsym(handle, "clFinish");

/* Enqueued Commands APIs */
    IAH();
    aclEnqueueReadBuffer = (cl_int  (*)(cl_command_queue    /* command_queue */,
                                        cl_mem              /* buffer */,
                                        cl_bool             /* blocking_read */,
                                        size_t              /* offset */,
                                        size_t              /* size */,
                                        void *              /* ptr */,
                                        cl_uint             /* num_events_in_wait_list */,
                                        const cl_event *    /* event_wait_list */,
                                        cl_event *          /* event */)) dlsym(handle, "clEnqueueReadBuffer");

    IAH();
    aclEnqueueReadBufferRect = (cl_int  (*)(cl_command_queue    /* command_queue */,
                                            cl_mem              /* buffer */,
                                            cl_bool             /* blocking_read */,
                                            const size_t *      /* buffer_offset */,
                                            const size_t *      /* host_offset */,
                                            const size_t *      /* region */,
                                            size_t              /* buffer_row_pitch */,
                                            size_t              /* buffer_slice_pitch */,
                                            size_t              /* host_row_pitch */,
                                            size_t              /* host_slice_pitch */,
                                            void *              /* ptr */,
                                            cl_uint             /* num_events_in_wait_list */,
                                            const cl_event *    /* event_wait_list */,
                                            cl_event *          /* event */)) dlsym(handle, "clEnqueueReadBufferRect");

    IAH();
    aclEnqueueWriteBuffer = (cl_int  (*)(cl_command_queue   /* command_queue */,
                                         cl_mem             /* buffer */,
                                         cl_bool            /* blocking_write */,
                                         size_t             /* offset */,
                                         size_t             /* size */,
                                         const void *       /* ptr */,
                                         cl_uint            /* num_events_in_wait_list */,
                                         const cl_event *   /* event_wait_list */,
                                         cl_event *         /* event */)) dlsym(handle, "clEnqueueWriteBuffer");

    IAH();
    aclEnqueueWriteBufferRect = (cl_int  (*)(cl_command_queue    /* command_queue */,
                                             cl_mem              /* buffer */,
                                             cl_bool             /* blocking_write */,
                                             const size_t *      /* buffer_offset */,
                                             const size_t *      /* host_offset */,
                                             const size_t *      /* region */,
                                             size_t              /* buffer_row_pitch */,
                                             size_t              /* buffer_slice_pitch */,
                                             size_t              /* host_row_pitch */,
                                             size_t              /* host_slice_pitch */,
                                             const void *        /* ptr */,
                                             cl_uint             /* num_events_in_wait_list */,
                                             const cl_event *    /* event_wait_list */,
                                             cl_event *          /* event */)) dlsym(handle, "clEnqueueWriteBufferRect");

    IAH();
    aclEnqueueCopyBuffer = (cl_int  (*)(cl_command_queue    /* command_queue */,
                                        cl_mem              /* src_buffer */,
                                        cl_mem              /* dst_buffer */,
                                        size_t              /* src_offset */,
                                        size_t              /* dst_offset */,
                                        size_t              /* size */,
                                        cl_uint             /* num_events_in_wait_list */,
                                        const cl_event *    /* event_wait_list */,
                                        cl_event *          /* event */)) dlsym(handle, "clEnqueueCopyBuffer");

    IAH();
    aclEnqueueCopyBufferRect = (cl_int  (*)(cl_command_queue    /* command_queue */,
                                            cl_mem              /* src_buffer */,
                                            cl_mem              /* dst_buffer */,
                                            const size_t *      /* src_origin */,
                                            const size_t *      /* dst_origin */,
                                            const size_t *      /* region */,
                                            size_t              /* src_row_pitch */,
                                            size_t              /* src_slice_pitch */,
                                            size_t              /* dst_row_pitch */,
                                            size_t              /* dst_slice_pitch */,
                                            cl_uint             /* num_events_in_wait_list */,
                                            const cl_event *    /* event_wait_list */,
                                            cl_event *          /* event */)) dlsym(handle, "clEnqueueCopyBufferRect");

    IAH();
    aclEnqueueReadImage = (cl_int  (*)(cl_command_queue     /* command_queue */,
                                       cl_mem               /* image */,
                                       cl_bool              /* blocking_read */,
                                       const size_t *       /* origin[3] */,
                                       const size_t *       /* region[3] */,
                                       size_t               /* row_pitch */,
                                       size_t               /* slice_pitch */,
                                       void *               /* ptr */,
                                       cl_uint              /* num_events_in_wait_list */,
                                       const cl_event *     /* event_wait_list */,
                                       cl_event *           /* event */)) dlsym(handle, "clEnqueueReadImage");

    IAH();
    aclEnqueueWriteImage = (cl_int  (*)(cl_command_queue    /* command_queue */,
                                        cl_mem              /* image */,
                                        cl_bool             /* blocking_write */,
                                        const size_t *      /* origin[3] */,
                                        const size_t *      /* region[3] */,
                                        size_t              /* input_row_pitch */,
                                        size_t              /* input_slice_pitch */,
                                        const void *        /* ptr */,
                                        cl_uint             /* num_events_in_wait_list */,
                                        const cl_event *    /* event_wait_list */,
                                        cl_event *          /* event */)) dlsym(handle, "clEnqueueWriteImage");

    IAH();
    aclEnqueueCopyImage = (cl_int  (*)(cl_command_queue     /* command_queue */,
                                       cl_mem               /* src_image */,
                                       cl_mem               /* dst_image */,
                                       const size_t *       /* src_origin[3] */,
                                       const size_t *       /* dst_origin[3] */,
                                       const size_t *       /* region[3] */,
                                       cl_uint              /* num_events_in_wait_list */,
                                       const cl_event *     /* event_wait_list */,
                                       cl_event *           /* event */)) dlsym(handle, "clEnqueueCopyImage");

    IAH();
    aclEnqueueCopyImageToBuffer = (cl_int  (*)(cl_command_queue /* command_queue */,
                                               cl_mem           /* src_image */,
                                               cl_mem           /* dst_buffer */,
                                               const size_t *   /* src_origin[3] */,
                                               const size_t *   /* region[3] */,
                                               size_t           /* dst_offset */,
                                               cl_uint          /* num_events_in_wait_list */,
                                               const cl_event * /* event_wait_list */,
                                               cl_event *       /* event */)) dlsym(handle, "clEnqueueCopyImageToBuffer");

    IAH();
    aclEnqueueCopyBufferToImage = (cl_int  (*)(cl_command_queue /* command_queue */,
                                               cl_mem           /* src_buffer */,
                                               cl_mem           /* dst_image */,
                                               size_t           /* src_offset */,
                                               const size_t *   /* dst_origin[3] */,
                                               const size_t *   /* region[3] */,
                                               cl_uint          /* num_events_in_wait_list */,
                                               const cl_event * /* event_wait_list */,
                                               cl_event *       /* event */)) dlsym(handle, "clEnqueueCopyBufferToImage");

    #if 0
    void *
(*aclEnqueueMapBuffer)(cl_command_queue /* command_queue */,
                   cl_mem           /* buffer */,
                   cl_bool          /* blocking_map */,
                   cl_map_flags     /* map_flags */,
                   size_t           /* offset */,
                   size_t           /* size */,
                   cl_uint          /* num_events_in_wait_list */,
                   const cl_event * /* event_wait_list */,
                   cl_event *       /* event */,
                   cl_int *         /* errcode_ret */)) dlsym(handle,"");

void *
(*aclEnqueueMapImage)(cl_command_queue  /* command_queue */,
                  cl_mem            /* image */,
                  cl_bool           /* blocking_map */,
                  cl_map_flags      /* map_flags */,
                  const size_t *    /* origin[3] */,
                  const size_t *    /* region[3] */,
                  size_t *          /* image_row_pitch */,
                  size_t *          /* image_slice_pitch */,
                  cl_uint           /* num_events_in_wait_list */,
                  const cl_event *  /* event_wait_list */,
                  cl_event *        /* event */,
                  cl_int *          /* errcode_ret */)) dlsym(handle,"");
#endif

    IAH();
    aclEnqueueUnmapMemObject = (cl_int  (*)(cl_command_queue /* command_queue */,
                                            cl_mem           /* memobj */,
                                            void *           /* mapped_ptr */,
                                            cl_uint          /* num_events_in_wait_list */,
                                            const cl_event * /* event_wait_list */,
                                            cl_event *       /* event */)) dlsym(handle, "clEnqueueUnmapMemObject");

    IAH();
    aclEnqueueNDRangeKernel = (cl_int  (*)(cl_command_queue /* command_queue */,
                                           cl_kernel        /* kernel */,
                                           cl_uint          /* work_dim */,
                                           const size_t *   /* global_work_offset */,
                                           const size_t *   /* global_work_size */,
                                           const size_t *   /* local_work_size */,
                                           cl_uint          /* num_events_in_wait_list */,
                                           const cl_event * /* event_wait_list */,
                                           cl_event *       /* event */)) dlsym(handle, "clEnqueueNDRangeKernel");

    IAH();
    aclEnqueueTask = (cl_int  (*)(cl_command_queue  /* command_queue */,
                                  cl_kernel         /* kernel */,
                                  cl_uint           /* num_events_in_wait_list */,
                                  const cl_event *  /* event_wait_list */,
                                  cl_event *        /* event */)) dlsym(handle, "clEnqueueTask");

    IAH();
    aclEnqueueNativeKernel = (cl_int  (*)(cl_command_queue      /* command_queue */,
                                          void (CL_CALLBACK *   /*user_func*/)(void *),
            void *            /* args */,
            size_t            /* cb_args */,
            cl_uint           /* num_mem_objects */,
    const cl_mem *    /* mem_list */,
    const void **     /* args_mem_loc */,
    cl_uint           /* num_events_in_wait_list */,
    const cl_event *  /* event_wait_list */,
    cl_event *        /* event */)) dlsym(handle, "clEnqueueNativeKernel");

#ifdef CL_USE_DEPRECATED_OPENCL_1_0_APIS
    //#warning CL_USE_DEPRECATED_OPENCL_1_0_APIS is defined. These APIs are unsupported and untested in OpenCL 1.1!
    /*
     *  WARNING:
     *     This API introduces mutable state into the OpenCL implementation. It has been REMOVED
     *  to better facilitate thread safety.  The 1.0 API is not thread safe. It is not tested by the
     *  OpenCL 1.1 conformance test, and consequently may not work or may not work dependably.
     *  It is likely to be non-performant. Use of this API is not advised. Use at your own risk.
     *
     *  Software developers previously relying on this API are instructed to set the command queue
     *  properties when creating the queue, instead.
     */
 IAH();
 aclSetCommandQueueProperty = (cl_int  (*)(cl_command_queue              /* command_queue */,
                              cl_command_queue_properties   /* properties */,
                              cl_bool                        /* enable */,
                              cl_command_queue_properties * /* old_properties */)) dlsym(handle,"clSetCommandQueueProperty");
#endif /* CL_USE_DEPRECATED_OPENCL_1_0_APIS */

#ifdef CL_USE_DEPRECATED_OPENCL_1_1_APIS
    IAH();
 aclCreateImage2D = (cl_mem  (*)(cl_context              /* context */,
                    cl_mem_flags            /* flags */,
                    const cl_image_format * /* image_format */,
                    size_t                  /* image_width */,
                    size_t                  /* image_height */,
                    size_t                  /* image_row_pitch */,
                    void *                  /* host_ptr */,
                    cl_int *                /* errcode_ret */)) dlsym(handle,"clCreateImage2D");

 IAH();
 aclCreateImage3D = (cl_mem  (*)(cl_context              /* context */,
                    cl_mem_flags            /* flags */,
                    const cl_image_format * /* image_format */,
                    size_t                  /* image_width */,
                    size_t                  /* image_height */,
                    size_t                  /* image_depth */,
                    size_t                  /* image_row_pitch */,
                    size_t                  /* image_slice_pitch */,
                    void *                  /* host_ptr */,
                    cl_int *                /* errcode_ret */)) dlsym(handle,"clCreateImage3D");

 IAH();
 aclEnqueueMarker = (cl_int  (*)(cl_command_queue    /* command_queue */,
                    cl_event *          /* event */)) dlsym(handle,"clEnqueueMarker");

 IAH();
 aclEnqueueWaitForEvents = (cl_int  (*)(cl_command_queue /* command_queue */,
                           cl_uint          /* num_events */,
                           const cl_event * /* event_list */)) dlsym(handle,"clEnqueueWaitForEvents");

 IAH();
 aclEnqueueBarrier = (cl_int  (*)(cl_command_queue /* command_queue */)) dlsym(handle,"clEnqueueBarrier");

 IAH();
 aclUnloadCompiler = (cl_int  (*)(void)) dlsym(handle,"clUnloadCompiler");
#endif

#if 0
    void *
(*aclGetExtensionFunctionAddress)(const char * /* func_name */)) dlsym(handle,"");

#endif

    loadedCL = 1;
}

转载于:https://www.cnblogs.com/leviatan/p/11132584.html

你可能感兴趣的:(Android AARCH64 平台的 OpenCL 配置)