用OpenCL来映射内存数据通常分为三步:
1. 调用函数clEnqueueMapBuffer或clEnqueueMapImage,将内存映射命令入列。
2. 使用memcpy等函数对主机内存的数据进行传输。
3. 调用clEnqueueUnmapMemObject函数解映射内存。
下面是映射和解映射函数的原型:
OpenCL不仅可以在主机内存和设备内存之间传输数据,还可以在同一设备或不同设备间对两个内存对象进行数据传输。
以下是各个函数的原型:
下面的代码创建两个缓存对象,并将对象1的内容复制到对象2中;然后将对象2映射到主机内存,最后把映射内存中的数据复制到数组中。整个过程可以用下图来表示。
//初始化设备,创建context,queue等
...
// Host-side arrays backing the two device buffers, plus the array that
// later receives the data read back from buffer_two.
float data_one[100];
// Zero-initialized: clCreateBuffer below copies this array into buffer_two
// (CL_MEM_COPY_HOST_PTR), so it must not hold indeterminate values.
float data_two[100] = {0};
float result_array[100];
cl_mem buffer_one, buffer_two;
void *mapped_memory;

// Fill data_one with 0..99 and print it as a 10x10 grid.
cout << "data one: " << endl;
for (int i = 0; i < 10; i++)
{
    for (int j = 0; j < 10; j++)
    {
        data_one[i * 10 + j] = i * 10 + j;
        cout << data_one[i * 10 + j] << " ";
    }
    cout << endl;
}

// Create buffer_one, initialized from the contents of data_one.
buffer_one = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, sizeof(data_one), data_one, &err);
if (err < 0)
{
    cout << "Failed to create buffer one." << err << endl;
    return err;
}

// Create buffer_two, initialized from the (zeroed) contents of data_two.
buffer_two = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, sizeof(data_two), data_two, &err);
if (err < 0)
{
    cout << "Failed to create buffer two." << err << endl;
    return err;
}
// Bind the two buffers as kernel arguments 0 and 1. The results are OR-ed
// together, so err is only a success/failure indicator here (non-zero if
// either call failed), not necessarily a single exact error code.
err = clSetKernelArg(found_kernel, 0, sizeof(cl_mem), &buffer_one);
err |= clSetKernelArg(found_kernel, 1, sizeof(cl_mem), &buffer_two);
// Check immediately after the calls: the next statement overwrites err, so
// a failure here would otherwise go unnoticed.
if (err < 0)
{
    cout << "Failed to set kernel argument." << err << endl;
    return err;
}

// Enqueue the kernel on the command queue.
err = clEnqueueTask(queue, found_kernel, 0, NULL, NULL);
if (err < 0)
{
    cout << "Failed to enqueue task." << err << endl;
    return err;
}
err = clEnqueueCopyBuffer(queue, buffer_one, buffer_two, 0, 0, sizeof(data_one), 0, NULL, NULL);
if (err < 0)
{
cout << "Failed to copy buffer from buffer one to buffer two." << err << endl;
return err;
}
mapped_memory = clEnqueueMapBuffer(queue, buffer_two, CL_TRUE, CL_MAP_READ, 0, sizeof(data_two), 0, NULL, NULL, &err);
if (err < 0)
{
cout << "Failed to map buffer two to host memory." << err << endl;
return err;
}
memcpy(result_array, mapped_memory, sizeof(data_two));
err = clEnqueueUnmapMemObject(queue, buffer_two, mapped_memory, 0, NULL, NULL);
if (err < 0)
{
cout << "Failed to unmap buffer two." << err << endl;
return err;
}
cout << endl << "result array: " << endl;
for (int i = 0; i < 10; i++)
{
for (int j = 0; j < 10; j++)
{
cout << result_array[i * 10 + j] << " ";
}
cout << endl;
}