使用OpenCL+OpenCV实现图像卷积(二)

[题外话]近期申请了一个微信公众号:平凡程式人生。有兴趣的朋友可以关注,那里将会涉及更多更新OpenCL+OpenCV以及图像处理方面的文章。


3、kernel程序代码

Kernel程序是每个workitem需要执行的,它需要存储在以cl为后缀的文件中。该程序中kernel文件为ImageConvolution.cl。

Kernel函数的声明如下(完整实现见本节后面的代码):

const sampler_t mysampler = CLK_ADDRESS_CLAMP_TO_EDGE | CLK_FILTER_NEAREST;

__kernel void convolution(

       __read_only image2d_t sourceImage,

       __write_only image2d_t outputImage,

       int cols,

       int rows, 

       __constant float*filter,

int filterWidth);

变量sourceImage和outputImage分别为输入、输出图像对象(image2d_t类型);

变量cols和rows是所需处理图像的宽度和高度;

变量filter是指向卷积核系数的指针,卷积核存放在__constant地址空间的buffer中;

变量filterWidth为卷积核的宽度;

这里在程序作用域(kernel函数之外)定义了一个sampler_t常量,用于指定从输入图像对象中读取像素时的寻址方式和过滤方式。读取图像数据的代码如下:

pixel = read_imageui(sourceImage, mysampler, coords);

Kernel程序的完整实现如下:

// Sampler used for every read of the source image: unnormalized integer
// coordinates, out-of-range coordinates clamped to the nearest edge pixel,
// and no interpolation between pixels.
__constant sampler_t mysampler = CLK_NORMALIZED_COORDS_FALSE |
                                 CLK_ADDRESS_CLAMP_TO_EDGE |
                                 CLK_FILTER_NEAREST;

// Convolves a single-channel image with a square filter.
//
// Each work-item computes one output pixel at (get_global_id(0), get_global_id(1)).
//
//   sourceImage  input image; only the 'x' component of each pixel is used
//   outputImage  output image; the result is written to the 'x' component
//   cols, rows   width and height of the region to process, in pixels
//   filter       filterWidth * filterWidth coefficients, stored row by row
//   filterWidth  width of the square filter; must be odd, because the loops
//                below visit 2 * (filterWidth / 2) + 1 taps per dimension
__kernel void convolution(
    __read_only image2d_t sourceImage,
    __write_only image2d_t outputImage,
    int cols,
    int rows,
    __constant float *filter,
    int filterWidth)
{
    // Store each work-item's unique row and column
    const int column = (int)get_global_id(0);
    const int row = (int)get_global_id(1);

    // Work-items outside the image have nothing to write, so skip the reads too
    if (column >= cols || row >= rows) {
        return;
    }

    // Half the width of the filter is needed for indexing memory later
    const int halfWidth = filterWidth / 2;

    // Accumulate in float: the filter may contain negative coefficients, and an
    // unsigned accumulator would turn negative partial sums into garbage.
    float sum = 0.0f;

    // Iterator for the filter
    int filterIdx = 0;

    // Iterate over the filter rows
    for (int i = -halfWidth; i <= halfWidth; i++) {
        // Iterate over the filter columns
        for (int j = -halfWidth; j <= halfWidth; j++) {
            const int2 coords = (int2)(column + j, row + i);

            // All image reads return a four-element vector (uint4 here); a
            // single-channel image stores the pixel in the 'x' component.
            const uint4 pixel = read_imageui(sourceImage, mysampler, coords);
            sum += (float)pixel.x * filter[filterIdx++];
        }
    }

    // Saturating conversion: negative results become 0 instead of wrapping.
    const uint result = convert_uint_sat(sum);

    // Copy the data to the output image
    write_imageui(outputImage, (int2)(column, row), (uint4)(result, 0, 0, 0));
}

4、host端程序代码

Host端程序处理流程就是按照前面“程序设计”一节编写的。除了调用OpenCL+OpenCV的API函数,其他的地方都是按照C/C++语法编写的。

具体代码如下:

1.	// ImageConvolution.cpp : 定义控制台应用程序的入口点。  
2.	//  
3.	  
#include "stdafx.h"
#include <cstring>
#include <fstream>
#include <iostream>
#include <sstream>
#include <string>
#include <vector>

#include <opencv2/opencv.hpp>

#ifdef __APPLE__
#include <OpenCL/cl.h>
#else
#include <CL/cl.h>
#endif
16.	  
17.	using namespace cv;  
18.	  
19.	void DisplayPlatformInfo(  
20.	    cl_platform_id id,  
21.	    cl_platform_info name,  
22.	    std::string str)  
23.	{  
24.	    cl_int errNum;  
25.	    std::size_t paramValueSize;  
26.	  
27.	    errNum = clGetPlatformInfo(id, name, 0, NULL, ¶mValueSize);  
28.	    if (errNum != CL_SUCCESS) {  
29.	        std::cerr << "Failed to find OpenCL platform"  
30.	            << str << "." << std::endl;  
31.	        return;  
32.	    }  
33.	  
34.	    char *info = (char *)alloca(sizeof(char) * paramValueSize);  
35.	    errNum = clGetPlatformInfo(id, name, paramValueSize, info, NULL);  
36.	    if (errNum != CL_SUCCESS) {  
37.	        std::cerr << "Failed to find OpenCL platform "  
38.	            << str << "." << std::endl;  
39.	        return;  
40.	    }  
41.	  
42.	    std::cout << "\t" << str << ":\t" << info << std::endl;  
43.	  
44.	    return;  
45.	}  
46.	  
47.	int _tmain(int argc, _TCHAR* argv[])  
48.	{  
49.	    cl_int ciErrNum;  
50.	    const char *fileName = "ImageConvolution.cl";  
51.	    int width = 0, height = 0;  
52.	    const char* imageName = "F:\\code\\pic\\test01.jpg";  
53.	    char *bufInput = NULL, *bufOutput = NULL;  
54.	    const float filter[] = {-1,0,-1,0,4,0,-1,0,-1};   
55.	      
56.	    //read one jpeg pic and store it in a Mat variable.  
57.	    Mat img = imread(imageName);  
58.	    if (!img.data) {  
59.	        std::cout << "fail to open the file:" << imageName << std::endl;  
60.	        return -1;  
61.	    }  
62.	  
63.	    //the type of img is RGB, convert to gray image.  
64.	    Mat imgGray;  
65.	    cvtColor(img, imgGray, CV_BGR2GRAY);  
66.	    width = imgGray.cols;  
67.	    height = imgGray.rows;  
68.	    std::cout << "picture width: " << width << ", height: " << height << std::endl;  
69.	      
70.	    //save the source data of original gray image.  
71.	    FILE *yuvFileOrg = NULL;  
72.	    fopen_s(&yuvFileOrg, "gray_org.yuv", "wb");  
73.	    fwrite(imgGray.data, width * height * sizeof(unsigned char), 1, yuvFileOrg);  
74.	    fclose(yuvFileOrg);  
75.	    yuvFileOrg = NULL;    
76.	  
77.	    //display the original gray image in a window.  
78.	    namedWindow( imageName, CV_WINDOW_AUTOSIZE );  
79.	    imshow(imageName, imgGray);  
80.	    //waitKey(0);  
81.	  
82.	    //allocate the input buffer to store the original gray image  
83.	    if (NULL == (bufInput = (char *)malloc(width * height * sizeof(char)))) {  
84.	        std::cerr << "Failed to malloc buffer for input image. " << std::endl;  
85.	        return -1;  
86.	    }  
87.	  
88.	    //allocate the output buffer to store the image rotated.  
89.	    if (NULL == (bufOutput = (char *)malloc(width * height * sizeof(char)))) {  
90.	        std::cerr << "Failed to malloc buffer for output image. " << std::endl;  
91.	        return -1;  
92.	    }  
93.	  
94.	    //copy the data of gray image to the input buffer. initialize the output buffer by zero.   
95.	    memcpy(bufInput, imgGray.data, width * height * sizeof(unsigned char));  
96.	    memset(bufOutput, 0x0, width * height * sizeof(unsigned char));  
97.	      
98.	    //use the first platform  
99.	    cl_platform_id platform;  
100.	    cl_platform_id *platforms;  
101.	    cl_uint numPlatform;  
102.	    ciErrNum = clGetPlatformIDs(0, NULL, &numPlatform); //get the number of platform  
103.	    if (ciErrNum != CL_SUCCESS) {  
104.	        std::cerr << "Failed to get the number of platform." << std::endl;  
105.	        return -1;  
106.	    }  
107.	    std::cout << "The number of the platform is: " << numPlatform << std::endl;  
108.	  
109.	    platforms = (cl_platform_id *)malloc(numPlatform * sizeof(cl_platform_id));  
110.	    ciErrNum = clGetPlatformIDs(numPlatform, platforms, NULL);  
111.	    if (ciErrNum != CL_SUCCESS) {  
112.	        std::cerr << "Failed to get the ID of platform." << std::endl;  
113.	        return -1;  
114.	    }  
115.	    for(cl_uint i = 0; i < numPlatform; i++) {  
116.	        std::cout << "The platform " << i << ":" << std::endl;  
117.	        DisplayPlatformInfo(platforms[i], CL_PLATFORM_PROFILE, "CL_PLATFORM_PROFILE");  
118.	        DisplayPlatformInfo(platforms[i], CL_PLATFORM_VERSION, "CL_PLATFORM_VERSION");  
119.	        DisplayPlatformInfo(platforms[i], CL_PLATFORM_VENDOR, "CL_PLATFORM_VENDOR");  
120.	        DisplayPlatformInfo(platforms[i], CL_PLATFORM_EXTENSIONS, "CL_PLATFORM_EXTENSIONS");  
121.	    }  
122.	  
123.	    platform = platforms[1]; //choose the platform of NVIDIA, support the OpenCL 1.2  
124.	  
125.	    //use the first device  
126.	    cl_device_id device;  
127.	    ciErrNum = clGetDeviceIDs(  
128.	        platform,  
129.	        CL_DEVICE_TYPE_ALL,  
130.	        1,  
131.	        &device,  
132.	        NULL);  
133.	    if (ciErrNum != CL_SUCCESS) {  
134.	        std::cerr << "Failed to get the device." << std::endl;  
135.	        return -1;  
136.	    }  
137.	  
138.	    cl_context_properties cps[3] = {  
139.	        CL_CONTEXT_PLATFORM, (cl_context_properties)platform, 0  
140.	    };  
141.	    //create the context  
142.	    cl_context ctx = clCreateContext(  
143.	        cps,  
144.	        1,  
145.	        &device,  
146.	        NULL,  
147.	        NULL,  
148.	        &ciErrNum);  
149.	    if (ciErrNum != CL_SUCCESS) {  
150.	        std::cerr << "Failed to create the context." << std::endl;  
151.	        return -1;  
152.	    }  
153.	  
154.	    //create the command queue  
155.	    cl_command_queue myqueue = clCreateCommandQueue(  
156.	        ctx,  
157.	        device,  
158.	        0,  
159.	        &ciErrNum);  
160.	    if (ciErrNum != CL_SUCCESS) {  
161.	        std::cerr << "Failed to create the queue." << std::endl;  
162.	        return -1;  
163.	    }  
164.	  
165.	    //the convolution filter is 3x3  
166.	    int filterWidth = 3;  
167.	    int filterSize = filterWidth * filterWidth; //assume a square kernel  
168.	  
169.	    //the image format describes how the data will be stored in memory  
170.	    cl_image_format format;  
171.	    format.image_channel_order = CL_R; //single channel  
172.	    format.image_channel_data_type = CL_UNSIGNED_INT8; //float  data type  
173.	    cl_int status;  
174.	    cl_image_desc desc;  
175.	    desc.image_type = CL_MEM_OBJECT_IMAGE2D;  
176.	    desc.image_width = width;  
177.	    desc.image_height = height;  
178.	    desc.image_depth = 0;  
179.	    desc.image_array_size = 0;  
180.	    desc.image_row_pitch = 0;  
181.	    desc.image_slice_pitch = 0;  
182.	    desc.num_mip_levels = 0;  
183.	    desc.num_samples = 0;  
184.	    desc.buffer = NULL;  
185.	  
186.	    //create the image buffers for input and output data.  
187.	    cl_mem bufferSourceImage = clCreateImage(ctx, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, &format, &desc, bufInput, &status);  
188.	    cl_mem bufferOutputImage = clCreateImage(ctx, CL_MEM_WRITE_ONLY, &format, &desc, bufOutput, &status);  
189.	     
190.	    //create space for the 3x3 filter on the device  
191.	    cl_mem bufferFilter = clCreateBuffer(  
192.	        ctx,  
193.	        0,  
194.	        filterSize * sizeof(float),  
195.	        NULL,  
196.	        NULL);  
197.	  
198.	    //copy the source image to the device  
199.	    size_t origin[3] = {0, 0, 0}; //offset within the image to copy from  
200.	    size_t region[3] = {width, height, 1}; //elements to per dimension  
201.	  
202.	    ciErrNum = clEnqueueWriteImage(  
203.	        myqueue,  
204.	        bufferSourceImage,  
205.	        CL_FALSE,  
206.	        origin,  
207.	        region,  
208.	        0,  
209.	        0,  
210.	        bufInput,  
211.	        0,  
212.	        NULL,  
213.	        NULL);  
214.	    if (ciErrNum != CL_SUCCESS) {  
215.	        std::cerr << "Failed to write the image to the device." << std::endl;  
216.	        return -1;  
217.	    }  
218.	      
219.	    //copy the 3x3 filter to the device  
220.	    ciErrNum = clEnqueueWriteBuffer(  
221.	        myqueue,  
222.	        bufferFilter,  
223.	        CL_FALSE,  
224.	        0,  
225.	        filterSize * sizeof(float),  
226.	        filter,  
227.	        0,  
228.	        NULL,  
229.	        NULL);  
230.	    if (ciErrNum != CL_SUCCESS) {  
231.	        std::cerr << "Failed to write the filter to the device." << std::endl;  
232.	        return -1;  
233.	    }  
234.	  
235.	    //open kernel file and read the content to a string variable.  
236.	    std::ifstream kernelFile("ImageConvolution.cl", std::ios::in);  
237.	    //std::ifstream kernelFile("ImageRotate.cl", std::ios::in);  
238.	    if (!kernelFile.is_open()) {  
239.	        std::cerr << "Failed to open file for reading: " << fileName << std::endl;  
240.	        return NULL;  
241.	    }  
242.	    std::ostringstream oss;  
243.	    oss << kernelFile.rdbuf();  
244.	    std::string srcStdStr = oss.str();  
245.	    const char *srcStr = srcStdStr.c_str();  
246.	    kernelFile.close();  
247.	  
248.	    //create the program with source code of kernel.  
249.	    cl_program myprog = clCreateProgramWithSource(  
250.	        ctx,  
251.	        1,  
252.	        (const char**)&srcStr,  
253.	        NULL,  
254.	        &ciErrNum);  
255.	    if (ciErrNum != CL_SUCCESS) {  
256.	        std::cerr << "Failed to create the program." << std::endl;  
257.	        return -1;  
258.	    }  
259.	  
260.	    //compile the program. passing NULL for the 'device_list' argument targets all devices in the context  
261.	    ciErrNum = clBuildProgram(myprog, 0, NULL, NULL, NULL, NULL);  
262.	    if (ciErrNum != CL_SUCCESS) {  
263.	        std::cerr << "Failed to build the program." << std::endl;  
264.	        return -1;  
265.	    }  
266.	  
267.	    //create the kernel  
268.	    cl_kernel mykernel = clCreateKernel(  
269.	        myprog,  
270.	        "convolution",  
271.	        &ciErrNum);  
272.	    if (ciErrNum != CL_SUCCESS) {  
273.	        std::cerr << "Failed to create the kernel." << std::endl;  
274.	        return -1;  
275.	    }  
276.	  
277.	    //set the kernel arguments  
278.	    clSetKernelArg(mykernel, 0, sizeof(cl_mem), (void *)&bufferSourceImage);  
279.	    clSetKernelArg(mykernel, 1, sizeof(cl_mem), (void *)&bufferOutputImage);  
280.	    clSetKernelArg(mykernel, 2, sizeof(cl_int), (void *)&width);  
281.	    clSetKernelArg(mykernel, 3, sizeof(cl_int), (void *)&height);   
282.	    clSetKernelArg(mykernel, 4, sizeof(cl_mem), (void *)&bufferFilter);   
283.	    clSetKernelArg(mykernel, 5, sizeof(cl_int), (void *)&filterWidth);  
284.	  
285.	    //set local and global workgroup sizes  
286.	    size_t localws[2] = {1, 1};  
287.	    size_t globalws[2] = {width, height};  
288.	  
289.	    //execute the kernel  
290.	    ciErrNum = clEnqueueNDRangeKernel(  
291.	        myqueue,  
292.	        mykernel,  
293.	        2,  
294.	        NULL,  
295.	        globalws,  
296.	        localws,  
297.	        0,  
298.	        NULL,  
299.	        NULL);  
300.	    if (ciErrNum != CL_SUCCESS) {  
301.	        std::cerr << "Failed to execute the kernel." << std::endl;  
302.	        return -1;  
303.	    }  
304.	  
305.	    //read the output data back to the host  
306.	    ciErrNum = clEnqueueReadImage(  
307.	        myqueue,  
308.	        bufferOutputImage,  
309.	        CL_TRUE,  
310.	        origin,  
311.	        region,  
312.	        0,  
313.	        0,  
314.	        bufOutput,  
315.	        0,  
316.	        NULL,  
317.	        NULL);  
318.	    if (ciErrNum != CL_SUCCESS) {  
319.	        std::cerr << "Failed to read the image from the device." << std::endl;  
320.	        return -1;  
321.	    }  
322.	      
323.	    //copy the output data from output buffer to Mat variable.   
324.	    memcpy(imgGray.data, bufOutput, width * height * sizeof(unsigned char));  
325.	  
326.	    //save the source data for gray image rotated  
327.	    FILE *yuvFile = NULL;  
328.	    fopen_s(&yuvFile, "gray.yuv", "wb");  
329.	    fwrite(imgGray.data, width * height * sizeof(unsigned char), 1, yuvFile);  
330.	    fclose(yuvFile);  
331.	    yuvFile = NULL;  
332.	  
333.	    //save the gray image rotated.  
334.	    imwrite("test_gray.jpg", imgGray);  
335.	  
336.	    //show the gray image rotated.  
337.	    const char *winName = "gray_image_convolution";  
338.	    namedWindow(winName, CV_WINDOW_AUTOSIZE );  
339.	    imshow(winName, imgGray);     
340.	    waitKey(0);  
341.	    destroyAllWindows();  
342.	  
343.	    //release all resource  
344.	    if (bufInput != NULL)  
345.	        free(bufInput);  
346.	  
347.	    if (bufOutput != NULL)  
348.	        free(bufOutput);  
349.	  
350.	    if (bufferSourceImage != 0)  
351.	        clReleaseMemObject(bufferSourceImage);  
352.	  
353.	    if (bufferOutputImage != 0)  
354.	        clReleaseMemObject(bufferOutputImage);  
355.	  
356.	    if (bufferFilter != 0)  
357.	        clReleaseMemObject(bufferFilter);  
358.	  
359.	    if (myqueue != 0)  
360.	        clReleaseCommandQueue(myqueue);  
361.	  
362.	    if (mykernel != 0)  
363.	        clReleaseKernel(mykernel);  
364.	  
365.	    if (myprog != 0)  
366.	        clReleaseProgram(myprog);  
367.	  
368.	    if (ctx != 0)  
369.	        clReleaseContext(ctx);  
370.	  
371.	    return 0;  
372.	}  

(未完待续)

你可能感兴趣的:(Opencl)