[题外话]近期申请了一个微信公众号:平凡程式人生。有兴趣的朋友可以关注,那里将会涉及更多更新OpenCL+OpenCV以及图像处理方面的文章。
Kernel程序是每个workitem需要执行的,它需要存储在以cl为后缀的文件中。该程序中kernel文件为ImageConvolution.cl。
Kernel内程序定义如下:
const sampler_t mysampler = CLK_ADDRESS_CLAMP_TO_EDGE | CLK_FILTER_NEAREST;
__kernel void convolution(
__read_only image2d_t sourceImage,
__write_only image2d_t outputImage,
int cols,
int rows,
__constant float *filter,
int filterWidth);
变量sourceImage和outputImage为输入、输出图像的buffer;
变量cols和rows是所需处理图像的宽度和高度;
变量*filter指向存储卷积核的buffer;
变量filterWidth为卷积核的宽度;
这里在程序作用域(kernel函数之外)定义了一个const sampler_t变量,用于指定从输入图像中读取数据时的寻址模式和滤波模式。读取图像数据的code如下:
pixel = read_imageui(sourceImage, mysampler, coords);
Kernel程序定义如下:
// Sampler shared by every read of the source image:
//  - unnormalized (pixel) coordinates, which read_imageui requires when it is
//    called with integer coordinates;
//  - coordinates outside the image are clamped to the nearest edge pixel;
//  - nearest-neighbour filtering (no interpolation).
const sampler_t mysampler = CLK_NORMALIZED_COORDS_FALSE |
                            CLK_ADDRESS_CLAMP_TO_EDGE |
                            CLK_FILTER_NEAREST;

// Convolves a single-channel unsigned-integer image with a square filter.
// Each work-item produces one output pixel.
//
// sourceImage - input image; only the 'x' channel of each pixel is used.
// outputImage - output image; the result is written to the 'x' channel.
// cols, rows  - width and height of the area to process, in pixels.
// filter      - filter coefficients, read sequentially row by row.
// filterWidth - width (and height) of the filter. The loops below visit
//               (2 * (filterWidth / 2) + 1)^2 coefficients, so an odd width
//               is expected; an even width would read past the end of
//               'filter'.
__kernel void convolution(
    __read_only image2d_t sourceImage,
    __write_only image2d_t outputImage,
    int cols,
    int rows,
    __constant float *filter,
    int filterWidth)
{
    // Each work-item's unique column and row.
    const int column = get_global_id(0);
    const int row = get_global_id(1);

    // Work-items outside the image have nothing to write, so skip all the
    // image reads for them.
    if (column >= cols || row >= rows)
        return;

    // Half the width of the filter gives the extent of the neighbourhood.
    const int halfWidth = filterWidth / 2;

    // Accumulate in float: the filter may contain negative weights, and
    // converting a negative intermediate value to an unsigned integer is
    // undefined.
    float acc = 0.0f;

    // Running index into the filter coefficients.
    int filterIdx = 0;

    // Iterate over the filter rows.
    for (int i = -halfWidth; i <= halfWidth; i++) {
        // Iterate over the filter columns.
        for (int j = -halfWidth; j <= halfWidth; j++) {
            // Image reads always return a four-element vector (uint4 here);
            // a single-channel image stores its value in the 'x' component.
            const uint4 pixel =
                read_imageui(sourceImage, mysampler, (int2)(column + j, row + i));
            acc += (float)pixel.x * filter[filterIdx++];
        }
    }

    // Saturating conversion: negative sums become 0 instead of wrapping.
    const uint4 result = (uint4)(convert_uint_sat(acc), 0, 0, 0);
    write_imageui(outputImage, (int2)(column, row), result);
}
Host端程序处理流程就是按照前面“程序设计”一节编写的。除了调用OpenCL+OpenCV的API函数,其他的地方都是按照C/C++语法编写的。
具体代码如下:
1. // ImageConvolution.cpp : 定义控制台应用程序的入口点。
2. //
3.
#include "stdafx.h"
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <fstream>
#include <iostream>
#include <sstream>
#include <string>

#include <opencv2/opencv.hpp>

#ifdef __APPLE__
#include <OpenCL/cl.h>
#else
#include <CL/cl.h>
#endif

using namespace cv;
18.
19. void DisplayPlatformInfo(
20. cl_platform_id id,
21. cl_platform_info name,
22. std::string str)
23. {
24. cl_int errNum;
25. std::size_t paramValueSize;
26.
27. errNum = clGetPlatformInfo(id, name, 0, NULL, ¶mValueSize);
28. if (errNum != CL_SUCCESS) {
29. std::cerr << "Failed to find OpenCL platform"
30. << str << "." << std::endl;
31. return;
32. }
33.
34. char *info = (char *)alloca(sizeof(char) * paramValueSize);
35. errNum = clGetPlatformInfo(id, name, paramValueSize, info, NULL);
36. if (errNum != CL_SUCCESS) {
37. std::cerr << "Failed to find OpenCL platform "
38. << str << "." << std::endl;
39. return;
40. }
41.
42. std::cout << "\t" << str << ":\t" << info << std::endl;
43.
44. return;
45. }
46.
47. int _tmain(int argc, _TCHAR* argv[])
48. {
49. cl_int ciErrNum;
50. const char *fileName = "ImageConvolution.cl";
51. int width = 0, height = 0;
52. const char* imageName = "F:\\code\\pic\\test01.jpg";
53. char *bufInput = NULL, *bufOutput = NULL;
54. const float filter[] = {-1,0,-1,0,4,0,-1,0,-1};
55.
56. //read one jpeg pic and store it in a Mat variable.
57. Mat img = imread(imageName);
58. if (!img.data) {
59. std::cout << "fail to open the file:" << imageName << std::endl;
60. return -1;
61. }
62.
63. //the type of img is RGB, convert to gray image.
64. Mat imgGray;
65. cvtColor(img, imgGray, CV_BGR2GRAY);
66. width = imgGray.cols;
67. height = imgGray.rows;
68. std::cout << "picture width: " << width << ", height: " << height << std::endl;
69.
70. //save the source data of original gray image.
71. FILE *yuvFileOrg = NULL;
72. fopen_s(&yuvFileOrg, "gray_org.yuv", "wb");
73. fwrite(imgGray.data, width * height * sizeof(unsigned char), 1, yuvFileOrg);
74. fclose(yuvFileOrg);
75. yuvFileOrg = NULL;
76.
77. //display the original gray image in a window.
78. namedWindow( imageName, CV_WINDOW_AUTOSIZE );
79. imshow(imageName, imgGray);
80. //waitKey(0);
81.
82. //allocate the input buffer to store the original gray image
83. if (NULL == (bufInput = (char *)malloc(width * height * sizeof(char)))) {
84. std::cerr << "Failed to malloc buffer for input image. " << std::endl;
85. return -1;
86. }
87.
88. //allocate the output buffer to store the image rotated.
89. if (NULL == (bufOutput = (char *)malloc(width * height * sizeof(char)))) {
90. std::cerr << "Failed to malloc buffer for output image. " << std::endl;
91. return -1;
92. }
93.
94. //copy the data of gray image to the input buffer. initialize the output buffer by zero.
95. memcpy(bufInput, imgGray.data, width * height * sizeof(unsigned char));
96. memset(bufOutput, 0x0, width * height * sizeof(unsigned char));
97.
98. //use the first platform
99. cl_platform_id platform;
100. cl_platform_id *platforms;
101. cl_uint numPlatform;
102. ciErrNum = clGetPlatformIDs(0, NULL, &numPlatform); //get the number of platform
103. if (ciErrNum != CL_SUCCESS) {
104. std::cerr << "Failed to get the number of platform." << std::endl;
105. return -1;
106. }
107. std::cout << "The number of the platform is: " << numPlatform << std::endl;
108.
109. platforms = (cl_platform_id *)malloc(numPlatform * sizeof(cl_platform_id));
110. ciErrNum = clGetPlatformIDs(numPlatform, platforms, NULL);
111. if (ciErrNum != CL_SUCCESS) {
112. std::cerr << "Failed to get the ID of platform." << std::endl;
113. return -1;
114. }
115. for(cl_uint i = 0; i < numPlatform; i++) {
116. std::cout << "The platform " << i << ":" << std::endl;
117. DisplayPlatformInfo(platforms[i], CL_PLATFORM_PROFILE, "CL_PLATFORM_PROFILE");
118. DisplayPlatformInfo(platforms[i], CL_PLATFORM_VERSION, "CL_PLATFORM_VERSION");
119. DisplayPlatformInfo(platforms[i], CL_PLATFORM_VENDOR, "CL_PLATFORM_VENDOR");
120. DisplayPlatformInfo(platforms[i], CL_PLATFORM_EXTENSIONS, "CL_PLATFORM_EXTENSIONS");
121. }
122.
123. platform = platforms[1]; //choose the platform of NVIDIA, support the OpenCL 1.2
124.
125. //use the first device
126. cl_device_id device;
127. ciErrNum = clGetDeviceIDs(
128. platform,
129. CL_DEVICE_TYPE_ALL,
130. 1,
131. &device,
132. NULL);
133. if (ciErrNum != CL_SUCCESS) {
134. std::cerr << "Failed to get the device." << std::endl;
135. return -1;
136. }
137.
138. cl_context_properties cps[3] = {
139. CL_CONTEXT_PLATFORM, (cl_context_properties)platform, 0
140. };
141. //create the context
142. cl_context ctx = clCreateContext(
143. cps,
144. 1,
145. &device,
146. NULL,
147. NULL,
148. &ciErrNum);
149. if (ciErrNum != CL_SUCCESS) {
150. std::cerr << "Failed to create the context." << std::endl;
151. return -1;
152. }
153.
154. //create the command queue
155. cl_command_queue myqueue = clCreateCommandQueue(
156. ctx,
157. device,
158. 0,
159. &ciErrNum);
160. if (ciErrNum != CL_SUCCESS) {
161. std::cerr << "Failed to create the queue." << std::endl;
162. return -1;
163. }
164.
165. //the convolution filter is 3x3
166. int filterWidth = 3;
167. int filterSize = filterWidth * filterWidth; //assume a square kernel
168.
169. //the image format describes how the data will be stored in memory
170. cl_image_format format;
171. format.image_channel_order = CL_R; //single channel
172. format.image_channel_data_type = CL_UNSIGNED_INT8; //float data type
173. cl_int status;
174. cl_image_desc desc;
175. desc.image_type = CL_MEM_OBJECT_IMAGE2D;
176. desc.image_width = width;
177. desc.image_height = height;
178. desc.image_depth = 0;
179. desc.image_array_size = 0;
180. desc.image_row_pitch = 0;
181. desc.image_slice_pitch = 0;
182. desc.num_mip_levels = 0;
183. desc.num_samples = 0;
184. desc.buffer = NULL;
185.
186. //create the image buffers for input and output data.
187. cl_mem bufferSourceImage = clCreateImage(ctx, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, &format, &desc, bufInput, &status);
188. cl_mem bufferOutputImage = clCreateImage(ctx, CL_MEM_WRITE_ONLY, &format, &desc, bufOutput, &status);
189.
190. //create space for the 3x3 filter on the device
191. cl_mem bufferFilter = clCreateBuffer(
192. ctx,
193. 0,
194. filterSize * sizeof(float),
195. NULL,
196. NULL);
197.
198. //copy the source image to the device
199. size_t origin[3] = {0, 0, 0}; //offset within the image to copy from
200. size_t region[3] = {width, height, 1}; //elements to per dimension
201.
202. ciErrNum = clEnqueueWriteImage(
203. myqueue,
204. bufferSourceImage,
205. CL_FALSE,
206. origin,
207. region,
208. 0,
209. 0,
210. bufInput,
211. 0,
212. NULL,
213. NULL);
214. if (ciErrNum != CL_SUCCESS) {
215. std::cerr << "Failed to write the image to the device." << std::endl;
216. return -1;
217. }
218.
219. //copy the 3x3 filter to the device
220. ciErrNum = clEnqueueWriteBuffer(
221. myqueue,
222. bufferFilter,
223. CL_FALSE,
224. 0,
225. filterSize * sizeof(float),
226. filter,
227. 0,
228. NULL,
229. NULL);
230. if (ciErrNum != CL_SUCCESS) {
231. std::cerr << "Failed to write the filter to the device." << std::endl;
232. return -1;
233. }
234.
235. //open kernel file and read the content to a string variable.
236. std::ifstream kernelFile("ImageConvolution.cl", std::ios::in);
237. //std::ifstream kernelFile("ImageRotate.cl", std::ios::in);
238. if (!kernelFile.is_open()) {
239. std::cerr << "Failed to open file for reading: " << fileName << std::endl;
240. return NULL;
241. }
242. std::ostringstream oss;
243. oss << kernelFile.rdbuf();
244. std::string srcStdStr = oss.str();
245. const char *srcStr = srcStdStr.c_str();
246. kernelFile.close();
247.
248. //create the program with source code of kernel.
249. cl_program myprog = clCreateProgramWithSource(
250. ctx,
251. 1,
252. (const char**)&srcStr,
253. NULL,
254. &ciErrNum);
255. if (ciErrNum != CL_SUCCESS) {
256. std::cerr << "Failed to create the program." << std::endl;
257. return -1;
258. }
259.
260. //compile the program. passing NULL for the 'device_list' argument targets all devices in the context
261. ciErrNum = clBuildProgram(myprog, 0, NULL, NULL, NULL, NULL);
262. if (ciErrNum != CL_SUCCESS) {
263. std::cerr << "Failed to build the program." << std::endl;
264. return -1;
265. }
266.
267. //create the kernel
268. cl_kernel mykernel = clCreateKernel(
269. myprog,
270. "convolution",
271. &ciErrNum);
272. if (ciErrNum != CL_SUCCESS) {
273. std::cerr << "Failed to create the kernel." << std::endl;
274. return -1;
275. }
276.
277. //set the kernel arguments
278. clSetKernelArg(mykernel, 0, sizeof(cl_mem), (void *)&bufferSourceImage);
279. clSetKernelArg(mykernel, 1, sizeof(cl_mem), (void *)&bufferOutputImage);
280. clSetKernelArg(mykernel, 2, sizeof(cl_int), (void *)&width);
281. clSetKernelArg(mykernel, 3, sizeof(cl_int), (void *)&height);
282. clSetKernelArg(mykernel, 4, sizeof(cl_mem), (void *)&bufferFilter);
283. clSetKernelArg(mykernel, 5, sizeof(cl_int), (void *)&filterWidth);
284.
285. //set local and global workgroup sizes
286. size_t localws[2] = {1, 1};
287. size_t globalws[2] = {width, height};
288.
289. //execute the kernel
290. ciErrNum = clEnqueueNDRangeKernel(
291. myqueue,
292. mykernel,
293. 2,
294. NULL,
295. globalws,
296. localws,
297. 0,
298. NULL,
299. NULL);
300. if (ciErrNum != CL_SUCCESS) {
301. std::cerr << "Failed to execute the kernel." << std::endl;
302. return -1;
303. }
304.
305. //read the output data back to the host
306. ciErrNum = clEnqueueReadImage(
307. myqueue,
308. bufferOutputImage,
309. CL_TRUE,
310. origin,
311. region,
312. 0,
313. 0,
314. bufOutput,
315. 0,
316. NULL,
317. NULL);
318. if (ciErrNum != CL_SUCCESS) {
319. std::cerr << "Failed to read the image from the device." << std::endl;
320. return -1;
321. }
322.
323. //copy the output data from output buffer to Mat variable.
324. memcpy(imgGray.data, bufOutput, width * height * sizeof(unsigned char));
325.
326. //save the source data for gray image rotated
327. FILE *yuvFile = NULL;
328. fopen_s(&yuvFile, "gray.yuv", "wb");
329. fwrite(imgGray.data, width * height * sizeof(unsigned char), 1, yuvFile);
330. fclose(yuvFile);
331. yuvFile = NULL;
332.
333. //save the gray image rotated.
334. imwrite("test_gray.jpg", imgGray);
335.
336. //show the gray image rotated.
337. const char *winName = "gray_image_convolution";
338. namedWindow(winName, CV_WINDOW_AUTOSIZE );
339. imshow(winName, imgGray);
340. waitKey(0);
341. destroyAllWindows();
342.
343. //release all resource
344. if (bufInput != NULL)
345. free(bufInput);
346.
347. if (bufOutput != NULL)
348. free(bufOutput);
349.
350. if (bufferSourceImage != 0)
351. clReleaseMemObject(bufferSourceImage);
352.
353. if (bufferOutputImage != 0)
354. clReleaseMemObject(bufferOutputImage);
355.
356. if (bufferFilter != 0)
357. clReleaseMemObject(bufferFilter);
358.
359. if (myqueue != 0)
360. clReleaseCommandQueue(myqueue);
361.
362. if (mykernel != 0)
363. clReleaseKernel(mykernel);
364.
365. if (myprog != 0)
366. clReleaseProgram(myprog);
367.
368. if (ctx != 0)
369. clReleaseContext(ctx);
370.
371. return 0;
372. }