1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
|
/*
 * low -- 2-D convolution over an interleaved 3-channel image.
 *
 * One work-item processes one pixel: get_global_id(0) is used as the column
 * and get_global_id(1) as the row.
 *
 * A            input image, 3 ints per pixel, row stride img_width * 3
 * B            output image, same layout as A
 * C            convolution coefficients, read row by row
 *              (kernel_width * kernel_width entries for an odd kernel_width)
 * sum          divisor applied to each accumulated channel
 * img_width    image width in pixels; it is also used as the row limit, so
 *              the image is treated as square
 * kernel_width side length of the convolution window
 *
 * Pixels closer to an edge than the window radius are written as 0.
 */
__kernel void low(__global int *A,
                  __global int *B,
                  __global int *C,
                  int sum,
                  int img_width,
                  int kernel_width)
{
    /* The NDRange is two-dimensional, so both indices are meaningful
       (with a 1-D range the second one would always be 0). */
    int col = get_global_id(0);
    int row = get_global_id(1);

    /* Work-items outside the image must not touch B at all. */
    if (col >= img_width || row >= img_width) {
        return;
    }

    int radius = kernel_width / 2;
    int out = (row * img_width + col) * 3;

    /* Border handling is derived from the window radius so that every
       read of A below stays inside the image for any kernel_width. */
    if (col < radius || row < radius ||
        col >= img_width - radius || row >= img_width - radius) {
        B[out + 0] = 0;
        B[out + 1] = 0;
        B[out + 2] = 0;
        return;
    }

    int total0 = 0;
    int total1 = 0;
    int total2 = 0;
    int nid = 0;    /* running index into C */

    for (int ny = row - radius; ny <= row + radius; ny++) {
        for (int nx = col - radius; nx <= col + radius; nx++) {
            int in = (ny * img_width + nx) * 3;
            int weight = C[nid];

            total0 += weight * A[in + 0];
            total1 += weight * A[in + 1];
            total2 += weight * A[in + 2];
            nid++;
        }
    }

    /* Normalise and cap each channel at 255. */
    B[out + 0] = min(255, total0 / sum);
    B[out + 1] = min(255, total1 / sum);
    B[out + 2] = min(255, total2 / sum);
}
|
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
|
#include <iostream>
#include <stdio.h>
#include <string.h>
#include <string>
#include <conio.h>
#include <math.h>//数学库
#include <CL/cl.h>//包含CL的头文件
//调用freeimage
#include <freeimage.h>
using
namespace
std;
// Image size in pixels. The image buffers below hold three ints per pixel.
const int dim_x = 256;
const int dim_y = 256;

// Size of the convolution window.
const int kernel_x = 5;
const int kernel_y = 5;

// buf_A is uploaded to the device as the kernel input, buf_B receives the result.
static int buf_A[dim_x * dim_y * 3];
static int buf_B[dim_x * dim_y * 3];

// Convolution coefficients, one row of the window per line.
static int buf_C[] = {
    1, 1,  1, 1, 1,
    1, 4,  4, 4, 1,
    1, 4, 12, 4, 1,
    1, 4,  4, 4, 1,
    1, 1,  1, 1, 1
};
//加载图片
//以RGBA格式存储图片
static
bool
LoadImg(
const
char
* fname)
{
//初始化FreeImage
FreeImage_Initialise(TRUE);
//定义图片格式为未知
FREE_IMAGE_FORMAT fif = FIF_UNKNOWN;
//获取图片格式
fif = FreeImage_GetFileType(fname,0);
//根据获取格式读取图片数据
FIBITMAP* bitmap = FreeImage_Load(fif,fname,0);
if
(!bitmap)
{
printf
("load error!
");
return
false
;
}
int
x,y;
RGBQUAD m_rgb;
//获取图片长宽
int
width = (
int
)FreeImage_GetWidth(bitmap);
int
height = (
int
)FreeImage_GetHeight(bitmap);
//获取图片数据
//按RGBA格式保存到数组中
for
(y=0;y<height;y++)
{
for
(x=0;x<width;x++)
{
//获取像素值
FreeImage_GetPixelColor(bitmap,x,y,&m_rgb);
//将RGB值存入数组
buf_A[y*width*3+x*3+2] = m_rgb.rgbRed;
buf_A[y*width*3+x*3+1] = m_rgb.rgbGreen;
buf_A[y*width*3+x*3+0] = m_rgb.rgbBlue;
}
}
FreeImage_Unload(bitmap);
return
true
;
}
//保存图片
static
bool
SaveImg()
{
//初始化FreeImage
FreeImage_Initialise(TRUE);
FIBITMAP* bitmap =FreeImage_Allocate(dim_x,dim_y,32,8,8,8);
int
m,n;
for
(n=0;n<dim_y;n++)
{
BYTE
*bits =FreeImage_GetScanLine(bitmap,n);
for
(m=0;m<dim_x;m++)
{
bits[0] = buf_B[dim_x*3*n+m*3+0];
bits[1] = buf_B[dim_x*3*n+m*3+1];
bits[2] = buf_B[dim_x*3*n+m*3+2];
bits[3] = 255;
bits+=4;
}
}
//保存图片为PNG格式
if
(
false
==FreeImage_Save(FIF_PNG, bitmap,
"low.png"
, PNG_DEFAULT))
{
printf
("save image error
");
}
FreeImage_Unload(bitmap);
return
true
;
}
// Read a text file (used here for the OpenCL kernel source) and append its
// contents to str.
//
// fname: path of the file to read.
// str:   receives the file contents, appended to whatever it already holds.
//
// The appended text always ends with one extra character, (char)EOF.
// The original loop produced it by storing the result of the final fgetc,
// and main() in this file drops the last character of the result, so the
// sentinel is kept for compatibility.
//
// Returns true on success, false if the file cannot be opened or a read
// error occurs.
bool GetFileData(const char *fname, string &str)
{
    FILE *fp = fopen(fname, "r");
    if (fp == NULL) {
        printf("no found file\n");
        return false;
    }

    // Stop on the first EOF from fgetc; this also ends the loop on a read
    // error, where feof() would never become true.
    int ch;
    while ((ch = fgetc(fp)) != EOF) {
        str += (char)ch;
    }
    str += (char)EOF;

    const bool read_ok = (ferror(fp) == 0);
    fclose(fp);
    return read_ok;
}
int
main()
{
if
(LoadImg(
"bk.png"
)==
false
)
{
printf
("error load bk.png!
");
return
0;
}
//先读外部CL核心代码,如果失败则退出。
//代码存buf_code里面
string code_file;
if
(
false
== GetFileData(
"low.cl"
,code_file))
{
printf
("Open low.cl error
");
return
0;
}
char
* buf_code =
new
char
[code_file.size()];
strcpy
(buf_code,code_file.c_str());
buf_code[code_file.size()-1] = NULL;
//声明CL所需变量。
cl_device_id device;
cl_platform_id platform_id = NULL;
cl_context context;
cl_command_queue cmdQueue;
cl_mem bufferA,bufferB,bufferC;
cl_program program;
cl_kernel kernel = NULL;
//我们使用的是二维向量
//设定向量大小(维数)
size_t
globalWorkSize[2];
globalWorkSize[0] = dim_x;
globalWorkSize[1] = dim_y;
cl_int err;
/*
定义输入变量和输出变量,并设定初值
*/
size_t
datasize =
sizeof
(
int
) * dim_x * dim_y * 3;
size_t
kernelsize =
sizeof
(
int
)*kernel_x*kernel_y;
int
n=0;
int
sum=0;
//计算卷积核元素之和
for
(n=0;n<25;n++)
{
sum += buf_C[n];
}
//step 1:初始化OpenCL
err = clGetPlatformIDs(1,&platform_id,NULL);
if
(err!=CL_SUCCESS)
{
cout<<
"clGetPlatformIDs error:"
<<err<<endl;
return
0;
}
//这次我们只用CPU来进行并行运算,当然你也可以该成GPU
clGetDeviceIDs(platform_id,CL_DEVICE_TYPE_CPU,1,&device,NULL);
//step 2:创建上下文
context = clCreateContext(NULL,1,&device,NULL,NULL,NULL);
//step 3:创建命令队列
cmdQueue = clCreateCommandQueue(context,device,0,NULL);
//step 4:创建数据缓冲区
bufferA = clCreateBuffer(context,
CL_MEM_READ_ONLY,
datasize,NULL,NULL);
bufferB = clCreateBuffer(context,
CL_MEM_WRITE_ONLY,
datasize,NULL,NULL);
bufferC = clCreateBuffer(context,
CL_MEM_READ_ONLY,
kernelsize,NULL,NULL);
//step 5:将数据上传到缓冲区
clEnqueueWriteBuffer(cmdQueue,
bufferA,CL_FALSE,
0,datasize,
buf_A,0,
NULL,NULL);
clEnqueueWriteBuffer(cmdQueue,
bufferC,CL_FALSE,
0,kernelsize,
buf_C,0,
NULL,NULL);
//step 6:加载编译代码,创建内核调用函数
program = clCreateProgramWithSource(context,1,
(
const
char
**)&buf_code,
NULL,NULL);
clBuildProgram(program,1,&device,NULL,NULL,NULL);
kernel = clCreateKernel(program,
"low"
,NULL);
//step 7:设置参数,执行内核
clSetKernelArg(kernel,0,
sizeof
(cl_mem),&bufferA);
clSetKernelArg(kernel,1,
sizeof
(cl_mem),&bufferB);
clSetKernelArg(kernel,2,
sizeof
(cl_mem),&bufferC);
clSetKernelArg(kernel,3,
sizeof
(cl_int),&sum);
//卷积元素之和
clSetKernelArg(kernel,4,
sizeof
(cl_int),&dim_x);
//图片宽度
clSetKernelArg(kernel,5,
sizeof
(cl_int),&kernel_x);
//卷积核宽度
//注意这里第三个参数已经改成2,表示二维数据。
clEnqueueNDRangeKernel(cmdQueue,kernel,
2,NULL,
globalWorkSize,
NULL,0,NULL,NULL);
//step 8:取回计算结果
clEnqueueReadBuffer(cmdQueue,bufferB,CL_TRUE,0,
datasize,buf_B,0,NULL,NULL);
SaveImg();
//释放所有调用和内存
clReleaseKernel(kernel);
clReleaseProgram(program);
clReleaseCommandQueue(cmdQueue);
clReleaseMemObject(bufferA);
clReleaseMemObject(bufferB);
clReleaseContext(context);
delete
buf_code;
return
0;
}
|
http://www.cmnsoft.com/article.php?id=39