// Image blending with OpenCV and CUDA (opencv联合cuda进行图像混合操作)
1
#include
<
cutil_inline.h
>
2
#include
<
cv.h
>
3
#include
<
cstdio
>
4
#include
<
iostream
>
5
#include
<
cutil.h
>
6
#include
<
ctime
>
7
#include
<
cstdlib
>
8
#include
<
highgui.h
>
9
#include
<
windows.h
>
10
11
#pragma comment(lib,
"
cuda.lib
"
)
12
#pragma comment(lib,
"
cudart.lib
"
)
13
#pragma comment(lib,
"
cutil32.lib
"
)
14
#pragma comment(lib,
"
cv.lib
"
)
15
#pragma comment(lib,
"
cxcore.lib
"
)
16
#pragma comment(lib,
"
highgui.lib
"
)
17
18
using
namespace
std;
19
20
/**
 * Per-pixel weighted blend of two images: C = w * A + (1 - w) * B, with a
 * fixed weight w = 0.2 applied to A. The x, y and z components are blended
 * independently; the w component of the output is set to 0.
 *
 * Launch layout: 2D grid of 2D blocks, one thread per pixel. The grid may
 * overshoot the image; threads outside [0, width) x [0, height) do nothing.
 *
 * @param d_dataC  output buffer, at least width * height elements
 * @param d_dataA  first input buffer, indexed as y * width + x
 * @param d_dataB  second input buffer, indexed as y * width + x
 * @param width    number of pixels per row
 * @param height   number of rows
 */
__global__ void main_kernel(uchar4* d_dataC, uchar4* d_dataA, uchar4* d_dataB, int width, int height)
{
    const int x = blockIdx.x * blockDim.x + threadIdx.x;
    const int y = blockIdx.y * blockDim.y + threadIdx.y;

    // Only the upper bounds need guarding. The previous `x > 0 && y > 0`
    // test skipped the first row and first column, leaving those output
    // pixels unwritten.
    if (x >= width || y >= height)
    {
        return;
    }

    const float w = 0.2f;  // float literal: avoids a double constant in device code
    const int idx = y * width + x;

    // Load each pixel once instead of re-indexing global memory per channel.
    const uchar4 a = d_dataA[idx];
    const uchar4 b = d_dataB[idx];

    uchar4 c;
    c.x = (unsigned char)(w * a.x + (1.0f - w) * b.x);
    c.y = (unsigned char)(w * a.y + (1.0f - w) * b.y);
    c.z = (unsigned char)(w * a.z + (1.0f - w) * b.z);
    c.w = 0;

    d_dataC[idx] = c;
}
34
35
// Paths of the two input images that main() loads and blends.
// Declared as pointers to const: string literals are read-only, and binding
// them to a plain `char*` is ill-formed in C++11 and later.
const char* filenameA = "01.JPG";
const char* filenameB = "02.JPG";
37
/**
 * Copies an image with three interleaved 8-bit channels per pixel into a
 * newly malloc'ed, tightly packed uchar4 array (width * height elements,
 * row-major). Channels 0, 1, 2 go to .x, .y, .z and .w is set to 0.
 *
 * Assumes the image really has 3 channels of 8 bits each; the `col * 3`
 * indexing below depends on it.
 *
 * @return the packed buffer (caller frees with free()), or NULL if the
 *         allocation failed.
 */
static uchar4* packImageToUchar4(const IplImage* image)
{
    const size_t pixelCount = (size_t)image->width * (size_t)image->height;
    uchar4* packed = (uchar4*)malloc(pixelCount * sizeof(uchar4));
    if (packed == NULL)
    {
        return NULL;
    }

    // Walk row by row so both source and destination are read/written
    // sequentially; widthStep is the source row stride in bytes.
    for (int row = 0; row < image->height; row++)
    {
        const unsigned char* src = (const unsigned char*)(image->imageData + image->widthStep * row);
        uchar4* dst = packed + (size_t)row * image->width;
        for (int col = 0; col < image->width; col++)
        {
            dst[col].x = src[col * 3];
            dst[col].y = src[col * 3 + 1];
            dst[col].z = src[col * 3 + 2];
            dst[col].w = 0;
        }
    }
    return packed;
}

/**
 * Inverse of packImageToUchar4: writes the .x, .y, .z components of a packed
 * row-major uchar4 array into channels 0, 1, 2 of a 3-channel 8-bit image.
 * The .w component is ignored.
 */
static void unpackUchar4ToImage(const uchar4* packed, IplImage* image)
{
    for (int row = 0; row < image->height; row++)
    {
        unsigned char* dst = (unsigned char*)(image->imageData + image->widthStep * row);
        const uchar4* src = packed + (size_t)row * image->width;
        for (int col = 0; col < image->width; col++)
        {
            dst[col * 3] = src[col].x;
            dst[col * 3 + 1] = src[col].y;
            dst[col * 3 + 2] = src[col].z;
        }
    }
}

/**
 * Loads the two images named by filenameA / filenameB, blends them on the
 * GPU with main_kernel, prints the kernel time measured two ways, and shows
 * the result in a window until a key is pressed.
 *
 * @return 0 on success; 1 if an image cannot be loaded, the two images differ
 *         in size, or a host allocation fails.
 */
int main()
{
    IplImage* imageA = cvLoadImage(filenameA);
    IplImage* imageB = cvLoadImage(filenameB);
    if (imageA == NULL || imageB == NULL)
    {
        fprintf(stderr, "failed to load input image(s): %s, %s\n", filenameA, filenameB);
        if (imageA != NULL) cvReleaseImage(&imageA);
        if (imageB != NULL) cvReleaseImage(&imageB);
        return 1;
    }

    // The kernel indexes both inputs with A's dimensions, so a smaller B
    // would be read out of bounds on the device.
    if (imageA->width != imageB->width || imageA->height != imageB->height)
    {
        fprintf(stderr, "input images must have the same size (%dx%d vs %dx%d)\n",
                imageA->width, imageA->height, imageB->width, imageB->height);
        cvReleaseImage(&imageA);
        cvReleaseImage(&imageB);
        return 1;
    }

    const int width = imageA->width;
    const int height = imageA->height;
    // Compute the byte count in size_t so width * height cannot overflow int.
    const size_t imageBytes = (size_t)width * (size_t)height * sizeof(uchar4);

    uchar4* h_image_dataA = packImageToUchar4(imageA);
    uchar4* h_image_dataB = packImageToUchar4(imageB);
    uchar4* h_image_dataC = (uchar4*)malloc(imageBytes);
    if (h_image_dataA == NULL || h_image_dataB == NULL || h_image_dataC == NULL)
    {
        fprintf(stderr, "out of host memory\n");
        free(h_image_dataA);  // free(NULL) is a no-op
        free(h_image_dataB);
        free(h_image_dataC);
        cvReleaseImage(&imageA);
        cvReleaseImage(&imageB);
        return 1;
    }

    uchar4* d_dataA = NULL;
    uchar4* d_dataB = NULL;
    uchar4* d_dataC = NULL;
    CUDA_SAFE_CALL( cudaMalloc( (void**)&d_dataA, imageBytes ) );
    CUDA_SAFE_CALL( cudaMalloc( (void**)&d_dataB, imageBytes ) );
    CUDA_SAFE_CALL( cudaMalloc( (void**)&d_dataC, imageBytes ) );

    CUDA_SAFE_CALL( cudaMemcpy( d_dataA, h_image_dataA, imageBytes, cudaMemcpyHostToDevice ) );
    CUDA_SAFE_CALL( cudaMemcpy( d_dataB, h_image_dataB, imageBytes, cudaMemcpyHostToDevice ) );

    // Time the kernel with both the high-resolution counter and GetTickCount.
    LARGE_INTEGER frequency;
    LARGE_INTEGER start_time;
    LARGE_INTEGER end_time;
    QueryPerformanceFrequency(&frequency);
    const DWORD startTick = GetTickCount();
    QueryPerformanceCounter(&start_time);

    dim3 dimBlock(16, 16, 1);
    // Ceil-divide so partial tiles at the right/bottom edges are covered.
    dim3 dimGrid( (width + dimBlock.x - 1) / dimBlock.x, (height + dimBlock.y - 1) / dimBlock.y );
    main_kernel<<<dimGrid, dimBlock, 0>>>(d_dataC, d_dataA, d_dataB, width, height);
    // A launch does not return a status; a bad configuration only shows up here.
    CUDA_SAFE_CALL(cudaGetLastError());
    // Wait for completion so the timers cover the whole kernel. Kept as
    // cudaThreadSynchronize to match the cutil-era toolkit this file targets.
    CUDA_SAFE_CALL(cudaThreadSynchronize());

    QueryPerformanceCounter(&end_time);
    // Counter ticks are in units of 1/frequency seconds; scale to milliseconds.
    const float diff_time = (float)((double)(end_time.QuadPart - start_time.QuadPart) * 1000.0
                                    / (double)frequency.QuadPart);
    printf( "time=%f ms\n", diff_time);

    const double end = (GetTickCount() - startTick) / 1000.0;
    printf("time used:%f s\n", end);

    IplImage* imageC = cvCreateImage(cvGetSize(imageA), imageA->depth, imageA->nChannels);
    CUDA_SAFE_CALL( cudaMemcpy( h_image_dataC, d_dataC, imageBytes, cudaMemcpyDeviceToHost ) );
    unpackUchar4ToImage(h_image_dataC, imageC);

    cvNamedWindow("test", CV_WINDOW_AUTOSIZE);
    cvShowImage("test", imageC);

    cvWaitKey(0);
    cvDestroyAllWindows();

    CUDA_SAFE_CALL(cudaFree(d_dataA));
    CUDA_SAFE_CALL(cudaFree(d_dataB));
    CUDA_SAFE_CALL(cudaFree(d_dataC));

    free(h_image_dataA);
    free(h_image_dataB);
    free(h_image_dataC);
    cvReleaseImage(&imageA);
    cvReleaseImage(&imageB);
    cvReleaseImage(&imageC);
    return 0;
}

2

3

4

5

6

7

8

9

10

11

12

13

14

15

16

17

18

19

20

21



22

23

24

25

26



27

28

29

30

31

32

33

34

35

36

37

38



39

40

41

42

43

44



45

46



47

48

49

50

51

52

53

54

55

56



57

58



59

60

61

62

63

64

65

66

67

68

69

70

71

72

73

74

75

76

77

78

79

80

81

82

83

84

85

86

87

88

89

90

91

92

93

94

95

96

97

98



99

100



101

102

103

104

105

106

107

108

109

110

111

112

113

114

115

116

117

118

119

120

121

122

123

124
