cuda_opencv之向量相加

实现向量相加，实现方式与矩阵相加相同。

 1 #include <stdlib.h>

 2 #include <stdio.h>

 3 #include <opencv/cv.hpp>

 4 #include <opencv/highgui.h>

 5 

 6 #include <cuda_runtime.h>

 7 #include <device_launch_parameters.h>

 8 

 9 using namespace std;

10 using namespace cv;

11 

12 #define N 10

13 

// Element-wise addition of two device arrays of int2: d_c[i] = d_a[i] + d_b[i],
// applied independently to the .x and .y components.
//
// Parameters:
//   d_a, d_b  device pointers to the inputs; each must hold at least pp int2 elements
//   d_c       device pointer to the output; must hold at least pp int2 elements
//   pp        number of int2 elements to process (values <= 0 do nothing)
//
// Launch layout: any 1D grid / 1D block. Each thread starts at its global index
// and advances by the total number of launched threads, so a single-block
// launch such as <<<1, n>>> and a multi-block launch both cover [0, pp).
// No shared memory is used.
__global__ void Add_kernel(const int2* d_a, const int2* d_b,int2*  d_c ,int pp)
{
    if (pp <= 0)
    {
        return;
    }

    // Index arithmetic is done in size_t so that blockIdx.x * blockDim.x and the
    // stride increment cannot overflow a 32-bit int.
    const size_t count  = static_cast<size_t>(pp);
    const size_t stride = static_cast<size_t>(gridDim.x) * blockDim.x;

    for (size_t i = static_cast<size_t>(blockIdx.x) * blockDim.x + threadIdx.x;
         i < count;
         i += stride)
    {
        // Load both operands before storing so the result is still correct when
        // d_c aliases one of the inputs.
        const int2 lhs = d_a[i];
        const int2 rhs = d_b[i];
        d_c[i] = make_int2(lhs.x + rhs.x, lhs.y + rhs.y);
    }
}

// Prints a diagnostic and terminates the program if a CUDA runtime call failed.
//   status  return code of the CUDA call
//   what    short description of the call, used in the error message
static void checkCuda(cudaError_t status, const char* what)
{
    if (status != cudaSuccess)
    {
        fprintf(stderr, "CUDA error during %s: %s\n", what, cudaGetErrorString(status));
        exit(EXIT_FAILURE);
    }
}

// Demo entry point: builds two 1 x N single-channel 32-bit integer matrices
// (filled with 0 and 10), adds them on the GPU with Add_kernel, copies the
// result back into `a`, and prints the matrices before and after.
int main()
{
    Mat a(1, N, CV_32S, Scalar_<int>(0));
    Mat b(1, N, CV_32S, Scalar_<int>(10));

    cout << a << endl;
    cout << endl;
    cout << b << endl;
    cout << endl;

    // Bytes actually owned by each host matrix: N ints in a single row.
    size_t memSize = a.step * a.rows;

    // Add_kernel consumes the data as int2, i.e. two ints per element, so the
    // N ints form (N + 1) / 2 pairs. Passing N as the element count would make
    // the kernel touch 2 * N ints and run past the end of every buffer.
    const int    pairCount = (N + 1) / 2;
    const size_t devSize   = static_cast<size_t>(pairCount) * sizeof(int2);

    int2* d_a = NULL;
    int2* d_b = NULL;
    int2* d_c = NULL;

    // Device buffers are sized in whole int2 pairs so that an odd N still gives
    // the last pair valid storage.
    checkCuda(cudaMalloc((void**)&d_a, devSize), "cudaMalloc(d_a)");
    checkCuda(cudaMalloc((void**)&d_b, devSize), "cudaMalloc(d_b)");
    checkCuda(cudaMalloc((void**)&d_c, devSize), "cudaMalloc(d_c)");

    // Zero the inputs first: when N is odd the padding int of the last pair is
    // not covered by the host copy and must not be read uninitialized.
    checkCuda(cudaMemset(d_a, 0, devSize), "cudaMemset(d_a)");
    checkCuda(cudaMemset(d_b, 0, devSize), "cudaMemset(d_b)");

    checkCuda(cudaMemcpy(d_a, a.data, memSize, cudaMemcpyHostToDevice), "cudaMemcpy(a -> d_a)");
    checkCuda(cudaMemcpy(d_b, b.data, memSize, cudaMemcpyHostToDevice), "cudaMemcpy(b -> d_b)");

    // One thread per int2 pair in a single block; this is enough for the small
    // N used here (a block can hold at most 1024 threads).
    Add_kernel<<<1, pairCount>>>(d_a, d_b, d_c, pairCount);
    checkCuda(cudaGetLastError(), "Add_kernel launch");

    // The blocking copy also waits for the kernel to finish; only the memSize
    // bytes that belong to the host matrix are copied back.
    checkCuda(cudaMemcpy(a.data, d_c, memSize, cudaMemcpyDeviceToHost), "cudaMemcpy(d_c -> a)");

    cout << endl;
    cout << a << endl;

    checkCuda(cudaFree(d_a), "cudaFree(d_a)");
    checkCuda(cudaFree(d_b), "cudaFree(d_b)");
    checkCuda(cudaFree(d_c), "cudaFree(d_c)");

    system("pause");
    return 0;
}

 

你可能感兴趣的:(opencv)