1.新建cuda项目
2. 添加环境
在vc++目录-->包含目录
添加
D:\Program Files\opencv\opencv-3.4.5\build_cuda\install\include\opencv2
D:\Program Files\opencv\opencv-3.4.5\build_cuda\install\include\opencv
D:\Program Files\opencv\opencv-3.4.5\build_cuda\install\include
vc++目录-->库目录
D:\Program Files\opencv\opencv-3.4.5\build_cuda\install\lib
链接器-->输入-->附加依赖项
加入 D:\Program Files\opencv\opencv-3.4.5\build_cuda\install\lib 目录下的所有lib文件名。可以用下面的python代码把文件名打印出来,然后复制到附加依赖项中:
import os

# Directory that holds the import libraries of the CUDA-enabled OpenCV build.
path = r'D:\Program Files\opencv\opencv-3.4.5\build_cuda\install\lib'

# Print every .lib file name, one per line, so the list can be pasted into
# the linker's additional-dependencies field.
for file in os.listdir(path):
    # Compare the real extension (case-insensitively) so that an entry that
    # is merely named "lib" is not mistaken for a library file.
    if os.path.splitext(file)[1].lower() == '.lib':
        print(file)
3. 代码:这里我只对两幅图片进行了融合。两幅图片的单应性变换在上一篇博客里已经完成,我直接拿来用了,融合的原理是一样的。
这部分代码我只做了cuda加速:上次使用numpy矩阵融合需要4.8s,这次使用cuda融合只需要1s。有不明白的地方可以参考我的另一篇博客。
// main.cpp
#include
#include
#include "opencv2/core/core.hpp"
#include "opencv2/features2d/features2d.hpp"
#include "opencv2/highgui/highgui.hpp"
#include "opencv2/flann/flann.hpp"
#include
#include
#include "opencv2/core/cuda.hpp"
#include
#include
using namespace cv::cuda;
using namespace std;
using namespace cv;
#include
#include
using namespace cv;
// Forward declaration of the GPU blending entry point (defined in swap_rb.cpp).
// The blended result is written to dst; left_x and right_x are forwarded to the kernel.
void swap_rb(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, int left_x, int right_x, Stream& stream = Stream::Null());
int main()
{
Mat right = imread("srcImg.jpg");
Mat left = imread("warpImg.jpg");
namedWindow("src", WINDOW_NORMAL);
namedWindow("gpu", WINDOW_NORMAL);
int left_x = 313, right_x = 1079;
imshow("src", right);
clock_t start, finish;
start = clock();
GpuMat gpuRight, gpuLeft,output;
gpuRight.upload(right);
gpuLeft.upload(left);
swap_rb(gpuRight, gpuLeft, output, left_x, right_x);
Mat image;
output.download(image);
finish = clock();
cout <<"blend time:"<< (finish - start) / CLOCKS_PER_SEC <<"s"<< endl;
cout << "img.size:" << image.size() << endl;
imshow("gpu", image);
waitKey(0);
return 0;
}
// swap_rb.cpp
#include
#include
#include "opencv2/core/core.hpp"
#include "opencv2/features2d/features2d.hpp"
#include "opencv2/highgui/highgui.hpp"
#include "opencv2/flann/flann.hpp"
#include
#include
#include "opencv2/core/cuda.hpp"
#include
using namespace cv::cuda;
using namespace std;
using namespace cv;
#include
#include
// Forward declaration of the kernel launcher implemented in kernel.cu.
void swap_rb_caller(const PtrStepSz<uchar3>& src, const PtrStepSz<uchar3>& src1, PtrStep<uchar3> dst, int left_x, int right_x, cudaStream_t stream);
void swap_rb(const GpuMat& src, const GpuMat& src1, GpuMat& dst,int left_x, int right_x, Stream& stream = Stream::Null())
{
CV_Assert(src.type() == CV_8UC3);
dst.create(src.size(), src.type());
cudaStream_t s = StreamAccessor::getStream(stream);
swap_rb_caller(src, src1, dst, left_x, right_x, s);
}
//kernel.cu
#include "cuda_runtime.h"
#include "device_launch_parameters.h"
#include
#include
#include
#include
#include
#include
#include "opencv2/core/cuda.hpp"
using namespace std;
using namespace cv;
using namespace cv::cuda;
// Custom blending kernel; each thread produces one output pixel.
//
// Launch layout: 2D grid of 2D blocks, x indexes columns and y indexes rows.
// No shared memory is used.
//
//   src1, src2       input images (src2 is expected to be the same size as src1)
//   dst              output image; must be at least src1.rows x src1.cols
//   left_x, right_x  columns used to compute the blend weight
//
// Per pixel:
//   - src2 pixel is black (channel sum 0)  -> copy src1
//   - src1 pixel is black                  -> copy src2
//   - otherwise                            -> src1*(1-d) + src2*d with
//       d = |j-left_x| / (|j-left_x| + |j-right_x|)
__global__ void swap_rb_kernel(const PtrStepSz<uchar3> src1, const PtrStepSz<uchar3> src2,PtrStep<uchar3> dst, int left_x, int right_x)
{
    const int j = threadIdx.x + blockIdx.x * blockDim.x;  // column
    const int i = threadIdx.y + blockIdx.y * blockDim.y;  // row

    // The launcher rounds the grid up to whole blocks, so threads beyond the
    // image edge must leave before touching any memory.
    if (i >= src1.rows || j >= src1.cols)
        return;

    // Load each input pixel once instead of re-reading global memory.
    const uchar3 a = src1(i, j);
    // A pixel outside src2 is treated as black, i.e. "no data from src2".
    const uchar3 b = (i < src2.rows && j < src2.cols) ? src2(i, j) : make_uchar3(0, 0, 0);

    const int src1pix = a.x + a.y + a.z;
    const int src2pix = b.x + b.y + b.z;

    if (src2pix == 0)
    {
        dst(i, j) = a;
    }
    else if (src1pix == 0)
    {
        dst(i, j) = b;
    }
    else
    {
        const float srcLen = (float)abs(j - left_x);
        const float warpLen = (float)abs(j - right_x);
        const float total = srcLen + warpLen;
        // total is 0 only when left_x == right_x == j; use an even blend
        // there instead of dividing 0 by 0.
        const float d = (total > 0.0f) ? (srcLen / total) : 0.5f;

        uchar3 out;
        out.x = (uchar)(a.x * (1.0f - d) + b.x * d);
        out.y = (uchar)(a.y * (1.0f - d) + b.y * d);
        out.z = (uchar)(a.z * (1.0f - d) + b.z * d);
        dst(i, j) = out;
    }
}
// Launches swap_rb_kernel over the whole of src on the given CUDA stream.
//
//   src, src1        input images
//   dst              output image, at least as large as src
//   left_x, right_x  forwarded unchanged to the kernel
//   stream           CUDA stream to launch on; when it is the default
//                    stream (0) the call blocks until the kernel finishes
//
// Throws cv::Exception (Error::GpuApiCallError) when the launch or the
// synchronisation reports a CUDA error.
void swap_rb_caller(const PtrStepSz<uchar3>& src, const PtrStepSz<uchar3>& src1, PtrStep<uchar3> dst, int left_x, int right_x, cudaStream_t stream)
{
    // A grid with a zero dimension is an invalid launch configuration;
    // there is nothing to do for an empty image anyway.
    if (src.cols <= 0 || src.rows <= 0)
        return;

    const dim3 block(32, 8);
    // Ceil-divide so the grid covers images whose size is not a multiple of the block.
    const dim3 grid((src.cols + block.x - 1) / block.x, (src.rows + block.y - 1) / block.y);

    swap_rb_kernel<<<grid, block, 0, stream>>>(src, src1, dst, left_x, right_x);

    // Kernel launches do not return a status; launch errors must be fetched explicitly.
    cudaError_t err = cudaGetLastError();
    if (err != cudaSuccess)
        CV_Error(cv::Error::GpuApiCallError, cudaGetErrorString(err));

    if (stream == 0)
    {
        // Errors raised while the kernel runs surface at this synchronising call.
        err = cudaDeviceSynchronize();
        if (err != cudaSuccess)
            CV_Error(cv::Error::GpuApiCallError, cudaGetErrorString(err));
    }
}