图片融合 c++ cuda加速

1.新建cuda项目
图片融合 c++ cuda加速_第1张图片
2. 添加环境
vc++目录-->包含目录添加

D:\Program Files\opencv\opencv-3.4.5\build_cuda\install\include\opencv2
D:\Program Files\opencv\opencv-3.4.5\build_cuda\install\include\opencv
D:\Program Files\opencv\opencv-3.4.5\build_cuda\install\include

vc++目录-->库目录

D:\Program Files\opencv\opencv-3.4.5\build_cuda\install\lib

链接器-->输入-->附加依赖项
加入 D:\Program Files\opencv\opencv-3.4.5\build_cuda\install\lib 目录下所有 .lib 文件的文件名。可以用下面的 python 代码把这些文件名输出出来,然后复制到附加依赖项中:

import os

# Library directory of the OpenCV install whose .lib files must be listed
# under Linker -> Input -> Additional Dependencies.
path = r'D:\Program Files\opencv\opencv-3.4.5\build_cuda\install\lib'

# Print one .lib file name per line so the output can be pasted directly.
# splitext only matches a real ".lib" extension (a file literally named "lib"
# is skipped), and the comparison is case-insensitive because Windows file
# names are. Sorting makes the output order reproducible.
for file in sorted(os.listdir(path)):
    if os.path.splitext(file)[1].lower() == '.lib':
        print(file)

3. 代码
这里我只对两幅图片进行了融合。两幅图片的单应性变换已经在上一篇博客里完成,这里直接拿来使用;融合的原理是一样的,这部分代码只是用 cuda 做了加速:上次使用 numpy 矩阵融合耗时 4.8s,这次使用 cuda 融合只需要 1s。有不明白的地方可以参考我的另一篇博客。

// main.cpp
#include 
#include 
#include "opencv2/core/core.hpp"
#include "opencv2/features2d/features2d.hpp"
#include "opencv2/highgui/highgui.hpp"
#include "opencv2/flann/flann.hpp"
#include
#include

#include "opencv2/core/cuda.hpp"
#include
#include
using namespace cv::cuda;
using namespace std;
using namespace cv;

#include 
#include 

using namespace cv;

void swap_rb(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, int left_x, int right_x, Stream& stream = Stream::Null());
int main()
{
	Mat right = imread("srcImg.jpg");
	Mat left = imread("warpImg.jpg");
	namedWindow("src", WINDOW_NORMAL);
	namedWindow("gpu", WINDOW_NORMAL);
	int left_x = 313, right_x = 1079;
	
	imshow("src", right);
	clock_t start, finish;
	start = clock();
	GpuMat gpuRight, gpuLeft,output;
	gpuRight.upload(right);
	gpuLeft.upload(left);
	swap_rb(gpuRight, gpuLeft, output, left_x, right_x);
	Mat image;
	output.download(image);
	finish = clock();
	cout <<"blend time:"<< (finish - start) / CLOCKS_PER_SEC <<"s"<< endl;
	cout << "img.size:" << image.size() << endl;
	imshow("gpu", image);
	waitKey(0);
	return 0;
	
}
// swap_rb.cpp
#include 
#include 
#include "opencv2/core/core.hpp"
#include "opencv2/features2d/features2d.hpp"
#include "opencv2/highgui/highgui.hpp"
#include "opencv2/flann/flann.hpp"
#include
#include
#include "opencv2/core/cuda.hpp"
#include 

using namespace cv::cuda;
using namespace std;
using namespace cv;

#include 
#include 
void swap_rb_caller(const PtrStepSz<uchar3>& src, const PtrStepSz<uchar3>& src1, PtrStep<uchar3> dst, int left_x, int right_x, cudaStream_t stream);
void swap_rb(const GpuMat& src, const GpuMat& src1, GpuMat& dst,int left_x, int right_x, Stream& stream = Stream::Null())
{
	CV_Assert(src.type() == CV_8UC3);
	dst.create(src.size(), src.type());
	cudaStream_t s = StreamAccessor::getStream(stream);
	swap_rb_caller(src, src1, dst, left_x, right_x, s);
}
//kernel.cu
#include "cuda_runtime.h"
#include "device_launch_parameters.h"

#include 
#include
#include 
#include 
#include 
#include 
#include "opencv2/core/cuda.hpp"

using namespace std;
using namespace cv;
using namespace cv::cuda;


// Custom blending kernel: one thread per output pixel.
//
// Launch layout: 2D grid of 2D blocks; x indexes columns (j), y indexes rows
// (i). The grid may overhang the image; surplus threads exit immediately.
// No shared memory is used.
//
// Per pixel:
//   - src2 pixel is pure black (channel sum 0)  -> copy src1
//   - src1 pixel is pure black (channel sum 0)  -> copy src2
//   - otherwise blend with weight d = |j-left_x| / (|j-left_x| + |j-right_x|):
//       dst = src1 * (1 - d) + src2 * d
//
// Precondition: src2 and dst must be valid at every (i, j) inside src1's
// extent; only src1's rows/cols are used for the bounds check.
__global__ void swap_rb_kernel(const PtrStepSz<uchar3> src1, const PtrStepSz<uchar3> src2,PtrStep<uchar3> dst, int left_x, int right_x)
{
    const int j = threadIdx.x + blockIdx.x * blockDim.x;
    const int i = threadIdx.y + blockIdx.y * blockDim.y;

    // The grid is rounded up to whole blocks, so threads in the tail fall
    // outside the image and must not touch memory.
    if (i >= src1.rows || j >= src1.cols)
        return;

    // Load each input pixel once instead of re-reading global memory per channel.
    const uchar3 p1 = src1(i, j);
    const uchar3 p2 = src2(i, j);

    const int src1pix = p1.x + p1.y + p1.z;
    const int src2pix = p2.x + p2.y + p2.z;

    if (src2pix == 0)
    {
        dst(i, j) = p1;
        return;
    }
    if (src1pix == 0)
    {
        dst(i, j) = p2;
        return;
    }

    const float srcLen = (float)abs(j - left_x);
    const float warpLen = (float)abs(j - right_x);
    const float total = srcLen + warpLen;

    // total is 0 only when left_x == right_x == j; use an even blend there
    // instead of dividing 0 by 0 and converting NaN to uchar.
    const float d = (total > 0.0f) ? (srcLen / total) : 0.5f;

    uchar3 blended;
    blended.x = (uchar)(p1.x * (1.0f - d) + p2.x * d);
    blended.y = (uchar)(p1.y * (1.0f - d) + p2.y * d);
    blended.z = (uchar)(p1.z * (1.0f - d) + p2.z * d);
    dst(i, j) = blended;
}

// Converts a failed CUDA status into a cv::Exception carrying the CUDA
// error string; does nothing on cudaSuccess.
static void throwOnCudaError(cudaError_t status)
{
    if (status != cudaSuccess)
        CV_Error(cv::Error::GpuApiCallError, cudaGetErrorString(status));
}

// Launches swap_rb_kernel over the extent of src.
//
//   src, src1        input images viewed as uchar3 pixels
//   dst              output image view
//   left_x, right_x  forwarded unchanged to the kernel
//   stream           stream to enqueue on; when it is the default stream (0)
//                    the call blocks until the device has finished
//
// Throws cv::Exception if the launch or the synchronisation reports an error.
void swap_rb_caller(const PtrStepSz<uchar3>& src, const PtrStepSz<uchar3>& src1, PtrStep<uchar3> dst, int left_x, int right_x, cudaStream_t stream)
{
    // A zero-sized grid is an invalid launch configuration; nothing to do anyway.
    if (src.rows <= 0 || src.cols <= 0)
        return;

    const dim3 block(32, 8);
    // Ceil-divide so the grid covers images whose size is not a block multiple.
    const dim3 grid((src.cols + block.x - 1) / block.x, (src.rows + block.y - 1) / block.y);

    swap_rb_kernel<<<grid, block, 0, stream>>>(src, src1, dst, left_x, right_x);

    // Kernel launches return no status; launch-configuration errors are only
    // visible through cudaGetLastError().
    throwOnCudaError(cudaGetLastError());

    if (stream == 0)
        throwOnCudaError(cudaDeviceSynchronize());
}

图片融合 c++ cuda加速_第2张图片

你可能感兴趣的:(OpenCV)