中心线提取--GPU加速

最近在学习CUDA编程,利用周末撸了一个GPU版本的中心线提取算法(灰度重心法)。
实际测试了100多张激光条纹图像的中心线提取:对于100万像素的图像,GPU的计算速度比CPU快1/3左右;对于400万像素的图像,GPU的计算速度是CPU的3倍左右;对于1600万像素的图像,GPU的计算速度是CPU的10~15倍。
下面放上main函数,其他文件放在下载链接里面了。毕竟是知识付费的时代,要几个积分也不算多吧哈哈。

/********************************************************************************
** @auth: taify
** @date: 2022/11/28
** @Ver : V1.0.0
** @desc: 中心线灰度重心法cpu和gpu版,gpu版本适合处理大图及一次性处理多张图的情况
*********************************************************************************/

#include 
#include 
#include 

#include 
#include 
#include 
#include 
#include 

#include 
#include 
#include 
#include 

#include "libcpu/centerline_cpu.h"
#include "libgpu/centerline_gpu.h"

//#define SHOW_RESULT


/**
 * Benchmark driver: runs the centerline extraction on the CPU and on the GPU
 * over the same set of images and prints the elapsed time of each pass.
 *
 * Steps:
 *   1. Read ./images/1.bmp ... ./images/132.bmp as grayscale and upload a copy
 *      of each to the device (not timed).
 *   2. CPU pass: 3x3 Gaussian blur -> THRESH_TOZERO at 100 -> centerline_cpu.
 *   3. GPU pass: the same filter/threshold through cv::cuda, then the
 *      centerline_gpu kernel, with the result copied back to the host.
 *
 * With SHOW_RESULT defined, the extracted points are drawn on the image and
 * written to ./cpu/ and ./gpu/.
 *
 * @return EXIT_SUCCESS on completion; EXIT_FAILURE if an image cannot be read
 *         or a CUDA runtime call / kernel launch reports an error.
 */
int main(int argc, char* argv[])
{
	const size_t image_num = 132;
	std::vector<cv::Mat> mats(image_num);
	std::vector<cv::cuda::GpuMat> gpumats(image_num);

	// clock() returns ticks, not milliseconds; scale by CLOCKS_PER_SEC so the
	// "ms" label is correct on every platform (it is only 1000 on some).
	auto ticks_to_ms = [](clock_t ticks) {
		return static_cast<long long>(static_cast<double>(ticks) * 1000.0 / CLOCKS_PER_SEC);
	};

	// Reports a failed CUDA runtime call; returns true when the call succeeded.
	auto cuda_ok = [](cudaError_t err, const char* what) {
		if (err == cudaSuccess)
		{
			return true;
		}
		std::cerr << what << " failed: " << cudaGetErrorString(err) << std::endl;
		return false;
	};

	for (size_t i = 0; i < image_num; i++)
	{
		const std::string path = "./images/" + std::to_string(i + 1) + ".bmp";
		cv::Mat image = cv::imread(path, 0);
		if (image.empty())
		{
			// An empty Mat would otherwise make the following OpenCV calls throw.
			std::cerr << "failed to read image: " << path << std::endl;
			return EXIT_FAILURE;
		}
		// Resizes to the image's own size, i.e. the dimensions are left unchanged.
		cv::resize(image, image, cv::Size(image.cols , image.rows ));
		mats[i] = image;
		gpumats[i].upload(image);
	}

	// ---------------------------------------------------------------- CPU pass
	const clock_t t1 = clock();
	for (size_t i = 0; i < image_num; i++)
	{
		cv::GaussianBlur(mats[i], mats[i], cv::Size(3, 3), 0, 0);

		cv::threshold(mats[i], mats[i], 100, 255, cv::THRESH_TOZERO);

		// One output value per image column.
		std::vector<float> pts(mats[i].cols);
		centerline_cpu(mats[i], pts.data());
		//std::cout << pts[0] << std::endl;

#ifdef SHOW_RESULT
		cv::cvtColor(mats[i], mats[i], cv::COLOR_GRAY2BGR);
		for (size_t j = 0; j < mats[i].cols; j++)
		{
			cv::circle(mats[i], cv::Point(j, pts[j]), 0.5, cv::Scalar(0, 0, 255), -1);
		}
		cv::imwrite("./cpu/" + std::to_string(i + 1) + ".bmp", mats[i]);
#endif // SHOW_RESULT
	}
	const clock_t t2 = clock();
	std::cout <<"cpu time cost: " << ticks_to_ms(t2 - t1) << "ms" << std::endl;

	// ---------------------------------------------------------------- GPU pass
	// The filter does not depend on the image, so build it once instead of per image.
	cv::Ptr<cv::cuda::Filter> guass_filter = cv::cuda::createGaussianFilter(CV_8U, CV_8U, cv::Size(3, 3), 0, 0);

	for (size_t i = 0; i < image_num; i++)
	{
		guass_filter->apply(gpumats[i], gpumats[i]);

		cv::cuda::threshold(gpumats[i], gpumats[i], 100, 255, cv::THRESH_TOZERO);

		const int cols = gpumats[i].cols;
		const size_t bytes = sizeof(float) * cols;

		// Host result buffer, one value per column, zero-initialised.
		// (The original zero-fill loop reused `i` and indexed gpumats with it.)
		std::vector<float> pts(cols, 0.0f);

		float* dev_pts = nullptr;
		if (!cuda_ok(cudaMalloc(&dev_pts, bytes), "cudaMalloc"))
		{
			return EXIT_FAILURE;
		}

		// All-zero bytes are 0.0f, so a device-side memset replaces the
		// host zero-fill + host-to-device copy.
		bool ok = cuda_ok(cudaMemset(dev_pts, 0, bytes), "cudaMemset");

		dim3 threads(1024, 1, 1);
		// NOTE(review): integer division drops the last (cols % 1024) columns and
		// yields an empty grid when cols < 1024. Switching to a ceil-div is only
		// safe if centerline_gpu bounds-checks its column index - confirm against
		// the kernel source before changing.
		dim3 blocks(cols / threads.x, 1, 1);
		if (ok && blocks.x > 0)
		{
			centerline_gpu<<<blocks, threads>>>(gpumats[i], dev_pts);
			// Launch-configuration errors only surface through cudaGetLastError().
			ok = cuda_ok(cudaGetLastError(), "centerline_gpu launch");
		}

		// Blocking copy: also waits for the kernel to finish.
		ok = ok && cuda_ok(cudaMemcpy(pts.data(), dev_pts, bytes, cudaMemcpyDeviceToHost), "cudaMemcpy");

		// Release the device buffer every iteration (it was leaked before).
		cudaFree(dev_pts);
		if (!ok)
		{
			return EXIT_FAILURE;
		}
		//std::cout << pts[0] << std::endl;

#ifdef SHOW_RESULT
		for (size_t j = 0; j < mats[i].cols; j++)
		{
			cv::circle(mats[i], cv::Point(j, pts[j]), 0.5, cv::Scalar(0, 0, 255), -1);
		}
		cv::imwrite("./gpu/" + std::to_string(i + 1) + ".bmp", mats[i]);
#endif // SHOW_RESULT
	}

	const clock_t t3 = clock();
	std::cout << "gpu time cost: " << ticks_to_ms(t3 - t2) << "ms" << std::endl;

	return EXIT_SUCCESS;
}

工程下载地址:点击跳转
该工程需要配置CUDA和带GPU模块的OpenCV。
VS2019+CUDA11.1 Release x64编译的OpenCV4.5.5(带CUDA和contrib模块)
之前的文章:激光条纹中心线提取算法总结和复现

你可能感兴趣的:(3D,vision,CUDA,c++,CUDA,opencv)