cuda学习笔记5——CUDA实现图像形态学腐蚀、膨胀

cuda学习笔记5——CUDA实现图像形态学腐蚀、膨胀

  • 代码
  • linux如何编译cuda和opencv代码
  • 耗时情况

代码

#include "cuda_runtime.h"
#include "device_launch_parameters.h"
#include <cstdio>
#include <cstdlib>
#include <iostream>
//#include "/usr/include/opencv4/opencv2/opencv.hpp"
#include <opencv2/opencv.hpp>
using namespace std;
using namespace cv;

// Erosion kernel: every output pixel becomes the minimum input value found
// under a rectangular window centred on that pixel.
//
// Launch layout: 2D grid of 2D blocks, one thread per pixel. The grid may
// overhang the image (ceil-div sizing); surplus threads exit immediately.
//
// dataIn       : source image, single channel, imgWidth * imgHeight bytes,
//                rows stored back to back (pixel (x, y) at y * imgWidth + x).
// dataOut      : destination image with the same layout; must not alias dataIn.
// erodeElement : window size; the window spans [-width/2, +width/2] columns
//                and [-height/2, +height/2] rows around the pixel.
// imgWidth     : image width in pixels.
// imgHeight    : image height in pixels.
//
// Near the image border the window is clipped to the image, i.e. pixels
// outside the image never win the minimum. This is intended to match the
// default border handling of cv::erode.
__global__ void erodeInCuda(unsigned char *dataIn, unsigned char *dataOut, Size erodeElement, int imgWidth, int imgHeight)
{
    // Column / row of the pixel handled by this thread.
    const int xIndex = threadIdx.x + blockIdx.x * blockDim.x;
    const int yIndex = threadIdx.y + blockIdx.y * blockDim.y;

    // Threads that fall outside the image must not read or write anything.
    if (xIndex >= imgWidth || yIndex >= imgHeight)
    {
        return;
    }

    const int halfEW = erodeElement.width / 2;
    const int halfEH = erodeElement.height / 2;

    // Window bounds clipped to the image (inclusive on both ends).
    const int xFirst = (xIndex - halfEW < 0) ? 0 : xIndex - halfEW;
    const int xLast = (xIndex + halfEW > imgWidth - 1) ? imgWidth - 1 : xIndex + halfEW;
    const int yFirst = (yIndex - halfEH < 0) ? 0 : yIndex - halfEH;
    const int yLast = (yIndex + halfEH > imgHeight - 1) ? imgHeight - 1 : yIndex + halfEH;

    // Keep the running minimum in a local variable and store it once at the end.
    const size_t centre = (size_t)yIndex * imgWidth + xIndex;
    unsigned char lowest = dataIn[centre];

    for (int y = yFirst; y <= yLast; y++)
    {
        const unsigned char *row = dataIn + (size_t)y * imgWidth;
        for (int x = xFirst; x <= xLast; x++)
        {
            const unsigned char candidate = row[x];
            if (candidate < lowest)
            {
                lowest = candidate;
            }
        }
    }

    dataOut[centre] = lowest;
}

// Dilation kernel: every output pixel becomes the maximum input value found
// under a rectangular window centred on that pixel.
//
// Launch layout: 2D grid of 2D blocks, one thread per pixel. The grid may
// overhang the image (ceil-div sizing); surplus threads exit immediately.
//
// dataIn        : source image, single channel, imgWidth * imgHeight bytes,
//                 rows stored back to back (pixel (x, y) at y * imgWidth + x).
// dataOut       : destination image with the same layout; must not alias dataIn.
// dilateElement : window size; the window spans [-width/2, +width/2] columns
//                 and [-height/2, +height/2] rows around the pixel.
// imgWidth      : image width in pixels.
// imgHeight     : image height in pixels.
//
// Near the image border the window is clipped to the image, i.e. pixels
// outside the image never win the maximum. This is intended to match the
// default border handling of cv::dilate.
__global__ void dilateInCuda(unsigned char *dataIn, unsigned char *dataOut, Size dilateElement, int imgWidth, int imgHeight)
{
    // Column / row of the pixel handled by this thread.
    const int xIndex = threadIdx.x + blockIdx.x * blockDim.x;
    const int yIndex = threadIdx.y + blockIdx.y * blockDim.y;

    // Threads that fall outside the image must not read or write anything.
    if (xIndex >= imgWidth || yIndex >= imgHeight)
    {
        return;
    }

    const int halfEW = dilateElement.width / 2;
    const int halfEH = dilateElement.height / 2;

    // Window bounds clipped to the image (inclusive on both ends).
    const int xFirst = (xIndex - halfEW < 0) ? 0 : xIndex - halfEW;
    const int xLast = (xIndex + halfEW > imgWidth - 1) ? imgWidth - 1 : xIndex + halfEW;
    const int yFirst = (yIndex - halfEH < 0) ? 0 : yIndex - halfEH;
    const int yLast = (yIndex + halfEH > imgHeight - 1) ? imgHeight - 1 : yIndex + halfEH;

    // Keep the running maximum in a local variable and store it once at the end.
    const size_t centre = (size_t)yIndex * imgWidth + xIndex;
    unsigned char highest = dataIn[centre];

    for (int y = yFirst; y <= yLast; y++)
    {
        const unsigned char *row = dataIn + (size_t)y * imgWidth;
        for (int x = xFirst; x <= xLast; x++)
        {
            const unsigned char candidate = row[x];
            if (candidate > highest)
            {
                highest = candidate;
            }
        }
    }

    dataOut[centre] = highest;
}


// Reports a failed CUDA runtime call on stderr.
// Returns true when `status` is cudaSuccess, false otherwise.
static bool cudaOk(cudaError_t status, const char *what)
{
    if (status == cudaSuccess)
    {
        return true;
    }
    fprintf(stderr, "CUDA error during %s: %s\n", what, cudaGetErrorString(status));
    return false;
}

// Demo driver: loads "1.jpg", runs the CUDA erosion and dilation kernels with
// a 3x5 window, runs the OpenCV CPU equivalents, and shows the CPU dilation.
// Returns 0 on success and -1 when the image cannot be read or a CUDA call fails.
int main()
{
    Mat srcImg = imread("1.jpg");     // input image as stored on disk
    Mat grayImg = imread("1.jpg", 0); // same file loaded as grayscale
    if (srcImg.empty() || grayImg.empty())
    {
        fprintf(stderr, "could not read 1.jpg\n");
        return -1;
    }
    // The kernels index the image as one tightly packed buffer.
    if (!grayImg.isContinuous())
    {
        grayImg = grayImg.clone();
    }

    cv::namedWindow("srcImg", 0);
    cv::imshow("srcImg", srcImg);
    cv::waitKey(1000);

    cv::namedWindow("grayImg", 0);
    cv::imshow("grayImg", grayImg);
    cv::waitKey(1000);

    unsigned char *d_in = NULL;   // input image on the GPU
    unsigned char *d_out1 = NULL; // erosion result on the GPU
    unsigned char *d_out2 = NULL; // dilation result on the GPU

    const int imgWidth = grayImg.cols;
    const int imgHeight = grayImg.rows;
    // Compute the byte count in size_t so large images cannot overflow int.
    const size_t imgBytes = (size_t)imgWidth * (size_t)imgHeight * sizeof(unsigned char);

    Mat dstImg1(imgHeight, imgWidth, CV_8UC1, Scalar(0)); // erosion result on the host
    Mat dstImg2(imgHeight, imgWidth, CV_8UC1, Scalar(0)); // dilation result on the host

    // One thread per pixel; the grid is rounded up to whole 32x32 blocks.
    dim3 threadsPerBlock(32, 32);
    dim3 blocksPerGrid((imgWidth + threadsPerBlock.x - 1) / threadsPerBlock.x, (imgHeight + threadsPerBlock.y - 1) / threadsPerBlock.y);
    // Structuring element size (width 3, height 5).
    Size Element(3, 5);

    // Allocate device buffers and upload the input; stop at the first failure.
    bool ok = cudaOk(cudaMalloc((void**)&d_in, imgBytes), "cudaMalloc(d_in)")
           && cudaOk(cudaMalloc((void**)&d_out1, imgBytes), "cudaMalloc(d_out1)")
           && cudaOk(cudaMalloc((void**)&d_out2, imgBytes), "cudaMalloc(d_out2)")
           && cudaOk(cudaMemcpy(d_in, grayImg.data, imgBytes, cudaMemcpyHostToDevice), "image upload");

    // CUDA erosion, then copy the result back to the host.
    if (ok)
    {
        erodeInCuda<<<blocksPerGrid, threadsPerBlock>>>(d_in, d_out1, Element, imgWidth, imgHeight);
        ok = cudaOk(cudaGetLastError(), "erodeInCuda launch")
          && cudaOk(cudaMemcpy(dstImg1.data, d_out1, imgBytes, cudaMemcpyDeviceToHost), "erosion download");
    }

    // CPU erosion (OpenCV implementation).
    Mat erodeImg;
    Mat element = getStructuringElement(MORPH_RECT, Size(3, 5));
    erode(grayImg, erodeImg, element);

    // CUDA dilation, then copy the result back to the host.
    if (ok)
    {
        dilateInCuda<<<blocksPerGrid, threadsPerBlock>>>(d_in, d_out2, Element, imgWidth, imgHeight);
        ok = cudaOk(cudaGetLastError(), "dilateInCuda launch")
          && cudaOk(cudaMemcpy(dstImg2.data, d_out2, imgBytes, cudaMemcpyDeviceToHost), "dilation download");
    }

    // Release device memory on every path; cudaFree accepts null pointers.
    cudaFree(d_in);
    cudaFree(d_out1);
    cudaFree(d_out2);
    if (!ok)
    {
        return -1;
    }

    // CPU dilation (OpenCV implementation).
    Mat dilateImg;
    dilate(grayImg, dilateImg, element);

    cv::namedWindow("dilateImg11111", 0);
    cv::imshow("dilateImg11111", dilateImg);
    cv::waitKey(1000);
    cv::waitKey(0);
    return 0;
}




linux如何编译cuda和opencv代码

 nvcc  `pkg-config opencv4 --cflags --libs` test5.cu -o test5 
 #或者
 nvcc `pkg-config --libs opencv4` -L. -L/usr/local/cuda/lib -lcuda -lcudart `pkg-config --cflags opencv4` -I. -I/usr/local/cuda/include test5.cu -o test5 

耗时情况

#include "cuda_runtime.h"
#include "device_launch_parameters.h"
#include 
#include 
#include 
#include 

//#include "/usr/include/opencv4/opencv2/opencv.hpp"
#include 
using namespace std;
using namespace cv;

// Erosion kernel: every output pixel becomes the minimum input value found
// under a rectangular window centred on that pixel.
//
// Launch layout: 2D grid of 2D blocks, one thread per pixel. The grid may
// overhang the image (ceil-div sizing); surplus threads exit immediately.
//
// dataIn       : source image, single channel, imgWidth * imgHeight bytes,
//                rows stored back to back (pixel (x, y) at y * imgWidth + x).
// dataOut      : destination image with the same layout; must not alias dataIn.
// erodeElement : window size; the window spans [-width/2, +width/2] columns
//                and [-height/2, +height/2] rows around the pixel.
// imgWidth     : image width in pixels.
// imgHeight    : image height in pixels.
//
// Near the image border the window is clipped to the image, i.e. pixels
// outside the image never win the minimum. This is intended to match the
// default border handling of cv::erode.
__global__ void erodeInCuda(unsigned char *dataIn, unsigned char *dataOut, Size erodeElement, int imgWidth, int imgHeight)
{
    // Column / row of the pixel handled by this thread.
    const int xIndex = threadIdx.x + blockIdx.x * blockDim.x;
    const int yIndex = threadIdx.y + blockIdx.y * blockDim.y;

    // Threads that fall outside the image must not read or write anything.
    if (xIndex >= imgWidth || yIndex >= imgHeight)
    {
        return;
    }

    const int halfEW = erodeElement.width / 2;
    const int halfEH = erodeElement.height / 2;

    // Window bounds clipped to the image (inclusive on both ends).
    const int xFirst = (xIndex - halfEW < 0) ? 0 : xIndex - halfEW;
    const int xLast = (xIndex + halfEW > imgWidth - 1) ? imgWidth - 1 : xIndex + halfEW;
    const int yFirst = (yIndex - halfEH < 0) ? 0 : yIndex - halfEH;
    const int yLast = (yIndex + halfEH > imgHeight - 1) ? imgHeight - 1 : yIndex + halfEH;

    // Keep the running minimum in a local variable and store it once at the end.
    const size_t centre = (size_t)yIndex * imgWidth + xIndex;
    unsigned char lowest = dataIn[centre];

    for (int y = yFirst; y <= yLast; y++)
    {
        const unsigned char *row = dataIn + (size_t)y * imgWidth;
        for (int x = xFirst; x <= xLast; x++)
        {
            const unsigned char candidate = row[x];
            if (candidate < lowest)
            {
                lowest = candidate;
            }
        }
    }

    dataOut[centre] = lowest;
}

// Dilation kernel: every output pixel becomes the maximum input value found
// under a rectangular window centred on that pixel.
//
// Launch layout: 2D grid of 2D blocks, one thread per pixel. The grid may
// overhang the image (ceil-div sizing); surplus threads exit immediately.
//
// dataIn        : source image, single channel, imgWidth * imgHeight bytes,
//                 rows stored back to back (pixel (x, y) at y * imgWidth + x).
// dataOut       : destination image with the same layout; must not alias dataIn.
// dilateElement : window size; the window spans [-width/2, +width/2] columns
//                 and [-height/2, +height/2] rows around the pixel.
// imgWidth      : image width in pixels.
// imgHeight     : image height in pixels.
//
// Near the image border the window is clipped to the image, i.e. pixels
// outside the image never win the maximum. This is intended to match the
// default border handling of cv::dilate.
__global__ void dilateInCuda(unsigned char *dataIn, unsigned char *dataOut, Size dilateElement, int imgWidth, int imgHeight)
{
    // Column / row of the pixel handled by this thread.
    const int xIndex = threadIdx.x + blockIdx.x * blockDim.x;
    const int yIndex = threadIdx.y + blockIdx.y * blockDim.y;

    // Threads that fall outside the image must not read or write anything.
    if (xIndex >= imgWidth || yIndex >= imgHeight)
    {
        return;
    }

    const int halfEW = dilateElement.width / 2;
    const int halfEH = dilateElement.height / 2;

    // Window bounds clipped to the image (inclusive on both ends).
    const int xFirst = (xIndex - halfEW < 0) ? 0 : xIndex - halfEW;
    const int xLast = (xIndex + halfEW > imgWidth - 1) ? imgWidth - 1 : xIndex + halfEW;
    const int yFirst = (yIndex - halfEH < 0) ? 0 : yIndex - halfEH;
    const int yLast = (yIndex + halfEH > imgHeight - 1) ? imgHeight - 1 : yIndex + halfEH;

    // Keep the running maximum in a local variable and store it once at the end.
    const size_t centre = (size_t)yIndex * imgWidth + xIndex;
    unsigned char highest = dataIn[centre];

    for (int y = yFirst; y <= yLast; y++)
    {
        const unsigned char *row = dataIn + (size_t)y * imgWidth;
        for (int x = xFirst; x <= xLast; x++)
        {
            const unsigned char candidate = row[x];
            if (candidate > highest)
            {
                highest = candidate;
            }
        }
    }

    dataOut[centre] = highest;
}


// Reports a failed CUDA runtime call on stderr.
// Returns true when `status` is cudaSuccess, false otherwise.
static bool cudaOk(cudaError_t status, const char *what)
{
    if (status == cudaSuccess)
    {
        return true;
    }
    fprintf(stderr, "CUDA error during %s: %s\n", what, cudaGetErrorString(status));
    return false;
}

// Converts a clock() tick difference to milliseconds using CLOCKS_PER_SEC,
// so the result does not depend on the platform's tick rate.
static double ticksToMs(clock_t ticks)
{
    return (double)ticks * 1000.0 / (double)CLOCKS_PER_SEC;
}

// Timed demo driver: loads "1.jpg", runs the CUDA erosion and dilation kernels
// with a 3x5 window alongside the OpenCV CPU equivalents, and prints three
// timings measured with clock():
//   diff_3 - erosion kernel only (launch through completion)
//   diff_1 - device allocation + upload + erosion kernel + download
//   diff_2 - OpenCV CPU erosion
// Returns 0 on success and -1 when the image cannot be read or a CUDA call fails.
int main()
{
    Mat srcImg = imread("1.jpg");     // input image as stored on disk
    Mat grayImg = imread("1.jpg", 0); // same file loaded as grayscale
    if (srcImg.empty() || grayImg.empty())
    {
        fprintf(stderr, "could not read 1.jpg\n");
        return -1;
    }
    // The kernels index the image as one tightly packed buffer.
    if (!grayImg.isContinuous())
    {
        grayImg = grayImg.clone();
    }

    cv::namedWindow("srcImg", 0);
    cv::imshow("srcImg", srcImg);
    cv::waitKey(1000);

    cv::namedWindow("grayImg", 0);
    cv::imshow("grayImg", grayImg);
    cv::waitKey(1000);

    unsigned char *d_in = NULL;   // input image on the GPU
    unsigned char *d_out1 = NULL; // erosion result on the GPU
    unsigned char *d_out2 = NULL; // dilation result on the GPU

    const int imgWidth = grayImg.cols;
    const int imgHeight = grayImg.rows;
    // Compute the byte count in size_t so large images cannot overflow int.
    const size_t imgBytes = (size_t)imgWidth * (size_t)imgHeight * sizeof(unsigned char);

    Mat dstImg1(imgHeight, imgWidth, CV_8UC1, Scalar(0)); // erosion result on the host
    Mat dstImg2(imgHeight, imgWidth, CV_8UC1, Scalar(0)); // dilation result on the host

    // Start of the end-to-end GPU measurement. The first CUDA call in a
    // process also pays for runtime initialisation, so diff_1 includes it.
    const clock_t time_1 = clock();

    // Allocate device buffers and upload the input; stop at the first failure.
    bool ok = cudaOk(cudaMalloc((void**)&d_in, imgBytes), "cudaMalloc(d_in)")
           && cudaOk(cudaMalloc((void**)&d_out1, imgBytes), "cudaMalloc(d_out1)")
           && cudaOk(cudaMalloc((void**)&d_out2, imgBytes), "cudaMalloc(d_out2)")
           && cudaOk(cudaMemcpy(d_in, grayImg.data, imgBytes, cudaMemcpyHostToDevice), "image upload");

    // One thread per pixel; the grid is rounded up to whole 32x32 blocks.
    dim3 threadsPerBlock(32, 32);
    dim3 blocksPerGrid((imgWidth + threadsPerBlock.x - 1) / threadsPerBlock.x, (imgHeight + threadsPerBlock.y - 1) / threadsPerBlock.y);
    // Structuring element size (width 3, height 5).
    Size Element(3, 5);

    // CUDA erosion.
    if (ok)
    {
        const clock_t time_5 = clock();
        erodeInCuda<<<blocksPerGrid, threadsPerBlock>>>(d_in, d_out1, Element, imgWidth, imgHeight);
        // A kernel launch returns before the kernel has run; wait for it so
        // the interval covers execution and not just the launch call.
        ok = cudaOk(cudaGetLastError(), "erodeInCuda launch")
          && cudaOk(cudaDeviceSynchronize(), "erodeInCuda execution");
        const clock_t time_6 = clock();
        if (ok)
        {
            float diff_3 = (float)ticksToMs(time_6 - time_5);
            printf("\n\n[ALG][%s][%4d]diff_3 = %f ms \n",__FUNCTION__, __LINE__, diff_3);
        }
    }

    // Copy the erosion result back to the host.
    if (ok)
    {
        ok = cudaOk(cudaMemcpy(dstImg1.data, d_out1, imgBytes, cudaMemcpyDeviceToHost), "erosion download");
        const clock_t time_2 = clock();
        if (ok)
        {
            float diff_1 = (float)ticksToMs(time_2 - time_1);
            printf("\n\n[ALG][%s][%4d]diff_1 = %f ms\n",__FUNCTION__, __LINE__, diff_1);
        }
    }

    // CPU erosion (OpenCV implementation).
    const clock_t time_3 = clock();
    Mat erodeImg;
    Mat element = getStructuringElement(MORPH_RECT, Size(3, 5));
    erode(grayImg, erodeImg, element);

    const clock_t time_4 = clock();
    float diff_2 = (float)ticksToMs(time_4 - time_3);
    printf("\n\n[ALG][%s][%4d]diff_2 = %f ms \n",__FUNCTION__, __LINE__, diff_2);

    // CUDA dilation, then copy the result back to the host.
    if (ok)
    {
        dilateInCuda<<<blocksPerGrid, threadsPerBlock>>>(d_in, d_out2, Element, imgWidth, imgHeight);
        ok = cudaOk(cudaGetLastError(), "dilateInCuda launch")
          && cudaOk(cudaMemcpy(dstImg2.data, d_out2, imgBytes, cudaMemcpyDeviceToHost), "dilation download");
    }

    // Release device memory on every path; cudaFree accepts null pointers.
    cudaFree(d_in);
    cudaFree(d_out1);
    cudaFree(d_out2);
    if (!ok)
    {
        return -1;
    }

    // CPU dilation (OpenCV implementation).
    Mat dilateImg;
    dilate(grayImg, dilateImg, element);
    imwrite("dilateImg_230117.jpg", dilateImg);

    cv::namedWindow("dilateImg11111", 0);
    cv::imshow("dilateImg11111", dilateImg);
    cv::waitKey(1000);
    cv::waitKey(0);
    return 0;
}


cuda学习笔记5——CUDA实现图像形态学腐蚀、膨胀_第1张图片

参考:https://blog.csdn.net/MGotze/article/details/76448702

你可能感兴趣的:(cuda学习笔记,学习,opencv,人工智能)