// CUDA for morphological opening and closing in image processing (CUDA用于图像处理中的形态学开闭运算)

#include <stdlib.h>
#include <stdio.h>
#include <windows.h>
#include <opencv/cv.h>
#include <opencv/highgui.h>
#include <opencv2/opencv.hpp>
#include <opencv2/gpu/gpu.hpp>
#include "cuda_runtime.h"
#include "device_launch_parameters.h"
using namespace std;
#define width 800
#define height 600

/*
 * Grayscale erosion with a square len x len window.
 *
 * Each thread produces one output pixel: the minimum of d_src over the window
 * centred on (i, j). Pixels whose window would leave the image are copied
 * through unchanged.
 *
 * Launch layout: 2D grid/blocks where x indexes columns and y indexes rows;
 * the grid must cover at least width x height threads (excess threads exit).
 *
 * d_src : input image, indexed as j*width+i (width*height bytes, no row padding)
 * d_dst : output image, same layout; must not be the same buffer as d_src,
 *         because every thread reads neighbours that other threads would be
 *         overwriting
 * len   : window edge length; the half-width used is (len-1)/2, so an even
 *         len behaves like len-1, and len <= 1 degenerates to a plain copy
 */
__global__ static void erosion(const uchar* d_src, uchar* d_dst, int len)
{
	// Signed indices keep the border comparisons below free of
	// signed/unsigned conversions.
	const int i = blockIdx.x*blockDim.x+threadIdx.x;
	const int j = blockIdx.y*blockDim.y+threadIdx.y;

	// The grid is rounded up to whole blocks, so some threads fall outside
	// the image and have nothing to do.
	if (i >= width || j >= height)
		return;

	const int wth = len > 1 ? (len-1)/2 : 0;

	// Accumulate in a register and write the result exactly once. Starting
	// from the centre pixel also gives the pass-through value for the border,
	// including row 0 and column 0.
	uchar lowest = d_src[j*width+i];

	// The window fits entirely inside the image when the centre is at least
	// wth pixels away from every edge (inclusive on the low side).
	if (i >= wth && i < width - wth && j >= wth && j < height - wth)
	{
		for (int h = -wth; h <= wth; h++)
		{
			const uchar* row = d_src + (j+h)*width + i;
			for (int w = -wth; w <= wth; w++)
			{
				const uchar v = row[w];
				if (v < lowest)
					lowest = v;
			}
		}
	}

	d_dst[j*width+i] = lowest;
}

/*
 * Grayscale dilation with a square len x len window.
 *
 * Each thread produces one output pixel: the maximum of d_src over the window
 * centred on (i, j). Pixels whose window would leave the image are copied
 * through unchanged.
 *
 * Launch layout: 2D grid/blocks where x indexes columns and y indexes rows;
 * the grid must cover at least width x height threads (excess threads exit).
 *
 * d_src : input image, indexed as j*width+i (width*height bytes, no row padding)
 * d_dst : output image, same layout; must not be the same buffer as d_src,
 *         because every thread reads neighbours that other threads would be
 *         overwriting
 * len   : window edge length; the half-width used is (len-1)/2, so an even
 *         len behaves like len-1, and len <= 1 degenerates to a plain copy
 */
__global__ static void dilation(const uchar* d_src, uchar* d_dst, int len)
{
	// Signed indices keep the border comparisons below free of
	// signed/unsigned conversions.
	const int i = blockIdx.x*blockDim.x+threadIdx.x;
	const int j = blockIdx.y*blockDim.y+threadIdx.y;

	// The grid is rounded up to whole blocks, so some threads fall outside
	// the image and have nothing to do.
	if (i >= width || j >= height)
		return;

	const int wth = len > 1 ? (len-1)/2 : 0;

	// Accumulate in a register and write the result exactly once. Starting
	// from the centre pixel also gives the pass-through value for the border,
	// including row 0 and column 0.
	uchar highest = d_src[j*width+i];

	// The window fits entirely inside the image when the centre is at least
	// wth pixels away from every edge (inclusive on the low side).
	if (i >= wth && i < width - wth && j >= wth && j < height - wth)
	{
		for (int h = -wth; h <= wth; h++)
		{
			const uchar* row = d_src + (j+h)*width + i;
			for (int w = -wth; w <= wth; w++)
			{
				const uchar v = row[w];
				if (v > highest)
					highest = v;
			}
		}
	}

	d_dst[j*width+i] = highest;
}

// Prints a diagnostic to stderr when a CUDA call or kernel launch failed.
static void reportPreprocessError(const char* stage, cudaError_t err)
{
	if (err != cudaSuccess)
		fprintf(stderr, "preprocess: %s failed: %s\n", stage, cudaGetErrorString(err));
}

/*
 * Runs erosion(3) -> dilation(5) -> erosion(3) on a width x height 8-bit image
 * and blocks until the GPU has finished.
 *
 * d_src  : device input image (width*height bytes)
 * d_tmp1 : device scratch buffer receiving the first erosion
 * d_tmp2 : device scratch buffer receiving the dilation
 * d_dst  : device output image
 *
 * All four buffers must be distinct device allocations of width*height bytes.
 * The function returns void, so failures are reported on stderr rather than
 * to the caller.
 */
void preprocess(uchar* d_src, uchar* d_tmp1, uchar* d_tmp2, uchar* d_dst)
{
	const dim3 dimBlock(16, 16);
	// Ceil-divide so the grid covers images that are not a multiple of the
	// block size.
	const dim3 dimGrid( (width+dimBlock.x-1)/dimBlock.x, (height+dimBlock.y-1)/dimBlock.y );

	// Launches on the same (default) stream execute in order, so each stage
	// sees the completed output of the previous one.
	erosion<<<dimGrid, dimBlock>>>(d_src, d_tmp1, 3);
	reportPreprocessError("first erosion launch", cudaGetLastError());

	dilation<<<dimGrid, dimBlock>>>(d_tmp1, d_tmp2, 5);
	reportPreprocessError("dilation launch", cudaGetLastError());

	erosion<<<dimGrid, dimBlock>>>(d_tmp2, d_dst, 3);
	reportPreprocessError("second erosion launch", cudaGetLastError());

	// cudaThreadSynchronize() is deprecated; cudaDeviceSynchronize() is its
	// replacement and also surfaces faults raised while the kernels ran.
	reportPreprocessError("kernel execution", cudaDeviceSynchronize());
}

/*
 * Loads a colour bitmap, converts it to 8-bit grayscale, runs the GPU
 * morphology pipeline (preprocess) on it, then shows and saves the result.
 *
 * The device buffers and kernels are sized by the compile-time width/height
 * macros, so the input image must be exactly that size.
 *
 * Returns 0 on success, 1 if the image could not be loaded, has the wrong
 * size, or a CUDA call failed.
 */
int main()
{
	const size_t numBytes = (size_t)width * height;
	int exitCode = 1;
	cudaError_t err = cudaSuccess;

	IplImage* src = NULL;
	IplImage* srcCopy = NULL;
	unsigned char* d_img_src = NULL;
	unsigned char* d_img_tmp1 = NULL;
	unsigned char* d_img_tmp2 = NULL;
	unsigned char* d_img_dst = NULL;

	// Single-pass block: any failure breaks out to the shared cleanup below.
	do
	{
		src = cvLoadImage("F:\\VS2010 projects\\source\\002.bmp", 1);
		if (src == NULL)
		{
			fprintf(stderr, "Failed to load the input image\n");
			break;
		}

		// The width/height macros make the IplImage fields of the same
		// names unspellable here, so the size is queried through
		// cvGetDimSize (index 0 = rows, index 1 = columns).
		if (cvGetDimSize(src, 0) != height || cvGetDimSize(src, 1) != width)
		{
			fprintf(stderr, "Input image is %dx%d, expected %dx%d\n",
				cvGetDimSize(src, 1), cvGetDimSize(src, 0), width, height);
			break;
		}

		srcCopy = cvCreateImage(cvGetSize(src), IPL_DEPTH_8U, 1);
		if (srcCopy == NULL)
		{
			fprintf(stderr, "Failed to allocate the grayscale image\n");
			break;
		}

		// Convert to a single channel. cvLoadImage delivers colour data in
		// BGR order, so the BGR conversion code is the matching one.
		cvCvtColor(src, srcCopy, CV_BGR2GRAY);

		// The kernels index pixels as j*width+i, i.e. they assume rows are
		// stored back to back with no padding.
		if (srcCopy->widthStep != width)
		{
			fprintf(stderr, "Unexpected row stride %d, expected %d\n",
				srcCopy->widthStep, width);
			break;
		}

		if ((err = cudaMalloc((void**)&d_img_src, numBytes)) != cudaSuccess ||
			(err = cudaMalloc((void**)&d_img_tmp1, numBytes)) != cudaSuccess ||
			(err = cudaMalloc((void**)&d_img_tmp2, numBytes)) != cudaSuccess ||
			(err = cudaMalloc((void**)&d_img_dst, numBytes)) != cudaSuccess)
		{
			fprintf(stderr, "cudaMalloc failed: %s\n", cudaGetErrorString(err));
			break;
		}

		err = cudaMemcpy(d_img_src, srcCopy->imageData, numBytes, cudaMemcpyHostToDevice);
		if (err != cudaSuccess)
		{
			fprintf(stderr, "Host-to-device copy failed: %s\n", cudaGetErrorString(err));
			break;
		}

		preprocess(d_img_src, d_img_tmp1, d_img_tmp2, d_img_dst);

		// The result overwrites the grayscale host image in place.
		err = cudaMemcpy(srcCopy->imageData, d_img_dst, numBytes, cudaMemcpyDeviceToHost);
		if (err != cudaSuccess)
		{
			fprintf(stderr, "Device-to-host copy failed: %s\n", cudaGetErrorString(err));
			break;
		}

		cvNamedWindow("test",CV_WINDOW_AUTOSIZE);
		cvShowImage("test",srcCopy);
		cvSaveImage("preprocess.bmp", srcCopy);
		cvWaitKey(0);
		cvDestroyAllWindows();

		exitCode = 0;
	} while (0);

	// Shared cleanup: releasing a NULL image or freeing a NULL device pointer
	// is a no-op, so this is safe on every exit path.
	cvReleaseImage(&srcCopy);
	cvReleaseImage(&src);
	cudaFree(d_img_src);
	cudaFree(d_img_tmp1);
	cudaFree(d_img_tmp2);
	cudaFree(d_img_dst);
	return exitCode;
}

// Related topics (你可能感兴趣的): CUDA, 膨胀 (dilation), 腐蚀 (erosion), 开运算 (opening), GPU并行 (GPU parallelism)