【CUDA】两幅图像叠加

        尝试了一下对两幅图像进行叠加融合:思路是用opencv读取图像(Mat类型),拷贝到device中,进行叠加,再拷出来host中,显示结果。

下面就是完整的代码:

/*****************************************
Copyright (c) 2016 Jingshuang Hu

@filename:kernel.cu
@datetime:2016.11.10
@author:HJS
@e-mail:[email protected]
@blog:http://blog.csdn.net/hujingshuang
*****************************************/
#include 
#include 

#include 
#include 
#include 

#include 

using namespace std;
using namespace cv;

// gray     (overload)
__global__ void imageAdd(uchar* img1, uchar* img2, uchar* imgres, int length) {
    // 一维数据索引计算(万能计算方法)
    int tid = blockIdx.z * (gridDim.x * gridDim.y) * (blockDim.x * blockDim.y * blockDim.z) \
            + blockIdx.y * gridDim.x * (blockDim.x * blockDim.y * blockDim.z) \
            + blockIdx.x * (blockDim.x * blockDim.y * blockDim.z) \
            + threadIdx.z * (blockDim.x * blockDim.y) \
            + threadIdx.y * blockDim.x \
            + threadIdx.x;
    
    if (tid < length) {
        imgres[tid] = (img1[tid]  + img2[tid]) / 2;
    }
}

// rgb      (overload)
__global__ void imageAdd(uchar3* img1, uchar3* img2, uchar3* imgres, int length) {
    int tid = blockIdx.z * (gridDim.x * gridDim.y) * (blockDim.x * blockDim.y * blockDim.z) \
            + blockIdx.y * gridDim.x * (blockDim.x * blockDim.y * blockDim.z) \
            + blockIdx.x * (blockDim.x * blockDim.y * blockDim.z) \
            + threadIdx.z * (blockDim.x * blockDim.y) \
            + threadIdx.y * blockDim.x \
            + threadIdx.x;
    if (tid < length) {
        imgres[tid].x = (img1[tid].x + img2[tid].x) / 2;
        imgres[tid].y = (img1[tid].y + img2[tid].y) / 2;
        imgres[tid].z = (img1[tid].z + img2[tid].z) / 2;
    }
}

int main() {
    // source images, gray
    Mat img1_host = imread("img1.jpg", IMREAD_GRAYSCALE);
    Mat img2_host = imread("img2.jpg", IMREAD_GRAYSCALE);

    int row = img1_host.rows;
    int col = img1_host.cols;
    int length = row * col;

    // memory size
    int memSize = length * sizeof(uchar);

    // device memory
    uchar* img1_device;
    uchar* img2_device;
    uchar* imgres_device;

    cudaMalloc((void**)&img1_device, memSize);
    cudaMalloc((void**)&img2_device, memSize);
    cudaMalloc((void**)&imgres_device, memSize);

    // copy host to device
    cudaMemcpy(img1_device, img1_host.data, memSize, cudaMemcpyHostToDevice);
    cudaMemcpy(img2_device, img2_host.data, memSize, cudaMemcpyHostToDevice);

    // setting parameters and run the kernel function
    dim3 grid(1 + (length / (32 * 32 + 1)), 1, 1);      // grid
    dim3 block(32, 32, 1);                              // block
    imageAdd<<>>(img1_device, img2_device, imgres_device, length);

    // copy device to host
    Mat imgres_host = Mat::zeros(row, col, CV_8UC1);
    cudaMemcpy(imgres_host.data, imgres_device, memSize, cudaMemcpyDeviceToHost);

    // show source and result images
    imshow("img1", img1_host);
    imshow("img2", img2_host);
    imshow("imgres", imgres_host);
    waitKey(0);

    // rgb
    Mat img1rgb_host = imread("img1.jpg");
    Mat img2rgb_host = imread("img2.jpg");
    
    int rowrgb = img1rgb_host.rows;
    int colrgb = img1rgb_host.cols;
    int lengthrgb = rowrgb * colrgb;

    int memSizergb = length * sizeof(uchar3);

    uchar3* img1rgb_device;
    uchar3* img2rgb_device;
    uchar3* imgresrgb_device;
    cudaMalloc((void**)&img1rgb_device, memSizergb);
    cudaMalloc((void**)&img2rgb_device, memSizergb);
    cudaMalloc((void**)&imgresrgb_device, memSizergb);

    cudaMemcpy(img1rgb_device, img1rgb_host.data, memSizergb, cudaMemcpyHostToDevice);
    cudaMemcpy(img2rgb_device, img2rgb_host.data, memSizergb, cudaMemcpyHostToDevice);

    dim3 gridrgb(1 + (length / (32 * 32 + 1)), 1, 1);
    dim3 blockrgb(32, 32, 1);
    imageAdd<<>>(img1rgb_device, img2rgb_device, imgresrgb_device, lengthrgb);

    Mat imgresrgb_host = Mat::zeros(rowrgb, colrgb, CV_8UC3);
    cudaMemcpy(imgresrgb_host.data, imgresrgb_device, memSizergb, cudaMemcpyDeviceToHost);

    imshow("img1rgb", img1rgb_host);
    imshow("img2rgb", img2rgb_host);
    imshow("imgrgbres", imgresrgb_host);
    waitKey();

    // free
    cudaFree(img1_device);
    cudaFree(img2_device);
    cudaFree(imgres_device);

    cudaFree(img1rgb_device);
    cudaFree(img2rgb_device);
    cudaFree(imgresrgb_device);

    system("pause");
    return 0;
}

显示结果:

【CUDA】两幅图像叠加_第1张图片                  【CUDA】两幅图像叠加_第2张图片                  【CUDA】两幅图像叠加_第3张图片

【CUDA】两幅图像叠加_第4张图片                  【CUDA】两幅图像叠加_第5张图片                  【CUDA】两幅图像叠加_第6张图片

        另外,还要请教大家一个问题,就是在使用GPU写代码的时候,chrome总是崩溃,网上收了一下结果好多搞CUDA的遇到了,办法都试过一遍勒还是不能解决问题。迫使我把搜狗浏览器这个小三儿硬生生的扶成了正室,只有在不用GPU时,才用的chrome,我感觉到了它心里的委屈,各位大神有什么办法啊~

你可能感兴趣的:(CUDA)