尝试了一下对两幅图像进行叠加融合:思路是用OpenCV读取图像(Mat类型),拷贝到device中,进行叠加,再拷回host中,显示结果。
下面就是完整的代码:
/*****************************************
 Copyright (c) 2016 Jingshuang Hu

 @filename:kernel.cu
 @datetime:2016.11.10
 @author:HJS
 @e-mail:[email protected]
 @blog:http://blog.csdn.net/hujingshuang
*****************************************/

#include <cuda_runtime.h>
#include <device_launch_parameters.h>
#include <opencv2/core/core.hpp>
#include <opencv2/highgui/highgui.hpp>
#include <opencv2/imgproc/imgproc.hpp>
#include <cstdlib>
#include <iostream>

using namespace std;
using namespace cv;

// Evaluates a CUDA runtime call and terminates the program with a readable
// message (file, line, CUDA error string) if the call did not succeed.
#define CUDA_CHECK(call)                                                        \
    do {                                                                        \
        cudaError_t err_ = (call);                                              \
        if (err_ != cudaSuccess) {                                              \
            std::cerr << "CUDA error " << __FILE__ << ":" << __LINE__ << ": "   \
                      << cudaGetErrorString(err_) << std::endl;                 \
            std::exit(EXIT_FAILURE);                                            \
        }                                                                       \
    } while (0)

// Averages two 8-bit buffers element by element: imgres[i] = (img1[i] + img2[i]) / 2.
//
// img1, img2 : device input buffers holding at least `length` bytes each
// imgres     : device output buffer holding at least `length` bytes
// length     : number of elements to process
//
// Each thread handles at most one element, so the launch must provide at
// least `length` threads in total; surplus threads do nothing. The index
// formula accepts any 1-D/2-D/3-D grid and block shape.
__global__ void imageAdd(const uchar* img1, const uchar* img2, uchar* imgres, int length)
{
    // Flat global thread index, valid for any grid/block dimensionality.
    // Computed in 64 bits so the product cannot overflow for large launches.
    const long long threadsPerBlock = (long long)blockDim.x * blockDim.y * blockDim.z;
    const long long blockId = ((long long)blockIdx.z * gridDim.y + blockIdx.y) * gridDim.x + blockIdx.x;
    const long long threadInBlock = ((long long)threadIdx.z * blockDim.y + threadIdx.y) * blockDim.x + threadIdx.x;
    const long long tid = blockId * threadsPerBlock + threadInBlock;

    if (tid < length)
    {
        // The uchar operands are promoted to int, so the sum cannot wrap
        // before the (truncating) division by two.
        imgres[tid] = static_cast<uchar>((img1[tid] + img2[tid]) / 2);
    }
}

// Loads img1.jpg and img2.jpg as grayscale, averages them on the GPU and
// displays the two sources and the blended result.
int main()
{
    // source images, gray
    Mat img1_host = imread("img1.jpg", IMREAD_GRAYSCALE);
    Mat img2_host = imread("img2.jpg", IMREAD_GRAYSCALE);

    // imread returns an empty Mat when the file is missing or unreadable.
    if (img1_host.empty() || img2_host.empty())
    {
        cerr << "Failed to load img1.jpg and/or img2.jpg" << endl;
        return EXIT_FAILURE;
    }

    // Both buffers are copied with the same byte count, so the images must
    // have identical dimensions.
    if (img1_host.rows != img2_host.rows || img1_host.cols != img2_host.cols)
    {
        cerr << "img1.jpg and img2.jpg must have the same size" << endl;
        return EXIT_FAILURE;
    }

    // The raw copies below treat Mat::data as one dense row*col buffer.
    if (!img1_host.isContinuous())
    {
        img1_host = img1_host.clone();
    }
    if (!img2_host.isContinuous())
    {
        img2_host = img2_host.clone();
    }

    int row = img1_host.rows;
    int col = img1_host.cols;
    int length = row * col;

    // memory size
    size_t memSize = static_cast<size_t>(length) * sizeof(uchar);

    // device memory
    uchar* img1_device = NULL;
    uchar* img2_device = NULL;
    uchar* imgres_device = NULL;
    CUDA_CHECK(cudaMalloc((void**)&img1_device, memSize));
    CUDA_CHECK(cudaMalloc((void**)&img2_device, memSize));
    CUDA_CHECK(cudaMalloc((void**)&imgres_device, memSize));

    // copy host to device
    CUDA_CHECK(cudaMemcpy(img1_device, img1_host.data, memSize, cudaMemcpyHostToDevice));
    CUDA_CHECK(cudaMemcpy(img2_device, img2_host.data, memSize, cudaMemcpyHostToDevice));

    // setting parameters and run the kernel function
    dim3 block(32, 32, 1);  // block
    const int threadsPerBlock = block.x * block.y * block.z;
    // Ceil-division so that grid * threadsPerBlock >= length for every image size.
    dim3 grid((length + threadsPerBlock - 1) / threadsPerBlock, 1, 1);  // grid
    imageAdd<<<grid, block>>>(img1_device, img2_device, imgres_device, length);
    CUDA_CHECK(cudaGetLastError());  // reports an invalid launch configuration

    // copy device to host (blocking copy; also surfaces kernel execution errors)
    Mat imgres_host = Mat::zeros(row, col, CV_8UC1);
    CUDA_CHECK(cudaMemcpy(imgres_host.data, imgres_device, memSize, cudaMemcpyDeviceToHost));

    // free: the device buffers are no longer needed once the result is on the host
    CUDA_CHECK(cudaFree(img1_device));
    CUDA_CHECK(cudaFree(img2_device));
    CUDA_CHECK(cudaFree(imgres_device));

    // show source and result images
    imshow("img1", img1_host);
    imshow("img2", img2_host);
    imshow("imgres", imgres_host);
    waitKey(0);

#ifdef _WIN32
    // "pause" is a Windows shell command; it keeps the console window open.
    system("pause");
#endif
    return 0;
}

显示结果:
另外,还要请教大家一个问题:在使用GPU写代码的时候,chrome总是崩溃。网上搜了一下,结果好多搞CUDA的人都遇到过,各种办法都试过一遍了,还是不能解决问题。迫使我把搜狗浏览器这个小三儿硬生生地扶成了正室,只有在不用GPU时才用chrome,我感觉到了它心里的委屈,各位大神有什么办法啊~