本文所使用的环境:Ubuntu18.04+CUDA10.1+OpenCV3.4.11+QtCreator5
本文针对的是在OpenCV中最常用的图片类型:CV_8UC1和CV_8UC3。在cpu上,可以用cv::Mat::at或者cv::Mat::ptr或者迭代器来对图像进行逐个像素的访问和处理,但是想要移植到gpu里,又不使用cv::cuda模块,就只能用基本数据类型(比如uchar和uchar3),关键在于指针的传递。为了照顾初学者,先不使用shared memory和cuda stream等概念,只用global memory和默认流。
查到的资料说,无论图片是什么类型的,它的指针,即cv::Mat::data,默认都是uchar* 类型(我做过测试验证过,没出毛病,应该跟数据存储方式有关),用的时候可以用强制类型转换成需要的类型,比如uchar3* 。
先上原图
//这是用uchar*类型的指针进行传递,BGR变成RGB
#include
#include
#include "opencv2/core.hpp"
#include "cuda.h"
#include "cuda_runtime.h"
using namespace std;
// Swaps channel 0 and channel 2 of every pixel of an interleaved
// 3-bytes-per-pixel image (BGR -> RGB), reading d_in and writing d_out.
//
// d_in / d_out : pixel buffers indexed as (row * width + col) * 3 + channel
// width, height: image dimensions in pixels
//
// Both loops advance by the total number of threads along their axis
// (gridDim * blockDim), so any 2D launch configuration covers the full image.
__global__ void deal_image1(const uchar* d_in, uchar* d_out, int width, int height)
{
    const int firstRow = blockDim.y * blockIdx.y + threadIdx.y;
    const int firstCol = blockDim.x * blockIdx.x + threadIdx.x;
    const int rowStep  = gridDim.y * blockDim.y;
    const int colStep  = gridDim.x * blockDim.x;

    for (int y = firstRow; y < height; y += rowStep)
    {
        for (int x = firstCol; x < width; x += colStep)
        {
            // Offset of the first byte of pixel (y, x).
            const int base = (y * width + x) * 3;

            d_out[base + 0] = d_in[base + 2];
            d_out[base + 1] = d_in[base + 1];
            d_out[base + 2] = d_in[base + 0];
        }
    }
}
// Loads an image, swaps its first and third channel on the GPU through plain
// uchar* buffers, then shows and saves the result.
// Returns 0 on success and 1 if the image cannot be loaded or a CUDA call fails.
int main()
{
    // Reports a failed CUDA call and returns whether it succeeded.
    auto cudaOk = [](cudaError_t status, const char* what) -> bool
    {
        if (status != cudaSuccess)
        {
            cerr << what << " failed: " << cudaGetErrorString(status) << endl;
            return false;
        }
        return true;
    };

    cv::Mat image1 = cv::imread("../../pictures/35.jpg");
    if (image1.empty())
    {
        // imread returns an empty Mat when the file is missing or unreadable.
        cerr << "failed to load ../../pictures/35.jpg" << endl;
        return 1;
    }

    cv::Size size(540, 810);
    cv::resize(image1, image1, size); // the source picture is large, shrink it

    if (!image1.isContinuous())
    {
        cout << "img1 is not continuous." << endl;
        // The flat cudaMemcpy below needs rows stored back to back.
        image1 = image1.clone();
    }

    int height = image1.rows;
    int width = image1.cols;
    int channel = image1.channels();
    size_t image1_size = sizeof(uchar) * height * width * channel;

    // Host image of the same size that receives the processed pixels.
    cv::Mat image1_(height, width, CV_8UC3);

    uchar* d_in = NULL;
    uchar* d_out = NULL;

    bool ok = cudaOk(cudaMalloc((void**)&d_in, image1_size), "cudaMalloc(d_in)")
           && cudaOk(cudaMalloc((void**)&d_out, image1_size), "cudaMalloc(d_out)")
           && cudaOk(cudaMemcpy(d_in, image1.data, image1_size, cudaMemcpyHostToDevice),
                     "cudaMemcpy(host -> device)");

    if (ok)
    {
        dim3 dimGrid(8, 8, 1);
        dim3 dimBlock(32, 32, 1);
        deal_image1<<<dimGrid, dimBlock>>>(d_in, d_out, width, height);

        // A launch does not return a status itself; query it explicitly. The
        // blocking copy back then also reports errors raised while the kernel ran.
        ok = cudaOk(cudaGetLastError(), "deal_image1 launch")
          && cudaOk(cudaMemcpy(image1_.data, d_out, image1_size, cudaMemcpyDeviceToHost),
                    "cudaMemcpy(device -> host)");
    }

    // Release device memory on every path; freeing a null pointer is harmless.
    cudaFree(d_in);
    cudaFree(d_out);

    if (!ok)
    {
        return 1;
    }

    cv::imshow("image1_", image1_);
    cv::imwrite("../../pictures/35_RGB.jpg", image1_);
    cv::waitKey();
    return 0;
}
//这是强制类型转换成uchar3*再进行传递
#include
#include
#include "opencv2/core.hpp"
#include "cuda.h"
#include "cuda_runtime.h"
using namespace std;
// Swaps the x and z components of every uchar3 pixel (BGR -> RGB), reading
// d_in and writing d_out.
//
// d_in / d_out : pixel buffers indexed as row * width + col
// width, height: image dimensions in pixels
//
// Both loops advance by the total number of threads along their axis
// (gridDim * blockDim), so any 2D launch configuration covers the full image.
__global__ void deal_image1(const uchar3* d_in, uchar3* d_out, int width, int height)
{
    const int firstRow = blockDim.y * blockIdx.y + threadIdx.y;
    const int firstCol = blockDim.x * blockIdx.x + threadIdx.x;
    const int rowStep  = gridDim.y * blockDim.y;
    const int colStep  = gridDim.x * blockDim.x;

    for (int y = firstRow; y < height; y += rowStep)
    {
        for (int x = firstCol; x < width; x += colStep)
        {
            const int pixel = y * width + x;
            const uchar3* src = d_in + pixel;
            uchar3* dst = d_out + pixel;

            dst->x = src->z;
            dst->y = src->y;
            dst->z = src->x;
        }
    }
}
// Loads an image, swaps its first and third channel on the GPU through
// uchar3* buffers (the cv::Mat data pointer is reinterpreted as uchar3*),
// then shows the result.
// Returns 0 on success and 1 if the image cannot be loaded or a CUDA call fails.
int main()
{
    // Reports a failed CUDA call and returns whether it succeeded.
    auto cudaOk = [](cudaError_t status, const char* what) -> bool
    {
        if (status != cudaSuccess)
        {
            cerr << what << " failed: " << cudaGetErrorString(status) << endl;
            return false;
        }
        return true;
    };

    cv::Mat image1 = cv::imread("../../pictures/35.jpg");
    if (image1.empty())
    {
        // imread returns an empty Mat when the file is missing or unreadable.
        cerr << "failed to load ../../pictures/35.jpg" << endl;
        return 1;
    }

    cv::Size size(540, 810);
    cv::resize(image1, image1, size);

    if (!image1.isContinuous())
    {
        cout << "img1 is not continuous." << endl;
        // The flat cudaMemcpy below needs rows stored back to back.
        image1 = image1.clone();
    }

    int height = image1.rows;
    int width = image1.cols;
    size_t image1_size = sizeof(uchar3) * height * width;

    // Host image of the same size that receives the processed pixels.
    cv::Mat image1_(height, width, CV_8UC3);

    uchar3* d_in = NULL;
    uchar3* d_out = NULL;
    uchar3* h_out = (uchar3*)image1_.data;

    bool ok = cudaOk(cudaMalloc((void**)&d_in, image1_size), "cudaMalloc(d_in)")
           && cudaOk(cudaMalloc((void**)&d_out, image1_size), "cudaMalloc(d_out)")
           && cudaOk(cudaMemcpy(d_in, (uchar3*)image1.data, image1_size, cudaMemcpyHostToDevice),
                     "cudaMemcpy(host -> device)");

    if (ok)
    {
        dim3 dimGrid(8, 8, 1);
        dim3 dimBlock(32, 32, 1);
        deal_image1<<<dimGrid, dimBlock>>>(d_in, d_out, width, height);

        // A launch does not return a status itself; query it explicitly. The
        // blocking copy back then also reports errors raised while the kernel ran.
        ok = cudaOk(cudaGetLastError(), "deal_image1 launch")
          && cudaOk(cudaMemcpy(h_out, d_out, image1_size, cudaMemcpyDeviceToHost),
                    "cudaMemcpy(device -> host)");
    }

    // Release device memory on every path; freeing a null pointer is harmless.
    cudaFree(d_in);
    cudaFree(d_out);

    if (!ok)
    {
        return 1;
    }

    cv::imshow("image1_", image1_);
    cv::waitKey();
    return 0;
}
效果是一样的。灰度图只需要用到uchar*指针,我这里就不赘述了。最后写一个BGR换成RGB,并且图片向左旋转90度的例子,其实都差不多,只是有点不一样而已。
//BGR转RGB,并左转90度
#include
#include
#include "opencv2/core.hpp"
#include "cuda.h"
#include "cuda_runtime.h"
using namespace std;
// Swaps the x and z components of every pixel (BGR -> RGB) and rotates the
// image 90 degrees to the left while copying it from d_in to d_out.
//
// d_in  : source pixels, `height` rows of `width` pixels (index row * width + col)
// d_out : destination pixels, `width` rows of `height` pixels; must not alias d_in
// width, height: dimensions of the SOURCE image in pixels
//
// Both loops advance by the total number of threads along their axis
// (gridDim * blockDim), so any 2D launch configuration covers the full image.
__global__ void deal_image1(const uchar3* d_in, uchar3* d_out, int width, int height)
{
    for (int row = blockDim.y * blockIdx.y + threadIdx.y; row < height; row += gridDim.y * blockDim.y)
    {
        for (int col = blockDim.x * blockIdx.x + threadIdx.x; col < width; col += gridDim.x * blockDim.x)
        {
            const uchar3 src = d_in[row * width + col];

            // Source column `col` becomes destination row (width - 1 - col):
            // the last source column lands on row 0 and column 0 lands on the
            // last row, width - 1. Using (width - col) would address row
            // `width`, one full row past the end of d_out.
            const int dst = (width - 1 - col) * height + row;

            uchar3 swapped;
            swapped.x = src.z;
            swapped.y = src.y;
            swapped.z = src.x;
            d_out[dst] = swapped;
        }
    }
}
// Loads an image, converts it on the GPU (channel swap plus a 90 degree left
// rotation done by deal_image1), then shows and saves the result.
// Returns 0 on success and 1 if the image cannot be loaded or a CUDA call fails.
int main()
{
    // Reports a failed CUDA call and returns whether it succeeded.
    auto cudaOk = [](cudaError_t status, const char* what) -> bool
    {
        if (status != cudaSuccess)
        {
            cerr << what << " failed: " << cudaGetErrorString(status) << endl;
            return false;
        }
        return true;
    };

    cv::Mat image1 = cv::imread("../../pictures/35.jpg");
    if (image1.empty())
    {
        // imread returns an empty Mat when the file is missing or unreadable.
        cerr << "failed to load ../../pictures/35.jpg" << endl;
        return 1;
    }
    if (!image1.isContinuous())
    {
        // The flat cudaMemcpy below needs rows stored back to back.
        image1 = image1.clone();
    }

    int width = image1.cols;
    int height = image1.rows;
    size_t size_image1 = sizeof(uchar3) * width * height;

    // The rotated output has its dimensions swapped: `width` rows, `height` columns.
    cv::Mat image1_(width, height, CV_8UC3);

    uchar3* d_in = NULL;
    uchar3* d_out = NULL;
    uchar3* h_out = (uchar3*)image1_.data;

    bool ok = cudaOk(cudaMalloc((void**)&d_in, size_image1), "cudaMalloc(d_in)")
           && cudaOk(cudaMalloc((void**)&d_out, size_image1), "cudaMalloc(d_out)")
           && cudaOk(cudaMemcpy(d_in, (uchar3*)image1.data, size_image1, cudaMemcpyHostToDevice),
                     "cudaMemcpy(host -> device)");

    if (ok)
    {
        dim3 dimGrid(8, 8, 1);
        dim3 dimBlock(32, 32, 1);
        deal_image1<<<dimGrid, dimBlock>>>(d_in, d_out, width, height);

        // A launch does not return a status itself; query it explicitly. The
        // blocking copy back then also reports errors raised while the kernel ran.
        ok = cudaOk(cudaGetLastError(), "deal_image1 launch")
          && cudaOk(cudaMemcpy(h_out, d_out, size_image1, cudaMemcpyDeviceToHost),
                    "cudaMemcpy(device -> host)");
    }

    // Release device memory on every path; freeing a null pointer is harmless.
    cudaFree(d_in);
    cudaFree(d_out);

    if (!ok)
    {
        return 1;
    }

    cv::imshow("image1_", image1_);
    cv::imwrite("../../pictures/35_RGB_90.jpg", image1_);
    cv::waitKey();
    return 0;
}
效果图
总结一下,如果是CV_8UC3类型的图片,可以用uchar* 或者uchar3* 类型的指针进行传递;但是如果是CV_8UC1类型的图片,即单通道,只能用uchar* 类型的指针进行传递,不能用uchar3* 。另外可以看出,如果不用cv::cuda模块,自己写的代码量会很大,如果图片要进行很复杂的处理,将是个大工程!
最后的最后,附上CMakeLists.txt
cmake_minimum_required(VERSION 3.5)
project(cuda_c_test_9 LANGUAGES CXX)

set(CMAKE_CXX_STANDARD 11)
set(CMAKE_CXX_STANDARD_REQUIRED ON)

# Locate the dependencies first: the *_INCLUDE_DIRS / *_LIBRARY_DIRS / *_LIBS
# variables used below are only defined once find_package has run.
find_package(CUDA REQUIRED)
find_package(OpenCV REQUIRED)

# Machine-specific copy of the FindCUDA module. OPTIONAL keeps the configure
# step working on machines where this path does not exist.
include(/home/psdz/cmake-3.9.0/Modules/FindCUDA.cmake OPTIONAL)

include_directories(include
    ${CUDA_INCLUDE_DIRS}
    ${OpenCV_INCLUDE_DIRS}
)
link_directories(${OpenCV_LIBRARY_DIRS})

# Debug information for host (-g) and device (-G) code. FindCUDA expects
# CUDA_NVCC_FLAGS to be a list, one element per argument.
set(CUDA_NVCC_FLAGS -g -G)

CUDA_ADD_EXECUTABLE(cuda_c_test_9 main.cu)
target_link_libraries(cuda_c_test_9 ${OpenCV_LIBS})
主要参考
https://blog.csdn.net/kelvin_yan/article/details/48315175
https://www.cnblogs.com/dwdxdy/p/3528711.html
https://blog.csdn.net/lingsuifenfei123/article/details/83444159