CUDA+OpenCV简单处理图像

CUDA+OpenCV简单处理图像

本文所使用的环境:Ubuntu18.04+CUDA10.1+OpenCV3.4.11+QtCreator5
本文针对的是在OpenCV中最常用的图片类型:CV_8UC1和CV_8UC3。在cpu上,可以用cv::Mat::at或者cv::Mat::ptr或者迭代器来对图像进行逐个像素的访问和处理,但是想要移植到gpu里,又不使用cv::cuda模块,就只能用基本数据类型(比如uchar和uchar3),关键在于指针的传递。为了照顾初学者,先不使用shared memory和cuda stream等概念,只用global memory和默认流。

查到的资料说,无论图片是什么类型的,它的指针,即cv::Mat::data,默认都是uchar* 类型(我做过测试验证过,没出毛病,应该跟数据存储方式有关),用的时候可以用强制类型转换成需要的类型,比如uchar3* 。

先上原图

CUDA+OpenCV简单处理图像_第1张图片35.jpg

//这是用uchar*类型的指针进行传递,将BGR变成RGB
#include 
#include 
#include "opencv2/core.hpp"
#include "cuda.h"
#include "cuda_runtime.h"

using namespace std;


// Copies a 3-bytes-per-pixel image from d_in to d_out while exchanging byte 0
// and byte 2 of every pixel (the article uses this to turn BGR into RGB).
//
// d_in / d_out : device buffers indexed as (row * width + col) * 3 + channel,
//                i.e. rows are addressed with no padding between them.
// width/height : image size in pixels.
//
// Every thread starts at its own global (x, y) position and then advances by
// the total number of launched threads along each axis, so any 2D launch
// configuration visits all pixels.
__global__ void deal_image1(const uchar* d_in, uchar* d_out, int width, int height)
{
    const int firstRow = blockDim.y * blockIdx.y + threadIdx.y;
    const int firstCol = blockDim.x * blockIdx.x + threadIdx.x;
    const int rowStep = gridDim.y * blockDim.y;
    const int colStep = gridDim.x * blockDim.x;

    for(int y = firstRow; y < height; y += rowStep)
    {
        for(int x = firstCol; x < width; x += colStep)
        {
            // Offset of the first byte of pixel (y, x).
            const int base = (y * width + x) * 3;

            d_out[base + 0] = d_in[base + 2];
            d_out[base + 1] = d_in[base + 1];
            d_out[base + 2] = d_in[base + 0];
        }
    }
}

// Host driver for the uchar* variant: reads the picture, resizes it, runs
// deal_image1 on the GPU to swap channel 0 and channel 2, then shows and saves
// the result. Returns 0 on success and 1 if the image cannot be read or any
// CUDA call fails.
int main()
{
    cv::Mat image1 = cv::imread("../../pictures/35.jpg");
    if(image1.empty())
    {
        // imread hands back an empty Mat when the file is missing or unreadable.
        cerr << "failed to read ../../pictures/35.jpg" << endl;
        return 1;
    }

    cv::Size size(540, 810);
    cv::resize(image1, image1, size); // the original picture is too large, shrink it

    if(!image1.isContinuous())
    {
        cout << "img1 is not continuous." << endl;
        // The flat cudaMemcpy below needs rows without padding; clone() makes a packed copy.
        image1 = image1.clone();
    }

    int height = image1.rows;
    int width = image1.cols;
    int channel = image1.channels();
    size_t image1_size = sizeof(uchar) * height * width * channel;

    // Host image of the same size that receives the processed pixels.
    cv::Mat image1_(height, width, CV_8UC3);

    uchar* d_in = NULL;
    uchar* d_out = NULL;

    // Reports a failed CUDA call on stderr and tells the caller whether to go on.
    auto cudaOk = [](cudaError_t status, const char* what) -> bool
    {
        if(status == cudaSuccess)
            return true;
        cerr << what << " failed: " << cudaGetErrorString(status) << endl;
        return false;
    };

    bool ok = cudaOk(cudaMalloc((void**)&d_in, image1_size), "cudaMalloc(d_in)")
           && cudaOk(cudaMalloc((void**)&d_out, image1_size), "cudaMalloc(d_out)")
           && cudaOk(cudaMemcpy(d_in, image1.data, image1_size, cudaMemcpyHostToDevice),
                     "cudaMemcpy(host -> device)");

    if(ok)
    {
        dim3 dimGrid(8, 8, 1);
        dim3 dimBlock(32, 32, 1);

        deal_image1<<<dimGrid, dimBlock>>>(d_in, d_out, width, height);

        // A launch does not return a status itself; query it explicitly. The
        // blocking copy that follows surfaces errors raised while the kernel ran.
        ok = cudaOk(cudaGetLastError(), "deal_image1 launch")
          && cudaOk(cudaMemcpy(image1_.data, d_out, image1_size, cudaMemcpyDeviceToHost),
                    "cudaMemcpy(device -> host)");
    }

    // cudaFree accepts NULL, so this is safe even when an allocation failed.
    cudaFree(d_in);
    cudaFree(d_out);

    if(!ok)
        return 1;

    cv::imshow("image1_", image1_);
    if(!cv::imwrite("../../pictures/35_RGB.jpg", image1_))
    {
        cerr << "failed to write ../../pictures/35_RGB.jpg" << endl;
    }
    cv::waitKey();

    return 0;
}

效果图
CUDA+OpenCV简单处理图像_第2张图片
再是用uchar3*类型指针进行传递的代码

//这是强制类型转换成uchar3*再进行传递
#include 
#include 
#include "opencv2/core.hpp"
#include "cuda.h"
#include "cuda_runtime.h"

using namespace std;


// Copies an image stored as one uchar3 per pixel from d_in to d_out while
// exchanging the x and z components of every pixel (the article uses this to
// turn BGR into RGB).
//
// d_in / d_out : device buffers indexed as row * width + col.
// width/height : image size in pixels.
//
// Every thread starts at its own global (x, y) position and then advances by
// the total number of launched threads along each axis, so any 2D launch
// configuration visits all pixels.
__global__ void deal_image1(const uchar3* d_in, uchar3* d_out, int width, int height)
{
    const int firstRow = blockDim.y * blockIdx.y + threadIdx.y;
    const int firstCol = blockDim.x * blockIdx.x + threadIdx.x;
    const int rowStep = gridDim.y * blockDim.y;
    const int colStep = gridDim.x * blockDim.x;

    for(int y = firstRow; y < height; y += rowStep)
    {
        for(int x = firstCol; x < width; x += colStep)
        {
            const int pixel = y * width + x;
            const uchar3* src = d_in + pixel;
            uchar3* dst = d_out + pixel;

            dst->x = src->z;
            dst->y = src->y;
            dst->z = src->x;
        }
    }
}

// Host driver for the uchar3* variant: reads the picture, resizes it, runs
// deal_image1 on the GPU to swap the x and z components of every pixel, then
// shows the result. Returns 0 on success and 1 if the image cannot be read or
// any CUDA call fails.
int main()
{
    cv::Mat image1 = cv::imread("../../pictures/35.jpg");
    if(image1.empty())
    {
        // imread hands back an empty Mat when the file is missing or unreadable.
        cerr << "failed to read ../../pictures/35.jpg" << endl;
        return 1;
    }

    cv::Size size(540, 810);
    cv::resize(image1, image1, size);

    if(!image1.isContinuous())
    {
        cout << "img1 is not continuous." << endl;
        // The flat cudaMemcpy below needs rows without padding; clone() makes a packed copy.
        image1 = image1.clone();
    }

    int height = image1.rows;
    int width = image1.cols;
    size_t image1_size = sizeof(uchar3) * height * width;

    // Host image of the same size that receives the processed pixels.
    cv::Mat image1_(height, width, CV_8UC3);
    uchar3* d_in = NULL;
    uchar3* d_out = NULL;
    // cv::Mat::data is a uchar*; view the 3-channel pixels as uchar3 for the copy back.
    uchar3* h_out = (uchar3*)image1_.data;

    // Reports a failed CUDA call on stderr and tells the caller whether to go on.
    auto cudaOk = [](cudaError_t status, const char* what) -> bool
    {
        if(status == cudaSuccess)
            return true;
        cerr << what << " failed: " << cudaGetErrorString(status) << endl;
        return false;
    };

    bool ok = cudaOk(cudaMalloc((void**)&d_in, image1_size), "cudaMalloc(d_in)")
           && cudaOk(cudaMalloc((void**)&d_out, image1_size), "cudaMalloc(d_out)")
           && cudaOk(cudaMemcpy(d_in, (uchar3*)image1.data, image1_size, cudaMemcpyHostToDevice),
                     "cudaMemcpy(host -> device)");

    if(ok)
    {
        dim3 dimGrid(8, 8, 1);
        dim3 dimBlock(32, 32, 1);

        deal_image1<<<dimGrid, dimBlock>>>(d_in, d_out, width, height);

        // A launch does not return a status itself; query it explicitly. The
        // blocking copy that follows surfaces errors raised while the kernel ran.
        ok = cudaOk(cudaGetLastError(), "deal_image1 launch")
          && cudaOk(cudaMemcpy(h_out, d_out, image1_size, cudaMemcpyDeviceToHost),
                    "cudaMemcpy(device -> host)");
    }

    // cudaFree accepts NULL, so this is safe even when an allocation failed.
    cudaFree(d_in);
    cudaFree(d_out);

    if(!ok)
        return 1;

    cv::imshow("image1_", image1_);
    cv::waitKey();

    return 0;
}

效果是一样的。灰度图只需要用到uchar*指针,我这里就不赘述了。最后写一个BGR换成RGB,并且图片向左旋转90度的版本,其实都差不多,只是输出下标的计算和输出图片的尺寸有点不一样而已。

//BGR转RGB,并左转90度
#include 
#include 
#include "opencv2/core.hpp"
#include "cuda.h"
#include "cuda_runtime.h"

using namespace std;


// Writes the input image rotated 90 degrees to the left into d_out and, at the
// same time, exchanges the x and z components of every pixel (the article uses
// this to turn BGR into RGB).
//
// d_in   : device buffer of width * height pixels, indexed as row * width + col.
// d_out  : device buffer of the same pixel count, laid out as an image with
//          `width` rows and `height` columns.
// width/height : size of the INPUT image in pixels.
//
// Every thread starts at its own global (x, y) position and then advances by
// the total number of launched threads along each axis, so any 2D launch
// configuration visits all pixels.
__global__ void deal_image1(const uchar3* d_in, uchar3* d_out, int width, int height)
{
    for(int row = blockDim.y * blockIdx.y + threadIdx.y; row < height; row += gridDim.y * blockDim.y)
        for(int col = blockDim.x * blockIdx.x + threadIdx.x; col < width; col += gridDim.x * blockDim.x)
        {
            const int src = row * width + col;
            // Input column `col` becomes output row (width - 1 - col). Using
            // (width - col) would address row `width` for col == 0, one row past
            // the end of d_out, and would leave output row 0 unwritten.
            const int dst = (width - 1 - col) * height + row;

            d_out[dst].x = d_in[src].z;
            d_out[dst].y = d_in[src].y;
            d_out[dst].z = d_in[src].x;
        }
}

// Host driver for the rotate variant: reads the picture, runs deal_image1 on
// the GPU to swap the x and z components and rotate the image 90 degrees to
// the left, then shows and saves the result. Returns 0 on success and 1 if the
// image cannot be read or any CUDA call fails.
int main()
{
    cv::Mat image1 = cv::imread("../../pictures/35.jpg");
    if(image1.empty())
    {
        // imread hands back an empty Mat when the file is missing or unreadable.
        cerr << "failed to read ../../pictures/35.jpg" << endl;
        return 1;
    }

    if(!image1.isContinuous())
    {
        cout << "img1 is not continuous." << endl;
        // The flat cudaMemcpy below needs rows without padding; clone() makes a packed copy.
        image1 = image1.clone();
    }

    int width = image1.cols;
    int height = image1.rows;
    size_t size_image1 = sizeof(uchar3) * width * height;
    // The rotated output swaps the dimensions: `width` rows by `height` columns.
    cv::Mat image1_(width, height, CV_8UC3);

    uchar3* d_in = NULL;
    uchar3* d_out = NULL;
    // cv::Mat::data is a uchar*; view the 3-channel pixels as uchar3 for the copy back.
    uchar3* h_out = (uchar3*)image1_.data;

    // Reports a failed CUDA call on stderr and tells the caller whether to go on.
    auto cudaOk = [](cudaError_t status, const char* what) -> bool
    {
        if(status == cudaSuccess)
            return true;
        cerr << what << " failed: " << cudaGetErrorString(status) << endl;
        return false;
    };

    bool ok = cudaOk(cudaMalloc((void**)&d_in, size_image1), "cudaMalloc(d_in)")
           && cudaOk(cudaMalloc((void**)&d_out, size_image1), "cudaMalloc(d_out)")
           && cudaOk(cudaMemcpy(d_in, (uchar3*)image1.data, size_image1, cudaMemcpyHostToDevice),
                     "cudaMemcpy(host -> device)");

    if(ok)
    {
        dim3 dimGrid(8, 8, 1);
        dim3 dimBlock(32, 32, 1);
        deal_image1<<<dimGrid, dimBlock>>>(d_in, d_out, width, height);

        // A launch does not return a status itself; query it explicitly. The
        // blocking copy that follows surfaces errors raised while the kernel ran.
        ok = cudaOk(cudaGetLastError(), "deal_image1 launch")
          && cudaOk(cudaMemcpy(h_out, d_out, size_image1, cudaMemcpyDeviceToHost),
                    "cudaMemcpy(device -> host)");
    }

    // cudaFree accepts NULL, so this is safe even when an allocation failed.
    cudaFree(d_in);
    cudaFree(d_out);

    if(!ok)
        return 1;

    cv::imshow("image1_", image1_);
    if(!cv::imwrite("../../pictures/35_RGB_90.jpg", image1_))
    {
        cerr << "failed to write ../../pictures/35_RGB_90.jpg" << endl;
    }
    cv::waitKey();

    return 0;
}

效果图
CUDA+OpenCV简单处理图像_第3张图片
总结一下,如果是CV_8UC3类型的图片,可以用uchar* 或者uchar3* 类型的指针进行传递;但是如果是CV_8UC1类型的图片,即单通道,只能用uchar* 类型的指针进行传递,不能用uchar3* 。另外可以看出,如果不用cv::cuda模块,自己写的代码量会很大,如果图片要进行很复杂的处理,将是个大工程!

最后的最后,附上CMakeLists.txt

cmake_minimum_required(VERSION 3.5)

project(cuda_c_test_9 LANGUAGES CXX)

set(CMAKE_CXX_STANDARD 11)
set(CMAKE_CXX_STANDARD_REQUIRED ON)

# Locate the packages before any of their variables are used; CUDA_INCLUDE_DIRS,
# OpenCV_INCLUDE_DIRS and OpenCV_LIBS are only defined after these calls.
# find_package(CUDA) loads CMake's own FindCUDA module, so no machine-specific
# absolute path to FindCUDA.cmake is needed.
find_package(CUDA REQUIRED)
find_package(OpenCV REQUIRED)

include_directories(include
    ${CUDA_INCLUDE_DIRS}
    ${OpenCV_INCLUDE_DIRS}
)

link_directories(${OpenCV_LIBRARY_DIRS})

# Debug information for host (-g) and device (-G) code. CUDA_NVCC_FLAGS is a
# list, so each flag is passed as a separate element.
set(CUDA_NVCC_FLAGS -g -G)

CUDA_ADD_EXECUTABLE(cuda_c_test_9 main.cu)

target_link_libraries(cuda_c_test_9 ${OpenCV_LIBS})

主要参考
https://blog.csdn.net/kelvin_yan/article/details/48315175
https://www.cnblogs.com/dwdxdy/p/3528711.html
https://blog.csdn.net/lingsuifenfei123/article/details/83444159

你可能感兴趣的:(opencv,cuda)