CUDA学习(十一) 利用npp做图像处理

今天想着使用npp做一些图像处理

NPP(NVIDIA Performance Primitives)是CUDA自带的一个库,主要用于图像和视频处理,封装了大量现成的处理函数。

接下来看CUDA SDK里面的一个sample:\v9.1\7_CUDALibraries\cannyEdgeDetectorNPP\

它原本处理的是pgm格式的图片,这种格式不常见,所以我把代码简单地修改为处理bmp图片。下面就看看这个demo的细节,以备日后使用。

总结一下它的使用过程

  1. 确认电脑有device并set
  2. 确认有相应的图片
  3. 分配主机内存并载入
  4. 分配device端内存,并将host端载入的图片放到device
  5. 分配运算用的临时内存
  6. 分配device端的输出内存,调用封装好的算法处理device端的输入图片,并把结果写入device端的输出内存
  7. device to host
  8. free host and device memory

以下是整个代码的实现细节

ImageIO.h

/**
 * Copyright 1993-2015 NVIDIA Corporation.  All rights reserved.
 *
 * Please refer to the NVIDIA end user license agreement (EULA) associated
 * with this source code for terms and conditions that govern your use of
 * this software. Any use, reproduction, disclosure, or distribution of
 * this software and related documentation outside the terms of the EULA
 * is strictly prohibited.
 *
 */

#ifndef NV_UTIL_NPP_IMAGE_IO_H
#define NV_UTIL_NPP_IMAGE_IO_H

#include "ImagesCPU.h"
#include "ImagesNPP.h"

#include "FreeImage.h"
#include "Exceptions.h"

#include <string>
#include "string.h"


// Error handler for the FreeImage library.
//
// loadImage() registers this function through FreeImage_SetOutputMessage().
// Whenever it is invoked it turns the reported message into an npp::Exception.
//
// The function is declared inline because it is defined in a header: without
// inline, including this header from more than one translation unit produces
// multiple definitions of the same symbol at link time.
//
// oFif     - image format the message refers to (not used here).
// zMessage - message text handed over by FreeImage.
// Throws npp::Exception on every call.
inline void
FreeImageErrorHandler(FREE_IMAGE_FORMAT /*oFif*/, const char *zMessage)
{
    throw npp::Exception(zMessage);
}

namespace npp
{
    // Load a gray-scale image from disk.
    //看看图片载入函数
    void
    loadImage(const std::string &rFileName, ImageCPU_8u_C1 &rImage)
    {
        // set your own FreeImage error handler
        //设置错误句柄
        FreeImage_SetOutputMessage(FreeImageErrorHandler);
        //根据输入获取图像格式
        FREE_IMAGE_FORMAT eFormat = FreeImage_GetFileType(rFileName.c_str());

        // no signature? try to guess the file format from the file extension
        if (eFormat == FIF_UNKNOWN)
        {
            eFormat = FreeImage_GetFIFFromFilename(rFileName.c_str());
        }

        NPP_ASSERT(eFormat != FIF_UNKNOWN);
        // check that the plugin has reading capabilities ...
        FIBITMAP *pBitmap;
        //从这里可以看到这里图像的读入需要注意输入图片的格式
        if (FreeImage_FIFSupportsReading(eFormat))
        {
            pBitmap = FreeImage_Load(eFormat, rFileName.c_str());
        }

        NPP_ASSERT(pBitmap != 0);
        // make sure this is an 8-bit single channel image
        //获取图像color类型,具体类型可以查看结构体
        //这里可以看出这个主要是用来处理8bit灰度图
        NPP_ASSERT(FreeImage_GetColorType(pBitmap) == FIC_MINISBLACK);
        NPP_ASSERT(FreeImage_GetBPP(pBitmap) == 8);
        
        //这里还没想清楚,为什么这里要声明一个新的,直接用输入的rImage不行么?
        // create an ImageCPU to receive the loaded image data
        ImageCPU_8u_C1 oImage(FreeImage_GetWidth(pBitmap), FreeImage_GetHeight(pBitmap));

        // Copy the FreeImage data into the new ImageCPU
        //获取每一行的所有位数nSrcPitch
        unsigned int nSrcPitch = FreeImage_GetPitch(pBitmap);
        const Npp8u *pSrcLine = FreeImage_GetBits(pBitmap) + nSrcPitch * (FreeImage_GetHeight(pBitmap) -1);

        //这个是把两个指针指到一处
        Npp8u *pDstLine = oImage.data();
        unsigned int nDstPitch = oImage.pitch();

        //主要给pDstLine,即oImage初始化,把src赋值到dst
        for (size_t iLine = 0; iLine < oImage.height(); ++iLine)
        {
            memcpy(pDstLine, pSrcLine, oImage.width() * sizeof(Npp8u));
            pSrcLine -= nSrcPitch;
            pDstLine += nDstPitch;
        }

        // swap the user given image with our result image, effecively
        // moving our newly loaded image data into the user provided shell
        //这里就把oImage交给rImage
        oImage.swap(rImage);
    }

    //再看看saveImage
    // Save an gray-scale image to disk.
    void
    saveImage(const std::string &rFileName, const ImageCPU_8u_C1 &rImage)
    {
        // create the result image storage using FreeImage so we can easily
        // save
        //为保存图片定义一个指针,我大概能理解这里用的是const rImage防止越界
        FIBITMAP *pResultBitmap = FreeImage_Allocate(rImage.width(), rImage.height(), 8 /* bits per pixel */);
        NPP_ASSERT_NOT_NULL(pResultBitmap);
        unsigned int nDstPitch   = FreeImage_GetPitch(pResultBitmap);
        Npp8u *pDstLine = FreeImage_GetBits(pResultBitmap) + nDstPitch * (rImage.height()-1);
        const Npp8u *pSrcLine = rImage.data();
        unsigned int nSrcPitch = rImage.pitch();

        //同样使用指针位置改变来初始化输出
        for (size_t iLine = 0; iLine < rImage.height(); ++iLine)
        {
            memcpy(pDstLine, pSrcLine, rImage.width() * sizeof(Npp8u));
            pSrcLine += nSrcPitch;
            pDstLine -= nDstPitch;
        }

        // now save the result image
        bool bSuccess;
        //这里最终的输出,还是需要定义输出格式
        bSuccess = FreeImage_Save(FIF_JPEG, pResultBitmap, rFileName.c_str(), 0) == TRUE;
        NPP_ASSERT_MSG(bSuccess, "Failed to save result image.");
    }

    // Load a gray-scale image from disk.
    void
    loadImage(const std::string &rFileName, ImageNPP_8u_C1 &rImage)
    {
        ImageCPU_8u_C1 oImage;
        loadImage(rFileName, oImage);
        ImageNPP_8u_C1 oResult(oImage);
        rImage.swap(oResult);
    }

    // Save an gray-scale image to disk.
    void
    saveImage(const std::string &rFileName, const ImageNPP_8u_C1 &rImage)
    {
        ImageCPU_8u_C1 oHostImage(rImage.size());
        // copy the device result data
        rImage.copyTo(oHostImage.data(), oHostImage.pitch());
        saveImage(rFileName, oHostImage);
    }
}


#endif // NV_UTIL_NPP_IMAGE_IO_H

main.cpp

/**
 * Copyright 1993-2015 NVIDIA Corporation.  All rights reserved.
 *
 * Please refer to the NVIDIA end user license agreement (EULA) associated
 * with this source code for terms and conditions that govern your use of
 * this software. Any use, reproduction, disclosure, or distribution of
 * this software and related documentation outside the terms of the EULA
 * is strictly prohibited.
 *
 */

#if defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64)
#  define WINDOWS_LEAN_AND_MEAN
#  define NOMINMAX
#  include <windows.h>
#  pragma warning(disable:4819)
#endif

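// ImagesCPU.h, ImagesNPP.h and ImageIO.h are a further layer of wrapping on top of FreeImage.h.
// To implement your own functionality it is more practical to read FreeImage.h directly,
// although its implementation is not available as source (it is shipped inside a .lib file).
#include <ImagesCPU.h>
#include <ImagesNPP.h>
#include <ImageIO.h>

// Exceptions.h is used for error/exception handling
#include <Exceptions.h>

#include <string.h>
#include <fstream>
#include <iostream>

#include <cuda_runtime.h>
#include <npp.h>

#include <helper_string.h>
#include <helper_cuda.h>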

// Select the CUDA device to run on and make it current.
//
// argc, argv - command line, forwarded to findCudaDevice().
// Returns the index of the device that was selected.
//
// Terminates the process with EXIT_FAILURE when no CUDA device is reported.
// Every CUDA runtime call is wrapped in checkCudaErrors().
inline int cudaDeviceInit(int argc, const char **argv)
{
    int deviceCount = 0;
    checkCudaErrors(cudaGetDeviceCount(&deviceCount));

    if (deviceCount == 0)
    {
        std::cerr << "CUDA error: no devices supporting CUDA." << std::endl;
        exit(EXIT_FAILURE);
    }

    const int dev = findCudaDevice(argc, argv);

    // Check the query result: without the check a failed call would leave
    // deviceProp uninitialized and its name would still be printed below.
    cudaDeviceProp deviceProp;
    checkCudaErrors(cudaGetDeviceProperties(&deviceProp, dev));
    std::cerr << "cudaSetDevice GPU" << dev << " = " << deviceProp.name << std::endl;

    checkCudaErrors(cudaSetDevice(dev));

    return dev;
}

bool printfNPPinfo(int argc, char *argv[])
{
    const NppLibraryVersion *libVer   = nppGetLibVersion();

    printf("NPP Library Version %d.%d.%d\n", libVer->major, libVer->minor, libVer->build);

    int driverVersion, runtimeVersion;
    cudaDriverGetVersion(&driverVersion);
    cudaRuntimeGetVersion(&runtimeVersion);

    printf("  CUDA Driver  Version: %d.%d\n", driverVersion/1000, (driverVersion%100)/10);
    printf("  CUDA Runtime Version: %d.%d\n", runtimeVersion/1000, (runtimeVersion%100)/10);

    // Min spec is SM 1.0 devices
    bool bVal = checkCudaCapabilities(1, 0);
    return bVal;
}

int main(int argc, char *argv[])
{
    printf("%s Starting...\n\n", argv[0]);

    try
    {
        std::string sFilename;
        char *filePath;

        cudaDeviceInit(argc, (const char **)argv);

        if (printfNPPinfo(argc, argv) == false)
        {
            exit(EXIT_SUCCESS);
        }

        if (checkCmdLineFlag(argc, (const char **)argv, "input"))
        {
            getCmdLineArgumentString(argc, (const char **)argv, "input", &filePath);
        }
        else
        {
            filePath = sdkFindFilePath("test_canny.bmp", argv[0]);
        }

        if (filePath)
        {
            sFilename = filePath;
        }
        else
        {
            sFilename = "test_canny.bmp";
        }

        // if we specify the filename at the command line, then we only test sFilename[0].
        int file_errors = 0;
        std::ifstream infile(sFilename.data(), std::ifstream::in);

        if (infile.good())
        {
            std::cout << "cannyEdgeDetectionNPP opened: <" << sFilename.data() << "> successfully!" << std::endl;
            file_errors = 0;
            infile.close();
        }
        else
        {
            std::cout << "cannyEdgeDetectionNPP unable to open: <" << sFilename.data() << ">" << std::endl;
            file_errors++;
            infile.close();
        }

        if (file_errors > 0)
        {
            exit(EXIT_FAILURE);
        }

        std::string sResultFilename = sFilename;

        std::string::size_type dot = sResultFilename.rfind('.');

        if (dot != std::string::npos)
        {
            sResultFilename = sResultFilename.substr(0, dot);
        }

        sResultFilename += "test_canny_result.bmp";

        if (checkCmdLineFlag(argc, (const char **)argv, "output"))
        {
            char *outputFilePath;
            getCmdLineArgumentString(argc, (const char **)argv, "output", &outputFilePath);
            sResultFilename = outputFilePath;
        }

        // declare a host image object for an 8-bit grayscale image
        //注意这里对于输出图片的定义
        npp::ImageCPU_8u_C1 oHostSrc;
        // load gray-scale image from disk
        //注意这里载入图片的方法,我们看看loadImage()
        npp::loadImage(sFilename, oHostSrc);
        // declare a device image and copy construct from the host image,
        // i.e. upload host to device
        
        //这里直接将host的内存复制到device内存,作为device端处理的src
        npp::ImageNPP_8u_C1 oDeviceSrc(oHostSrc);

        NppiSize oSrcSize = {(int)oDeviceSrc.width(), (int)oDeviceSrc.height()};
        NppiPoint oSrcOffset = {0, 0};

        // create struct with ROI size
        NppiSize oSizeROI = {(int)oDeviceSrc.width() , (int)oDeviceSrc.height() };
        // allocate device image of appropriately reduced size
        // 分配device端内存用于处理src后保存的dst
        npp::ImageNPP_8u_C1 oDeviceDst(oSizeROI.width, oSizeROI.height);

        int nBufferSize = 0;
        Npp8u * pScratchBufferNPP = 0;

        
        // get necessary scratch buffer size and allocate that much device memory
        NPP_CHECK_NPP (
                           nppiFilterCannyBorderGetBufferSize(oSizeROI, &nBufferSize) );
        //分配足够的临时存储器
        cudaMalloc((void **)&pScratchBufferNPP, nBufferSize);

        // now run the canny edge detection filter
        // Using nppiNormL2 will produce larger magnitude values allowing for finer control of threshold values 
        // while nppiNormL1 will be slightly faster. Also, selecting the sobel gradient filter allows up to a 5x5 kernel size
        // which can produce more precise results but is a bit slower. Commonly nppiNormL2 and sobel gradient filter size of
        // 3x3 are used. Canny recommends that the high threshold value should be about 3 times the low threshold value.
        // The threshold range will depend on the range of magnitude values that the sobel gradient filter generates for a particular image.

        Npp16s nLowThreshold = 72;
        Npp16s nHighThreshold = 256;

        if ((nBufferSize > 0) && (pScratchBufferNPP != 0))
        {
        
        //利用封装的代码直接处理图片

            NPP_CHECK_NPP (
                               nppiFilterCannyBorder_8u_C1R(oDeviceSrc.data(), oDeviceSrc.pitch(), oSrcSize, oSrcOffset, 
                                                            oDeviceDst.data(), oDeviceDst.pitch(), oSizeROI,
                                                            NPP_FILTER_SOBEL, NPP_MASK_SIZE_3_X_3, nLowThreshold, nHighThreshold, 
                                                            nppiNormL2, NPP_BORDER_REPLICATE, pScratchBufferNPP) );
        }

        // free scratch buffer memory
        cudaFree(pScratchBufferNPP);
        

        //后面就是简单的声明内存然后保存图片
        // declare a host image for the result
        npp::ImageCPU_8u_C1 oHostDst(oDeviceDst.size());
        // and copy the device result data into it
        oDeviceDst.copyTo(oHostDst.data(), oHostDst.pitch());

        
        saveImage(sResultFilename, oHostDst);
        std::cout << "Saved image: " << sResultFilename << std::endl;

        nppiFree(oDeviceSrc.data());
        nppiFree(oDeviceDst.data());

        exit(EXIT_SUCCESS);
    }
    catch (npp::Exception &rException)
    {
        std::cerr << "Program error! The following exception occurred: \n";
        std::cerr << rException << std::endl;
        std::cerr << "Aborting." << std::endl;

        exit(EXIT_FAILURE);
    }
    catch (...)
    {
        std::cerr << "Program error! An unknow type of exception occurred. \n";
        std::cerr << "Aborting." << std::endl;

        exit(EXIT_FAILURE);
        return -1;
    }

    return 0;
}

 

你可能感兴趣的:(CUDA,并行计算)