关于OpenCV Gpu模块无法使用Cuda4.2以上版本编译成功的解决方案

当使用目前最新版本的Cuda 5.0编译opencv_gpu工程时,会出现以下问题:

matrix_reductions.cpp(119) : error C2660: “nppiMean_StdDev_8u_C1R”: 函数不接受 5 个参数

这是由于cuda接口出现变动造成的,稍微修改OpenCV源码即可成功编译。


需要修改部分如下:

1: opencv\modules\gpu\src\matrix_reductions.cpp 

找到如下函数

void cv::gpu::meanStdDev(const GpuMat& src, Scalar& mean, Scalar& stddev)

原本部分内容为:

    DeviceBuffer dbuf(2);

    nppSafeCall( nppiMean_StdDev_8u_C1R(src.ptr<Npp8u>(), static_cast<int>(src.step), sz, dbuf, (double*)dbuf + 1) );

    cudaSafeCall( cudaDeviceSynchronize() );
    
    double* ptrs[2] = {mean.val, stddev.val};
    dbuf.download(ptrs);

该函数的意图是:调用nppiMean_StdDev_8u_C1R,由最后两个参数分别接收mean和stddev的计算结果,再通过DeviceBuffer::download把结果从显存写回内存。而新的nppiMean_StdDev_8u_C1R函数描述如下:

/**
 * 1-channel 8-bit unsigned char image mean and standard deviation.
 *
 * \param pSrc \ref source_image_pointer.
 * \param nSrcStep \ref source_image_line_step.
 * \param oSizeROI \ref roi_specification.
 * \param pDeviceBuffer Pointer to the required device memory allocation, \ref general_scratch_buffer
 *        Use \ref nppiMeanStdDevGetBufferHostSize_8u_C1R to determine the minium number of bytes required.
 * \param pMean Contains computed mean.
 * \param pStdDev Contains computed standard deviation.
 * \return \ref image_data_error_codes, \ref roi_error_codes
 */

从以上说明可以看出,新的函数增加了一个pDeviceBuffer参数(临时缓冲区,所需字节数由nppiMeanStdDevGetBufferHostSize_8u_C1R给出),mean和stddev则分别通过pMean和pStdDev返回;参数个数由5个变为6个,这正是上面编译错误的原因。因此将此段代码修改如下

void cv::gpu::meanStdDev(const GpuMat& src, Scalar& mean, Scalar& stddev)
{
    CV_Assert(src.type() == CV_8UC1);

    NppiSize sz;
    sz.width  = src.cols;
    sz.height = src.rows;

    Npp8u buf;

    nppSafeCall( nppiMean_StdDev_8u_C1R(src.ptr<Npp8u>(), static_cast<int>(src.step), sz,&buf, mean.val, stddev.val ));

    cudaSafeCall( cudaDeviceSynchronize() );
}

2:opencv\modules\gpu\src\imgproc.cpp

找到如下函数

void cv::gpu::rectStdDev(const GpuMat& src, const GpuMat& sqr, GpuMat& dst, const Rect& rect, Stream& s)

原本部分内容为

    nppSafeCall( nppiRectStdDev_32s32f_C1R(src.ptr<Npp32s>(), static_cast<int>(src.step), (const Npp64f *)sqr.ptr<Npp32f>(),static_cast<int>(sqr.step),dst.ptr<Npp32f>(), static_cast<int>(dst.step), sz, nppRect) );

由于

nppiRectStdDev_32s32f_C1R的第三个参数被换成了const Npp64f *,Cuda中相关类型的定义如下:

typedef unsigned char       Npp8u;     ///< 8-bit unsigned chars
typedef signed char         Npp8s;     ///< 8-bit signed chars
typedef unsigned short      Npp16u;    ///< 16-bit unsigned integers
typedef short               Npp16s;    ///< 16-bit signed integers
typedef unsigned int        Npp32u;    ///< 32-bit unsigned integers
typedef int                 Npp32s;    ///< 32-bit signed integers
typedef unsigned long long  Npp64u;    ///< 64-bit unsigned integers
typedef long long           Npp64s;    ///< 64-bit signed integers
typedef float               Npp32f;    ///< 32-bit (IEEE) floating-point numbers
typedef double              Npp64f;    ///< 64-bit floating-point numbers

因此我们做一点小修改将原部分修改为

    Npp64f sqr64f = *sqr.ptr<Npp32f>();

    nppSafeCall( nppiRectStdDev_32s32f_C1R(src.ptr<Npp32s>(), static_cast<int>(src.step), &sqr64f, static_cast<int>(sqr.step),
                dst.ptr<Npp32f>(), static_cast<int>(dst.step), sz, nppRect) );


3:opencv\modules\gpu\src\graphcuts.cpp


找到如下函数

void cv::gpu::graphcut(GpuMat& terminals, GpuMat& leftTransp, GpuMat& rightTransp, GpuMat& top, GpuMat& bottom, GpuMat& labels, GpuMat& buf, Stream& s)

原本部分代码为

        nppSafeCall( nppiGraphcut_32s8u(terminals.ptr<Npp32s>(), leftTransp.ptr<Npp32s>(), rightTransp.ptr<Npp32s>(), top.ptr<Npp32s>(), bottom.ptr<Npp32s>(),static_cast<int>(terminals.step), static_cast<int>(leftTransp.step), sznpp, labels.ptr<Npp8u>(), static_cast<int>(labels.step), buf.ptr<Npp8u>()) );


其中最后一个参数有变动,类型不匹配,新的类型是NppiGraphcutState *,Cuda中的定义如下:

#if defined (__cplusplus)
struct NppiGraphcutState;
#else
typedef struct NppiGraphcutState NppiGraphcutState;
#endif

所以需要为最后一个参数传入NppiGraphcutState *类型的值,修改如下

    nppSafeCall( nppiGraphcut_32s8u(terminals.ptr<Npp32s>(), leftTransp.ptr<Npp32s>(), rightTransp.ptr<Npp32s>(), top.ptr<Npp32s>(), bottom.ptr<Npp32s>(),static_cast<int>(terminals.step), static_cast<int>(leftTransp.step), sznpp, labels.ptr<Npp8u>(), static_cast<int>(labels.step), (NppiGraphcutState *)buf.ptr<Npp8u>()) );

4:opencv\modules\gpu\src\element_operations.cpp

找到如下函数

void cv::gpu::absdiff(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, Stream& s)

原本部分代码为

    case CV_32SC1:
        nppSafeCall( nppiAbsDiff_32s_C1R(src1.ptr<Npp32s>(), static_cast<int>(src1.step), src2.ptr<Npp32s>(), static_cast<int>(src2.step), dst.ptr<Npp32s>(), static_cast<int>(dst.step), sz) );
        break;

由于Cuda中不再有nppiAbsDiff_32s_C1R函数,所以我们将此段代码注释掉。由于大部分的运算都是用8U进行的,所以注释掉这里影响不大(但注释之后,absdiff将不再处理CV_32SC1类型的输入)。


完成以上步骤 即可成功编译


你可能感兴趣的:(CUDA,解决方案,opencv)