手写opencv ncc with mask

OpenCV 中 matchTemplateMask 的源码:

/**
 * Masked template matching: fills _result with one score per placement of
 * _templ inside _img, weighting every template pixel by _mask.
 *
 * @param _img     Image to search; must be at least as large as the template.
 * @param _templ   Template; CV_8U input is converted to CV_32F.
 * @param _result  Output map, created as CV_32F with size
 *                 (img.cols - templ.cols + 1) x (img.rows - templ.rows + 1).
 * @param method   One of CV_TM_SQDIFF, CV_TM_CCORR, CV_TM_CCOEFF or their
 *                 _NORMED variants; any other value leaves _result unfilled.
 * @param _mask    CV_8U or CV_32F, same size as the template, with either one
 *                 channel or as many channels as the template. CV_8U masks are
 *                 binarized to {0, 1}; CV_32F masks are used as given.
 *
 * All correlations are delegated to crossCorr(), which is defined elsewhere.
 */
static void matchTemplateMask( InputArray _img, InputArray _templ, OutputArray _result, int method, InputArray _mask )
{
    // Preconditions on mask depth/channels/size and on image vs. template size.
    CV_Assert(_mask.depth() == CV_8U || _mask.depth() == CV_32F);
    CV_Assert(_mask.channels() == _templ.channels() || _mask.channels() == 1);
    CV_Assert(_templ.size() == _mask.size());
    CV_Assert(_img.size().height >= _templ.size().height &&
              _img.size().width >= _templ.size().width);

    Mat img = _img.getMat(), templ = _templ.getMat(), mask = _mask.getMat();

    // Work in floating point: 8-bit image and template are promoted to CV_32F.
    if (img.depth() == CV_8U)
    {
        img.convertTo(img, CV_32F);
    }
    if (templ.depth() == CV_8U)
    {
        templ.convertTo(templ, CV_32F);
    }
    if (mask.depth() == CV_8U)
    {
        // To keep compatibility to other masks in OpenCV: CV_8U masks are binary masks
        // NOTE(review): source and destination are the same Mat obtained from
        // _mask.getMat(); if that header shares the caller's buffer, this call
        // rewrites the caller's mask to 0/1 - confirm.
        threshold(mask, mask, 0/*threshold*/, 1.0/*maxVal*/, THRESH_BINARY);
        mask.convertTo(mask, CV_32F);
    }

    // One score per top-left placement of the template inside the image.
    Size corrSize(img.cols - templ.cols + 1, img.rows - templ.rows + 1);
    _result.create(corrSize, CV_32F);
    Mat result = _result.getMat();

    // If mask has only one channel, we repeat it for every image/template channel
    if (templ.type() != mask.type())
    {
        // Assertions above ensured, that depth is the same and only number of channel differ
        std::vector<Mat> maskChannels(templ.channels(), mask);
        merge(maskChannels.data(), templ.channels(), mask);
    }

    if (method == CV_TM_SQDIFF || method == CV_TM_SQDIFF_NORMED)
    {
        // Expands sum(M^2 * (I - T)^2) into
        //   CCorr(I^2, M^2) - 2 * CCorr(I, T * M^2) + sum((T * M)^2)
        Mat temp_result(corrSize, CV_32F);
        Mat img2 = img.mul(img);
        Mat mask2 = mask.mul(mask);
        // If the mul() is ever unnested, declare MatExpr, *not* Mat, to be more efficient.
        // NORM_L2SQR calculates sum of squares
        double templ2_mask2_sum = norm(templ.mul(mask), NORM_L2SQR);
        crossCorr(img2, mask2, temp_result, Point(0,0), 0, 0);
        crossCorr(img, templ.mul(mask2), result, Point(0,0), 0, 0);
        // result and temp_result should not be switched, because temp_result is potentially needed
        // for normalization.
        result = -2 * result + temp_result + templ2_mask2_sum;

        if (method == CV_TM_SQDIFF_NORMED)
        {
            // Normalize by sqrt(sum((T * M)^2) * CCorr(I^2, M^2)).
            sqrt(templ2_mask2_sum * temp_result, temp_result);
            result /= temp_result;
        }
    }
    else if (method == CV_TM_CCORR || method == CV_TM_CCORR_NORMED)
    {
        // Plain masked correlation: CCorr(I, T * M^2).
        // If the mul() is ever unnested, declare MatExpr, *not* Mat, to be more efficient.
        Mat templ_mask2 = templ.mul(mask.mul(mask));
        crossCorr(img, templ_mask2, result, Point(0,0), 0, 0);

        if (method == CV_TM_CCORR_NORMED)
        {
            // Same normalizer as in the SQDIFF_NORMED branch.
            Mat temp_result(corrSize, CV_32F);
            Mat img2 = img.mul(img);
            Mat mask2 = mask.mul(mask);
            // NORM_L2SQR calculates sum of squares
            double templ2_mask2_sum = norm(templ.mul(mask), NORM_L2SQR);
            crossCorr( img2, mask2, temp_result, Point(0,0), 0, 0 );
            sqrt(templ2_mask2_sum * temp_result, temp_result);
            result /= temp_result;
        }
    }
    else if (method == CV_TM_CCOEFF || method == CV_TM_CCOEFF_NORMED)
    {
        // Do mul() inline or declare MatExpr where possible, *not* Mat, to be more efficient.

        // Per-channel sum of the mask weights.
        Scalar mask_sum = sum(mask);
        // T' * M where T' = M * (T - 1/sum(M)*sum(M*T))
        Mat templx_mask = mask.mul(mask.mul(templ - sum(mask.mul(templ)).div(mask_sum)));
        Mat img_mask_corr(corrSize, img.type()); // Needs separate channels

        // CCorr(I, T'*M)
        crossCorr(img, templx_mask, result, Point(0, 0), 0, 0);
        // CCorr(I, M)
        crossCorr(img, mask, img_mask_corr, Point(0, 0), 0, 0);

        // CCorr(I', T') = CCorr(I, T'*M) - sum(T'*M)/sum(M)*CCorr(I, M)
        // It does not matter what to use Mat/MatExpr, it should be evaluated to perform assign subtraction
        Mat temp_res = img_mask_corr.mul(sum(templx_mask).div(mask_sum));
        if (img.channels() == 1)
        {
            result -= temp_res;
        }
        else
        {
            // Sum channels of expression
            temp_res = temp_res.reshape(1, result.rows * result.cols);
            // channels are now columns
            reduce(temp_res, temp_res, 1, REDUCE_SUM);
            // transform back, but now with only one channel
            result -= temp_res.reshape(1, result.rows);
        }
        if (method == CV_TM_CCOEFF_NORMED)
        {
            // norm(T')
            double norm_templx = norm(mask.mul(templ - sum(mask.mul(templ)).div(mask_sum)),
                                      NORM_L2);
            // norm(I') = sqrt{ CCorr(I^2, M^2) - 2*CCorr(I, M^2)/sum(M)*CCorr(I, M)
            //                  + sum(M^2)*CCorr(I, M)^2/sum(M)^2 }
            //          = sqrt{ CCorr(I^2, M^2)
            //                  + CCorr(I, M)/sum(M)*{ sum(M^2) / sum(M) * CCorr(I,M)
            //                  - 2 * CCorr(I, M^2) } }
            Mat norm_imgx(corrSize, CV_32F);
            Mat img2 = img.mul(img);
            Mat mask2 = mask.mul(mask);
            Scalar mask2_sum = sum(mask2);
            Mat img_mask2_corr(corrSize, img.type());
            crossCorr(img2, mask2, norm_imgx, Point(0,0), 0, 0);
            crossCorr(img, mask2, img_mask2_corr, Point(0,0), 0, 0);
            // Second term of the norm(I') expansion above, still per channel.
            temp_res = img_mask_corr.mul(Scalar(1.0, 1.0, 1.0, 1.0).div(mask_sum))
                           .mul(img_mask_corr.mul(mask2_sum.div(mask_sum)) - 2 * img_mask2_corr);
            if (img.channels() == 1)
            {
                norm_imgx += temp_res;
            }
            else
            {
                // Sum channels of expression
                temp_res = temp_res.reshape(1, result.rows*result.cols);
                // channels are now columns
                // reduce sums columns (= channels)
                reduce(temp_res, temp_res, 1, REDUCE_SUM);
                // transform back, but now with only one channel
                norm_imgx += temp_res.reshape(1, result.rows);
            }
            sqrt(norm_imgx, norm_imgx);
            // Final score: CCorr(I', T') / (norm(I') * norm(T')).
            result /= norm_imgx * norm_templx;
        }
    }
}

下面参照其中 TM_CCOEFF_NORMED 的部分手写了一个实现。如果想做多角度匹配,关键是分清哪些计算只依赖模板和掩膜(可以对每个角度预先算好),哪些计算依赖待匹配图像(每次匹配都必须重新计算)。

/**
 * Hand-written masked TM_CCOEFF_NORMED (single channel, binary mask), split
 * into a template-only stage and an image-dependent stage that are timed
 * separately.
 *
 * The mask is assumed to be binary (M^2 == M): sum(M^2) is taken equal to
 * sum(M) and CCorr(I, M^2) is taken equal to CCorr(I, M). CV_8U masks are
 * binarized here; a CV_32F mask must already contain only 0 and 1.
 *
 * NOTE: the score map is computed into a local Mat and is not returned; the
 * only outputs of this function are the two timings.
 *
 * @param src           Single-channel image to search (CV_8U is converted to CV_32F).
 * @param templ         Single-channel template, CV_8U or CV_32F.
 * @param mask          Single-channel mask of the same size as templ, CV_8U or CV_32F.
 * @param dCreatTemplT  [out] Time spent on the template-only stage, in milliseconds.
 * @param dMatch        [out] Time spent on the matching stage, in milliseconds.
 */
void NCCWithMask(Mat src, Mat templ, Mat mask, double& dCreatTemplT, double& dMatch)
{
    // Same kind of preconditions as the reference implementation; without them
    // the raw float pointer loops below would read out of bounds.
    CV_Assert(!templ.empty() && templ.size() == mask.size());
    CV_Assert(src.channels() == 1);
    CV_Assert(src.rows >= templ.rows && src.cols >= templ.cols);

    double tickStart = static_cast<double>(getTickCount());

    if (templ.depth() == CV_8U) { templ.convertTo(templ, CV_32F); }
    if (mask.depth() == CV_8U)
    {
        // Threshold into a separate Mat: `mask` still shares its buffer with the
        // caller's Mat, so an in-place threshold would overwrite the caller's data.
        Mat binaryMask;
        threshold(mask, binaryMask, 0/*threshold*/, 1.0/*maxVal*/, THRESH_BINARY);
        binaryMask.convertTo(mask, CV_32F);
    }
    CV_Assert(templ.type() == CV_32FC1 && mask.type() == CV_32FC1);

    // The loops below address the pixels linearly, so rows must be contiguous.
    if (!templ.isContinuous()) { templ = templ.clone(); }
    if (!mask.isContinuous())  { mask = mask.clone(); }

    const int Width     = templ.cols;
    const int Height    = templ.rows;
    const int templArea = Width * Height;
    const int nStep     = v_float32::nlanes;
    const int templVecEnd = templArea - templArea % nStep; // end of the SIMD-sized part

    Mat templx_mask(Height, Width, CV_32F); // T' * M, fully written below
    const float* ptrTempl  = templ.ptr<float>(0);
    const float* ptrMask   = mask.ptr<float>(0);
    float*       ptrTemplx = templx_mask.ptr<float>(0);

    //--------------- sum(M), sum(T*M) ---------------
    double dMaskSum         = 0;
    double dTemplMulMaskSum = 0;
    v_float32 v_maskSum         = vx_setzero_f32();
    v_float32 v_templMulMaskSum = vx_setzero_f32();
    for (int i = 0; i < templVecEnd; i += nStep)
    {
        const v_float32 v_templ = vx_load(ptrTempl + i);
        const v_float32 v_mask  = vx_load(ptrMask + i);
        v_maskSum         += v_mask;
        v_templMulMaskSum += v_templ * v_mask;
    }
    // Remaining elements that do not fill a whole vector.
    for (int i = templVecEnd; i < templArea; ++i)
    {
        dMaskSum         += ptrMask[i];
        dTemplMulMaskSum += ptrMask[i] * ptrTempl[i];
    }
    dMaskSum         += v_reduce_sum(v_maskSum);
    dTemplMulMaskSum += v_reduce_sum(v_templMulMaskSum);
    const double dMask2Sum = dMaskSum; // binary mask: sum(M^2) == sum(M)

    //--------------- T'*M, sum(T'*M), norm(T') ---------------
    // Masked mean of the template: sum(T*M) / sum(M).
    const float fMaskedMean = static_cast<float>(dTemplMulMaskSum / dMaskSum);
    const v_float32 v_maskedMean = vx_setall_f32(fMaskedMean);
    double dSumTemplx_Mask = 0;
    double dNorm_templx    = 0;
    v_float32 v_sumTemplx_Mask = vx_setzero_f32();
    v_float32 v_normTemplx     = vx_setzero_f32();
    for (int i = 0; i < templVecEnd; i += nStep)
    {
        const v_float32 v_templx = vx_load(ptrMask + i) * (vx_load(ptrTempl + i) - v_maskedMean);
        vx_store(ptrTemplx + i, v_templx);
        v_sumTemplx_Mask += v_templx;
        v_normTemplx     += v_templx * v_templx;
    }
    for (int i = templVecEnd; i < templArea; ++i)
    {
        ptrTemplx[i] = ptrMask[i] * (ptrTempl[i] - fMaskedMean);
        const double dTemplx = ptrTemplx[i];
        dSumTemplx_Mask += dTemplx;
        dNorm_templx    += dTemplx * dTemplx;
    }
    dSumTemplx_Mask += v_reduce_sum(v_sumTemplx_Mask);
    dNorm_templx    += v_reduce_sum(v_normTemplx);
    dNorm_templx     = std::sqrt(dNorm_templx);

    dCreatTemplT = (getTickCount() - tickStart) / getTickFrequency() * 1000; // ms
    tickStart = static_cast<double>(getTickCount());

    //--------------------------------- Match ---------------------------------
    if (src.depth() == CV_8U) { src.convertTo(src, CV_32F); }
    const Size size(src.cols - Width + 1, src.rows - Height + 1);
    Mat result(size, CV_32F);
    Mat img_mask_corr(size, CV_32F);
    Mat norm_imgx(size, CV_32F);
    const Mat src2 = src.mul(src);
    ConvDFT(src, templx_mask, result);   // CCorr(I, T'*M)
    ConvDFT(src, mask, img_mask_corr);   // CCorr(I, M)
    ConvDFT(src2, mask, norm_imgx);      // CCorr(I^2, M^2), M^2 == M

    // CCorr(I', T') = CCorr(I, T'*M) - sum(T'*M)/sum(M)*CCorr(I, M)
    // norm(I')      = sqrt{ CCorr(I^2, M^2)
    //                 + CCorr(I, M)/sum(M)*{ sum(M^2)/sum(M)*CCorr(I, M) - 2*CCorr(I, M^2) } }
    float*       ptrRes        = result.ptr<float>(0);
    const float* ptrNormIx     = norm_imgx.ptr<float>(0);
    const float* ptrConvIMask  = img_mask_corr.ptr<float>(0);
    const float* ptrConvIMask2 = ptrConvIMask; // CCorr(I, M^2) == CCorr(I, M) for a binary mask

    const v_float32 v_sumTxM     = vx_setall_f32(static_cast<float>(dSumTemplx_Mask));
    const v_float32 v_maskSumAll = vx_setall_f32(static_cast<float>(dMaskSum));
    const v_float32 v_mask2Sum   = vx_setall_f32(static_cast<float>(dMask2Sum));
    const v_float32 v_normTempl  = vx_setall_f32(static_cast<float>(dNorm_templx));
    const v_float32 v_two        = vx_setall_f32(2.0f);

    const int corrArea   = size.area();
    const int corrVecEnd = corrArea - corrArea % nStep;
    for (int i = 0; i < corrVecEnd; i += nStep)
    {
        const v_float32 v_convIMask  = vx_load(ptrConvIMask + i);
        const v_float32 v_convIMask2 = vx_load(ptrConvIMask2 + i);
        const v_float32 v_normImg = v_sqrt(vx_load(ptrNormIx + i)
            + v_convIMask / v_maskSumAll * (v_convIMask * v_mask2Sum / v_maskSumAll - v_two * v_convIMask2));
        const v_float32 v_score = (vx_load(ptrRes + i) - v_sumTxM / v_maskSumAll * v_convIMask)
            / (v_normImg * v_normTempl);
        vx_store(ptrRes + i, v_score);
    }
    // Scalar tail: must apply exactly the same formula as the vector loop,
    // including the subtraction of sum(T'*M)/sum(M)*CCorr(I, M).
    for (int i = corrVecEnd; i < corrArea; ++i)
    {
        const double dConvIMask = ptrConvIMask[i];
        const double dNormImg = std::sqrt(ptrNormIx[i]
            + dConvIMask / dMaskSum * (dConvIMask * dMask2Sum / dMaskSum - 2 * ptrConvIMask2[i]));
        ptrRes[i] = static_cast<float>((ptrRes[i] - dSumTemplx_Mask / dMaskSum * dConvIMask)
            / (dNormImg * dNorm_templx));
    }
    dMatch = (getTickCount() - tickStart) / getTickFrequency() * 1000; // ms
}
/**
 * Single-channel cross-correlation of img with _templ, computed tile by tile
 * with DFTs (a reduced copy of OpenCV's crossCorr with anchor (0,0), delta 0
 * and an isolated constant border).
 *
 * @param img     Single-channel image.
 * @param _templ  Single-channel template; converted to max(CV_32F, img depth)
 *                when its depth differs from both the image depth and that depth.
 * @param corr    Output correlation map. If it is passed in allocated, its size
 *                and depth are kept and every element is overwritten. If it is
 *                empty, it is allocated as CV_32F with size
 *                (img.cols - templ.cols + 1) x (img.rows - templ.rows + 1).
 */
void ConvDFT(Mat img, Mat _templ, Mat& corr)
{
    CV_Assert(!img.empty() && !_templ.empty());
    // There is no per-channel loop here, so multi-channel data cannot be handled.
    CV_Assert(img.channels() == 1 && _templ.channels() == 1);
    if (corr.empty())
    {
        // An empty output would give a zero block size and a division by zero
        // in the tile count below, so allocate the usual "valid" result instead.
        CV_Assert(img.rows >= _templ.rows && img.cols >= _templ.cols);
        corr.create(img.rows - _templ.rows + 1, img.cols - _templ.cols + 1, CV_32F);
    }
    CV_Assert(corr.channels() == 1);

    const double delta = 0;
    // Same value as the literal 16 used before: never look outside the given
    // image, pad with a constant (zero) where a tile reaches past its edge.
    const int borderType = BORDER_ISOLATED | BORDER_CONSTANT;
    const double blockScale = 4.5;
    const int minBlockSize = 256;
    corr.setTo(0);

    Mat templ = _templ;
    const int depth = img.depth();
    int tdepth = templ.depth();
    const int cdepth = corr.depth();

    const int workDepth = std::max(CV_32F, depth);
    if (depth != tdepth && tdepth != workDepth)
    {
        _templ.convertTo(templ, workDepth);
        tdepth = templ.depth();
    }

    const int maxDepth = depth > CV_8S ? CV_64F : std::max(std::max(CV_32F, tdepth), cdepth);
    Size blocksize, dftsize;

    // Initial tile size: a multiple of the template size, at least minBlockSize
    // once the template margin is added, and never larger than the output.
    blocksize.width = cvRound(templ.cols * blockScale);
    blocksize.width = std::max(blocksize.width, minBlockSize - templ.cols + 1);
    blocksize.width = std::min(blocksize.width, corr.cols);
    blocksize.height = cvRound(templ.rows * blockScale);
    blocksize.height = std::max(blocksize.height, minBlockSize - templ.rows + 1);
    blocksize.height = std::min(blocksize.height, corr.rows);

    dftsize.width = std::max(getOptimalDFTSize(blocksize.width + templ.cols - 1), 2);
    dftsize.height = getOptimalDFTSize(blocksize.height + templ.rows - 1);

    // Recompute the tile size from the DFT size that was actually chosen.
    blocksize.width = std::min(dftsize.width - templ.cols + 1, corr.cols);
    blocksize.height = std::min(dftsize.height - templ.rows + 1, corr.rows);

    Mat dftTempl(dftsize.height, dftsize.width, maxDepth);
    Mat dftImg(dftsize, maxDepth);

    // Forward DFT of the template, done once; only the first templ.rows rows
    // are declared non-zero to the transform.
    {
        Ptr<hal::DFT2D> templDft = hal::DFT2D::create(dftsize.width, dftsize.height, dftTempl.depth(),
                                                      1, 1, CV_HAL_DFT_IS_INPLACE, templ.rows);
        Mat templArea(dftTempl, Rect(0, 0, templ.cols, templ.rows));
        templ.convertTo(templArea, templArea.depth());
        if (dftTempl.cols > templ.cols)
        {
            // Zero the columns to the right of the template in its rows.
            Mat padding(dftTempl, Range(0, templ.rows), Range(templ.cols, dftTempl.cols));
            padding = Scalar::all(0);
        }
        templDft->apply(dftTempl.data, (int)dftTempl.step, dftTempl.data, (int)dftTempl.step);
    }

    const int tileCountX = (corr.cols + blocksize.width - 1) / blocksize.width;
    const int tileCountY = (corr.rows + blocksize.height - 1) / blocksize.height;
    const int tileCount = tileCountX * tileCountY;

    // Pre-built transforms for full-height tiles; shorter tiles in the last
    // row fall back to cv::dft with their own non-zero row count.
    const int fwdFlags = CV_HAL_DFT_IS_INPLACE;
    const int invFlags = fwdFlags | CV_HAL_DFT_INVERSE | CV_HAL_DFT_SCALE;
    Ptr<hal::DFT2D> cF = hal::DFT2D::create(dftsize.width, dftsize.height, maxDepth, 1, 1,
                                            fwdFlags, blocksize.height + templ.rows - 1);
    Ptr<hal::DFT2D> cR = hal::DFT2D::create(dftsize.width, dftsize.height, maxDepth, 1, 1,
                                            invFlags, blocksize.height);

    // calculate correlation by blocks
    for (int i = 0; i < tileCount; i++)
    {
        const int x = (i % tileCountX) * blocksize.width;
        const int y = (i / tileCountX) * blocksize.height;

        const Size bsz(std::min(blocksize.width, corr.cols - x),
                       std::min(blocksize.height, corr.rows - y));
        // Image patch needed for this tile: the tile plus the template margin.
        const Size dsz(bsz.width + templ.cols - 1, bsz.height + templ.rows - 1);
        const int x2 = std::min(img.cols, x + dsz.width);
        const int y2 = std::min(img.rows, y + dsz.height);

        Mat patch(img, Range(y, y2), Range(x, x2));
        Mat padded(dftImg, Rect(0, 0, dsz.width, dsz.height));
        Mat patchDst(dftImg, Rect(0, 0, x2 - x, y2 - y));
        Mat cdst(corr, Rect(x, y, bsz.width, bsz.height));

        dftImg = Scalar::all(0);
        patch.convertTo(patchDst, patchDst.depth());

        // Only reached when the tile extends past the image (corr larger than
        // the "valid" size): extend the copied patch with the border value.
        if (x2 - x < dsz.width || y2 - y < dsz.height)
            copyMakeBorder(patchDst, padded, 0, padded.rows - patchDst.rows,
                           0, padded.cols - patchDst.cols, borderType);

        const bool fullHeightTile = (bsz.height == blocksize.height);
        if (fullHeightTile)
            cF->apply(dftImg.data, (int)dftImg.step, dftImg.data, (int)dftImg.step);
        else
            dft(dftImg, dftImg, 0, dsz.height);

        // Multiply by the conjugated template spectrum (correlation, not convolution).
        mulSpectrums(dftImg, dftTempl, dftImg, 0, true);

        if (fullHeightTile)
            cR->apply(dftImg.data, (int)dftImg.step, dftImg.data, (int)dftImg.step);
        else
            dft(dftImg, dftImg, DFT_INVERSE + DFT_SCALE, bsz.height);

        // The top-left bsz region holds the valid correlation values of this tile.
        Mat tileResult = dftImg(Rect(0, 0, bsz.width, bsz.height));
        tileResult.convertTo(cdst, cdepth, 1, delta);
    }
}

你可能感兴趣的:(Opencv,opencv)