OpenCV 中带掩膜模板匹配函数 matchTemplateMask 的源码:
/**
 * Template matching restricted to the template pixels selected by a mask.
 *
 * @param _img     Image to search. 8-bit input is converted to CV_32F first.
 * @param _templ   Template; must not be larger than the image. 8-bit input is
 *                 converted to CV_32F first.
 * @param _result  Output map, created here as single-channel CV_32F of size
 *                 (img.cols - templ.cols + 1) x (img.rows - templ.rows + 1).
 * @param method   One of CV_TM_SQDIFF, CV_TM_SQDIFF_NORMED, CV_TM_CCORR,
 *                 CV_TM_CCORR_NORMED, CV_TM_CCOEFF, CV_TM_CCOEFF_NORMED. Any
 *                 other value leaves _result allocated but not filled in.
 * @param _mask    Mask of the same size as the template, depth CV_8U or CV_32F,
 *                 with either one channel or as many channels as the template.
 *                 A CV_8U mask is treated as binary (non-zero -> 1); a CV_32F
 *                 mask is used as per-pixel weights.
 *
 * The caller's mask buffer is never modified: the binarisation of a CV_8U mask
 * is done into a temporary instead of in place.
 */
static void matchTemplateMask( InputArray _img, InputArray _templ, OutputArray _result, int method, InputArray _mask )
{
    CV_Assert(_mask.depth() == CV_8U || _mask.depth() == CV_32F);
    CV_Assert(_mask.channels() == _templ.channels() || _mask.channels() == 1);
    CV_Assert(_templ.size() == _mask.size());
    CV_Assert(_img.size().height >= _templ.size().height &&
              _img.size().width >= _templ.size().width);

    Mat img = _img.getMat(), templ = _templ.getMat(), mask = _mask.getMat();

    // convertTo() to a different depth allocates a new buffer, so these
    // conversions only affect the local headers.
    if (img.depth() == CV_8U)
    {
        img.convertTo(img, CV_32F);
    }
    if (templ.depth() == CV_8U)
    {
        templ.convertTo(templ, CV_32F);
    }
    if (mask.depth() == CV_8U)
    {
        // To keep compatibility to other masks in OpenCV: CV_8U masks are binary masks.
        // threshold() keeps size and type, so running it in place would write
        // through the header shared with the caller's mask. Threshold into a
        // temporary to leave the input untouched.
        Mat binaryMask;
        threshold(mask, binaryMask, 0/*threshold*/, 1.0/*maxVal*/, THRESH_BINARY);
        binaryMask.convertTo(mask, CV_32F);
    }

    Size corrSize(img.cols - templ.cols + 1, img.rows - templ.rows + 1);
    _result.create(corrSize, CV_32F);
    Mat result = _result.getMat();

    // If mask has only one channel, we repeat it for every image/template channel
    if (templ.type() != mask.type())
    {
        // Assertions above ensured, that depth is the same and only number of channel differ
        std::vector<Mat> maskChannels(templ.channels(), mask);
        merge(maskChannels.data(), templ.channels(), mask);
    }

    if (method == CV_TM_SQDIFF || method == CV_TM_SQDIFF_NORMED)
    {
        // sum(M^2 * (I - T)^2) = CCorr(I^2, M^2) - 2*CCorr(I, T*M^2) + sum((T*M)^2)
        Mat temp_result(corrSize, CV_32F);
        Mat img2 = img.mul(img);
        Mat mask2 = mask.mul(mask);
        // If the mul() is ever unnested, declare MatExpr, *not* Mat, to be more efficient.
        // NORM_L2SQR calculates sum of squares
        double templ2_mask2_sum = norm(templ.mul(mask), NORM_L2SQR);
        crossCorr(img2, mask2, temp_result, Point(0,0), 0, 0);
        crossCorr(img, templ.mul(mask2), result, Point(0,0), 0, 0);
        // result and temp_result should not be switched, because temp_result is potentially needed
        // for normalization.
        result = -2 * result + temp_result + templ2_mask2_sum;
        if (method == CV_TM_SQDIFF_NORMED)
        {
            // Normalise by sqrt( sum((T*M)^2) * CCorr(I^2, M^2) ).
            sqrt(templ2_mask2_sum * temp_result, temp_result);
            result /= temp_result;
        }
    }
    else if (method == CV_TM_CCORR || method == CV_TM_CCORR_NORMED)
    {
        // If the mul() is ever unnested, declare MatExpr, *not* Mat, to be more efficient.
        Mat templ_mask2 = templ.mul(mask.mul(mask));
        crossCorr(img, templ_mask2, result, Point(0,0), 0, 0);
        if (method == CV_TM_CCORR_NORMED)
        {
            Mat temp_result(corrSize, CV_32F);
            Mat img2 = img.mul(img);
            Mat mask2 = mask.mul(mask);
            // NORM_L2SQR calculates sum of squares
            double templ2_mask2_sum = norm(templ.mul(mask), NORM_L2SQR);
            crossCorr( img2, mask2, temp_result, Point(0,0), 0, 0 );
            // Same normaliser as in the SQDIFF_NORMED case.
            sqrt(templ2_mask2_sum * temp_result, temp_result);
            result /= temp_result;
        }
    }
    else if (method == CV_TM_CCOEFF || method == CV_TM_CCOEFF_NORMED)
    {
        // Do mul() inline or declare MatExpr where possible, *not* Mat, to be more efficient.

        Scalar mask_sum = sum(mask);
        // T' * M where T' = M * (T - 1/sum(M)*sum(M*T))
        Mat templx_mask = mask.mul(mask.mul(templ - sum(mask.mul(templ)).div(mask_sum)));
        Mat img_mask_corr(corrSize, img.type()); // Needs separate channels

        // CCorr(I, T'*M)
        crossCorr(img, templx_mask, result, Point(0, 0), 0, 0);

        // CCorr(I, M)
        crossCorr(img, mask, img_mask_corr, Point(0, 0), 0, 0);

        // CCorr(I', T') = CCorr(I, T'*M) - sum(T'*M)/sum(M)*CCorr(I, M)
        // It does not matter what to use Mat/MatExpr, it should be evaluated to perform assign subtraction
        Mat temp_res = img_mask_corr.mul(sum(templx_mask).div(mask_sum));
        if (img.channels() == 1)
        {
            result -= temp_res;
        }
        else
        {
            // Sum channels of expression
            temp_res = temp_res.reshape(1, result.rows * result.cols);
            // channels are now columns
            reduce(temp_res, temp_res, 1, REDUCE_SUM);
            // transform back, but now with only one channel
            result -= temp_res.reshape(1, result.rows);
        }
        if (method == CV_TM_CCOEFF_NORMED)
        {
            // norm(T')
            double norm_templx = norm(mask.mul(templ - sum(mask.mul(templ)).div(mask_sum)),
                                      NORM_L2);

            // norm(I') = sqrt{ CCorr(I^2, M^2) - 2*CCorr(I, M^2)/sum(M)*CCorr(I, M)
            //                  + sum(M^2)*CCorr(I, M)^2/sum(M)^2 }
            //          = sqrt{ CCorr(I^2, M^2)
            //                  + CCorr(I, M)/sum(M)*{ sum(M^2) / sum(M) * CCorr(I,M)
            //                  - 2 * CCorr(I, M^2) } }
            Mat norm_imgx(corrSize, CV_32F);
            Mat img2 = img.mul(img);
            Mat mask2 = mask.mul(mask);
            Scalar mask2_sum = sum(mask2);
            Mat img_mask2_corr(corrSize, img.type());
            crossCorr(img2, mask2, norm_imgx, Point(0,0), 0, 0);
            crossCorr(img, mask2, img_mask2_corr, Point(0,0), 0, 0);
            temp_res = img_mask_corr.mul(Scalar(1.0, 1.0, 1.0, 1.0).div(mask_sum))
                           .mul(img_mask_corr.mul(mask2_sum.div(mask_sum)) - 2 * img_mask2_corr);
            if (img.channels() == 1)
            {
                norm_imgx += temp_res;
            }
            else
            {
                // Sum channels of expression
                temp_res = temp_res.reshape(1, result.rows*result.cols);
                // channels are now columns
                // reduce sums columns (= channels)
                reduce(temp_res, temp_res, 1, REDUCE_SUM);
                // transform back, but now with only one channel
                norm_imgx += temp_res.reshape(1, result.rows);
            }
            sqrt(norm_imgx, norm_imgx);
            result /= norm_imgx * norm_templx;
        }
    }
}
下面参照其中 TM_CCOEFF_NORMED 的部分,自己照着实现了一遍。如果想做多角度匹配,主要需要分清哪些计算只与模板有关(属于模板创建阶段),哪些计算属于匹配阶段。
/**
 * Hand-written masked TM_CCOEFF_NORMED matching, split into a template
 * preparation stage and a match stage so that each can be timed separately.
 *
 * @param src           Single-channel image to search; CV_8U input is converted
 *                      to CV_32F. Must be at least as large as the template.
 * @param templ         Single-channel template, CV_8U or CV_32F.
 * @param mask          Single-channel mask of the same size as templ, CV_8U or
 *                      CV_32F. A CV_8U mask is binarised (non-zero -> 1). The
 *                      maths below relies on M^2 == M, so a CV_32F mask is
 *                      expected to contain only 0 and 1.
 * @param dCreatTemplT  [out] Time spent on template preparation, in milliseconds.
 * @param dMatch        [out] Time spent on the match stage, in milliseconds.
 *
 * The correlation map itself is computed into a local Mat and discarded; only
 * the two timings are reported. The Mat arguments are taken by value, and none
 * of the caller's pixel buffers is modified.
 *
 * Relies on ConvDFT(Mat, Mat, Mat&), which must be declared before this
 * definition.
 */
void NCCWithMask(Mat src, Mat templ, Mat mask, double& dCreatTemplT, double& dMatch)
{
    double t1 = getTickCount();

    //---------------------------Template preparation---------------------------
    if (templ.depth() == CV_8U) { templ.convertTo(templ, CV_32F); }
    if (mask.depth() == CV_8U)
    {
        // threshold() keeps size and type, so doing it in place would overwrite
        // the caller's mask through the shared buffer; use a temporary instead.
        Mat maskBinary;
        threshold(mask, maskBinary, 0/*threshold*/, 1.0/*maxVal*/, THRESH_BINARY);
        maskBinary.convertTo(mask, CV_32F);
    }

    // The loops below walk the data through raw float pointers, so the layout
    // has to be exactly single-channel 32-bit float (type CV_32F == CV_32FC1).
    CV_Assert(!templ.empty());
    CV_Assert(templ.type() == CV_32F && mask.type() == CV_32F);
    CV_Assert(templ.size() == mask.size());
    CV_Assert(src.channels() == 1);
    CV_Assert(src.rows >= templ.rows && src.cols >= templ.cols);

    // Flat indexing requires rows to be stored back to back.
    if (!src.isContinuous()) { src = src.clone(); }
    if (!templ.isContinuous()) { templ = templ.clone(); }
    if (!mask.isContinuous()) { mask = mask.clone(); }

    const int Width = templ.cols;
    const int Height = templ.rows;
    const int templArea = Width * Height;

    Mat templx_mask = Mat::zeros(Size(Width, Height), CV_32F);
    const float* pTempl = templ.ptr<float>(0);
    const float* pMask = mask.ptr<float>(0);
    float* pTemplxMask = templx_mask.ptr<float>(0);

    const int nStep = v_float32::nlanes;
    // Index of the first element that no longer fills a whole SIMD register.
    const int templSimdEnd = templArea - templArea % nStep;

    //---------------For getting sum(M), sum(T*M)---------------
    double dMaskSum = 0;
    double dTemplMulMaskSum = 0; // sum(T*M)
    v_float32 v_maskAcc = vx_setzero_f32();
    v_float32 v_templMaskAcc = vx_setzero_f32();
    for (int i = 0; i < templSimdEnd; i += nStep)
    {
        const v_float32 v_t = vx_load(pTempl + i);
        const v_float32 v_m = vx_load(pMask + i);
        v_maskAcc = v_maskAcc + v_m;
        v_templMaskAcc = v_templMaskAcc + v_t * v_m;
    }
    // Elements that do not fill a whole SIMD register
    for (int i = templSimdEnd; i < templArea; ++i)
    {
        dMaskSum += pMask[i];
        dTemplMulMaskSum += pMask[i] * pTempl[i];
    }
    dMaskSum += v_reduce_sum(v_maskAcc);
    dTemplMulMaskSum += v_reduce_sum(v_templMaskAcc);
    // Binary mask: M^2 == M, hence sum(M^2) == sum(M).
    const double dMask2Sum = dMaskSum;

    //---------------For getting (T'*M), sum(T'*M), norm(T')---------------
    // T' = M * (T - sum(T*M)/sum(M)); with a binary mask T'*M == T'.
    const float fMaskedMean = static_cast<float>(dTemplMulMaskSum / dMaskSum);
    const v_float32 v_maskedMean = vx_setall_f32(fMaskedMean);
    double dSumTemplx_Mask = 0;
    double dNorm_templx = 0;
    v_float32 v_sumTemplxAcc = vx_setzero_f32();
    v_float32 v_normTemplxAcc = vx_setzero_f32();
    for (int i = 0; i < templSimdEnd; i += nStep)
    {
        const v_float32 v_t = vx_load(pTempl + i);
        const v_float32 v_m = vx_load(pMask + i);
        const v_float32 v_tx = v_m * (v_t - v_maskedMean);
        vx_store(pTemplxMask + i, v_tx);
        v_sumTemplxAcc = v_sumTemplxAcc + v_tx;
        v_normTemplxAcc = v_normTemplxAcc + v_tx * v_tx;
    }
    // Elements that do not fill a whole SIMD register
    for (int i = templSimdEnd; i < templArea; ++i)
    {
        pTemplxMask[i] = pMask[i] * (pTempl[i] - fMaskedMean);
        const double tx = pTemplxMask[i];
        dSumTemplx_Mask += tx;
        dNorm_templx += tx * tx;
    }
    dSumTemplx_Mask += v_reduce_sum(v_sumTemplxAcc);
    dNorm_templx += v_reduce_sum(v_normTemplxAcc);
    dNorm_templx = std::sqrt(dNorm_templx);

    dCreatTemplT = ((getTickCount() - t1) / (getTickFrequency() * 1.)) * 1000;//ms
    t1 = getTickCount();

    //---------------------------------Match---------------------------------
    if (src.depth() == CV_8U) { src.convertTo(src, CV_32F); }
    const Size size(src.cols - Width + 1, src.rows - Height + 1);

    Mat result = Mat::zeros(size, CV_32F);
    Mat img_mask_corr = Mat::zeros(size, CV_32F);
    ConvDFT(src, templx_mask, result);      // CCorr(I, T'*M)
    ConvDFT(src, mask, img_mask_corr);      // CCorr(I, M)

    Mat src2 = src.mul(src);
    Mat norm_imgx(size, CV_32F);
    ConvDFT(src2, mask, norm_imgx);         // CCorr(I^2, M^2), M^2 = M

    float* pRes = result.ptr<float>(0);
    const float* pCorrIM = img_mask_corr.ptr<float>(0);
    // CCorr(I, M^2) equals CCorr(I, M) for a binary mask, so the same buffer is
    // read for both terms instead of cloning it.
    const float* pCorrIM2 = pCorrIM;
    float* pNormI = norm_imgx.ptr<float>(0);

    const int resArea = size.area();
    const int resSimdEnd = resArea - resArea % nStep;

    const v_float32 v_maskSum = vx_setall_f32(static_cast<float>(dMaskSum));
    const v_float32 v_mask2Sum = vx_setall_f32(static_cast<float>(dMask2Sum));
    const v_float32 v_normTemplx = vx_setall_f32(static_cast<float>(dNorm_templx));
    const v_float32 v_two = vx_setall_f32(2.0f);
    // sum(T'*M)/sum(M) does not depend on the position: hoisted out of the loop.
    const v_float32 v_meanCoef = vx_setall_f32(static_cast<float>(dSumTemplx_Mask)) / v_maskSum;

    // CCorr(I', T') = CCorr(I, T'*M) - sum(T'*M)/sum(M)*CCorr(I, M)
    // norm(I') = sqrt{ CCorr(I^2, M^2) + CCorr(I, M)/sum(M)*{ sum(M^2) / sum(M) * CCorr(I,M) - 2 * CCorr(I, M^2) } }
    // result = CCorr(I', T') / (norm(I') * norm(T'))
    for (int i = 0; i < resSimdEnd; i += nStep)
    {
        const v_float32 v_corrIM = vx_load(pCorrIM + i);
        const v_float32 v_corrIM2 = vx_load(pCorrIM2 + i);
        const v_float32 v_normI = v_sqrt(vx_load(pNormI + i)
            + v_corrIM / v_maskSum * (v_corrIM * v_mask2Sum / v_maskSum - v_two * v_corrIM2));
        const v_float32 v_res = (vx_load(pRes + i) - v_meanCoef * v_corrIM) / (v_normI * v_normTemplx);
        vx_store(pRes + i, v_res);
    }
    // Scalar tail. It has to apply exactly the same formula as the SIMD loop,
    // including the subtraction of the mean term before the normalisation.
    for (int i = resSimdEnd; i < resArea; ++i)
    {
        const double corrIM = pCorrIM[i];
        const double normI = std::sqrt(pNormI[i]
            + corrIM / dMaskSum * (corrIM * dMask2Sum / dMaskSum - 2 * pCorrIM2[i]));
        pNormI[i] = static_cast<float>(normI);
        pRes[i] = static_cast<float>((pRes[i] - dSumTemplx_Mask / dMaskSum * corrIM)
                                     / (normI * dNorm_templx));
    }

    dMatch = ((getTickCount() - t1) / (getTickFrequency() * 1.)) * 1000;//ms
}
// Adapted from OpenCV's crossCorr source code.
//
// Correlates `img` with `_templ` block by block in the frequency domain and
// writes the result into `corr`.
//
// img    - image to correlate.
// _templ - template; converted to max(CV_32F, img depth) when its depth differs
//          from both the image depth and that target depth.
// corr   - output. It is NOT allocated here: it is cleared with setTo(0) and its
//          existing cols/rows/depth determine the tiling and the output
//          conversion, so the caller must size it beforehand.
//
// NOTE(review): every hal::DFT2D object is created with channel counts of 1, so
// this looks like it only supports single-channel inputs - confirm.
void ConvDFT(Mat img, Mat _templ, Mat& corr)
{
// Fixed settings: nothing is added to the output and the template is anchored
// at its top-left corner.
double delta = 0;
// NOTE(review): 16 is presumably BORDER_ISOLATED, which would make the
// ROI-expansion branch further down unreachable - confirm against the enum.
int borderType = 16;
Point anchor(0, 0);
// Block size heuristics: a block is blockScale times the template size, but
// block + template - 1 is kept at or above minBlockSize.
const double blockScale = 4.5;
const int minBlockSize = 256;
// buf is declared and resized to 0 below but never otherwise used.
std::vector<uchar> buf;
corr.setTo(0);
Mat templ = _templ;
int depth = img.depth();
int tdepth = templ.depth();
int cdepth = corr.depth();
if (depth != tdepth && tdepth != std::max(CV_32F, depth))
{
_templ.convertTo(templ, std::max(CV_32F, depth));
tdepth = templ.depth();
}
// Working depth of the DFT buffers: CV_64F whenever the image depth is above
// CV_8S, otherwise the largest of CV_32F, the template depth and the output depth.
int maxDepth = depth > CV_8S ? CV_64F : std::max(std::max(CV_32F, tdepth), cdepth);
Size blocksize, dftsize;
// Initial block size guess, clamped to the output size.
blocksize.width = cvRound(templ.cols * blockScale);
blocksize.width = std::max(blocksize.width, minBlockSize - templ.cols + 1);
blocksize.width = std::min(blocksize.width, corr.cols);
blocksize.height = cvRound(templ.rows * blockScale);
blocksize.height = std::max(blocksize.height, minBlockSize - templ.rows + 1);
blocksize.height = std::min(blocksize.height, corr.rows);
// DFT size must hold a block plus the template overlap; width is at least 2.
dftsize.width = std::max(getOptimalDFTSize(blocksize.width + templ.cols - 1), 2);
dftsize.height = getOptimalDFTSize(blocksize.height + templ.rows - 1);
// recompute block size
blocksize.width = dftsize.width - templ.cols + 1;
blocksize.width = MIN(blocksize.width, corr.cols);
blocksize.height = dftsize.height - templ.rows + 1;
blocksize.height = MIN(blocksize.height, corr.rows);
// dftTempl is not zero-initialised here; see the note before the template DFT.
Mat dftTempl(dftsize.height, dftsize.width, maxDepth);
Mat dftImg(dftsize, maxDepth);
//int i = 0, bufSize = 0;
buf.resize(0);
// In-place forward DFT for the template; the last argument is templ.rows
// (presumably the number of non-zero input rows - confirm against hal::DFT2D).
Ptr<hal::DFT2D> c = hal::DFT2D::create(dftsize.width, dftsize.height, dftTempl.depth(), 1, 1, CV_HAL_DFT_IS_INPLACE, templ.rows);
// compute DFT of each template plane
Mat src = templ;
// dst: the whole DFT buffer; dst1: its top-left corner where the template goes.
Mat dst(dftTempl, Rect(0, 0, dftsize.width, dftsize.height));
Mat dst1(dftTempl, Rect(0, 0, templ.cols, templ.rows));
if (dst1.data != src.data)
src.convertTo(dst1, dst1.depth());
// Zero the columns to the right of the template, within the template rows only;
// rows below templ.rows are not cleared here.
if (dst.cols > templ.cols)
{
Mat part(dst, Range(0, templ.rows), Range(templ.cols, dst.cols));
part = Scalar::all(0);
}
c->apply(dst.data, (int)dst.step, dst.data, (int)dst.step);
// Number of tiles needed to cover the output (ceil division in each direction).
int tileCountX = (corr.cols + blocksize.width - 1) / blocksize.width;
int tileCountY = (corr.rows + blocksize.height - 1) / blocksize.height;
int tileCount = tileCountX * tileCountY;
Size wholeSize = img.size();
Point roiofs(0, 0);
Mat img0 = img;
// When the isolated flag is not set, widen img0 to the parent matrix of the ROI
// so that pixels outside the ROI are read instead of extrapolated.
if (!(borderType & BORDER_ISOLATED))
{
img.locateROI(wholeSize, roiofs);
img0.adjustROI(roiofs.y, wholeSize.height - img.rows - roiofs.y,
roiofs.x, wholeSize.width - img.cols - roiofs.x);
}
borderType |= BORDER_ISOLATED;
// cF: forward DFT, cR: scaled inverse DFT, both in place; they are only used
// for full-height tiles (see the bsz.height checks in the loop).
Ptr<hal::DFT2D> cF, cR;
int f = CV_HAL_DFT_IS_INPLACE;
int f_inv = f | CV_HAL_DFT_INVERSE | CV_HAL_DFT_SCALE;
cF = hal::DFT2D::create(dftsize.width, dftsize.height, maxDepth, 1, 1, f, blocksize.height + templ.rows - 1);
cR = hal::DFT2D::create(dftsize.width, dftsize.height, maxDepth, 1, 1, f_inv, blocksize.height);
// calculate correlation by blocks
for (int i = 0; i < tileCount; i++)
{
// Top-left corner of this tile in the output.
int x = (i % tileCountX) * blocksize.width;
int y = (i / tileCountX) * blocksize.height;
// bsz: output tile size (smaller at the right/bottom edges);
// dsz: size of the image patch needed to produce that tile.
Size bsz(std::min(blocksize.width, corr.cols - x),
std::min(blocksize.height, corr.rows - y));
Size dsz(bsz.width + templ.cols - 1, bsz.height + templ.rows - 1);
// (x0,y0): wanted patch origin in img0; (x1,y1)-(x2,y2): the part of the patch
// that actually lies inside img0.
int x0 = x - anchor.x + roiofs.x, y0 = y - anchor.y + roiofs.y;
int x1 = std::max(0, x0), y1 = std::max(0, y0);
int x2 = std::min(img0.cols, x0 + dsz.width);
int y2 = std::min(img0.rows, y0 + dsz.height);
Mat src0(img0, Range(y1, y2), Range(x1, x2));
Mat dst(dftImg, Rect(0, 0, dsz.width, dsz.height));
Mat dst1(dftImg, Rect(x1 - x0, y1 - y0, x2 - x1, y2 - y1));
Mat cdst(corr, Rect(x, y, bsz.width, bsz.height));
Mat src = src0;
// Clear the whole work buffer so the padding around the patch is zero.
dftImg = Scalar::all(0);
if (dst1.data != src.data)
src.convertTo(dst1, dst1.depth());
// Patch sticks out of the image: fill the missing border inside the buffer.
if (x2 - x1 < dsz.width || y2 - y1 < dsz.height)
copyMakeBorder(dst1, dst, y1 - y0, dst.rows - dst1.rows - (y1 - y0),
x1 - x0, dst.cols - dst1.cols - (x1 - x0), borderType);
// Full-height tiles reuse the precreated DFT object; shorter edge tiles fall
// back to dft() with their own non-zero row count.
if (bsz.height == blocksize.height)
cF->apply(dftImg.data, (int)dftImg.step, dftImg.data, (int)dftImg.step);
else
dft(dftImg, dftImg, 0, dsz.height);
Mat dftTempl1(dftTempl, Rect(0, 0, dftsize.width, dftsize.height));
// Multiply the image spectrum by the conjugated template spectrum
// (last argument true), i.e. correlation rather than convolution.
mulSpectrums(dftImg, dftTempl1, dftImg, 0, true);
if (bsz.height == blocksize.height)
cR->apply(dftImg.data, (int)dftImg.step, dftImg.data, (int)dftImg.step);
else
dft(dftImg, dftImg, DFT_INVERSE + DFT_SCALE, bsz.height);
// The valid correlation values are in the top-left bsz region; copy them into
// the output tile, converting to the output depth and adding delta.
src = dftImg(Rect(0, 0, bsz.width, bsz.height));
src.convertTo(cdst, cdepth, 1, delta);
}
}