opencv(C++)GPU、CPU 模板匹配

摘要:

本文主要关注opencv常规版和cuda版的模板匹配算法,网上cuda版的资料不多,这里做个记录,以后用到也好有个参考。

@[toc]

1. opencv cuda版配置

opencv cuda版需要自己用cmake编译,编译过程并不复杂:先用cmake生成vs项目,再用vs编译出opencv_worldXXX.dll。编译过程可参考link1、link2。

  • 编译后的opencv 目录


    在这里插入图片描述
  • vs项目配置,包含目录,库目录,链接器


    在这里插入图片描述

    在这里插入图片描述

    注:这里的opencv_world341.lib对应的opencv_world341.dll有两种办法加入到项目中,一是:在系统环境变量中添加它的路径,二是:把它直接复制到项目.exe所在路径。如果有第三种方法,比如在vs项目的什么地方添加一下什么路径就行的话,还请告知,因为我总觉得这两者都不怎么高效,特别是需要移植到不同计算机上时。

2. 源码

  • 包含cpu、gpu版的模板匹配算法demo示例,还有一个速度对比测试。
#include <cstdio>
#include <ctime>
#include <iostream>
#include <map>
#include <string>

#include <opencv2/opencv.hpp>
#include <opencv2/cudaimgproc.hpp>

/**
 * @brief Template matching on the GPU via cv::cuda::TemplateMatching.
 *
 * Uploads both images to the device, runs the matcher selected by @p mode,
 * downloads the score map into @p result and reports the best score and its
 * position.
 *
 * @param srcImage  Image to search in.
 * @param tempImage Template to search for; must not be larger than @p srcImage.
 * @param result    Receives the score map downloaded from the device.
 * @param matchVal  Receives the best score: the minimum for the SQDIFF modes
 *                  (0 and 1), the maximum for every other mode.
 * @param matchLoc  Receives the position of the best score inside @p result.
 * @param mode      0 TM_SQDIFF, 1 TM_SQDIFF_NORMED, 2 TM_CCORR, 3 TM_CCORR_NORMED,
 *                  4 TM_CCOEFF, 5 TM_CCOEFF_NORMED; any other value falls back
 *                  to TM_CCOEFF_NORMED.
 * @return 0 on success, -1 when an input is empty or the template does not
 *         fit inside the source image.
 */
int gpuTemplateMatch(const cv::Mat &srcImage, const cv::Mat &tempImage, cv::Mat &result,
    double &matchVal, cv::Point &matchLoc, int mode)
{
    if (srcImage.empty() || tempImage.empty())
    {
        std::cout << "ERROR:In function gpuTemplateMatch: input image is empty! \n";
        return -1;
    }
    // Same guard as cpuTemplateMatch: reject a template that does not fit
    // before any data is sent to the device.
    if (tempImage.cols > srcImage.cols || tempImage.rows > srcImage.rows)
    {
        std::cout << "ERROR:In function gpuTemplateMatch: source image's size should not be smaller than template's \n";
        return -1;
    }

    // Translate the numeric mode into the OpenCV comparison method.
    int method = cv::TM_CCOEFF_NORMED;
    switch (mode)
    {
    case 0: method = cv::TM_SQDIFF;        break; // R = sum (t-Roi)^2
    case 1: method = cv::TM_SQDIFF_NORMED; break; // R = sum (t-Roi)^2 / sqrt(sum t^2 * sum Roi^2)
    case 2: method = cv::TM_CCORR;         break; // R = sum t*Roi
    case 3: method = cv::TM_CCORR_NORMED;  break; // R = sum t*Roi / sqrt(sum t^2 * sum Roi^2)
    case 4: method = cv::TM_CCOEFF;        break; // R = sum t1*Roi1, t1 = t - t_mean, Roi1 = Roi - Roi_mean
    case 5: method = cv::TM_CCOEFF_NORMED; break; // R = sum t1*Roi1 / sqrt(sum t1^2 * sum Roi1^2)
    default: break;                               // unknown mode -> TM_CCOEFF_NORMED
    }
    // Squared-difference scores are best when smallest; correlation scores
    // are best when largest.
    const bool bestIsMinimum = (method == cv::TM_SQDIFF || method == cv::TM_SQDIFF_NORMED);

    cv::cuda::GpuMat d_srcImage;
    cv::cuda::GpuMat d_tempImage;
    cv::cuda::GpuMat d_result;
    d_srcImage.upload(srcImage);
    d_tempImage.upload(tempImage);

    cv::Ptr<cv::cuda::TemplateMatching> alg =
        cv::cuda::createTemplateMatching(srcImage.type(), method);
    alg->match(d_srcImage, d_tempImage, d_result);
    d_result.download(result);

    if (bestIsMinimum)
    {
        cv::minMaxLoc(result, &matchVal, nullptr, &matchLoc, nullptr);
    }
    else
    {
        cv::minMaxLoc(result, nullptr, &matchVal, nullptr, &matchLoc);
    }

    return 0;
}


/**
 * @brief Template matching on the CPU via cv::matchTemplate.
 *
 * See https://docs.opencv.org/3.4.12/de/da9/tutorial_template_matching.html
 *
 * @param srcImage  Image to search in.
 * @param tempImage Template to search for; must not be larger than @p srcImage.
 * @param result    Receives the score map produced by cv::matchTemplate.
 * @param matchVal  Receives the best score: the minimum for the SQDIFF modes
 *                  (0 and 1), the maximum for every other mode.
 * @param matchLoc  Receives the position of the best score inside @p result.
 * @param mode      0 TM_SQDIFF, 1 TM_SQDIFF_NORMED, 2 TM_CCORR, 3 TM_CCORR_NORMED,
 *                  4 TM_CCOEFF, 5 TM_CCOEFF_NORMED; any other value falls back
 *                  to TM_CCOEFF_NORMED.
 * @return 0 on success, -1 when an input is empty or the template does not
 *         fit inside the source image.
 */
int cpuTemplateMatch(const cv::Mat &srcImage, const cv::Mat &tempImage, cv::Mat &result,
    double &matchVal, cv::Point &matchLoc, int mode)
{
    if (srcImage.empty() || tempImage.empty())
    {
        std::cout << "ERROR:In function cpuTemplateMatch: input image is empty! \n";
        return -1;
    }

    // The template has to fit inside the source image (equal size is allowed).
    if (tempImage.cols > srcImage.cols || tempImage.rows > srcImage.rows)
    {
        // The message now names this function; it previously reported a
        // non-existent "opencvTemplateMatch" and misspelled "template".
        std::cout << "ERROR:In function cpuTemplateMatch: source image's size should not be smaller than template's \n";
        return -1;
    }

    // Translate the numeric mode into the OpenCV comparison method.
    int method = cv::TM_CCOEFF_NORMED;
    switch (mode)
    {
    case 0: method = cv::TM_SQDIFF;        break; // R = sum (t-Roi)^2
    case 1: method = cv::TM_SQDIFF_NORMED; break; // R = sum (t-Roi)^2 / sqrt(sum t^2 * sum Roi^2)
    case 2: method = cv::TM_CCORR;         break; // R = sum t*Roi
    case 3: method = cv::TM_CCORR_NORMED;  break; // R = sum t*Roi / sqrt(sum t^2 * sum Roi^2)
    case 4: method = cv::TM_CCOEFF;        break; // R = sum t1*Roi1, t1 = t - t_mean, Roi1 = Roi - Roi_mean
    case 5: method = cv::TM_CCOEFF_NORMED; break; // R = sum t1*Roi1 / sqrt(sum t1^2 * sum Roi1^2)
    default: break;                               // unknown mode -> TM_CCOEFF_NORMED
    }

    cv::matchTemplate(srcImage, tempImage, result, method);

    // Squared-difference scores are best when smallest; correlation scores
    // are best when largest.
    if (method == cv::TM_SQDIFF || method == cv::TM_SQDIFF_NORMED)
    {
        cv::minMaxLoc(result, &matchVal, nullptr, &matchLoc, nullptr);
    }
    else
    {
        cv::minMaxLoc(result, nullptr, &matchVal, nullptr, &matchLoc);
    }

    return 0;
}


int speedTest()
{
    std::map matchMode =
    {
        {0,"TM_SQDIFF"},
        {1,"TM_SQDIFF_NORMED"},
        {2,"TM_CCORR"},
        {3,"TM_CCORR_NORMED"},
        {4,"TM_CCOEFF"},
        {5,"cv::TM_CCOEFF_NORMED"}
    };
    /*std::string srcPath = "K:\\imageData\\totalBoard\\image2\\00000103_1-1.png";
    std::string tempPath = "K:\\imageData\\totalBoard\\image2\\00000103_1-2.png";
    cv::Mat srcImage = cv::imread(srcPath, 0);
    cv::Mat tempImage = cv::imread(tempPath, 0);*/

    cv::Mat srcImage = cv::Mat::zeros(cv::Size(200,200),CV_8UC1);
    cv::Mat tempImage = cv::Mat::ones(cv::Size(20, 20), CV_8UC1);
    clock_t start, end;
    double matchVal;
    cv::Point matchLoc;
    cv::Mat result;
    cv::cuda::GpuMat d_srcImage, d_tempImage, d_result;
    cv::Ptr alg;
    int TIMES = 10;

    for (int mode = 0; mode < 6; mode++)
    {
        for (int size = 100; size < 1000; size += 100)
        {
            //resize the image 
            cv::resize(srcImage, srcImage, cv::Size(2000 , 2000 ));
            cv::resize(tempImage, tempImage, cv::Size(size, size));
            d_srcImage.upload(srcImage);
            d_tempImage.upload(tempImage);
            //gpu match
            start = clock();
            for (int times = 0; times < TIMES; times++)
            {
                gpuTemplateMatch(srcImage, tempImage, result, matchVal, matchLoc, mode);
            }
           end = clock();
            auto runtime_gpu = (end - start) / TIMES * 1000 / CLOCKS_PER_SEC;

            //cpu match
            start = clock();
            for (int times = 0; times < TIMES; times++)
            {
                cpuTemplateMatch(srcImage, tempImage, result, matchVal, matchLoc, mode);
            }
            end = clock();
            auto runtime_cpu = (end - start) / TIMES * 1000 / CLOCKS_PER_SEC;

            //gpu compute only
            start = clock();
            for (int times = 0; times < TIMES; times++)
            {
                alg = cv::cuda::createTemplateMatching(srcImage.type(), mode);//
                alg->match(d_srcImage, d_tempImage, d_result);
            }
            end = clock();
            auto runtime_gpuComputing = (end - start) / TIMES * 1000 / CLOCKS_PER_SEC;
            
            printf("[+++++++++++++++++++++++++++++++++++++++++++++]\n");
            printf("srcSize=[%d,%d], tempSize=[%d,%d]\n", srcImage.rows, srcImage.cols, tempImage.rows, tempImage.cols);
            printf("match mode:%s\n", matchMode[mode].c_str());
            printf("gpu total runtime:%d ms\n", runtime_gpu);
            printf("cpu total runtime:%d ms\n",runtime_cpu);
            printf("cpuT / gpuT :%3f\n", double(runtime_cpu) / double(runtime_gpu));
            printf("gpu compute time:%dms\n", runtime_gpuComputing);
        }
    }
    return 0;
}


int gpuTemplateMatchDemo()
{
    std::string srcPath = "K:\\imageData\\totalBoard\\image2\\00000103_1-1.png";
    std::string tempPath = "K:\\imageData\\totalBoard\\image2\\00000103_1-2.png";
    cv::Mat srcImage = cv::imread(srcPath, 0);
    cv::Mat tempImage = cv::imread(tempPath, 0);

    //match
    double matchVal;
    cv::Point matchLoc;
    cv::Mat result;
    int mode = 3;
    gpuTemplateMatch(srcImage, tempImage, result, matchVal, matchLoc, mode);

    //show result
    std::cout << "matchVal = " << matchVal << std::endl;
    cv::Point topLeft = matchLoc;
    cv::Point bottomRight = cv::Point(topLeft.x + tempImage.cols, topLeft.y + tempImage.rows);
    cv::Mat drawImage = cv::imread(srcPath);
    cv::rectangle(drawImage, cv::Rect(topLeft, bottomRight), cv::Scalar(0, 255, 0),2);
    cv::imshow("srcImage", srcImage);
    cv::imshow("tempImage", tempImage);
    cv::imshow("drawImage", drawImage);
    //show results
    cv::normalize(result, result, 0, 1, cv::NORM_MINMAX);
    cv::imshow("result", result);
    cv::waitKey(0);
    cv::destroyAllWindows();

    return 0;
}


int cpuTemplateMatchDemo()
{
    //prepare image and template
    std::string srcPath = "K:\\imageData\\totalBoard\\image2\\00000103_1-1.png";
    std::string tempPath = "K:\\imageData\\totalBoard\\image2\\00000103_1-2.png";
    cv::Mat srcImage = cv::imread(srcPath, 0);
    cv::Mat tempImage = cv::imread(tempPath, 0);

    //match
    double matchVal;
    cv::Point matchLoc;
    cv::Mat result;
    int mode = 1;
    cpuTemplateMatch(srcImage, tempImage, result, matchVal, matchLoc, mode);

    //show result
    std::cout << "matchVal = " << matchVal << std::endl;
    cv::Point topLeft = matchLoc;
    cv::Point bottomRight = cv::Point(topLeft.x + tempImage.cols, topLeft.y + tempImage.rows);
    cv::Mat drawImage = cv::imread(srcPath);
    cv::rectangle(drawImage, cv::Rect(topLeft, bottomRight), cv::Scalar(0, 255, 0),2); 
    cv::imshow("srcImage", srcImage);
    cv::imshow("tempImage", tempImage);
    cv::imshow("drawImage", drawImage);
    //show results
    cv::normalize(result, result, 0, 1, cv::NORM_MINMAX);
    cv::imshow("result", result);
    cv::waitKey(0);
    cv::destroyAllWindows();

    return 0;
}


/**
 * @brief Entry point: runs the GPU demo, the CPU demo and then the benchmark.
 *
 * The status codes of the three steps are deliberately discarded; the process
 * always exits with 0.
 */
int main()
{
    (void)gpuTemplateMatchDemo();   // interactive: blocks until a key is pressed
    (void)cpuTemplateMatchDemo();   // interactive: blocks until a key is pressed
    (void)speedTest();              // prints the timing table

    return 0;
}

3. 结果

  • demo


    在这里插入图片描述
  • 速度对比(部分结果)
[+++++++++++++++++++++++++++++++++++++++++++++]
srcSize=[2000,2000], tempSize=[100,100]
match mode:TM_SQDIFF_NORMED
gpu total runtime:103 ms
cpu total runtime:106 ms
cpuT / gpuT :1.029126
gpu compute time:91ms
[+++++++++++++++++++++++++++++++++++++++++++++]
srcSize=[2000,2000], tempSize=[200,200]
match mode:TM_SQDIFF_NORMED
gpu total runtime:103 ms
cpu total runtime:95 ms
cpuT / gpuT :0.922330
gpu compute time:90ms
[+++++++++++++++++++++++++++++++++++++++++++++]
srcSize=[2000,2000], tempSize=[300,300]
match mode:TM_SQDIFF_NORMED
gpu total runtime:101 ms
cpu total runtime:99 ms
cpuT / gpuT :0.980198
gpu compute time:89ms
[+++++++++++++++++++++++++++++++++++++++++++++]
srcSize=[2000,2000], tempSize=[400,400]
match mode:TM_SQDIFF_NORMED
gpu total runtime:101 ms
cpu total runtime:97 ms
cpuT / gpuT :0.960396
gpu compute time:90ms
[+++++++++++++++++++++++++++++++++++++++++++++]
srcSize=[2000,2000], tempSize=[500,500]
match mode:TM_SQDIFF_NORMED
gpu total runtime:100 ms
cpu total runtime:94 ms
cpuT / gpuT :0.940000
gpu compute time:90ms
[+++++++++++++++++++++++++++++++++++++++++++++]
srcSize=[2000,2000], tempSize=[600,600]
match mode:TM_SQDIFF_NORMED
gpu total runtime:111 ms
cpu total runtime:91 ms
cpuT / gpuT :0.819820
gpu compute time:102ms
[+++++++++++++++++++++++++++++++++++++++++++++]
srcSize=[2000,2000], tempSize=[700,700]
match mode:TM_SQDIFF_NORMED
gpu total runtime:111 ms
cpu total runtime:91 ms
cpuT / gpuT :0.819820
gpu compute time:102ms
[+++++++++++++++++++++++++++++++++++++++++++++]
srcSize=[2000,2000], tempSize=[800,800]
match mode:TM_SQDIFF_NORMED
gpu total runtime:110 ms
cpu total runtime:89 ms
cpuT / gpuT :0.809091
gpu compute time:102ms
[+++++++++++++++++++++++++++++++++++++++++++++]
srcSize=[2000,2000], tempSize=[900,900]
match mode:TM_SQDIFF_NORMED
gpu total runtime:108 ms
cpu total runtime:85 ms
cpuT / gpuT :0.787037
gpu compute time:101ms
  • 速度对比测试时的GPU状态


    在这里插入图片描述

4. 总结

GPU加速模板匹配看起来效果并不是很好:测试了不同大小的图片,只有个别情况下速度会超过CPU。本来觉得应该会有几倍的加速效果,但其实并没有,大多数情况下反而变慢了。开始觉得是cpu向gpu传图的过程耗时较多,后面去掉传图的过程只看匹配过程,它的计算还是比cpu慢,不知道是不是因为这块GPU太低端了。

你可能感兴趣的:(opencv(C++)GPU、CPU 模板匹配)