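1、构建opencv_contrib模块:在 opencv_contrib/modules/ 下新建 cuda2 目录,目录结构如下(原文此处的目录结构图缺失,以下按 OpenCV 模块的惯用布局列出,请以实际工程为准):
opencv_contrib/modules/cuda2/
├── CMakeLists.txt
├── include/opencv2/cuda2.hpp
└── src/
    ├── cuda2.cpp
    └── precomp.hpp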
1.1 cuda2.hpp 内容如下:
#ifndef __OPENCV_CUDA2_HPP__
#define __OPENCV_CUDA2_HPP__
#include "opencv2/core.hpp"
#include "opencv2/imgproc.hpp"
// NOTE(review): a nameless `#include` directive stood here (its header name was
// lost). A bare `#include` is ill-formed, and the declarations below only use
// names from the two headers above. Restore the intended header if one is
// actually required.
namespace cv
{
namespace cuda2
{
/** @brief Host-side wrapper around cv::cuda::threshold.

The implementation uploads @p src to a cv::cuda::GpuMat, calls
cv::cuda::threshold on it and downloads the result into @p dst.

@param src    Input array in host memory.
@param dst    Output array in host memory; receives the downloaded result.
@param thresh Forwarded unchanged to cv::cuda::threshold.
@param maxval Forwarded unchanged to cv::cuda::threshold.
@param type   Forwarded unchanged to cv::cuda::threshold.
 */
CV_EXPORTS_W void threshold(InputArray src, OutputArray dst, double thresh, double maxval, int type);

/** @brief Host-side wrapper around cv::cuda::resize.

The implementation uploads @p src, calls cv::cuda::resize with @p dsize and
both scale factors set to 0, then downloads the result into @p dst.

@param src           Input array in host memory.
@param dst           Output array in host memory; receives the downloaded result.
@param dsize         Requested output size, forwarded to cv::cuda::resize.
@param interpolation Interpolation flag, forwarded to cv::cuda::resize
                     (defaults to INTER_LINEAR).
 */
CV_EXPORTS_W void resize(InputArray src, OutputArray dst, Size dsize, int interpolation=INTER_LINEAR);

/** @brief Host-side wrapper around cv::cuda::cvtColor.

The implementation uploads @p src, calls cv::cuda::cvtColor and downloads the
result into @p dst.

@param src  Input array in host memory.
@param dst  Output array in host memory; receives the downloaded result.
@param code Colour conversion code, forwarded unchanged to cv::cuda::cvtColor.
 */
CV_EXPORTS_W void cvtColor(InputArray src, OutputArray dst, int code);

/** @brief Host-side wrapper around cv::cuda::absdiff.

The implementation uploads both inputs, calls cv::cuda::absdiff and downloads
the result into @p dst.

@param src1 First input array in host memory.
@param src2 Second input array in host memory.
@param dst  Output array in host memory; receives the downloaded result.
 */
CV_EXPORTS_W void absdiff(InputArray src1, InputArray src2, OutputArray dst);
}
}
#endif /* __OPENCV_CUDA2_HPP__ */
1.2 precomp.hpp 内容如下:
#ifndef __OPENCV_CUDA2_PRECOMP_HPP__
#define __OPENCV_CUDA2_PRECOMP_HPP__
#include "opencv2/core.hpp"
#include "opencv2/imgproc.hpp"
#include "opencv2/video/tracking.hpp"
#include "opencv2/cudaoptflow.hpp"
#include "opencv2/cudaarithm.hpp"
#include "opencv2/core/cuda.hpp"
#include "opencv2/cudaimgproc.hpp"
#include "opencv2/cudawarping.hpp"
#endif
1.3 cuda2.cpp 内容如下:
#include "precomp.hpp"
namespace cv
{
namespace cuda2
{
CV_EXPORTS_W void threshold(InputArray src, OutputArray dst, double thresh, double maxval, int type)
{
cv::cuda::GpuMat d_src, d_dst;
d_src.upload(src);
cv::cuda::threshold(d_src, d_dst, thresh, maxval, type);
d_dst.download(dst);
}
CV_EXPORTS_W void resize(InputArray src, OutputArray dst, Size dsize, int interpolation=INTER_LINEAR)
{
cv::cuda::GpuMat d_src, d_dst;
d_src.upload(src);
cv::cuda::resize(d_src, d_dst, dsize, 0, 0, interpolation);
d_dst.download(dst);
}
CV_EXPORTS_W void cvtColor(InputArray src, OutputArray dst, int code)
{
cv::cuda::GpuMat d_src, d_dst;
d_src.upload(src);
cv::cuda::cvtColor(d_src, d_dst, code);
d_dst.download(dst);
}
CV_EXPORTS_W void absdiff(InputArray src1, InputArray src2, OutputArray dst)
{
cv::cuda::GpuMat d_src1, d_src2, d_dst;
d_src1.upload(src1);
d_src2.upload(src2);
cv::cuda::absdiff(d_src1, d_src2, d_dst);
d_dst.download(dst);
}
}
}
1.4 CMakeLists.txt 内容如下:
# cuda2 wraps CUDA routines, so disable the module on iOS/WinRT and whenever
# neither CUDA support nor the CUDA stubs are being built.
if(IOS OR WINRT OR (NOT HAVE_CUDA AND NOT BUILD_CUDA_STUBS))
  ocv_module_disable(cuda2)
endif()

set(the_description "access opencv cuda methods from Python")

# Suppress the listed MSVC (/wd...) and GCC/Clang (-W...) warnings for this module.
ocv_warnings_disable(CMAKE_CXX_FLAGS /wd4127 /wd4324 /wd4512 -Wundef -Wmissing-declarations -Wshadow)

# opencv_cudawarping is listed explicitly: cuda2.cpp calls cv::cuda::resize and
# precomp.hpp includes opencv2/cudawarping.hpp, so it is a direct dependency and
# should not be resolved only through another module's dependencies.
ocv_define_module(cuda2 opencv_core opencv_imgproc opencv_tracking opencv_cudaoptflow opencv_cudaarithm opencv_cudaimgproc opencv_cudawarping WRAP python)
2、参照上一篇文章《centos7下编译opencv3.4.8+cuda10版本》将上述cuda2模块一起编译进去,最后生成.so文件路径:
/data/User/XX/opencv_cuda/lib/python3.7/site-packages/cv2.cpython-37m-x86_64-linux-gnu.so
将该.so文件拷贝到你使用的python环境下:
/data/Packages/anaconda/data/lib/python3.7/site-packages/cv2/
目录结构如下:
2.1 __init__.py 内容为:
# Package initializer for the cv2 directory: re-exports the public names of the
# sibling `cv2` and `data` modules at package level.
import importlib
from .cv2 import *
from .data import *
# The wildcard imports above skip underscore-prefixed ("private") names such as
# __version__; copying the full __dict__ of cv2.cv2 into this namespace makes
# them available as well.
globals().update(importlib.import_module('cv2.cv2').__dict__)
2.2 data文件夹:将opencv生成目录
/data/User/XX/opencv_cuda/share/OpenCV/haarcascades/ 下的文件拷贝到该文件夹中即可
3、编译成功后,可以对自定义模块进行测试:
import cv2
import time

# Smoke test for the custom cuda2 module: times each wrapped call and writes
# the final result to disk.
if __name__ == '__main__':
    s = time.time()
    img = cv2.imread("DR8A6268.jpg")
    print("imread: ", time.time() - s)
    # cv2.imread returns None instead of raising when the file cannot be read;
    # stop here with a clear message rather than failing inside the first call.
    if img is None:
        raise FileNotFoundError("cannot read DR8A6268.jpg")

    s = time.time()
    img1 = cv2.cuda2.resize(img, (3000, 4000))
    print("resize: ", time.time() - s)

    s = time.time()
    img2 = cv2.cuda2.cvtColor(img1, cv2.COLOR_BGR2GRAY)
    print("cvtColor: ", time.time() - s)

    s = time.time()
    img3 = cv2.cuda2.threshold(img2, 128, 255, cv2.THRESH_BINARY)
    print("threshold: ", time.time() - s)

    s = time.time()
    img4 = cv2.cuda2.absdiff(img2, img3)
    print("absdiff: ", time.time() - s)

    cv2.imwrite("4.png", img4)
测试原图为:
生成结果图为:
4、以上测试成功,说明GPU能够顺利执行。但是实际测试时会发现,调用cuda后执行时间并没有减少:每次调用都要先把数据从cpu拷贝到gpu、计算完成后再拷贝回cpu,这部分数据拷贝的时间消耗较大,再加上python调用c++代码的开销,抵消了gpu计算节省的时间。因此这种使用方式对简单操作并不高效,一般只在计算量较大、较为复杂的图像处理操作中,才适合采用这种方式进行加速。