opencv4.3.0的版本更新中将Tengine作为Arm平台的Backend加入OpenCV DNN Module,添加其为第三方库中的一个,提升了OpenCV DNN在Arm平台的运行效率,本文对opencv4.3.0进行交叉编译,开启其对Tengine的支持,在Hi3559板子上运行DNN sample中的face_detector例程,对其加速效果进行测试。
由于编译的服务器无法连接外网,因此本次交叉编译是离线进行的,需要手动下载一些依赖库。
Tengine的github开源仓库:https://github.com/OAID/Tengine
其下有两个分支,其中 master 分支是源代码本体,编译后可在 Arm 等平台上运行,通过将其他框架下的模型转换成其专用的 tmfile 格式,加快神经网络的运行速度。
另一个分支就是Tengine-opencv,是作为opencv第三方库的编译源代码,我们要用的就是这个。
下载zip文件https://github.com/OAID/Tengine/tree/tengine-opencv
解压后操作
cd example_config
cp himix200_linux_cross.config himix100_linux_cross.config
#修改配置为Hi3559交叉编译工具链
#The following option is used for setting cross compiler path
EMBEDDED_CROSS_PATH=/opt/hisi-linux/x86-arm/aarch64-himix100-linux/bin/
#The following option is used for setting cross compiler prefix
CROSS_COMPILE=aarch64-himix100-linux-
#The following option is used for system architecture type.
#option value [x86/Arm32/Arm64]
ARCH_TYPE=Arm64
默认情况下会编译生成动态库,需要更改linux_build.sh中的内容,在cmake配置参数中添加一条
-DBUILT_IN_OPENCV=ON \
执行以下命令进行编译
./linux_build.sh example_config/himix100_linux_cross.config
编译完成后在install/lib目录看到libtengine.a文件则说明编译成功
opencv4.3.0的交叉编译方法与之前版本没有大的区别,以下只介绍正常流程,若出现其他错误可以参考之前的一篇文章opencv3.2.0移植Hi3559完整总结版
下载opencv-4.3.0源码https://github.com/opencv/opencv/tree/4.3.0
解压后
mkdir output build
cd build
# NOTE: replace every /yourpath/ placeholder below with your own absolute path.
# CMAKE_EXE_LINKER_FLAGS must be quoted, otherwise "-lpthread" is passed to
# cmake as a separate (invalid) argument instead of a linker flag.
cmake -DCMAKE_BUILD_TYPE=RELEASE \
-DCMAKE_INSTALL_PREFIX=../output \
-DCMAKE_C_COMPILER=aarch64-himix100-linux-gcc \
-DCMAKE_CXX_COMPILER=aarch64-himix100-linux-g++ \
-DCMAKE_EXE_LINKER_FLAGS="-lrt -lpthread" \
-DOPENCV_ENABLE_NONFREE=ON \
-DBUILD_SHARED_LIBS=ON \
-DBUILD_EXAMPLES=ON \
-DBUILD_PERF_TESTS=OFF \
-DBUILD_TESTS=OFF \
-DWITH_OPENCL=OFF \
-DBUILD_DOCS=OFF \
-DWITH_CUDA=OFF \
-DWITH_IPP=OFF \
-DWITH_PNG=OFF \
-DENABLE_PROFILING=OFF \
-DOPENCV_LIBTENGINE_ROOT_DIR=/yourpath/Tengine-tengine-opencv/install \
-DWITH_TENGINE=ON \
-DZLIB_INCLUDE_DIR=/yourpath/opencv-4.3.0/3rdparty/zlib/ \
..
在离线状态下,执行完cmake配置后观察,可能会出现ADE download failed错误
-- ADE: Download: v0.1.1f.zip
-- Try 1 failed
--
解决方法是找到download链接,手动下载源代码并替换。
打开opencv-4.3.0/modules/gapi/cmake/DownloadADE.cmake
按照其github地址下载zip,并重命名为v0.1.1f.zip
修改DownloadADE.cmake文件
- "https://github.com/opencv/ade/archive/"
+ "file:///yourpath"  (注意 file URL 只有一个冒号,形如 file:///绝对路径)
重新执行cmake
其他download失败都可以用类似方法解决
执行
make
make install
出现其他依赖库错误参考前文
如此编译出来的so和a文件都在output目录下,其中dnn是经过Tengine加速过的版本。
2、face_detector代码测试
人脸测试集来自 lfw_5590 公开数据集(含 12,995 张人脸图片),图片均采集自网络
This dataset contains 12,995 face images collected from the Internet. The images are annotated with (1) five facial landmarks, (2) attributes of gender, smiling, wearing glasses, and head pose.
face_detector用到的模型文件在opencv-4.3.0/samples/dnn/face_detector文件夹下,其中
caffe模型文件为
res10_300x300_ssd_iter_140000_fp16.caffemodel
deploy.prototxt
tensorflow模型文件为
opencv_face_detector_uint8.pb
opencv_face_detector.pbtxt
使用download_weights.py脚本在网络正常的情况下下载
face_detector 测试代码
#include
#include
#include
#include
#include
using namespace cv;
using namespace cv::dnn;
#include
#include
#include
//#include
using namespace std;
const size_t inWidth = 300;
const size_t inHeight = 300;
const double inScaleFactor = 1.0;
const Scalar meanVal(104.0, 177.0, 123.0);
const float confidenceThreshold = 0.6;
int readFileList(const char* basePath, vector &fileList);
void face_detect_dnn();
void mtcnn_demo();
int main(int argc, char** argv)
{
face_detect_dnn();
//waitKey(0);
return 0;
}
// Recursively walk basePath and append every regular file's name to fileList.
// Subdirectories are descended into; "." and ".." are skipped; symlinks are
// only reported.  Note: only the bare file name is stored (not the full path),
// matching how the caller later re-joins it with the input directory.
// Returns 1 on success, 0 if the directory could not be opened.
int readFileList(const char* basePath, std::vector<std::string> &fileList)
{
    DIR *dir;
    struct dirent *ptr;
    char base[1000];
    if ((dir = opendir(basePath)) == NULL)
    {
        perror("Open dir error...");
        return 0;  // report failure to the caller instead of exiting the process
    }
    while ((ptr = readdir(dir)) != NULL)
    {
        if (strcmp(ptr->d_name, ".") == 0 || strcmp(ptr->d_name, "..") == 0)
            continue;                        // skip current and parent dir
        else if (ptr->d_type == DT_REG)      // regular file (was magic number 8)
            fileList.push_back(ptr->d_name);
        else if (ptr->d_type == DT_LNK)      // symbolic link (was 10)
            printf("d_name:%s/%s\n", basePath, ptr->d_name);
        else if (ptr->d_type == DT_DIR)      // directory (was 4) -- recurse
        {
            // Build "basePath/name" safely; snprintf guarantees NUL termination
            // (the original strcpy/strcat could overflow `base`).
            snprintf(base, sizeof(base), "%s/%s", basePath, ptr->d_name);
            readFileList(base, fileList);
        }
    }
    closedir(dir);
    return 1;
}
void face_detect_dnn() {
//String modelDesc = "D:/projects/opencv_tutorial/data/models/resnet/deploy.prototxt";
// String modelBinary = "D:/projects/opencv_tutorial/data/models/resnet/res10_300x300_ssd_iter_140000.caffemodel";
//String modelBinary = "D:/opencv-4.2.0/opencv/sources/samples/dnn/face_detector/opencv_face_detector_uint8.pb";
//String modelDesc = "D:/opencv-4.2.0/opencv/sources/samples/dnn/face_detector/opencv_face_detector.pbtxt";
String modelBinary = "opencv_face_detector_uint8.pb";
String modelDesc = "opencv_face_detector.pbtxt";
// 初始化网络
// dnn::Net net = readNetFromCaffe(modelDesc, modelBinary);
dnn::Net net = readNetFromTensorflow(modelBinary, modelDesc);
net.setPreferableBackend(DNN_BACKEND_OPENCV);
net.setPreferableTarget(DNN_TARGET_CPU);
if (net.empty())
{
printf("could not load net...\n");
return;
}
#if 0
// 打开摄像头
// VideoCapture capture(0);
VideoCapture capture("D:/images/video/Boogie_Up.mp4");
if (!capture.isOpened()) {
printf("could not load camera...\n");
return;
}
#endif
Mat frame;
int count = 0;
//char imagePath[100] = {};
string inputPath = "/mnt/yuwy/opencv/lfw_5590/";
//char outPath[100] = {};
string outPath = "/mnt/yuwy/opencv/out_lfw_5590/";
vector imageList;
readFileList(inputPath.c_str(), imageList);
//while (capture.read(frame))
for (int i = 0; i < imageList.size(); i++)
{
//getchar();
//imagePath = to_string(i) + ".jpg";
//sprintf_s(imagePath, "%d.jpg", i);
//printf("imagePath:%s\n", imagePath);
//frame = cv::imread("E:/opencv_4_2_0_is_installed_here/opencv/sources/samples/dnn/face_detector/0.jpg");
string imagePath = inputPath + imageList[i];
frame = cv::imread(imagePath);
if (frame.empty())
{
printf("read test jpg error\n");
}
else
{
//cout << "get image: " << imagePath << endl;
int64 start = getTickCount();
#if 0
if (frame.empty())
{
break;
}
#endif
// 水平镜像调整
// flip(frame, frame, 1);
//imshow("input", frame);
if (frame.channels() == 4)
cvtColor(frame, frame, COLOR_BGRA2BGR);
// 输入数据调整
Mat inputBlob = blobFromImage(frame, inScaleFactor,
Size(inWidth, inHeight), meanVal, false, false);
net.setInput(inputBlob, "data");
// 人脸检测
Mat detection = net.forward("detection_out");
vector layersTimings;
double freq = getTickFrequency() / 1000;
double time = net.getPerfProfile(layersTimings) / freq;
Mat detectionMat(detection.size[2], detection.size[3], CV_32F, detection.ptr());
ostringstream ss;
for (int i = 0; i < detectionMat.rows; i++)
{
// 置信度 0~1之间
float confidence = detectionMat.at(i, 2);
if (confidence > confidenceThreshold)
{
count++;
int xLeftBottom = static_cast(detectionMat.at(i, 3) * frame.cols);
int yLeftBottom = static_cast(detectionMat.at(i, 4) * frame.rows);
int xRightTop = static_cast(detectionMat.at(i, 5) * frame.cols);
int yRightTop = static_cast(detectionMat.at(i, 6) * frame.rows);
Rect object((int)xLeftBottom, (int)yLeftBottom,
(int)(xRightTop - xLeftBottom),
(int)(yRightTop - yLeftBottom));
rectangle(frame, object, Scalar(0, 255, 0));
ss << confidence;
String conf(ss.str());
String label = "Face: " + conf;
int baseLine = 0;
Size labelSize = getTextSize(label, FONT_HERSHEY_SIMPLEX, 0.5, 1, &baseLine);
rectangle(frame, Rect(Point(xLeftBottom, yLeftBottom - labelSize.height),
Size(labelSize.width, labelSize.height + baseLine)),
Scalar(255, 255, 255), FILLED);
putText(frame, label, Point(xLeftBottom, yLeftBottom),
FONT_HERSHEY_SIMPLEX, 0.5, Scalar(0, 0, 0));
}
}
float fps = getTickFrequency() / (getTickCount() - start);
ss.str("");
ss << "FPS: " << fps << " ; inference time: " << time << " ms";
cout << i << " ; inference time: " << time << " ms";
putText(frame, ss.str(), Point(20, 20), 0, 0.75, Scalar(0, 0, 255), 2, 8);
//imshow("dnn_face_detection", frame);
//sprintf_s(outPath, "out%d.jpg", i);
string outimagePath = outPath + imageList[i];
imwrite(outimagePath, frame);
//if (waitKey(1) >= 0) break;
//if (waitKey(1) >= 0) return;
}
//printf("total face: %d\n", count);
cout << "total face:" << count << endl;
}
}
Makefile
# Cross-compile the face_detector sample with the Tengine-enabled OpenCV build.
CC = aarch64-himix100-linux-g++

# Original (non-Tengine) include paths kept for A/B comparison:
#INCLUDE_PATH = -I/home/sdb/yuwy/nfs/opencv/opencv-4.3.0-original/include/opencv4 \
	-I/home/sdb/yuwy/nfs/opencv/opencv-4.3.0-original/output/include
INCLUDE_PATH = -I/home/sdb/yuwy/nfs/opencv/opencv-4.3.0-tengine/include/opencv4 \
	-I/home/sdb/yuwy/nfs/opencv/opencv-4.3.0-tengine/output/include

# -rpath-link lets the linker resolve transitive shared-library dependencies.
# NOTE: no trailing backslash after the last -L line -- the original's stray
# continuation merged the LD definition into LIB_PATH, leaving $(LD) empty.
LIB_PATH = -Wl,-rpath-link -Wl,/home/sdc/yuwy/opencv/glib-2.47.3/output/lib \
	-L/home/sdc/yuwy/opencv/opencv-4.3.0/output/lib \
	-L/home/sdc/yuwy/opencv/opencv-4.3.0/3rdparty/lib

# -lzlib matches OpenCV's 3rdparty static build (libzlib.a in 3rdparty/lib).
LD = -lopencv_highgui -lopencv_dnn -lopencv_features2d -lopencv_flann -lopencv_calib3d -lopencv_objdetect -lopencv_imgcodecs -lopencv_imgproc -lopencv_videoio -lopencv_core -lpthread -lrt -ldl -lzlib

.PHONY: all clean

# Libraries must come AFTER the source file so the linker can resolve symbols
# in left-to-right order (the original listed them before face_detector.cpp).
all : face_detector.cpp
	$(CC) $(INCLUDE_PATH) face_detector.cpp -o face_detector $(LIB_PATH) $(LD)

clean:
	rm -f face_detector
注意代码中的模型位置和测试图片位置,注意Makefile中的lib和include位置,编译成功后就可以在Hi3559板子上运行了,记得在板子上添加动态库搜索路径
export LD_LIBRARY_PATH=/mnt/yuwy/opencv/opencv-4.3.0/lib:$LD_LIBRARY_PATH
echo $LD_LIBRARY_PATH
face_detector在模型输入为300x300时,未经过Tengine加速的单张图像处理时间约为330ms,而经过Tengine加速后单张图像的处理时间为170ms,有较大提升。
加速后的测试结果
加速前的测试结果