This project walks you step by step through video-stream-based fruit classification on a Raspberry Pi. It covers four parts:
PaddleDetection: preparing the VOC dataset
PaddleDetection: the fruit classification project in practice
Raspberry Pi: configuring and debugging the CSI and USB cameras
Paddle Lite: deploying a real-time video-stream object detection demo on the Raspberry Pi
After training is complete, export the native Paddle model files:
!python -u tools/export_model.py -c configs/yolov3_mobilenet_v1_fruit.yml
Once the model has been trained and exported (the __model__ and __params__ files end up under output/yolov3_mobilenet_v1_fruit), convert it with the opt model conversion tool:
# Download the opt binary
# Make sure to download the opt binary that matches your Paddle-Lite version; the link for each release is at https://github.com/PaddlePaddle/Paddle-Lite/releases
!wget https://github.com/PaddlePaddle/Paddle-Lite/releases/download/v2.7/opt_linux
# Copy the opt binary into the exported model directory
!cp opt_linux /home/aistudio/PaddleDetection/output/yolov3_mobilenet_v1_fruit
# Enter the inference model directory
%cd /home/aistudio/PaddleDetection/output/yolov3_mobilenet_v1_fruit
# Make opt executable
!chmod +x opt_linux
# Use opt to convert the model: __model__ and __params__ become model.nb
!./opt_linux --model_file=__model__ --param_file=__params__ --valid_targets=arm --optimize_out_type=naive_buffer --optimize_out=./model
# List the directory to check that model.nb was generated
!ls
Use WinSCP (or scp) to copy the .nb model file into the /home/pi/Paddle-Lite-Demo/PaddleLite-armlinux-demo/object_detection_demo/models/fruit_detection directory on the Raspberry Pi.
The object_detection_demo.cc file is the C++ deployment code for the model and the core of the whole project's logic.
The model shipped with the demo is SSD, while the fruit classifier uses YOLOv3, so the demo's .cc file does not work as-is and has to be modified.
I found a YOLOv3 .cc demo file in the Paddle-Lite repository on GitHub:
https://github.com/PaddlePaddle/Paddle-Lite/blob/develop/lite/demo/cxx/yolov3_detection/yolov3_detection.cc. That demo runs detection on still images, whereas I want detection on a video stream, so after a few small changes the file looks like this:
#include <arm_neon.h>
#include <iostream>
#include <vector>
#include "opencv2/core.hpp"
#include "opencv2/highgui.hpp"
#include "opencv2/imgcodecs.hpp"
#include "opencv2/imgproc.hpp"
#include "opencv2/videoio.hpp"
#include "paddle_api.h" // NOLINT
/////////////////////////////////////////////////////////////////////////
// If this demo is linked to static library:libpaddle_api_light_bundled.a
// , you should include `paddle_use_ops.h` and `paddle_use_kernels.h` to
// avoid linking errors such as `unsupport ops or kernels`.
/////////////////////////////////////////////////////////////////////////
// #include "paddle_use_kernels.h" // NOLINT
// #include "paddle_use_ops.h" // NOLINT
using namespace paddle::lite_api; // NOLINT
struct Object {
cv::Rect rec;
int class_id;
float prob;
};
int64_t ShapeProduction(const shape_t& shape) {
int64_t res = 1;
for (auto i : shape) res *= i;
return res;
}
const char* class_names[] = {
"apple", "banana", "orange",
};
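// Note: the class names must appear in the same order as the label list used
// when training the fruit detection model.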
// Fill the tensor with mean/scale-normalized values and transpose the layout NHWC -> NCHW, using NEON to speed things up
void neon_mean_scale(const float* din,
float* dout,
int size,
const std::vector<float> mean,
const std::vector<float> scale) {
if (mean.size() != 3 || scale.size() != 3) {
std::cerr << "[ERROR] mean or scale size must equal to 3\n";
exit(1);
}
float32x4_t vmean0 = vdupq_n_f32(mean[0]);
float32x4_t vmean1 = vdupq_n_f32(mean[1]);
float32x4_t vmean2 = vdupq_n_f32(mean[2]);
float32x4_t vscale0 = vdupq_n_f32(1.f / scale[0]);
float32x4_t vscale1 = vdupq_n_f32(1.f / scale[1]);
float32x4_t vscale2 = vdupq_n_f32(1.f / scale[2]);
float* dout_c0 = dout;
float* dout_c1 = dout + size;
float* dout_c2 = dout + size * 2;
int i = 0;
for (; i < size - 3; i += 4) {
float32x4x3_t vin3 = vld3q_f32(din);
float32x4_t vsub0 = vsubq_f32(vin3.val[0], vmean0);
float32x4_t vsub1 = vsubq_f32(vin3.val[1], vmean1);
float32x4_t vsub2 = vsubq_f32(vin3.val[2], vmean2);
float32x4_t vs0 = vmulq_f32(vsub0, vscale0);
float32x4_t vs1 = vmulq_f32(vsub1, vscale1);
float32x4_t vs2 = vmulq_f32(vsub2, vscale2);
vst1q_f32(dout_c0, vs0);
vst1q_f32(dout_c1, vs1);
vst1q_f32(dout_c2, vs2);
din += 12;
dout_c0 += 4;
dout_c1 += 4;
dout_c2 += 4;
}
// handle any leftover pixels (when size is not a multiple of 4), writing each
// channel into its own output plane, consistent with the NEON path above
for (; i < size; i++) {
*(dout_c0++) = (*(din++) - mean[0]) / scale[0];
*(dout_c1++) = (*(din++) - mean[1]) / scale[1];
*(dout_c2++) = (*(din++) - mean[2]) / scale[2];
}
}
void pre_process(const cv::Mat& img, int width, int height, float* data) {
cv::Mat rgb_img;
cv::cvtColor(img, rgb_img, cv::COLOR_BGR2RGB);
cv::resize(
rgb_img, rgb_img, cv::Size(width, height), 0.f, 0.f, cv::INTER_CUBIC);
cv::Mat imgf;
rgb_img.convertTo(imgf, CV_32FC3, 1 / 255.f);
std::vector<float> mean = {0.485f, 0.456f, 0.406f};
std::vector<float> scale = {0.229f, 0.224f, 0.225f};
const float* dimg = reinterpret_cast<const float*>(imgf.data);
neon_mean_scale(dimg, data, width * height, mean, scale);
}
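// Post-process the model output. Each detection is assumed to occupy 6
// consecutive floats: [class_id, score, xmin, ymin, xmax, ymax], which is why
// the caller passes count = total_elements / 6 and `data` advances by 6 below.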
std::vector<Object> detect_object(const float* data,
int count,
float thresh,
cv::Mat& image) {  // NOLINT
if (data == nullptr) {
std::cerr << "[ERROR] data can not be nullptr\n";
exit(1);
}
std::vector<Object> rect_out;
for (int iw = 0; iw < count; iw++) {
int oriw = image.cols;
int orih = image.rows;
if (data[1] > thresh) {
Object obj;
int x = static_cast<int>(data[2]);
int y = static_cast<int>(data[3]);
int w = static_cast<int>(data[4] - data[2] + 1);
int h = static_cast<int>(data[5] - data[3] + 1);
cv::Rect rec_clip =
cv::Rect(x, y, w, h) & cv::Rect(0, 0, image.cols, image.rows);
obj.class_id = static_cast<int>(data[0]);
obj.prob = data[1];
obj.rec = rec_clip;
if (w > 0 && h > 0 && obj.prob <= 1) {
rect_out.push_back(obj);
cv::rectangle(image, rec_clip, cv::Scalar(0, 0, 255), 1, cv::LINE_AA);
std::string str_prob = std::to_string(obj.prob);
std::string text = std::string(class_names[obj.class_id]) + ": " +
str_prob.substr(0, str_prob.find(".") + 4);
int font_face = cv::FONT_HERSHEY_COMPLEX_SMALL;
double font_scale = 1.f;
int thickness = 1;
cv::Size text_size =
cv::getTextSize(text, font_face, font_scale, thickness, nullptr);
float new_font_scale = w * 0.5 * font_scale / text_size.width;
text_size = cv::getTextSize(
text, font_face, new_font_scale, thickness, nullptr);
cv::Point origin;
origin.x = x + 3;
origin.y = y + text_size.height + 3;
cv::putText(image,
text,
origin,
font_face,
new_font_scale,
cv::Scalar(0, 255, 255),
thickness,
cv::LINE_AA);
std::cout << "detection, image size: " << image.cols << ", "
<< image.rows
<< ", detect object: " << class_names[obj.class_id]
<< ", score: " << obj.prob << ", location: x=" << x
<< ", y=" << y << ", width=" << w << ", height=" << h
<< std::endl;
}
}
data += 6;
}
return rect_out;
}
void RunModel(std::string model_file, const cv::Mat& img) {
// 1. Set MobileConfig
MobileConfig config;
config.set_model_from_file(model_file);
// 2. Create PaddlePredictor by MobileConfig
std::shared_ptr<PaddlePredictor> predictor =
CreatePaddlePredictor<MobileConfig>(config);
const int in_width = 320;
const int in_height = 320;
// 3. Prepare input data from image
// input 0
std::unique_ptr<Tensor> input_tensor0(std::move(predictor->GetInput(0)));
input_tensor0->Resize({1, 3, in_height, in_width});
auto* data0 = input_tensor0->mutable_data<float>();
pre_process(img, in_width, in_height, data0);
// input1
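// The exported YOLOv3 model expects the original image size (rows, cols) as a
// second input; it is assumed to be used to map the predicted boxes back to
// the original image coordinates.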
std::unique_ptr<Tensor> input_tensor1(std::move(predictor->GetInput(1)));
input_tensor1->Resize({1, 2});
auto* data1 = input_tensor1->mutable_data<int>();
data1[0] = img.rows;
data1[1] = img.cols;
// 4. Run predictor
predictor->Run();
// 5. Get output and post process
std::unique_ptr<const Tensor> output_tensor(
std::move(predictor->GetOutput(0)));
auto* outptr = output_tensor->data<float>();
auto shape_out = output_tensor->shape();
int64_t cnt = 1;
for (auto& i : shape_out) {
cnt *= i;
}
cv::Mat output_image = img.clone();
auto rec_out = detect_object(outptr, static_cast<int>(cnt / 6), 0.5f, output_image);
cv::imshow("Object Detection Demo", output_image);
}
int main(int argc, char** argv) {
if (argc < 2) {
std::cerr << "Usage: ./object_detection_demo <model.nb>\n";
return 1;
}
std::string model_file = argv[1];
cv::VideoCapture cap(-1);
// cv::CAP_PROP_* is the OpenCV 3/4 spelling of the old CV_CAP_PROP_* constants
cap.set(cv::CAP_PROP_FRAME_WIDTH, 640);
cap.set(cv::CAP_PROP_FRAME_HEIGHT, 480);
if (!cap.isOpened()) {
return -1;
}
while (1) {
cv::Mat input_image;
cap >> input_image;
if (input_image.empty()) continue;  // skip frames the camera failed to deliver
RunModel(model_file, input_image);
if (cv::waitKey(1) == char('q')) {
break;
}
}
cap.release();
cv::destroyAllWindows();
return 0;
}
For a detailed walkthrough of this code, see: https://github.com/PaddleCV-FAQ/PaddleDetection-FAQ/blob/main/Lite%E9%83%A8%E7%BD%B2/yolov3_for_raspi.md
This step is the hardest part of the whole deployment.
The run.sh file needs to be modified to match the input arguments that the .cc file expects (just open it in a text editor and edit it).
In this project my .cc file only needs the model, so there is a single argument: the path to the model file.
#!/bin/bash
# configure
#TARGET_ARCH_ABI=armv8 # for RK3399, set to default arch abi
TARGET_ARCH_ABI=armv7hf # for Raspberry Pi 3B
PADDLE_LITE_DIR=../Paddle-Lite
if [ "x$1" != "x" ]; then
TARGET_ARCH_ABI=$1
fi
# build
rm -rf build
mkdir build
cd build
cmake -DPADDLE_LITE_DIR=${PADDLE_LITE_DIR} -DTARGET_ARCH_ABI=${TARGET_ARCH_ABI} ..
make
#run
# Set the input arguments the .cc file needs here, e.g. the model path, image path, etc.
LD_LIBRARY_PATH=$LD_LIBRARY_PATH:${PADDLE_LITE_DIR}/libs/${TARGET_ARCH_ABI} ./object_detection_demo ../models/fruit_detection/model.nb
Finally, on the Raspberry Pi, enter the demo directory and launch the build-and-run script:
cd Paddle-Lite-Demo/PaddleLite-armlinux-demo/object_detection_demo
./run.sh
Paddle Lite documentation: https://paddle-lite.readthedocs.io/zh/latest/index.html
The detection results and accuracy are basically fine, but the camera latency is very high, around 40 seconds, and frames also get dropped. Any help from more experienced folks would be much appreciated!
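One likely contributor (I have not verified this yet) is that RunModel builds a new MobileConfig and PaddlePredictor for every single frame, so each frame pays the full model-loading cost again. Below is a rough sketch of the same pipeline with the predictor created once and reused for every frame; pre_process, detect_object and ShapeProduction are the functions already defined above, and RunModelOnFrame is simply RunModel with the predictor passed in as an argument:

// Sketch: identical pipeline, but the model is loaded and the predictor is
// created once, outside the capture loop, instead of once per frame.
void RunModelOnFrame(const std::shared_ptr<PaddlePredictor>& predictor,
                     const cv::Mat& img) {
  const int in_width = 320;
  const int in_height = 320;
  // input 0: preprocessed image
  std::unique_ptr<Tensor> input_tensor0(std::move(predictor->GetInput(0)));
  input_tensor0->Resize({1, 3, in_height, in_width});
  pre_process(img, in_width, in_height, input_tensor0->mutable_data<float>());
  // input 1: original image size
  std::unique_ptr<Tensor> input_tensor1(std::move(predictor->GetInput(1)));
  input_tensor1->Resize({1, 2});
  auto* data1 = input_tensor1->mutable_data<int>();
  data1[0] = img.rows;
  data1[1] = img.cols;
  predictor->Run();
  // output and post-processing, exactly as before
  std::unique_ptr<const Tensor> output_tensor(
      std::move(predictor->GetOutput(0)));
  const float* outptr = output_tensor->data<float>();
  int64_t cnt = ShapeProduction(output_tensor->shape());
  cv::Mat output_image = img.clone();
  detect_object(outptr, static_cast<int>(cnt / 6), 0.5f, output_image);
  cv::imshow("Object Detection Demo", output_image);
}

int main(int argc, char** argv) {
  if (argc < 2) {
    std::cerr << "Usage: ./object_detection_demo <model.nb>\n";
    return 1;
  }
  // 1. Load the model and build the predictor only once.
  MobileConfig config;
  config.set_model_from_file(argv[1]);
  std::shared_ptr<PaddlePredictor> predictor =
      CreatePaddlePredictor<MobileConfig>(config);
  // 2. Open the camera and run the same predictor on every frame.
  cv::VideoCapture cap(-1);
  cap.set(cv::CAP_PROP_FRAME_WIDTH, 640);
  cap.set(cv::CAP_PROP_FRAME_HEIGHT, 480);
  if (!cap.isOpened()) return -1;
  while (true) {
    cv::Mat input_image;
    cap >> input_image;
    if (input_image.empty()) continue;
    RunModelOnFrame(predictor, input_image);
    if (cv::waitKey(1) == 'q') break;
  }
  cap.release();
  cv::destroyAllWindows();
  return 0;
}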