最近做项目需要在手机上跑深度学习的模型,踩了不少坑,做个记录。这里以mobilenetv3为例子。
1.环境配置
pytorch 1.0 之后自带onnx输出,所以是个很好的选择,顺便装个caffe2验证不同后端加载的模型输出结果是否一致。如果用conda配置,就几行命令。
# CPU-only nightly build of PyTorch, from the pytorch channel
conda install pytorch-nightly-cpu -c pytorch
# onnx package, from the conda-forge channel
conda install -c conda-forge onnx
2.mobilenetv3 修改自 rwightman 的代码,模型参数也是这位大神训练的。修改后的代码见此。
拿一张图像先跑一下
import time
import numpy as np
import torch
from PIL import Image
import mobilenetv3
import onnx
import caffe2.python.onnx.backend as backend
src_image = 'cat.jpg'
input_size = 224
# Per-channel mean / std (RGB order), shaped (3, 1, 1) so they broadcast over
# a CHW image tensor.
mean_vals = torch.tensor([0.485, 0.456, 0.406], dtype=torch.float32).view(3, 1, 1)
std_vals = torch.tensor([0.229, 0.224, 0.225], dtype=torch.float32).view(3, 1, 1)

# One class description per line; the list index is used as the class index.
with open("synset_words.txt", "r") as f:
    imagenet_ids = [line.strip() for line in f]

############################### prepare model ####################################
infer_device = torch.device('cpu')
res_model = mobilenetv3.MobileNetV3()
# map_location keeps every tensor on the storage it is deserialized into (CPU),
# regardless of the device the checkpoint was saved from.
state_dict = torch.load('./mobilenetv3x1.0.pth', map_location=lambda storage, loc: storage)
res_model.load_state_dict(state_dict)
res_model.to(infer_device)
res_model.eval()

################################ prepare test image #####################################
pil_im = Image.open(src_image).convert('RGB')
pil_im_resize = pil_im.resize((input_size, input_size), Image.BILINEAR)
# HWC uint8 pixels -> CHW tensor. np.array() copies, so the tensor owns
# writable memory (replaces the deprecated ByteStorage.from_buffer route).
origin_im_tensor = torch.from_numpy(np.array(pil_im_resize, dtype=np.uint8))
origin_im_tensor = origin_im_tensor.permute(2, 0, 1).contiguous()
# Scale to [0, 1], normalize per channel, then add the batch dimension.
origin_im_tensor = (origin_im_tensor.float() / 255 - mean_vals) / std_vals
origin_im_tensor = origin_im_tensor.unsqueeze(0)

########################### test ################################
t1 = time.time()
with torch.no_grad():
    pred = res_model(origin_im_tensor.to(infer_device))
    predidx = torch.argmax(pred, dim=1)
t2 = time.time()
print(t2 - t1)
# predidx is a one-element tensor (batch of 1); .item() yields a plain int index.
print("predict result: ", imagenet_ids[predidx.item()])
不出意外,预测应该是猫
0.03892230987548828
predict result: n02123597 Siamese cat, Siamese
3.pytorch导出onnx并检查,代码接上段
################## export ###############
output_onnx = 'mobilenetv3.onnx'
x = origin_im_tensor

# Reference output of the PyTorch model for x, kept so other backends can be
# compared against it. Computed explicitly instead of relying on the return
# value of the private torch.onnx._export helper.
with torch.no_grad():
    torch_out = res_model(x)

print("==> Exporting model to ONNX format at '{}'".format(output_onnx))
input_names = ["input0"]
output_names = ["output0"]
torch.onnx.export(
    res_model,
    x,
    output_onnx,
    export_params=True,
    verbose=False,
    input_names=input_names,
    output_names=output_names,
)

print("==> Loading and checking exported model from '{}'".format(output_onnx))
onnx_model = onnx.load(output_onnx)
onnx.checker.check_model(onnx_model)  # raises if the model is not valid
print("==> Passed")
4.继续,将onnx模型导入caffe2,比较一下输出结果是否与pytorch一致:
print("==> Loading onnx model into Caffe2 backend and comparing forward pass.")
caffe2_backend = backend.prepare(onnx_model)
# Feed the same input tensor, keyed by the input name used at export time.
B = {"input0": x.detach().numpy()}
c2_out = caffe2_backend.run(B)["output0"]
print("==> compare torch output and caffe2 output")
# Raises AssertionError if the two outputs differ at 5 decimal places.
np.testing.assert_almost_equal(torch_out.detach().numpy(), c2_out, decimal=5)
print("==> Passed")
都没有问题就ok了。
1.环境准备,ncnn是腾讯的开源框架(支持一下国产……),纯c++,因为转换模型和pc端demo依赖protobuf和opencv,所以除了gcc,cmake外,还要配一下环境:
protobuf版本注意和onnx依赖的版本一致,以免不必要的麻烦,这里安装的是3.7.1
unzip protobuf-all-3.7.1.zip
# The "-all" archive unpacks into protobuf-3.7.1/ (the directory name has no "-all").
cd protobuf-3.7.1
./configure --prefix=/usr/local
make -j4
make check -j4
sudo make install
# Refresh the shared-library cache so the newly installed libs are found.
sudo ldconfig
opencv这里用的是2.4.13.6
## Dependencies
sudo apt-get install build-essential
sudo apt-get install cmake git libgtk2.0-dev pkg-config libavcodec-dev libavformat-dev libswscale-dev
sudo apt-get install libtbb2 libtbb-dev libjpeg-dev libpng-dev libtiff-dev libjasper-dev libdc1394-22-dev
## Build from source
unzip opencv-2.4.13.6.zip
cd opencv-2.4.13.6
# Out-of-source build in opencv-2.4.13.6/build
mkdir build
cd build
# Release build, installed under /usr/local
cmake -D CMAKE_BUILD_TYPE=RELEASE -D CMAKE_INSTALL_PREFIX=/usr/local ..
make -j8
sudo make install
sudo ldconfig
2.把ncnn的源码clone下来,我们这里主要想使用tools下面的onnx转换工具。
这里作一点修改,pytorch1.0之后支持0维的张量,这在ncnn转换中会出现问题,修改onnx2ncnn.cpp中Constant和MemoryData的转换,有2处
// Patch fragment for onnx2ncnn.cpp (one of the two tensor-shape writers).
// Writes " 0=", " 1=", " 2=" entries to pp holding the dimensions of tensor M
// in reverse order (last dimension first) for rank 1..3.
if (M.dims_size() == 1) {
fprintf(pp, " 0=%d", (int)M.dims(0));
} else if (M.dims_size() == 2) {
fprintf(pp, " 0=%d", (int)M.dims(1));
fprintf(pp, " 1=%d", (int)M.dims(0));
} else if (M.dims_size() == 3) {
fprintf(pp, " 0=%d", (int)M.dims(2));
fprintf(pp, " 1=%d", (int)M.dims(1));
fprintf(pp, " 2=%d", (int)M.dims(0));
} else if (get_tensor_proto_data_size(M)==1) {
// Added branch: no rank above matched but M reports a data size of 1
// (presumably a 0-dim tensor exported by PyTorch >= 1.0 — confirm against
// get_tensor_proto_data_size); emit it as a 1-element shape.
// scalar tensor!!!
fprintf(pp, " 0=1");
}
// Patch fragment for onnx2ncnn.cpp (the other tensor-shape writer).
// For rank 2..4 the leading dimension M.dims(0) is not written; the remaining
// dimensions go to pp as " 0=", " 1=", " 2=" in reverse order. Rank 1 writes
// its single dimension as " 0=".
if (M.dims_size() == 1) {
fprintf(pp, " 0=%d", (int)M.dims(0));
} else if (M.dims_size() == 2) {
fprintf(pp, " 0=%d", (int)M.dims(1));
} else if (M.dims_size() == 3) {
fprintf(pp, " 0=%d", (int)M.dims(2));
fprintf(pp, " 1=%d", (int)M.dims(1));
} else if (M.dims_size() == 4) {
fprintf(pp, " 0=%d", (int)M.dims(3));
fprintf(pp, " 1=%d", (int)M.dims(2));
fprintf(pp, " 2=%d", (int)M.dims(1));
} else if (get_tensor_proto_data_size(M)==1) {
// Added branch: no rank above matched but M reports a data size of 1
// (presumably a 0-dim tensor — confirm against get_tensor_proto_data_size);
// emit it as a 1-element shape.
// scalar tensor!!!
fprintf(pp, " 0=1");
}
编译一把
# Out-of-source CMake build in ncnn/build
cd ncnn
mkdir -p build
cd build
cmake ..
make -j4
这样在build/tools/onnx/ 目录下就有转换工具onnx2ncnn了。
3.先别急着转换,onnx转换模型时有一些冗余,我们用工具简化一下onnx模型
pip3 install onnx-simplifier
然后简化我们的mobilenetv3.onnx
python3 -m onnxsim mobilenetv3.onnx mobilenetv3-sim.onnx
现在可以转ncnn了
onnx2ncnn mobilenetv3-sim.onnx mobilenetv3.param mobilenetv3.bin
4.转换完先在pc上测试一把
在ncnn/examples 目录下新建一个mobilenetv3.cpp,然后参考squeezenet的例子,简单修改一下
//
// Created by zjw on 19-6-28.
//
// NOTE: the names of the five angle-bracket headers were missing from the
// original text. They are restored here from what this file actually uses
// (matching the ncnn squeezenet example it is adapted from); <functional> and
// <string> are added for std::greater and std::string. Verify before building.
#include <stdio.h>
#include <algorithm>
#include <functional>
#include <string>
#include <vector>
#include <opencv2/core/core.hpp>
#include <opencv2/highgui/highgui.hpp>
#include "platform.h"
#include "net.h"
#if NCNN_VULKAN
#include "gpu.h"
#endif // NCNN_VULKAN
// Run the converted MobileNetV3 ncnn model ("mobilenetv3.param" /
// "mobilenetv3.bin" in the working directory) on one image.
//
// bgr        - input image, interpreted as packed 8-bit BGR pixels.
// cls_scores - resized to the width of the "output0" blob and filled with its values.
//
// Returns 0 on success, -1 if the model cannot be loaded or inference fails.
static int detect_squeezenet(const cv::Mat& bgr, std::vector<float>& cls_scores)
{
    ncnn::Net squeezenet;
#if NCNN_VULKAN
    squeezenet.opt.use_vulkan_compute = true;
#endif // NCNN_VULKAN

    if (squeezenet.load_param("mobilenetv3.param") != 0)
    {
        fprintf(stderr, "load_param mobilenetv3.param failed\n");
        return -1;
    }
    if (squeezenet.load_model("mobilenetv3.bin") != 0)
    {
        fprintf(stderr, "load_model mobilenetv3.bin failed\n");
        return -1;
    }

    // The mean/norm constants below are in R, G, B order (they are the
    // 0.485/0.456/0.406 and 0.229/0.224/0.225 values scaled by 255), so the
    // BGR input is converted to RGB while being resized to 224x224.
    ncnn::Mat in = ncnn::Mat::from_pixels_resize(bgr.data, ncnn::Mat::PIXEL_BGR2RGB, bgr.cols, bgr.rows, 224, 224);

    const float mean_vals[3] = {123.675f, 116.28f, 103.53f};
    // substract_mean_normalize multiplies by its second argument, hence the
    // reciprocals of the per-channel standard deviations.
    const float std_vals[3] = {1/58.395f, 1/57.12f, 1/57.375f};
    in.substract_mean_normalize(mean_vals, std_vals);
    fprintf(stderr, "input shape: %d %d %d %d\n", in.dims, in.h, in.w, in.c);

    ncnn::Extractor ex = squeezenet.create_extractor();
    ex.input("input0", in);

    ncnn::Mat out;
    if (ex.extract("output0", out) != 0)
    {
        fprintf(stderr, "extract output0 failed\n");
        return -1;
    }
    fprintf(stderr, "output shape: %d %d %d %d\n", out.dims, out.h, out.w, out.c);

    cls_scores.resize(out.w);
    for (int j = 0; j < out.w; j++)
    {
        cls_scores[j] = out[j];
    }

    return 0;
}
// Print the topk highest scores to stderr as "index = score" lines, highest
// first, and append their indices to index_vec in the same order.
//
// cls_scores - one score per class; the position is the class index.
// index_vec  - receives the selected class indices (appended, not cleared).
// topk       - number of entries wanted; clamped to [0, cls_scores.size()].
//
// Always returns 0.
static int print_topk(const std::vector<float>& cls_scores, std::vector<int>& index_vec, int topk)
{
    const int size = (int)cls_scores.size();

    // Clamp so that vec.begin() + topk never leaves the valid range.
    if (topk > size)
        topk = size;
    if (topk < 0)
        topk = 0;

    // Pair every score with its index so the index survives the sort.
    std::vector< std::pair<float, int> > vec;
    vec.reserve(size);
    for (int i = 0; i < size; i++)
    {
        vec.push_back(std::make_pair(cls_scores[i], i));
    }

    // Only the first topk positions need to be in sorted (descending) order.
    std::partial_sort(vec.begin(), vec.begin() + topk, vec.end(),
                      std::greater< std::pair<float, int> >());

    // print topk and score
    for (int i = 0; i < topk; i++)
    {
        const float score = vec[i].first;
        const int index = vec[i].second;
        index_vec.push_back(index);
        fprintf(stderr, "%d = %f\n", index, score);
    }

    return 0;
}
// Load class labels from a text file with one "<id> <description>" entry per line.
//
// For every line that contains a space, the text from the first space to the
// end of the line (the space itself included, the line terminator excluded)
// is appended to labels. Lines without a space are skipped.
//
// path   - file to read.
// labels - receives the labels, in file order (appended, not cleared).
//
// Returns 0 on success, -1 if the file cannot be opened.
static int load_labels(std::string path, std::vector<std::string>& labels)
{
    FILE* fp = fopen(path.c_str(), "r");
    if (!fp)
    {
        fprintf(stderr, "fopen %s failed\n", path.c_str());
        return -1;
    }

    char buf[1024];
    // Loop on fgets itself: testing feof() first would process the last
    // line a second time after the final read fails.
    while (fgets(buf, sizeof(buf), fp)) // read one line
    {
        std::string line(buf);

        // Drop the trailing "\n" / "\r\n" only if present, so a final line
        // without a terminator does not lose its last character.
        while (!line.empty() && (line[line.size() - 1] == '\n' || line[line.size() - 1] == '\r'))
        {
            line.erase(line.size() - 1);
        }

        const std::string::size_type sep = line.find(' ');
        if (sep == std::string::npos)
            continue;

        labels.push_back(line.substr(sep));
    }

    fclose(fp);
    return 0;
}
// Classify the image given on the command line, print the top-3 classes to
// stderr, draw their labels onto the image, save it as "test_result.jpg" and
// show it in a window until a key is pressed.
//
// Returns 0 on success, -1 on a usage error or when the image, the label
// file or the model cannot be processed.
int main(int argc, char** argv)
{
    if (argc != 2)
    {
        fprintf(stderr, "Usage: %s [imagepath]\n", argv[0]);
        return -1;
    }

    const char* imagepath = argv[1];

    cv::Mat m = cv::imread(imagepath, 1);
    if (m.empty())
    {
        fprintf(stderr, "cv::imread %s failed\n", imagepath);
        return -1;
    }

    std::vector<std::string> labels;
    if (load_labels("synset_words.txt", labels) != 0)
    {
        fprintf(stderr, "load_labels synset_words.txt failed\n");
        return -1;
    }

#if NCNN_VULKAN
    ncnn::create_gpu_instance();
#endif // NCNN_VULKAN

    std::vector<float> cls_scores;
    const int ret = detect_squeezenet(m, cls_scores);

    // Tear the GPU instance down before acting on the result, so it is
    // released on the failure path as well.
#if NCNN_VULKAN
    ncnn::destroy_gpu_instance();
#endif // NCNN_VULKAN

    if (ret != 0)
    {
        fprintf(stderr, "detect_squeezenet failed\n");
        return -1;
    }

    // show results
    std::vector<int> index;
    print_topk(cls_scores, index, 3);

    for (int i = 0; i < (int)index.size(); i++)
    {
        const int cls = index[i];
        // The label file may hold fewer entries than the model has classes.
        if (cls < 0 || cls >= (int)labels.size())
        {
            fprintf(stderr, "no label for class %d\n", cls);
            continue;
        }
        cv::putText(m, labels[cls], cv::Point(50, 50 + 30 * i), CV_FONT_HERSHEY_SIMPLEX, 1.2, cv::Scalar(0, 100, 200), 2, 8);
    }

    cv::imshow("m", m);
    cv::imwrite("test_result.jpg", m);
    cv::waitKey(0);

    return 0;
}
修改examples下的CMakeLists.txt, 添加一下我们的例子
# Build the mobilenetv3 example from mobilenetv3.cpp and link it against the
# libraries listed in NCNN_EXAMPLE_LINK_LIBRARIES.
add_executable(mobilenetv3 mobilenetv3.cpp)
target_link_libraries(mobilenetv3 ${NCNN_EXAMPLE_LINK_LIBRARIES})
在ncnn根目录下放开编译examples
add_subdirectory(examples)
重新make一把,在build/examples/ 下就有mobilenetv3这个例子了,把转换得到的ncnn模型mobilenetv3.param、mobilenetv3.bin和synset_words.txt全移到该目录下,找一张图片跑一下,这次换个狗的图片
貌似是ok的
1.这一步基本可以参考ncnn上列的3个Example project,ncnn的库可以采用对应版本的预编译库,也可以自己编译。真机测试时参考adb说明,确保手机可以识别。手机打开USB调试。
# Make sure that you are in the plugdev group.
# Use `id` to see what groups you are in. Use `sudo usermod -aG plugdev $LOGNAME` to add yourself to the plugdev group.
# Installing packages needs root, like the other apt-get steps.
sudo apt-get install adb
cd android_sdk/platform-tools/
# The phone must appear in this list before running the app on it.
adb devices
2.编译成功后,点击run,选择对应手机即可,工程的例子见此。
这个例子中,CPU识别的结果是正确的。mobilenetv3转换到ncnn后有一些 BinaryOp的操作,但是目前vulkan的实现中暂不支持 BinaryOp(broadcasting),所以如果选择 识别-gpu,结果是不正确的。