Models trained with PyTorch usually need to be converted to ONNX or MNN before deployment. The first step is to convert the trained model to ONNX:
import torch
import torch.onnx
from mobilenetv2 import MobileNetV2

if __name__ == '__main__':
    model = MobileNetV2(2)
    model_path = './model/mobilenetv2.mdl'
    model.load_state_dict(torch.load(model_path, map_location=torch.device('cpu')))
    model.eval()  # switch to inference mode before exporting
    dummy_input = torch.randn([1, 3, 32, 32])  # batch, channel, height, width
    torch.onnx.export(model, dummy_input, model_path.replace('mdl', 'onnx'),
                      verbose=True, input_names=['input'], output_names=['output'],
                      opset_version=11)
    print('Done!')
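Before converting further, it is worth validating the exported graph. A minimal sketch, assuming the path above and that the onnx package is installed:
import onnx

onnx_model = onnx.load('./model/mobilenetv2.onnx')
onnx.checker.check_model(onnx_model)  # raises if the graph is malformed
print('ONNX graph is well formed')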
After the conversion succeeds, convert the ONNX model to MNN with the MNN conversion tool:
./MNNConvert -f ONNX --modelFile XXX.onnx --MNNModel XXX.mnn --bizCode biz
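For the model used later in this post, the call would look like this (assuming MNNConvert sits in the current directory):
./MNNConvert -f ONNX --modelFile best-mobilenetv2.onnx --MNNModel best-mobilenetv2.mnn --bizCode biz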
Test the PyTorch result:
import argparse
import cv2
import torch
from torchvision import transforms
from PIL import Image
from mobilenetv2 import MobileNetV2

def parse_args():
    parser = argparse.ArgumentParser()
    parser.add_argument('--image_path', default=None,
                        help='the path of the image')
    args = parser.parse_args()
    return args

def main():
    args = parse_args()
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    start = cv2.getTickCount()
    # create model
    model = MobileNetV2(2).to(device)
    model.load_state_dict(torch.load('models/best-mobilenetv2.mdl', map_location=torch.device('cpu')))
    model.eval()
    tf = transforms.Compose([
        lambda x: Image.open(x).convert('RGB'),  # string path => image data
        transforms.Resize((32, 32)),             # match the 32x32 input the model was exported with
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225])
    ])
    img = tf(args.image_path).unsqueeze(0)
    x = img.to(device)
    with torch.no_grad():
        outputs = model(x)
    # pick the class with the highest probability
    _, indices = torch.max(outputs, 1)
    percentage = torch.nn.functional.softmax(outputs, dim=1)[0] * 100
    perc = percentage[int(indices)].item()
    print('predicted:', perc)
    print('id:', int(indices))
    end = cv2.getTickCount()
    during = (end - start) / cv2.getTickFrequency()
    print("time (s):", during)

if __name__ == '__main__':
    main()
Test the ONNX result, which should match the PyTorch result:
import argparse
import cv2
import onnxruntime
import torch
from torchvision import transforms
from PIL import Image

def parse_args():
    parser = argparse.ArgumentParser()
    parser.add_argument('--image_path', default=None,
                        help='the path of the image')
    args = parser.parse_args()
    return args

def to_numpy(tensor):
    return tensor.detach().cpu().numpy() if tensor.requires_grad else tensor.cpu().numpy()

def main():
    args = parse_args()
    start = cv2.getTickCount()
    model = 'models/best-mobilenetv2.onnx'
    ort_session = onnxruntime.InferenceSession(model)
    tf = transforms.Compose([
        lambda x: Image.open(x).convert('RGB'),  # string path => image data
        transforms.Resize((32, 32)),             # match the 32x32 input the model was exported with
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225])
    ])
    img = tf(args.image_path).unsqueeze(0)
    inputs = {ort_session.get_inputs()[0].name: to_numpy(img)}
    outputs = ort_session.run(None, inputs)
    print(outputs)
    end = cv2.getTickCount()
    during = (end - start) / cv2.getTickFrequency()
    print("time (s):", during)

if __name__ == '__main__':
    main()
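To check the claim numerically instead of eyeballing the printed logits, a small sketch, assuming the same model pair as above, that asserts both backends agree within floating-point tolerance:
import numpy as np
import onnxruntime
import torch
from mobilenetv2 import MobileNetV2

model = MobileNetV2(2)
model.load_state_dict(torch.load('models/best-mobilenetv2.mdl', map_location=torch.device('cpu')))
model.eval()
sess = onnxruntime.InferenceSession('models/best-mobilenetv2.onnx')

x = torch.randn(1, 3, 32, 32)  # any input of the exported shape works for the check
with torch.no_grad():
    torch_out = model(x).numpy()
onnx_out = sess.run(None, {'input': x.numpy()})[0]
np.testing.assert_allclose(torch_out, onnx_out, rtol=1e-3, atol=1e-5)  # raises if they diverge
print('PyTorch and ONNX Runtime agree')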
Test the MNN result, which matches the earlier results but runs nearly 20x faster:
import argparse
import cv2
import MNN
import numpy as np
import torch
from torchvision import transforms
from PIL import Image

def parse_args():
    parser = argparse.ArgumentParser()
    parser.add_argument('--image_path', default=None,
                        help='the path of the image')
    args = parser.parse_args()
    return args

def to_numpy(tensor):
    return tensor.detach().cpu().numpy() if tensor.requires_grad else tensor.cpu().numpy()

def main():
    args = parse_args()
    start = cv2.getTickCount()
    model = 'models/best-mobilenetv2.mnn'
    interpreter = MNN.Interpreter(model)
    mnn_session = interpreter.createSession()
    input_tensor = interpreter.getSessionInput(mnn_session)
    tf = transforms.Compose([
        lambda x: Image.open(x).convert('RGB'),  # string path => image data
        transforms.Resize((32, 32)),             # match the 32x32 input the model was exported with
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225])
    ])
    img = tf(args.image_path).unsqueeze(0)
    tmp_input = MNN.Tensor((1, 3, 32, 32), MNN.Halide_Type_Float,
                           to_numpy(img[0]), MNN.Tensor_DimensionType_Caffe)
    print(tmp_input.getShape())
    print(input_tensor.copyFrom(tmp_input))  # True on success
    input_tensor.printTensorData()           # debug print of the input tensor
    interpreter.runSession(mnn_session)
    output_tensor = interpreter.getSessionOutput(mnn_session, 'output')
    output_tensor.printTensorData()          # debug print of the output tensor
    output_data = np.array(output_tensor.getData())
    print('mnn result is:', output_data)
    print("output belongs to class: {}".format(np.argmax(output_data)))
    end = cv2.getTickCount()
    during = (end - start) / cv2.getTickFrequency()
    print("time (s):", during)

if __name__ == '__main__':
    main()
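The single-run timings above include session creation and image decoding, so they understate the steady-state gap. A minimal benchmarking sketch, assuming the same model files (the exact speed-up will vary by machine):
import time
import numpy as np
import MNN
import onnxruntime

x = np.random.randn(1, 3, 32, 32).astype(np.float32)

# ONNX Runtime: warm up once, then time only session.run
sess = onnxruntime.InferenceSession('models/best-mobilenetv2.onnx')
sess.run(None, {'input': x})
t0 = time.perf_counter()
for _ in range(100):
    sess.run(None, {'input': x})
ort_ms = (time.perf_counter() - t0) * 1000 / 100

# MNN: copy the input once, then time only runSession
interpreter = MNN.Interpreter('models/best-mobilenetv2.mnn')
session = interpreter.createSession()
inp = interpreter.getSessionInput(session)
inp.copyFrom(MNN.Tensor((1, 3, 32, 32), MNN.Halide_Type_Float,
                        x, MNN.Tensor_DimensionType_Caffe))
interpreter.runSession(session)  # warm-up
t0 = time.perf_counter()
for _ in range(100):
    interpreter.runSession(session)
mnn_ms = (time.perf_counter() - t0) * 1000 / 100

print('onnxruntime: {:.3f} ms/run, MNN: {:.3f} ms/run'.format(ort_ms, mnn_ms))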
Rewrite the test in C++ with MNN; the result is the same, and the code can then be built into a library:
// mnn_test.cpp : entry point of the console application.
#include "stdafx.h"
#include <iostream>
#include <string>
#include <vector>
#include <memory>
#include <cmath>
#include <opencv2/opencv.hpp>
#include <MNN/Interpreter.hpp>

#define IMAGE_VERIFY_SIZE 32
#define CLASSES_SIZE 2
#define INPUT_NAME "input"
#define OUTPUT_NAME "output"

// swap B and R channels so the buffer matches the RGB data used in training
cv::Mat BGRToRGB(cv::Mat img)
{
	cv::Mat image(img.rows, img.cols, CV_8UC3);
	for (int i = 0; i < img.rows; i++) {
		cv::Vec3b *p1 = img.ptr<cv::Vec3b>(i);
		cv::Vec3b *p2 = image.ptr<cv::Vec3b>(i);
		for (int j = 0; j < img.cols; j++) {
			p2[j][2] = p1[j][0];
			p2[j][1] = p1[j][1];
			p2[j][0] = p1[j][2];
		}
	}
	return image;
}

int main()
{
	// paths are placeholders; point them at your model and test image
	const char *mnn_model_path = "models/best-mobilenetv2.mnn";
	const char *image_path = "test.jpg";
	auto mnnNet = std::shared_ptr<MNN::Interpreter>(MNN::Interpreter::createFromFile(mnn_model_path));
	MNN::ScheduleConfig netConfig;
	netConfig.type = MNN_FORWARD_CPU;
	netConfig.numThread = 4;
	auto session = mnnNet->createSession(netConfig);
	auto input = mnnNet->getSessionInput(session, INPUT_NAME);
	if (input->elementSize() <= 4) {
		mnnNet->resizeTensor(input, { 1, 3, IMAGE_VERIFY_SIZE, IMAGE_VERIFY_SIZE });
		mnnNet->resizeSession(session);
	}
	std::cout << "input shape: " << input->shape()[0] << " " << input->shape()[1] << " " << input->shape()[2] << " " << input->shape()[3] << std::endl;
	// preprocess image
	MNN::Tensor givenTensor(input, MNN::Tensor::CAFFE);
	auto inputData = givenTensor.host<float>();
	cv::Mat bgr_image = cv::imread(image_path);
	bgr_image = BGRToRGB(bgr_image);
	cv::Mat norm_image;
	cv::resize(bgr_image, norm_image, cv::Size(IMAGE_VERIFY_SIZE, IMAGE_VERIFY_SIZE));
	// per-channel normalization with the same ImageNet mean/std as in training
	for (int k = 0; k < 3; k++) {
		for (int i = 0; i < norm_image.rows; i++) {
			for (int j = 0; j < norm_image.cols; j++) {
				const auto src = norm_image.at<cv::Vec3b>(i, j)[k];
				auto dst = 0.0;
				if (k == 0) dst = (float(src) / 255.0f - 0.485) / 0.229;
				if (k == 1) dst = (float(src) / 255.0f - 0.456) / 0.224;
				if (k == 2) dst = (float(src) / 255.0f - 0.406) / 0.225;
				inputData[k * IMAGE_VERIFY_SIZE * IMAGE_VERIFY_SIZE + i * IMAGE_VERIFY_SIZE + j] = dst;
			}
		}
	}
	input->copyFromHostTensor(&givenTensor);
	int64 st = cv::getTickCount();
	// run session
	mnnNet->runSession(session);
	double et = (cv::getTickCount() - st) * 1000.0 / cv::getTickFrequency();  // ms
	std::cout << " speed: " << et << " ms" << std::endl;
	// get output data
	auto output = mnnNet->getSessionOutput(session, OUTPUT_NAME);
	auto output_host = std::make_shared<MNN::Tensor>(output, MNN::Tensor::CAFFE);
	output->copyToHostTensor(output_host.get());
	auto values = output_host->host<float>();
	// post process: argmax plus a softmax for the winning probability
	std::vector<float> output_values;
	auto exp_sum = 0.0;
	auto max_index = 0;
	for (int i = 0; i < CLASSES_SIZE; i++) {
		if (values[i] > values[max_index]) max_index = i;
		output_values.push_back(values[i]);
		exp_sum += std::exp(values[i]);
	}
	std::cout << "output: " << output_values[0] << "," << output_values[1] << std::endl;
	std::cout << "id: " << max_index << std::endl;
	std::cout << "prob: " << std::exp(output_values[max_index]) / exp_sum << std::endl;
	system("pause");
	return 0;
}