Converting PyTorch to ONNX to MNN, with Verification

Models trained with PyTorch usually need to be converted to ONNX or MNN for real-world deployment. The trained model must first be exported to ONNX:

import torch
import torch.onnx

from mobilenetv2 import MobileNetV2


if __name__ == '__main__':
    # build the network and load the trained weights
    model = MobileNetV2(2)
    model_path = './model/mobilenetv2.mdl'
    model.load_state_dict(torch.load(model_path, map_location=torch.device('cpu')))
    model.eval()  # switch to inference mode before exporting

    # a dummy input fixes the exported input shape: batch, channel, height, width
    dummy_input = torch.randn([1, 3, 32, 32])
    torch.onnx.export(model, dummy_input, model_path.replace('mdl', 'onnx'),
                      verbose=True, input_names=['input'], output_names=['output'],
                      opset_version=11)
    print('Done!')
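
Before converting further, the exported graph can be sanity-checked with the onnx package (a minimal sketch; it assumes onnx is installed via pip):

import onnx

onnx_model = onnx.load('./model/mobilenetv2.onnx')
onnx.checker.check_model(onnx_model)  # raises an exception if the graph is malformed
print(onnx.helper.printable_graph(onnx_model.graph))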

After the conversion succeeds, convert the ONNX model to MNN with the MNNConvert tool:

./MNNConvert -f ONNX --modelFile XXX.onnx --MNNModel XXX.mnn --bizCode biz
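
For the model used in the tests below, the concrete invocation would be (assuming MNNConvert is run from the directory that contains it):

./MNNConvert -f ONNX --modelFile models/best-mobilenetv2.onnx --MNNModel models/best-mobilenetv2.mnn --bizCode biz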

Test the PyTorch result:

import argparse
import cv2
import torch
from torchvision import transforms
from PIL import Image
from mobilenetv2 import MobileNetV2


def parse_args():
    parser = argparse.ArgumentParser()
    parser.add_argument('--image_path', default=None,
                        help='path of the input image')
    args = parser.parse_args()
    return args


def main():
    args = parse_args()
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    start = cv2.getTickCount()
    # create model
    model = MobileNetV2(2).to(device)
    model.load_state_dict(torch.load('models/best-mobilenetv2.mdl',map_location=torch.device('cpu')))
    model.eval()
    
    img = args.image_path
    tf = transforms.Compose([
        lambda x: Image.open(x).convert('RGB'),  # string path => image data
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225])
    ])

    img = tf(img).unsqueeze(0)

    x = img.to(device)

    outputs = model(x)

    # report the class with the highest probability
    _, indices = torch.max(outputs, 1)
    percentage = torch.nn.functional.softmax(outputs, dim=1)[0] * 100
    perc = percentage[int(indices)].item()

    print('prob:', perc)
    print('id:', int(indices))
    end = cv2.getTickCount()
    during = (end - start) / cv2.getTickFrequency()
    print("avg_time:", during)
    
if __name__ == '__main__':
    main()

Test the ONNX result, which matches the PyTorch result:

import argparse
import onnxruntime
import cv2
from torchvision import transforms
from PIL import Image

def parse_args():
    parser = argparse.ArgumentParser()
    parser.add_argument('--image_path', default=None,
                        help='path of the input image')
    args = parser.parse_args()
    return args

def to_numpy(tensor):
    return tensor.detach().cpu().numpy() if tensor.requires_grad else tensor.cpu().numpy()

def main():
    args = parse_args()
    start = cv2.getTickCount()
    model = 'models/best-mobilenetv2.onnx'
    ort_session = onnxruntime.InferenceSession(model)

    img = args.image_path
    tf = transforms.Compose([
        lambda x: Image.open(x).convert('RGB'),  # string path => image data
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225])
    ])

    img = tf(img).unsqueeze(0)

    inputs = {ort_session.get_inputs()[0].name: to_numpy(img)}
    outputs = ort_session.run(None, inputs)
    print(outputs)
    end = cv2.getTickCount()
    during = (end - start) / cv2.getTickFrequency()
    print("avg_time:", during)


if __name__ == '__main__':
    main()
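
To make the claimed agreement concrete, the two outputs can be compared numerically. A minimal sketch, assuming torch_out holds the PyTorch model's output for the same image and outputs is the list returned by ort_session.run above:

import numpy as np

# element-wise comparison of the two logit vectors; raises if they diverge
np.testing.assert_allclose(to_numpy(torch_out), outputs[0], rtol=1e-3, atol=1e-5)
print('PyTorch and ONNX Runtime outputs match within tolerance')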

Test the MNN result, which matches the previous results but runs nearly 20x faster:

import argparse
import MNN
import cv2
from torchvision import transforms
from PIL import Image
import numpy as np


def parse_args():
    parser = argparse.ArgumentParser()
    parser.add_argument('--image_path', default=None,
                        help='path of the input image')
    args = parser.parse_args()
    return args

def to_numpy(tensor):
    return tensor.detach().cpu().numpy() if tensor.requires_grad else tensor.cpu().numpy()

def main():
    args = parse_args()
    start = cv2.getTickCount()
    model = 'models/best-mobilenetv2.mnn'
    interpreter = MNN.Interpreter(model)

    mnn_session = interpreter.createSession()
    input_tensor = interpreter.getSessionInput(mnn_session)
   
    img = args.image_path
    tf = transforms.Compose([
        lambda x: Image.open(x).convert('RGB'),  # string path => image data
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225])
    ])
    img = tf(img).unsqueeze(0)

    # wrap the preprocessed image in an MNN tensor (NCHW / Caffe layout)
    tmp_input = MNN.Tensor((1, 3, 32, 32), MNN.Halide_Type_Float,
                           to_numpy(img[0]), MNN.Tensor_DimensionType_Caffe)
    print(tmp_input.getShape())
    # print(tmp_input.getData())
    print(input_tensor.copyFrom(tmp_input))
    input_tensor.printTensorData()

    interpreter.runSession(mnn_session)
    output_tensor = interpreter.getSessionOutput(mnn_session, 'output')
    output_tensor.printTensorData()
    output_data = np.array(output_tensor.getData())
    print('mnn result is:', output_data)
    print("output belong to class: {}".format(np.argmax(output_tensor.getData())))
    end = cv2.getTickCount()
    during = (end - start) / cv2.getTickFrequency()
    print("avg_time:", during)


if __name__ == '__main__':
    main()
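
The MNN script prints raw logits, while the PyTorch test printed a softmax percentage. To compare the scores directly, the same softmax can be applied to output_data from above (a small NumPy sketch):

# numerically stable softmax over the raw logits
exp = np.exp(output_data - np.max(output_data))
probs = exp / exp.sum()
print('prob:', probs[np.argmax(output_data)])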

Rewriting the test in C++ with MNN gives the same result; the code can then be compiled into a library:

// mnn_test.cpp : entry point of the console application

#include "stdafx.h"
#include <iostream>
#include <memory>
#include <vector>
#include <cmath>
#include <opencv2/opencv.hpp>
#include <MNN/Interpreter.hpp>
#include <MNN/Tensor.hpp>

#define IMAGE_VERIFY_SIZE 32
#define CLASSES_SIZE 2
#define INPUT_NAME "input"
#define OUTPUT_NAME "output"


cv::Mat BGRToRGB(cv::Mat img)
{
	cv::Mat image(img.rows, img.cols, CV_8UC3);
	for (int i = 0; i < img.rows; i++) {
		cv::Vec3b *p1 = img.ptr<cv::Vec3b>(i);
		cv::Vec3b *p2 = image.ptr<cv::Vec3b>(i);
		for (int j = 0; j < img.cols; j++) {
			p2[j][2] = p1[j][0];  // swap B and R channels
			p2[j][1] = p1[j][1];
			p2[j][0] = p1[j][2];
		}
	}
	return image;
}

int main(int argc, char* argv[])
{
	// model and test image paths: adjust to your environment
	const char* mnn_model_path = "models/best-mobilenetv2.mnn";
	const char* image_path = argv[1];
	auto mnnNet = std::shared_ptr<MNN::Interpreter>(MNN::Interpreter::createFromFile(mnn_model_path));
	MNN::ScheduleConfig netConfig;
	netConfig.type = MNN_FORWARD_CPU;
	netConfig.numThread = 4;
	auto session = mnnNet->createSession(netConfig);

	auto input = mnnNet->getSessionInput(session, INPUT_NAME);
	if (input->elementSize() <= 4) {
		// the input shape is not fixed in the model: resize it to 1x3x32x32
		mnnNet->resizeTensor(input, { 1, 3, IMAGE_VERIFY_SIZE, IMAGE_VERIFY_SIZE });
		mnnNet->resizeSession(session);
	}
	std::cout << "input shape: " << input->shape()[0] << " " << input->shape()[1] << " " << input->shape()[2] << " " << input->shape()[3] << std::endl;

	// preprocess image
	MNN::Tensor givenTensor(input, MNN::Tensor::CAFFE);
	// const int inputSize = givenTensor.elementSize();
	// std::cout << inputSize << std::endl;
	auto inputData = givenTensor.host<float>();
	cv::Mat bgr_image = cv::imread(image_path);
	bgr_image = BGRToRGB(bgr_image);
	cv::Mat norm_image;
	cv::resize(bgr_image, norm_image, cv::Size(IMAGE_VERIFY_SIZE, IMAGE_VERIFY_SIZE));
	for (int k = 0; k < 3; k++) {
		for (int i = 0; i < norm_image.rows; i++) {
			for (int j = 0; j < norm_image.cols; j++) {
				const auto src = norm_image.at<cv::Vec3b>(i, j)[k];
				auto dst = 0.0;
				if (k == 0) dst = (float(src) / 255.0f - 0.485) / 0.229;
				if (k == 1) dst = (float(src) / 255.0f - 0.456) / 0.224;
				if (k == 2) dst = (float(src) / 255.0f - 0.406) / 0.225;
				inputData[k * IMAGE_VERIFY_SIZE * IMAGE_VERIFY_SIZE + i * IMAGE_VERIFY_SIZE + j] = dst;
			}
		}
	}
	input->copyFromHostTensor(&givenTensor);
	double st = (double)cv::getTickCount();
	// run session
	mnnNet->runSession(session);
	double et = ((double)cv::getTickCount() - st) / cv::getTickFrequency() * 1000.0;
	std::cout << " speed: " << et << " ms" << std::endl;
	// get output data
	auto output = mnnNet->getSessionOutput(session, OUTPUT_NAME);
	// std::cout << "output shape: " << output->shape()[0] << " " << output->shape()[1] << std::endl;
	auto output_host = std::make_shared<MNN::Tensor>(output, MNN::Tensor::CAFFE);
	output->copyToHostTensor(output_host.get());
	auto values = output_host->host<float>();

	// post process: argmax for the class id, softmax for the probability
	std::vector<float> output_values;
	auto exp_sum = 0.0;
	auto max_index = 0;
	for (int i = 0; i < CLASSES_SIZE; i++) {
		if (values[i] > values[max_index]) max_index = i;
		output_values.push_back(values[i]);
		exp_sum += std::exp(values[i]);
	}
	
	std::cout << "output: " << output_values[0]<<","<< output_values[1] << std::endl;
	std::cout << "id: " << max_index << std::endl;
	std::cout << "prob: " << std::exp(output_values[max_index]) / exp_sum << std::endl;
	system("pause");
	return 0;
}
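
To build and run the test outside Visual Studio, a typical command line would look like this (a sketch, assuming MNN and OpenCV are installed where the compiler and linker can find them; drop the stdafx.h include in that case, and test.jpg stands for any input image):

g++ mnn_test.cpp -o mnn_test -std=c++11 `pkg-config --cflags --libs opencv4` -lMNN
./mnn_test test.jpg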

 
