Deploying a PyTorch Model to a C++ Environment

Three ways to deploy a PyTorch model to a C++ environment

Contents

  • Three ways to deploy a PyTorch model to a C++ environment
  • Preface
  • I. pytorch2onnx
  • II. Three deployment methods
    • 1. Loading ONNX with OpenCV
    • 2. Loading ONNX with onnxruntime
    • 3. Deploying with libtorch
  • References


Preface

For work I needed to deploy a PyTorch model to a C++ environment. There are currently roughly three ways to do this:
1. Convert the PyTorch model to an ONNX file and load it with OpenCV.
2. Convert the PyTorch model to an ONNX file and load it with onnxruntime.
3. Use libtorch, the C++ distribution of PyTorch.


I. pytorch2onnx

First, export the trained PyTorch model to an ONNX file.

Install the required packages:
pip install onnx
pip install onnxruntime

from nets.deeplabv3 import deeplabv3  # import your own model here
import torch
import os
from PIL import Image
import numpy as np
import onnx
import onnxruntime

def preprocess_input(image):
    image /= 255.0
    return image

def cvtColor(image):
    if len(np.shape(image)) == 3 and np.shape(image)[-1] == 3:
        return image
    else:
        image = image.convert('RGB')
        return image

# Check that the ONNX model's output matches the PyTorch output
def check_onnx_output(filename, input_data, torch_output):
    print("Testing the ONNX model")
    session = onnxruntime.InferenceSession(filename)
    input_name = session.get_inputs()[0].name
    result = session.run([], {input_name: input_data.detach().cpu().numpy()})
    for test_result, gold_result in zip(result, torch_output.values()):
        np.testing.assert_almost_equal(
            gold_result.cpu().numpy(), test_result, decimal=3,
        )
    return result
# Check the exported ONNX model
def check_onnx_model(model, onnx_filename, input_image):
    with torch.no_grad():
        torch_out = {"output": model(input_image)}
    check_onnx_output(onnx_filename, input_image, torch_out)
    print("模型输出一致")
    onnx_model = onnx.load(onnx_filename)
    onnx.checker.check_model(onnx_model)
    print("模型测试成功")
    return onnx_model


if __name__ == '__main__':
    # model path
    model_path = 'net.pth'
    onnx_path = os.path.split(model_path)[0] + '/'
    device = 'cpu'
    # image path
    VOCdevkit_path = './1.jpg'

    img = Image.open(VOCdevkit_path)
    img = cvtColor(img)
    img  = np.expand_dims(np.transpose(preprocess_input(np.array(img, np.float32)), (2, 0, 1)), 0)
    img = torch.from_numpy(img)

    net = deeplabv3()
    net.load_state_dict(torch.load(model_path, map_location=device), strict=True)
    net = net.eval()
    out = net(img)
    print(out)

    torch.onnx.export(net, img, onnx_path + "torch.onnx", verbose=True ,input_names=["input"], output_names=["output"], opset_version=11)

    # traced_cpu = torch.jit.trace(net, img)
    # torch.jit.save(traced_cpu, onnx_path + "cpu.pt")

    # Check that the exported ONNX model is well formed and its output matches PyTorch
    onnx_name = onnx_path + "torch.onnx"
    onnx_model = check_onnx_model(net, onnx_name, img)

II. Three deployment methods

1. Loading ONNX with OpenCV

#include <opencv2/opencv.hpp>
#include <opencv2/dnn.hpp>
#include <iostream>
using namespace std;
using namespace cv;

int main()
{
    String modelFile = "./torch.onnx";
    String imageFile = "./1.jpg";

    dnn::Net net = dnn::readNetFromONNX(modelFile); // load the network and weights

    // step 1: read the image in HWC BGR UINT8 format
    Mat imageBGR = imread(imageFile, IMREAD_COLOR);
    // step 2: resize the image
    Mat resizedImageRGB, resizedImage, preprocessedImage;
    resize(imageBGR, resizedImage, Size(500, 500), 0, 0, INTER_AREA);
    // step 3: convert the image to HWC RGB UINT8 format
    cvtColor(resizedImage, resizedImageRGB, COLOR_BGR2RGB);
    // step 4: convert the image to HWC RGB float format by dividing each pixel by 255
    resizedImageRGB.convertTo(resizedImage, CV_32F, 1.0 / 255);
    // step 5: split the RGB channels (per-channel mean/std normalization would go here if needed)
    Mat channels[3];
    split(resizedImage, channels);
    // step 6: merge the RGB channels back into one image
    merge(channels, 3, resizedImage);
    // step 7: convert the image to NCHW float format (HWC to CHW plus a batch dimension)
    dnn::blobFromImage(resizedImage, preprocessedImage);

    net.setInput(preprocessedImage); // set the input blob
    Mat result = net.forward();      // run forward inference
    cout << result << endl;
    return 0;
}
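
The example above stops at net.forward(). As a minimal sketch (an assumption, not part of the original post), if the exported segmentation model returns a 1xCxHxW float score map, the scores can be collapsed into a per-pixel class-index mask with a small helper such as the hypothetical scoresToMask below:

// Hedged sketch: collapse a 1xCxHxW float score blob from net.forward()
// into a single-channel class-index mask (argmax over the class dimension).
cv::Mat scoresToMask(const cv::Mat& result)
{
    const int C = result.size[1];
    const int H = result.size[2];
    const int W = result.size[3];
    const float* scores = reinterpret_cast<const float*>(result.data);
    cv::Mat mask(H, W, CV_8UC1);
    for (int y = 0; y < H; ++y) {
        for (int x = 0; x < W; ++x) {
            int best = 0;
            float bestScore = scores[(0 * H + y) * W + x];
            for (int c = 1; c < C; ++c) {
                float s = scores[(c * H + y) * W + x];
                if (s > bestScore) { bestScore = s; best = c; }
            }
            mask.at<uchar>(y, x) = static_cast<uchar>(best);
        }
    }
    return mask;
}

For a binary foreground/background model, cv::imwrite("mask.png", scoresToMask(result) * 255); then saves a viewable mask.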

2. Loading ONNX with onnxruntime

For installing onnxruntime, see the references at the end of this post.

The example below deploys a semantic segmentation model.

#include <onnxruntime_cxx_api.h>

#include <opencv2/opencv.hpp>
#include <opencv2/dnn.hpp>

#include <algorithm>
#include <cstdio>
#include <iostream>
#include <string>
#include <vector>

using namespace cv;
using namespace std;
using namespace cv::dnn;

bool CheckStatus(const OrtApi* g_ort, OrtStatus* status) {
    if (status != nullptr) {
        const char* msg = g_ort->GetErrorMessage(status);
        std::cerr << msg << std::endl;
        g_ort->ReleaseStatus(status);
        throw Ort::Exception(msg, OrtErrorCode::ORT_EP_FAIL);
    }
    return true;
}

// Image preprocessing: scale pixels to [0,1] and reorder the channels from BGR to RGB
void PreProcess(const Mat& image, Mat& image_blob)
{
    Mat input;
    image.copyTo(input);

    // scale each channel to [0, 1]
    std::vector<Mat> channels, channel_p;
    split(input, channels);
    Mat R, G, B;
    B = channels.at(0);
    G = channels.at(1);
    R = channels.at(2);

    B = B / 255.0;
    G = G / 255.0;
    R = R / 255.0;

    channel_p.push_back(R);
    channel_p.push_back(G);
    channel_p.push_back(B);

    Mat outt;
    merge(channel_p, outt);
    image_blob = outt;
}


void run_ort_net(std::string backend, std::string input_path) {
#ifdef _WIN32
    const wchar_t* model_path = L"F:/visual studio workplace/torch.onnx";
#else
    const char* model_path = "F:/visual studio workplace/torch.onnx";
#endif

    const OrtApi* g_ort = OrtGetApiBase()->GetApi(ORT_API_VERSION);
    OrtEnv* env;
    CheckStatus(g_ort, g_ort->CreateEnv(ORT_LOGGING_LEVEL_WARNING, "test", &env));

    OrtSessionOptions* session_options;
    CheckStatus(g_ort, g_ort->CreateSessionOptions(&session_options));
    CheckStatus(g_ort, g_ort->SetIntraOpNumThreads(session_options, 1));
    CheckStatus(g_ort, g_ort->SetSessionGraphOptimizationLevel(session_options, ORT_ENABLE_BASIC));

    std::vector<const char*> options_keys = { "runtime", "buffer_type" };
    std::vector<const char*> options_values = { backend.c_str(), "FLOAT" };  // set to TF8 if use quantized data

    OrtSession* session;
    CheckStatus(g_ort, g_ort->CreateSession(env, model_path, session_options, &session));

    OrtAllocator* allocator;
    CheckStatus(g_ort, g_ort->GetAllocatorWithDefaultOptions(&allocator));
    size_t num_input_nodes;
    CheckStatus(g_ort, g_ort->SessionGetInputCount(session, &num_input_nodes));

    std::vector<const char*> input_node_names;
    std::vector<std::vector<int64_t>> input_node_dims;
    std::vector<ONNXTensorElementDataType> input_types;
    std::vector<OrtValue*> input_tensors;

    input_node_names.resize(num_input_nodes);
    input_node_dims.resize(num_input_nodes);
    input_types.resize(num_input_nodes);
    input_tensors.resize(num_input_nodes);

    for (size_t i = 0; i < num_input_nodes; i++) {
        // Get input node names
        char* input_name;
        CheckStatus(g_ort, g_ort->SessionGetInputName(session, i, allocator, &input_name));
        input_node_names[i] = input_name;

        std::cout << "input name :" << input_name << std::endl;

        // Get input node types
        OrtTypeInfo* typeinfo;
        CheckStatus(g_ort, g_ort->SessionGetInputTypeInfo(session, i, &typeinfo));
        const OrtTensorTypeAndShapeInfo* tensor_info;
        CheckStatus(g_ort, g_ort->CastTypeInfoToTensorInfo(typeinfo, &tensor_info));
        ONNXTensorElementDataType type;
        CheckStatus(g_ort, g_ort->GetTensorElementType(tensor_info, &type));
        input_types[i] = type;

        // Get input shapes/dims
        size_t num_dims;
        CheckStatus(g_ort, g_ort->GetDimensionsCount(tensor_info, &num_dims));
        input_node_dims[i].resize(num_dims);
        CheckStatus(g_ort, g_ort->GetDimensions(tensor_info, input_node_dims[i].data(), num_dims));

        std::cout << "input dims :" << num_dims << std::endl;

        size_t tensor_size;
        CheckStatus(g_ort, g_ort->GetTensorShapeElementCount(tensor_info, &tensor_size));

        if (typeinfo) g_ort->ReleaseTypeInfo(typeinfo);
    }

    size_t num_output_nodes;
    std::vector<const char*> output_node_names;
    std::vector<std::vector<int64_t>> output_node_dims;
    std::vector<OrtValue*> output_tensors;
    CheckStatus(g_ort, g_ort->SessionGetOutputCount(session, &num_output_nodes));
    output_node_names.resize(num_output_nodes);
    output_node_dims.resize(num_output_nodes);
    output_tensors.resize(num_output_nodes);

    for (size_t i = 0; i < num_output_nodes; i++) {
        // Get output node names
        char* output_name;
        CheckStatus(g_ort, g_ort->SessionGetOutputName(session, i, allocator, &output_name));
        output_node_names[i] = output_name;

        std::cout << "output dims :" << output_name << std::endl;

        OrtTypeInfo* typeinfo;
        CheckStatus(g_ort, g_ort->SessionGetOutputTypeInfo(session, i, &typeinfo));
        const OrtTensorTypeAndShapeInfo* tensor_info;
        CheckStatus(g_ort, g_ort->CastTypeInfoToTensorInfo(typeinfo, &tensor_info));

        // Get output shapes/dims
        size_t num_dims;
        CheckStatus(g_ort, g_ort->GetDimensionsCount(tensor_info, &num_dims));
        output_node_dims[i].resize(num_dims);
        CheckStatus(g_ort, g_ort->GetDimensions(tensor_info, (int64_t*)output_node_dims[i].data(), num_dims));

        std::cout << "output dims :" << num_dims << std::endl;

        size_t tensor_size;
        CheckStatus(g_ort, g_ort->GetTensorShapeElementCount(tensor_info, &tensor_size));

        if (typeinfo) g_ort->ReleaseTypeInfo(typeinfo);
    }

    // load the image
    Mat img = imread(input_path);
    Mat det1;
    //resize(img, det1, Size(500, 500), INTER_AREA);
    img.convertTo(img, CV_32FC3);
    PreProcess(img, det1);         // normalize: scale to [0,1] and reorder BGR -> RGB
    Mat blob = dnn::blobFromImage(det1, 1., Size(500, 500), Scalar(0, 0, 0), false, false);
    printf("Load success!\n");

    OrtMemoryInfo* memory_info;
    CheckStatus(g_ort, g_ort->CreateCpuMemoryInfo(OrtArenaAllocator, OrtMemTypeDefault, &memory_info));
    CheckStatus(g_ort, g_ort->CreateTensorWithDataAsOrtValue(memory_info, blob.ptr<float>(), blob.total() * sizeof(float), input_node_dims[0].data(),
        input_node_dims[0].size(), input_types[0], &input_tensors[0]));

    CheckStatus(g_ort, g_ort->Run(session, nullptr, input_node_names.data(), (const OrtValue* const*)input_tensors.data(),
        input_tensors.size(), output_node_names.data(), output_node_names.size(),
        output_tensors.data()));

    size_t output_data_size = 500 * 500;  // the model outputs a 500x500 class-index map
    void* output_buffer;
    CheckStatus(g_ort, g_ort->GetTensorMutableData(output_tensors[0], &output_buffer));
    int64_t* int_buffer = reinterpret_cast<int64_t*>(output_buffer);

    /* auto max = std::max_element(int_buffer, int_buffer + output_data_size);
     int max_index = static_cast(std::distance(int_buffer, max));*/

     //std::cout << *max << std::endl;

    int count = 0;
    Mat newarr = Mat_<int>(500, 500); // a 500x500 matrix to hold the mask
    for (int i = 0; i < newarr.rows; i++)
    {
        for (int j = 0; j < newarr.cols; j++) // loop over the columns
        {
            if ((int)int_buffer[i * newarr.cols + j] >= 1) {
                count++;
                newarr.at<int>(i, j) = 255;
                continue;
            }
            newarr.at<int>(i, j) = int_buffer[i * newarr.cols + j];
        }
    }
    cout << count << endl;

    imwrite("./test.png", newarr);
    newarr = imread("./test.png", IMREAD_GRAYSCALE);
    cout << newarr.channels() << endl;
    imshow("mask", newarr);
    cv::waitKey();
}

int main(int argc, char* argv[]) {
    std::string backend = "CPU";
    std::string input_path = "./1.jpg";
    run_ort_net(backend, input_path);
    return 0;
}
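
One thing the listing above leaves out is releasing the ONNX Runtime objects it creates. As a hedged sketch, cleanup along the following lines could be added just before run_ort_net returns; it only uses release functions that exist in the C API:

    // free the names returned by SessionGetInputName / SessionGetOutputName
    for (const char* name : input_node_names)  allocator->Free(allocator, (void*)name);
    for (const char* name : output_node_names) allocator->Free(allocator, (void*)name);
    // release the tensors, then the session and environment objects
    for (OrtValue* t : input_tensors)  if (t) g_ort->ReleaseValue(t);
    for (OrtValue* t : output_tensors) if (t) g_ort->ReleaseValue(t);
    g_ort->ReleaseMemoryInfo(memory_info);
    g_ort->ReleaseSession(session);
    g_ort->ReleaseSessionOptions(session_options);
    g_ort->ReleaseEnv(env);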

To make the result easier to see, all non-background values were set to 255, as shown below:
[Figure 1: the resulting segmentation mask]

3. Deploying with libtorch

A model trained in PyTorch must first be converted to a TorchScript model; the commented-out torch.jit.trace / torch.jit.save lines in the export script above produce such a cpu.pt file. For the full workflow, see 在C++平台上部署PyTorch模型流程+踩坑实录 in the references.

#include <torch/script.h>
#include <torch/torch.h>
#include <opencv2/opencv.hpp>
#include <iostream>

int main()
{
    torch::DeviceType device_type;
    if (torch::cuda::is_available()) {
        std::cout << "CUDA available! Predicting on GPU." << std::endl;
        device_type = torch::kCUDA;
    }
    else {
        std::cout << "Predicting on CPU." << std::endl;
        device_type = torch::kCPU;
    }
    torch::Device device(device_type);

    //Init model
    std::string model_pb = "./cpu.pt";  // the TorchScript file saved by torch.jit.save above
    auto module = torch::jit::load(model_pb);
    module.to(device);

    auto image = cv::imread("./1_35.jpg", cv::ImreadModes::IMREAD_COLOR);
    cv::Mat image_transfomed;
    cv::resize(image, image_transfomed, cv::Size(500, 500));

    // convert the cv::Mat to a torch tensor
    torch::Tensor tensor_image = torch::from_blob(image_transfomed.data,
        { image_transfomed.rows, image_transfomed.cols,3 }, torch::kByte);
    tensor_image = tensor_image.permute({ 2,0,1 });
    tensor_image = tensor_image.toType(torch::kFloat);
    tensor_image = tensor_image.div(255);
    tensor_image = tensor_image.unsqueeze(0);
    tensor_image = tensor_image.to(device);
    torch::Tensor output = module.forward({ tensor_image }).toTensor();
    auto max_result = output.max(1, true);     // per-pixel (max value, class index)
    auto max_index = std::get<1>(max_result);  // class-index tensor, shape 1 x 1 x H x W
    std::cout << output << std::endl;
    //return max_index;
    return 0;
}
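
The libtorch listing only prints the raw output tensor. As a rough sketch (again an assumption, not from the original post), the 1xCxHxW output could be turned into a displayable mask just before the return, mirroring the onnxruntime example:

    // Hedged sketch: argmax over the class dimension, scale for visibility,
    // move to CPU and wrap in a cv::Mat so it can be saved or shown.
    torch::Tensor mask = output.argmax(1)       // 1 x H x W class indices
                               .squeeze(0)
                               .mul(255)        // make non-background pixels white
                               .clamp(0, 255)
                               .to(torch::kByte)
                               .to(torch::kCPU)
                               .contiguous();
    cv::Mat mask_mat((int)mask.size(0), (int)mask.size(1), CV_8UC1, mask.data_ptr<uint8_t>());
    cv::imwrite("./libtorch_mask.png", mask_mat.clone());

mask_mat.clone() copies the pixel data, so the saved image does not depend on the tensor's lifetime.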

References

[1] https://github.com/microsoft/onnxruntime-inference-examples/blob/main/c_cxx/Snpe_EP/main.cpp
[2] https://blog.csdn.net/qq_44747572/article/details/120820964?spm=1001.2014.3001.5501
[3] https://zhuanlan.zhihu.com/p/191569603
[4] https://zhuanlan.zhihu.com/p/414317269
