AlexNet:PyTorch模型与ONNX模型的推理速度对比

# -*- coding: utf-8 -*-
from tqdm import tqdm
import pandas as pd
import time
import torch
import torchvision
import numpy as np
import onnxruntime as ort


def save_onnx(path, device):
    """Export an untrained AlexNet to ONNX and return the PyTorch model.

    Args:
        path: Destination file for the exported ONNX graph.
        device: Device the model and the tracing input are moved to
            (anything accepted by ``Tensor.to``).

    Returns:
        The ``torchvision`` AlexNet instance that was exported, in eval mode,
        so it can be benchmarked against the ONNX file.
    """
    model = torchvision.models.alexnet(pretrained=False).to(device)
    # AlexNet's classifier contains Dropout layers. Put the model in inference
    # mode so the returned model does the same work as the exported graph;
    # otherwise the PyTorch side of the benchmark would still run dropout.
    model.eval()

    # Batch size 3 is only used for tracing; axis 0 is declared dynamic below.
    dummy_input = torch.randn(3, 3, 224, 224).to(device)
    input_names = ["actual_input_1"] + ["learned_%d" % i for i in range(16)]
    output_names = ["output1"]
    torch.onnx.export(model, dummy_input, path, verbose=False, input_names=input_names, output_names=output_names,
                      dynamic_axes={'actual_input_1': [0], 'output1': [0]})
    return model


def torch_t(model, batch_size, device, epochs=None):
    """Time repeated forward passes of ``model`` on a random input batch.

    Args:
        model: Callable PyTorch module, already placed on ``device``.
        batch_size: Number of samples in the random ``(N, 3, 224, 224)`` input.
        device: Device the input tensor is moved to.
        epochs: Number of forward passes to time. When ``None`` the
            module-level ``epoch`` variable is used, as in the original script.

    Returns:
        Elapsed wall-clock seconds for all forward passes, rounded to 2 decimals.
    """
    n_runs = epoch if epochs is None else epochs
    # Build the input before starting the clock so only inference is measured.
    input_ = torch.randn(batch_size, 3, 224, 224).to(device)

    # Inference only: skip autograd bookkeeping.
    with torch.no_grad():
        # CUDA kernels are launched asynchronously; synchronise before reading
        # the clock so pending or unfinished GPU work is not mis-attributed.
        if input_.is_cuda:
            torch.cuda.synchronize(input_.device)
        start = time.perf_counter()
        for _ in range(n_runs):
            model(input_)
        if input_.is_cuda:
            torch.cuda.synchronize(input_.device)
        cost = time.perf_counter() - start
    return round(cost, 2)


def onnx_t_fun(path, batch_size, device, epochs=None):
    """Time repeated ONNX Runtime inference runs on a random input batch.

    Args:
        path: Path of the ONNX model file to load.
        batch_size: Number of samples in the random ``(N, 3, 224, 224)`` input.
        device: ``'cpu'`` / ``torch.device('cpu')`` selects the CPU execution
            provider; anything else selects the CUDA execution provider.
        epochs: Number of runs to time. When ``None`` the module-level
            ``epoch`` variable is used, as in the original script.

    Returns:
        Elapsed wall-clock seconds for all runs, rounded to 2 decimals.
        Session creation and input generation are not included.
    """
    n_runs = epoch if epochs is None else epochs

    # Compare on the string form so that both the plain string 'cpu' and a
    # torch.device('cpu') object select the CPU provider.
    if str(device).startswith('cpu'):
        providers = ['CPUExecutionProvider']
    else:
        providers = ['CUDAExecutionProvider']
    ort_session = ort.InferenceSession(path, providers=providers)

    # Build the input before starting the clock so only inference is measured.
    # NOTE(review): the feed is a host NumPy array, so with the CUDA provider
    # each run presumably includes a host-to-device copy -- confirm against
    # the ONNX Runtime I/O binding documentation.
    input_ = np.random.randn(batch_size, 3, 224, 224).astype(np.float32)
    feed = {"actual_input_1": input_}

    start = time.perf_counter()
    for _ in range(n_runs):
        ort_session.run(None, feed)
    cost = time.perf_counter() - start
    return round(cost, 2)


if __name__ == '__main__':
    # Benchmark driver: for every (device, batch size) pair, export AlexNet to
    # ONNX, time PyTorch and ONNX Runtime, and collect the results in a table.

    batch_size_list = [2 ** i for i in range(8)]

    # Only benchmark the GPU when both PyTorch and ONNX Runtime can use it;
    # otherwise moving the model to cuda:0 / creating the CUDA session fails.
    device_list = ['cpu']
    if torch.cuda.is_available() and 'CUDAExecutionProvider' in ort.get_available_providers():
        device_list.append(torch.device(0))

    tuples = [(device, batch_size) for device in device_list for batch_size in batch_size_list]
    index = pd.MultiIndex.from_tuples(tuples)
    # Number of timed inference runs; read as a module-level global by the
    # timing functions.
    epoch = 100
    path = "alexnet.onnx"
    df = pd.DataFrame(columns=['torch', 'onnx'], index=index)
    for batch_size in tqdm(batch_size_list):
        for device in device_list:
            # Re-export per device so the returned model lives on that device.
            model = save_onnx(path, device)
            cost_torch = torch_t(model, batch_size, device)
            cost_onnx = onnx_t_fun(path, batch_size, device)
            df.loc[(device, batch_size), :] = [cost_torch, cost_onnx]
    print(df)

             torch   onnx
cpu    1       1.2   0.89
       2      2.47   1.15
       4      2.86   1.68
       8      3.67    2.7
       16     5.93   4.72
       32     9.47   8.85
       64    17.33  17.26
       128   32.77  34.27
cuda:0 1      0.07   0.42
       2      0.08   0.65
       4      0.12    0.6
       8       0.2   0.27
       16     0.24    0.4
       32     0.38   0.89
       64     0.69   1.72
       128    1.26   3.12

可以发现,在CPU上batch_size较小时,onnx比pytorch快一些;随着batch_size变大,onnx的优势逐渐缩小,到batch_size=128时onnx反而略慢于pytorch。

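GPU上onnx反而比pytorch慢,不确定这一结果是否合理。需要注意:pytorch的CUDA调用是异步执行的,如果停止计时前没有调用torch.cuda.synchronize()进行同步,GPU上pytorch的耗时会被低估。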

cpu查询命令为cat /proc/cpuinfo | grep 'model name' |uniq

结果为 Intel(R) Core(TM) i7-9700K CPU @ 3.60GHz

你可能感兴趣的:(pytorch,onnx,pytorch,python,深度学习)