计算模型的运算量和推理时间

from thop import profile
import torch
import numpy as np
import tqdm


# Spatial size (height, width) of the random tensors used for profiling and timing.
h = 1080
w = 1920

# Random tensor of shape (1, 2, 3, h, w) fed to the model for operation counting.
# It is deliberately not called `input`, so the builtin input() stays usable.
profile_input = torch.randn(1, 2, 3, h, w).to(device)
# torch.Tensor([...]) builds a tensor of the default floating-point dtype, so
# the flag is stored as 1.0 rather than as a bool.
test_mode = torch.Tensor([True]).to(device)
print(profile_input.shape)

# Count operations and parameters with thop.
# NOTE(review): thop's README names the first return value "macs"; confirm
# whether the "Flops" label below should be MACs (or the value doubled).
total_ops, params = profile(model, inputs=(profile_input, test_mode))
print(f'Flops: {total_ops / 1000000000: .4f}G')  # operation count in units of 1e9
# Parameter count in units of 1e6. The original note says it matches the
# "Total params" value of a model summary printed elsewhere (not shown here).
print(f'params参数量: {params / 1000000: .4f}M')

# Number of timed forward passes and the random tensor they all reuse.
repetitions = 1000
dummy_input = torch.rand(1, 2, 3, h, w).to(device)

# Warm-up: per the original author's note, an idle GPU may be in a power-saving
# state, so a few untimed passes are run before measuring.
print('warm up ...\n')
with torch.no_grad():
    for _ in range(10):
        _ = model(dummy_input, test_mode)

# Block the host until all queued GPU work has finished before timing starts.
torch.cuda.synchronize()

# CUDA events with timing enabled mark the start and end of each forward pass.
start_event = torch.cuda.Event(enable_timing=True)
end_event = torch.cuda.Event(enable_timing=True)
# One slot per repetition for the measured duration.
timings = np.zeros((repetitions, 1))

print('testing ...\n')
with torch.no_grad():
    for idx in tqdm.tqdm(range(repetitions)):
        start_event.record()
        _ = model(dummy_input, test_mode)
        end_event.record()
        # The end event must have completed on the GPU before it can be read.
        torch.cuda.synchronize()
        # elapsed_time() reports the interval between the two events in milliseconds.
        timings[idx] = start_event.elapsed_time(end_event)

# Mean per-pass latency in milliseconds; 1000 / mean converts it to passes per second.
mean_time = float(timings.mean())
print(f"Inference time: {mean_time:.6f}, FPS: {1000 / mean_time} ")

你可能感兴趣的:(深度学习,python)