Reference: A practical PyTorch tool: torchsummary
Reference: [DL] PyTorch tricks for counting network parameters: torchstat & torchsummary
Reference: [Deep Learning] Computing the FLOPs of a PyTorch model
Installation:
pip install torchsummary
pip install torchstat
pip install thop
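All three tools are one-line calls. As a quick reference before the full MobileNet experiments, here is a minimal sketch on a toy model (the toy layers are chosen only for illustration; torchsummary needs a CUDA device, as in the runs below):

import torch
import torch.nn as nn

net = nn.Sequential(nn.Conv2d(3, 8, 3, padding=1), nn.ReLU())
x = torch.randn(1, 3, 32, 32)

from thop import profile, clever_format
macs, params = profile(net, inputs=(x,))      # MACs and parameter count
print(clever_format([macs, params], "%.3f"))

from torchstat import stat
stat(net, (3, 32, 32))                        # per-layer table; input shape carries no batch dim

from torchsummary import summary
summary(net.cuda(), input_size=(3, 32, 32))   # Keras-style summary; model must be on the device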
Conclusions (collected from the runs below):
'''
Model size on disk: 12.3 MB
macs: 4216450944.0 params: 3206976.0            (thop, batch-1 300x300 input)
macs: 4.216G params: 3.207M                     (thop + clever_format)
Total Flops: 4.25GFlops                         (torchstat)
Total params: 3,206,976                         (torchstat)
Trainable params: 3,206,976                     (torchsummary)
Params size (MB): 12.23                         (torchsummary)
--------------------------------------------testing CPU---------------------------------------------
CPU time: 1.3852598667144775
--------------------------------------------testing GPU---------------------------------------------
GPU time: 1.0919568538665771
'''
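Note how the three tools' headline numbers relate. thop reports MACs (multiply-accumulates), while torchstat reports both MAdd (multiplies and adds counted separately) and "Flops". Taking the values verbatim from the runs below, torchstat's Total MAdd is exactly twice thop's MACs, and torchstat's Flops is slightly higher, presumably because the two tools count BN/ReLU differently (this is my reading of the numbers, not a guarantee from either library):

thop_macs = 4_216_450_944         # thop "macs", batch-1 300x300 input
torchstat_madd = 8_432_901_888    # torchstat "Total MAdd"
torchstat_flops = 4_252_887_040   # torchstat "Total Flops"

assert torchstat_madd == 2 * thop_macs    # one MAC = one multiply + one add
print(torchstat_flops - thop_macs)        # 36436096, the BN/ReLU accounting gap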
Code (thop):
import torch
import torch.nn as nn
import torch.nn.functional as F


class Block(nn.Module):  # (N, in_planes, H, W) -> (N, out_planes, H, W)
    '''Depthwise conv + pointwise conv
    (implements one depthwise-separable convolution block)'''
    def __init__(self, in_planes, out_planes, stride=1):
        super(Block, self).__init__()
        self.conv1 = nn.Conv2d(
            in_planes, in_planes, kernel_size=3, stride=stride,
            padding=1, groups=in_planes, bias=False)
        self.bn1 = nn.BatchNorm2d(in_planes)
        self.conv2 = nn.Conv2d(
            in_planes, out_planes, kernel_size=1,
            stride=1, padding=0, bias=False)
        self.bn2 = nn.BatchNorm2d(out_planes)
        self.relu_1 = torch.nn.ReLU()
        self.relu_2 = torch.nn.ReLU()

    def forward(self, x):
        out = self.relu_1(self.bn1(self.conv1(x)))
        out = self.relu_2(self.bn2(self.conv2(out)))
        return out


class MobileNet(nn.Module):
    # (128,2) means conv planes=128, conv stride=2;
    # by default conv stride=1
    cfg = [64, (128,2), 128, (256,2), 256, (512,2),
           512, 512, 512, 512, 512, (1024,2), 1024]  # out_planes, stride

    def __init__(self, num_classes=10):
        super(MobileNet, self).__init__()
        self.conv1 = nn.Conv2d(3, 32, kernel_size=3,
                               stride=1, padding=1, bias=False)  # stride 1: changes only the channel count
        self.bn1 = nn.BatchNorm2d(32)
        self.layers = self._make_layers(in_planes=32)
        # self.linear = nn.Linear(1024, num_classes)
        self.relu = torch.nn.ReLU()

    def _make_layers(self, in_planes):
        layers = []
        for x in self.cfg:  # out_planes, stride
            out_planes = x if isinstance(x, int) else x[0]
            stride = 1 if isinstance(x, int) else x[1]
            layers.append(Block(in_planes, out_planes, stride))
            in_planes = out_planes
        return nn.ModuleList(layers)

    def forward(self, x):
        out = self.relu(self.bn1(self.conv1(x)))  # (N, 32, 300, 300) for a 300x300 input
        # print(out.shape)
        for index, layer in enumerate(self.layers):
            out = layer(out)  # ends at (N, 1024, 19, 19) for a 300x300 input
            # print(index, out.shape)
            if index == 7:
                self.intermediate = out
                # print('intermediate.shape:', self.intermediate.shape)
        return out


if __name__ == '__main__':
    from thop import profile
    from thop import clever_format
    net = MobileNet()
    x = torch.randn(1, 3, 300, 300)
    # y = net(x)
    macs, params = profile(net, inputs=(x,))
    print('macs:', macs, 'params:', params)  # macs: 4216450944.0 params: 3206976.0
    print('--------')
    macs, params = clever_format([macs, params], "%.3f")
    print('macs:', macs, 'params:', params)  # macs: 4.216G params: 3.207M
    # print('output shape:', y.size())

# if __name__ == '__main__':
#     from torchstat import stat
#     model = MobileNet()
#     stat(model, (3, 300, 300))  # Total Flops: 4.25GFlops (4,252,887,040.0), Total params: 3,206,976

# if __name__ == '__main__':
#     from torchsummary import summary
#     model = MobileNet()
#     torch.save(model.state_dict(), '临时文件MobileNet.pth')
#     summary(model.cuda(), input_size=(3, 300, 300), batch_size=-1)  # Total params: 3,206,976  Params size (MB): 12.23

# if __name__ == '__main__':
#     import time
#     print('testing CPU'.center(100, '-'))
#     net = MobileNet().cpu()
#     x = torch.randn(4, 3, 300, 300)
#     start = time.time()
#     y = net(x)
#     end = time.time()
#     print("CPU time:", end - start)
#     print('testing GPU'.center(100, '-'))
#     net = MobileNet().cuda()
#     x = torch.randn(4, 3, 300, 300).cuda()
#     start = time.time()
#     y = net(x)
#     end = time.time()
#     print("GPU time:", end - start)
Console output:
[INFO] Register count_convNd() for <class 'torch.nn.modules.conv.Conv2d'>.
[INFO] Register count_bn() for <class 'torch.nn.modules.batchnorm.BatchNorm2d'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.activation.ReLU'>.
[WARN] Cannot find rule for <class '__main__.Block'>. Treat it as zero Macs and zero Params.
[WARN] Cannot find rule for <class 'torch.nn.modules.container.ModuleList'>. Treat it as zero Macs and zero Params.
[WARN] Cannot find rule for <class '__main__.MobileNet'>. Treat it as zero Macs and zero Params.
macs: 4216450944.0 params: 3206976.0
--------
macs: 4.216G params: 3.207M
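The [WARN] lines above are harmless here: thop has no counting rule for the container modules (Block, ModuleList, MobileNet itself), whose children are already counted, so it assigns them zero MACs. If a custom layer did real work that thop cannot see, a rule can be passed via custom_ops. A minimal sketch (MyCustomOp and its counter are hypothetical, not part of the original script):

import torch
import torch.nn as nn
from thop import profile

class MyCustomOp(nn.Module):          # a hand-rolled layer thop knows nothing about
    def forward(self, x):
        return x * torch.sigmoid(x)

def count_my_custom_op(m, x, y):
    # thop convention: accumulate the op count into m.total_ops;
    # here we charge one op per output element, purely as an example
    m.total_ops += torch.DoubleTensor([int(y.numel())])

net = nn.Sequential(nn.Conv2d(3, 8, 3, padding=1), MyCustomOp())
macs, params = profile(net, inputs=(torch.randn(1, 3, 32, 32),),
                       custom_ops={MyCustomOp: count_my_custom_op})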
Code (torchstat):

Block and MobileNet are defined exactly as in the listing above; only the __main__ section changes:

# if __name__ == '__main__':
#     from thop import profile
#     from thop import clever_format
#     net = MobileNet()
#     x = torch.randn(4, 3, 300, 300)   # note: batch size 4 here
#     # y = net(x)
#     macs, params = profile(net, inputs=(x,))
#     print(macs, params)  # 16865803776.0 3206976.0
#     print('--------')
#     macs, params = clever_format([macs, params], "%.3f")
#     print(macs, params)  # 16.866G 3.207M
#     # print('output shape:', y.size())

if __name__ == '__main__':
    from torchstat import stat
    model = MobileNet()
    stat(model, (3, 300, 300))  # Total Flops: 4.25GFlops (4,252,887,040.0), Total params: 3,206,976

# if __name__ == '__main__':
#     from torchsummary import summary
#     model = MobileNet()
#     torch.save(model.state_dict(), '临时文件MobileNet.pth')
#     summary(model.cuda(), input_size=(3, 300, 300), batch_size=-1)  # Total params: 3,206,976  Params size (MB): 12.23

# Recorded per-layer output shapes for a (4, 3, 300, 300) input:
# torch.Size([4, 32, 300, 300])
# 0 torch.Size([4, 64, 300, 300])
# 1 torch.Size([4, 128, 150, 150])
# 2 torch.Size([4, 128, 150, 150])
# 3 torch.Size([4, 256, 75, 75])
# 4 torch.Size([4, 256, 75, 75])
# 5 torch.Size([4, 512, 38, 38])
# 6 torch.Size([4, 512, 38, 38])
# 7 torch.Size([4, 512, 38, 38])
# intermediate.shape: torch.Size([4, 512, 38, 38])
# 8 torch.Size([4, 512, 38, 38])
# 9 torch.Size([4, 512, 38, 38])
# 10 torch.Size([4, 512, 38, 38])
# 11 torch.Size([4, 1024, 19, 19])
# 12 torch.Size([4, 1024, 19, 19])
# output shape: torch.Size([4, 1024, 19, 19])
Console output:
module name input shape output shape params memory(MB) MAdd Flops MemRead(B) MemWrite(B) duration[%] MemR+W(B)
0 conv1 3 300 300 32 300 300 864.0 10.99 152,640,000.0 77,760,000.0 1083456.0 11520000.0 3.93% 12603456.0
1 bn1 32 300 300 32 300 300 64.0 10.99 11,520,000.0 5,760,000.0 11520256.0 11520000.0 2.24% 23040256.0
2 layers.0.conv1 32 300 300 32 300 300 288.0 10.99 48,960,000.0 25,920,000.0 11521152.0 11520000.0 3.65% 23041152.0
3 layers.0.bn1 32 300 300 32 300 300 64.0 10.99 11,520,000.0 5,760,000.0 11520256.0 11520000.0 1.40% 23040256.0
4 layers.0.conv2 32 300 300 64 300 300 2048.0 21.97 362,880,000.0 184,320,000.0 11528192.0 23040000.0 6.17% 34568192.0
5 layers.0.bn2 64 300 300 64 300 300 128.0 21.97 23,040,000.0 11,520,000.0 23040512.0 23040000.0 3.09% 46080512.0
6 layers.0.relu_1 32 300 300 32 300 300 0.0 10.99 2,880,000.0 2,880,000.0 11520000.0 11520000.0 2.24% 23040000.0
7 layers.0.relu_2 64 300 300 64 300 300 0.0 21.97 5,760,000.0 5,760,000.0 23040000.0 23040000.0 2.81% 46080000.0
8 layers.1.conv1 64 300 300 64 150 150 576.0 5.49 24,480,000.0 12,960,000.0 23042304.0 5760000.0 3.37% 28802304.0
9 layers.1.bn1 64 150 150 64 150 150 128.0 5.49 5,760,000.0 2,880,000.0 5760512.0 5760000.0 0.84% 11520512.0
10 layers.1.conv2 64 150 150 128 150 150 8192.0 10.99 365,760,000.0 184,320,000.0 5792768.0 11520000.0 3.09% 17312768.0
11 layers.1.bn2 128 150 150 128 150 150 256.0 10.99 11,520,000.0 5,760,000.0 11521024.0 11520000.0 1.96% 23041024.0
12 layers.1.relu_1 64 150 150 64 150 150 0.0 5.49 1,440,000.0 1,440,000.0 5760000.0 5760000.0 0.84% 11520000.0
13 layers.1.relu_2 128 150 150 128 150 150 0.0 10.99 2,880,000.0 2,880,000.0 11520000.0 11520000.0 1.96% 23040000.0
14 layers.2.conv1 128 150 150 128 150 150 1152.0 10.99 48,960,000.0 25,920,000.0 11524608.0 11520000.0 3.08% 23044608.0
15 layers.2.bn1 128 150 150 128 150 150 256.0 10.99 11,520,000.0 5,760,000.0 11521024.0 11520000.0 1.68% 23041024.0
16 layers.2.conv2 128 150 150 128 150 150 16384.0 10.99 734,400,000.0 368,640,000.0 11585536.0 11520000.0 3.93% 23105536.0
17 layers.2.bn2 128 150 150 128 150 150 256.0 10.99 11,520,000.0 5,760,000.0 11521024.0 11520000.0 1.41% 23041024.0
18 layers.2.relu_1 128 150 150 128 150 150 0.0 10.99 2,880,000.0 2,880,000.0 11520000.0 11520000.0 1.96% 23040000.0
19 layers.2.relu_2 128 150 150 128 150 150 0.0 10.99 2,880,000.0 2,880,000.0 11520000.0 11520000.0 1.96% 23040000.0
20 layers.3.conv1 128 150 150 128 75 75 1152.0 2.75 12,240,000.0 6,480,000.0 11524608.0 2880000.0 1.96% 14404608.0
21 layers.3.bn1 128 75 75 128 75 75 256.0 2.75 2,880,000.0 1,440,000.0 2881024.0 2880000.0 0.56% 5761024.0
22 layers.3.conv2 128 75 75 256 75 75 32768.0 5.49 367,200,000.0 184,320,000.0 3011072.0 5760000.0 1.68% 8771072.0
23 layers.3.bn2 256 75 75 256 75 75 512.0 5.49 5,760,000.0 2,880,000.0 5762048.0 5760000.0 1.12% 11522048.0
24 layers.3.relu_1 128 75 75 128 75 75 0.0 2.75 720,000.0 720,000.0 2880000.0 2880000.0 0.28% 5760000.0
25 layers.3.relu_2 256 75 75 256 75 75 0.0 5.49 1,440,000.0 1,440,000.0 5760000.0 5760000.0 0.84% 11520000.0
26 layers.4.conv1 256 75 75 256 75 75 2304.0 5.49 24,480,000.0 12,960,000.0 5769216.0 5760000.0 1.96% 11529216.0
27 layers.4.bn1 256 75 75 256 75 75 512.0 5.49 5,760,000.0 2,880,000.0 5762048.0 5760000.0 0.84% 11522048.0
28 layers.4.conv2 256 75 75 256 75 75 65536.0 5.49 735,840,000.0 368,640,000.0 6022144.0 5760000.0 2.81% 11782144.0
29 layers.4.bn2 256 75 75 256 75 75 512.0 5.49 5,760,000.0 2,880,000.0 5762048.0 5760000.0 1.12% 11522048.0
30 layers.4.relu_1 256 75 75 256 75 75 0.0 5.49 1,440,000.0 1,440,000.0 5760000.0 5760000.0 0.85% 11520000.0
31 layers.4.relu_2 256 75 75 256 75 75 0.0 5.49 1,440,000.0 1,440,000.0 5760000.0 5760000.0 0.84% 11520000.0
32 layers.5.conv1 256 75 75 256 38 38 2304.0 1.41 6,284,288.0 3,326,976.0 5769216.0 1478656.0 1.11% 7247872.0
33 layers.5.bn1 256 38 38 256 38 38 512.0 1.41 1,478,656.0 739,328.0 1480704.0 1478656.0 0.28% 2959360.0
34 layers.5.conv2 256 38 38 512 38 38 131072.0 2.82 377,796,608.0 189,267,968.0 2002944.0 2957312.0 1.40% 4960256.0
35 layers.5.bn2 512 38 38 512 38 38 1024.0 2.82 2,957,312.0 1,478,656.0 2961408.0 2957312.0 0.56% 5918720.0
36 layers.5.relu_1 256 38 38 256 38 38 0.0 1.41 369,664.0 369,664.0 1478656.0 1478656.0 0.28% 2957312.0
37 layers.5.relu_2 512 38 38 512 38 38 0.0 2.82 739,328.0 739,328.0 2957312.0 2957312.0 0.56% 5914624.0
38 layers.6.conv1 512 38 38 512 38 38 4608.0 2.82 12,568,576.0 6,653,952.0 2975744.0 2957312.0 0.84% 5933056.0
39 layers.6.bn1 512 38 38 512 38 38 1024.0 2.82 2,957,312.0 1,478,656.0 2961408.0 2957312.0 0.56% 5918720.0
40 layers.6.conv2 512 38 38 512 38 38 262144.0 2.82 756,332,544.0 378,535,936.0 4005888.0 2957312.0 1.40% 6963200.0
41 layers.6.bn2 512 38 38 512 38 38 1024.0 2.82 2,957,312.0 1,478,656.0 2961408.0 2957312.0 0.56% 5918720.0
42 layers.6.relu_1 512 38 38 512 38 38 0.0 2.82 739,328.0 739,328.0 2957312.0 2957312.0 0.28% 5914624.0
43 layers.6.relu_2 512 38 38 512 38 38 0.0 2.82 739,328.0 739,328.0 2957312.0 2957312.0 0.56% 5914624.0
44 layers.7.conv1 512 38 38 512 38 38 4608.0 2.82 12,568,576.0 6,653,952.0 2975744.0 2957312.0 0.84% 5933056.0
45 layers.7.bn1 512 38 38 512 38 38 1024.0 2.82 2,957,312.0 1,478,656.0 2961408.0 2957312.0 0.56% 5918720.0
46 layers.7.conv2 512 38 38 512 38 38 262144.0 2.82 756,332,544.0 378,535,936.0 4005888.0 2957312.0 1.96% 6963200.0
47 layers.7.bn2 512 38 38 512 38 38 1024.0 2.82 2,957,312.0 1,478,656.0 2961408.0 2957312.0 0.56% 5918720.0
48 layers.7.relu_1 512 38 38 512 38 38 0.0 2.82 739,328.0 739,328.0 2957312.0 2957312.0 0.56% 5914624.0
49 layers.7.relu_2 512 38 38 512 38 38 0.0 2.82 739,328.0 739,328.0 2957312.0 2957312.0 0.56% 5914624.0
50 layers.8.conv1 512 38 38 512 38 38 4608.0 2.82 12,568,576.0 6,653,952.0 2975744.0 2957312.0 0.84% 5933056.0
51 layers.8.bn1 512 38 38 512 38 38 1024.0 2.82 2,957,312.0 1,478,656.0 2961408.0 2957312.0 0.28% 5918720.0
52 layers.8.conv2 512 38 38 512 38 38 262144.0 2.82 756,332,544.0 378,535,936.0 4005888.0 2957312.0 1.96% 6963200.0
53 layers.8.bn2 512 38 38 512 38 38 1024.0 2.82 2,957,312.0 1,478,656.0 2961408.0 2957312.0 0.56% 5918720.0
54 layers.8.relu_1 512 38 38 512 38 38 0.0 2.82 739,328.0 739,328.0 2957312.0 2957312.0 0.28% 5914624.0
55 layers.8.relu_2 512 38 38 512 38 38 0.0 2.82 739,328.0 739,328.0 2957312.0 2957312.0 0.56% 5914624.0
56 layers.9.conv1 512 38 38 512 38 38 4608.0 2.82 12,568,576.0 6,653,952.0 2975744.0 2957312.0 0.84% 5933056.0
57 layers.9.bn1 512 38 38 512 38 38 1024.0 2.82 2,957,312.0 1,478,656.0 2961408.0 2957312.0 0.56% 5918720.0
58 layers.9.conv2 512 38 38 512 38 38 262144.0 2.82 756,332,544.0 378,535,936.0 4005888.0 2957312.0 1.40% 6963200.0
59 layers.9.bn2 512 38 38 512 38 38 1024.0 2.82 2,957,312.0 1,478,656.0 2961408.0 2957312.0 0.71% 5918720.0
60 layers.9.relu_1 512 38 38 512 38 38 0.0 2.82 739,328.0 739,328.0 2957312.0 2957312.0 0.28% 5914624.0
61 layers.9.relu_2 512 38 38 512 38 38 0.0 2.82 739,328.0 739,328.0 2957312.0 2957312.0 0.28% 5914624.0
62 layers.10.conv1 512 38 38 512 38 38 4608.0 2.82 12,568,576.0 6,653,952.0 2975744.0 2957312.0 0.83% 5933056.0
63 layers.10.bn1 512 38 38 512 38 38 1024.0 2.82 2,957,312.0 1,478,656.0 2961408.0 2957312.0 0.29% 5918720.0
64 layers.10.conv2 512 38 38 512 38 38 262144.0 2.82 756,332,544.0 378,535,936.0 4005888.0 2957312.0 1.96% 6963200.0
65 layers.10.bn2 512 38 38 512 38 38 1024.0 2.82 2,957,312.0 1,478,656.0 2961408.0 2957312.0 0.28% 5918720.0
66 layers.10.relu_1 512 38 38 512 38 38 0.0 2.82 739,328.0 739,328.0 2957312.0 2957312.0 0.28% 5914624.0
67 layers.10.relu_2 512 38 38 512 38 38 0.0 2.82 739,328.0 739,328.0 2957312.0 2957312.0 0.28% 5914624.0
68 layers.11.conv1 512 38 38 512 19 19 4608.0 0.71 3,142,144.0 1,663,488.0 2975744.0 739328.0 0.56% 3715072.0
69 layers.11.bn1 512 19 19 512 19 19 1024.0 0.71 739,328.0 369,664.0 743424.0 739328.0 0.00% 1482752.0
70 layers.11.conv2 512 19 19 1024 19 19 524288.0 1.41 378,166,272.0 189,267,968.0 2836480.0 1478656.0 1.40% 4315136.0
71 layers.11.bn2 1024 19 19 1024 19 19 2048.0 1.41 1,478,656.0 739,328.0 1486848.0 1478656.0 0.28% 2965504.0
72 layers.11.relu_1 512 19 19 512 19 19 0.0 0.71 184,832.0 184,832.0 739328.0 739328.0 0.00% 1478656.0
73 layers.11.relu_2 1024 19 19 1024 19 19 0.0 1.41 369,664.0 369,664.0 1478656.0 1478656.0 0.28% 2957312.0
74 layers.12.conv1 1024 19 19 1024 19 19 9216.0 1.41 6,284,288.0 3,326,976.0 1515520.0 1478656.0 0.56% 2994176.0
75 layers.12.bn1 1024 19 19 1024 19 19 2048.0 1.41 1,478,656.0 739,328.0 1486848.0 1478656.0 0.28% 2965504.0
76 layers.12.conv2 1024 19 19 1024 19 19 1048576.0 1.41 756,702,208.0 378,535,936.0 5672960.0 1478656.0 1.96% 7151616.0
77 layers.12.bn2 1024 19 19 1024 19 19 2048.0 1.41 1,478,656.0 739,328.0 1486848.0 1478656.0 0.00% 2965504.0
78 layers.12.relu_1 1024 19 19 1024 19 19 0.0 1.41 369,664.0 369,664.0 1478656.0 1478656.0 0.28% 2957312.0
79 layers.12.relu_2 1024 19 19 1024 19 19 0.0 1.41 369,664.0 369,664.0 1478656.0 1478656.0 0.00% 2957312.0
80 relu 32 300 300 32 300 300 0.0 10.99 2,880,000.0 2,880,000.0 11520000.0 11520000.0 1.96% 23040000.0
total 3206976.0 416.98 8,432,901,888.0 4,252,887,040.0 11520000.0 11520000.0 100.00% 886895552.0
=============================================================================================================================================================
Total params: 3,206,976
-------------------------------------------------------------------------------------------------------------------------------------------------------------
Total memory: 416.98MB
Total MAdd: 8.43GMAdd
Total Flops: 4.25GFlops
Total MemR+W: 845.81MB
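The per-layer columns can be sanity-checked by hand. For bias-free convolutions, torchstat's params and Flops columns follow directly from the layer geometry; the arithmetic below reproduces three rows of the table above:

H = W = 300  # output spatial size of the stride-1 layers

# Row "conv1": standard 3x3 conv, 3 -> 32 channels, bias=False
params_conv1 = 3 * 3 * 3 * 32         # 864, matches the table
flops_conv1 = params_conv1 * H * W    # 77,760,000, matches the table

# Row "layers.0.conv1": 3x3 depthwise conv over 32 channels (groups=32)
params_dw = 3 * 3 * 32                # 288, matches the table
flops_dw = params_dw * H * W          # 25,920,000, matches the table

# Row "layers.0.conv2": 1x1 pointwise conv, 32 -> 64 channels
params_pw = 1 * 1 * 32 * 64           # 2,048, matches the table
flops_pw = params_pw * H * W          # 184,320,000, matches the table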
Code (torchsummary):

Again only the __main__ section changes (Block and MobileNet, plus the commented thop and torchstat sections and the recorded shape comments, are exactly as in the previous listing):

if __name__ == '__main__':
    from torchsummary import summary
    model = MobileNet()
    torch.save(model.state_dict(), '临时文件MobileNet.pth')  # temp weights file; about 12.3 MB on disk
    summary(model.cuda(), input_size=(3, 300, 300), batch_size=-1)  # Total params: 3,206,976  Params size (MB): 12.23
Console output:
----------------------------------------------------------------
Layer (type) Output Shape Param #
================================================================
Conv2d-1 [-1, 32, 300, 300] 864
BatchNorm2d-2 [-1, 32, 300, 300] 64
ReLU-3 [-1, 32, 300, 300] 0
Conv2d-4 [-1, 32, 300, 300] 288
BatchNorm2d-5 [-1, 32, 300, 300] 64
ReLU-6 [-1, 32, 300, 300] 0
Conv2d-7 [-1, 64, 300, 300] 2,048
BatchNorm2d-8 [-1, 64, 300, 300] 128
ReLU-9 [-1, 64, 300, 300] 0
Block-10 [-1, 64, 300, 300] 0
Conv2d-11 [-1, 64, 150, 150] 576
BatchNorm2d-12 [-1, 64, 150, 150] 128
ReLU-13 [-1, 64, 150, 150] 0
Conv2d-14 [-1, 128, 150, 150] 8,192
BatchNorm2d-15 [-1, 128, 150, 150] 256
ReLU-16 [-1, 128, 150, 150] 0
Block-17 [-1, 128, 150, 150] 0
Conv2d-18 [-1, 128, 150, 150] 1,152
BatchNorm2d-19 [-1, 128, 150, 150] 256
ReLU-20 [-1, 128, 150, 150] 0
Conv2d-21 [-1, 128, 150, 150] 16,384
BatchNorm2d-22 [-1, 128, 150, 150] 256
ReLU-23 [-1, 128, 150, 150] 0
Block-24 [-1, 128, 150, 150] 0
Conv2d-25 [-1, 128, 75, 75] 1,152
BatchNorm2d-26 [-1, 128, 75, 75] 256
ReLU-27 [-1, 128, 75, 75] 0
Conv2d-28 [-1, 256, 75, 75] 32,768
BatchNorm2d-29 [-1, 256, 75, 75] 512
ReLU-30 [-1, 256, 75, 75] 0
Block-31 [-1, 256, 75, 75] 0
Conv2d-32 [-1, 256, 75, 75] 2,304
BatchNorm2d-33 [-1, 256, 75, 75] 512
ReLU-34 [-1, 256, 75, 75] 0
Conv2d-35 [-1, 256, 75, 75] 65,536
BatchNorm2d-36 [-1, 256, 75, 75] 512
ReLU-37 [-1, 256, 75, 75] 0
Block-38 [-1, 256, 75, 75] 0
Conv2d-39 [-1, 256, 38, 38] 2,304
BatchNorm2d-40 [-1, 256, 38, 38] 512
ReLU-41 [-1, 256, 38, 38] 0
Conv2d-42 [-1, 512, 38, 38] 131,072
BatchNorm2d-43 [-1, 512, 38, 38] 1,024
ReLU-44 [-1, 512, 38, 38] 0
Block-45 [-1, 512, 38, 38] 0
Conv2d-46 [-1, 512, 38, 38] 4,608
BatchNorm2d-47 [-1, 512, 38, 38] 1,024
ReLU-48 [-1, 512, 38, 38] 0
Conv2d-49 [-1, 512, 38, 38] 262,144
BatchNorm2d-50 [-1, 512, 38, 38] 1,024
ReLU-51 [-1, 512, 38, 38] 0
Block-52 [-1, 512, 38, 38] 0
Conv2d-53 [-1, 512, 38, 38] 4,608
BatchNorm2d-54 [-1, 512, 38, 38] 1,024
ReLU-55 [-1, 512, 38, 38] 0
Conv2d-56 [-1, 512, 38, 38] 262,144
BatchNorm2d-57 [-1, 512, 38, 38] 1,024
ReLU-58 [-1, 512, 38, 38] 0
Block-59 [-1, 512, 38, 38] 0
Conv2d-60 [-1, 512, 38, 38] 4,608
BatchNorm2d-61 [-1, 512, 38, 38] 1,024
ReLU-62 [-1, 512, 38, 38] 0
Conv2d-63 [-1, 512, 38, 38] 262,144
BatchNorm2d-64 [-1, 512, 38, 38] 1,024
ReLU-65 [-1, 512, 38, 38] 0
Block-66 [-1, 512, 38, 38] 0
Conv2d-67 [-1, 512, 38, 38] 4,608
BatchNorm2d-68 [-1, 512, 38, 38] 1,024
ReLU-69 [-1, 512, 38, 38] 0
Conv2d-70 [-1, 512, 38, 38] 262,144
BatchNorm2d-71 [-1, 512, 38, 38] 1,024
ReLU-72 [-1, 512, 38, 38] 0
Block-73 [-1, 512, 38, 38] 0
Conv2d-74 [-1, 512, 38, 38] 4,608
BatchNorm2d-75 [-1, 512, 38, 38] 1,024
ReLU-76 [-1, 512, 38, 38] 0
Conv2d-77 [-1, 512, 38, 38] 262,144
BatchNorm2d-78 [-1, 512, 38, 38] 1,024
ReLU-79 [-1, 512, 38, 38] 0
Block-80 [-1, 512, 38, 38] 0
Conv2d-81 [-1, 512, 19, 19] 4,608
BatchNorm2d-82 [-1, 512, 19, 19] 1,024
ReLU-83 [-1, 512, 19, 19] 0
Conv2d-84 [-1, 1024, 19, 19] 524,288
BatchNorm2d-85 [-1, 1024, 19, 19] 2,048
ReLU-86 [-1, 1024, 19, 19] 0
Block-87 [-1, 1024, 19, 19] 0
Conv2d-88 [-1, 1024, 19, 19] 9,216
BatchNorm2d-89 [-1, 1024, 19, 19] 2,048
ReLU-90 [-1, 1024, 19, 19] 0
Conv2d-91 [-1, 1024, 19, 19] 1,048,576
BatchNorm2d-92 [-1, 1024, 19, 19] 2,048
ReLU-93 [-1, 1024, 19, 19] 0
Block-94 [-1, 1024, 19, 19] 0
================================================================
Total params: 3,206,976
Trainable params: 3,206,976
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 1.03
Forward/backward pass size (MB): 983.30
Params size (MB): 12.23
Estimated Total Size (MB): 996.57
----------------------------------------------------------------
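The "Params size (MB)" line, and the ~12.3 MB .pth file saved above, are simply the parameter count stored as 32-bit floats (the file adds a little serialization overhead):

params = 3_206_976
print(params * 4 / 1024 ** 2)  # 12.23 MB of float32 weights, matching torchsummary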
Timing a forward pass on CPU vs. GPU. Experiment code (Block and MobileNet defined exactly as above):

if __name__ == '__main__':
    import time
    print('testing CPU'.center(100, '-'))
    net = MobileNet().cpu()
    x = torch.randn(4, 3, 300, 300)
    start = time.time()
    y = net(x)
    end = time.time()
    print("CPU time:", end - start)
    print('testing GPU'.center(100, '-'))
    net = MobileNet().cuda()
    x = torch.randn(4, 3, 300, 300).cuda()
    start = time.time()
    y = net(x)   # a single un-synchronized pass; see the caveat after the console output
    end = time.time()
    print("GPU time:", end - start)
Console output:
--------------------------------------------testing CPU---------------------------------------------
CPU time: 1.4291470050811768
--------------------------------------------testing GPU---------------------------------------------
GPU time: 1.0876507759094238
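A closing caveat on the timing numbers: the GPU barely beating the CPU above is largely a measurement artifact. The single timed pass includes CUDA context and cuDNN initialization, and CUDA kernels launch asynchronously, so time.time() can return before the GPU has finished. A more careful measurement warms up first and synchronizes around the timed region; a sketch (the helper name and iteration counts are my own choices, not from the original script):

import time
import torch

def timed_forward(net, x, n_warmup=5, n_iters=20):
    """Average seconds per forward pass, synchronized for GPU inputs."""
    net.eval()
    with torch.no_grad():
        for _ in range(n_warmup):       # absorb CUDA init / cuDNN autotuning
            net(x)
        if x.is_cuda:
            torch.cuda.synchronize()    # drain queued kernels before starting the clock
        start = time.time()
        for _ in range(n_iters):
            net(x)
        if x.is_cuda:
            torch.cuda.synchronize()    # wait for completion before stopping the clock
    return (time.time() - start) / n_iters

# Usage with the MobileNet defined above:
# print("CPU:", timed_forward(MobileNet().cpu(), torch.randn(4, 3, 300, 300)))
# print("GPU:", timed_forward(MobileNet().cuda(), torch.randn(4, 3, 300, 300).cuda()))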