改造前crnn网络的cnn部分网络代码:
class CNN0(nn.Module):
    """Baseline CNN feature extractor of the CRNN, built from dense convolutions.

    The network stacks seven convolution stages (six 3x3 and a final 2x2),
    each followed by ReLU; stages 2, 4 and 6 are batch-normalised first, and
    max-pooling follows stages 0, 1, 3 and 5. For a ``(1, 32, 320)`` input
    the output feature map is ``(512, 1, 81)``.

    Args:
        imageHeight: Height of the input images; must be a multiple of 32.
        nChannel: Number of channels of the input images.

    Raises:
        AssertionError: If ``imageHeight`` is not a multiple of 32.
    """

    def __init__(self, imageHeight, nChannel):
        super(CNN0, self).__init__()
        assert imageHeight % 32 == 0, 'image Height has to be a multiple of 32'

        # Stage 0: nChannel -> 64, then halve height and width.
        self.conv0 = nn.Conv2d(in_channels=nChannel, out_channels=64, kernel_size=3, stride=1, padding=1)
        self.relu0 = nn.ReLU(inplace=True)
        self.pool0 = nn.MaxPool2d(kernel_size=2, stride=2)

        # Stage 1: 64 -> 128, then halve height and width.
        self.conv1 = nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, stride=1, padding=1)
        self.relu1 = nn.ReLU(inplace=True)
        self.pool1 = nn.MaxPool2d(kernel_size=2, stride=2)

        # Stage 2: 128 -> 256 with batch normalisation, no pooling.
        self.conv2 = nn.Conv2d(in_channels=128, out_channels=256, kernel_size=3, stride=1, padding=1)
        self.batchNorm2 = nn.BatchNorm2d(256)
        self.relu2 = nn.ReLU(inplace=True)

        # Stage 3: 256 -> 256. The pool halves the height only; stride 1 and
        # padding 1 along the width make the width grow by one instead.
        self.conv3 = nn.Conv2d(in_channels=256, out_channels=256, kernel_size=3, stride=1, padding=1)
        self.relu3 = nn.ReLU(inplace=True)
        self.pool3 = nn.MaxPool2d(kernel_size=(2, 2), stride=(2, 1), padding=(0, 1))

        # Stage 4: 256 -> 512 with batch normalisation, no pooling.
        self.conv4 = nn.Conv2d(in_channels=256, out_channels=512, kernel_size=3, stride=1, padding=1)
        self.batchNorm4 = nn.BatchNorm2d(512)
        self.relu4 = nn.ReLU(inplace=True)

        # Stage 5: 512 -> 512, same height-only pooling as stage 3.
        self.conv5 = nn.Conv2d(in_channels=512, out_channels=512, kernel_size=3, stride=1, padding=1)
        self.relu5 = nn.ReLU(inplace=True)
        self.pool5 = nn.MaxPool2d(kernel_size=(2, 2), stride=(2, 1), padding=(0, 1))

        # Stage 6: unpadded 2x2 convolution; shrinks height and width by one.
        self.conv6 = nn.Conv2d(in_channels=512, out_channels=512, kernel_size=2, stride=1, padding=0)
        self.batchNorm6 = nn.BatchNorm2d(512)
        self.relu6 = nn.ReLU(inplace=True)

    def forward(self, input):
        """Run the convolutional stack and return the final feature map.

        Args:
            input: Image batch whose channel dimension equals ``nChannel``
                (expected layout: batch, channel, height, width).

        Returns:
            The activation of the last ReLU, with 512 channels.
        """
        # The per-stage debug prints were removed: forward() runs on every
        # training/inference step and must not write to stdout. Use
        # torchsummary or forward hooks to inspect intermediate shapes.
        x = self.pool0(self.relu0(self.conv0(input)))
        x = self.pool1(self.relu1(self.conv1(x)))
        x = self.relu2(self.batchNorm2(self.conv2(x)))
        x = self.pool3(self.relu3(self.conv3(x)))
        x = self.relu4(self.batchNorm4(self.conv4(x)))
        x = self.pool5(self.relu5(self.conv5(x)))
        x = self.relu6(self.batchNorm6(self.conv6(x)))
        return x
使用torchsummary打印网络的结构和参数:
from torchsummary import summary
# Build the baseline network on the GPU when one is available, else on the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
net = CNN0(32, 1).to(device)
# summary() prints the layer table itself; wrapping it in print() only adds a
# stray extra line with its return value after the report.
summary(net, input_size=(1, 32, 320))
运行结果:
----------------------------------------------------------------
Layer (type) Output Shape Param #
================================================================
Conv2d-1 [-1, 64, 32, 320] 640
ReLU-2 [-1, 64, 32, 320] 0
MaxPool2d-3 [-1, 64, 16, 160] 0
Conv2d-4 [-1, 128, 16, 160] 73,856
ReLU-5 [-1, 128, 16, 160] 0
MaxPool2d-6 [-1, 128, 8, 80] 0
Conv2d-7 [-1, 256, 8, 80] 295,168
BatchNorm2d-8 [-1, 256, 8, 80] 512
ReLU-9 [-1, 256, 8, 80] 0
Conv2d-10 [-1, 256, 8, 80] 590,080
ReLU-11 [-1, 256, 8, 80] 0
MaxPool2d-12 [-1, 256, 4, 81] 0
Conv2d-13 [-1, 512, 4, 81] 1,180,160
BatchNorm2d-14 [-1, 512, 4, 81] 1,024
ReLU-15 [-1, 512, 4, 81] 0
Conv2d-16 [-1, 512, 4, 81] 2,359,808
ReLU-17 [-1, 512, 4, 81] 0
MaxPool2d-18 [-1, 512, 2, 82] 0
Conv2d-19 [-1, 512, 1, 81] 1,049,088
BatchNorm2d-20 [-1, 512, 1, 81] 1,024
ReLU-21 [-1, 512, 1, 81] 0
================================================================
Total params: 5,551,360
Trainable params: 5,551,360
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.04
Forward/backward pass size (MB): 31.68
Params size (MB): 21.18
Estimated Total Size (MB): 52.89
----------------------------------------------------------------
使用深度可分离卷积改造的crnn网络中cnn部分网络:
class CNN(nn.Module):
    """CNN part of the CRNN rebuilt with depthwise-separable convolutions.

    Every convolution stage is a per-channel (depthwise) convolution followed
    by a 1x1 (pointwise) convolution that sets the output channel count. ReLU
    follows every stage; stages 2, 4 and 6 are batch-normalised first, and
    max-pooling follows stages 0, 1, 3 and 5. For a ``(1, 32, 320)`` input
    the output feature map is ``(512, 1, 81)``.

    Args:
        imageHeight: Height of the input images; must be a multiple of 32.
        nChannel: Number of channels of the input images.

    Raises:
        AssertionError: If ``imageHeight`` is not a multiple of 32.
    """

    def __init__(self, imageHeight, nChannel):
        super().__init__()
        assert imageHeight % 32 == 0, 'image Height has to be a multiple of 32'

        def depthwise(channels, kernel=3, pad=1):
            # One filter per input channel (groups == channels).
            return nn.Conv2d(channels, channels, kernel_size=kernel, stride=1,
                             padding=pad, groups=channels)

        def pointwise(c_in, c_out):
            # 1x1 convolution that mixes channels and sets the output width.
            return nn.Conv2d(c_in, c_out, kernel_size=1, stride=1, padding=0, groups=1)

        def height_pool():
            # Halves the height; stride 1 with padding 1 along the width.
            return nn.MaxPool2d(kernel_size=(2, 2), stride=(2, 1), padding=(0, 1))

        # Modules are created in forward order so attribute names, registration
        # order and parameter initialisation order stay stable.

        # Stage 0: nChannel -> 64, halve height and width.
        self.depth_conv0 = depthwise(nChannel)
        self.point_conv0 = pointwise(nChannel, 64)
        self.relu0 = nn.ReLU(inplace=True)
        self.pool0 = nn.MaxPool2d(kernel_size=2, stride=2)

        # Stage 1: 64 -> 128, halve height and width.
        self.depth_conv1 = depthwise(64)
        self.point_conv1 = pointwise(64, 128)
        self.relu1 = nn.ReLU(inplace=True)
        self.pool1 = nn.MaxPool2d(kernel_size=2, stride=2)

        # Stage 2: 128 -> 256 with batch normalisation.
        self.depth_conv2 = depthwise(128)
        self.point_conv2 = pointwise(128, 256)
        self.batchNorm2 = nn.BatchNorm2d(256)
        self.relu2 = nn.ReLU(inplace=True)

        # Stage 3: 256 -> 256, height-only pooling.
        self.depth_conv3 = depthwise(256)
        self.point_conv3 = pointwise(256, 256)
        self.relu3 = nn.ReLU(inplace=True)
        self.pool3 = height_pool()

        # Stage 4: 256 -> 512 with batch normalisation.
        self.depth_conv4 = depthwise(256)
        self.point_conv4 = pointwise(256, 512)
        self.batchNorm4 = nn.BatchNorm2d(512)
        self.relu4 = nn.ReLU(inplace=True)

        # Stage 5: 512 -> 512, height-only pooling.
        self.depth_conv5 = depthwise(512)
        self.point_conv5 = pointwise(512, 512)
        self.relu5 = nn.ReLU(inplace=True)
        self.pool5 = height_pool()

        # Stage 6: unpadded 2x2 depthwise + pointwise pair, standing in for a
        # dense 512 -> 512 2x2 convolution.
        self.depth_conv6 = depthwise(512, kernel=2, pad=0)
        self.point_conv6 = pointwise(512, 512)
        self.batchNorm6 = nn.BatchNorm2d(512)
        self.relu6 = nn.ReLU(inplace=True)

    def forward(self, input):
        """Apply all seven depthwise-separable stages and return the feature map.

        Args:
            input: Image batch whose channel dimension equals ``nChannel``
                (expected layout: batch, channel, height, width).

        Returns:
            The activation of the last ReLU, with 512 channels.
        """
        x = self.point_conv0(self.depth_conv0(input))
        x = self.pool0(self.relu0(x))

        x = self.point_conv1(self.depth_conv1(x))
        x = self.pool1(self.relu1(x))

        x = self.point_conv2(self.depth_conv2(x))
        x = self.relu2(self.batchNorm2(x))

        x = self.point_conv3(self.depth_conv3(x))
        x = self.pool3(self.relu3(x))

        x = self.point_conv4(self.depth_conv4(x))
        x = self.relu4(self.batchNorm4(x))

        x = self.point_conv5(self.depth_conv5(x))
        x = self.pool5(self.relu5(x))

        x = self.point_conv6(self.depth_conv6(x))
        x = self.relu6(self.batchNorm6(x))
        return x
打印的结果:
----------------------------------------------------------------
Layer (type) Output Shape Param #
================================================================
Conv2d-1 [-1, 1, 32, 320] 10
Conv2d-2 [-1, 64, 32, 320] 128
ReLU-3 [-1, 64, 32, 320] 0
MaxPool2d-4 [-1, 64, 16, 160] 0
Conv2d-5 [-1, 64, 16, 160] 640
Conv2d-6 [-1, 128, 16, 160] 8,320
ReLU-7 [-1, 128, 16, 160] 0
MaxPool2d-8 [-1, 128, 8, 80] 0
Conv2d-9 [-1, 128, 8, 80] 1,280
Conv2d-10 [-1, 256, 8, 80] 33,024
BatchNorm2d-11 [-1, 256, 8, 80] 512
ReLU-12 [-1, 256, 8, 80] 0
Conv2d-13 [-1, 256, 8, 80] 2,560
Conv2d-14 [-1, 256, 8, 80] 65,792
ReLU-15 [-1, 256, 8, 80] 0
MaxPool2d-16 [-1, 256, 4, 81] 0
Conv2d-17 [-1, 256, 4, 81] 2,560
Conv2d-18 [-1, 512, 4, 81] 131,584
BatchNorm2d-19 [-1, 512, 4, 81] 1,024
ReLU-20 [-1, 512, 4, 81] 0
Conv2d-21 [-1, 512, 4, 81] 5,120
Conv2d-22 [-1, 512, 4, 81] 262,656
ReLU-23 [-1, 512, 4, 81] 0
MaxPool2d-24 [-1, 512, 2, 82] 0
Conv2d-25 [-1, 512, 1, 81] 2,560
Conv2d-26 [-1, 512, 1, 81] 262,656
BatchNorm2d-27 [-1, 512, 1, 81] 1,024
ReLU-28 [-1, 512, 1, 81] 0
================================================================
Total params: 781,450
Trainable params: 781,450
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.04
Forward/backward pass size (MB): 37.09
Params size (MB): 2.98
Estimated Total Size (MB): 40.11
----------------------------------------------------------------
可以看到,网络参数量从 5,551,360 降到了 781,450,约为原来的 1/7(减少约 86%);至于网络性能怎样,还没有测试。