转换基本流程:
1)创建pytorch的网络结构模型;
2)利用caffe加载其存储的预训练模型,以便读取caffe模型的参数;
3)遍历caffe加载的模型参数;
4)对一些指定的key值,需要进行相应的处理和转换;
5)对修改后的层名(key值),利用numpy之间的转换来实现加载;
6)对相应层的参数(feature)进行比较;
以下以SE_Resnet50为例,将caffe上的模型转换到pytorch上;
1)创建pytorch的网络结构模型:
class Resnet50(Module):
def __init__(self, embedding_size = 512, class_num=0):
super(Resnet50, self).__init__()
self.conv0 = Conv2d(3, out_channels=64, kernel_size=(3, 3), groups=1, stride=(1, 1), padding=(1, 1), bias=False)
self.bn0 = BatchNorm2d(64, eps=2e-5, momentum=0.9)
self.relu0 = PReLU(64)
self.stage1_unit1_bn1 = BatchNorm2d(64, eps=2e-5, momentum=0.9)#stage1_unit1_bn1
self.stage1_unit1_conv1 = Conv2d(64, out_channels=64, kernel_size=(3, 3), groups=1, stride=(1, 1), padding=(1, 1), bias=False)
self.stage1_unit1_bn2 = BatchNorm2d(64, eps=2e-5, momentum=0.9)
self.stage1_unit1_relu1 = PReLU(64)
self.stage1_unit1_conv2 = Conv2d(64, out_channels=64, kernel_size=(3, 3), groups=1, stride=(2, 2), padding=(1, 1), bias=False)
self.stage1_unit1_bn3 = BatchNorm2d(64, eps=2e-5, momentum=0.9)
# self.stage1_unit1_bn3_scale
self.stage1_unit1_se_pool1 = AdaptiveAvgPool2d(1)
# self.stage1_unit1_se_pool1 = AvgPool2d(3, stride=1)
self.stage1_unit1_se_conv1 = Conv2d(64, out_channels=4, kernel_size=(1, 1), groups=1, stride=(1, 1), padding=(0,0), bias=True)
self.stage1_unit1_se_relu1 = PReLU(4)
self.stage1_unit1_se_conv2 = Conv2d(4, out_channels=64, kernel_size=(1, 1), groups=1, stride=(1, 1), padding=(0,0), bias=True)
self.stage1_unit1_se_sigmoid = Sigmoid()
self.stage1_unit1_conv1sc = Conv2d(64, out_channels=64, kernel_size=(1, 1), groups=1, stride=(2, 2), padding=(0,0), bias=False)
self.stage1_unit1_sc = BatchNorm2d(64, eps=2e-5, momentum=0.9)
# self.stage1_unit1_sc_scale #relu
# self._plus0 = #axpy
self.stage1_unit2_bn1 = BatchNorm2d(64, eps=2e-5, momentum=0.9)
# self.stage1_unit2_bn1_scale
self.stage1_unit2_conv1 = Conv2d(64, out_channels=64, kernel_size=(3, 3), groups=1, stride=(1, 1), padding=(1,1), bias=False)
self.stage1_unit2_bn2 = BatchNorm2d(64, eps=2e-5, momentum=0.9)
# self.stage1_unit2_bn2_scale
self.stage1_unit2_relu1 = PReLU(64)
self.stage1_unit2_conv2 = Conv2d(64, out_channels=64, kernel_size=(3, 3), groups=1, stride=(1, 1), padding=(1,1), bias=False)
self.stage1_unit2_bn3 = BatchNorm2d(64, eps=2e-5, momentum=0.9)
# self.stage1_unit2_bn3_scale
self.stage1_unit2_se_pool1 = AdaptiveAvgPool2d(1)
self.stage1_unit2_se_conv1 = Conv2d(64, out_channels=4, kernel_size=(1, 1), groups=1, stride=(1, 1), padding=(0,0), bias=True)
self.stage1_unit2_se_relu1 = PReLU(4)
self.stage1_unit2_se_conv2 = Conv2d(4, out_channels=64, kernel_size=(1, 1), groups=1, stride=(1, 1), padding=(0,0), bias=True)
self.stage1_unit2_se_sigmoid = Sigmoid()
# self._plus1 #axpy
self.stage1_unit3_bn1 = BatchNorm2d(64, eps=2e-5, momentum=0.9)
# self.stage1_unit3_bn1_scale
self.stage1_unit3_conv1 = Conv2d(64, out_channels=64, kernel_size=(3, 3), groups=1, stride=(1, 1), padding=(1,1), bias=False)
self.stage1_unit3_bn2 = BatchNorm2d(64, eps=2e-5, momentum=0.9)
# self.stage1_unit3_bn2_scale
self.stage1_unit3_relu1 = PReLU(64)
self.stage1_unit3_conv2 = Conv2d(64, out_channels=64, kernel_size=(3, 3), groups=1, stride=(1, 1), padding=(1,1), bias=False)
self.stage1_unit3_bn3 = BatchNorm2d(64, eps=2e-5, momentum=0.9)
# self.stage1_unit3_bn3_scale
self.stage1_unit3_se_pool1 = AdaptiveAvgPool2d(1)
self.stage1_unit3_se_conv1 = Conv2d(64, out_channels=4, kernel_size=(1, 1), groups=1, stride=(1, 1), padding=(0,0), bias=True)
self.stage1_unit3_se_relu1 = PReLU(4)
self.stage1_unit3_se_conv2 = Conv2d(4, out_channels=64, kernel_size=(1, 1), groups=1, stride=(1, 1), padding=(0,0), bias=True)
self.stage1_unit3_se_sigmoid = Sigmoid()
# self._plus2 #Axpy
self.stage2_unit1_bn1 = BatchNorm2d(64, eps=2e-5, momentum=0.9)
# self.stage2_unit1_bn1_scale
self.stage2_unit1_conv1 = Conv2d(64, out_channels=128, kernel_size=(3, 3), groups=1, stride=(1, 1), padding=(1,1), bias=False)
self.stage2_unit1_bn2 = BatchNorm2d(128, eps=2e-5, momentum=0.9)
# self.stage2_unit1_bn2_scale
self.stage2_unit1_relu1 = PReLU(128)
self.stage2_unit1_conv2 = Conv2d(128, out_channels=128, kernel_size=(3, 3), groups=1, stride=(2, 2), padding=(1,1), bias=False)
self.stage2_unit1_bn3 = BatchNorm2d(128, eps=2e-5, momentum=0.9)
# self.stage2_unit1_bn3_scale
self.stage2_unit1_se_pool1 = AdaptiveAvgPool2d(1)
self.stage2_unit1_se_conv1 = Conv2d(128, out_channels=8, kernel_size=(1, 1), groups=1, stride=(1, 1), padding=(0,0), bias=True)
self.stage2_unit1_se_relu1 = PReLU(8)
self.stage2_unit1_se_conv2 = Conv2d(8, out_channels=128, kernel_size=(1, 1), groups=1, stride=(1, 1), padding=(0,0), bias=True)
self.stage2_unit1_se_sigmoid = Sigmoid()
self.stage2_unit1_conv1sc = Conv2d(64, out_channels=128, kernel_size=(1, 1), groups=1, stride=(2, 2), padding=(0,0), bias=False)
self.stage2_unit1_sc = BatchNorm2d(128, eps=2e-5, momentum=0.9)
# self.stage2_unit1_sc_scale
# self._plus3 #axpy
self.stage2_unit2_bn1 = BatchNorm2d(128, eps=2e-5, momentum=0.9)
# self.stage2_unit2_bn1_scale
self.stage2_unit2_conv1 = Conv2d(128, out_channels=128, kernel_size=(3, 3), groups=1, stride=(1, 1), padding=(1,1), bias=False)
self.stage2_unit2_bn2 = BatchNorm2d(128, eps=2e-5, momentum=0.9)
# self.stage2_unit2_bn2_scale
self.stage2_unit2_relu1 = PReLU(128)
self.stage2_unit2_conv2 = Conv2d(128, out_channels=128, kernel_size=(3, 3), groups=1, stride=(1, 1), padding=(1,1), bias=False)
self.stage2_unit2_bn3 = BatchNorm2d(128, eps=2e-5, momentum=0.9)
# self.stage2_unit2_bn3_scale
self.stage2_unit2_se_pool1 = AdaptiveAvgPool2d(1)
self.stage2_unit2_se_conv1 = Conv2d(128, out_channels=8, kernel_size=(1, 1), groups=1, stride=(1, 1), padding=(0,0), bias=True)
self.stage2_unit2_se_relu1 = PReLU(8)
self.stage2_unit2_se_conv2 = Conv2d(8, out_channels=128, kernel_size=(1, 1), groups=1, stride=(1, 1), padding=(0,0), bias=True)
self.stage2_unit2_se_sigmoid = Sigmoid()
# self._plus4
self.stage2_unit3_bn1 = BatchNorm2d(128, eps=2e-5, momentum=0.9)
# self.stage2_unit3_bn1_scale
self.stage2_unit3_conv1 = Conv2d(128, out_channels=128, kernel_size=(3, 3), groups=1, stride=(1, 1), padding=(1,1), bias=False)
self.stage2_unit3_bn2 = BatchNorm2d(128, eps=2e-5, momentum=0.9)
# self.stage2_unit3_bn2_scale
self.stage2_unit3_relu1 = PReLU(128)
self.stage2_unit3_conv2 = Conv2d(128, out_channels=128, kernel_size=(3, 3), groups=1, stride=(1, 1), padding=(1,1), bias=False)
self.stage2_unit3_bn3 = BatchNorm2d(128, eps=2e-5, momentum=0.9)
# self.stage2_unit3_bn3_scale
self.stage2_unit3_se_pool1 = AdaptiveAvgPool2d(1)
self.stage2_unit3_se_conv1 = Conv2d(128, out_channels=8, kernel_size=(1, 1), groups=1, stride=(1, 1), padding=(0,0), bias=True)
self.stage2_unit3_se_relu1 = PReLU(8)
self.stage2_unit3_se_conv2 = Conv2d(8, out_channels=128, kernel_size=(1, 1), groups=1, stride=(1, 1), padding=(0,0), bias=True)
self.stage2_unit3_se_sigmoid = Sigmoid()
# self._plus5
self.stage2_unit4_bn1 = BatchNorm2d(128, eps=2e-5, momentum=0.9)
# self.stage2_unit4_bn1_scale
self.stage2_unit4_conv1 = Conv2d(128, out_channels=128, kernel_size=(3, 3), groups=1, stride=(1, 1), padding=(1,1), bias=False)
self.stage2_unit4_bn2 = BatchNorm2d(128, eps=2e-5, momentum=0.9)
# self.stage2_unit4_bn2_scale
self.stage2_unit4_relu1 = PReLU(128)
self.stage2_unit4_conv2 = Conv2d(128, out_channels=128, kernel_size=(3, 3), groups=1, stride=(1, 1), padding=(1,1), bias=False)
self.stage2_unit4_bn3 = BatchNorm2d(128, eps=2e-5, momentum=0.9)
# self.stage2_unit4_bn3_scale
self.stage2_unit4_se_pool1 = AdaptiveAvgPool2d(1)
self.stage2_unit4_se_conv1 = Conv2d(128, out_channels=8, kernel_size=(1, 1), groups=1, stride=(1, 1), padding=(0,0), bias=True)
self.stage2_unit4_se_relu1 = PReLU(8)
self.stage2_unit4_se_conv2 = Conv2d(8, out_channels=128, kernel_size=(1, 1), groups=1, stride=(1, 1), padding=(0,0), bias=True)
self.stage2_unit4_se_sigmoid = Sigmoid()
# self._plus6
self.stage3_unit1_bn1 = BatchNorm2d(128, eps=2e-5, momentum=0.9)
# self.stage3_unit1_bn1_scale
self.stage3_unit1_conv1 = Conv2d(128, out_channels=256, kernel_size=(3, 3), groups=1, stride=(1, 1), padding=(1,1), bias=False)
self.stage3_unit1_bn2 = BatchNorm2d(256, eps=2e-5, momentum=0.9)
# self.stage3_unit1_bn2_scale
self.stage3_unit1_relu1 = PReLU(256)
self.stage3_unit1_conv2 = Conv2d(256, out_channels=256, kernel_size=(3, 3), groups=1, stride=(2, 2), padding=(1,1), bias=False)
self.stage3_unit1_bn3 = BatchNorm2d(256, eps=2e-5, momentum=0.9)
# self.stage3_unit1_bn3_scale
self.stage3_unit1_se_pool1 = AdaptiveAvgPool2d(1)
self.stage3_unit1_se_conv1 = Conv2d(256, out_channels=16, kernel_size=(1, 1), groups=1, stride=(1, 1), padding=(0,0), bias=True)
self.stage3_unit1_se_relu1 = PReLU(16)
self.stage3_unit1_se_conv2 = Conv2d(16, out_channels=256, kernel_size=(1, 1), groups=1, stride=(1, 1), padding=(0,0), bias=True)
self.stage3_unit1_se_sigmoid = Sigmoid()
self.stage3_unit1_conv1sc = Conv2d(128, out_channels=256, kernel_size=(1, 1), groups=1, stride=(2, 2), padding=(0,0), bias=False)
self.stage3_unit1_sc = BatchNorm2d(256, eps=2e-5, momentum=0.9)
# self.stage3_unit1_sc_scale
# self._plus7
self.stage3_unit2_bn1 = BatchNorm2d(256, eps=2e-5, momentum=0.9)
# self.stage3_unit2_bn1_scale
self.stage3_unit2_conv1 = Conv2d(256, out_channels=256, kernel_size=(3, 3), groups=1, stride=(1, 1), padding=(1,1), bias=False)
self.stage3_unit2_bn2 = BatchNorm2d(256, eps=2e-5, momentum=0.9)
# self.stage3_unit2_bn2_scale
self.stage3_unit2_relu1 = PReLU(256)
self.stage3_unit2_conv2 = Conv2d(256, out_channels=256, kernel_size=(3, 3), groups=1, stride=(1, 1), padding=(1,1), bias=False)
self.stage3_unit2_bn3 = BatchNorm2d(256, eps=2e-5, momentum=0.9)
# self.stage3_unit2_bn3_scale
self.stage3_unit2_se_pool1 = AdaptiveAvgPool2d(1)
self.stage3_unit2_se_conv1 = Conv2d(256, out_channels=16, kernel_size=(1, 1), groups=1, stride=(1, 1), padding=(0,0), bias=True)
self.stage3_unit2_se_relu1 = PReLU(16)
self.stage3_unit2_se_conv2 = Conv2d(16, out_channels=256, kernel_size=(1, 1), groups=1, stride=(1, 1), padding=(0,0), bias=True)
self.stage3_unit2_se_sigmoid = Sigmoid()
# self._plus8
self.stage3_unit3_bn1 = BatchNorm2d(256, eps=2e-5, momentum=0.9)
# self.stage3_unit3_bn1_scale
self.stage3_unit3_conv1 = Conv2d(256, out_channels=256, kernel_size=(3, 3), groups=1, stride=(1, 1), padding=(1,1), bias=False)
self.stage3_unit3_bn2 = BatchNorm2d(256, eps=2e-5, momentum=0.9)
# self.stage3_unit3_bn2_scale
self.stage3_unit3_relu1 = PReLU(256)
self.stage3_unit3_conv2 = Conv2d(256, out_channels=256, kernel_size=(3, 3), groups=1, stride=(1, 1), padding=(1,1), bias=False)
self.stage3_unit3_bn3 = BatchNorm2d(256, eps=2e-5, momentum=0.9)
# self.stage3_unit3_bn3_scale
self.stage3_unit3_se_pool1 = AdaptiveAvgPool2d(1)
self.stage3_unit3_se_conv1 = Conv2d(256, out_channels=16, kernel_size=(1, 1), groups=1, stride=(1, 1), padding=(0,0), bias=True)
self.stage3_unit3_se_relu1 = PReLU(16)
self.stage3_unit3_se_conv2 = Conv2d(16, out_channels=256, kernel_size=(1, 1), groups=1, stride=(1, 1), padding=(0,0), bias=True)
self.stage3_unit3_se_sigmoid = Sigmoid()
# self._plus9
self.stage3_unit4_bn1 = BatchNorm2d(256, eps=2e-5, momentum=0.9)
# self.stage3_unit4_bn1_scale
self.stage3_unit4_conv1 = Conv2d(256, out_channels=256, kernel_size=(3, 3), groups=1, stride=(1, 1), padding=(1,1), bias=False)
self.stage3_unit4_bn2 = BatchNorm2d(256, eps=2e-5, momentum=0.9)
# self.stage3_unit4_bn2_scale
self.stage3_unit4_relu1 = PReLU(256)
self.stage3_unit4_conv2 = Conv2d(256, out_channels=256, kernel_size=(3, 3), groups=1, stride=(1, 1), padding=(1,1), bias=False)
self.stage3_unit4_bn3 = BatchNorm2d(256, eps=2e-5, momentum=0.9)
# self.stage3_unit4_bn3_scale
self.stage3_unit4_se_pool1 = AdaptiveAvgPool2d(1)
self.stage3_unit4_se_conv1 = Conv2d(256, out_channels=16, kernel_size=(1, 1), groups=1, stride=(1, 1), padding=(0,0), bias=True)
self.stage3_unit4_se_relu1 = PReLU(16)
self.stage3_unit4_se_conv2 = Conv2d(16, out_channels=256, kernel_size=(1, 1), groups=1, stride=(1, 1), padding=(0,0), bias=True)
self.stage3_unit4_se_sigmoid = Sigmoid()
# self._plus10
self.stage3_unit5_bn1 = BatchNorm2d(256, eps=2e-5, momentum=0.9)
# self.stage3_unit5_bn1_scale
self.stage3_unit5_conv1 = Conv2d(256, out_channels=256, kernel_size=(3, 3), groups=1, stride=(1, 1), padding=(1,1), bias=False)
self.stage3_unit5_bn2 = BatchNorm2d(256, eps=2e-5, momentum=0.9)
# self.stage3_unit5_bn2_scale
self.stage3_unit5_relu1 = PReLU(256)
self.stage3_unit5_conv2 = Conv2d(256, out_channels=256, kernel_size=(3, 3), groups=1, stride=(1, 1), padding=(1,1), bias=False)
self.stage3_unit5_bn3 = BatchNorm2d(256, eps=2e-5, momentum=0.9)
# self.stage3_unit5_bn3_scale
self.stage3_unit5_se_pool1 = AdaptiveAvgPool2d(1)
self.stage3_unit5_se_conv1 = Conv2d(256, out_channels=16, kernel_size=(1, 1), groups=1, stride=(1, 1), padding=(0,0), bias=True)
self.stage3_unit5_se_relu1 = PReLU(16)
self.stage3_unit5_se_conv2 = Conv2d(16, out_channels=256, kernel_size=(1, 1), groups=1, stride=(1, 1), padding=(0,0), bias=True)
self.stage3_unit5_se_sigmoid = Sigmoid()
# self._plus11
self.stage3_unit6_bn1 = BatchNorm2d(256, eps=2e-5, momentum=0.9)
# self.stage3_unit6_bn1_scale
self.stage3_unit6_conv1 = Conv2d(256, out_channels=256, kernel_size=(3, 3), groups=1, stride=(1, 1), padding=(1,1), bias=False)
self.stage3_unit6_bn2 = BatchNorm2d(256, eps=2e-5, momentum=0.9)
# self.stage3_unit6_bn2_scale
self.stage3_unit6_relu1 = PReLU(256)
self.stage3_unit6_conv2 = Conv2d(256, out_channels=256, kernel_size=(3, 3), groups=1, stride=(1, 1), padding=(1,1), bias=False)
self.stage3_unit6_bn3 = BatchNorm2d(256, eps=2e-5, momentum=0.9)
# self.stage3_unit6_bn3_scale
self.stage3_unit6_se_pool1 = AdaptiveAvgPool2d(1)
self.stage3_unit6_se_conv1 = Conv2d(256, out_channels=16, kernel_size=(1, 1), groups=1, stride=(1, 1), padding=(0,0), bias=True)
self.stage3_unit6_se_relu1 = PReLU(16)
self.stage3_unit6_se_conv2 = Conv2d(16, out_channels=256, kernel_size=(1, 1), groups=1, stride=(1, 1), padding=(0,0), bias=True)
self.stage3_unit6_se_sigmoid = Sigmoid()
# self._plus12
self.stage3_unit7_bn1 = BatchNorm2d(256, eps=2e-5, momentum=0.9)
# self.stage3_unit7_bn1_scale
self.stage3_unit7_conv1 = Conv2d(256, out_channels=256, kernel_size=(3, 3), groups=1, stride=(1, 1), padding=(1,1), bias=False)
self.stage3_unit7_bn2 = BatchNorm2d(256, eps=2e-5, momentum=0.9)
# self.stage3_unit7_bn2_scale
self.stage3_unit7_relu1 = PReLU(256)
self.stage3_unit7_conv2 = Conv2d(256, out_channels=256, kernel_size=(3, 3), groups=1, stride=(1, 1), padding=(1,1), bias=False)
self.stage3_unit7_bn3 = BatchNorm2d(256, eps=2e-5, momentum=0.9)
# self.stage3_unit7_bn3_scale
self.stage3_unit7_se_pool1 = AdaptiveAvgPool2d(1)
self.stage3_unit7_se_conv1 = Conv2d(256, out_channels=16, kernel_size=(1, 1), groups=1, stride=(1, 1), padding=(0,0), bias=True)
self.stage3_unit7_se_relu1 = PReLU(16)
self.stage3_unit7_se_conv2 = Conv2d(16, out_channels=256, kernel_size=(1, 1), groups=1, stride=(1, 1), padding=(0,0), bias=True)
self.stage3_unit7_se_sigmoid = Sigmoid()
# self._plus13
self.stage3_unit8_bn1 = BatchNorm2d(256, eps=2e-5, momentum=0.9)
# self.stage3_unit8_bn1_scale
self.stage3_unit8_conv1 = Conv2d(256, out_channels=256, kernel_size=(3, 3), groups=1, stride=(1, 1), padding=(1,1), bias=False)
self.stage3_unit8_bn2 = BatchNorm2d(256, eps=2e-5, momentum=0.9)
# self.stage3_unit8_bn2_scale
self.stage3_unit8_relu1 = PReLU(256)
self.stage3_unit8_conv2 = Conv2d(256, out_channels=256, kernel_size=(3, 3), groups=1, stride=(1, 1), padding=(1,1), bias=False)
self.stage3_unit8_bn3 = BatchNorm2d(256, eps=2e-5, momentum=0.9)
# self.stage3_unit8_bn3_scale
self.stage3_unit8_se_pool1 = AdaptiveAvgPool2d(1)
self.stage3_unit8_se_conv1 = Conv2d(256, out_channels=16, kernel_size=(1, 1), groups=1, stride=(1, 1), padding=(0,0), bias=True)
self.stage3_unit8_se_relu1 = PReLU(16)
self.stage3_unit8_se_conv2 = Conv2d(16, out_channels=256, kernel_size=(1, 1), groups=1, stride=(1, 1), padding=(0,0), bias=True)
self.stage3_unit8_se_sigmoid = Sigmoid()
# self._plus14
self.stage3_unit9_bn1 = BatchNorm2d(256, eps=2e-5, momentum=0.9)
# self.stage3_unit9_bn1_scale
self.stage3_unit9_conv1 = Conv2d(256, out_channels=256, kernel_size=(3, 3), groups=1, stride=(1, 1), padding=(1,1), bias=False)
self.stage3_unit9_bn2 = BatchNorm2d(256, eps=2e-5, momentum=0.9)
# self.stage3_unit9_bn2_scale
self.stage3_unit9_relu1 = PReLU(256)
self.stage3_unit9_conv2 = Conv2d(256, out_channels=256, kernel_size=(3, 3), groups=1, stride=(1, 1), padding=(1,1), bias=False)
self.stage3_unit9_bn3 = BatchNorm2d(256, eps=2e-5, momentum=0.9)
# self.stage3_unit9_bn3_scale
self.stage3_unit9_se_pool1 = AdaptiveAvgPool2d(1)
self.stage3_unit9_se_conv1 = Conv2d(256, out_channels=16, kernel_size=(1, 1), groups=1, stride=(1, 1), padding=(0,0), bias=True)
self.stage3_unit9_se_relu1 = PReLU(16)
self.stage3_unit9_se_conv2 = Conv2d(16, out_channels=256, kernel_size=(1, 1), groups=1, stride=(1, 1), padding=(0,0), bias=True)
self.stage3_unit9_se_sigmoid = Sigmoid()
# self._plus15
self.stage3_unit10_bn1 = BatchNorm2d(256, eps=2e-5, momentum=0.9)
# self.stage3_unit10_bn1_scale
self.stage3_unit10_conv1 = Conv2d(256, out_channels=256, kernel_size=(3, 3), groups=1, stride=(1, 1), padding=(1,1), bias=False)
self.stage3_unit10_bn2 = BatchNorm2d(256, eps=2e-5, momentum=0.9)
# self.stage3_unit10_bn2_scale
self.stage3_unit10_relu1 = PReLU(256)
self.stage3_unit10_conv2 = Conv2d(256, out_channels=256, kernel_size=(3, 3), groups=1, stride=(1, 1), padding=(1,1), bias=False)
self.stage3_unit10_bn3 = BatchNorm2d(256, eps=2e-5, momentum=0.9)
# self.stage3_unit10_bn3_scale
self.stage3_unit10_se_pool1 = AdaptiveAvgPool2d(1)
self.stage3_unit10_se_conv1 = Conv2d(256, out_channels=16, kernel_size=(1, 1), groups=1, stride=(1, 1), padding=(0,0), bias=True)
self.stage3_unit10_se_relu1 = PReLU(16)
self.stage3_unit10_se_conv2 = Conv2d(16, out_channels=256, kernel_size=(1, 1), groups=1, stride=(1, 1), padding=(0,0), bias=True)
self.stage3_unit10_se_sigmoid = Sigmoid()
# self._plus16
self.stage3_unit11_bn1 = BatchNorm2d(256, eps=2e-5, momentum=0.9)
# self.stage3_unit11_bn1_scale
self.stage3_unit11_conv1 = Conv2d(256, out_channels=256, kernel_size=(3, 3), groups=1, stride=(1, 1), padding=(1,1), bias=False)
self.stage3_unit11_bn2 = BatchNorm2d(256, eps=2e-5, momentum=0.9)
# self.stage3_unit11_bn2_scale
self.stage3_unit11_relu1 = PReLU(256)
self.stage3_unit11_conv2 = Conv2d(256, out_channels=256, kernel_size=(3, 3), groups=1, stride=(1, 1), padding=(1,1), bias=False)
self.stage3_unit11_bn3 = BatchNorm2d(256, eps=2e-5, momentum=0.9)
# self.stage3_unit11_bn3_scale
self.stage3_unit11_se_pool1 = AdaptiveAvgPool2d(1)
self.stage3_unit11_se_conv1 = Conv2d(256, out_channels=16, kernel_size=(1, 1), groups=1, stride=(1, 1), padding=(0,0), bias=True)
self.stage3_unit11_se_relu1 = PReLU(16)
self.stage3_unit11_se_conv2 = Conv2d(16, out_channels=256, kernel_size=(1, 1), groups=1, stride=(1, 1), padding=(0,0), bias=True)
self.stage3_unit11_se_sigmoid = Sigmoid()
# self._plus17
self.stage3_unit12_bn1 = BatchNorm2d(256, eps=2e-5, momentum=0.9)
# self.stage3_unit12_bn1_scale
self.stage3_unit12_conv1 = Conv2d(256, out_channels=256, kernel_size=(3, 3), groups=1, stride=(1, 1), padding=(1,1), bias=False)
self.stage3_unit12_bn2 = BatchNorm2d(256, eps=2e-5, momentum=0.9)
# self.stage3_unit12_bn2_scale
self.stage3_unit12_relu1 = PReLU(256)
self.stage3_unit12_conv2 = Conv2d(256, out_channels=256, kernel_size=(3, 3), groups=1, stride=(1, 1), padding=(1,1), bias=False)
self.stage3_unit12_bn3 = BatchNorm2d(256, eps=2e-5, momentum=0.9)
# self.stage3_unit12_bn3_scale
self.stage3_unit12_se_pool1 = AdaptiveAvgPool2d(1)
self.stage3_unit12_se_conv1 = Conv2d(256, out_channels=16, kernel_size=(1, 1), groups=1, stride=(1, 1), padding=(0,0), bias=True)
self.stage3_unit12_se_relu1 = PReLU(16)
self.stage3_unit12_se_conv2 = Conv2d(16, out_channels=256, kernel_size=(1, 1), groups=1, stride=(1, 1), padding=(0,0), bias=True)
self.stage3_unit12_se_sigmoid = Sigmoid()
# self._plus18
self.stage3_unit13_bn1 = BatchNorm2d(256, eps=2e-5, momentum=0.9)
# self.stage3_unit13_bn1_scale
self.stage3_unit13_conv1 = Conv2d(256, out_channels=256, kernel_size=(3, 3), groups=1, stride=(1, 1), padding=(1,1), bias=False)
self.stage3_unit13_bn2 = BatchNorm2d(256, eps=2e-5, momentum=0.9)
# self.stage3_unit13_bn2_scale
self.stage3_unit13_relu1 = PReLU(256)
self.stage3_unit13_conv2 = Conv2d(256, out_channels=256, kernel_size=(3, 3), groups=1, stride=(1, 1), padding=(1,1), bias=False)
self.stage3_unit13_bn3 = BatchNorm2d(256, eps=2e-5, momentum=0.9)
# self.stage3_unit13_bn3_scale
self.stage3_unit13_se_pool1 = AdaptiveAvgPool2d(1)
self.stage3_unit13_se_conv1 = Conv2d(256, out_channels=16, kernel_size=(1, 1), groups=1, stride=(1, 1), padding=(0,0), bias=True)
self.stage3_unit13_se_relu1 = PReLU(16)
self.stage3_unit13_se_conv2 = Conv2d(16, out_channels=256, kernel_size=(1, 1), groups=1, stride=(1, 1), padding=(0,0), bias=True)
self.stage3_unit13_se_sigmoid = Sigmoid()
# self._plus19
self.stage3_unit14_bn1 = BatchNorm2d(256, eps=2e-5, momentum=0.9)
# self.stage3_unit14_bn1_scale
self.stage3_unit14_conv1 = Conv2d(256, out_channels=256, kernel_size=(3, 3), groups=1, stride=(1, 1), padding=(1,1), bias=False)
self.stage3_unit14_bn2 = BatchNorm2d(256, eps=2e-5, momentum=0.9)
# self.stage3_unit14_bn2_scale
self.stage3_unit14_relu1 = PReLU(256)
self.stage3_unit14_conv2 = Conv2d(256, out_channels=256, kernel_size=(3, 3), groups=1, stride=(1, 1), padding=(1,1), bias=False)
self.stage3_unit14_bn3 = BatchNorm2d(256, eps=2e-5, momentum=0.9)
# self.stage3_unit14_bn3_scale
self.stage3_unit14_se_pool1 = AdaptiveAvgPool2d(1)
self.stage3_unit14_se_conv1 = Conv2d(256, out_channels=16, kernel_size=(1, 1), groups=1, stride=(1, 1), padding=(0,0), bias=True)
self.stage3_unit14_se_relu1 = PReLU(16)
self.stage3_unit14_se_conv2 = Conv2d(16, out_channels=256, kernel_size=(1, 1), groups=1, stride=(1, 1), padding=(0,0), bias=True)
self.stage3_unit14_se_sigmoid = Sigmoid()
# self._plus20
self.stage4_unit1_bn1 = BatchNorm2d(256, eps=2e-5, momentum=0.9)
# self.stage4_unit1_bn1_scale
self.stage4_unit1_conv1 = Conv2d(256, out_channels=512, kernel_size=(3, 3), groups=1, stride=(1, 1), padding=(1,1), bias=False)
self.stage4_unit1_bn2 = BatchNorm2d(512, eps=2e-5, momentum=0.9)
# self.stage4_unit1_bn2_scale
self.stage4_unit1_relu1 = PReLU(512)
self.stage4_unit1_conv2 = Conv2d(512, out_channels=512, kernel_size=(3, 3), groups=1, stride=(2, 2), padding=(1,1), bias=False)
self.stage4_unit1_bn3 = BatchNorm2d(512, eps=2e-5, momentum=0.9)
# self.stage4_unit1_bn3_scale
self.stage4_unit1_se_pool1 = AdaptiveAvgPool2d(1)
self.stage4_unit1_se_conv1 = Conv2d(512, out_channels=32, kernel_size=(1, 1), groups=1, stride=(1, 1), padding=(0,0), bias=True)
self.stage4_unit1_se_relu1 = PReLU(32)
self.stage4_unit1_se_conv2 = Conv2d(32, out_channels=512, kernel_size=(1, 1), groups=1, stride=(1, 1), padding=(0,0), bias=True)
self.stage4_unit1_se_sigmoid = Sigmoid()
self.stage4_unit1_conv1sc = Conv2d(256, out_channels=512, kernel_size=(1, 1), groups=1, stride=(2, 2), padding=(0,0), bias=False)
self.stage4_unit1_sc = BatchNorm2d(512, eps=2e-5, momentum=0.9)
# self.stage4_unit1_sc_scale
# self._plus21
self.stage4_unit2_bn1 = BatchNorm2d(512, eps=2e-5, momentum=0.9)
# self.stage4_unit2_bn1_scale
self.stage4_unit2_conv1 = Conv2d(512, out_channels=512, kernel_size=(3, 3), groups=1, stride=(1, 1), padding=(1,1), bias=False)
self.stage4_unit2_bn2 = BatchNorm2d(512, eps=2e-5, momentum=0.9)
# self.stage4_unit2_bn2_scale
self.stage4_unit2_relu1 = PReLU(512)
self.stage4_unit2_conv2 = Conv2d(512, out_channels=512, kernel_size=(3, 3), groups=1, stride=(1, 1), padding=(1,1), bias=False)
self.stage4_unit2_bn3 = BatchNorm2d(512, eps=2e-5, momentum=0.9)
# self.stage4_unit2_bn3_scale
self.stage4_unit2_se_pool1 = AdaptiveAvgPool2d(1)
self.stage4_unit2_se_conv1 = Conv2d(512, out_channels=32, kernel_size=(1, 1), groups=1, stride=(1, 1), padding=(0,0), bias=True)
self.stage4_unit2_se_relu1 = PReLU(32)
self.stage4_unit2_se_conv2 = Conv2d(32, out_channels=512, kernel_size=(1, 1), groups=1, stride=(1, 1), padding=(0,0), bias=True)
self.stage4_unit2_se_sigmoid = Sigmoid()
# self._plus22
self.stage4_unit3_bn1 = BatchNorm2d(512, eps=2e-5, momentum=0.9)
# self.stage4_unit3_bn1_scale
self.stage4_unit3_conv1 = Conv2d(512, out_channels=512, kernel_size=(3, 3), groups=1, stride=(1, 1), padding=(1,1), bias=False)
self.stage4_unit3_bn2 = BatchNorm2d(512, eps=2e-5, momentum=0.9)
# self.stage4_unit3_bn2_scale
self.stage4_unit3_relu1 = PReLU(512)
self.stage4_unit3_conv2 = Conv2d(512, out_channels=512, kernel_size=(3, 3), groups=1, stride=(1, 1), padding=(1,1), bias=False)
self.stage4_unit3_bn3 = BatchNorm2d(512, eps=2e-5, momentum=0.9)
# self.stage4_unit3_bn3_scale
self.stage4_unit3_se_pool1 = AdaptiveAvgPool2d(1)
self.stage4_unit3_se_conv1= Conv2d(512, out_channels=32, kernel_size=(1, 1), groups=1, stride=(1, 1), padding=(0,0), bias=True)
self.stage4_unit3_se_relu1 = PReLU(32)
self.stage4_unit3_se_conv2 = Conv2d(32, out_channels=512, kernel_size=(1, 1), groups=1, stride=(1, 1), padding=(0,0), bias=True)
self.stage4_unit3_se_sigmoid = Sigmoid()
# self._plus23
self.bn1 = BatchNorm2d(512, eps=2e-5, momentum=0.9)
# self.bn1_scale
self.drop0 = Dropout(0.4)
self.pre_fc1 = Linear(in_features=25088, out_features=512, bias=True)
self.fc1 = BatchNorm1d(512, eps=2e-05, momentum=0.9)#, affine=True, track_running_stats=True)
def forward(self, x):
out = self.conv0(x) # 20,64,112,112
out = self.bn0(out)
out = self.relu0(out)
# plus0
out2 = self.stage1_unit1_bn1(out)
out2 = self.stage1_unit1_conv1(out2)
out2 = self.stage1_unit1_bn2(out2)
out2 = self.stage1_unit1_relu1(out2)
out2 = self.stage1_unit1_conv2(out2)
out2 = self.stage1_unit1_bn3(out2)
# out2 = input
w = self.stage1_unit1_se_pool1(out2)
# out1 = w
w = self.stage1_unit1_se_conv1(w)
w = self.stage1_unit1_se_relu1(w)
w = self.stage1_unit1_se_conv2(w)
w = self.stage1_unit1_se_sigmoid(w)
out = self.stage1_unit1_conv1sc(out)
out = self.stage1_unit1_sc(out)
out = out2 * w + out
# plus1
out2 = self.stage1_unit2_bn1(out)
# self.stage1_unit2_bn1_scale
out2 = self.stage1_unit2_conv1(out2)
out2 = self.stage1_unit2_bn2(out2)
# self.stage1_unit2_bn2_scale
out2 = self.stage1_unit2_relu1(out2)
out2 = self.stage1_unit2_conv2(out2)
# out2 = input
out2 = self.stage1_unit2_bn3(out2)
# out1 = out2
# self.stage1_unit2_bn3_scale
w = self.stage1_unit2_se_pool1(out2)
w = self.stage1_unit2_se_conv1(w)
w = self.stage1_unit2_se_relu1(w)
w = self.stage1_unit2_se_conv2(w)
w = self.stage1_unit2_se_sigmoid(w)
out = out2 * w + out
# plus2
out2 = self.stage1_unit3_bn1(out)
# self.stage1_unit3_bn1_scale
out2 = self.stage1_unit3_conv1(out2)
out2 = self.stage1_unit3_bn2(out2)
# self.stage1_unit3_bn2_scale
out2 = self.stage1_unit3_relu1(out2)
out2 = self.stage1_unit3_conv2(out2)
out2 = self.stage1_unit3_bn3(out2)
# self.stage1_unit3_bn3_scale
w = self.stage1_unit3_se_pool1(out2)
w = self.stage1_unit3_se_conv1(w)
w = self.stage1_unit3_se_relu1(w)
w = self.stage1_unit3_se_conv2(w)
w = self.stage1_unit3_se_sigmoid(w)
out = out2 * w + out
# plus3
out2 = self.stage2_unit1_bn1(out)
# self.stage2_unit1_bn1_scale
out2 = self.stage2_unit1_conv1(out2)
out2 = self.stage2_unit1_bn2(out2)
# self.stage2_unit1_bn2_scale
out2 = self.stage2_unit1_relu1(out2)
out2 = self.stage2_unit1_conv2(out2)
out2 = self.stage2_unit1_bn3(out2)
# self.stage2_unit1_bn3_scale
w = self.stage2_unit1_se_pool1(out2)
w = self.stage2_unit1_se_conv1(w)
w = self.stage2_unit1_se_relu1(w)
w = self.stage2_unit1_se_conv2(w)
w = self.stage2_unit1_se_sigmoid(w)
out = self.stage2_unit1_conv1sc(out)
out = self.stage2_unit1_sc(out)
out = out2 * w + out
# self.stage2_unit1_sc_scale
# self._plus3 #axpy
# plus4
out2 = self.stage2_unit2_bn1(out)
# self.stage2_unit2_bn1_scale
out2 = self.stage2_unit2_conv1(out2)
out2 = self.stage2_unit2_bn2(out2)
# self.stage2_unit2_bn2_scale
out2 = self.stage2_unit2_relu1(out2)
out2 = self.stage2_unit2_conv2(out2)
out2 = self.stage2_unit2_bn3(out2)
# self.stage2_unit2_bn3_scale
w = self.stage2_unit2_se_pool1(out2)
w = self.stage2_unit2_se_conv1(w)
w = self.stage2_unit2_se_relu1(w)
w = self.stage2_unit2_se_conv2(w)
w = self.stage2_unit2_se_sigmoid(w)
# self._plus4
out = out2 * w + out
# plus5
out2 = self.stage2_unit3_bn1(out)
# self.stage2_unit3_bn1_scale
out2 = self.stage2_unit3_conv1(out2)
out2 = self.stage2_unit3_bn2(out2)
# self.stage2_unit3_bn2_scale
out2 = self.stage2_unit3_relu1(out2)
out2 = self.stage2_unit3_conv2(out2)
out2 = self.stage2_unit3_bn3(out2)
# self.stage2_unit3_bn3_scale
w = self.stage2_unit3_se_pool1(out2)
w = self.stage2_unit3_se_conv1(w)
w = self.stage2_unit3_se_relu1(w)
w = self.stage2_unit3_se_conv2(w)
w = self.stage2_unit3_se_sigmoid(w)
out = out2 * w + out
# self._plus5
# plus6
out2 = self.stage2_unit4_bn1(out)
# self.stage2_unit4_bn1_scale
out2 = self.stage2_unit4_conv1(out2)
out2 = self.stage2_unit4_bn2(out2)
# self.stage2_unit4_bn2_scale
out2 = self.stage2_unit4_relu1(out2)
out2 = self.stage2_unit4_conv2(out2)
out2 = self.stage2_unit4_bn3(out2)
# self.stage2_unit4_bn3_scale
w = self.stage2_unit4_se_pool1(out2)
w = self.stage2_unit4_se_conv1(w)
w = self.stage2_unit4_se_relu1(w)
w = self.stage2_unit4_se_conv2(w)
w = self.stage2_unit4_se_sigmoid(w)
# self._plus6
out = out2 * w + out
# plus7
out2 = self.stage3_unit1_bn1(out)
# self.stage3_unit1_bn1_scale
out2 = self.stage3_unit1_conv1(out2)
out2 = self.stage3_unit1_bn2(out2)
# self.stage3_unit1_bn2_scale
out2 = self.stage3_unit1_relu1(out2)
out2 = self.stage3_unit1_conv2(out2)
out2 = self.stage3_unit1_bn3(out2)
# self.stage3_unit1_bn3_scale
w = self.stage3_unit1_se_pool1(out2)
w = self.stage3_unit1_se_conv1(w)
w = self.stage3_unit1_se_relu1(w)
w = self.stage3_unit1_se_conv2(w)
w = self.stage3_unit1_se_sigmoid(w)
out = self.stage3_unit1_conv1sc(out)
out = self.stage3_unit1_sc(out)
# self.stage3_unit1_sc_scale
# self._plus7
out = out2 * w + out
# plus8
out2 = self.stage3_unit2_bn1(out)
# self.stage3_unit2_bn1_scale
out2 = self.stage3_unit2_conv1(out2)
out2 = self.stage3_unit2_bn2(out2)
# self.stage3_unit2_bn2_scale
out2 = self.stage3_unit2_relu1(out2)
out2 = self.stage3_unit2_conv2(out2)
out2 = self.stage3_unit2_bn3(out2)
# self.stage3_unit2_bn3_scale
w = self.stage3_unit2_se_pool1(out2)
w = self.stage3_unit2_se_conv1(w)
w = self.stage3_unit2_se_relu1(w)
w = self.stage3_unit2_se_conv2(w)
w = self.stage3_unit2_se_sigmoid(w)
# self._plus8
out = out2 * w + out
# plus9
out2 = self.stage3_unit3_bn1(out)
# self.stage3_unit3_bn1_scale
out2 = self.stage3_unit3_conv1(out2)
out2 = self.stage3_unit3_bn2(out2)
# self.stage3_unit3_bn2_scale
out2 = self.stage3_unit3_relu1(out2)
out2 = self.stage3_unit3_conv2(out2)
out2 = self.stage3_unit3_bn3(out2)
# self.stage3_unit3_bn3_scale
w = self.stage3_unit3_se_pool1(out2)
w = self.stage3_unit3_se_conv1(w)
w = self.stage3_unit3_se_relu1(w)
w = self.stage3_unit3_se_conv2(w)
w = self.stage3_unit3_se_sigmoid(w)
# self._plus9
out = out2 * w + out
# plus10
out2 = self.stage3_unit4_bn1(out)
# self.stage3_unit4_bn1_scale
out2 = self.stage3_unit4_conv1(out2)
out2 = self.stage3_unit4_bn2(out2)
# self.stage3_unit4_bn2_scale
out2 = self.stage3_unit4_relu1(out2)
out2 = self.stage3_unit4_conv2(out2)
out2 = self.stage3_unit4_bn3(out2)
# self.stage3_unit4_bn3_scale
w = self.stage3_unit4_se_pool1(out2)
w = self.stage3_unit4_se_conv1(w)
w = self.stage3_unit4_se_relu1(w)
w = self.stage3_unit4_se_conv2(w)
w = self.stage3_unit4_se_sigmoid(w)
# self._plus10
out = out2 * w + out
# plus11
out2 = self.stage3_unit5_bn1(out)
# self.stage3_unit5_bn1_scale
out2 = self.stage3_unit5_conv1(out2)
out2 = self.stage3_unit5_bn2(out2)
# self.stage3_unit5_bn2_scale
out2 = self.stage3_unit5_relu1(out2)
out2 = self.stage3_unit5_conv2(out2)
out2 = self.stage3_unit5_bn3(out2)
# self.stage3_unit5_bn3_scale
w = self.stage3_unit5_se_pool1(out2)
w = self.stage3_unit5_se_conv1(w)
w = self.stage3_unit5_se_relu1(w)
w = self.stage3_unit5_se_conv2(w)
w = self.stage3_unit5_se_sigmoid(w)
# self._plus11
out = out2 * w + out
# plus12
out2 = self.stage3_unit6_bn1(out)
# self.stage3_unit6_bn1_scale
out2 = self.stage3_unit6_conv1(out2)
out2 = self.stage3_unit6_bn2(out2)
# self.stage3_unit6_bn2_scale
out2 = self.stage3_unit6_relu1(out2)
out2 = self.stage3_unit6_conv2(out2)
out2 = self.stage3_unit6_bn3(out2)
# self.stage3_unit6_bn3_scale
w = self.stage3_unit6_se_pool1(out2)
w = self.stage3_unit6_se_conv1(w)
w = self.stage3_unit6_se_relu1(w)
w = self.stage3_unit6_se_conv2(w)
w = self.stage3_unit6_se_sigmoid(w)
# self._plus12
out = out2 * w + out
# plus13
out2 = self.stage3_unit7_bn1(out)
# self.stage3_unit7_bn1_scale
out2 = self.stage3_unit7_conv1(out2)
out2 = self.stage3_unit7_bn2(out2)
# self.stage3_unit7_bn2_scale
out2 = self.stage3_unit7_relu1(out2)
out2 = self.stage3_unit7_conv2(out2)
out2 = self.stage3_unit7_bn3(out2)
# self.stage3_unit7_bn3_scale
w = self.stage3_unit7_se_pool1(out2)
w = self.stage3_unit7_se_conv1(w)
w = self.stage3_unit7_se_relu1(w)
w = self.stage3_unit7_se_conv2(w)
w = self.stage3_unit7_se_sigmoid(w)
# self._plus13
out = out2 * w + out
# plus14
out2 = self.stage3_unit8_bn1(out)
# self.stage3_unit8_bn1_scale
out2 = self.stage3_unit8_conv1(out2)
out2 = self.stage3_unit8_bn2(out2)
# self.stage3_unit8_bn2_scale
out2 = self.stage3_unit8_relu1(out2)
out2 = self.stage3_unit8_conv2(out2)
out2 = self.stage3_unit8_bn3(out2)
# self.stage3_unit8_bn3_scale
w = self.stage3_unit8_se_pool1(out2)
w = self.stage3_unit8_se_conv1(w)
w = self.stage3_unit8_se_relu1(w)
w = self.stage3_unit8_se_conv2(w)
w = self.stage3_unit8_se_sigmoid(w)
# self._plus14
out = out2 * w + out
# plus15
out2 = self.stage3_unit9_bn1(out)
# self.stage3_unit9_bn1_scale
out2 = self.stage3_unit9_conv1(out2)
out2 = self.stage3_unit9_bn2(out2)
# self.stage3_unit9_bn2_scale
out2 = self.stage3_unit9_relu1(out2)
out2 = self.stage3_unit9_conv2(out2)
out2 = self.stage3_unit9_bn3(out2)
# self.stage3_unit9_bn3_scale
w = self.stage3_unit9_se_pool1(out2)
w = self.stage3_unit9_se_conv1(w)
w = self.stage3_unit9_se_relu1(w)
w = self.stage3_unit9_se_conv2(w)
w = self.stage3_unit9_se_sigmoid(w)
# self._plus15
out = out2 * w + out
# plus16
out2 = self.stage3_unit10_bn1(out)
# self.stage3_unit10_bn1_scale
out2 = self.stage3_unit10_conv1(out2)
out2 = self.stage3_unit10_bn2(out2)
# self.stage3_unit10_bn2_scale
out2 = self.stage3_unit10_relu1(out2)
out2 = self.stage3_unit10_conv2(out2)
out2 = self.stage3_unit10_bn3(out2)
# self.stage3_unit10_bn3_scale
w = self.stage3_unit10_se_pool1(out2)
w = self.stage3_unit10_se_conv1(w)
w = self.stage3_unit10_se_relu1(w)
w = self.stage3_unit10_se_conv2(w)
w = self.stage3_unit10_se_sigmoid(w)
# self._plus16
out = out2 * w + out
# return out
# plus17
out2 = self.stage3_unit11_bn1(out)
# self.stage3_unit11_bn1_scale
out2 = self.stage3_unit11_conv1(out2)
out2 = self.stage3_unit11_bn2(out2)
# self.stage3_unit11_bn2_scale
out2 = self.stage3_unit11_relu1(out2)
out2 = self.stage3_unit11_conv2(out2)
out2 = self.stage3_unit11_bn3(out2)
# self.stage3_unit11_bn3_scale
w = self.stage3_unit11_se_pool1(out2)
w = self.stage3_unit11_se_conv1(w)
w = self.stage3_unit11_se_relu1(w)
w = self.stage3_unit11_se_conv2(w)
w = self.stage3_unit11_se_sigmoid(w)
# self._plus17
out = out2 * w + out
# plus18
out2 = self.stage3_unit12_bn1(out)
# self.stage3_unit12_bn1_scale
out2 = self.stage3_unit12_conv1(out2)
out2 = self.stage3_unit12_bn2(out2)
# self.stage3_unit12_bn2_scale
out2 = self.stage3_unit12_relu1(out2)
out2 = self.stage3_unit12_conv2(out2)
out2 = self.stage3_unit12_bn3(out2)
# self.stage3_unit12_bn3_scale
w = self.stage3_unit12_se_pool1(out2)
w = self.stage3_unit12_se_conv1(w)
w = self.stage3_unit12_se_relu1(w)
w = self.stage3_unit12_se_conv2(w)
w = self.stage3_unit12_se_sigmoid(w)
# self._plus18
out = out2 * w + out
# plus19
out2 = self.stage3_unit13_bn1(out)
# self.stage3_unit13_bn1_scale
out2 = self.stage3_unit13_conv1(out2)
out2 = self.stage3_unit13_bn2(out2)
# self.stage3_unit13_bn2_scale
out2 = self.stage3_unit13_relu1(out2)
out2 = self.stage3_unit13_conv2(out2)
out2 = self.stage3_unit13_bn3(out2)
# self.stage3_unit13_bn3_scale
w = self.stage3_unit13_se_pool1(out2)
w = self.stage3_unit13_se_conv1(w)
w = self.stage3_unit13_se_relu1(w)
w = self.stage3_unit13_se_conv2(w)
w = self.stage3_unit13_se_sigmoid(w)
# self._plus19
out = out2 * w + out
# plus20
out2 = self.stage3_unit14_bn1(out)
# self.stage3_unit14_bn1_scale
out2 = self.stage3_unit14_conv1(out2)
out2 = self.stage3_unit14_bn2(out2)
# self.stage3_unit14_bn2_scale
out2 = self.stage3_unit14_relu1(out2)
out2 = self.stage3_unit14_conv2(out2)
out2 = self.stage3_unit14_bn3(out2)
# self.stage3_unit14_bn3_scale
w = self.stage3_unit14_se_pool1(out2)
w = self.stage3_unit14_se_conv1(w)
w = self.stage3_unit14_se_relu1(w)
w = self.stage3_unit14_se_conv2(w)
w = self.stage3_unit14_se_sigmoid(w)
# self._plus20
out = out2 * w + out
# plus21
out2 = self.stage4_unit1_bn1(out)
# self.stage4_unit1_bn1_scale
out2 = self.stage4_unit1_conv1(out2)
out2 = self.stage4_unit1_bn2(out2)
# self.stage4_unit1_bn2_scale
out2 = self.stage4_unit1_relu1(out2)
out2 = self.stage4_unit1_conv2(out2)
out2 = self.stage4_unit1_bn3(out2)
# self.stage4_unit1_bn3_scale
w = self.stage4_unit1_se_pool1(out2)
w = self.stage4_unit1_se_conv1(w)
w = self.stage4_unit1_se_relu1(w)
w = self.stage4_unit1_se_conv2(w)
w = self.stage4_unit1_se_sigmoid(w)
out = self.stage4_unit1_conv1sc(out)
# out = input
out = self.stage4_unit1_sc(out)
# out1 = out
# self.stage4_unit1_sc_scale
# self._plus21
out = out2 * w + out
# plus22
out2 = self.stage4_unit2_bn1(out)
# self.stage4_unit2_bn1_scale
out2 = self.stage4_unit2_conv1(out2)
out2 = self.stage4_unit2_bn2(out2)
# self.stage4_unit2_bn2_scale
out2 = self.stage4_unit2_relu1(out2)
out2 = self.stage4_unit2_conv2(out2)
out2 = self.stage4_unit2_bn3(out2)
# self.stage4_unit2_bn3_scale
w = self.stage4_unit2_se_pool1(out2)
w = self.stage4_unit2_se_conv1(w)
w = self.stage4_unit2_se_relu1(w)
w = self.stage4_unit2_se_conv2(w)
w = self.stage4_unit2_se_sigmoid(w)
# self._plus22
out = out2 * w + out
# plus23
out2 = self.stage4_unit3_bn1(out)
# self.stage4_unit3_bn1_scale
out2 = self.stage4_unit3_conv1(out2)
out2 = self.stage4_unit3_bn2(out2)
# self.stage4_unit3_bn2_scale
out2 = self.stage4_unit3_relu1(out2)
out2 = self.stage4_unit3_conv2(out2)
out2 = self.stage4_unit3_bn3(out2)
# self.stage4_unit3_bn3_scale
w = self.stage4_unit3_se_pool1(out2)
w = self.stage4_unit3_se_conv1(w)
w = self.stage4_unit3_se_relu1(w)
w = self.stage4_unit3_se_conv2(w)
w = self.stage4_unit3_se_sigmoid(w)
# self._plus23
out = out2 * w + out
out = self.bn1(out)
# self.bn1_scale
out = self.drop0(out)
out = out.view(out.size(0), -1)
out = self.pre_fc1(out)
# out = input
out = self.fc1(out)
# out1 = out
return out#, out1# , out_res #l2_norm(out)
2)利用caffe进行模型加载:
#coding=utf-8
# Load the pretrained Caffe network that the PyTorch weights are read from.
import sys
# Put a local Caffe build's Python bindings first on the import path.
# NOTE(review): this absolute path is machine-specific; adjust it per environment.
sys.path.insert(0, "/home/fuxueping/sdb/Caffe_Project_Train/caffe-ssd/python")
import caffe
# Weights file and network definition file passed to caffe.Net below.
caffe_model = 'face.caffemodel'
prototxt = 'face.prototxt'
# Select GPU mode and device 0 before the network is constructed.
caffe.set_mode_gpu()
caffe.set_device(0)
# Build the network in the TEST phase with the weights loaded.
net = caffe.Net(prototxt, caffe_model, caffe.TEST)
3)4)5)合并在一起实现:遍历pytorch模型的各层,按层名从caffe网络中取出对应的参数并拷贝到该层:
def init_model(self, model, net):
    """Fill every supported layer of ``model`` with parameters from ``net``.

    Walks ``model.named_modules()`` and hands each module, together with its
    name, to the loader that matches its type. Modules of any other type
    (for example pooling or activation layers without a loader here) are
    skipped.

    Args:
        model: PyTorch module whose sub-modules are initialised in place.
        net: Loaded Caffe network, forwarded unchanged to the loaders.

    Returns:
        The same ``model`` object that was passed in.
    """
    # First matching entry wins, mirroring an if/elif chain.
    loaders_by_type = (
        (BatchNorm2d, "bn_init"),
        (Conv2d, "conv_init"),
        (Linear, "fc_init"),
        (PReLU, "prelu_init"),
    )
    for layer_name, layer in model.named_modules():
        loader_name = next(
            (meth for kind, meth in loaders_by_type if isinstance(layer, kind)),
            None,
        )
        if loader_name is not None:
            getattr(self, loader_name)(layer_name, layer, net)
        # 1-D batch norm is checked independently of the chain above and
        # reuses the same loader as the 2-D variant.
        if isinstance(layer, BatchNorm1d):
            self.bn_init(layer_name, layer, net)
    return model
def bn_init(self, n, m, net):
    """Copy a Caffe BatchNorm layer (and its companion Scale layer) into ``m``.

    Caffe splits batch normalisation across two layers: ``BatchNorm`` holds
    the accumulated mean and variance plus a scale-factor blob, and a
    separate ``Scale`` layer (looked up here as ``n + '_scale'``) holds the
    affine weight and bias.

    Args:
        n: Layer name used as the key into ``net.params``.
        m: Target batch-norm module; ``running_mean``, ``running_var``,
            ``weight`` and ``bias`` are overwritten in place.
        net: Loaded Caffe network exposing ``params`` (name -> sequence of
            blobs, each with a ``.data`` array).

    Layers that are absent from ``net.params`` are left untouched.
    """
    if n in net.params:
        stats = net.params[n]
        if len(stats) > 0:
            # Caffe stores *accumulated* statistics; the third blob is the
            # normalisation factor they must be divided by. Caffe itself
            # treats a zero factor as "multiply by zero".
            factor = 1.0
            if len(stats) > 2:
                scale = float(stats[2].data[0])
                factor = 0.0 if scale == 0 else 1.0 / scale
            m.running_mean.copy_(torch.FloatTensor(stats[0].data * factor))
            m.running_var.copy_(torch.FloatTensor(stats[1].data * factor))

    name = n + '_scale'
    if name in net.params:
        # Check the Scale layer's own blobs, not the BatchNorm layer's.
        affine = net.params[name]
        if len(affine) > 0:
            m.weight.data.copy_(torch.FloatTensor(affine[0].data))
            # A Scale layer without a bias term carries only one blob.
            if len(affine) > 1:
                m.bias.data.copy_(torch.FloatTensor(affine[1].data))
def conv_init(self, n, m, net):
    """Copy a Caffe convolution layer's blobs into the conv module ``m``.

    Args:
        n: Layer name used as the key into ``net.params``.
        m: Target convolution module; ``weight`` (and ``bias`` when the
            Caffe layer provides a second blob) are overwritten in place.
        net: Loaded Caffe network exposing ``params`` (name -> sequence of
            blobs, each with a ``.data`` array).

    Layers that are absent from ``net.params`` or have no blobs are left
    untouched.
    """
    if n not in net.params:
        return
    blobs = net.params[n]
    if len(blobs) == 0:
        return
    # Blob 0 is the kernel; blob 1, when present, is the bias.
    m.weight.data.copy_(torch.FloatTensor(blobs[0].data[:]))
    if len(blobs) > 1:
        m.bias.data.copy_(torch.FloatTensor(blobs[1].data[:]))
def fc_init(self, n, m, net):
    """Copy a Caffe fully-connected layer's blobs into the linear module ``m``.

    Args:
        n: Layer name used as the key into ``net.params``.
        m: Target linear module; ``weight`` (and ``bias`` when the Caffe
            layer provides a second blob) are overwritten in place.
        net: Loaded Caffe network exposing ``params`` (name -> sequence of
            blobs, each with a ``.data`` array).

    Layers that are absent from ``net.params`` or have no blobs are left
    untouched.
    """
    if n not in net.params:
        return
    blobs = net.params[n]
    if len(blobs) == 0:
        return
    # Each blob is copied exactly once: blob 0 is the weight matrix and
    # blob 1, when present, is the bias.
    m.weight.data.copy_(torch.FloatTensor(blobs[0].data[:]))
    if len(blobs) > 1:
        m.bias.data.copy_(torch.FloatTensor(blobs[1].data[:]))
def prelu_init(self, n, m, net):
    """Copy the first blob of Caffe layer ``n`` into ``m.weight``.

    Args:
        n: Layer name used as the key into ``net.params``.
        m: Target PReLU module; its ``weight`` is overwritten in place.
        net: Loaded Caffe network exposing ``params`` (name -> sequence of
            blobs, each with a ``.data`` array).

    Nothing happens when the layer is absent from ``net.params`` or has no
    blobs.
    """
    if n not in net.params:
        return
    layer_blobs = net.params[n]
    if not len(layer_blobs) > 0:
        return
    slopes = layer_blobs[0].data[:]
    m.weight.data.copy_(torch.FloatTensor(slopes))
def AdaptiveAvgPool2d_init(self, n, m, net):
    """Copy the first blob of Caffe layer ``n`` into ``m.weight``.

    Args:
        n: Layer name used as the key into ``net.params``.
        m: Target module; its ``weight`` is overwritten in place.
        net: Loaded Caffe network exposing ``params`` (name -> sequence of
            blobs, each with a ``.data`` array).

    Nothing happens when the layer is absent from ``net.params`` or has no
    blobs.

    NOTE(review): torch.nn.AdaptiveAvgPool2d defines no ``weight`` parameter,
    so the copy would presumably fail if a pooling layer ever appeared in
    ``net.params`` with blobs -- confirm before relying on this helper.
    """
    if n not in net.params:
        return
    layer_blobs = net.params[n]
    if not len(layer_blobs) > 0:
        return
    values = layer_blobs[0].data[:]
    m.weight.data.copy_(torch.FloatTensor(values))
6)对相应层的输出(feature)进行比较:
a.保证两个框架使用相同的输入图,可以自己构造一个,例如全1的输入:
# Constant all-ones float32 input of shape [1, 3, 112, 112], so both
# frameworks can be fed exactly the same data.
img = np.ones([1,3,112,112]).astype(np.float32)
b.按层名分别读取两个框架中对应层的feature,再进行比较:
caffe读取输出feature(这里以"fc1"层为例):
# Write the test input into the Caffe blob named 'data' and run a forward pass.
net.blobs['data'].data[...] = img
output = net.forward()
# Take the first sample of the "fc1" blob and flatten it for comparison.
caffe_data = net.blobs["fc1"].data[0][...].flatten()
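pytorch读取输出feature(net1为已加载caffe参数的pytorch模型,im_tensor应为与img内容相同的tensor;比较前应先调用net1.eval(),否则BatchNorm和Dropout处于训练模式,两边结果无法对齐):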
# Run the PyTorch model on the CUDA copy of the input.
# NOTE(review): net1 and im_tensor are defined outside this snippet.
out = net1(im_tensor.cuda())#, torch.from_numpy(out).unsqueeze(0).cuda())
# torch.from_numpy(w).unsqueeze(0).cuda(),
# torch.from_numpy(out2).unsqueeze(0).cuda())
# Bring the result back to the CPU as a flat NumPy array, matching caffe_data.
pytorch_data = out.data.cpu().numpy().flatten()
c.计算两者输出的平均绝对误差:
# Sum of the element-wise absolute differences between the two outputs.
diff = abs(pytorch_data - caffe_data).sum()
# Divide by the element count to report the mean absolute difference.
print("caffe & pytorch diff:", diff/pytorch_data.size)
参考资源:
torch和caffe中的BatchNorm层
用MXnet预训练模型初始化Pytorch模型