在这篇文章 https://blog.csdn.net/x1131230123/article/details/125643918 中介绍 PaddleClas 的时候,发现它包含很多 backbone。因此本篇直接对比各个 backbone 的参数量差别,主观且直观地体验一下各个 backbone 的参数量级。
{'total_params': 61100840, 'trainable_params': 61100840}
paddle.summary(mod.AlexNet(),(1,3,224,224))
---------------------------------------------------------------------------
Layer (type) Input Shape Output Shape Param #
===========================================================================
Conv2D-135 [[1, 3, 224, 224]] [1, 64, 55, 55] 23,296
ReLU-86 [[1, 64, 55, 55]] [1, 64, 55, 55] 0
MaxPool2D-1 [[1, 64, 55, 55]] [1, 64, 27, 27] 0
ConvPoolLayer-1 [[1, 3, 224, 224]] [1, 64, 27, 27] 0
Conv2D-136 [[1, 64, 27, 27]] [1, 192, 27, 27] 307,392
ReLU-87 [[1, 192, 27, 27]] [1, 192, 27, 27] 0
MaxPool2D-2 [[1, 192, 27, 27]] [1, 192, 13, 13] 0
ConvPoolLayer-2 [[1, 64, 27, 27]] [1, 192, 13, 13] 0
Conv2D-137 [[1, 192, 13, 13]] [1, 384, 13, 13] 663,936
Conv2D-138 [[1, 384, 13, 13]] [1, 256, 13, 13] 884,992
Conv2D-139 [[1, 256, 13, 13]] [1, 256, 13, 13] 590,080
ReLU-88 [[1, 256, 13, 13]] [1, 256, 13, 13] 0
MaxPool2D-3 [[1, 256, 13, 13]] [1, 256, 6, 6] 0
ConvPoolLayer-3 [[1, 256, 13, 13]] [1, 256, 6, 6] 0
Dropout-1 [[1, 9216]] [1, 9216] 0
Linear-5 [[1, 9216]] [1, 4096] 37,752,832
Dropout-2 [[1, 4096]] [1, 4096] 0
Linear-6 [[1, 4096]] [1, 4096] 16,781,312
Linear-7 [[1, 4096]] [1, 1000] 4,097,000
===========================================================================
Total params: 61,100,840
Trainable params: 61,100,840
Non-trainable params: 0
---------------------------------------------------------------------------
Input size (MB): 0.57
Forward/backward pass size (MB): 8.09
Params size (MB): 233.08
Estimated Total Size (MB): 241.75
---------------------------------------------------------------------------
{'total_params': 61100840, 'trainable_params': 61100840}
{'total_params': 27677512, 'trainable_params': 27606856}
paddle.summary(mod.CSPDarkNet53(),(1,3,224,224))
-------------------------------------------------------------------------------
Layer (type) Input Shape Output Shape Param #
===============================================================================
Conv2D-140 [[1, 3, 224, 224]] [1, 32, 224, 224] 864
BatchNorm2D-1 [[1, 32, 224, 224]] [1, 32, 224, 224] 128
LeakyReLU-1 [[1, 32, 224, 224]] [1, 32, 224, 224] 0
ConvBnAct-1 [[1, 3, 224, 224]] [1, 32, 224, 224] 0
Conv2D-141 [[1, 32, 224, 224]] [1, 64, 112, 112] 18,432
BatchNorm2D-2 [[1, 64, 112, 112]] [1, 64, 112, 112] 256
LeakyReLU-2 [[1, 64, 112, 112]] [1, 64, 112, 112] 0
ConvBnAct-2 [[1, 32, 224, 224]] [1, 64, 112, 112] 0
Conv2D-142 [[1, 64, 112, 112]] [1, 128, 112, 112] 8,192
BatchNorm2D-3 [[1, 128, 112, 112]] [1, 128, 112, 112] 512
LeakyReLU-3 [[1, 128, 112, 112]] [1, 128, 112, 112] 0
ConvBnAct-3 [[1, 64, 112, 112]] [1, 128, 112, 112] 0
Conv2D-143 [[1, 64, 112, 112]] [1, 32, 112, 112] 2,048
BatchNorm2D-4 [[1, 32, 112, 112]] [1, 32, 112, 112] 128
LeakyReLU-4 [[1, 32, 112, 112]] [1, 32, 112, 112] 0
ConvBnAct-4 [[1, 64, 112, 112]] [1, 32, 112, 112] 0
Conv2D-144 [[1, 32, 112, 112]] [1, 64, 112, 112] 18,432
BatchNorm2D-5 [[1, 64, 112, 112]] [1, 64, 112, 112] 256
LeakyReLU-5 [[1, 64, 112, 112]] [1, 64, 112, 112] 0
ConvBnAct-5 [[1, 32, 112, 112]] [1, 64, 112, 112] 0
DarkBlock-1 [[1, 64, 112, 112]] [1, 64, 112, 112] 0
Conv2D-145 [[1, 64, 112, 112]] [1, 64, 112, 112] 4,096
BatchNorm2D-6 [[1, 64, 112, 112]] [1, 64, 112, 112] 256
LeakyReLU-6 [[1, 64, 112, 112]] [1, 64, 112, 112] 0
ConvBnAct-6 [[1, 64, 112, 112]] [1, 64, 112, 112] 0
Conv2D-146 [[1, 128, 112, 112]] [1, 64, 112, 112] 8,192
BatchNorm2D-7 [[1, 64, 112, 112]] [1, 64, 112, 112] 256
LeakyReLU-7 [[1, 64, 112, 112]] [1, 64, 112, 112] 0
ConvBnAct-7 [[1, 128, 112, 112]] [1, 64, 112, 112] 0
CrossStage-1 [[1, 32, 224, 224]] [1, 64, 112, 112] 0
Conv2D-147 [[1, 64, 112, 112]] [1, 128, 56, 56] 73,728
BatchNorm2D-8 [[1, 128, 56, 56]] [1, 128, 56, 56] 512
LeakyReLU-8 [[1, 128, 56, 56]] [1, 128, 56, 56] 0
ConvBnAct-8 [[1, 64, 112, 112]] [1, 128, 56, 56] 0
Conv2D-148 [[1, 128, 56, 56]] [1, 128, 56, 56] 16,384
BatchNorm2D-9 [[1, 128, 56, 56]] [1, 128, 56, 56] 512
LeakyReLU-9 [[1, 128, 56, 56]] [1, 128, 56, 56] 0
ConvBnAct-9 [[1, 128, 56, 56]] [1, 128, 56, 56] 0
Conv2D-149 [[1, 64, 56, 56]] [1, 64, 56, 56] 4,096
BatchNorm2D-10 [[1, 64, 56, 56]] [1, 64, 56, 56] 256
LeakyReLU-10 [[1, 64, 56, 56]] [1, 64, 56, 56] 0
ConvBnAct-10 [[1, 64, 56, 56]] [1, 64, 56, 56] 0
Conv2D-150 [[1, 64, 56, 56]] [1, 64, 56, 56] 36,864
BatchNorm2D-11 [[1, 64, 56, 56]] [1, 64, 56, 56] 256
LeakyReLU-11 [[1, 64, 56, 56]] [1, 64, 56, 56] 0
ConvBnAct-11 [[1, 64, 56, 56]] [1, 64, 56, 56] 0
DarkBlock-2 [[1, 64, 56, 56]] [1, 64, 56, 56] 0
Conv2D-151 [[1, 64, 56, 56]] [1, 64, 56, 56] 4,096
BatchNorm2D-12 [[1, 64, 56, 56]] [1, 64, 56, 56] 256
LeakyReLU-12 [[1, 64, 56, 56]] [1, 64, 56, 56] 0
ConvBnAct-12 [[1, 64, 56, 56]] [1, 64, 56, 56] 0
Conv2D-152 [[1, 64, 56, 56]] [1, 64, 56, 56] 36,864
BatchNorm2D-13 [[1, 64, 56, 56]] [1, 64, 56, 56] 256
LeakyReLU-13 [[1, 64, 56, 56]] [1, 64, 56, 56] 0
ConvBnAct-13 [[1, 64, 56, 56]] [1, 64, 56, 56] 0
DarkBlock-3 [[1, 64, 56, 56]] [1, 64, 56, 56] 0
Conv2D-153 [[1, 64, 56, 56]] [1, 64, 56, 56] 4,096
BatchNorm2D-14 [[1, 64, 56, 56]] [1, 64, 56, 56] 256
LeakyReLU-14 [[1, 64, 56, 56]] [1, 64, 56, 56] 0
ConvBnAct-14 [[1, 64, 56, 56]] [1, 64, 56, 56] 0
Conv2D-154 [[1, 128, 56, 56]] [1, 128, 56, 56] 16,384
BatchNorm2D-15 [[1, 128, 56, 56]] [1, 128, 56, 56] 512
LeakyReLU-15 [[1, 128, 56, 56]] [1, 128, 56, 56] 0
ConvBnAct-15 [[1, 128, 56, 56]] [1, 128, 56, 56] 0
CrossStage-2 [[1, 64, 112, 112]] [1, 128, 56, 56] 0
Conv2D-155 [[1, 128, 56, 56]] [1, 256, 28, 28] 294,912
BatchNorm2D-16 [[1, 256, 28, 28]] [1, 256, 28, 28] 1,024
LeakyReLU-16 [[1, 256, 28, 28]] [1, 256, 28, 28] 0
ConvBnAct-16 [[1, 128, 56, 56]] [1, 256, 28, 28] 0
Conv2D-156 [[1, 256, 28, 28]] [1, 256, 28, 28] 65,536
BatchNorm2D-17 [[1, 256, 28, 28]] [1, 256, 28, 28] 1,024
LeakyReLU-17 [[1, 256, 28, 28]] [1, 256, 28, 28] 0
ConvBnAct-17 [[1, 256, 28, 28]] [1, 256, 28, 28] 0
Conv2D-157 [[1, 128, 28, 28]] [1, 128, 28, 28] 16,384
BatchNorm2D-18 [[1, 128, 28, 28]] [1, 128, 28, 28] 512
LeakyReLU-18 [[1, 128, 28, 28]] [1, 128, 28, 28] 0
ConvBnAct-18 [[1, 128, 28, 28]] [1, 128, 28, 28] 0
Conv2D-158 [[1, 128, 28, 28]] [1, 128, 28, 28] 147,456
BatchNorm2D-19 [[1, 128, 28, 28]] [1, 128, 28, 28] 512
LeakyReLU-19 [[1, 128, 28, 28]] [1, 128, 28, 28] 0
ConvBnAct-19 [[1, 128, 28, 28]] [1, 128, 28, 28] 0
DarkBlock-4 [[1, 128, 28, 28]] [1, 128, 28, 28] 0
Conv2D-159 [[1, 128, 28, 28]] [1, 128, 28, 28] 16,384
BatchNorm2D-20 [[1, 128, 28, 28]] [1, 128, 28, 28] 512
LeakyReLU-20 [[1, 128, 28, 28]] [1, 128, 28, 28] 0
ConvBnAct-20 [[1, 128, 28, 28]] [1, 128, 28, 28] 0
Conv2D-160 [[1, 128, 28, 28]] [1, 128, 28, 28] 147,456
BatchNorm2D-21 [[1, 128, 28, 28]] [1, 128, 28, 28] 512
LeakyReLU-21 [[1, 128, 28, 28]] [1, 128, 28, 28] 0
ConvBnAct-21 [[1, 128, 28, 28]] [1, 128, 28, 28] 0
DarkBlock-5 [[1, 128, 28, 28]] [1, 128, 28, 28] 0
Conv2D-161 [[1, 128, 28, 28]] [1, 128, 28, 28] 16,384
BatchNorm2D-22 [[1, 128, 28, 28]] [1, 128, 28, 28] 512
LeakyReLU-22 [[1, 128, 28, 28]] [1, 128, 28, 28] 0
ConvBnAct-22 [[1, 128, 28, 28]] [1, 128, 28, 28] 0
Conv2D-162 [[1, 128, 28, 28]] [1, 128, 28, 28] 147,456
BatchNorm2D-23 [[1, 128, 28, 28]] [1, 128, 28, 28] 512
LeakyReLU-23 [[1, 128, 28, 28]] [1, 128, 28, 28] 0
ConvBnAct-23 [[1, 128, 28, 28]] [1, 128, 28, 28] 0
DarkBlock-6 [[1, 128, 28, 28]] [1, 128, 28, 28] 0
Conv2D-163 [[1, 128, 28, 28]] [1, 128, 28, 28] 16,384
BatchNorm2D-24 [[1, 128, 28, 28]] [1, 128, 28, 28] 512
LeakyReLU-24 [[1, 128, 28, 28]] [1, 128, 28, 28] 0
ConvBnAct-24 [[1, 128, 28, 28]] [1, 128, 28, 28] 0
Conv2D-164 [[1, 128, 28, 28]] [1, 128, 28, 28] 147,456
BatchNorm2D-25 [[1, 128, 28, 28]] [1, 128, 28, 28] 512
LeakyReLU-25 [[1, 128, 28, 28]] [1, 128, 28, 28] 0
ConvBnAct-25 [[1, 128, 28, 28]] [1, 128, 28, 28] 0
DarkBlock-7 [[1, 128, 28, 28]] [1, 128, 28, 28] 0
Conv2D-165 [[1, 128, 28, 28]] [1, 128, 28, 28] 16,384
BatchNorm2D-26 [[1, 128, 28, 28]] [1, 128, 28, 28] 512
LeakyReLU-26 [[1, 128, 28, 28]] [1, 128, 28, 28] 0
ConvBnAct-26 [[1, 128, 28, 28]] [1, 128, 28, 28] 0
Conv2D-166 [[1, 128, 28, 28]] [1, 128, 28, 28] 147,456
BatchNorm2D-27 [[1, 128, 28, 28]] [1, 128, 28, 28] 512
LeakyReLU-27 [[1, 128, 28, 28]] [1, 128, 28, 28] 0
ConvBnAct-27 [[1, 128, 28, 28]] [1, 128, 28, 28] 0
DarkBlock-8 [[1, 128, 28, 28]] [1, 128, 28, 28] 0
Conv2D-167 [[1, 128, 28, 28]] [1, 128, 28, 28] 16,384
BatchNorm2D-28 [[1, 128, 28, 28]] [1, 128, 28, 28] 512
LeakyReLU-28 [[1, 128, 28, 28]] [1, 128, 28, 28] 0
ConvBnAct-28 [[1, 128, 28, 28]] [1, 128, 28, 28] 0
Conv2D-168 [[1, 128, 28, 28]] [1, 128, 28, 28] 147,456
BatchNorm2D-29 [[1, 128, 28, 28]] [1, 128, 28, 28] 512
LeakyReLU-29 [[1, 128, 28, 28]] [1, 128, 28, 28] 0
ConvBnAct-29 [[1, 128, 28, 28]] [1, 128, 28, 28] 0
DarkBlock-9 [[1, 128, 28, 28]] [1, 128, 28, 28] 0
Conv2D-169 [[1, 128, 28, 28]] [1, 128, 28, 28] 16,384
BatchNorm2D-30 [[1, 128, 28, 28]] [1, 128, 28, 28] 512
LeakyReLU-30 [[1, 128, 28, 28]] [1, 128, 28, 28] 0
ConvBnAct-30 [[1, 128, 28, 28]] [1, 128, 28, 28] 0
Conv2D-170 [[1, 128, 28, 28]] [1, 128, 28, 28] 147,456
BatchNorm2D-31 [[1, 128, 28, 28]] [1, 128, 28, 28] 512
LeakyReLU-31 [[1, 128, 28, 28]] [1, 128, 28, 28] 0
ConvBnAct-31 [[1, 128, 28, 28]] [1, 128, 28, 28] 0
DarkBlock-10 [[1, 128, 28, 28]] [1, 128, 28, 28] 0
Conv2D-171 [[1, 128, 28, 28]] [1, 128, 28, 28] 16,384
BatchNorm2D-32 [[1, 128, 28, 28]] [1, 128, 28, 28] 512
LeakyReLU-32 [[1, 128, 28, 28]] [1, 128, 28, 28] 0
ConvBnAct-32 [[1, 128, 28, 28]] [1, 128, 28, 28] 0
Conv2D-172 [[1, 128, 28, 28]] [1, 128, 28, 28] 147,456
BatchNorm2D-33 [[1, 128, 28, 28]] [1, 128, 28, 28] 512
LeakyReLU-33 [[1, 128, 28, 28]] [1, 128, 28, 28] 0
ConvBnAct-33 [[1, 128, 28, 28]] [1, 128, 28, 28] 0
DarkBlock-11 [[1, 128, 28, 28]] [1, 128, 28, 28] 0
Conv2D-173 [[1, 128, 28, 28]] [1, 128, 28, 28] 16,384
BatchNorm2D-34 [[1, 128, 28, 28]] [1, 128, 28, 28] 512
LeakyReLU-34 [[1, 128, 28, 28]] [1, 128, 28, 28] 0
ConvBnAct-34 [[1, 128, 28, 28]] [1, 128, 28, 28] 0
Conv2D-174 [[1, 256, 28, 28]] [1, 256, 28, 28] 65,536
BatchNorm2D-35 [[1, 256, 28, 28]] [1, 256, 28, 28] 1,024
LeakyReLU-35 [[1, 256, 28, 28]] [1, 256, 28, 28] 0
ConvBnAct-35 [[1, 256, 28, 28]] [1, 256, 28, 28] 0
CrossStage-3 [[1, 128, 56, 56]] [1, 256, 28, 28] 0
Conv2D-175 [[1, 256, 28, 28]] [1, 512, 14, 14] 1,179,648
BatchNorm2D-36 [[1, 512, 14, 14]] [1, 512, 14, 14] 2,048
LeakyReLU-36 [[1, 512, 14, 14]] [1, 512, 14, 14] 0
ConvBnAct-36 [[1, 256, 28, 28]] [1, 512, 14, 14] 0
Conv2D-176 [[1, 512, 14, 14]] [1, 512, 14, 14] 262,144
BatchNorm2D-37 [[1, 512, 14, 14]] [1, 512, 14, 14] 2,048
LeakyReLU-37 [[1, 512, 14, 14]] [1, 512, 14, 14] 0
ConvBnAct-37 [[1, 512, 14, 14]] [1, 512, 14, 14] 0
Conv2D-177 [[1, 256, 14, 14]] [1, 256, 14, 14] 65,536
BatchNorm2D-38 [[1, 256, 14, 14]] [1, 256, 14, 14] 1,024
LeakyReLU-38 [[1, 256, 14, 14]] [1, 256, 14, 14] 0
ConvBnAct-38 [[1, 256, 14, 14]] [1, 256, 14, 14] 0
Conv2D-178 [[1, 256, 14, 14]] [1, 256, 14, 14] 589,824
BatchNorm2D-39 [[1, 256, 14, 14]] [1, 256, 14, 14] 1,024
LeakyReLU-39 [[1, 256, 14, 14]] [1, 256, 14, 14] 0
ConvBnAct-39 [[1, 256, 14, 14]] [1, 256, 14, 14] 0
DarkBlock-12 [[1, 256, 14, 14]] [1, 256, 14, 14] 0
Conv2D-179 [[1, 256, 14, 14]] [1, 256, 14, 14] 65,536
BatchNorm2D-40 [[1, 256, 14, 14]] [1, 256, 14, 14] 1,024
LeakyReLU-40 [[1, 256, 14, 14]] [1, 256, 14, 14] 0
ConvBnAct-40 [[1, 256, 14, 14]] [1, 256, 14, 14] 0
Conv2D-180 [[1, 256, 14, 14]] [1, 256, 14, 14] 589,824
BatchNorm2D-41 [[1, 256, 14, 14]] [1, 256, 14, 14] 1,024
LeakyReLU-41 [[1, 256, 14, 14]] [1, 256, 14, 14] 0
ConvBnAct-41 [[1, 256, 14, 14]] [1, 256, 14, 14] 0
DarkBlock-13 [[1, 256, 14, 14]] [1, 256, 14, 14] 0
Conv2D-181 [[1, 256, 14, 14]] [1, 256, 14, 14] 65,536
BatchNorm2D-42 [[1, 256, 14, 14]] [1, 256, 14, 14] 1,024
LeakyReLU-42 [[1, 256, 14, 14]] [1, 256, 14, 14] 0
ConvBnAct-42 [[1, 256, 14, 14]] [1, 256, 14, 14] 0
Conv2D-182 [[1, 256, 14, 14]] [1, 256, 14, 14] 589,824
BatchNorm2D-43 [[1, 256, 14, 14]] [1, 256, 14, 14] 1,024
LeakyReLU-43 [[1, 256, 14, 14]] [1, 256, 14, 14] 0
ConvBnAct-43 [[1, 256, 14, 14]] [1, 256, 14, 14] 0
DarkBlock-14 [[1, 256, 14, 14]] [1, 256, 14, 14] 0
Conv2D-183 [[1, 256, 14, 14]] [1, 256, 14, 14] 65,536
BatchNorm2D-44 [[1, 256, 14, 14]] [1, 256, 14, 14] 1,024
LeakyReLU-44 [[1, 256, 14, 14]] [1, 256, 14, 14] 0
ConvBnAct-44 [[1, 256, 14, 14]] [1, 256, 14, 14] 0
Conv2D-184 [[1, 256, 14, 14]] [1, 256, 14, 14] 589,824
BatchNorm2D-45 [[1, 256, 14, 14]] [1, 256, 14, 14] 1,024
LeakyReLU-45 [[1, 256, 14, 14]] [1, 256, 14, 14] 0
ConvBnAct-45 [[1, 256, 14, 14]] [1, 256, 14, 14] 0
DarkBlock-15 [[1, 256, 14, 14]] [1, 256, 14, 14] 0
Conv2D-185 [[1, 256, 14, 14]] [1, 256, 14, 14] 65,536
BatchNorm2D-46 [[1, 256, 14, 14]] [1, 256, 14, 14] 1,024
LeakyReLU-46 [[1, 256, 14, 14]] [1, 256, 14, 14] 0
ConvBnAct-46 [[1, 256, 14, 14]] [1, 256, 14, 14] 0
Conv2D-186 [[1, 256, 14, 14]] [1, 256, 14, 14] 589,824
BatchNorm2D-47 [[1, 256, 14, 14]] [1, 256, 14, 14] 1,024
LeakyReLU-47 [[1, 256, 14, 14]] [1, 256, 14, 14] 0
ConvBnAct-47 [[1, 256, 14, 14]] [1, 256, 14, 14] 0
DarkBlock-16 [[1, 256, 14, 14]] [1, 256, 14, 14] 0
Conv2D-187 [[1, 256, 14, 14]] [1, 256, 14, 14] 65,536
BatchNorm2D-48 [[1, 256, 14, 14]] [1, 256, 14, 14] 1,024
LeakyReLU-48 [[1, 256, 14, 14]] [1, 256, 14, 14] 0
ConvBnAct-48 [[1, 256, 14, 14]] [1, 256, 14, 14] 0
Conv2D-188 [[1, 256, 14, 14]] [1, 256, 14, 14] 589,824
BatchNorm2D-49 [[1, 256, 14, 14]] [1, 256, 14, 14] 1,024
LeakyReLU-49 [[1, 256, 14, 14]] [1, 256, 14, 14] 0
ConvBnAct-49 [[1, 256, 14, 14]] [1, 256, 14, 14] 0
DarkBlock-17 [[1, 256, 14, 14]] [1, 256, 14, 14] 0
Conv2D-189 [[1, 256, 14, 14]] [1, 256, 14, 14] 65,536
BatchNorm2D-50 [[1, 256, 14, 14]] [1, 256, 14, 14] 1,024
LeakyReLU-50 [[1, 256, 14, 14]] [1, 256, 14, 14] 0
ConvBnAct-50 [[1, 256, 14, 14]] [1, 256, 14, 14] 0
Conv2D-190 [[1, 256, 14, 14]] [1, 256, 14, 14] 589,824
BatchNorm2D-51 [[1, 256, 14, 14]] [1, 256, 14, 14] 1,024
LeakyReLU-51 [[1, 256, 14, 14]] [1, 256, 14, 14] 0
ConvBnAct-51 [[1, 256, 14, 14]] [1, 256, 14, 14] 0
DarkBlock-18 [[1, 256, 14, 14]] [1, 256, 14, 14] 0
Conv2D-191 [[1, 256, 14, 14]] [1, 256, 14, 14] 65,536
BatchNorm2D-52 [[1, 256, 14, 14]] [1, 256, 14, 14] 1,024
LeakyReLU-52 [[1, 256, 14, 14]] [1, 256, 14, 14] 0
ConvBnAct-52 [[1, 256, 14, 14]] [1, 256, 14, 14] 0
Conv2D-192 [[1, 256, 14, 14]] [1, 256, 14, 14] 589,824
BatchNorm2D-53 [[1, 256, 14, 14]] [1, 256, 14, 14] 1,024
LeakyReLU-53 [[1, 256, 14, 14]] [1, 256, 14, 14] 0
ConvBnAct-53 [[1, 256, 14, 14]] [1, 256, 14, 14] 0
DarkBlock-19 [[1, 256, 14, 14]] [1, 256, 14, 14] 0
Conv2D-193 [[1, 256, 14, 14]] [1, 256, 14, 14] 65,536
BatchNorm2D-54 [[1, 256, 14, 14]] [1, 256, 14, 14] 1,024
LeakyReLU-54 [[1, 256, 14, 14]] [1, 256, 14, 14] 0
ConvBnAct-54 [[1, 256, 14, 14]] [1, 256, 14, 14] 0
Conv2D-194 [[1, 512, 14, 14]] [1, 512, 14, 14] 262,144
BatchNorm2D-55 [[1, 512, 14, 14]] [1, 512, 14, 14] 2,048
LeakyReLU-55 [[1, 512, 14, 14]] [1, 512, 14, 14] 0
ConvBnAct-55 [[1, 512, 14, 14]] [1, 512, 14, 14] 0
CrossStage-4 [[1, 256, 28, 28]] [1, 512, 14, 14] 0
Conv2D-195 [[1, 512, 14, 14]] [1, 1024, 7, 7] 4,718,592
BatchNorm2D-56 [[1, 1024, 7, 7]] [1, 1024, 7, 7] 4,096
LeakyReLU-56 [[1, 1024, 7, 7]] [1, 1024, 7, 7] 0
ConvBnAct-56 [[1, 512, 14, 14]] [1, 1024, 7, 7] 0
Conv2D-196 [[1, 1024, 7, 7]] [1, 1024, 7, 7] 1,048,576
BatchNorm2D-57 [[1, 1024, 7, 7]] [1, 1024, 7, 7] 4,096
LeakyReLU-57 [[1, 1024, 7, 7]] [1, 1024, 7, 7] 0
ConvBnAct-57 [[1, 1024, 7, 7]] [1, 1024, 7, 7] 0
Conv2D-197 [[1, 512, 7, 7]] [1, 512, 7, 7] 262,144
BatchNorm2D-58 [[1, 512, 7, 7]] [1, 512, 7, 7] 2,048
LeakyReLU-58 [[1, 512, 7, 7]] [1, 512, 7, 7] 0
ConvBnAct-58 [[1, 512, 7, 7]] [1, 512, 7, 7] 0
Conv2D-198 [[1, 512, 7, 7]] [1, 512, 7, 7] 2,359,296
BatchNorm2D-59 [[1, 512, 7, 7]] [1, 512, 7, 7] 2,048
LeakyReLU-59 [[1, 512, 7, 7]] [1, 512, 7, 7] 0
ConvBnAct-59 [[1, 512, 7, 7]] [1, 512, 7, 7] 0
DarkBlock-20 [[1, 512, 7, 7]] [1, 512, 7, 7] 0
Conv2D-199 [[1, 512, 7, 7]] [1, 512, 7, 7] 262,144
BatchNorm2D-60 [[1, 512, 7, 7]] [1, 512, 7, 7] 2,048
LeakyReLU-60 [[1, 512, 7, 7]] [1, 512, 7, 7] 0
ConvBnAct-60 [[1, 512, 7, 7]] [1, 512, 7, 7] 0
Conv2D-200 [[1, 512, 7, 7]] [1, 512, 7, 7] 2,359,296
BatchNorm2D-61 [[1, 512, 7, 7]] [1, 512, 7, 7] 2,048
LeakyReLU-61 [[1, 512, 7, 7]] [1, 512, 7, 7] 0
ConvBnAct-61 [[1, 512, 7, 7]] [1, 512, 7, 7] 0
DarkBlock-21 [[1, 512, 7, 7]] [1, 512, 7, 7] 0
Conv2D-201 [[1, 512, 7, 7]] [1, 512, 7, 7] 262,144
BatchNorm2D-62 [[1, 512, 7, 7]] [1, 512, 7, 7] 2,048
LeakyReLU-62 [[1, 512, 7, 7]] [1, 512, 7, 7] 0
ConvBnAct-62 [[1, 512, 7, 7]] [1, 512, 7, 7] 0
Conv2D-202 [[1, 512, 7, 7]] [1, 512, 7, 7] 2,359,296
BatchNorm2D-63 [[1, 512, 7, 7]] [1, 512, 7, 7] 2,048
LeakyReLU-63 [[1, 512, 7, 7]] [1, 512, 7, 7] 0
ConvBnAct-63 [[1, 512, 7, 7]] [1, 512, 7, 7] 0
DarkBlock-22 [[1, 512, 7, 7]] [1, 512, 7, 7] 0
Conv2D-203 [[1, 512, 7, 7]] [1, 512, 7, 7] 262,144
BatchNorm2D-64 [[1, 512, 7, 7]] [1, 512, 7, 7] 2,048
LeakyReLU-64 [[1, 512, 7, 7]] [1, 512, 7, 7] 0
ConvBnAct-64 [[1, 512, 7, 7]] [1, 512, 7, 7] 0
Conv2D-204 [[1, 512, 7, 7]] [1, 512, 7, 7] 2,359,296
BatchNorm2D-65 [[1, 512, 7, 7]] [1, 512, 7, 7] 2,048
LeakyReLU-65 [[1, 512, 7, 7]] [1, 512, 7, 7] 0
ConvBnAct-65 [[1, 512, 7, 7]] [1, 512, 7, 7] 0
DarkBlock-23 [[1, 512, 7, 7]] [1, 512, 7, 7] 0
Conv2D-205 [[1, 512, 7, 7]] [1, 512, 7, 7] 262,144
BatchNorm2D-66 [[1, 512, 7, 7]] [1, 512, 7, 7] 2,048
LeakyReLU-66 [[1, 512, 7, 7]] [1, 512, 7, 7] 0
ConvBnAct-66 [[1, 512, 7, 7]] [1, 512, 7, 7] 0
Conv2D-206 [[1, 1024, 7, 7]] [1, 1024, 7, 7] 1,048,576
BatchNorm2D-67 [[1, 1024, 7, 7]] [1, 1024, 7, 7] 4,096
LeakyReLU-67 [[1, 1024, 7, 7]] [1, 1024, 7, 7] 0
ConvBnAct-67 [[1, 1024, 7, 7]] [1, 1024, 7, 7] 0
CrossStage-5 [[1, 512, 14, 14]] [1, 1024, 7, 7] 0
AdaptiveAvgPool2D-5 [[1, 1024, 7, 7]] [1, 1024, 1, 1] 0
Flatten-4 [[1, 1024, 1, 1]] [1, 1024] 0
Linear-8 [[1, 1024]] [1, 1000] 1,025,000
===============================================================================
Total params: 27,677,512
Trainable params: 27,606,856
Non-trainable params: 70,656
-------------------------------------------------------------------------------
Input size (MB): 0.57
Forward/backward pass size (MB): 423.80
Params size (MB): 105.58
Estimated Total Size (MB): 529.95
-------------------------------------------------------------------------------
{'total_params': 27677512, 'trainable_params': 27606856}
{'total_params': 22320552, 'trainable_params': 22320552}
paddle.summary(mod.CSWinTransformer_tiny_224(),(1,3,224,224))
-----------------------------------------------------------------------------------------------------
Layer (type) Input Shape Output Shape Param #
=====================================================================================================
Conv2D-207 [[1, 3, 224, 224]] [1, 64, 56, 56] 9,472
LayerNorm-1 [[1, 3136, 64]] [1, 3136, 64] 128
PatchEmbedding-1 [[1, 3, 224, 224]] [1, 3136, 64] 0
LayerNorm-2 [[1, 3136, 64]] [1, 3136, 64] 128
Linear-9 [[1, 3136, 64]] [1, 3136, 192] 12,480
Conv2D-208 [[56, 32, 56, 1]] [56, 32, 56, 1] 320
Softmax-1 [[56, 1, 56, 56]] [56, 1, 56, 56] 0
Dropout-3 [[56, 1, 56, 56]] [56, 1, 56, 56] 0
LePEAttention-1 [[1, 3136, 32], [1, 3136, 32], [1, 3136, 32]] [1, 3136, 32] 0
Conv2D-209 [[56, 32, 1, 56]] [56, 32, 1, 56] 320
Softmax-2 [[56, 1, 56, 56]] [56, 1, 56, 56] 0
Dropout-4 [[56, 1, 56, 56]] [56, 1, 56, 56] 0
LePEAttention-2 [[1, 3136, 32], [1, 3136, 32], [1, 3136, 32]] [1, 3136, 32] 0
Linear-10 [[1, 3136, 64]] [1, 3136, 64] 4,160
Identity-1 [[1, 3136, 64]] [1, 3136, 64] 0
LayerNorm-3 [[1, 3136, 64]] [1, 3136, 64] 128
Linear-11 [[1, 3136, 64]] [1, 3136, 256] 16,640
GELU-1 [[1, 3136, 256]] [1, 3136, 256] 0
Dropout-5 [[1, 3136, 64]] [1, 3136, 64] 0
Linear-12 [[1, 3136, 256]] [1, 3136, 64] 16,448
Mlp-1 [[1, 3136, 64]] [1, 3136, 64] 0
CSwinBlock-1 [[1, 3136, 64]] [1, 3136, 64] 0
Conv2D-210 [[1, 64, 56, 56]] [1, 128, 28, 28] 73,856
LayerNorm-4 [[1, 784, 128]] [1, 784, 128] 256
MergeBlock-1 [[1, 3136, 64]] [1, 784, 128] 0
CSwinStage-1 [[1, 3136, 64]] [1, 784, 128] 0
LayerNorm-5 [[1, 784, 128]] [1, 784, 128] 256
Linear-13 [[1, 784, 128]] [1, 784, 384] 49,536
Conv2D-211 [[14, 64, 28, 2]] [14, 64, 28, 2] 640
Softmax-3 [[14, 2, 56, 56]] [14, 2, 56, 56] 0
Dropout-6 [[14, 2, 56, 56]] [14, 2, 56, 56] 0
LePEAttention-3 [[1, 784, 64], [1, 784, 64], [1, 784, 64]] [1, 784, 64] 0
Conv2D-212 [[14, 64, 2, 28]] [14, 64, 2, 28] 640
Softmax-4 [[14, 2, 56, 56]] [14, 2, 56, 56] 0
Dropout-7 [[14, 2, 56, 56]] [14, 2, 56, 56] 0
LePEAttention-4 [[1, 784, 64], [1, 784, 64], [1, 784, 64]] [1, 784, 64] 0
Linear-14 [[1, 784, 128]] [1, 784, 128] 16,512
DropPath-1 [[1, 784, 128]] [1, 784, 128] 0
LayerNorm-6 [[1, 784, 128]] [1, 784, 128] 256
Linear-15 [[1, 784, 128]] [1, 784, 512] 66,048
GELU-2 [[1, 784, 512]] [1, 784, 512] 0
Dropout-8 [[1, 784, 128]] [1, 784, 128] 0
Linear-16 [[1, 784, 512]] [1, 784, 128] 65,664
Mlp-2 [[1, 784, 128]] [1, 784, 128] 0
CSwinBlock-2 [[1, 784, 128]] [1, 784, 128] 0
LayerNorm-7 [[1, 784, 128]] [1, 784, 128] 256
Linear-17 [[1, 784, 128]] [1, 784, 384] 49,536
Conv2D-213 [[14, 64, 28, 2]] [14, 64, 28, 2] 640
Softmax-5 [[14, 2, 56, 56]] [14, 2, 56, 56] 0
Dropout-9 [[14, 2, 56, 56]] [14, 2, 56, 56] 0
LePEAttention-5 [[1, 784, 64], [1, 784, 64], [1, 784, 64]] [1, 784, 64] 0
Conv2D-214 [[14, 64, 2, 28]] [14, 64, 2, 28] 640
Softmax-6 [[14, 2, 56, 56]] [14, 2, 56, 56] 0
Dropout-10 [[14, 2, 56, 56]] [14, 2, 56, 56] 0
LePEAttention-6 [[1, 784, 64], [1, 784, 64], [1, 784, 64]] [1, 784, 64] 0
Linear-18 [[1, 784, 128]] [1, 784, 128] 16,512
DropPath-2 [[1, 784, 128]] [1, 784, 128] 0
LayerNorm-8 [[1, 784, 128]] [1, 784, 128] 256
Linear-19 [[1, 784, 128]] [1, 784, 512] 66,048
GELU-3 [[1, 784, 512]] [1, 784, 512] 0
Dropout-11 [[1, 784, 128]] [1, 784, 128] 0
Linear-20 [[1, 784, 512]] [1, 784, 128] 65,664
Mlp-3 [[1, 784, 128]] [1, 784, 128] 0
CSwinBlock-3 [[1, 784, 128]] [1, 784, 128] 0
Conv2D-215 [[1, 128, 28, 28]] [1, 256, 14, 14] 295,168
LayerNorm-9 [[1, 196, 256]] [1, 196, 256] 512
MergeBlock-2 [[1, 784, 128]] [1, 196, 256] 0
CSwinStage-2 [[1, 784, 128]] [1, 196, 256] 0
LayerNorm-10 [[1, 196, 256]] [1, 196, 256] 512
Linear-21 [[1, 196, 256]] [1, 196, 768] 197,376
Conv2D-216 [[2, 128, 14, 7]] [2, 128, 14, 7] 1,280
Softmax-7 [[2, 4, 98, 98]] [2, 4, 98, 98] 0
Dropout-12 [[2, 4, 98, 98]] [2, 4, 98, 98] 0
LePEAttention-7 [[1, 196, 128], [1, 196, 128], [1, 196, 128]] [1, 196, 128] 0
Conv2D-217 [[2, 128, 7, 14]] [2, 128, 7, 14] 1,280
Softmax-8 [[2, 4, 98, 98]] [2, 4, 98, 98] 0
Dropout-13 [[2, 4, 98, 98]] [2, 4, 98, 98] 0
LePEAttention-8 [[1, 196, 128], [1, 196, 128], [1, 196, 128]] [1, 196, 128] 0
Linear-22 [[1, 196, 256]] [1, 196, 256] 65,792
DropPath-3 [[1, 196, 256]] [1, 196, 256] 0
LayerNorm-11 [[1, 196, 256]] [1, 196, 256] 512
Linear-23 [[1, 196, 256]] [1, 196, 1024] 263,168
GELU-4 [[1, 196, 1024]] [1, 196, 1024] 0
Dropout-14 [[1, 196, 256]] [1, 196, 256] 0
Linear-24 [[1, 196, 1024]] [1, 196, 256] 262,400
Mlp-4 [[1, 196, 256]] [1, 196, 256] 0
CSwinBlock-4 [[1, 196, 256]] [1, 196, 256] 0
LayerNorm-12 [[1, 196, 256]] [1, 196, 256] 512
Linear-25 [[1, 196, 256]] [1, 196, 768] 197,376
Conv2D-218 [[2, 128, 14, 7]] [2, 128, 14, 7] 1,280
Softmax-9 [[2, 4, 98, 98]] [2, 4, 98, 98] 0
Dropout-15 [[2, 4, 98, 98]] [2, 4, 98, 98] 0
LePEAttention-9 [[1, 196, 128], [1, 196, 128], [1, 196, 128]] [1, 196, 128] 0
Conv2D-219 [[2, 128, 7, 14]] [2, 128, 7, 14] 1,280
Softmax-10 [[2, 4, 98, 98]] [2, 4, 98, 98] 0
Dropout-16 [[2, 4, 98, 98]] [2, 4, 98, 98] 0
LePEAttention-10 [[1, 196, 128], [1, 196, 128], [1, 196, 128]] [1, 196, 128] 0
Linear-26 [[1, 196, 256]] [1, 196, 256] 65,792
DropPath-4 [[1, 196, 256]] [1, 196, 256] 0
LayerNorm-13 [[1, 196, 256]] [1, 196, 256] 512
Linear-27 [[1, 196, 256]] [1, 196, 1024] 263,168
GELU-5 [[1, 196, 1024]] [1, 196, 1024] 0
Dropout-17 [[1, 196, 256]] [1, 196, 256] 0
Linear-28 [[1, 196, 1024]] [1, 196, 256] 262,400
Mlp-5 [[1, 196, 256]] [1, 196, 256] 0
CSwinBlock-5 [[1, 196, 256]] [1, 196, 256] 0
LayerNorm-14 [[1, 196, 256]] [1, 196, 256] 512
Linear-29 [[1, 196, 256]] [1, 196, 768] 197,376
Conv2D-220 [[2, 128, 14, 7]] [2, 128, 14, 7] 1,280
Softmax-11 [[2, 4, 98, 98]] [2, 4, 98, 98] 0
Dropout-18 [[2, 4, 98, 98]] [2, 4, 98, 98] 0
LePEAttention-11 [[1, 196, 128], [1, 196, 128], [1, 196, 128]] [1, 196, 128] 0
Conv2D-221 [[2, 128, 7, 14]] [2, 128, 7, 14] 1,280
Softmax-12 [[2, 4, 98, 98]] [2, 4, 98, 98] 0
Dropout-19 [[2, 4, 98, 98]] [2, 4, 98, 98] 0
LePEAttention-12 [[1, 196, 128], [1, 196, 128], [1, 196, 128]] [1, 196, 128] 0
Linear-30 [[1, 196, 256]] [1, 196, 256] 65,792
DropPath-5 [[1, 196, 256]] [1, 196, 256] 0
LayerNorm-15 [[1, 196, 256]] [1, 196, 256] 512
Linear-31 [[1, 196, 256]] [1, 196, 1024] 263,168
GELU-6 [[1, 196, 1024]] [1, 196, 1024] 0
Dropout-20 [[1, 196, 256]] [1, 196, 256] 0
Linear-32 [[1, 196, 1024]] [1, 196, 256] 262,400
Mlp-6 [[1, 196, 256]] [1, 196, 256] 0
CSwinBlock-6 [[1, 196, 256]] [1, 196, 256] 0
LayerNorm-16 [[1, 196, 256]] [1, 196, 256] 512
Linear-33 [[1, 196, 256]] [1, 196, 768] 197,376
Conv2D-222 [[2, 128, 14, 7]] [2, 128, 14, 7] 1,280
Softmax-13 [[2, 4, 98, 98]] [2, 4, 98, 98] 0
Dropout-21 [[2, 4, 98, 98]] [2, 4, 98, 98] 0
LePEAttention-13 [[1, 196, 128], [1, 196, 128], [1, 196, 128]] [1, 196, 128] 0
Conv2D-223 [[2, 128, 7, 14]] [2, 128, 7, 14] 1,280
Softmax-14 [[2, 4, 98, 98]] [2, 4, 98, 98] 0
Dropout-22 [[2, 4, 98, 98]] [2, 4, 98, 98] 0
LePEAttention-14 [[1, 196, 128], [1, 196, 128], [1, 196, 128]] [1, 196, 128] 0
Linear-34 [[1, 196, 256]] [1, 196, 256] 65,792
DropPath-6 [[1, 196, 256]] [1, 196, 256] 0
LayerNorm-17 [[1, 196, 256]] [1, 196, 256] 512
Linear-35 [[1, 196, 256]] [1, 196, 1024] 263,168
GELU-7 [[1, 196, 1024]] [1, 196, 1024] 0
Dropout-23 [[1, 196, 256]] [1, 196, 256] 0
Linear-36 [[1, 196, 1024]] [1, 196, 256] 262,400
Mlp-7 [[1, 196, 256]] [1, 196, 256] 0
CSwinBlock-7 [[1, 196, 256]] [1, 196, 256] 0
LayerNorm-18 [[1, 196, 256]] [1, 196, 256] 512
Linear-37 [[1, 196, 256]] [1, 196, 768] 197,376
Conv2D-224 [[2, 128, 14, 7]] [2, 128, 14, 7] 1,280
Softmax-15 [[2, 4, 98, 98]] [2, 4, 98, 98] 0
Dropout-24 [[2, 4, 98, 98]] [2, 4, 98, 98] 0
LePEAttention-15 [[1, 196, 128], [1, 196, 128], [1, 196, 128]] [1, 196, 128] 0
Conv2D-225 [[2, 128, 7, 14]] [2, 128, 7, 14] 1,280
Softmax-16 [[2, 4, 98, 98]] [2, 4, 98, 98] 0
Dropout-25 [[2, 4, 98, 98]] [2, 4, 98, 98] 0
LePEAttention-16 [[1, 196, 128], [1, 196, 128], [1, 196, 128]] [1, 196, 128] 0
Linear-38 [[1, 196, 256]] [1, 196, 256] 65,792
DropPath-7 [[1, 196, 256]] [1, 196, 256] 0
LayerNorm-19 [[1, 196, 256]] [1, 196, 256] 512
Linear-39 [[1, 196, 256]] [1, 196, 1024] 263,168
GELU-8 [[1, 196, 1024]] [1, 196, 1024] 0
Dropout-26 [[1, 196, 256]] [1, 196, 256] 0
Linear-40 [[1, 196, 1024]] [1, 196, 256] 262,400
Mlp-8 [[1, 196, 256]] [1, 196, 256] 0
CSwinBlock-8 [[1, 196, 256]] [1, 196, 256] 0
LayerNorm-20 [[1, 196, 256]] [1, 196, 256] 512
Linear-41 [[1, 196, 256]] [1, 196, 768] 197,376
Conv2D-226 [[2, 128, 14, 7]] [2, 128, 14, 7] 1,280
Softmax-17 [[2, 4, 98, 98]] [2, 4, 98, 98] 0
Dropout-27 [[2, 4, 98, 98]] [2, 4, 98, 98] 0
LePEAttention-17 [[1, 196, 128], [1, 196, 128], [1, 196, 128]] [1, 196, 128] 0
Conv2D-227 [[2, 128, 7, 14]] [2, 128, 7, 14] 1,280
Softmax-18 [[2, 4, 98, 98]] [2, 4, 98, 98] 0
Dropout-28 [[2, 4, 98, 98]] [2, 4, 98, 98] 0
LePEAttention-18 [[1, 196, 128], [1, 196, 128], [1, 196, 128]] [1, 196, 128] 0
Linear-42 [[1, 196, 256]] [1, 196, 256] 65,792
DropPath-8 [[1, 196, 256]] [1, 196, 256] 0
LayerNorm-21 [[1, 196, 256]] [1, 196, 256] 512
Linear-43 [[1, 196, 256]] [1, 196, 1024] 263,168
GELU-9 [[1, 196, 1024]] [1, 196, 1024] 0
Dropout-29 [[1, 196, 256]] [1, 196, 256] 0
Linear-44 [[1, 196, 1024]] [1, 196, 256] 262,400
Mlp-9 [[1, 196, 256]] [1, 196, 256] 0
CSwinBlock-9 [[1, 196, 256]] [1, 196, 256] 0
LayerNorm-22 [[1, 196, 256]] [1, 196, 256] 512
Linear-45 [[1, 196, 256]] [1, 196, 768] 197,376
Conv2D-228 [[2, 128, 14, 7]] [2, 128, 14, 7] 1,280
Softmax-19 [[2, 4, 98, 98]] [2, 4, 98, 98] 0
Dropout-30 [[2, 4, 98, 98]] [2, 4, 98, 98] 0
LePEAttention-19 [[1, 196, 128], [1, 196, 128], [1, 196, 128]] [1, 196, 128] 0
Conv2D-229 [[2, 128, 7, 14]] [2, 128, 7, 14] 1,280
Softmax-20 [[2, 4, 98, 98]] [2, 4, 98, 98] 0
Dropout-31 [[2, 4, 98, 98]] [2, 4, 98, 98] 0
LePEAttention-20 [[1, 196, 128], [1, 196, 128], [1, 196, 128]] [1, 196, 128] 0
Linear-46 [[1, 196, 256]] [1, 196, 256] 65,792
DropPath-9 [[1, 196, 256]] [1, 196, 256] 0
LayerNorm-23 [[1, 196, 256]] [1, 196, 256] 512
Linear-47 [[1, 196, 256]] [1, 196, 1024] 263,168
GELU-10 [[1, 196, 1024]] [1, 196, 1024] 0
Dropout-32 [[1, 196, 256]] [1, 196, 256] 0
Linear-48 [[1, 196, 1024]] [1, 196, 256] 262,400
Mlp-10 [[1, 196, 256]] [1, 196, 256] 0
CSwinBlock-10 [[1, 196, 256]] [1, 196, 256] 0
LayerNorm-24 [[1, 196, 256]] [1, 196, 256] 512
Linear-49 [[1, 196, 256]] [1, 196, 768] 197,376
Conv2D-230 [[2, 128, 14, 7]] [2, 128, 14, 7] 1,280
Softmax-21 [[2, 4, 98, 98]] [2, 4, 98, 98] 0
Dropout-33 [[2, 4, 98, 98]] [2, 4, 98, 98] 0
LePEAttention-21 [[1, 196, 128], [1, 196, 128], [1, 196, 128]] [1, 196, 128] 0
Conv2D-231 [[2, 128, 7, 14]] [2, 128, 7, 14] 1,280
Softmax-22 [[2, 4, 98, 98]] [2, 4, 98, 98] 0
Dropout-34 [[2, 4, 98, 98]] [2, 4, 98, 98] 0
LePEAttention-22 [[1, 196, 128], [1, 196, 128], [1, 196, 128]] [1, 196, 128] 0
Linear-50 [[1, 196, 256]] [1, 196, 256] 65,792
DropPath-10 [[1, 196, 256]] [1, 196, 256] 0
LayerNorm-25 [[1, 196, 256]] [1, 196, 256] 512
Linear-51 [[1, 196, 256]] [1, 196, 1024] 263,168
GELU-11 [[1, 196, 1024]] [1, 196, 1024] 0
Dropout-35 [[1, 196, 256]] [1, 196, 256] 0
Linear-52 [[1, 196, 1024]] [1, 196, 256] 262,400
Mlp-11 [[1, 196, 256]] [1, 196, 256] 0
CSwinBlock-11 [[1, 196, 256]] [1, 196, 256] 0
LayerNorm-26 [[1, 196, 256]] [1, 196, 256] 512
Linear-53 [[1, 196, 256]] [1, 196, 768] 197,376
Conv2D-232 [[2, 128, 14, 7]] [2, 128, 14, 7] 1,280
Softmax-23 [[2, 4, 98, 98]] [2, 4, 98, 98] 0
Dropout-36 [[2, 4, 98, 98]] [2, 4, 98, 98] 0
LePEAttention-23 [[1, 196, 128], [1, 196, 128], [1, 196, 128]] [1, 196, 128] 0
Conv2D-233 [[2, 128, 7, 14]] [2, 128, 7, 14] 1,280
Softmax-24 [[2, 4, 98, 98]] [2, 4, 98, 98] 0
Dropout-37 [[2, 4, 98, 98]] [2, 4, 98, 98] 0
LePEAttention-24 [[1, 196, 128], [1, 196, 128], [1, 196, 128]] [1, 196, 128] 0
Linear-54 [[1, 196, 256]] [1, 196, 256] 65,792
DropPath-11 [[1, 196, 256]] [1, 196, 256] 0
LayerNorm-27 [[1, 196, 256]] [1, 196, 256] 512
Linear-55 [[1, 196, 256]] [1, 196, 1024] 263,168
GELU-12 [[1, 196, 1024]] [1, 196, 1024] 0
Dropout-38 [[1, 196, 256]] [1, 196, 256] 0
Linear-56 [[1, 196, 1024]] [1, 196, 256] 262,400
Mlp-12 [[1, 196, 256]] [1, 196, 256] 0
CSwinBlock-12 [[1, 196, 256]] [1, 196, 256] 0
LayerNorm-28 [[1, 196, 256]] [1, 196, 256] 512
Linear-57 [[1, 196, 256]] [1, 196, 768] 197,376
Conv2D-234 [[2, 128, 14, 7]] [2, 128, 14, 7] 1,280
Softmax-25 [[2, 4, 98, 98]] [2, 4, 98, 98] 0
Dropout-39 [[2, 4, 98, 98]] [2, 4, 98, 98] 0
LePEAttention-25 [[1, 196, 128], [1, 196, 128], [1, 196, 128]] [1, 196, 128] 0
Conv2D-235 [[2, 128, 7, 14]] [2, 128, 7, 14] 1,280
Softmax-26 [[2, 4, 98, 98]] [2, 4, 98, 98] 0
Dropout-40 [[2, 4, 98, 98]] [2, 4, 98, 98] 0
LePEAttention-26 [[1, 196, 128], [1, 196, 128], [1, 196, 128]] [1, 196, 128] 0
Linear-58 [[1, 196, 256]] [1, 196, 256] 65,792
DropPath-12 [[1, 196, 256]] [1, 196, 256] 0
LayerNorm-29 [[1, 196, 256]] [1, 196, 256] 512
Linear-59 [[1, 196, 256]] [1, 196, 1024] 263,168
GELU-13 [[1, 196, 1024]] [1, 196, 1024] 0
Dropout-41 [[1, 196, 256]] [1, 196, 256] 0
Linear-60 [[1, 196, 1024]] [1, 196, 256] 262,400
Mlp-13 [[1, 196, 256]] [1, 196, 256] 0
CSwinBlock-13 [[1, 196, 256]] [1, 196, 256] 0
LayerNorm-30 [[1, 196, 256]] [1, 196, 256] 512
Linear-61 [[1, 196, 256]] [1, 196, 768] 197,376
Conv2D-236 [[2, 128, 14, 7]] [2, 128, 14, 7] 1,280
Softmax-27 [[2, 4, 98, 98]] [2, 4, 98, 98] 0
Dropout-42 [[2, 4, 98, 98]] [2, 4, 98, 98] 0
LePEAttention-27 [[1, 196, 128], [1, 196, 128], [1, 196, 128]] [1, 196, 128] 0
Conv2D-237 [[2, 128, 7, 14]] [2, 128, 7, 14] 1,280
Softmax-28 [[2, 4, 98, 98]] [2, 4, 98, 98] 0
Dropout-43 [[2, 4, 98, 98]] [2, 4, 98, 98] 0
LePEAttention-28 [[1, 196, 128], [1, 196, 128], [1, 196, 128]] [1, 196, 128] 0
Linear-62 [[1, 196, 256]] [1, 196, 256] 65,792
DropPath-13 [[1, 196, 256]] [1, 196, 256] 0
LayerNorm-31 [[1, 196, 256]] [1, 196, 256] 512
Linear-63 [[1, 196, 256]] [1, 196, 1024] 263,168
GELU-14 [[1, 196, 1024]] [1, 196, 1024] 0
Dropout-44 [[1, 196, 256]] [1, 196, 256] 0
Linear-64 [[1, 196, 1024]] [1, 196, 256] 262,400
Mlp-14 [[1, 196, 256]] [1, 196, 256] 0
CSwinBlock-14 [[1, 196, 256]] [1, 196, 256] 0
LayerNorm-32 [[1, 196, 256]] [1, 196, 256] 512
Linear-65 [[1, 196, 256]] [1, 196, 768] 197,376
Conv2D-238 [[2, 128, 14, 7]] [2, 128, 14, 7] 1,280
Softmax-29 [[2, 4, 98, 98]] [2, 4, 98, 98] 0
Dropout-45 [[2, 4, 98, 98]] [2, 4, 98, 98] 0
LePEAttention-29 [[1, 196, 128], [1, 196, 128], [1, 196, 128]] [1, 196, 128] 0
Conv2D-239 [[2, 128, 7, 14]] [2, 128, 7, 14] 1,280
Softmax-30 [[2, 4, 98, 98]] [2, 4, 98, 98] 0
Dropout-46 [[2, 4, 98, 98]] [2, 4, 98, 98] 0
LePEAttention-30 [[1, 196, 128], [1, 196, 128], [1, 196, 128]] [1, 196, 128] 0
Linear-66 [[1, 196, 256]] [1, 196, 256] 65,792
DropPath-14 [[1, 196, 256]] [1, 196, 256] 0
LayerNorm-33 [[1, 196, 256]] [1, 196, 256] 512
Linear-67 [[1, 196, 256]] [1, 196, 1024] 263,168
GELU-15 [[1, 196, 1024]] [1, 196, 1024] 0
Dropout-47 [[1, 196, 256]] [1, 196, 256] 0
Linear-68 [[1, 196, 1024]] [1, 196, 256] 262,400
Mlp-15 [[1, 196, 256]] [1, 196, 256] 0
CSwinBlock-15 [[1, 196, 256]] [1, 196, 256] 0
LayerNorm-34 [[1, 196, 256]] [1, 196, 256] 512
Linear-69 [[1, 196, 256]] [1, 196, 768] 197,376
Conv2D-240 [[2, 128, 14, 7]] [2, 128, 14, 7] 1,280
Softmax-31 [[2, 4, 98, 98]] [2, 4, 98, 98] 0
Dropout-48 [[2, 4, 98, 98]] [2, 4, 98, 98] 0
LePEAttention-31 [[1, 196, 128], [1, 196, 128], [1, 196, 128]] [1, 196, 128] 0
Conv2D-241 [[2, 128, 7, 14]] [2, 128, 7, 14] 1,280
Softmax-32 [[2, 4, 98, 98]] [2, 4, 98, 98] 0
Dropout-49 [[2, 4, 98, 98]] [2, 4, 98, 98] 0
LePEAttention-32 [[1, 196, 128], [1, 196, 128], [1, 196, 128]] [1, 196, 128] 0
Linear-70 [[1, 196, 256]] [1, 196, 256] 65,792
DropPath-15 [[1, 196, 256]] [1, 196, 256] 0
LayerNorm-35 [[1, 196, 256]] [1, 196, 256] 512
Linear-71 [[1, 196, 256]] [1, 196, 1024] 263,168
GELU-16 [[1, 196, 1024]] [1, 196, 1024] 0
Dropout-50 [[1, 196, 256]] [1, 196, 256] 0
Linear-72 [[1, 196, 1024]] [1, 196, 256] 262,400
Mlp-16 [[1, 196, 256]] [1, 196, 256] 0
CSwinBlock-16 [[1, 196, 256]] [1, 196, 256] 0
LayerNorm-36 [[1, 196, 256]] [1, 196, 256] 512
Linear-73 [[1, 196, 256]] [1, 196, 768] 197,376
Conv2D-242 [[2, 128, 14, 7]] [2, 128, 14, 7] 1,280
Softmax-33 [[2, 4, 98, 98]] [2, 4, 98, 98] 0
Dropout-51 [[2, 4, 98, 98]] [2, 4, 98, 98] 0
LePEAttention-33 [[1, 196, 128], [1, 196, 128], [1, 196, 128]] [1, 196, 128] 0
Conv2D-243 [[2, 128, 7, 14]] [2, 128, 7, 14] 1,280
Softmax-34 [[2, 4, 98, 98]] [2, 4, 98, 98] 0
Dropout-52 [[2, 4, 98, 98]] [2, 4, 98, 98] 0
LePEAttention-34 [[1, 196, 128], [1, 196, 128], [1, 196, 128]] [1, 196, 128] 0
Linear-74 [[1, 196, 256]] [1, 196, 256] 65,792
DropPath-16 [[1, 196, 256]] [1, 196, 256] 0
LayerNorm-37 [[1, 196, 256]] [1, 196, 256] 512
Linear-75 [[1, 196, 256]] [1, 196, 1024] 263,168
GELU-17 [[1, 196, 1024]] [1, 196, 1024] 0
Dropout-53 [[1, 196, 256]] [1, 196, 256] 0
Linear-76 [[1, 196, 1024]] [1, 196, 256] 262,400
Mlp-17 [[1, 196, 256]] [1, 196, 256] 0
CSwinBlock-17 [[1, 196, 256]] [1, 196, 256] 0
LayerNorm-38 [[1, 196, 256]] [1, 196, 256] 512
Linear-77 [[1, 196, 256]] [1, 196, 768] 197,376
Conv2D-244 [[2, 128, 14, 7]] [2, 128, 14, 7] 1,280
Softmax-35 [[2, 4, 98, 98]] [2, 4, 98, 98] 0
Dropout-54 [[2, 4, 98, 98]] [2, 4, 98, 98] 0
LePEAttention-35 [[1, 196, 128], [1, 196, 128], [1, 196, 128]] [1, 196, 128] 0
Conv2D-245 [[2, 128, 7, 14]] [2, 128, 7, 14] 1,280
Softmax-36 [[2, 4, 98, 98]] [2, 4, 98, 98] 0
Dropout-55 [[2, 4, 98, 98]] [2, 4, 98, 98] 0
LePEAttention-36 [[1, 196, 128], [1, 196, 128], [1, 196, 128]] [1, 196, 128] 0
Linear-78 [[1, 196, 256]] [1, 196, 256] 65,792
DropPath-17 [[1, 196, 256]] [1, 196, 256] 0
LayerNorm-39 [[1, 196, 256]] [1, 196, 256] 512
Linear-79 [[1, 196, 256]] [1, 196, 1024] 263,168
GELU-18 [[1, 196, 1024]] [1, 196, 1024] 0
Dropout-56 [[1, 196, 256]] [1, 196, 256] 0
Linear-80 [[1, 196, 1024]] [1, 196, 256] 262,400
Mlp-18 [[1, 196, 256]] [1, 196, 256] 0
CSwinBlock-18 [[1, 196, 256]] [1, 196, 256] 0
LayerNorm-40 [[1, 196, 256]] [1, 196, 256] 512
Linear-81 [[1, 196, 256]] [1, 196, 768] 197,376
Conv2D-246 [[2, 128, 14, 7]] [2, 128, 14, 7] 1,280
Softmax-37 [[2, 4, 98, 98]] [2, 4, 98, 98] 0
Dropout-57 [[2, 4, 98, 98]] [2, 4, 98, 98] 0
LePEAttention-37 [[1, 196, 128], [1, 196, 128], [1, 196, 128]] [1, 196, 128] 0
Conv2D-247 [[2, 128, 7, 14]] [2, 128, 7, 14] 1,280
Softmax-38 [[2, 4, 98, 98]] [2, 4, 98, 98] 0
Dropout-58 [[2, 4, 98, 98]] [2, 4, 98, 98] 0
LePEAttention-38 [[1, 196, 128], [1, 196, 128], [1, 196, 128]] [1, 196, 128] 0
Linear-82 [[1, 196, 256]] [1, 196, 256] 65,792
DropPath-18 [[1, 196, 256]] [1, 196, 256] 0
LayerNorm-41 [[1, 196, 256]] [1, 196, 256] 512
Linear-83 [[1, 196, 256]] [1, 196, 1024] 263,168
GELU-19 [[1, 196, 1024]] [1, 196, 1024] 0
Dropout-59 [[1, 196, 256]] [1, 196, 256] 0
Linear-84 [[1, 196, 1024]] [1, 196, 256] 262,400
Mlp-19 [[1, 196, 256]] [1, 196, 256] 0
CSwinBlock-19 [[1, 196, 256]] [1, 196, 256] 0
LayerNorm-42 [[1, 196, 256]] [1, 196, 256] 512
Linear-85 [[1, 196, 256]] [1, 196, 768] 197,376
Conv2D-248 [[2, 128, 14, 7]] [2, 128, 14, 7] 1,280
Softmax-39 [[2, 4, 98, 98]] [2, 4, 98, 98] 0
Dropout-60 [[2, 4, 98, 98]] [2, 4, 98, 98] 0
LePEAttention-39 [[1, 196, 128], [1, 196, 128], [1, 196, 128]] [1, 196, 128] 0
Conv2D-249 [[2, 128, 7, 14]] [2, 128, 7, 14] 1,280
Softmax-40 [[2, 4, 98, 98]] [2, 4, 98, 98] 0
Dropout-61 [[2, 4, 98, 98]] [2, 4, 98, 98] 0
LePEAttention-40 [[1, 196, 128], [1, 196, 128], [1, 196, 128]] [1, 196, 128] 0
Linear-86 [[1, 196, 256]] [1, 196, 256] 65,792
DropPath-19 [[1, 196, 256]] [1, 196, 256] 0
LayerNorm-43 [[1, 196, 256]] [1, 196, 256] 512
Linear-87 [[1, 196, 256]] [1, 196, 1024] 263,168
GELU-20 [[1, 196, 1024]] [1, 196, 1024] 0
Dropout-62 [[1, 196, 256]] [1, 196, 256] 0
Linear-88 [[1, 196, 1024]] [1, 196, 256] 262,400
Mlp-20 [[1, 196, 256]] [1, 196, 256] 0
CSwinBlock-20 [[1, 196, 256]] [1, 196, 256] 0
LayerNorm-44 [[1, 196, 256]] [1, 196, 256] 512
Linear-89 [[1, 196, 256]] [1, 196, 768] 197,376
Conv2D-250 [[2, 128, 14, 7]] [2, 128, 14, 7] 1,280
Softmax-41 [[2, 4, 98, 98]] [2, 4, 98, 98] 0
Dropout-63 [[2, 4, 98, 98]] [2, 4, 98, 98] 0
LePEAttention-41 [[1, 196, 128], [1, 196, 128], [1, 196, 128]] [1, 196, 128] 0
Conv2D-251 [[2, 128, 7, 14]] [2, 128, 7, 14] 1,280
Softmax-42 [[2, 4, 98, 98]] [2, 4, 98, 98] 0
Dropout-64 [[2, 4, 98, 98]] [2, 4, 98, 98] 0
LePEAttention-42 [[1, 196, 128], [1, 196, 128], [1, 196, 128]] [1, 196, 128] 0
Linear-90 [[1, 196, 256]] [1, 196, 256] 65,792
DropPath-20 [[1, 196, 256]] [1, 196, 256] 0
LayerNorm-45 [[1, 196, 256]] [1, 196, 256] 512
Linear-91 [[1, 196, 256]] [1, 196, 1024] 263,168
GELU-21 [[1, 196, 1024]] [1, 196, 1024] 0
Dropout-65 [[1, 196, 256]] [1, 196, 256] 0
Linear-92 [[1, 196, 1024]] [1, 196, 256] 262,400
Mlp-21 [[1, 196, 256]] [1, 196, 256] 0
CSwinBlock-21 [[1, 196, 256]] [1, 196, 256] 0
LayerNorm-46 [[1, 196, 256]] [1, 196, 256] 512
Linear-93 [[1, 196, 256]] [1, 196, 768] 197,376
Conv2D-252 [[2, 128, 14, 7]] [2, 128, 14, 7] 1,280
Softmax-43 [[2, 4, 98, 98]] [2, 4, 98, 98] 0
Dropout-66 [[2, 4, 98, 98]] [2, 4, 98, 98] 0
LePEAttention-43 [[1, 196, 128], [1, 196, 128], [1, 196, 128]] [1, 196, 128] 0
Conv2D-253 [[2, 128, 7, 14]] [2, 128, 7, 14] 1,280
Softmax-44 [[2, 4, 98, 98]] [2, 4, 98, 98] 0
Dropout-67 [[2, 4, 98, 98]] [2, 4, 98, 98] 0
LePEAttention-44 [[1, 196, 128], [1, 196, 128], [1, 196, 128]] [1, 196, 128] 0
Linear-94 [[1, 196, 256]] [1, 196, 256] 65,792
DropPath-21 [[1, 196, 256]] [1, 196, 256] 0
LayerNorm-47 [[1, 196, 256]] [1, 196, 256] 512
Linear-95 [[1, 196, 256]] [1, 196, 1024] 263,168
GELU-22 [[1, 196, 1024]] [1, 196, 1024] 0
Dropout-68 [[1, 196, 256]] [1, 196, 256] 0
Linear-96 [[1, 196, 1024]] [1, 196, 256] 262,400
Mlp-22 [[1, 196, 256]] [1, 196, 256] 0
CSwinBlock-22 [[1, 196, 256]] [1, 196, 256] 0
LayerNorm-48 [[1, 196, 256]] [1, 196, 256] 512
Linear-97 [[1, 196, 256]] [1, 196, 768] 197,376
Conv2D-254 [[2, 128, 14, 7]] [2, 128, 14, 7] 1,280
Softmax-45 [[2, 4, 98, 98]] [2, 4, 98, 98] 0
Dropout-69 [[2, 4, 98, 98]] [2, 4, 98, 98] 0
LePEAttention-45 [[1, 196, 128], [1, 196, 128], [1, 196, 128]] [1, 196, 128] 0
Conv2D-255 [[2, 128, 7, 14]] [2, 128, 7, 14] 1,280
Softmax-46 [[2, 4, 98, 98]] [2, 4, 98, 98] 0
Dropout-70 [[2, 4, 98, 98]] [2, 4, 98, 98] 0
LePEAttention-46 [[1, 196, 128], [1, 196, 128], [1, 196, 128]] [1, 196, 128] 0
Linear-98 [[1, 196, 256]] [1, 196, 256] 65,792
DropPath-22 [[1, 196, 256]] [1, 196, 256] 0
LayerNorm-49 [[1, 196, 256]] [1, 196, 256] 512
Linear-99 [[1, 196, 256]] [1, 196, 1024] 263,168
GELU-23 [[1, 196, 1024]] [1, 196, 1024] 0
Dropout-71 [[1, 196, 256]] [1, 196, 256] 0
Linear-100 [[1, 196, 1024]] [1, 196, 256] 262,400
Mlp-23 [[1, 196, 256]] [1, 196, 256] 0
CSwinBlock-23 [[1, 196, 256]] [1, 196, 256] 0
LayerNorm-50 [[1, 196, 256]] [1, 196, 256] 512
Linear-101 [[1, 196, 256]] [1, 196, 768] 197,376
Conv2D-256 [[2, 128, 14, 7]] [2, 128, 14, 7] 1,280
Softmax-47 [[2, 4, 98, 98]] [2, 4, 98, 98] 0
Dropout-72 [[2, 4, 98, 98]] [2, 4, 98, 98] 0
LePEAttention-47 [[1, 196, 128], [1, 196, 128], [1, 196, 128]] [1, 196, 128] 0
Conv2D-257 [[2, 128, 7, 14]] [2, 128, 7, 14] 1,280
Softmax-48 [[2, 4, 98, 98]] [2, 4, 98, 98] 0
Dropout-73 [[2, 4, 98, 98]] [2, 4, 98, 98] 0
LePEAttention-48 [[1, 196, 128], [1, 196, 128], [1, 196, 128]] [1, 196, 128] 0
Linear-102 [[1, 196, 256]] [1, 196, 256] 65,792
DropPath-23 [[1, 196, 256]] [1, 196, 256] 0
LayerNorm-51 [[1, 196, 256]] [1, 196, 256] 512
Linear-103 [[1, 196, 256]] [1, 196, 1024] 263,168
GELU-24 [[1, 196, 1024]] [1, 196, 1024] 0
Dropout-74 [[1, 196, 256]] [1, 196, 256] 0
Linear-104 [[1, 196, 1024]] [1, 196, 256] 262,400
Mlp-24 [[1, 196, 256]] [1, 196, 256] 0
CSwinBlock-24 [[1, 196, 256]] [1, 196, 256] 0
Conv2D-258 [[1, 256, 14, 14]] [1, 512, 7, 7] 1,180,160
LayerNorm-52 [[1, 49, 512]] [1, 49, 512] 1,024
MergeBlock-3 [[1, 196, 256]] [1, 49, 512] 0
CSwinStage-3 [[1, 196, 256]] [1, 49, 512] 0
LayerNorm-53 [[1, 49, 512]] [1, 49, 512] 1,024
Linear-105 [[1, 49, 512]] [1, 49, 1536] 787,968
Conv2D-259 [[1, 512, 7, 7]] [1, 512, 7, 7] 5,120
Softmax-49 [[1, 16, 49, 49]] [1, 16, 49, 49] 0
Dropout-75 [[1, 16, 49, 49]] [1, 16, 49, 49] 0
LePEAttention-49 [[1, 49, 512], [1, 49, 512], [1, 49, 512]] [1, 49, 512] 0
Linear-106 [[1, 49, 512]] [1, 49, 512] 262,656
DropPath-24 [[1, 49, 512]] [1, 49, 512] 0
LayerNorm-54 [[1, 49, 512]] [1, 49, 512] 1,024
Linear-107 [[1, 49, 512]] [1, 49, 2048] 1,050,624
GELU-25 [[1, 49, 2048]] [1, 49, 2048] 0
Dropout-76 [[1, 49, 512]] [1, 49, 512] 0
Linear-108 [[1, 49, 2048]] [1, 49, 512] 1,049,088
Mlp-25 [[1, 49, 512]] [1, 49, 512] 0
CSwinBlock-25 [[1, 49, 512]] [1, 49, 512] 0
Identity-2 [[1, 49, 512]] [1, 49, 512] 0
CSwinStage-4 [[1, 49, 512]] [1, 49, 512] 0
LayerNorm-55 [[1, 49, 512]] [1, 49, 512] 1,024
Linear-109 [[1, 512]] [1, 1000] 513,000
=====================================================================================================
Total params: 22,320,552
Trainable params: 22,320,552
Non-trainable params: 0
-----------------------------------------------------------------------------------------------------
Input size (MB): 0.57
Forward/backward pass size (MB): 308.23
Params size (MB): 85.15
Estimated Total Size (MB): 393.95
-----------------------------------------------------------------------------------------------------
{'total_params': 22320552, 'trainable_params': 22320552}
{'total_params': 41645640, 'trainable_params': 41574216}
paddle.summary(mod.DarkNet53(),(1,3,224,224))
-------------------------------------------------------------------------------
Layer (type) Input Shape Output Shape Param #
===============================================================================
Conv2D-260 [[1, 3, 224, 224]] [1, 32, 224, 224] 864
BatchNorm-135 [[1, 32, 224, 224]] [1, 32, 224, 224] 128
ConvBNLayer-135 [[1, 3, 224, 224]] [1, 32, 224, 224] 0
Conv2D-261 [[1, 32, 224, 224]] [1, 64, 112, 112] 18,432
BatchNorm-136 [[1, 64, 112, 112]] [1, 64, 112, 112] 256
ConvBNLayer-136 [[1, 32, 224, 224]] [1, 64, 112, 112] 0
Conv2D-262 [[1, 64, 112, 112]] [1, 32, 112, 112] 2,048
BatchNorm-137 [[1, 32, 112, 112]] [1, 32, 112, 112] 128
ConvBNLayer-137 [[1, 64, 112, 112]] [1, 32, 112, 112] 0
Conv2D-263 [[1, 32, 112, 112]] [1, 64, 112, 112] 18,432
BatchNorm-138 [[1, 64, 112, 112]] [1, 64, 112, 112] 256
ConvBNLayer-138 [[1, 32, 112, 112]] [1, 64, 112, 112] 0
BasicBlock-1 [[1, 64, 112, 112]] [1, 64, 112, 112] 0
Conv2D-264 [[1, 64, 112, 112]] [1, 128, 56, 56] 73,728
BatchNorm-139 [[1, 128, 56, 56]] [1, 128, 56, 56] 512
ConvBNLayer-139 [[1, 64, 112, 112]] [1, 128, 56, 56] 0
Conv2D-265 [[1, 128, 56, 56]] [1, 64, 56, 56] 8,192
BatchNorm-140 [[1, 64, 56, 56]] [1, 64, 56, 56] 256
ConvBNLayer-140 [[1, 128, 56, 56]] [1, 64, 56, 56] 0
Conv2D-266 [[1, 64, 56, 56]] [1, 128, 56, 56] 73,728
BatchNorm-141 [[1, 128, 56, 56]] [1, 128, 56, 56] 512
ConvBNLayer-141 [[1, 64, 56, 56]] [1, 128, 56, 56] 0
BasicBlock-2 [[1, 128, 56, 56]] [1, 128, 56, 56] 0
Conv2D-267 [[1, 128, 56, 56]] [1, 64, 56, 56] 8,192
BatchNorm-142 [[1, 64, 56, 56]] [1, 64, 56, 56] 256
ConvBNLayer-142 [[1, 128, 56, 56]] [1, 64, 56, 56] 0
Conv2D-268 [[1, 64, 56, 56]] [1, 128, 56, 56] 73,728
BatchNorm-143 [[1, 128, 56, 56]] [1, 128, 56, 56] 512
ConvBNLayer-143 [[1, 64, 56, 56]] [1, 128, 56, 56] 0
BasicBlock-3 [[1, 128, 56, 56]] [1, 128, 56, 56] 0
Conv2D-269 [[1, 128, 56, 56]] [1, 256, 28, 28] 294,912
BatchNorm-144 [[1, 256, 28, 28]] [1, 256, 28, 28] 1,024
ConvBNLayer-144 [[1, 128, 56, 56]] [1, 256, 28, 28] 0
Conv2D-270 [[1, 256, 28, 28]] [1, 128, 28, 28] 32,768
BatchNorm-145 [[1, 128, 28, 28]] [1, 128, 28, 28] 512
ConvBNLayer-145 [[1, 256, 28, 28]] [1, 128, 28, 28] 0
Conv2D-271 [[1, 128, 28, 28]] [1, 256, 28, 28] 294,912
BatchNorm-146 [[1, 256, 28, 28]] [1, 256, 28, 28] 1,024
ConvBNLayer-146 [[1, 128, 28, 28]] [1, 256, 28, 28] 0
BasicBlock-4 [[1, 256, 28, 28]] [1, 256, 28, 28] 0
Conv2D-272 [[1, 256, 28, 28]] [1, 128, 28, 28] 32,768
BatchNorm-147 [[1, 128, 28, 28]] [1, 128, 28, 28] 512
ConvBNLayer-147 [[1, 256, 28, 28]] [1, 128, 28, 28] 0
Conv2D-273 [[1, 128, 28, 28]] [1, 256, 28, 28] 294,912
BatchNorm-148 [[1, 256, 28, 28]] [1, 256, 28, 28] 1,024
ConvBNLayer-148 [[1, 128, 28, 28]] [1, 256, 28, 28] 0
BasicBlock-5 [[1, 256, 28, 28]] [1, 256, 28, 28] 0
Conv2D-274 [[1, 256, 28, 28]] [1, 128, 28, 28] 32,768
BatchNorm-149 [[1, 128, 28, 28]] [1, 128, 28, 28] 512
ConvBNLayer-149 [[1, 256, 28, 28]] [1, 128, 28, 28] 0
Conv2D-275 [[1, 128, 28, 28]] [1, 256, 28, 28] 294,912
BatchNorm-150 [[1, 256, 28, 28]] [1, 256, 28, 28] 1,024
ConvBNLayer-150 [[1, 128, 28, 28]] [1, 256, 28, 28] 0
BasicBlock-6 [[1, 256, 28, 28]] [1, 256, 28, 28] 0
Conv2D-276 [[1, 256, 28, 28]] [1, 128, 28, 28] 32,768
BatchNorm-151 [[1, 128, 28, 28]] [1, 128, 28, 28] 512
ConvBNLayer-151 [[1, 256, 28, 28]] [1, 128, 28, 28] 0
Conv2D-277 [[1, 128, 28, 28]] [1, 256, 28, 28] 294,912
BatchNorm-152 [[1, 256, 28, 28]] [1, 256, 28, 28] 1,024
ConvBNLayer-152 [[1, 128, 28, 28]] [1, 256, 28, 28] 0
BasicBlock-7 [[1, 256, 28, 28]] [1, 256, 28, 28] 0
Conv2D-278 [[1, 256, 28, 28]] [1, 128, 28, 28] 32,768
BatchNorm-153 [[1, 128, 28, 28]] [1, 128, 28, 28] 512
ConvBNLayer-153 [[1, 256, 28, 28]] [1, 128, 28, 28] 0
Conv2D-279 [[1, 128, 28, 28]] [1, 256, 28, 28] 294,912
BatchNorm-154 [[1, 256, 28, 28]] [1, 256, 28, 28] 1,024
ConvBNLayer-154 [[1, 128, 28, 28]] [1, 256, 28, 28] 0
BasicBlock-8 [[1, 256, 28, 28]] [1, 256, 28, 28] 0
Conv2D-280 [[1, 256, 28, 28]] [1, 128, 28, 28] 32,768
BatchNorm-155 [[1, 128, 28, 28]] [1, 128, 28, 28] 512
ConvBNLayer-155 [[1, 256, 28, 28]] [1, 128, 28, 28] 0
Conv2D-281 [[1, 128, 28, 28]] [1, 256, 28, 28] 294,912
BatchNorm-156 [[1, 256, 28, 28]] [1, 256, 28, 28] 1,024
ConvBNLayer-156 [[1, 128, 28, 28]] [1, 256, 28, 28] 0
BasicBlock-9 [[1, 256, 28, 28]] [1, 256, 28, 28] 0
Conv2D-282 [[1, 256, 28, 28]] [1, 128, 28, 28] 32,768
BatchNorm-157 [[1, 128, 28, 28]] [1, 128, 28, 28] 512
ConvBNLayer-157 [[1, 256, 28, 28]] [1, 128, 28, 28] 0
Conv2D-283 [[1, 128, 28, 28]] [1, 256, 28, 28] 294,912
BatchNorm-158 [[1, 256, 28, 28]] [1, 256, 28, 28] 1,024
ConvBNLayer-158 [[1, 128, 28, 28]] [1, 256, 28, 28] 0
BasicBlock-10 [[1, 256, 28, 28]] [1, 256, 28, 28] 0
Conv2D-284 [[1, 256, 28, 28]] [1, 128, 28, 28] 32,768
BatchNorm-159 [[1, 128, 28, 28]] [1, 128, 28, 28] 512
ConvBNLayer-159 [[1, 256, 28, 28]] [1, 128, 28, 28] 0
Conv2D-285 [[1, 128, 28, 28]] [1, 256, 28, 28] 294,912
BatchNorm-160 [[1, 256, 28, 28]] [1, 256, 28, 28] 1,024
ConvBNLayer-160 [[1, 128, 28, 28]] [1, 256, 28, 28] 0
BasicBlock-11 [[1, 256, 28, 28]] [1, 256, 28, 28] 0
Conv2D-286 [[1, 256, 28, 28]] [1, 512, 14, 14] 1,179,648
BatchNorm-161 [[1, 512, 14, 14]] [1, 512, 14, 14] 2,048
ConvBNLayer-161 [[1, 256, 28, 28]] [1, 512, 14, 14] 0
Conv2D-287 [[1, 512, 14, 14]] [1, 256, 14, 14] 131,072
BatchNorm-162 [[1, 256, 14, 14]] [1, 256, 14, 14] 1,024
ConvBNLayer-162 [[1, 512, 14, 14]] [1, 256, 14, 14] 0
Conv2D-288 [[1, 256, 14, 14]] [1, 512, 14, 14] 1,179,648
BatchNorm-163 [[1, 512, 14, 14]] [1, 512, 14, 14] 2,048
ConvBNLayer-163 [[1, 256, 14, 14]] [1, 512, 14, 14] 0
BasicBlock-12 [[1, 512, 14, 14]] [1, 512, 14, 14] 0
Conv2D-289 [[1, 512, 14, 14]] [1, 256, 14, 14] 131,072
BatchNorm-164 [[1, 256, 14, 14]] [1, 256, 14, 14] 1,024
ConvBNLayer-164 [[1, 512, 14, 14]] [1, 256, 14, 14] 0
Conv2D-290 [[1, 256, 14, 14]] [1, 512, 14, 14] 1,179,648
BatchNorm-165 [[1, 512, 14, 14]] [1, 512, 14, 14] 2,048
ConvBNLayer-165 [[1, 256, 14, 14]] [1, 512, 14, 14] 0
BasicBlock-13 [[1, 512, 14, 14]] [1, 512, 14, 14] 0
Conv2D-291 [[1, 512, 14, 14]] [1, 256, 14, 14] 131,072
BatchNorm-166 [[1, 256, 14, 14]] [1, 256, 14, 14] 1,024
ConvBNLayer-166 [[1, 512, 14, 14]] [1, 256, 14, 14] 0
Conv2D-292 [[1, 256, 14, 14]] [1, 512, 14, 14] 1,179,648
BatchNorm-167 [[1, 512, 14, 14]] [1, 512, 14, 14] 2,048
ConvBNLayer-167 [[1, 256, 14, 14]] [1, 512, 14, 14] 0
BasicBlock-14 [[1, 512, 14, 14]] [1, 512, 14, 14] 0
Conv2D-293 [[1, 512, 14, 14]] [1, 256, 14, 14] 131,072
BatchNorm-168 [[1, 256, 14, 14]] [1, 256, 14, 14] 1,024
ConvBNLayer-168 [[1, 512, 14, 14]] [1, 256, 14, 14] 0
Conv2D-294 [[1, 256, 14, 14]] [1, 512, 14, 14] 1,179,648
BatchNorm-169 [[1, 512, 14, 14]] [1, 512, 14, 14] 2,048
ConvBNLayer-169 [[1, 256, 14, 14]] [1, 512, 14, 14] 0
BasicBlock-15 [[1, 512, 14, 14]] [1, 512, 14, 14] 0
Conv2D-295 [[1, 512, 14, 14]] [1, 256, 14, 14] 131,072
BatchNorm-170 [[1, 256, 14, 14]] [1, 256, 14, 14] 1,024
ConvBNLayer-170 [[1, 512, 14, 14]] [1, 256, 14, 14] 0
Conv2D-296 [[1, 256, 14, 14]] [1, 512, 14, 14] 1,179,648
BatchNorm-171 [[1, 512, 14, 14]] [1, 512, 14, 14] 2,048
ConvBNLayer-171 [[1, 256, 14, 14]] [1, 512, 14, 14] 0
BasicBlock-16 [[1, 512, 14, 14]] [1, 512, 14, 14] 0
Conv2D-297 [[1, 512, 14, 14]] [1, 256, 14, 14] 131,072
BatchNorm-172 [[1, 256, 14, 14]] [1, 256, 14, 14] 1,024
ConvBNLayer-172 [[1, 512, 14, 14]] [1, 256, 14, 14] 0
Conv2D-298 [[1, 256, 14, 14]] [1, 512, 14, 14] 1,179,648
BatchNorm-173 [[1, 512, 14, 14]] [1, 512, 14, 14] 2,048
ConvBNLayer-173 [[1, 256, 14, 14]] [1, 512, 14, 14] 0
BasicBlock-17 [[1, 512, 14, 14]] [1, 512, 14, 14] 0
Conv2D-299 [[1, 512, 14, 14]] [1, 256, 14, 14] 131,072
BatchNorm-174 [[1, 256, 14, 14]] [1, 256, 14, 14] 1,024
ConvBNLayer-174 [[1, 512, 14, 14]] [1, 256, 14, 14] 0
Conv2D-300 [[1, 256, 14, 14]] [1, 512, 14, 14] 1,179,648
BatchNorm-175 [[1, 512, 14, 14]] [1, 512, 14, 14] 2,048
ConvBNLayer-175 [[1, 256, 14, 14]] [1, 512, 14, 14] 0
BasicBlock-18 [[1, 512, 14, 14]] [1, 512, 14, 14] 0
Conv2D-301 [[1, 512, 14, 14]] [1, 256, 14, 14] 131,072
BatchNorm-176 [[1, 256, 14, 14]] [1, 256, 14, 14] 1,024
ConvBNLayer-176 [[1, 512, 14, 14]] [1, 256, 14, 14] 0
Conv2D-302 [[1, 256, 14, 14]] [1, 512, 14, 14] 1,179,648
BatchNorm-177 [[1, 512, 14, 14]] [1, 512, 14, 14] 2,048
ConvBNLayer-177 [[1, 256, 14, 14]] [1, 512, 14, 14] 0
BasicBlock-19 [[1, 512, 14, 14]] [1, 512, 14, 14] 0
Conv2D-303 [[1, 512, 14, 14]] [1, 1024, 7, 7] 4,718,592
BatchNorm-178 [[1, 1024, 7, 7]] [1, 1024, 7, 7] 4,096
ConvBNLayer-178 [[1, 512, 14, 14]] [1, 1024, 7, 7] 0
Conv2D-304 [[1, 1024, 7, 7]] [1, 512, 7, 7] 524,288
BatchNorm-179 [[1, 512, 7, 7]] [1, 512, 7, 7] 2,048
ConvBNLayer-179 [[1, 1024, 7, 7]] [1, 512, 7, 7] 0
Conv2D-305 [[1, 512, 7, 7]] [1, 1024, 7, 7] 4,718,592
BatchNorm-180 [[1, 1024, 7, 7]] [1, 1024, 7, 7] 4,096
ConvBNLayer-180 [[1, 512, 7, 7]] [1, 1024, 7, 7] 0
BasicBlock-20 [[1, 1024, 7, 7]] [1, 1024, 7, 7] 0
Conv2D-306 [[1, 1024, 7, 7]] [1, 512, 7, 7] 524,288
BatchNorm-181 [[1, 512, 7, 7]] [1, 512, 7, 7] 2,048
ConvBNLayer-181 [[1, 1024, 7, 7]] [1, 512, 7, 7] 0
Conv2D-307 [[1, 512, 7, 7]] [1, 1024, 7, 7] 4,718,592
BatchNorm-182 [[1, 1024, 7, 7]] [1, 1024, 7, 7] 4,096
ConvBNLayer-182 [[1, 512, 7, 7]] [1, 1024, 7, 7] 0
BasicBlock-21 [[1, 1024, 7, 7]] [1, 1024, 7, 7] 0
Conv2D-308 [[1, 1024, 7, 7]] [1, 512, 7, 7] 524,288
BatchNorm-183 [[1, 512, 7, 7]] [1, 512, 7, 7] 2,048
ConvBNLayer-183 [[1, 1024, 7, 7]] [1, 512, 7, 7] 0
Conv2D-309 [[1, 512, 7, 7]] [1, 1024, 7, 7] 4,718,592
BatchNorm-184 [[1, 1024, 7, 7]] [1, 1024, 7, 7] 4,096
ConvBNLayer-184 [[1, 512, 7, 7]] [1, 1024, 7, 7] 0
BasicBlock-22 [[1, 1024, 7, 7]] [1, 1024, 7, 7] 0
Conv2D-310 [[1, 1024, 7, 7]] [1, 512, 7, 7] 524,288
BatchNorm-185 [[1, 512, 7, 7]] [1, 512, 7, 7] 2,048
ConvBNLayer-185 [[1, 1024, 7, 7]] [1, 512, 7, 7] 0
Conv2D-311 [[1, 512, 7, 7]] [1, 1024, 7, 7] 4,718,592
BatchNorm-186 [[1, 1024, 7, 7]] [1, 1024, 7, 7] 4,096
ConvBNLayer-186 [[1, 512, 7, 7]] [1, 1024, 7, 7] 0
BasicBlock-23 [[1, 1024, 7, 7]] [1, 1024, 7, 7] 0
AdaptiveAvgPool2D-6 [[1, 1024, 7, 7]] [1, 1024, 1, 1] 0
Linear-110 [[1, 1024]] [1, 1000] 1,025,000
===============================================================================
Total params: 41,645,640
Trainable params: 41,574,216
Non-trainable params: 71,424
-------------------------------------------------------------------------------
Input size (MB): 0.57
Forward/backward pass size (MB): 249.23
Params size (MB): 158.87
Estimated Total Size (MB): 408.67
-------------------------------------------------------------------------------
{'total_params': 41645640, 'trainable_params': 41574216}
{'total_params': 87184592, 'trainable_params': 87184592}
paddle.summary(mod.DeiT_base_distilled_patch16_224(),(1,3,224,224))
---------------------------------------------------------------------------
Layer (type) Input Shape Output Shape Param #
===========================================================================
Conv2D-312 [[1, 3, 224, 224]] [1, 768, 14, 14] 590,592
PatchEmbed-1 [[1, 3, 224, 224]] [1, 196, 768] 0
Dropout-77 [[1, 198, 768]] [1, 198, 768] 0
LayerNorm-56 [[1, 198, 768]] [1, 198, 768] 1,536
Linear-111 [[1, 198, 768]] [1, 198, 2304] 1,771,776
Dropout-78 [[1, 12, 198, 198]] [1, 12, 198, 198] 0
Linear-112 [[1, 198, 768]] [1, 198, 768] 590,592
Dropout-79 [[1, 198, 768]] [1, 198, 768] 0
Attention-1 [[1, 198, 768]] [1, 198, 768] 0
Identity-3 [[1, 198, 768]] [1, 198, 768] 0
LayerNorm-57 [[1, 198, 768]] [1, 198, 768] 1,536
Linear-113 [[1, 198, 768]] [1, 198, 3072] 2,362,368
GELU-26 [[1, 198, 3072]] [1, 198, 3072] 0
Dropout-80 [[1, 198, 768]] [1, 198, 768] 0
Linear-114 [[1, 198, 3072]] [1, 198, 768] 2,360,064
Mlp-26 [[1, 198, 768]] [1, 198, 768] 0
Block-1 [[1, 198, 768]] [1, 198, 768] 0
LayerNorm-58 [[1, 198, 768]] [1, 198, 768] 1,536
Linear-115 [[1, 198, 768]] [1, 198, 2304] 1,771,776
Dropout-81 [[1, 12, 198, 198]] [1, 12, 198, 198] 0
Linear-116 [[1, 198, 768]] [1, 198, 768] 590,592
Dropout-82 [[1, 198, 768]] [1, 198, 768] 0
Attention-2 [[1, 198, 768]] [1, 198, 768] 0
Identity-4 [[1, 198, 768]] [1, 198, 768] 0
LayerNorm-59 [[1, 198, 768]] [1, 198, 768] 1,536
Linear-117 [[1, 198, 768]] [1, 198, 3072] 2,362,368
GELU-27 [[1, 198, 3072]] [1, 198, 3072] 0
Dropout-83 [[1, 198, 768]] [1, 198, 768] 0
Linear-118 [[1, 198, 3072]] [1, 198, 768] 2,360,064
Mlp-27 [[1, 198, 768]] [1, 198, 768] 0
Block-2 [[1, 198, 768]] [1, 198, 768] 0
LayerNorm-60 [[1, 198, 768]] [1, 198, 768] 1,536
Linear-119 [[1, 198, 768]] [1, 198, 2304] 1,771,776
Dropout-84 [[1, 12, 198, 198]] [1, 12, 198, 198] 0
Linear-120 [[1, 198, 768]] [1, 198, 768] 590,592
Dropout-85 [[1, 198, 768]] [1, 198, 768] 0
Attention-3 [[1, 198, 768]] [1, 198, 768] 0
Identity-5 [[1, 198, 768]] [1, 198, 768] 0
LayerNorm-61 [[1, 198, 768]] [1, 198, 768] 1,536
Linear-121 [[1, 198, 768]] [1, 198, 3072] 2,362,368
GELU-28 [[1, 198, 3072]] [1, 198, 3072] 0
Dropout-86 [[1, 198, 768]] [1, 198, 768] 0
Linear-122 [[1, 198, 3072]] [1, 198, 768] 2,360,064
Mlp-28 [[1, 198, 768]] [1, 198, 768] 0
Block-3 [[1, 198, 768]] [1, 198, 768] 0
LayerNorm-62 [[1, 198, 768]] [1, 198, 768] 1,536
Linear-123 [[1, 198, 768]] [1, 198, 2304] 1,771,776
Dropout-87 [[1, 12, 198, 198]] [1, 12, 198, 198] 0
Linear-124 [[1, 198, 768]] [1, 198, 768] 590,592
Dropout-88 [[1, 198, 768]] [1, 198, 768] 0
Attention-4 [[1, 198, 768]] [1, 198, 768] 0
Identity-6 [[1, 198, 768]] [1, 198, 768] 0
LayerNorm-63 [[1, 198, 768]] [1, 198, 768] 1,536
Linear-125 [[1, 198, 768]] [1, 198, 3072] 2,362,368
GELU-29 [[1, 198, 3072]] [1, 198, 3072] 0
Dropout-89 [[1, 198, 768]] [1, 198, 768] 0
Linear-126 [[1, 198, 3072]] [1, 198, 768] 2,360,064
Mlp-29 [[1, 198, 768]] [1, 198, 768] 0
Block-4 [[1, 198, 768]] [1, 198, 768] 0
LayerNorm-64 [[1, 198, 768]] [1, 198, 768] 1,536
Linear-127 [[1, 198, 768]] [1, 198, 2304] 1,771,776
Dropout-90 [[1, 12, 198, 198]] [1, 12, 198, 198] 0
Linear-128 [[1, 198, 768]] [1, 198, 768] 590,592
Dropout-91 [[1, 198, 768]] [1, 198, 768] 0
Attention-5 [[1, 198, 768]] [1, 198, 768] 0
Identity-7 [[1, 198, 768]] [1, 198, 768] 0
LayerNorm-65 [[1, 198, 768]] [1, 198, 768] 1,536
Linear-129 [[1, 198, 768]] [1, 198, 3072] 2,362,368
GELU-30 [[1, 198, 3072]] [1, 198, 3072] 0
Dropout-92 [[1, 198, 768]] [1, 198, 768] 0
Linear-130 [[1, 198, 3072]] [1, 198, 768] 2,360,064
Mlp-30 [[1, 198, 768]] [1, 198, 768] 0
Block-5 [[1, 198, 768]] [1, 198, 768] 0
LayerNorm-66 [[1, 198, 768]] [1, 198, 768] 1,536
Linear-131 [[1, 198, 768]] [1, 198, 2304] 1,771,776
Dropout-93 [[1, 12, 198, 198]] [1, 12, 198, 198] 0
Linear-132 [[1, 198, 768]] [1, 198, 768] 590,592
Dropout-94 [[1, 198, 768]] [1, 198, 768] 0
Attention-6 [[1, 198, 768]] [1, 198, 768] 0
Identity-8 [[1, 198, 768]] [1, 198, 768] 0
LayerNorm-67 [[1, 198, 768]] [1, 198, 768] 1,536
Linear-133 [[1, 198, 768]] [1, 198, 3072] 2,362,368
GELU-31 [[1, 198, 3072]] [1, 198, 3072] 0
Dropout-95 [[1, 198, 768]] [1, 198, 768] 0
Linear-134 [[1, 198, 3072]] [1, 198, 768] 2,360,064
Mlp-31 [[1, 198, 768]] [1, 198, 768] 0
Block-6 [[1, 198, 768]] [1, 198, 768] 0
LayerNorm-68 [[1, 198, 768]] [1, 198, 768] 1,536
Linear-135 [[1, 198, 768]] [1, 198, 2304] 1,771,776
Dropout-96 [[1, 12, 198, 198]] [1, 12, 198, 198] 0
Linear-136 [[1, 198, 768]] [1, 198, 768] 590,592
Dropout-97 [[1, 198, 768]] [1, 198, 768] 0
Attention-7 [[1, 198, 768]] [1, 198, 768] 0
Identity-9 [[1, 198, 768]] [1, 198, 768] 0
LayerNorm-69 [[1, 198, 768]] [1, 198, 768] 1,536
Linear-137 [[1, 198, 768]] [1, 198, 3072] 2,362,368
GELU-32 [[1, 198, 3072]] [1, 198, 3072] 0
Dropout-98 [[1, 198, 768]] [1, 198, 768] 0
Linear-138 [[1, 198, 3072]] [1, 198, 768] 2,360,064
Mlp-32 [[1, 198, 768]] [1, 198, 768] 0
Block-7 [[1, 198, 768]] [1, 198, 768] 0
LayerNorm-70 [[1, 198, 768]] [1, 198, 768] 1,536
Linear-139 [[1, 198, 768]] [1, 198, 2304] 1,771,776
Dropout-99 [[1, 12, 198, 198]] [1, 12, 198, 198] 0
Linear-140 [[1, 198, 768]] [1, 198, 768] 590,592
Dropout-100 [[1, 198, 768]] [1, 198, 768] 0
Attention-8 [[1, 198, 768]] [1, 198, 768] 0
Identity-10 [[1, 198, 768]] [1, 198, 768] 0
LayerNorm-71 [[1, 198, 768]] [1, 198, 768] 1,536
Linear-141 [[1, 198, 768]] [1, 198, 3072] 2,362,368
GELU-33 [[1, 198, 3072]] [1, 198, 3072] 0
Dropout-101 [[1, 198, 768]] [1, 198, 768] 0
Linear-142 [[1, 198, 3072]] [1, 198, 768] 2,360,064
Mlp-33 [[1, 198, 768]] [1, 198, 768] 0
Block-8 [[1, 198, 768]] [1, 198, 768] 0
LayerNorm-72 [[1, 198, 768]] [1, 198, 768] 1,536
Linear-143 [[1, 198, 768]] [1, 198, 2304] 1,771,776
Dropout-102 [[1, 12, 198, 198]] [1, 12, 198, 198] 0
Linear-144 [[1, 198, 768]] [1, 198, 768] 590,592
Dropout-103 [[1, 198, 768]] [1, 198, 768] 0
Attention-9 [[1, 198, 768]] [1, 198, 768] 0
Identity-11 [[1, 198, 768]] [1, 198, 768] 0
LayerNorm-73 [[1, 198, 768]] [1, 198, 768] 1,536
Linear-145 [[1, 198, 768]] [1, 198, 3072] 2,362,368
GELU-34 [[1, 198, 3072]] [1, 198, 3072] 0
Dropout-104 [[1, 198, 768]] [1, 198, 768] 0
Linear-146 [[1, 198, 3072]] [1, 198, 768] 2,360,064
Mlp-34 [[1, 198, 768]] [1, 198, 768] 0
Block-9 [[1, 198, 768]] [1, 198, 768] 0
LayerNorm-74 [[1, 198, 768]] [1, 198, 768] 1,536
Linear-147 [[1, 198, 768]] [1, 198, 2304] 1,771,776
Dropout-105 [[1, 12, 198, 198]] [1, 12, 198, 198] 0
Linear-148 [[1, 198, 768]] [1, 198, 768] 590,592
Dropout-106 [[1, 198, 768]] [1, 198, 768] 0
Attention-10 [[1, 198, 768]] [1, 198, 768] 0
Identity-12 [[1, 198, 768]] [1, 198, 768] 0
LayerNorm-75 [[1, 198, 768]] [1, 198, 768] 1,536
Linear-149 [[1, 198, 768]] [1, 198, 3072] 2,362,368
GELU-35 [[1, 198, 3072]] [1, 198, 3072] 0
Dropout-107 [[1, 198, 768]] [1, 198, 768] 0
Linear-150 [[1, 198, 3072]] [1, 198, 768] 2,360,064
Mlp-35 [[1, 198, 768]] [1, 198, 768] 0
Block-10 [[1, 198, 768]] [1, 198, 768] 0
LayerNorm-76 [[1, 198, 768]] [1, 198, 768] 1,536
Linear-151 [[1, 198, 768]] [1, 198, 2304] 1,771,776
Dropout-108 [[1, 12, 198, 198]] [1, 12, 198, 198] 0
Linear-152 [[1, 198, 768]] [1, 198, 768] 590,592
Dropout-109 [[1, 198, 768]] [1, 198, 768] 0
Attention-11 [[1, 198, 768]] [1, 198, 768] 0
Identity-13 [[1, 198, 768]] [1, 198, 768] 0
LayerNorm-77 [[1, 198, 768]] [1, 198, 768] 1,536
Linear-153 [[1, 198, 768]] [1, 198, 3072] 2,362,368
GELU-36 [[1, 198, 3072]] [1, 198, 3072] 0
Dropout-110 [[1, 198, 768]] [1, 198, 768] 0
Linear-154 [[1, 198, 3072]] [1, 198, 768] 2,360,064
Mlp-36 [[1, 198, 768]] [1, 198, 768] 0
Block-11 [[1, 198, 768]] [1, 198, 768] 0
LayerNorm-78 [[1, 198, 768]] [1, 198, 768] 1,536
Linear-155 [[1, 198, 768]] [1, 198, 2304] 1,771,776
Dropout-111 [[1, 12, 198, 198]] [1, 12, 198, 198] 0
Linear-156 [[1, 198, 768]] [1, 198, 768] 590,592
Dropout-112 [[1, 198, 768]] [1, 198, 768] 0
Attention-12 [[1, 198, 768]] [1, 198, 768] 0
Identity-14 [[1, 198, 768]] [1, 198, 768] 0
LayerNorm-79 [[1, 198, 768]] [1, 198, 768] 1,536
Linear-157 [[1, 198, 768]] [1, 198, 3072] 2,362,368
GELU-37 [[1, 198, 3072]] [1, 198, 3072] 0
Dropout-113 [[1, 198, 768]] [1, 198, 768] 0
Linear-158 [[1, 198, 3072]] [1, 198, 768] 2,360,064
Mlp-37 [[1, 198, 768]] [1, 198, 768] 0
Block-12 [[1, 198, 768]] [1, 198, 768] 0
LayerNorm-80 [[1, 198, 768]] [1, 198, 768] 1,536
Linear-159 [[1, 768]] [1, 1000] 769,000
Linear-160 [[1, 768]] [1, 1000] 769,000
===========================================================================
Total params: 87,184,592
Trainable params: 87,184,592
Non-trainable params: 0
---------------------------------------------------------------------------
Input size (MB): 0.57
Forward/backward pass size (MB): 340.06
Params size (MB): 332.58
Estimated Total Size (MB): 673.22
---------------------------------------------------------------------------
{'total_params': 87184592, 'trainable_params': 87184592}