2012年,AlexNet横空出世,该模型的名字源于论文第一作者的姓名Alex Krizhevsky。AlexNet使用了8层卷积神经网络,以很大的优势赢得了ImageNet 2012图像识别挑战赛。它首次证明了学习到的特征可以超越手工设计的特征,从而一举改变了计算机视觉研究的方向。
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Flatten,Dense,Convolution2D,MaxPooling2D,Dropout,AvgPool2D
# Convolutional feature extractor: five ReLU convolutions interleaved with
# three overlapping max-pools (3x3 window, stride 2).
feature_layers = [
    # 227x227x3 input -> 55x55x64 (11x11 kernel, stride 4, default 'valid' padding).
    Convolution2D(64, 11, strides=4, activation='relu', input_shape=(227, 227, 3)),
    MaxPooling2D(pool_size=3, strides=2),  # -> 27x27x64
    # 'same' padding keeps the spatial size through the remaining convolutions.
    Convolution2D(192, 5, strides=1, padding='same', activation='relu'),
    MaxPooling2D(pool_size=3, strides=2),  # -> 13x13x192
    Convolution2D(384, 3, strides=1, padding='same', activation='relu'),
    Convolution2D(256, 3, strides=1, padding='same', activation='relu'),
    Convolution2D(256, 3, strides=1, padding='same', activation='relu'),
    MaxPooling2D(pool_size=3, strides=2),  # -> 6x6x256
]

# Classifier head: flatten to a 9216-vector, then two 4096-unit hidden layers
# (each preceded by 50% dropout) and a 1000-way softmax output.
classifier_layers = [
    Flatten(),
    Dropout(0.5),
    Dense(4096, activation='relu'),
    Dropout(0.5),
    Dense(4096, activation='relu'),
    Dropout(0.5),
    Dense(1000, activation='softmax'),
]

# Layers are instantiated in the same order as a single inline list, so the
# auto-generated layer names (conv2d, conv2d_1, ...) are unchanged.
AlexNet = Sequential(feature_layers + classifier_layers)

# Print per-layer output shapes and parameter counts.
AlexNet.summary()
Model: "sequential"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
conv2d (Conv2D) (None, 55, 55, 64) 23296
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 27, 27, 64) 0
_________________________________________________________________
conv2d_1 (Conv2D) (None, 27, 27, 192) 307392
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 13, 13, 192) 0
_________________________________________________________________
conv2d_2 (Conv2D) (None, 13, 13, 384) 663936
_________________________________________________________________
conv2d_3 (Conv2D) (None, 13, 13, 256) 884992
_________________________________________________________________
conv2d_4 (Conv2D) (None, 13, 13, 256) 590080
_________________________________________________________________
max_pooling2d_2 (MaxPooling2 (None, 6, 6, 256) 0
_________________________________________________________________
flatten (Flatten) (None, 9216) 0
_________________________________________________________________
dropout (Dropout) (None, 9216) 0
_________________________________________________________________
dense (Dense) (None, 4096) 37752832
_________________________________________________________________
dropout_1 (Dropout) (None, 4096) 0
_________________________________________________________________
dense_1 (Dense) (None, 4096) 16781312
_________________________________________________________________
dropout_2 (Dropout) (None, 4096) 0
_________________________________________________________________
dense_2 (Dense) (None, 1000) 4097000
=================================================================
Total params: 61,100,840
Trainable params: 61,100,840
Non-trainable params: 0
_________________________________________________________________
import torchvision.models as models
from torchsummary import summary
import torch

# Build torchvision's AlexNet architecture; pretrained=False means no
# pretrained weights are loaded (an untrained model, which is the default).
AlexNet = models.alexnet(pretrained=False, progress=True)

# Show the module hierarchy (features / avgpool / classifier).
print(AlexNet)

# Calling .cuda() unconditionally raises on machines without a usable GPU.
# Pick the device at runtime and hand the same device string to torchsummary
# so the model and the probe input it creates live on the same device.
device = "cuda" if torch.cuda.is_available() else "cpu"

# Per-layer output shapes and parameter counts for a 3x227x227 input.
summary(AlexNet.to(device), (3, 227, 227), device=device)
AlexNet(
(features): Sequential(
(0): Conv2d(3, 64, kernel_size=(11, 11), stride=(4, 4), padding=(2, 2))
(1): ReLU(inplace=True)
(2): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
(3): Conv2d(64, 192, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
(4): ReLU(inplace=True)
(5): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
(6): Conv2d(192, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(7): ReLU(inplace=True)
(8): Conv2d(384, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(9): ReLU(inplace=True)
(10): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(11): ReLU(inplace=True)
(12): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
)
(avgpool): AdaptiveAvgPool2d(output_size=(6, 6))
(classifier): Sequential(
(0): Dropout(p=0.5, inplace=False)
(1): Linear(in_features=9216, out_features=4096, bias=True)
(2): ReLU(inplace=True)
(3): Dropout(p=0.5, inplace=False)
(4): Linear(in_features=4096, out_features=4096, bias=True)
(5): ReLU(inplace=True)
(6): Linear(in_features=4096, out_features=1000, bias=True)
)
)
----------------------------------------------------------------
Layer (type) Output Shape Param #
================================================================
Conv2d-1 [-1, 64, 56, 56] 23,296
ReLU-2 [-1, 64, 56, 56] 0
MaxPool2d-3 [-1, 64, 27, 27] 0
Conv2d-4 [-1, 192, 27, 27] 307,392
ReLU-5 [-1, 192, 27, 27] 0
MaxPool2d-6 [-1, 192, 13, 13] 0
Conv2d-7 [-1, 384, 13, 13] 663,936
ReLU-8 [-1, 384, 13, 13] 0
Conv2d-9 [-1, 256, 13, 13] 884,992
ReLU-10 [-1, 256, 13, 13] 0
Conv2d-11 [-1, 256, 13, 13] 590,080
ReLU-12 [-1, 256, 13, 13] 0
MaxPool2d-13 [-1, 256, 6, 6] 0
AdaptiveAvgPool2d-14 [-1, 256, 6, 6] 0
Dropout-15 [-1, 9216] 0
Linear-16 [-1, 4096] 37,752,832
ReLU-17 [-1, 4096] 0
Dropout-18 [-1, 4096] 0
Linear-19 [-1, 4096] 16,781,312
ReLU-20 [-1, 4096] 0
Linear-21 [-1, 1000] 4,097,000
================================================================
Total params: 61,100,840
Trainable params: 61,100,840
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.59
Forward/backward pass size (MB): 8.49
Params size (MB): 233.08
Estimated Total Size (MB): 242.16
----------------------------------------------------------------