此神经网络由5个卷积层(其中一些后接最大池化层)和3个全连接层组成。为了减少全连接层中的过拟合,采用了dropout方法,此方法在实验中被证明非常有效。在ILSVRC-2012比赛中,top-5 test error rate 为 15.3%(第二名为 26.2%)。
训练时,从256×256的图像中随机提取224×224的子图,并进行水平翻转,这使训练数据增加了2048倍【(256-224)×(256-224)×2=2048】。测试时,提取图像的4个角落处和中心区域共5张大小为224×224的子图,并同样进行水平翻转,共得到10张图片,然后将网络对这10张图片的softmax输出取平均,求出其属于哪一个类别。此外,论文还对训练图像RGB通道的强度做了基于PCA的颜色扰动增强。在前两个全连接层使用dropout防止过拟合:以某一概率将隐藏层的输出值设置为0,论文中将此概率设置为0.5。
虚线代表使用 Tanh 激活函数;实线代表使用 ReLU 激活函数,其他层结构均相同。从上图可以看出 ReLU 更好:在达到同样的25%训练误差时,ReLU 比 Tanh 快6倍。
Conv_1: (224,224, 3) —> (55, 55, 96)
卷积层:
Maxpool_1: (55, 55, 96) —> (27, 27,96)
池化层:
Conv_2: (27, 27, 96)—> (27, 27, 256)
卷积层:
Maxpool_2: (27,27,256) —> (13,13,256)
池化层:
Conv_3: (13,13,256) —> (13,13,384)
卷积层:
Conv_4:(13,13,384)—> (13,13,384)
卷积层:
Conv_5:(13,13,384)—> (13,13,256)
卷积层:
Maxpool_3: (13,13,256)—> (6,6,256)
池化层:
kernel_size:(3,3)
stride:(2,2)
FC_1: 9216 (6×6×256) —> 4096
FC_2: 4096 —> 4096
FC_3: 4096 —> 1000
查看pytorch
中的alexnet
模型
# Inspect torchvision's reference AlexNet implementation.
# Prefer GPU when available; falls back to CPU otherwise.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# Downloads the ImageNet-pretrained weights on first use.
# NOTE(review): `pretrained=True` is deprecated in newer torchvision (use `weights=`) — confirm version.
model = models.alexnet(pretrained=True)
model.to(device)
print(model)
# torchsummary-style per-layer output-shape / parameter report for a 3x224x224 input.
summary(model, input_size=(3, 224, 224))
数据预处理:将图像变换为224×224大小并转为tensor,再做标准化(Normalize)以方便训练计算。(原文此处提到 mean=0.5, std=0.5,与下方代码中实际使用的均值/方差数值不一致,待核实。)
# Channel-wise normalization statistics shared by both splits.
_MEAN = (0.78, 0.75, 0.70)
_STD = (0.29, 0.29, 0.31)

# Per-split preprocessing: random crop + flip augmentation for training,
# deterministic resize for validation; both end with ToTensor + Normalize.
data_transform = {
    "train": transforms.Compose([
        transforms.RandomResizedCrop(224),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize(mean=_MEAN, std=_STD),
    ]),
    "val": transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(mean=_MEAN, std=_STD),
    ]),
}
class AlexNet(nn.Module):
    """AlexNet (Krizhevsky et al., 2012) for image classification.

    Args:
        num_classes: number of output classes of the final FC layer.
        init_weight: when True, apply Kaiming-normal init to conv layers and
            N(0, 0.01) init to linear layers.
    """

    def __init__(self, num_classes=1000, init_weight=False):
        super(AlexNet, self).__init__()
        self.feature_extraction = nn.Sequential(
            # [3, 224, 224] -> [96, 55, 55]
            nn.Conv2d(in_channels=3, out_channels=96, kernel_size=(11, 11), stride=(4, 4), padding=(2, 2)),
            nn.ReLU(inplace=True),
            # [96, 55, 55] -> [96, 27, 27]
            nn.MaxPool2d(kernel_size=(3, 3), stride=(2, 2)),
            # [96, 27, 27] -> [256, 27, 27] same padding
            nn.Conv2d(in_channels=96, out_channels=256, kernel_size=(5, 5), padding=(2, 2)),
            nn.ReLU(inplace=True),
            # [256, 27, 27] -> [256, 13, 13]
            nn.MaxPool2d(kernel_size=(3, 3), stride=(2, 2)),
            # [256, 13, 13] -> [384, 13, 13] same padding
            nn.Conv2d(in_channels=256, out_channels=384, kernel_size=(3, 3), padding=(1, 1)),
            nn.ReLU(inplace=True),
            # [384, 13, 13] -> [384, 13, 13] same padding
            nn.Conv2d(in_channels=384, out_channels=384, kernel_size=(3, 3), padding=(1, 1)),
            nn.ReLU(inplace=True),
            # [384, 13, 13] -> [256, 13, 13] same padding
            nn.Conv2d(in_channels=384, out_channels=256, kernel_size=(3, 3), padding=(1, 1)),
            nn.ReLU(inplace=True),
            # [256, 13, 13] -> [256, 6, 6]
            nn.MaxPool2d(kernel_size=(3, 3), stride=(2, 2)),
        )
        self.classifier = nn.Sequential(
            nn.Dropout(p=0.5),
            nn.Linear(in_features=256 * 6 * 6, out_features=4096),
            # FIX: ReLU activations between the FC layers were missing, so the
            # three Linear layers collapsed into a single affine map (matches
            # the original paper / torchvision's AlexNet classifier).
            nn.ReLU(inplace=True),
            nn.Dropout(p=0.5),
            nn.Linear(in_features=4096, out_features=4096),
            nn.ReLU(inplace=True),
            nn.Linear(in_features=4096, out_features=num_classes),
        )
        # FIX: init_weight was accepted but never used, making
        # _initialize_weight dead code even though get_net passes True.
        if init_weight:
            self._initialize_weight()

    def forward(self, x):
        """Run feature extraction, flatten, and classify; returns raw logits."""
        x = self.feature_extraction(x)
        x = torch.flatten(x, start_dim=1)  # keep batch dim, flatten [256, 6, 6]
        x = self.classifier(x)
        return x

    def _initialize_weight(self):
        """Kaiming-normal for conv weights, N(0, 0.01) for linear weights, zero biases."""
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0)
            elif isinstance(m, nn.Linear):
                nn.init.normal_(m.weight, 0, 0.01)
                nn.init.constant_(m.bias, 0)
def get_net(flag, lr):
    """Build the network and its Adam optimizer.

    flag: if True, fine-tune torchvision's ImageNet-pretrained AlexNet with a
        fresh 12-way head, optimizing only that new layer; otherwise train our
        own AlexNet from scratch, optimizing all parameters.
    lr: learning rate for Adam.
    Returns (net, optimizer).
    """
    if not flag:
        # From-scratch model with explicit weight initialization.
        net = AlexNet(num_classes=12, init_weight=True)
        return net, optim.Adam(net.parameters(), lr=lr)

    # Pretrained backbone; replace the final FC layer with a 12-class head.
    net = alexnet(pretrained=True)
    in_dim = net.classifier[-1].in_features
    head = list(net.classifier.children())[:-1] + [nn.Linear(in_dim, 12)]
    net.classifier = nn.Sequential(*head)
    # Only the freshly added layer receives gradient updates.
    optimizer = optim.Adam(net.classifier[-1].parameters(), lr=lr)
    return net, optimizer
def train(net, epochs, model_name, train_dataloader, val_dataloader, val_num, optimizer):
    """Train `net`, evaluate on the validation set each epoch, and save the
    state_dict with the best validation accuracy to CWD/`model_name`.

    Args:
        net: model to train (moved to GPU when available).
        epochs: number of training epochs.
        model_name: checkpoint file name, saved under the current directory.
        train_dataloader / val_dataloader: batched (images, labels) loaders.
        val_num: total number of validation samples (denominator for accuracy).
        optimizer: optimizer already bound to the parameters to update.
    """
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    if device == torch.device('cuda'):
        print("Using {} {} device for training.".format(device, torch.cuda.get_device_name(device)))
    else:
        print("Using {} device for training.".format(device))
    net.to(device)
    loss_function = nn.CrossEntropyLoss()
    # FIX: removed the no-op self-assignments `optimizer = optimizer` and
    # `epochs = epochs` present in the original.
    save_path = os.path.join(os.getcwd(), model_name)
    best_acc = 0.0
    train_dataloader_num = len(train_dataloader)
    for epoch in range(epochs):
        net.train()
        total_loss = 0.0
        for step, data in enumerate(tqdm(train_dataloader)):
            images, labels = data
            optimizer.zero_grad()
            outputs = net(images.to(device))
            loss = loss_function(outputs, labels.to(device))
            loss.backward()
            optimizer.step()
            total_loss += loss.item()
        # Validation pass: accuracy over the whole val set.
        net.eval()
        acc = 0.0
        with torch.no_grad():
            for val_data in tqdm(val_dataloader):
                val_images, val_labels = val_data
                outputs = net(val_images.to(device))
                y_pred = torch.max(outputs, dim=1)[1]
                acc += torch.eq(y_pred, val_labels.to(device)).sum().item()
        val_acc = acc / val_num
        print('[epoch {}/{}] train_loss: {:.3f} val_acc: {:.3f}'.format(epoch + 1, epochs, total_loss / train_dataloader_num, val_acc))
        # Checkpoint only when validation accuracy improves.
        if val_acc > best_acc:
            best_acc = val_acc
            torch.save(net.state_dict(), save_path)
    print("Best acc: {:.3f}".format(best_acc))
    print('Finished Training')
def main():
    """CLI entry point: parse arguments, build data/model, and run training."""
    parser = argparse.ArgumentParser(description="AlexNet 命令行参数说明")
    parser.add_argument('--batch_size', type=int, default=16, help='batch大小')
    parser.add_argument('--file_name', type=str, default='garbage_classification', help='文件名')
    parser.add_argument('--model_name', type=str, default='AlexNet.pth', help='模型名称')
    parser.add_argument('--lr', type=float, default=0.0005, help='学习率')
    parser.add_argument('--epoch', type=int, default=10, help='迭代次数')
    parser.add_argument('--flag', action='store_true', help='是否使用ImageNet上的预训练模型')
    args = parser.parse_args()

    # Datasets and loaders for both splits.
    train_dataset, train_dataloader, val_dataset, val_dataloader = get_data(args.batch_size, args.file_name)
    train_num = len(train_dataset)
    val_num = len(val_dataset)
    print('Using {} images for training, {} images for validation'.format(train_num, val_num))

    net, optimizer = get_net(args.flag, args.lr)

    # Time the full training run.
    start = time.time()
    train(net=net, epochs=args.epoch, model_name=args.model_name,
          train_dataloader=train_dataloader, val_dataloader=val_dataloader,
          val_num=val_num, optimizer=optimizer)
    elapsed = time.time() - start
    print("Train 耗时为:{}s".format(elapsed))


if __name__ == '__main__':
    main()
Using 3306 images for training, 364 images for validation
Using cuda GeForce RTX 2060 device for training.
lr: 0.0001
batch_size: 16
[epoch 1/10] train_loss: 1.349 val_acc: 0.456
[epoch 2/10] train_loss: 1.157 val_acc: 0.538
[epoch 3/10] train_loss: 1.066 val_acc: 0.577
[epoch 4/10] train_loss: 1.008 val_acc: 0.629
[epoch 5/10] train_loss: 0.988 val_acc: 0.607
[epoch 6/10] train_loss: 0.947 val_acc: 0.629
[epoch 7/10] train_loss: 0.906 val_acc: 0.646
[epoch 8/10] train_loss: 0.890 val_acc: 0.651
[epoch 9/10] train_loss: 0.842 val_acc: 0.684
[epoch 10/10] train_loss: 0.829 val_acc: 0.717
Best acc: 0.717
Finished Training
Train 耗时为:270.9s
[epoch 1/10] train_loss: 0.605 val_acc: 0.824
[epoch 2/10] train_loss: 0.409 val_acc: 0.830
[epoch 3/10] train_loss: 0.371 val_acc: 0.868
[epoch 4/10] train_loss: 0.328 val_acc: 0.865
[epoch 5/10] train_loss: 0.300 val_acc: 0.865
[epoch 6/10] train_loss: 0.299 val_acc: 0.863
[epoch 7/10] train_loss: 0.279 val_acc: 0.893
[epoch 8/10] train_loss: 0.255 val_acc: 0.879
[epoch 9/10] train_loss: 0.252 val_acc: 0.885
[epoch 10/10] train_loss: 0.254 val_acc: 0.857
Best acc: 0.893
Finished Training
Train 耗时为:194.9s
下一篇:VggNet
完整代码