★b站:https://space.bilibili.com/18161609/channel/index
★CSDN:https://blog.csdn.net/qq_37541097
AlexNet是2012年ILSVRC 2012(ImageNet Large Scale Visual Recognition Challenge)竞赛的冠军网络。
论文链接:ImageNet Classification with Deep Convolutional Neural Networks
AlexNet网络的亮点:
对于代码的解释都在注释中,方便对照查看学习。
import torch.nn as nn
import torch
class AlexNet(nn.Module):
def __init__(self, num_classes=1000, init_weights=False):
super(AlexNet, self).__init__()
self.features = nn.Sequential( # nn.Sequential可以将一系列层结构进行打包,组合成新的模块,对于网络层次多的网络,借助其精简代码
# 取名为features,代表专门提取图像特征的结构
nn.Conv2d(3, 48, kernel_size=11, stride=4, padding=2), # input[3, 224, 224] output[48, 55, 55]
nn.ReLU(inplace=True), # inplace可理解为增加计算量且降低内存使用量的一种方法
nn.MaxPool2d(kernel_size=3, stride=2), # output[48, 27, 27]
nn.Conv2d(48, 128, kernel_size=5, padding=2), # output[128, 27, 27]
nn.ReLU(inplace=True),
nn.MaxPool2d(kernel_size=3, stride=2), # output[128, 13, 13]
nn.Conv2d(128, 192, kernel_size=3, padding=1), # output[192, 13, 13]
nn.ReLU(inplace=True),
nn.Conv2d(192, 192, kernel_size=3, padding=1), # output[192, 13, 13]
nn.ReLU(inplace=True),
nn.Conv2d(192, 128, kernel_size=3, padding=1), # output[128, 13, 13]
nn.ReLU(inplace=True),
nn.MaxPool2d(kernel_size=3, stride=2), # output[128, 6, 6]
# AlexNet架构为:Conv1,Maxpool1,Conv2,Maxpool2,Conv3,Conv4,Conv5,Maxpool3
)
self.classifier = nn.Sequential( # 借助nn.Sequential将全连接层打包为新的模块
nn.Dropout(p=0.5), # 在展平与FC1之间加上Dropout,0.5代表失活比例,默认等于0.5
nn.Linear(128 * 6 * 6, 2048), # output size:[6, 6, 128],且FC1节点数为2048
nn.ReLU(inplace=True), # 以上为FC1
nn.Dropout(p=0.5), # 在两个全连接层中加Dropout
nn.Linear(2048, 2048), # FC1节点个数为2048,FC2节点个数为2048
nn.ReLU(inplace=True),
nn.Linear(2048, num_classes), # 根据实际情况,设置不同的num_classes
)
if init_weights: # 初始化权重选项,初始化中设置为True时,可进入初始化权重函数。
self._initialize_weights() # 此处不需要初始化,pytorch会自动进行kaiming初始化
def forward(self, x): # 正向传播过程
x = self.features(x)
x = torch.flatten(x, start_dim=1) # 展平处理,从index=1开始(torch的tensor排列为[batch,channel,height,width]),
# 即从channel这个维度开始展平,展成一个一维向量,也可用view(x.shape[0],-1)来完成展平
x = self.classifier(x) # 得到的输出即网络的预测输出
return x
def _initialize_weights(self):
for m in self.modules():
if isinstance(m, nn.Conv2d):
nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
if m.bias is not None:
nn.init.constant_(m.bias, 0)
elif isinstance(m, nn.Linear):
nn.init.normal_(m.weight, 0, 0.01)
nn.init.constant_(m.bias, 0)
import os
import json
import torch
import torch.nn as nn
from torchvision import transforms, datasets, utils
import matplotlib.pyplot as plt
import numpy as np
import torch.optim as optim
from tqdm import tqdm
from model import AlexNet
def main():
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
# 用torch.device来制定训练过程中使用的设备,如果当前有可使用的GPU设备的话,默认使用硬件上的第1块GPU,否则,用CPU设备
print("using {} device.".format(device))
data_transform = {
"train": transforms.Compose([transforms.RandomResizedCrop(224), # 随机裁剪,裁剪到224*224大小
transforms.RandomHorizontalFlip(), # 随机翻转(水平方向)
transforms.ToTensor(), # 转化为Tensor
transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))]), # 进行标准化处理
"val": transforms.Compose([transforms.Resize((224, 224)), # cannot 224, must (224, 224) # 转化为224*224
transforms.ToTensor(), # 转化为Tensor
transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])} # 标准化处理
data_root = os.path.abspath(os.path.join(os.getcwd(), "../..")) # get data root path
# 获取数据集所在根目录,”..“代表返回上一层目录,则"../.."代表返回上上层目录
image_path = os.path.join(data_root, "data_set", "flower_data") # flower data set path
assert os.path.exists(image_path), "{} path does not exist.".format(image_path) # 进入到flower_data文件夹下
train_dataset = datasets.ImageFolder(root=os.path.join(image_path, "train"), # 代表加载训练集的数据
transform=data_transform["train"]) # 调用上面定义的train数据预处理方法
# 用ImageFolder函数加载数据集,
train_num = len(train_dataset) # 打印训练集有多少张图片
# {'daisy':0, 'dandelion':1, 'roses':2, 'sunflower':3, 'tulips':4}
flower_list = train_dataset.class_to_idx # 获取分类的名称所对应的索引为一个字典flower_list
cla_dict = dict((val, key) for key, val in flower_list.items()) # 遍历上述字典并将键值对中键和值对调
# 这样做根据预测完返回的索引即可得到其类别,对调后变为{0:'daisy',:1:'dandelion',2: 'roses', 3:'sunflower', 4:'tulips'}
# write dict into json file
json_str = json.dumps(cla_dict, indent=4) # 通过json包将cla_dict字典编码为json格式
with open('class_indices.json', 'w') as json_file: # 打开class_indices文件保存为.json文件,方便预测时读取它的信息
json_file.write(json_str)
batch_size = 32 # 定义batch_size大小为32
nw = min([os.cpu_count(), batch_size if batch_size > 1 else 0, 8]) # number of workers
print('Using {} dataloader workers every process'.format(nw))
train_loader = torch.utils.data.DataLoader(train_dataset,
batch_size=batch_size, shuffle=True,
num_workers=nw)
# 通过DataLoader函数将train_dataset加载进来, shuffle=True代表随机参数,随机从数据集中获取一批批的数据
# num_workers代表加载数据所使用的线程数,window系统下不可以将其设定为非0值,num_workers=0代表用1个主线程加载数据,linux系统下可设为非0值
# 下面代码载入了验证集,与载入训练集方法类似
validate_dataset = datasets.ImageFolder(root=os.path.join(image_path, "val"),
transform=data_transform["val"])
val_num = len(validate_dataset)
validate_loader = torch.utils.data.DataLoader(validate_dataset,
batch_size=4, shuffle=False,
num_workers=nw)
print("using {} images for training, {} images for validation.".format(train_num,
val_num))
# 下面代码是如何查看数据集的代码
# test_data_iter = iter(validate_loader)
# test_image, test_label = test_data_iter.next()
#
# def imshow(img):
# img = img / 2 + 0.5 # unnormalize
# npimg = img.numpy()
# plt.imshow(np.transpose(npimg, (1, 2, 0)))
# plt.show()
#
# print(' '.join('%5s' % cla_dict[test_label[j].item()] for j in range(4)))
# imshow(utils.make_grid(test_image))
net = AlexNet(num_classes=5, init_weights=True) # 实例化AlexNet
net.to(device) # 将网络指认到设备上
loss_function = nn.CrossEntropyLoss() # 损失函数为针对多类别的交叉熵损失函数
# pata = list(net.parameters()) # 调试用:可查看模型的参数
optimizer = optim.Adam(net.parameters(), lr=0.0002) # 采用Adam优化器,优化对象是网络中的所有参数net.parameters(),学习率0.0002
epochs = 10
save_path = './AlexNet.pth' # 给定保存权重的一个路径
best_acc = 0.0 # 定义一个最佳准确率,为了训练网络过程中能够保存准确率最高的一次训练模型
train_steps = len(train_loader)
for epoch in range(epochs): # 迭代epoches次(本例为10次)
# train
net.train() # 训练过程中用net.train,启用Dropout方法.[Dropout和BatchNormalization只在训练过程中使用]
running_loss = 0.0 # 统计训练过程中的平均损失
train_bar = tqdm(train_loader)
for step, data in enumerate(train_bar):
images, labels = data # 将数据分为图像和标签
optimizer.zero_grad() # 清空之前的梯度信息
outputs = net(images.to(device)) # 将训练图像指认到设备上,进行正向传播得到输出
loss = loss_function(outputs, labels.to(device)) # 计算预测值与真实值的损失,也将labels指认到设备上
loss.backward() # 将损失反向传播到各结点中
optimizer.step() # 用optimizer更新每一个结点的参数
# print statistics
running_loss += loss.item() # 将loss值累加到running_loss中
train_bar.desc = "train epoch[{}/{}] loss:{:.3f}".format(epoch + 1,
epochs,
loss)
# validate
# 训练完一轮过后,进行验证
net.eval() # 验证过程中用net.eval(),会关闭Dropout方法
acc = 0.0 # accumulate accurate number / epoch
with torch.no_grad(): # 用以禁止pytorch对参数进行跟踪,即在验证过程中不去计算损失梯度
val_bar = tqdm(validate_loader) # tqdm是一个快速、可扩展的进度条,此处显示载入验证集的进度
for val_data in val_bar:
val_images, val_labels = val_data # 遍历验证集,将其分为图片和标签
outputs = net(val_images.to(device)) # 将图片指认到设备中,将网络进行正向传播得到输出
predict_y = torch.max(outputs, dim=1)[1] # 将求得输出的最大值作为预测
acc += torch.eq(predict_y, val_labels.to(device)).sum().item() # 用acc累计预测集中预测正确的样本个数
# 将预测值与真实标签进行对比,正确为1,错误为0。将其进行求和,并通过item()获得数值,加到acc上。
val_accurate = acc / val_num # 验证集准确率=验证正确个数/样本总个数
print('[epoch %d] train_loss: %.3f val_accuracy: %.3f' %
(epoch + 1, running_loss / train_steps, val_accurate))
if val_accurate > best_acc:
best_acc = val_accurate # 如果当前训练准确率大于最优准确率,则更新最优准确率,以获得最优参数
torch.save(net.state_dict(), save_path)
print('Finished Training')
if __name__ == '__main__':
main()
img = data_transform(img)
# expand batch dimension
img = torch.unsqueeze(img, dim=0)
with torch.no_grad():
# predict class
output = torch.squeeze(model(img.to(device))).cpu()
predict = torch.softmax(output, dim=0)
predict_cla = torch.argmax(predict).numpy() # 通过torch.argmax()方法获得概率最大的输出的索引值
import os
import json
import torch
from PIL import Image
from torchvision import transforms
import matplotlib.pyplot as plt
from model import AlexNet
def main():
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") # 选择训练中使用的设备
# 图片预处理函数,对载入图片进行预处理
data_transform = transforms.Compose(
[transforms.Resize((224, 224)), # 转化为224*224大小
transforms.ToTensor(), # 转化为Tensor
transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))]) # 标准化处理
# load image
img_path = "../tulip.jpg"
assert os.path.exists(img_path), "file: '{}' dose not exist.".format(img_path)
img = Image.open(img_path) # 载入图像
plt.imshow(img) # 展示图像
# [N, C, H, W]
img = data_transform(img) # 对图片记性预处理
# expand batch dimension
img = torch.unsqueeze(img, dim=0) # 读入的图片只有C,H,W3个维度,对其扩充一个维度
# read class_indict
json_path = './class_indices.json'
assert os.path.exists(json_path), "file: '{}' dose not exist.".format(json_path)
json_file = open(json_path, "r") # 读取在train.py中保存的json文件(内容为索引及对应的类别名称)
class_indict = json.load(json_file) # 解码成所需要使用的字典
# create model
model = AlexNet(num_classes=5).to(device) # 建立模型,初始化网络,num_classes的值即为数据集类别的数量
# load model weights
weights_path = "./AlexNet.pth"
assert os.path.exists(weights_path), "file: '{}' dose not exist.".format(weights_path)
model.load_state_dict(torch.load(weights_path)) # 载入网络模型
model.eval() # 用eval()模式,即关闭掉Dropout方法
with torch.no_grad(): # 用此方法让pytorch不去跟踪变量的损失梯度
# predict class
output = torch.squeeze(model(img.to(device))).cpu() # 将数据通过model进行正向传播得到输出,紧接着将输出进行压缩,即将batch这个维度压缩掉
predict = torch.softmax(output, dim=0) # 经过softmax处理后即可变为概率分布
predict_cla = torch.argmax(predict).numpy() # 通过torch.argmax()方法获得概率最大的输出的索引值
print_res = "class: {} prob: {:.3}".format(class_indict[str(predict_cla)],
predict[predict_cla].numpy()) # 获得概率最大输出的类别名称及对应概率
plt.title(print_res)
print(print_res)
plt.show()
if __name__ == '__main__':
main()