Digital Image Processing: GoogLeNet Code (PyTorch), a Walkthrough of train.py

The explanations follow the same order in which the code is read.

Preprocessing the training dataset:

1. device = torch.device() -> selects whether training runs on the GPU or the CPU.

2. The argument passed in is really a conditional expression: "cuda:0" if torch.cuda.is_available() else "cpu" -> if a GPU is available it is used for training; otherwise the CPU is used.

3. transforms.Compose() -> assembles the preprocessing pipeline.

4. transforms.RandomResizedCrop(224) -> takes a random crop and resizes it to 224x224.

5. transforms.RandomHorizontalFlip() -> flips the image horizontally at random, a simple form of data augmentation.

6. transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)) -> normalizes each channel so that features with larger numeric values do not dominate the overall analysis; a short check of the whole pipeline is sketched right after this list.
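
To see what the training pipeline produces, here is a minimal sketch (not part of train.py) that pushes one dummy RGB image through it; in the real script the images come from the flower dataset:

from PIL import Image
from torchvision import transforms

train_transform = transforms.Compose([transforms.RandomResizedCrop(224),
                                      transforms.RandomHorizontalFlip(),
                                      transforms.ToTensor(),
                                      transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])

img = Image.new("RGB", (500, 375))         # dummy image; any size works
out = train_transform(img)
print(out.shape)                           # torch.Size([3, 224, 224])
print(out.min().item(), out.max().item())  # values lie inside [-1, 1]
# Because the crop and the flip are random, calling train_transform on the
# same photo twice generally produces two different tensors.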

Preprocessing the test (validation) dataset:

1. transforms.Resize((224, 224)) -> resizes every image to exactly 224x224.

2. transforms.ToTensor() -> converts the image to a tensor.

3. transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)) -> the same normalization as for training, again preventing features with larger numeric values from dominating (see the sketch below).
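
Normalize computes (x - mean) / std channel by channel, so with mean 0.5 and std 0.5 the [0, 1] output of ToTensor is mapped to [-1, 1]. A standalone check (not part of train.py):

import torch
from torchvision import transforms

norm = transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
x = torch.tensor([0.0, 0.5, 1.0]).view(3, 1, 1)  # one "pixel" per channel
print(norm(x).view(-1))                          # tensor([-1., 0., 1.])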

Loading the dataset:

1. data_root = os.path.abspath() -> gets the root directory that contains the dataset.

2. os.getcwd() -> gets the current working directory.

3. os.path.join() -> joins the two paths passed to it.

4. "../.." -> goes up two directory levels.

5. data_root + "/data_set/flower_data/" -> appends the dataset folders to the root to build the complete path.

6. datasets.ImageFolder() -> loads the dataset from a folder-per-class directory tree (the expected layout is sketched after the code snippet at the end of this list).

7. root=image_path + "train" -> passes in the path of the training set (image_path already ends with a slash).

8. transform=data_transform["train"] -> applies the training preprocessing defined above, i.e.:

 "train": transforms.Compose([transforms.RandomResizedCrop(224),
                                 transforms.RandomHorizontalFlip(),
                                 transforms.ToTensor(),
                                 transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))]),
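
datasets.ImageFolder derives the class labels from the sub-directory names, so the paths above assume a layout like the one below (the five class folders match the comment in the complete code). The assertion at the end is just a sanity check you can add; it is not in the original script:

# expected layout under data_set/flower_data/
#   train/
#       daisy/      *.jpg
#       dandelion/  *.jpg
#       roses/      *.jpg
#       sunflower/  *.jpg
#       tulips/     *.jpg
#   val/
#       (the same five class folders)
import os

data_root = os.path.abspath(os.path.join(os.getcwd(), "../.."))
image_path = data_root + "/data_set/flower_data/"
assert os.path.exists(image_path), "dataset root {} does not exist".format(image_path)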
 

Handling the classes:

1. train_dataset.class_to_idx -> returns the mapping from class name to index.

2. cla_dict = dict((val, key) for key, val in flower_list.items()) -> iterates over that mapping and swaps each key and value, so an index predicted by the model can be looked up directly to get the class name.

3. json_str = json.dumps(cla_dict, indent=4) -> encodes the dictionary as a JSON string.

4. with open('class_indices.json', 'w') as json_file: json_file.write(json_str) -> writes the index-to-class dictionary to a JSON file.

5. batch_size = 32 -> 32 images are loaded per training batch.

6. torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=0) -> wraps the dataset in a loader with these parameters (a quick check of the batch shape and of the saved JSON file is sketched after the training loop below).

7. net = GoogLeNet(num_classes=5, aux_logits=True, init_weights=True) -> builds the GoogLeNet model with 5 output classes and the auxiliary classifiers enabled, initializing the weights so that training starts from scratch.

8. Unlike AlexNet, GoogLeNet produces three predictions and therefore three loss values, because there are two auxiliary classifiers in addition to the main one. The losses of the main classifier and the two auxiliary classifiers are weighted 1, 0.3 and 0.3 respectively:

for epoch in range(30):
    # train
    net.train()
    running_loss = 0.0
    for step, data in enumerate(train_loader, start=0):
        images, labels = data
        optimizer.zero_grad()
        logits, aux_logits2, aux_logits1 = net(images.to(device))
        loss0 = loss_function(logits, labels.to(device))
        loss1 = loss_function(aux_logits1, labels.to(device))
        loss2 = loss_function(aux_logits2, labels.to(device))
        loss = loss0 + loss1 * 0.3 + loss2 * 0.3
        loss.backward()
        optimizer.step()

        # print statistics
        running_loss += loss.item()
        # print train process
        rate = (step + 1) / len(train_loader)
        a = "*" * int(rate * 50)
        b = "." * int((1 - rate) * 50)
        print("\rtrain loss: {:^3.0f}%[{}->{}]{:.3f}".format(int(rate * 100), a, b, loss), end="")
    print()
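
Two quick checks tie the earlier steps together: each batch from the DataLoader has shape [32, 3, 224, 224] (the last batch of an epoch may be smaller), and class_indices.json turns a predicted index back into a class name. A sketch, assuming train_loader has been built and the JSON file has already been written; predicted_index is just a hypothetical model output:

import json

images, labels = next(iter(train_loader))
print(images.shape)                    # torch.Size([32, 3, 224, 224])
print(labels.shape)                    # torch.Size([32])

with open('class_indices.json', 'r') as f:
    class_indict = json.load(f)        # JSON keys are strings: {"0": "daisy", ...}
predicted_index = 3                    # hypothetical prediction
print(class_indict[str(predicted_index)])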

Also, during validation the auxiliary classifiers are not needed to compute the loss, so in eval mode the network returns only the single main output.
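
A quick way to confirm this, assuming model.py follows the usual pattern of returning the auxiliary outputs only while self.training is True (as the GoogLeNet model in the linked repository does):

import torch
from model import GoogLeNet

net = GoogLeNet(num_classes=5, aux_logits=True, init_weights=True)
dummy = torch.randn(2, 3, 224, 224)   # small dummy batch

net.train()
out = net(dummy)
print(type(out), len(out))   # a tuple of 3: main output plus the two auxiliary outputs

net.eval()
with torch.no_grad():
    out = net(dummy)
print(out.shape)             # a single tensor of shape [2, 5]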

Finally, the complete code:

import torch
import torch.nn as nn
from torchvision import transforms, datasets
import torchvision
import json
import matplotlib.pyplot as plt
import os
import torch.optim as optim
from model import GoogLeNet

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(device)

data_transform = {
    "train": transforms.Compose([transforms.RandomResizedCrop(224),
                                 transforms.RandomHorizontalFlip(),
                                 transforms.ToTensor(),
                                 transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))]),
    "val": transforms.Compose([transforms.Resize((224, 224)),
                               transforms.ToTensor(),
                               transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])}

data_root = os.path.abspath(os.path.join(os.getcwd(), "../.."))  # get data root path
image_path = data_root + "/data_set/flower_data/"  # flower data set path

train_dataset = datasets.ImageFolder(root=image_path + "train",
                                     transform=data_transform["train"])
train_num = len(train_dataset)

# {'daisy':0, 'dandelion':1, 'roses':2, 'sunflower':3, 'tulips':4}
flower_list = train_dataset.class_to_idx
cla_dict = dict((val, key) for key, val in flower_list.items())
# write dict into json file
json_str = json.dumps(cla_dict, indent=4)
with open('class_indices.json', 'w') as json_file:
    json_file.write(json_str)

batch_size = 32
train_loader = torch.utils.data.DataLoader(train_dataset,
                                           batch_size=batch_size, shuffle=True,
                                           num_workers=0)

validate_dataset = datasets.ImageFolder(root=image_path + "val",
                                        transform=data_transform["val"])
val_num = len(validate_dataset)
validate_loader = torch.utils.data.DataLoader(validate_dataset,
                                              batch_size=batch_size, shuffle=False,
                                              num_workers=0)

# test_data_iter = iter(validate_loader)
# test_image, test_label = test_data_iter.next()

# net = torchvision.models.googlenet(num_classes=5)
# model_dict = net.state_dict()
# pretrain_model = torch.load("googlenet.pth")
# del_list = ["aux1.fc2.weight", "aux1.fc2.bias",
#             "aux2.fc2.weight", "aux2.fc2.bias",
#             "fc.weight", "fc.bias"]
# pretrain_dict = {k: v for k, v in pretrain_model.items() if k not in del_list}
# model_dict.update(pretrain_dict)
# net.load_state_dict(model_dict)
net = GoogLeNet(num_classes=5, aux_logits=True, init_weights=True)
net.to(device)
loss_function = nn.CrossEntropyLoss()
optimizer = optim.Adam(net.parameters(), lr=0.0003)

best_acc = 0.0
save_path = './googleNet.pth'
for epoch in range(30):
    # train
    net.train()
    running_loss = 0.0
    for step, data in enumerate(train_loader, start=0):
        images, labels = data
        optimizer.zero_grad()
        logits, aux_logits2, aux_logits1 = net(images.to(device))
        loss0 = loss_function(logits, labels.to(device))
        loss1 = loss_function(aux_logits1, labels.to(device))
        loss2 = loss_function(aux_logits2, labels.to(device))
        loss = loss0 + loss1 * 0.3 + loss2 * 0.3
        loss.backward()
        optimizer.step()

        # print statistics
        running_loss += loss.item()
        # print train process
        rate = (step + 1) / len(train_loader)
        a = "*" * int(rate * 50)
        b = "." * int((1 - rate) * 50)
        print("\rtrain loss: {:^3.0f}%[{}->{}]{:.3f}".format(int(rate * 100), a, b, loss), end="")
    print()

    # validate
    net.eval()
    acc = 0.0  # accumulate accurate number / epoch
    with torch.no_grad():
        for val_data in validate_loader:
            val_images, val_labels = val_data
            outputs = net(val_images.to(device))  # in eval mode the model only returns the main output
            predict_y = torch.max(outputs, dim=1)[1]
            acc += (predict_y == val_labels.to(device)).sum().item()
        val_accurate = acc / val_num
        if val_accurate > best_acc:
            best_acc = val_accurate
            torch.save(net.state_dict(), save_path)
        print('[epoch %d] train_loss: %.3f  test_accuracy: %.3f' %
              (epoch + 1, running_loss / (step + 1), val_accurate))

print('Finished Training')

https://github.com/WZMIAOMIAO/deep-learning-for-image-processing
