基于 PyTorch 的猫狗分类。若要换成自己的数据集,只需修改 ImageFolder 的 root 路径和网络模型的最后一层即可。
刚开始学习深度学习,模型结构参考了 AlexNet。程序中还有很多不合理的地方,希望大佬们可以指导一下!
也希望能有bro来一起交流学习下!
训练部分
import torchvision.datasets import torch from torch import nn, tensor import matplotlib.pyplot as plt from torch.utils.data import DataLoader from torchvision.transforms import transforms from sklearn.metrics import precision_score from sklearn.metrics import recall_score from sklearn.metrics import f1_score #transform data_transform = { "train": transforms.Compose([transforms.RandomResizedCrop(224), transforms.RandomHorizontalFlip(), transforms.ToTensor(), transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))]), "test": transforms.Compose([transforms.Resize((224, 224)), transforms.ToTensor(), transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])} # 准备数据集 # train_data = torchvision.datasets.CIFAR10(root="../CIFAR10", train=True, transform=data_transform["train"], download=True) # test_data = torchvision.datasets.CIFAR10(root="../CIFAR10", train=False, transform=data_transform["test"], download=True) # 使用自己的数据集 train_data = torchvision.datasets.ImageFolder(root=r"E:\work\Dog_Cat_Dataset\train", transform =data_transform["train"]) test_data = torchvision.datasets.ImageFolder(root=r"E:\work\Dog_Cat_Dataset\test", transform = data_transform["test"]) # 利用dataloader来加载数据集 traindata = DataLoader(train_data, batch_size=64, shuffle=True) testdata = DataLoader(test_data, batch_size=64, shuffle=True) # 搭建神经网络 输入 [3,224,224] class Net(nn.Module): def __init__(self): super(Net, self).__init__() self.model = nn.Sequential( nn.Conv2d(in_channels=3, out_channels=32, kernel_size=7, stride=3, padding=2), nn.BatchNorm2d(32), nn.ReLU(), nn.MaxPool2d(2), nn.Conv2d(in_channels=32, out_channels=64, kernel_size=5, padding=2), nn.BatchNorm2d(64), nn.ReLU(), nn.MaxPool2d(2), nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, padding=1), nn.BatchNorm2d(128), nn.ReLU(), nn.MaxPool2d(2), nn.Flatten(), nn.Dropout(p=0.5), nn.Linear(in_features=128*9*9, out_features=1024), nn.ReLU(), nn.Dropout(p=0.5), nn.Linear(in_features=1024, out_features=1024), nn.ReLU(), 
nn.Linear(in_features=1024, out_features=2) ) def forward(self, x): x = self.model(x) return x if __name__ == '__main__': net = Net() input = torch.ones((64, 3, 224, 224)) output = net(input) print(output.shape) # 长度 train_data_size = len(train_data) test_data_size = len(test_data) print(train_data_size) print(test_data_size) device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") print("using {} device.".format(device)) net = Net() # 损失函数 loss_function = nn.CrossEntropyLoss() # 优化器 learning_rate = 0.001 optimizer = torch.optim.Adam(net.parameters(), lr = learning_rate,weight_decay=0.01) # 学习率调度机制 # lambda1 = lambda epoch:0.95 ** epoch # scheduler = torch.optim.lr_scheduler.LambdaLR(optimizer,lr_lambda = lambda1) # 设置训练网络的一些参数 # 记录训练的次数 total_train_step = 0 # 记录测试的次数 total_test_step = 0 # 训练的轮数 epoch = 100 # loss和acc的空列表 loss1 = [] loss2 = [] acc1 = [] acc2 = [] # lr_list = [] for i in range(epoch): total_train_loss = 0 total_train_acc = 0 print("第{}轮训练开始".format(i+1)) # 训练步骤开始 for data in traindata: imgs, targets = data outputs = net(imgs) loss = loss_function(outputs, targets) # 优化器优化模型 optimizer.zero_grad() loss.backward() optimizer.step() acc_train = (outputs.argmax(1) == targets).sum() total_train_loss = total_train_loss + loss total_train_acc = total_train_acc + acc_train # 输出训练轮数和训练中的loss total_train_step = total_train_step + 1 if total_train_step % 10 == 0: print("训练次数:{}, loss:{}".format(total_train_step, loss)) # 查看学习率变化 # print("epoch={}, lr={}".format(epoch, optimizer.state_dict()['param_groups'][0]['lr'])) # scheduler.step() # lr_list.append(optimizer.state_dict()['param_groups'][0]['lr']) # 测试步骤 total_test_loss = 0 total_test_acc = 0 net.eval() # 后面不进行反向传播,不会进行计算图的构建,用于测试 with torch.no_grad(): for data in testdata: imgs, targets = data outputs = net(imgs) loss = loss_function(outputs, targets) total_test_loss = total_test_loss + loss # 对比标签是否相等,然后布尔值求和,相等即为1,不等为0 acc_test = (outputs.argmax(1) == targets).sum() total_test_acc = 
total_test_acc + acc_test print("整体训练集上的loss:{}".format(total_train_loss)) print("整体测试集上的loss:{}".format(total_test_loss)) print("整体训练集上的正确率:{}".format(total_train_acc / train_data_size)) print("整体测试集上的正确率:{}".format(total_test_acc / test_data_size)) print("整体测试集上的precision:{}".format(precision_score(targets, outputs.argmax(1), average='macro'))) print("整体测试集上的recall:{}".format(recall_score(targets, outputs.argmax(1), average='macro'))) print("整体测试集上的F1-score:{}".format(f1_score(targets, outputs.argmax(1), average='macro'))) # 用append往空列表中增加元素,元素数量必须与epoch相同 loss1.append(total_train_loss) loss2.append(total_test_loss) acc1.append(total_test_acc / test_data_size) acc2.append(total_train_acc / train_data_size) sum = tensor(0) n = 10 for i in range(epoch-1-n,epoch-1): sum = torch.add(sum, acc1[i]) print("最后{}个epoch平均准确率为:{}".format(n,sum/10)) torch.save(net.state_dict(), "net_model") # 画图 print(loss1) print(acc1) fig = plt.figure() plt.subplot(221) plt.plot(range(epoch), loss1, label="Train loss") plt.xlabel("epoch") plt.ylabel("train loss") plt.legend() plt.subplot(222) plt.plot(range(epoch), loss2, label="Test loss") plt.xlabel("epoch") plt.ylabel("test loss") plt.legend() plt.subplot(223) plt.plot(range(epoch), acc2, label=" Train Acc") plt.xlabel("epoch") plt.ylabel("acc") plt.legend() plt.subplot(224) plt.plot(range(epoch), acc1, label=" Test Acc") plt.xlabel("epoch") plt.ylabel("acc") plt.legend() plt.show()
预测部分
from torchvision.transforms import transforms from PIL import Image import torch from torch import nn transform = transforms.Compose([transforms.Resize((224, 224)), transforms.ToTensor()]) path = "1.webp" image = Image.open(path) image = image.convert("RGB") image = transform(image) image = image.unsqueeze(dim=0) device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") print("using {} device.".format(device)) # 输入[3,224,224] class Net(nn.Module): def __init__(self): super(Net, self).__init__() self.model = nn.Sequential( nn.Conv2d(in_channels=3, out_channels=32, kernel_size=7, stride=3, padding=2), nn.BatchNorm2d(32), nn.ReLU(), nn.MaxPool2d(2), nn.Conv2d(in_channels=32, out_channels=64, kernel_size=5, padding=2), nn.BatchNorm2d(64), nn.ReLU(), nn.MaxPool2d(2), nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, padding=1), nn.BatchNorm2d(128), nn.ReLU(), nn.MaxPool2d(2), nn.Flatten(), nn.Dropout(p=0.5), nn.Linear(in_features=128*9*9, out_features=1024), nn.ReLU(), nn.Dropout(p=0.5), nn.Linear(in_features=1024, out_features=1024), nn.ReLU(), nn.Linear(in_features=1024, out_features=2) ) def forward(self, x): x = self.model(x) return x net = Net() net.load_state_dict(torch.load("net_model")) net.eval() outputs = net(image) mark = (outputs.argmax(1)).item() if mark == 0: print("猫") else: print("狗")