This article uses the PyTorch framework to implement cat/dog classification with a ResNet-style network as the base model (it only borrows the residual structure; it is not one of the full ResNet architectures proposed in the paper, so it is kept much simpler). The walkthrough is divided into the parts below.
import torch.optim
import torch.utils.data
import torch.utils.data.distributed
import torchvision.transforms as transforms
import torchvision.datasets as datasets
The following parameters can be adjusted to suit your own needs.
# number of samples fed to the network per step
BATCH_SIZE = 20
# number of training epochs
EPOCHS = 50
# compute on GPU if available, otherwise on CPU
DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# learning rate
LR = 1e-3  # scientific notation, i.e. 0.001
Here I only apply a bit of simple preprocessing; you can add other operations yourself (see the augmentation sketch after the code below).
transform = transforms.Compose([
    transforms.Resize((32, 32)),  # resize every image to 32x32
    transforms.ToTensor(),        # convert the data to Tensor format
])
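If you want slightly richer preprocessing, torchvision ships a number of standard augmentations. A minimal sketch of what a training transform could look like; the specific transforms and values here are only examples, not what this article uses for its results:

# Example of a richer training transform (illustrative only).
train_transform = transforms.Compose([
    transforms.Resize((32, 32)),              # resize to a fixed 32x32
    transforms.RandomHorizontalFlip(p=0.5),   # random left-right flip
    transforms.ToTensor(),                    # convert to a Tensor in [0, 1]
    transforms.Normalize(mean=[0.5, 0.5, 0.5],
                         std=[0.5, 0.5, 0.5]),  # rough per-channel normalization
])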
Note that I use relative paths here; when you run this, change them to wherever your own dataset is stored. The layout of my dataset directories is given further below.
# load the data
train_dataset = datasets.ImageFolder('../datasets/cats_dogs/train/', transform)
# label index assigned to each class folder
print(train_dataset.class_to_idx)
test_dataset = datasets.ImageFolder('../datasets/cats_dogs/validation/', transform)
# label index assigned to each class folder
print(test_dataset.class_to_idx)
# wrap the datasets in DataLoaders
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=True)
The two print calls above output:
{'cats_2000': 0, 'dogs_2000': 1}
{'cats': 0, 'dogs': 1}
This shows that ImageFolder takes the class-folder names (in sorted order) and uses that order as the label indices, so here cats map to 0 and dogs to 1.
The first way to get the dataset is to download it from the Kaggle website; the URL is below.
https://www.kaggle.com/competitions/dogs-vs-cats-redux-kernels-edition/data
The official data only contains train and test; you have to split the validation set out of train yourself (a sketch of one way to do this follows the download links below).
The second way is to get it from the Baidu Netdisk link below, where I have already split off the validation set.
Link: https://pan.baidu.com/s/14qynfqluOU9XZI0GhgDa9Q
Extraction code: owi0
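If you download the Kaggle version, the validation split can be done with a short script. A minimal sketch, assuming the train images have already been grouped into one subfolder per class (as ImageFolder requires); the paths and the 20% ratio below are placeholders, not values used in this article:

import os
import random
import shutil

# Hypothetical paths -- adjust to wherever you unpacked the Kaggle train folder.
TRAIN_DIR = '../datasets/cats_dogs/train'
VAL_DIR = '../datasets/cats_dogs/validation'
VAL_RATIO = 0.2  # move 20% of each class into the validation folder

random.seed(0)

for class_name in os.listdir(TRAIN_DIR):
    src_dir = os.path.join(TRAIN_DIR, class_name)
    if not os.path.isdir(src_dir):
        continue
    dst_dir = os.path.join(VAL_DIR, class_name)
    os.makedirs(dst_dir, exist_ok=True)

    files = os.listdir(src_dir)
    random.shuffle(files)
    n_val = int(len(files) * VAL_RATIO)

    # Move the selected files out of train/ and into validation/.
    for fname in files[:n_val]:
        shutil.move(os.path.join(src_dir, fname), os.path.join(dst_dir, fname))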
Where the .py file is stored:
D:\jupyter\…\MyPytorch\Pytorch
Where the dataset is stored:
D:\jupyter\…\MyPytorch\datasets\cats_dogs
Under the cats_dogs folder are the train, test, and validation folders extracted from the dataset archive; this article only uses train and validation (a rough sketch of the layout follows).
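For reference, ImageFolder needs one subfolder per class. Based on the class_to_idx output printed above, the layout under cats_dogs looks roughly like this (the individual image file names inside the class folders can be anything):

cats_dogs/
    train/
        cats_2000/    <- cat images
        dogs_2000/    <- dog images
    validation/
        cats/         <- cat images
        dogs/         <- dog images
    test/             <- not used in this article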
import torch
from torch.nn import functional as F
from torch import nn, optim
# residual block used to build the ResNet
class ResBlock(nn.Module):
    def __init__(self, ch_in, ch_out):
        super(ResBlock, self).__init__()
        self.conv1 = nn.Conv2d(ch_in, ch_out, kernel_size=3, stride=1, padding=1)
        self.bn1 = nn.BatchNorm2d(ch_out)
        self.conv2 = nn.Conv2d(ch_out, ch_out, kernel_size=3, stride=1, padding=1)
        self.bn2 = nn.BatchNorm2d(ch_out)
        self.extra = nn.Sequential()
        # shortcut
        if ch_out != ch_in:
            # [b, ch_in, h, w] => [b, ch_out, h, w]
            self.extra = nn.Sequential(
                # 1x1 convolution so x on the shortcut path matches the channel count of the main path's output
                nn.Conv2d(ch_in, ch_out, kernel_size=1, stride=1),
                nn.BatchNorm2d(ch_out)
            )

    def forward(self, x):
        out = self.conv1(x)
        out = self.bn1(out)
        out = F.relu(out)
        out = self.conv2(out)
        out = self.bn2(out)
        # shortcut:
        # [b, ch_in, h, w] => [b, ch_out, h, w]
        out = self.extra(x) + out  # extra carries x over from the shortcut path
        # apply one more ReLU before returning
        out = F.relu(out)
        return out
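As a quick sanity check (a small sketch, not part of the training script), you can push a random tensor through a block with mismatched channels and confirm that the 1x1 shortcut keeps the shapes compatible:

# Minimal shape check for the residual block (illustrative only).
blk = ResBlock(64, 128)
tmp = torch.randn(2, 64, 14, 14)   # [B, C_in, H, W]
print(blk(tmp).shape)              # expected: torch.Size([2, 128, 14, 14])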
# the network implemented here is a simplified version of the architectures given in the paper
class ResNet(nn.Module):
    def __init__(self):
        super(ResNet, self).__init__()
        self.conv1 = nn.Sequential(
            nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=1),  # 3 input channels, 64 output channels
            nn.BatchNorm2d(64)
        )
        self.blk1 = ResBlock(64, 64)
        self.blk2 = ResBlock(64, 128)
        # [C*H*W] == [128*14*14]; these numbers must be computed correctly, otherwise the Linear layer raises a shape error.
        # H and W become 14 because conv1 uses stride 2 on the 32x32 input.
        self.outlayer = nn.Linear(128*14*14, 2)  # binary classification, so the output size is 2

    def forward(self, x):
        x = F.relu(self.conv1(x))
        x = self.blk1(x)
        x = self.blk2(x)
        # flatten before the fully connected layer
        x = x.view(x.size(0), -1)  # [B, C, H, W] --> [B, C*H*W]
        x = self.outlayer(x)
        return x
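To verify the 128*14*14 figure used in the Linear layer, a quick sketch under the assumption of 32x32 inputs (as set in the transform above); if your input size differs, the Linear layer will report a shape mismatch and you will need to recompute this number:

# Check the overall output shape for a 32x32 input (illustrative only).
net = ResNet()
tmp = torch.randn(2, 3, 32, 32)
print(net(tmp).shape)              # expected: torch.Size([2, 2])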
def main():
    x, label = next(iter(train_loader))  # fetch one batch from the training set to check its shape
    print('x:', x.shape, 'label:', label.shape)

    device = DEVICE
    model = ResNet().to(device)
    criteon = nn.CrossEntropyLoss().to(device)  # use cross-entropy as the loss function
    optimizer = optim.Adam(model.parameters(), lr=LR)  # set up the optimizer
    print(model)  # print the model structure

    # train the model
    for epoch in range(EPOCHS):
        model.train()  # switch to train mode
        for idx, (x, label) in enumerate(train_loader):
            x, label = x.to(device), label.to(device)
            y_ = model(x)  # equivalent to calling model's forward method
            loss = criteon(y_, label)  # compute the loss
            optimizer.zero_grad()  # clear the gradients
            loss.backward()  # backpropagate the loss
            optimizer.step()
        print("Epoch:", epoch+1, 'loss:', loss.item())

        # evaluate the model
        model.eval()  # switch to eval mode
        with torch.no_grad():  # no computation graph is built inside this block
            # test
            total_correct = 0  # number of correct predictions
            total_num = 0
            for x, label in test_loader:
                x, label = x.to(device), label.to(device)
                y_ = model(x)
                pred = y_.argmax(dim=1)  # index of the largest logit is the predicted class
                correct = torch.eq(pred, label).float().sum().item()  # count predictions that match the labels
                total_correct += correct
                total_num += x.size(0)
                # print(correct)
            acc = total_correct / total_num
            print("Epoch:", epoch+1, 'acc:', acc)

if __name__ == '__main__':
    main()