首先要从torchvision包中import datasets和transforms, 用于加载数据集。而这又分两种情况,数据集在Pytorch中已有或者自定义数据集
Pytorch中已有(以CIFAR10举例)
# 'data' and './data' are equivalent: both create a data/ folder under the
# current working directory and download the dataset into it.
# The transform pipeline resizes each image, converts it to a tensor and then
# standardizes every channel with the given per-channel mean/std.
cifar_transform = transforms.Compose([
    transforms.Resize((32, 32)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225]),
])
train_set = datasets.CIFAR10('./data', train=True, download=True,
                             transform=cifar_transform)
自定义数据集(例如Pokemon,与后文对应)
# Arguments of the custom dataset: root directory, output image size, mode.
train_set = Pokemon(root='pokemon', resize=224, mode='train')
然后从torch.utils.data包中import DataLoader,用于一次加载多张照片
# shuffle=True randomizes the order in which samples are drawn.
# batchsz is the number of images processed per batch; per the author's note,
# too small a value makes training unstable.
train_loader = DataLoader(dataset=train_set, batch_size=batchsz, shuffle=True)
之后初始化网络,设置gpu,设置优化器及loss。需要从torch中import nn和optim
# Use the first GPU when CUDA is available; otherwise fall back to the CPU so
# the script still runs on machines without a GPU.
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
# ResNet is the model being trained (it may be a hand-written one); `net`
# refers to this network from here on.
net = ResNet().to(device)
# Optimizer: SGD here; Adam is the alternative mentioned by the author.
optimizer = optim.SGD(net.parameters(), lr=learning_rate)
# Cross-entropy loss for classification.
criteon = nn.CrossEntropyLoss().to(device)
之后是对网络的训练和评价(以简单的分类问题为例)。
for epoch in range(epochs):
    # Training mode (the counterpart of net.eval() below).
    net.train()
    for batchidx, (x, label) in enumerate(train_loader):
        x, label = x.to(device), label.to(device)
        logits = net(x)  # forward pass
        loss = criteon(logits, label)
        # Backpropagation: clear old gradients, compute new ones, update weights.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    # Reports the loss of the last batch of this epoch only.
    print('epoch:{} loss:{}'.format(epoch, loss.item()))

    # Evaluation mode: freezes BatchNorm statistics and disables Dropout.
    net.eval()
    with torch.no_grad():  # no gradients are needed for evaluation
        total_correct = 0
        total_num = 0
        for x, label in test_loader:
            x, label = x.to(device), label.to(device)
            logits = net(x)  # calling net(...) invokes its forward method
            # argmax over dim=1 collapses the class dimension: for every row
            # it yields the index of the largest logit, i.e. the predicted
            # class, which can be compared directly with label.
            pred = logits.argmax(dim=1)
            # eq compares element-wise (1 where equal, 0 otherwise); the sum
            # is the number of correct predictions in this batch.
            correct = torch.eq(pred, label).float().sum().item()
            total_correct += correct
            # Size of dimension 0 of x, i.e. the number of samples in the batch.
            total_num += x.size(0)
        acc = total_correct / total_num
        print('epoch:{} acc:{}'.format(epoch, acc))
如果要保存最佳模型参数,则在每个epoch训练后进行验证,如果发现验证集准确率高于最好准确率,更新最好准确率,存储此时的模型参数,使用时再将其加载。
# Checkpoint file, stored in the current working directory.
best_path = 'best.mdl'
# Persist only the parameters (state_dict) of the network.
torch.save(net.state_dict(), best_path)
# After all training has finished, restore the best parameters for testing.
net.load_state_dict(torch.load(best_path))
写一个自定义CNN模型有哪些步骤?首先,所有的自定义模型都需要继承nn.Module,并实现__init__方法和forward方法(初始化和前向传播)。下面先以Lenet5举例说明这两个方法,后文再给出手写ResNet18的完整例子。
__init__方法
def __init__(self, ch_in, ch_out, stride=1):
    """Create the layers of the example network.

    Shows both ways of declaring members: ``nn.Sequential`` containers
    (``conv_unit``, ``fc_unit``) and single layers (``conv1``/``bn1``/
    ``conv2``/``bn2``).

    :param ch_in: input channels of the stand-alone ``conv1`` layer
    :param ch_out: output channels of ``conv1``/``conv2`` and their BatchNorms
    :param stride: stride of ``conv1`` (default 1)
    """
    super(Lenet5, self).__init__()  # mandatory: sets up nn.Module internals
    # A member can be a single layer or an nn.Sequential chaining several.
    self.conv_unit = nn.Sequential(
        nn.Conv2d(3, 16, kernel_size=5, stride=1, padding=0),
        nn.MaxPool2d(kernel_size=2, stride=2, padding=0),
        nn.Conv2d(16, 32, kernel_size=5, stride=1, padding=0),
        nn.MaxPool2d(kernel_size=2, stride=2, padding=0),
    )
    # Each Linear's in_features must equal the previous layer's out_features.
    # 32*5*5 is the flattened conv_unit output for a 3x32x32 input
    # (32 -> 28 -> 14 -> 10 -> 5 per spatial side).
    self.fc_unit = nn.Sequential(
        nn.Linear(32*5*5, 32),
        nn.ReLU(),
        nn.Linear(32, 10)
    )
    # Single-layer members.
    self.conv1 = nn.Conv2d(ch_in, ch_out, kernel_size=3, stride=stride, padding=1)
    self.bn1 = nn.BatchNorm2d(ch_out)
    self.conv2 = nn.Conv2d(ch_out, ch_out, kernel_size=3, stride=1, padding=1)
    self.bn2 = nn.BatchNorm2d(ch_out)
forward方法
def forward(self, x):
    """Run the conv stack, flatten each sample, then apply the FC stack."""
    feat = self.conv_unit(x)
    # Keep the batch axis (feat.size(0) is the batch size) and collapse the
    # rest: [b, 32, 5, 5] => [b, 32*5*5] for a 3x32x32 input.
    flat = feat.view(feat.size(0), -1)
    # [b, 32*5*5] => output of fc_unit (the class scores).
    return self.fc_unit(flat)
神经网络层的函数
nn.Conv2d: 2维卷积
#nn.Conv2d(输入channel,输出channel,卷积核大小,stride,padding)
nn.MaxPool2d:2维池化
#nn.MaxPool2d(池化窗口大小,stride,padding)
nn.BatchNorm2d:将数据norm到一定范围内,防止梯度弥散
#nn.BatchNorm2d(期望输入特征数(一般是channel数),eps,动量,affine仿射)
nn.Dropout:随机失活
#nn.Dropout(p(被丢弃的节点比例),inplace(是否原地操作,默认为False))
举一个手写ResNet18的例子(ResBlk结构如下图所示)
class ResBlk(nn.Module):
    """Residual block: two 3x3 conv+BatchNorm layers plus a shortcut branch."""

    def __init__(self, ch_in, ch_out, stride=1):
        """
        :param ch_in: number of input channels
        :param ch_out: number of output channels
        :param stride: stride of the first conv; stride=2 roughly halves the
            spatial size, which keeps the feature maps from growing as the
            channel count increases
        """
        super(ResBlk, self).__init__()
        self.conv1 = nn.Conv2d(ch_in, ch_out, kernel_size=3, stride=stride, padding=1)
        self.bn1 = nn.BatchNorm2d(ch_out)
        self.conv2 = nn.Conv2d(ch_out, ch_out, kernel_size=3, stride=1, padding=1)
        self.bn2 = nn.BatchNorm2d(ch_out)

        # Identity shortcut by default.
        self.extra = nn.Sequential()
        # The element-wise add needs both branches to have the same shape, so
        # project the input whenever the main path changes the channel count
        # OR the spatial size (stride != 1). Checking channels alone leaves
        # e.g. ResBlk(512, 512, stride=2) with mismatched spatial sizes.
        if stride != 1 or ch_out != ch_in:
            self.extra = nn.Sequential(
                nn.Conv2d(ch_in, ch_out, kernel_size=1, stride=stride),
                nn.BatchNorm2d(ch_out)
            )

    def forward(self, x):
        """Return relu(shortcut(x) + main_path(x))."""
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out))
        # Shortcut connection, element-wise add.
        # [b, ch_in, h, w] => [b, ch_out, h', w']
        out = self.extra(x) + out
        out = F.relu(out)
        return out
class ResNet18(nn.Module):
    """ResNet-style classifier: a stem conv, four ResBlk stages, a linear head."""

    def __init__(self, num_classes=10):
        """
        :param num_classes: size of the output layer (default 10, the value
            that was previously hard-coded)
        """
        super(ResNet18, self).__init__()
        self.conv1 = nn.Sequential(
            nn.Conv2d(3, 64, kernel_size=3, stride=3, padding=0),
            nn.BatchNorm2d(64)
        )
        # Followed by 4 residual blocks.
        self.blk1 = ResBlk(64, 128, stride=2)
        self.blk2 = ResBlk(128, 256, stride=2)
        self.blk3 = ResBlk(256, 512, stride=2)
        self.blk4 = ResBlk(512, 512, stride=2)
        # Output layer; its 512 inputs come from the 1x1 pooled feature map.
        self.outlayer = nn.Linear(512*1*1, num_classes)

    def forward(self, x):
        """Map an image batch [b, 3, H, W] to class scores [b, num_classes]."""
        x = F.relu(self.conv1(x))
        # [b, 64, h, w] => [b, 512, h', w']; every block is built with
        # stride=2, so the spatial size shrinks along the way.
        x = self.blk1(x)
        x = self.blk2(x)
        x = self.blk3(x)
        x = self.blk4(x)
        # [b, 512, h', w'] => [b, 512, 1, 1]: force-pool to 1x1.
        x = F.adaptive_avg_pool2d(x, [1, 1])
        x = x.view(x.size(0), -1)
        x = self.outlayer(x)
        return x
以Pokemon数据集为例:
class Pokemon(Dataset):
    """Image-classification dataset laid out as ``root/<class_name>/<image>``.

    Every sub-directory of ``root`` is one class. On first use the image
    paths and labels are shuffled once and written to ``root/images.csv``;
    later instances read that file, so the train/val/test splits stay
    consistent with each other.
    """

    def __init__(self, root, resize, mode):
        """
        :param root: dataset root directory
        :param resize: side length of the output image
        :param mode: 'train' (first 60%), 'val' (60%-80%); any other value
            selects the remaining 20% (test)
        """
        super(Pokemon, self).__init__()
        self.root = root
        self.resize = resize

        # Class-folder name => integer label, numbered in sorted folder order.
        self.name2label = {}
        for name in sorted(os.listdir(os.path.join(root))):
            if not os.path.isdir(os.path.join(root, name)):  # skip plain files
                continue
            self.name2label[name] = len(self.name2label)

        # image paths, labels
        self.images, self.labels = self.load_csv('images.csv')

        # load_csv guarantees len(images) == len(labels), so one pair of
        # bounds serves both lists.
        total = len(self.images)
        if mode == 'train':  # 60%
            lo, hi = 0, int(0.6 * total)
        elif mode == 'val':  # 20% = 60% -> 80%
            lo, hi = int(0.6 * total), int(0.8 * total)
        else:  # 20% = 80% -> 100%
            lo, hi = int(0.8 * total), total
        self.images = self.images[lo:hi]
        self.labels = self.labels[lo:hi]

    def load_csv(self, filename):
        """Return ``(images, labels)`` read from ``root/filename``.

        If the CSV does not exist yet it is created first: every ``*.png``
        and ``*.jpg`` file in each class folder is collected, shuffled and
        written as ``path,label`` rows.
        """
        csv_path = os.path.join(self.root, filename)

        # First run: build and save the index.
        if not os.path.exists(csv_path):
            images = []
            for name in self.name2label:
                # glob returns every matching file of this class as a list,
                # e.g. 'pokemon\\mewtwo\\00001.png'
                images += glob.glob(os.path.join(self.root, name, '*.png'))
                images += glob.glob(os.path.join(self.root, name, '*.jpg'))
            random.shuffle(images)
            with open(csv_path, mode='w', newline='') as f:
                writer = csv.writer(f)
                for img in images:
                    # img: 'pokemon\\bulbasaur\\00000000.png'; the class name
                    # is the parent directory of the file.
                    name = img.split(os.sep)[-2]
                    label = self.name2label[name]
                    writer.writerow([img, label])
                print('writen into csv file:', filename)

        # Read the index back from the CSV file; newline='' is what the csv
        # module asks for on files it parses.
        images, labels = [], []
        with open(csv_path, newline='') as f:
            reader = csv.reader(f)
            for row in reader:
                # 'pokemon\\bulbasaur\\00000000.png', 0
                img, label = row
                label = int(label)
                images.append(img)
                labels.append(label)
        assert len(images) == len(labels)
        return images, labels

    def __len__(self):
        """Number of samples in the selected split."""
        return len(self.images)

    def denormalize(self, x_hat):
        """Invert the Normalize step of ``__getitem__``: ``x_hat * std + mean``."""
        mean = [0.485, 0.456, 0.406]
        std = [0.229, 0.224, 0.225]
        # mean/std: [3] => [3, 1, 1] so they broadcast over height and width.
        mean = torch.tensor(mean).unsqueeze(1).unsqueeze(1)
        std = torch.tensor(std).unsqueeze(1).unsqueeze(1)
        x = x_hat * std + mean
        return x

    def __getitem__(self, idx):
        """Return ``(image_tensor, label_tensor)`` for sample ``idx``."""
        img, label = self.images[idx], self.labels[idx]
        # Resize to 1.25x the target first so the rotation + center crop
        # below still yields a full resize x resize image.
        scaled = int(self.resize * 1.25)
        tf = transforms.Compose([
            # string path => image data
            lambda x: Image.open(x).convert('RGB'),
            # The size must be ONE (h, w) tuple: a second positional argument
            # to Resize is interpreted as the interpolation mode.
            transforms.Resize((scaled, scaled)),
            # NOTE(review): the random rotation is applied in every mode,
            # including val/test.
            transforms.RandomRotation(15),
            transforms.CenterCrop(self.resize),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                 std=[0.229, 0.224, 0.225])
        ])
        img = tf(img)
        label = torch.tensor(label)
        return img, label
# Total number of elements across all parameters of the network.
num = sum(p.numel() for p in net.parameters())
比如ImageNet的数据集和Pokemon数据集的分布比较接近,我就可以使用ImageNet上训练好的模型来解决这个任务,这就是迁移学习。迁移学习的重点在于迁移什么知识以及如何迁移知识,对于如何迁移,我们可以有很多方法。比如取AlexNet输出直接加SVM分类,也可以固定前面参数,重新单独训练最后一层,其中后面这种方法叫做fine-tuning。
trained_model = resnet18(pretrained=True)
# Reuse every child module of the pretrained network except the last one
# (per the author's note: keep the first 17 layers, train the final layer).
backbone = list(trained_model.children())[:-1]  # output: [b, 512, 1, 1]
model = nn.Sequential(
    *backbone,
    Flatten(),          # [b, 512, 1, 1] => [b, 512]
    nn.Linear(512, 5),  # new output layer sized for this task's classes
).to(device)
# Flatten 层如下
class Flatten(nn.Module):
    """Collapse every dimension after the batch one: [b, d1, d2, ...] => [b, d1*d2*...]."""

    def __init__(self):
        super(Flatten, self).__init__()

    def forward(self, x):
        """Return ``x`` flattened from dimension 1 onwards.

        ``torch.flatten`` is used instead of ``view(-1, prod(shape[1:]))``:
        it also accepts non-contiguous inputs (e.g. after ``permute``) and
        empty batches, and avoids building a temporary tensor just to
        multiply the shape.
        """
        return torch.flatten(x, 1)
1]
Flatten(), # [b, 512, 1, 1] => [b, 512]
nn.Linear(512, 5) # 使得输出类别适应本题
).to(device)