网上的大多数博客,都是讲某个模型的原理以及如何实现,但很少有人从零开始写如何构建一个项目。接下来,我将从零开始,用迁移学习,十步实现一个视觉项目。
import torch
import torch.nn as nn
from torchvision.models import resnet34, resnet50, resnet101, resnet152, vgg16, vgg19, inception_v3
from torch.nn import functional as F
import torch.optim as optim
import cv2
from torchvision import transforms
from torch.utils import data
from torch.autograd import Variable
import os
def get_pic(img_path):
    """Load an image from disk with OpenCV.

    Returns a BGR ndarray, or None when the path cannot be read
    (cv2.imread does not raise on failure).
    """
    return cv2.imread(img_path)
torchvision.transforms 介绍
def process_pic(img):
    """Turn a raw OpenCV image into a normalized 64x64 CHW tensor.

    Pipeline: PIL conversion -> resize -> center crop -> tensor -> normalize
    (ImageNet mean/std, matching the pretrained backbone).
    """
    pipeline = transforms.Compose([
        transforms.ToPILImage(),      # the transforms below expect a PIL image
        transforms.Resize([64, 64]),  # rescale to 64x64
        transforms.CenterCrop(64),    # crop the central 64x64 patch
        transforms.ToTensor(),        # (H, W, C) uint8 -> (C, H, W) float in [0, 1]
        transforms.Normalize([0.485, 0.456, 0.406],
                             [0.229, 0.224, 0.225]),  # ImageNet mean / std
    ])
    return pipeline(img)
def get_data(type="Training", num=num):  # noqa: A002 - `type` kept for interface compatibility
    """Collect images and integer labels for the first `num` classes.

    Walks ../data/.../fruits-360/<type>, where each subdirectory is one
    fruit class; the directory's index becomes the class label.

    Args:
        type: dataset split directory name ("Training" or "Test").
        num: number of class directories to load (module-level default).

    Returns:
        (img_list, label_list): list of CHW float ndarrays and their labels.
    """
    pic_path = "../data/Fruit360/archive/fruits-360_dataset/fruits-360/" + type
    img_list = []
    label_list = []
    class_dirs = os.listdir(pic_path)
    for index, class_name in enumerate(class_dirs[:num]):
        class_path = pic_path + '/' + class_name
        # fix: the original rebound `img_path` (the list being iterated)
        # inside its own loop; distinct names avoid the shadowing.
        for file_name in os.listdir(class_path):
            img = get_pic(class_path + '/' + file_name)
            img = process_pic(img)  # -> normalized 64x64 tensor
            img_list.append(img.numpy())
            label_list.append(index)
    return img_list, label_list
TensorDataset和DataLoader
# Materialize both splits as tensors.
train_img_list, train_label_list = get_data("Training")
X_train = torch.Tensor(train_img_list).float()
y_train = torch.tensor(train_label_list)

test_img_list, test_label_list = get_data("Test")
X_test = torch.Tensor(test_img_list).float()
y_test = torch.tensor(test_label_list)

# TensorDataset pairs image and label tensors, much like Python's zip().
train_set = data.TensorDataset(X_train, y_train)
test_set = data.TensorDataset(X_test, y_test)

# DataLoader wraps a dataset and yields shuffled mini-batches of 32.
train_loader = data.DataLoader(train_set, batch_size=32, shuffle=True)
test_loader = data.DataLoader(test_set, batch_size=32, shuffle=True)

# iter() gives an iterator over the loader; next() pulls one batch as a sanity check.
train_sample = next(iter(train_loader))
test_sample = next(iter(test_loader))
迁移学习
# Transfer learning: start from an ImageNet-pretrained ResNet-34.
model = resnet34(pretrained=True)  # pretrained=True loads ImageNet weights
model_weight_path = '../checkpoints/resnet34-pre.pth'
# strict=False tolerates key mismatches between checkpoint and model.
missing_keys, unexpected_keys = model.load_state_dict(
    torch.load(model_weight_path), strict=False)

# Swap the final fully-connected layer for a `num`-way classifier head.
dim = model.fc.in_features
model.fc = nn.Linear(dim, num)
for param in model.fc.parameters():
    param.requires_grad = True
model = model.to(device)  # keep if running on GPU; remove otherwise

criterion = nn.CrossEntropyLoss()  # classification loss
# NOTE(review): lr=0.01 is unusually high for Adam; 1e-3 is the common default.
optimizer = optim.Adam(model.parameters(), lr=0.01)
def train():
    """Run one training epoch over train_loader.

    Returns:
        Mean batch loss for the epoch.
    """
    model.train()
    total_loss = 0
    for image, label in train_loader:
        # Variable is a deprecated no-op since PyTorch 0.4; plain tensors suffice.
        image = image.to(device)
        label = label.to(device)
        # Zero gradients first, otherwise they accumulate across batches.
        optimizer.zero_grad()
        output = model(image)
        loss = criterion(output, label)
        loss.backward()   # autograd computes gradients from the recorded ops
        optimizer.step()  # gradient-descent parameter update
        total_loss += loss.item()
    return total_loss / float(len(train_loader))
def evaluate():
    """Evaluate the model on test_loader.

    Returns:
        (mean batch loss, correct count, accuracy in percent, total samples).
    """
    model.eval()
    corrects = eval_loss = total_num = 0
    # fix: wrap in no_grad so evaluation does not build autograd graphs
    # (saves memory; also makes the per-batch empty_cache call unnecessary).
    with torch.no_grad():
        for image, label in test_loader:
            image = image.to(device)
            label = label.to(device)
            pred = model(image)
            loss = criterion(pred, label)
            eval_loss += loss.item()
            # argmax over classes, compared against the ground-truth labels
            corrects += (torch.max(pred, 1)[1].view(label.size()) == label).sum()
            total_num += len(label)
    return eval_loss / float(len(test_loader)), corrects, corrects * 100.0 / total_num, total_num
# Train for 10 epochs; Ctrl-C stops early but the model is still saved below.
try:
    print('-' * 90)
    train_loss = []
    valid_loss = []
    accuracy = []
    for epoch in range(1, 11):
        loss = train()
        train_loss.append(loss)  # fix: train_loss was declared but never appended to
        print(f"第{epoch}个周期. 损失值为{loss}")
        loss, corrects, acc, size = evaluate()
        valid_loss.append(loss * 1000.)
        accuracy.append(acc)
        print('-' * 10)
        print(f"第{epoch}个周期, 正确率为{acc}%")
        print('-' * 10)
except KeyboardInterrupt:
    print('-' * 90)
    print("Exiting from training early")

# Persist the fine-tuned weights.
torch.save(model.state_dict(), "../checkpoints/resnet34.pth")
import torch
import torch.nn as nn
from torchvision.models import resnet34, resnet50, resnet101, resnet152, vgg16, vgg19, inception_v3
import numpy as np
import pandas as pd
from torch.nn import functional as F
import torch.optim as optim
import cv2
from torchvision import transforms
import torchvision.datasets as dsets
from torch.utils import data
from torch.autograd import Variable
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
import os
# Select GPU 0 when CUDA is available, otherwise fall back to the CPU.
use_cuda = torch.cuda.is_available()
device = torch.device("cuda:0" if use_cuda else "cpu")
# Number of fruit classes to load (also sizes the classifier head).
num = 25
def get_pic(img_path):
    """Load an image from disk with OpenCV.

    Returns a BGR ndarray, or None when the path cannot be read
    (cv2.imread does not raise on failure).
    """
    return cv2.imread(img_path)
def process_pic(img):
    """Turn a raw OpenCV image into a normalized 64x64 CHW tensor.

    Pipeline: PIL conversion -> resize -> center crop -> tensor -> normalize
    (ImageNet mean/std, matching the pretrained backbone).
    """
    pipeline = transforms.Compose([
        transforms.ToPILImage(),      # the transforms below expect a PIL image
        transforms.Resize([64, 64]),  # rescale to 64x64
        transforms.CenterCrop(64),    # crop the central 64x64 patch
        transforms.ToTensor(),        # (H, W, C) uint8 -> (C, H, W) float in [0, 1]
        transforms.Normalize([0.485, 0.456, 0.406],
                             [0.229, 0.224, 0.225]),  # ImageNet mean / std
    ])
    return pipeline(img)
def get_data(type="Training", num=num):  # noqa: A002 - `type` kept for interface compatibility
    """Collect images and integer labels for the first `num` classes.

    Walks ../data/.../fruits-360/<type>, where each subdirectory is one
    fruit class; the directory's index becomes the class label.

    Args:
        type: dataset split directory name ("Training" or "Test").
        num: number of class directories to load (module-level default).

    Returns:
        (img_list, label_list): list of CHW float ndarrays and their labels.
    """
    pic_path = "../data/Fruit360/archive/fruits-360_dataset/fruits-360/" + type
    img_list = []
    label_list = []
    class_dirs = os.listdir(pic_path)
    for index, class_name in enumerate(class_dirs[:num]):
        class_path = pic_path + '/' + class_name
        # fix: the original rebound `img_path` (the list being iterated)
        # inside its own loop; distinct names avoid the shadowing.
        for file_name in os.listdir(class_path):
            img = get_pic(class_path + '/' + file_name)
            img = process_pic(img)  # -> normalized 64x64 tensor
            img_list.append(img.numpy())
            label_list.append(index)
    return img_list, label_list
# Materialize both splits as tensors.
train_img_list, train_label_list = get_data("Training")
X_train = torch.Tensor(train_img_list).float()
y_train = torch.tensor(train_label_list)

test_img_list, test_label_list = get_data("Test")
X_test = torch.Tensor(test_img_list).float()
y_test = torch.tensor(test_label_list)

# TensorDataset pairs image and label tensors, much like Python's zip().
train_set = data.TensorDataset(X_train, y_train)
test_set = data.TensorDataset(X_test, y_test)

# DataLoader wraps a dataset and yields shuffled mini-batches of 32.
train_loader = data.DataLoader(train_set, batch_size=32, shuffle=True)
test_loader = data.DataLoader(test_set, batch_size=32, shuffle=True)

# iter() gives an iterator over the loader; next() pulls one batch as a sanity check.
train_sample = next(iter(train_loader))
test_sample = next(iter(test_loader))
# Transfer learning: start from an ImageNet-pretrained ResNet-34.
model = resnet34(pretrained=True)  # pretrained=True loads ImageNet weights
model_weight_path = '../checkpoints/resnet34-pre.pth'
# strict=False tolerates key mismatches between checkpoint and model.
missing_keys, unexpected_keys = model.load_state_dict(
    torch.load(model_weight_path), strict=False)

# Swap the final fully-connected layer for a `num`-way classifier head.
dim = model.fc.in_features
model.fc = nn.Linear(dim, num)
for param in model.fc.parameters():
    param.requires_grad = True
model = model.to(device)  # keep if running on GPU; remove otherwise

criterion = nn.CrossEntropyLoss()  # classification loss
# NOTE(review): lr=0.01 is unusually high for Adam; 1e-3 is the common default.
optimizer = optim.Adam(model.parameters(), lr=0.01)
def train():
    """Run one training epoch over train_loader.

    Returns:
        Mean batch loss for the epoch.
    """
    model.train()
    total_loss = 0
    for image, label in train_loader:
        # Variable is a deprecated no-op since PyTorch 0.4; plain tensors suffice.
        image = image.to(device)
        label = label.to(device)
        # Zero gradients first, otherwise they accumulate across batches.
        optimizer.zero_grad()
        output = model(image)
        loss = criterion(output, label)
        loss.backward()   # autograd computes gradients from the recorded ops
        optimizer.step()  # gradient-descent parameter update
        total_loss += loss.item()
    return total_loss / float(len(train_loader))
def evaluate():
    """Evaluate the model on test_loader.

    Returns:
        (mean batch loss, correct count, accuracy in percent, total samples).
    """
    model.eval()
    corrects = eval_loss = total_num = 0
    # fix: wrap in no_grad so evaluation does not build autograd graphs
    # (saves memory; also makes the per-batch empty_cache call unnecessary).
    with torch.no_grad():
        for image, label in test_loader:
            image = image.to(device)
            label = label.to(device)
            pred = model(image)
            loss = criterion(pred, label)
            eval_loss += loss.item()
            # argmax over classes, compared against the ground-truth labels
            corrects += (torch.max(pred, 1)[1].view(label.size()) == label).sum()
            total_num += len(label)
    return eval_loss / float(len(test_loader)), corrects, corrects * 100.0 / total_num, total_num
# Train for 10 epochs; Ctrl-C stops early but the model is still saved below.
try:
    print('-' * 90)
    train_loss = []
    valid_loss = []
    accuracy = []
    for epoch in range(1, 11):
        loss = train()
        train_loss.append(loss)  # fix: train_loss was declared but never appended to
        print(f"第{epoch}个周期. 损失值为{loss}")
        loss, corrects, acc, size = evaluate()
        valid_loss.append(loss * 1000.)
        accuracy.append(acc)
        print('-' * 10)
        print(f"第{epoch}个周期, 正确率为{acc}%")
        print('-' * 10)
except KeyboardInterrupt:
    print('-' * 90)
    print("Exiting from training early")

# Persist the fine-tuned weights.
torch.save(model.state_dict(), "../checkpoints/resnet34.pth")
单独创建一个文件demo,进行单张图片测试
import torch
from torchvision.models import resnet34
from torchvision import transforms
import torch.nn as nn
import torch.nn.functional as F
import cv2
import matplotlib.pyplot as plt
num = 25  # number of fruit classes in the fine-tuned head
# Rebuild the ResNet-34 architecture, then resize the classifier head.
model = resnet34(pretrained=True)  # pretrained=True loads ImageNet weights
model.fc = nn.Linear(model.fc.in_features, num)
model = model.cuda()  # NOTE: a GPU is assumed here; remove .cuda() for CPU-only runs
# Restore the fine-tuned weights saved by the training script.
model.load_state_dict(torch.load("./checkpoints/resnet34.pth"))
model.eval()  # inference mode: freezes dropout / batch-norm behavior
def get_pic(img_path):
    """Load an image from disk with OpenCV.

    Returns a BGR ndarray, or None when the path cannot be read
    (cv2.imread does not raise on failure).
    """
    return cv2.imread(img_path)
def process_pic(img):
    """Turn a raw OpenCV image into a normalized 64x64 CHW tensor.

    Pipeline: PIL conversion -> resize -> center crop -> tensor -> normalize
    (ImageNet mean/std, matching the pretrained backbone).
    """
    pipeline = transforms.Compose([
        transforms.ToPILImage(),      # the transforms below expect a PIL image
        transforms.Resize([64, 64]),  # rescale to 64x64
        transforms.CenterCrop(64),    # crop the central 64x64 patch
        transforms.ToTensor(),        # (H, W, C) uint8 -> (C, H, W) float in [0, 1]
        transforms.Normalize([0.485, 0.456, 0.406],
                             [0.229, 0.224, 0.225]),  # ImageNet mean / std
    ])
    return pipeline(img)
# Single-image sanity check on one test sample.
pic_path = "./data/Fruit360/archive/fruits-360_dataset/fruits-360/Test/Banana Lady Finger"
img = process_pic(get_pic(pic_path + "/33_100.jpg"))
with torch.no_grad():
    # Add a batch dimension, (C, H, W) -> (1, C, H, W), before the forward pass.
    output = model(torch.unsqueeze(img, 0).cuda())
    # Softmax over class logits, then pick the most likely class index.
    predict = F.softmax(output, dim=1)
    predict_cla = torch.argmax(predict).cpu().numpy()
print(str(predict_cla+1))