本次的内容是用pytorch写一个简单的分类算法,选择了CUB-200鸟类数据集(Caltech-UCSD Birds 200,共200个类别),数据集可以自己到网上去找,挺容易的。
项目中所有的文件组成
config.py用于配置一些主要的参数
datalist.py用于数据的输出
inference.py为模型的推理部分
ShuffleNet.py模型
train.py训练模型
utils.py配合的一些小函数
以下为主要内容
import argparse


def _str2bool(value):
    """Parse a command-line boolean value.

    Fix: the original used ``type=bool``, a classic argparse trap —
    ``bool("False")`` is True because every non-empty string is truthy,
    so e.g. ``--use_cuda False`` silently *enabled* CUDA.  This helper
    accepts the usual true/false spellings explicitly.
    """
    if isinstance(value, bool):
        return value
    if value.lower() in ('yes', 'true', 't', 'y', '1'):
        return True
    if value.lower() in ('no', 'false', 'f', 'n', '0'):
        return False
    raise argparse.ArgumentTypeError(f'boolean value expected, got {value!r}')


# Training settings.  `metavar` only controls how the value placeholder is
# rendered in the --help output.
# Fix: the "(default: N)" text in several help strings contradicted the
# actual default= values; they now agree.
parser = argparse.ArgumentParser(description='PyTorch Example for all')
parser.add_argument('--train-batch-size', type=int, default=8, metavar='N',
                    help='input batch size for training (default: 8)')
parser.add_argument('--test-batch-size', type=int, default=16, metavar='N',
                    help='input batch size for testing (default: 16)')
parser.add_argument('--epochs', type=int, default=500, metavar='N',
                    help='number of epochs to train (default: 500)')
parser.add_argument('--lr', type=float, default=0.001, metavar='LR',
                    help='learning rate (default: 0.001)')
parser.add_argument('--momentum', type=float, default=0.5, metavar='M',
                    help='SGD momentum (default: 0.5)')
parser.add_argument('--seed', type=int, default=123, metavar='S',
                    help='random seed 设置种子的用意是一旦固定种子,后面依次生成的随机数其实都是固定的,有利于实验结果的产生与比较')
parser.add_argument('--use_cuda', type=_str2bool, default=True,
                    help='whether to use cuda to accelerate')
parser.add_argument('--base_data_path', type=str, default='G:/数据集/分类/',
                    help="total base data path for training")
parser.add_argument('--resume', type=_str2bool, default=True, metavar='R',
                    help="whether to use the pretrained model to start the train")
parser.add_argument('--saved_model', type=str, default="E:/完成工作/trained_model/",
                    help="the path to store the weight")
parser.add_argument('--val_num', type=float, default=0.3,
                    help="percentage of validate data")
parser.add_argument('--pretrained_weight', type=str, default="E:/完成工作/trained_model/",
                    help="the path to load the pytorch weight")
parser.add_argument('--save', type=_str2bool, default=True,
                    help="whether to save the model weight")
parser.add_argument('--project_name', type=str, default='鸟群分类',
                    help="该项目的工程名称")
parser.add_argument('--use_aug', type=_str2bool, default=False,
                    help='使用数据增广,增加数据多样性')
parser.add_argument('--model_name', type=str, default="shuffle_net",
                    help='model name')
# datalist.py — Dataset definition for the bird-classification task.
from random import shuffle
import numpy as np
from PIL import Image
from torch.utils.data import Dataset
from torchvision import transforms
from config import parser

# Parsed command-line arguments shared across this module
# (data root path, augmentation flag).
args = parser.parse_args()
'''
1. 对图片进行按比例缩放
2. 对图片进行随机位置的截取
3. 对图片进行随机的水平和竖直翻转
4. 对图片进行随机角度的旋转
5. 对图片进行亮度、对比度和颜色的随机变化
'''
# A custom Dataset needs at least this structure (__len__/__getitem__).
# NOTE: the class intentionally shadows the imported torch Dataset name,
# since callers do `from datalist import Dataset`.
class Dataset(Dataset):
    """Bird-classification dataset.

    Each annotation line has the form ``<image_path>*<label>`` where the
    label is 1-based; ``__getitem__`` converts it to a 0-based class index
    and returns a float32 CHW image scaled to [0, 1].
    """

    def __init__(self, lines):
        super(Dataset, self).__init__()
        # NOTE(review): base_path is stored but never used; paths in the
        # annotation lines are absolute — verify whether it can be removed.
        self.base_path = args.base_data_path
        self.annotation_lines = lines
        self.train_batches = len(self.annotation_lines)

    def __len__(self):
        return self.train_batches

    def __getitem__(self, index):
        # Re-shuffle once per pass (when the first sample is requested).
        # NOTE(review): with num_workers > 0 each worker shuffles its own
        # copy of the list, so this is only reliable single-process.
        if index == 0:
            shuffle(self.annotation_lines)
        n = len(self.annotation_lines)
        index = index % n
        img, y = self.collect_image_label(self.annotation_lines[index])
        if args.use_aug:
            img = self.img_augment(img)
        img = img.resize((32, 32), Image.BICUBIC)
        img = np.array(img, dtype=np.float32)
        # Fix: convert HWC -> CHW explicitly.  The original bare
        # np.transpose(...) reversed *all* axes (HWC -> CWH), which swapped
        # the spatial axes and fed the network a diagonally flipped image.
        # The same layout must be used at inference time.
        temp_img = np.transpose(img / 255.0, (2, 0, 1))
        temp_y = int(y) - 1  # labels in the annotation lines are 1-based
        return temp_img, temp_y

    def collect_image_label(self, line):
        """Split an annotation line into (PIL RGB image, label string)."""
        line = line.split('*')
        image_path = line[0]
        label = line[1]
        image = Image.open(image_path).convert("RGB")
        return image, label

    def rand(self, a=0, b=1):
        """Return a uniform random float in [a, b)."""
        return np.random.rand() * (b - a) + a

    def img_augment(self, image):
        """Apply a random subset of augmentations, each with 50% probability.

        NOTE(review): the crop transforms assume the source image is at
        least 100x100 pixels — verify against the dataset.
        """
        # Random-position crop
        random_crop = self.rand() < 0.5
        # Center crop
        center_crop = self.rand() < 0.5
        # Pad then random crop
        random_crop_padding = self.rand() < 0.5
        # Horizontal flip
        h_flip = self.rand() < 0.5
        # Vertical flip
        v_flip = self.rand() < 0.5
        # Brightness jitter
        bright = self.rand() < 0.5
        # Contrast jitter
        contrast = self.rand() < 0.5
        # Saturation jitter
        saturation = self.rand() < 0.5
        # Hue jitter
        color = self.rand() < 0.5
        # Combined brightness/contrast/saturation jitter
        compose = self.rand() < 0.5
        # Rotation up to 30 degrees
        rotate = self.rand() < 0.5
        if h_flip:
            image = transforms.RandomHorizontalFlip()(image)
        if v_flip:
            image = transforms.RandomVerticalFlip()(image)
        if rotate:
            image = transforms.RandomRotation(30)(image)
        if bright:
            image = transforms.ColorJitter(brightness=1)(image)
        if contrast:
            image = transforms.ColorJitter(contrast=1)(image)
        if saturation:
            image = transforms.ColorJitter(saturation=1)(image)
        if color:
            image = transforms.ColorJitter(hue=0.5)(image)
        if compose:
            image = transforms.ColorJitter(0.5, 0.5, 0.5)(image)
        if random_crop:
            image = transforms.RandomCrop(100)(image)
        if center_crop:
            image = transforms.CenterCrop(100)(image)
        if random_crop_padding:
            image = transforms.RandomCrop(100, padding=8)(image)
        return image
if __name__ == "__main__":
    # Smoke test: construct an (empty) dataset.
    # Fix: the original called Dataset() with no arguments, which raises
    # TypeError because the `lines` parameter is required.
    Dataset([])
import torch
import torch.nn as nn
import torch.nn.functional as F
class ShuffleBlock(nn.Module):
    """Channel shuffle from the ShuffleNet paper.

    Interleaves the channels of the grouped convolution output so that
    information flows between groups.
    """

    def __init__(self, groups):
        super(ShuffleBlock, self).__init__()
        self.groups = groups

    def forward(self, x):
        # [N, C, H, W] -> [N, g, C/g, H, W] -> [N, C/g, g, H, W] -> [N, C, H, W]
        batch, channels, height, width = x.size()
        grouped = x.view(batch, self.groups, channels // self.groups, height, width)
        shuffled = grouped.permute(0, 2, 1, 3, 4)
        # permute() breaks contiguity, so the tensor must be made contiguous
        # before view() can flatten the group dimensions back together.
        return shuffled.contiguous().view(batch, channels, height, width)
class Bottleneck(nn.Module):
    """ShuffleNet bottleneck unit.

    stride == 1: residual add (input/output channel counts must match).
    stride == 2: spatial downsampling; the avg-pooled input is concatenated
    with the branch output along the channel axis.
    """

    def __init__(self, in_planes, out_planes, stride, groups):
        super(Bottleneck, self).__init__()
        self.stride = stride
        # The bottleneck's middle layer uses a quarter of the output channels.
        mid_planes = out_planes // 4
        # The paper skips group convolution on the first pointwise layer of
        # stage 2, because its input has only 24 channels.
        first_groups = groups if in_planes != 24 else 1
        self.conv1 = nn.Conv2d(in_planes, mid_planes, kernel_size=1,
                               groups=first_groups, bias=False)
        self.bn1 = nn.BatchNorm2d(mid_planes)
        self.shuffle1 = ShuffleBlock(groups=first_groups)
        # Depthwise 3x3 convolution (groups == channels).
        self.conv2 = nn.Conv2d(mid_planes, mid_planes, kernel_size=3,
                               stride=stride, padding=1,
                               groups=mid_planes, bias=False)
        self.bn2 = nn.BatchNorm2d(mid_planes)
        self.conv3 = nn.Conv2d(mid_planes, out_planes, kernel_size=1,
                               groups=groups, bias=False)
        self.bn3 = nn.BatchNorm2d(out_planes)
        if stride == 2:
            self.shortcut = nn.Sequential(nn.AvgPool2d(3, stride=2, padding=1))
        else:
            self.shortcut = nn.Sequential()

    def forward(self, x):
        branch = F.relu(self.bn1(self.conv1(x)))
        branch = self.shuffle1(branch)
        branch = F.relu(self.bn2(self.conv2(branch)))
        branch = self.bn3(self.conv3(branch))
        skip = self.shortcut(x)
        if self.stride == 2:
            return F.relu(torch.cat([branch, skip], 1))
        return F.relu(branch + skip)
class ShuffleNet(nn.Module):
    """ShuffleNet backbone for small (CIFAR-style, 32x32) inputs.

    cfg keys:
        out_planes: output channels of the three stages.
        num_blocks: number of bottleneck units per stage.
        groups:     group count for the grouped 1x1 convolutions.
    """

    def __init__(self, cfg, num_class=200):
        super(ShuffleNet, self).__init__()
        out_planes = cfg['out_planes']
        num_blocks = cfg['num_blocks']
        groups = cfg['groups']
        # Stem: pointwise conv to 24 channels, no spatial downsampling.
        self.conv1 = nn.Conv2d(3, 24, kernel_size=1, bias=False)
        self.bn1 = nn.BatchNorm2d(24)
        self.in_planes = 24
        self.layer1 = self._make_layer(out_planes[0], num_blocks[0], groups)
        self.layer2 = self._make_layer(out_planes[1], num_blocks[1], groups)
        self.layer3 = self._make_layer(out_planes[2], num_blocks[2], groups)
        self.linear = nn.Linear(out_planes[2], num_class)

    def _make_layer(self, out_planes, num_blocks, groups):
        """Build one stage: a stride-2 unit followed by stride-1 units."""
        layers = []
        for i in range(num_blocks):
            if i == 0:
                # The stride-2 unit concatenates its shortcut, so the branch
                # only needs to produce (out_planes - in_planes) channels.
                layers.append(Bottleneck(self.in_planes,
                                         out_planes - self.in_planes,
                                         stride=2, groups=groups))
            else:
                layers.append(Bottleneck(self.in_planes,
                                         out_planes,
                                         stride=1, groups=groups))
            self.in_planes = out_planes
        return nn.Sequential(*layers)

    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.layer1(out)
        out = self.layer2(out)
        out = self.layer3(out)
        # Generalization: the original used F.avg_pool2d(out, 4), which only
        # works when the final feature map is exactly 4x4 (i.e. 32x32 input).
        # Global average pooling is identical for 32x32 inputs and also
        # supports other input resolutions.
        out = F.adaptive_avg_pool2d(out, 1)
        out = out.view(out.size(0), -1)
        out = self.linear(out)
        return out
def ShuffleNetG2():
    """Build the 2-group ShuffleNet configuration."""
    return ShuffleNet({'out_planes': [200, 400, 800],
                       'num_blocks': [4, 8, 4],
                       'groups': 2})
def ShuffleNetG3():
    """Build the 3-group ShuffleNet configuration."""
    return ShuffleNet({'out_planes': [240, 480, 960],
                       'num_blocks': [4, 8, 4],
                       'groups': 3})
if __name__=="__main__":
    # Quick sanity check: build the 2-group model and print its structure.
    model = ShuffleNetG2()
    print(model)
from __future__ import print_function
import os
import numpy as np
import torch
import torch.backends.cudnn as cudnn
import torch.nn as nn
import torch.optim as optim
from torch.autograd import Variable
from torch.utils.data import DataLoader
from tqdm import tqdm
from config import parser
from datalist import Dataset
from ShuffleNet import ShuffleNetG2
from utils import Color_print
# import torchvision
'''
细度分类
'''
# Best validation accuracy seen so far; updated by train.test() via `global`.
best_acc = 0
class train(object):
    """End-to-end training driver for the bird classifier.

    Instantiating this class immediately runs the whole pipeline: data
    loading, model construction, optional checkpoint restoration, and the
    train/test loop for ``args.epochs`` epochs.
    """

    def __init__(self):
        self.args = parser.parse_args()
        print(f"-----------{self.args.project_name}-------------")
        use_cuda = self.args.use_cuda and torch.cuda.is_available()
        # Fix: seed the CPU generator unconditionally (and the GPU one when
        # available).  The original seeded only one of the two, so CPU-side
        # randomness was unseeded on CUDA runs.
        torch.manual_seed(self.args.seed)
        if use_cuda:
            torch.cuda.manual_seed(self.args.seed)
        self.device = torch.device("cuda" if use_cuda else "cpu")
        # num_workers=0 keeps data loading in-process, which is easier to debug.
        kwargs = {'num_workers': 0, 'pin_memory': True} if use_cuda else {}

        # ---- DataLoaders --------------------------------------------------
        self.images_path = os.path.join(self.args.base_data_path,
                                        "鸟_class_num=200/data/CUB_200_2011/images.txt")
        self.labels_path = os.path.join(self.args.base_data_path,
                                        "鸟_class_num=200/data/CUB_200_2011/image_class_labels.txt")
        self.annotation_lines = self.get_image_label()
        np.random.seed(10101)  # fixed shuffle -> reproducible train/val split
        np.random.shuffle(self.annotation_lines)
        np.random.seed(None)
        self.num_val = int(len(self.annotation_lines) * self.args.val_num)
        self.num_train = len(self.annotation_lines) - self.num_val
        self.train_loader = DataLoader(
            Dataset(self.annotation_lines[:self.num_train]),
            batch_size=self.args.train_batch_size, shuffle=True, **kwargs)
        # Fix: the original sliced [self.num_train + 1:], silently dropping
        # one sample from the validation set.
        self.test_loader = DataLoader(
            Dataset(self.annotation_lines[self.num_train:]),
            batch_size=self.args.test_batch_size, shuffle=False, **kwargs)

        # ---- Model --------------------------------------------------------
        self.model = ShuffleNetG2().to(self.device)

        # Optionally warm-start from a pretrained checkpoint; fall back to
        # fresh initialisation when the weights cannot be loaded.
        if self.args.resume:
            try:
                print("load the weight from pretrained-weight file")
                model_dict = self.model.state_dict()
                pretrained_dict = torch.load(self.args.pretrained_weight, map_location=self.device)
                # Keep only tensors that exist in the current model with a
                # matching shape (fix: unknown keys used to raise KeyError).
                pretrained_dict = {k: v for k, v in pretrained_dict.items()
                                   if k in model_dict and np.shape(model_dict[k]) == np.shape(v)}
                model_dict.update(pretrained_dict)
                self.model.load_state_dict(model_dict)
                print("Finished to load the weight")
            except Exception:
                # Fix: was a bare `except:`, which also swallows
                # KeyboardInterrupt/SystemExit.
                print("can not load weight \n train the model from stratch")
                self.model.apply(self.weights_init)

        # ---- CUDA acceleration ---------------------------------------------
        if use_cuda:
            self.model = torch.nn.DataParallel(self.model,
                                               device_ids=range(torch.cuda.device_count()))
            cudnn.benchmark = True  # speeds up fixed-size convolutions

        # ---- Loss / optimiser / LR schedule --------------------------------
        self.criterion = nn.CrossEntropyLoss()
        self.optimizer = optim.Adam(self.model.parameters(), lr=self.args.lr)
        # Fix: the original created a ReduceLROnPlateau scheduler and then
        # immediately overwrote it; only the cosine schedule was ever used.
        self.scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
            self.optimizer, T_max=5, eta_min=1e-5)

        # ---- Train / evaluate loop ------------------------------------------
        for epoch in range(1, self.args.epochs + 1):
            self.train(epoch)
            self.test(epoch)
            torch.cuda.empty_cache()  # release cached GPU memory between epochs
        Color_print("finish model training")

    def train(self, epoch):
        """Run one training epoch."""
        self.model.train()
        average_loss = []
        pbar = tqdm(self.train_loader,
                    desc=f'Train Epoch{epoch}/{self.args.epochs}')
        for data, target in pbar:
            # (deprecated Variable wrappers removed; tensors autograd natively)
            data, target = data.to(self.device), target.to(self.device)
            self.optimizer.zero_grad()  # clear parameter gradients
            output = self.model(data)
            loss = self.criterion(output, target)
            loss.backward()
            average_loss.append(loss.item())
            self.optimizer.step()
            pbar.set_description(
                f'Train Epoch:{epoch}/{self.args.epochs} train_loss:{round(np.mean(average_loss), 2)}')
        # The cosine schedule advances once per epoch.
        self.scheduler.step()

    def test(self, epoch):
        """Evaluate on the validation split and checkpoint on improvement."""
        global best_acc
        self.model.eval()
        # Fix: the original allocated these with .cuda() unconditionally,
        # crashing on CPU-only machines; use the selected device instead.
        correct = torch.zeros(1).squeeze().to(self.device)
        total = torch.zeros(1).squeeze().to(self.device)
        average_loss = []
        pbar = tqdm(self.test_loader,
                    desc=f'Test Epoch{epoch}/{self.args.epochs}',
                    mininterval=0.3)
        for data, target in pbar:
            data, target = data.to(self.device), target.to(self.device)
            with torch.no_grad():
                output = self.model(data)
                # Fix: the loss was computed twice per batch; compute once.
                average_loss.append(self.criterion(output, target).item())
                pred = torch.argmax(output, 1)
                correct += (pred == target).sum().float()
                total += len(target)
            pbar.set_description(
                f'Test Epoch:{epoch}/{self.args.epochs} test_loss:{round(np.mean(average_loss), 2)}')
        predict_acc = correct / total
        if self.args.save and predict_acc > best_acc:
            best_acc = predict_acc
            save_dir = self.args.saved_model + self.args.project_name
            # Fix: makedirs(exist_ok=True) also creates missing parents,
            # where mkdir() failed.
            os.makedirs(save_dir, exist_ok=True)
            torch.save({
                'epoch': epoch,
                'model_state_dict': self.model.state_dict(),
                'optimizer_state_dict': self.optimizer.state_dict(),
                'loss': round(np.mean(average_loss), 2)
            },
                save_dir + f'/Epoch-{epoch}-Test_loss-{round(np.mean(average_loss), 4)}.pth')
        # Fix: round after scaling to percent to avoid float artefacts like
        # 51.23000000000001 (the original rounded before multiplying by 100).
        percentage = round(predict_acc.item() * 100, 2)
        # Fix: the printed file name now matches the path actually saved
        # (the original inserted model_name and a stray trailing quote).
        Color_print(
            f"\n预测准确率:{percentage}% "
            f"预测数量:{correct}/{total},"
            f"保存路径:{self.args.saved_model + self.args.project_name}/Epoch-{epoch}-Test_loss-{round(np.mean(average_loss), 4)}.pth")

    def get_image_label(self):
        """Read image paths and labels, returning "<path>*<label>" lines."""
        images = []
        labels = []
        with open(self.images_path) as f:
            for line in f:
                images.append(line.split()[-1])
        with open(self.labels_path) as f:
            for line in f:
                labels.append(line.split()[-1])
        lines = []
        for image, label in zip(images, labels):
            # NOTE(review): this prefix is hard-coded and ignores
            # self.args.base_data_path — verify it matches the dataset
            # location on the training machine.
            lines.append(
                "E:/Datasets2/Caltech-UCSD Birds-200-2011/data/CUB_200_2011/images/" + str(image) + '*' + str(label))
        return lines

    def weights_init(self, m):
        """Layer-type-aware weight initialisation (model.apply hook)."""
        if isinstance(m, nn.Linear):
            nn.init.xavier_normal_(m.weight)
            nn.init.constant_(m.bias, 0)
        elif isinstance(m, nn.Conv2d):
            # Kaiming init suits the ReLU nonlinearities used in the network.
            nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
        elif isinstance(m, nn.BatchNorm2d):
            nn.init.constant_(m.weight, 1)
            nn.init.constant_(m.bias, 0)
if __name__ == "__main__":
    # Instantiating the class runs the full training loop (see __init__).
    train()
class bcolors:
    """ANSI escape sequences for coloured terminal output."""
    HEADER = '\033[95m'
    OKBLUE = '\033[94m'
    OKGREEN = '\033[92m'
    WARNING = '\033[93m'
    FAIL = '\033[91m'
    ENDC = '\033[0m'  # reset all attributes back to the terminal default
    BOLD = '\033[1m'
    UNDERLINE = '\033[4m'
def Color_print(line):
    """Print `line` in green, resetting the terminal colour afterwards."""
    print("{}{}{}".format(bcolors.OKGREEN, line, bcolors.ENDC))
from __future__ import print_function
import sys
import numpy as np
import torch
import torch.backends.cudnn as cudnn
from PIL import Image
from torch.autograd import Variable
from config import parser
from ShuffleNet import ShuffleNetG3
'''
细度分类,用BCNN
'''
class inferencce(object):
    """Single-image inference driver.

    Constructing the object loads the model, preprocesses the image at
    `image_path` exactly like training, and immediately classifies it.

    NOTE(review): train.py builds ShuffleNetG2, but this loads the
    checkpoint into ShuffleNetG3; mismatching tensors are filtered out by
    shape below, so most weights may be silently dropped — verify that the
    two architectures agree.
    """

    def __init__(self, image_path):
        self.args = parser.parse_args()
        self.classes = self.read_classes()
        use_cuda = self.args.use_cuda and torch.cuda.is_available()
        torch.manual_seed(self.args.seed)
        self.device = torch.device("cuda" if use_cuda else "cpu")
        self.model = ShuffleNetG3().to(self.device)
        # Fix: the original called DataParallel(...).cuda() unconditionally,
        # which crashes on CPU-only machines.
        if use_cuda:
            self.model = torch.nn.DataParallel(self.model).cuda()
            cudnn.benchmark = True
        if self.args.resume:
            try:
                print("load the weight from pretrained-weight file")
                model_dict = self.model.state_dict()
                checkpoint = torch.load(self.args.pretrained_weight + "鸟群分类/Epoch-26-Test_loss-4.6228.pth",
                                        map_location=self.device)
                pretrained_dict = checkpoint['model_state_dict']
                # Keep only tensors whose shapes match the current model.
                pretrained_dict = {k: v for k, v in pretrained_dict.items()
                                   if np.shape(model_dict[k]) == np.shape(v)}
                model_dict.update(pretrained_dict)
                self.model.load_state_dict(model_dict)
                print("Finished to load the weight")
            except Exception:
                # Fix: was a bare `except:`, which also swallows
                # KeyboardInterrupt/SystemExit.
                print("can not load weight")
                sys.exit()
        # Preprocess exactly like training: resize, scale to [0, 1], HWC->CHW.
        self.image = Image.open(image_path).convert("RGB")
        self.image = self.image.resize((32, 32), Image.BICUBIC)
        self.image = np.array(self.image, dtype=np.float32)
        # Fix: bare np.transpose reversed *all* axes (HWC -> CWH, a
        # diagonally flipped image); use the explicit channel-first
        # permutation.  Must match the layout used by the training dataset.
        self.image = np.transpose(self.image / 255, (2, 0, 1))
        self.predict()

    def predict(self):
        """Run the network on the preprocessed image and print the result."""
        with torch.no_grad():
            tensor = torch.from_numpy(self.image)
            # Add the batch dimension and move to the chosen device.
            # Fix: the original moved to CUDA based on the flag alone,
            # crashing when --use_cuda is set but no GPU exists.
            tensor = torch.unsqueeze(tensor, dim=0).float().to(self.device)
            output = self.model(tensor)
            pred = torch.argmax(output, 1)
            print(int(pred.item()))
            # Human-readable class name (index-aligned with model output).
            print(self.classes[pred.item()])

    def read_classes(self):
        """Load the list of class names from the dataset's classes.txt."""
        # NOTE(review): hard-coded path — verify against your dataset layout.
        path = "E:/Datasets2/Caltech-UCSD Birds-200-2011/data/CUB_200_2011/classes.txt"
        names = []
        with open(path, 'r') as f:
            for line in f:
                names.append(line.split()[-1])
        return names
if __name__ == "__main__":
    # Classify a single local image; constructing the object runs predict().
    path = "C:/Users/lth/Desktop/0.jpg"
    inferencce(path)
这里由于我做的鸟群分类存在类间相似、类内差异大的特点,属于细粒度分类,所以训练不像cifar、mnist那样容易出好结果。我训练了大概50个epoch之后勉强能猜对一半,欢迎大家指出我代码的不足,或者给我更好的学习建议,谢谢了。