PyTorch 中有许多需要注意的地方,这里总结一下。
1、要保证数据集输出的图片尺寸和格式一致(否则无法拼成一个 batch)
# Preprocessing applied to every sample: resize, take a random 224-pixel crop,
# then convert to a tensor. The fixed crop size is what gives all samples the
# same shape so they can be collated into a batch.
preprocess = transforms.Compose([
    transforms.Resize(512),
    transforms.RandomCrop(224),
    transforms.ToTensor(),
])
train_data = CustomDataset(file_list, transform=preprocess)
data_loader = DataLoader(train_data, batch_size=2, shuffle=True)

print(len(data_loader))
print(data_loader)
# Iterate over the loader once and dump every (image batch, label batch) pair.
for images, labels in data_loader:
    print(images, labels)
2、对于每个图片的处理
class CustomDataset(Dataset):
    """Map-style dataset yielding one ``(image, label)`` pair per file entry.

    Args:
        file_list_dir: Sized, indexable collection of entries. Each entry is
            passed to ``process_img``, which is expected to return the image
            and its label.
        transform: Optional callable applied to the loaded image. When it is
            ``None`` the image is returned exactly as ``process_img`` gave it.
    """

    def __init__(self, file_list_dir, transform=None):
        self.image_file_lists = file_list_dir
        self.len = len(self.image_file_lists)
        self.transform = transform

    def __getitem__(self, index):
        """Load, optionally transform, and return the sample at ``index``.

        Exactly one sample is read per call. Indices are taken modulo the
        dataset length, so values past the end wrap around to the start.

        Raises:
            IndexError: If the dataset contains no entries.
        """
        if self.len == 0:
            # Without this guard the modulo below fails with ZeroDivisionError.
            raise IndexError("cannot index an empty CustomDataset")
        index = index % self.len
        image_file = self.image_file_lists[index]
        image, label = process_img(image_file)
        # transform defaults to None, so only call it when one was supplied.
        if self.transform is not None:
            image = self.transform(image)
        return image, label

    def __len__(self):
        """Return the number of entries in the dataset."""
        return len(self.image_file_lists)
1、模型参数初始化
这个就是初步的参数设置,一般来说是通用的
def _initialize_weights(self):
for m in self.modules():
if isinstance(m, nn.Conv2d):
n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
m.weight.data.normal_(0, math.sqrt(2. / n))
if m.bias is not None:
m.bias.data.zero_()
elif isinstance(m, nn.BatchNorm2d):
m.weight.data.fill_(1)
m.bias.data.zero_()
elif isinstance(m, nn.Linear):
n = m.weight.size(1)
m.weight.data.normal_(0, 0.01)
m.bias.data.zero_()
2、fine-tune(微调)预训练模型
主要是输出分类不同,需要调整最后一个全连接层
def mobilenet_v2(pretrained=True, num_classes=4):
    """Build a MobileNetV2 whose classifier head outputs ``num_classes`` logits.

    Args:
        pretrained: When true, load weights from the local checkpoint file
            ``mobilenetv2_1.0_f2a8633.pth`` before replacing the head.
        num_classes: Number of outputs of the new final linear layer.

    Returns:
        The constructed model with a freshly initialised classifier.
    """
    model = MobileNetV2(input_size=320, width_mult=1)
    if pretrained:
        # map_location="cpu" lets a checkpoint saved on a GPU be read on a
        # CPU-only machine; load_state_dict copies the values into the model.
        state_dict = torch.load("mobilenetv2_1.0_f2a8633.pth", map_location="cpu")
        # strict=False: do not fail on keys that do not match this definition.
        model.load_state_dict(state_dict, strict=False)
    # Swap the head for the task's class count.
    # NOTE(review): 1280 is assumed to be the backbone's output width - confirm
    # against MobileNetV2.last_channel.
    model.classifier = nn.Linear(1280, num_classes)
    return model
3、模型features的处理
self.features = [conv_bn(3, input_channel, 2)] # start as a plain Python list so layers can be appended
# .... (intermediate layers are elided in this excerpt)
# building last several layers
self.features.append(conv_1x1_bn(input_channel, self.last_channel))
# make it nn.Sequential
self.features = nn.Sequential(*self.features) # unpack the list into a single nn.Sequential
4、推理forward
forward 定义模型的前向计算逻辑;实例化模型后,调用 model(x) 时实际执行的就是这个方法
def forward(self, x):
    """Run the feature extractor, average the two trailing axes, then classify.

    The shape notes below are the example shapes recorded by the original
    author for a batch of 8.
    """
    feature_map = self.features(x)                 # e.g. [8, 1280, 7, 7]
    # Averaging axis 3 and then axis 2 acts as global average pooling.
    pooled = feature_map.mean(dim=3).mean(dim=2)   # e.g. [8, 1280]
    return self.classifier(pooled)                 # e.g. [8, 4]
1、定义好全局变量
writer = SummaryWriter('./logs/')  # event writer used for logging scalars

# Data loaders for the training and validation splits.
train_data_loader = DataLoader(train_data, batch_size=BATCH_SIZE, shuffle=True)
val_data_loader = DataLoader(
    val_data, batch_size=BATCH_TEST_SIZE, shuffle=True)

use_gpu = torch.cuda.is_available()  # whether a CUDA device can be used
model = mobilnet_v2.mobilenet_v2(True)  # build the network

# nn.DataParallel is the simplest multi-GPU option: it splits each batch along
# dim 0, e.g. [30, ...] -> [10, ...], [10, ...], [10, ...] on 3 GPUs.
# (Distributed training is more efficient but takes more setup.)
if torch.cuda.device_count() > 1:
    print("Let's use", torch.cuda.device_count(), "GPUs!")
    model = nn.DataParallel(model)
if use_gpu:
    model = model.cuda()  # move the parameters onto the GPU

# Loss and optimizer. The optimizer is created only after the model has been
# wrapped and moved, as the PyTorch docs recommend.
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)
# Alternative: torch.optim.Adam(model.parameters(), lr=0.001)
2、开始训练
训练时需要注意主要的数据转GPU上、梯度清零、推理后反向传播、优化器修改参数、准确率计算、日志记录、模型存储等
# Global step counter: incremented once per successfully processed batch and
# used as the x-axis for the logged scalars.
total_step = 0
for epoch in range(num_epochs):
    print('Epoch {}/{}'.format(epoch, num_epochs - 1))
    print('-' * 10)
    model.train()  # training mode (affects dropout / batch normalization)
    correct = 0  # correctly classified samples so far in this epoch
    seen = 0     # samples processed so far in this epoch
    for batch_idx, data in enumerate(train_data_loader):
        try:
            imgs, angle = data
            if use_gpu:
                imgs, angle = imgs.cuda(), angle.cuda()  # move the batch to the GPU
            optimizer.zero_grad()  # clear gradients from the previous step
            output = model(imgs)
            loss = criterion(output, angle)
            loss.backward()   # back-propagate
            optimizer.step()  # update the parameters

            # Running accuracy, kept as plain Python numbers. Counting actual
            # samples stays correct when the last batch is smaller.
            pred = output.detach().argmax(dim=1)
            correct += pred.eq(angle).sum().item()
            seen += angle.size(0)
            total_step += 1

            if (batch_idx+1) % 100 == 0:
                print(f'this is {str(batch_idx+1)}batch, and loss is {loss.item()}')
            if (batch_idx+1) % 1000 == 0:
                acc = correct / seen
                print('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}, Acc: {:.4f}'
                      .format(epoch+1, num_epochs, batch_idx+1, len(train_data_loader), loss.item(), acc))
                # This is the training loss, so it is logged under a train tag.
                writer.add_scalar('Loss/train', loss.item(), total_step)
                writer.add_scalar('ACC', acc, total_step)
        except Exception as e:
            # NOTE(review): best-effort - a failing batch is reported and
            # skipped instead of aborting the run. Narrow this if possible.
            print(e)
    # Save a checkpoint after each epoch. If the model is wrapped in
    # nn.DataParallel the saved keys carry a "module." prefix.
    torch.save(model.state_dict(), f'./save_mode_dir/{str(epoch)}.pth')
    test_acc(model, epoch, criterion, val_data_loader)  # evaluate on the validation set
1、关于预测也要注意一下模型的载入、预测、关闭梯度监控等
model = mobilnet_v2.mobilenet_v2(True)
use_gpu = torch.cuda.is_available()  # whether a CUDA device can be used
device = torch.device('cuda' if use_gpu else 'cpu')

# map_location lets a checkpoint saved on a GPU be read on a CPU-only machine.
state_dict = torch.load('./save_mode_dir/9.pth', map_location=device)
# A checkpoint saved from an nn.DataParallel model prefixes every key with
# "module.". Stripping it lets the same code load checkpoints from both
# single-GPU and multi-GPU training into a plain model.
prefix = 'module.'
state_dict = {
    (key[len(prefix):] if key.startswith(prefix) else key): value
    for key, value in state_dict.items()
}
model.load_state_dict(state_dict)
model = model.to(device)  # the model must sit on the same device as its inputs
model.eval()  # inference mode (affects dropout / batch normalization)

correct = 0
with torch.no_grad():  # no gradient tracking is needed for prediction
    for imgs, angle in val_data_loader:
        imgs, angle = imgs.to(device), angle.to(device)
        output = model(imgs)
        # index of the largest logit = predicted class
        pred = output.argmax(dim=1)
        correct += pred.eq(angle).sum().item()

total = len(val_data_loader.dataset)
acc = correct / total if total else 0.0  # avoid dividing by zero on an empty set
print('\nAccuracy: {}/{} ({:.0f}%)\n'.format(
    correct, total, 100. * acc))