argparse:是python用于解析命令行参数和选项的标准模块
例如 python parseTest.py input.txt output.txt --user=name --port=8080
使用步骤:
1:import argparse
2:parser = argparse.ArgumentParser()
3:parser.add_argument()
4:parser.parse_args()
每一个add_argument方法对应一个你要关注的参数或选项;最后调用
parse_args() 方法进行解析
其中命令行参数如果没给定,且没有设置defualt,则出错。但是如果是选项的话,则设置为None
torch.utils.data.DataLoader:PyTorch中数据读取的一个重要接口是torch.utils.data.DataLoader,该接口定义在dataloader.py脚本中,只要是用PyTorch来训练模型基本都会用到该接口,
该接口主要用来将自定义的数据读取接口的输出或者PyTorch已有的数据读取接口的输入按照batch size封装成Tensor,后续只需要再包装成Variable即可作为模型的输入,
因此该接口有点承上启下的作用,比较重要。
from __future__ import print_function
import argparse
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
from torch.autograd import Variable
from deform_conv import DeformConv2D
from time import time
# Training settings
parser = argparse.ArgumentParser(description='PyTorch MNIST Example')
parser.add_argument('--batch-size', type=int, default=32, metavar='N',
help='input batch size for training (default: 32)')
parser.add_argument('--test-batch-size', type=int, default=32, metavar='N',
help='input batch size for testing (default: 32)')
parser.add_argument('--epochs', type=int, default=10, metavar='N',
help='number of epochs to train (default: 10)')
parser.add_argument('--lr', type=float, default=0.01, metavar='LR',
help='learning rate (default: 0.01)')
parser.add_argument('--momentum', type=float, default=0.5, metavar='M',
help='SGD momentum (default: 0.5)')
parser.add_argument('--no-cuda', action='store_true', default=False,
help='disables CUDA training')
parser.add_argument('--seed', type=int, default=1, metavar='S',
help='random seed (default: 1)')
parser.add_argument('--log-interval', type=int, default=10, metavar='N',
help='how many batches to wait before logging training status')
args = parser.parse_args()
args.cuda = not args.no_cuda and torch.cuda.is_available()
'''
编写与设备无关的代码
(可用时受益于 GPU 加速,不可用时会倒退回 CPU)时,选择并保存适当的 torch.device, 不失为一种好方法,它可用于确定存储张量的位置。
device = torch.device('cuda' if args.cuda else 'cpu')
'''
torch.manual_seed(args.seed) #为CPU设置种子用于生成随机数,以使得结果是确定的
if args.cuda:
'''为当前GPU设置随机种子;
如果使用多个GPU,应该使用torch.cuda.manual_seed_all()为所有的GPU设置种子。
'''
torch.cuda.manual_seed(args.seed)
kwargs = {'num_workers': 1, 'pin_memory': True} if args.cuda else {}
"""
加载数据。组合数据集和采样器,提供数据上的单或多进程迭代器
参数:
dataset:Dataset类型,从其中加载数据
batch_size:int,可选。每个batch加载多少样本
shuffle:bool,可选。为True时表示每个epoch都对数据进行洗牌
sampler:Sampler,可选。从数据集中采样样本的方法。
num_workers:int,可选。加载数据时使用多少子进程。默认值为0,表示在主进程中加载数据。
collate_fn:callable,可选。
pin_memory:bool,可选
drop_last:bool,可选。True表示如果最后剩下不完全的batch,丢弃。False表示不丢弃。
"""
train_loader = torch.utils.data.DataLoader(
datasets.MNIST('./MNIST', train=True, download=True,
transform=transforms.Compose([
transforms.ToTensor(),
transforms.Normalize((0.1307,), (0.3081,))
])),
batch_size=args.batch_size, shuffle=True, **kwargs)
test_loader = torch.utils.data.DataLoader(
datasets.MNIST('./MNIST', train=False, download=True,
transform=transforms.Compose([
transforms.ToTensor(),
transforms.Normalize((0.1307,), (0.3081,))
])),
batch_size=args.test_batch_size, shuffle=True, **kwargs)
class DeformNet(nn.Module):
def __init__(self):
super(DeformNet, self).__init__()
self.conv1 = nn.Conv2d(1, 32, kernel_size=3, padding=1)
self.bn1 = nn.BatchNorm2d(32)
self.conv2 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
self.bn2 = nn.BatchNorm2d(64)
self.conv3 = nn.Conv2d(64, 128, kernel_size=3, padding=1)
self.bn3 = nn.BatchNorm2d(128)
self.offsets = nn.Conv2d(128, 18, kernel_size=3, padding=1)
self.conv4 = DeformConv2D(128, 128, kernel_size=3, padding=1)
self.bn4 = nn.BatchNorm2d(128)
self.classifier = nn.Linear(128, 10)
def forward(self, x):
# convs
x = F.relu(self.conv1(x))
x = self.bn1(x)
x = F.relu(self.conv2(x))
x = self.bn2(x)
x = F.relu(self.conv3(x))
x = self.bn3(x)
# deformable convolution
offsets = self.offsets(x)
x = F.relu(self.conv4(x, offsets))
x = self.bn4(x)
x = F.avg_pool2d(x, kernel_size=28, stride=1).view(x.size(0), -1)
x = self.classifier(x)
return F.log_softmax(x, dim=1)
class PlainNet(nn.Module):
def __init__(self):
super(PlainNet, self).__init__()
self.conv1 = nn.Conv2d(1, 32, kernel_size=3, padding=1)
self.bn1 = nn.BatchNorm2d(32)
self.conv2 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
self.bn2 = nn.BatchNorm2d(64)
self.conv3 = nn.Conv2d(64, 128, kernel_size=3, padding=1)
self.bn3 = nn.BatchNorm2d(128)
self.conv4 = nn.Conv2d(128, 128, kernel_size=3, padding=1)
self.bn4 = nn.BatchNorm2d(128)
self.classifier = nn.Linear(128, 10)
def forward(self, x):
# convs
x = F.relu(self.conv1(x))
x = self.bn1(x)
x = F.relu(self.conv2(x))
x = self.bn2(x)
x = F.relu(self.conv3(x))
x = self.bn3(x)
x = F.relu(self.conv4(x))
x = self.bn4(x)
x = F.avg_pool2d(x, kernel_size=28, stride=1).view(x.size(0), -1)
x = self.classifier(x)
return F.log_softmax(x, dim=1)
model = DeformNet()
'''
将所有的模型参数移动到GPU上
if args.cuda:
model.cuda()将所有的模型参数移动到GPU上
'''
def init_weights(m):
if isinstance(m, nn.Conv2d) or isinstance(m, nn.Linear):
nn.init.xavier_uniform(m.weight, gain=nn.init.calculate_gain('relu'))
if m.bias is not None:
m.bias.data = torch.FloatTensor(m.bias.shape[0]).zero_()
def init_conv_offset(m):
m.weight.data = torch.zeros_like(m.weight.data)
if m.bias is not None:
m.bias.data = torch.FloatTensor(m.bias.shape[0]).zero_()
model.apply(init_weights)
model.offsets.apply(init_conv_offset)
if args.cuda:
model.cuda()
optimizer = optim.SGD(model.parameters(), lr=args.lr, momentum=args.momentum)
def train(epoch):
model.train() #把module设成training模式,对Dropout和BatchNorm有影响
for batch_idx, (data, target) in enumerate(train_loader):
'''
Variable类对Tensor对象进行封装,会保存该张量对应的梯度,以及对生成该张量的函数grad_fn的一个引用。
如果该张量是用户创建的,grad_fn是None,称这样的Variable为叶子Variable。
'''
data, target = Variable(data), Variable(target)
if args.cuda:
data, target = data.cuda(), target.cuda()
optimizer.zero_grad()
output = model(data)
loss = F.nll_loss(output, target) #负log似然损失
loss.backward()
optimizer.step()
if batch_idx % args.log_interval == 0:
print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
epoch, batch_idx * len(data), len(train_loader.dataset),
100. * batch_idx / len(train_loader), loss.data[0]))
def test():
model.eval() #把module设置为评估模式,只对Dropout和BatchNorm模块有影响
test_loss = 0
correct = 0
for data, target in test_loader:
if args.cuda:
data, target = data.cuda(), target.cuda()
data, target = Variable(data, volatile=True), Variable(target)
output = model(data)
test_loss += F.nll_loss(output, target, size_average=False).data[0] # sum up batch loss
pred = output.data.max(1, keepdim=True)[1] # get the index of the max log-probability
correct += pred.eq(target.data.view_as(pred)).cpu().sum()
test_loss /= len(test_loader.dataset)
print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
test_loss, correct, len(test_loader.dataset),
100. * correct / len(test_loader.dataset)))
for epoch in range(1, args.epochs + 1):
since = time()
train(epoch)
iter = time() - since
print("Spends {}s for each training epoch".format(iter/args.epochs))
test()
from torch.autograd import Variable, Function
import torch
from torch import nn
import numpy as np
class DeformConv2D(nn.Module):
def __init__(self, inc, outc, kernel_size=3, padding=1, bias=None):
super(DeformConv2D, self).__init__()
self.kernel_size = kernel_size
self.padding = padding
self.zero_padding = nn.ZeroPad2d(padding)
self.conv_kernel = nn.Conv2d(inc, outc, kernel_size=kernel_size, stride=kernel_size, bias=bias)
def forward(self, x, offset):
dtype = offset.data.type()
ks = self.kernel_size
N = offset.size(1) // 2
# Change offset's order from [x1, x2, ..., y1, y2, ...] to [x1, y1, x2, y2, ...]
# Codes below are written to make sure same results of MXNet implementation.
# You can remove them, and it won't influence the module's performance.
offsets_index = Variable(torch.cat([torch.arange(0, 2*N, 2), torch.arange(1, 2*N+1, 2)]), requires_grad=False).type_as(x).long()
offsets_index = offsets_index.unsqueeze(dim=0).unsqueeze(dim=-1).unsqueeze(dim=-1).expand(*offset.size())
offset = torch.gather(offset, dim=1, index=offsets_index)
# ------------------------------------------------------------------------
if self.padding:
x = self.zero_padding(x)
# (b, 2N, h, w)
p = self._get_p(offset, dtype)
# (b, h, w, 2N)
p = p.contiguous().permute(0, 2, 3, 1)
q_lt = Variable(p.data, requires_grad=False).floor()
q_rb = q_lt + 1
q_lt = torch.cat([torch.clamp(q_lt[..., :N], 0, x.size(2)-1), torch.clamp(q_lt[..., N:], 0, x.size(3)-1)], dim=-1).long()
q_rb = torch.cat([torch.clamp(q_rb[..., :N], 0, x.size(2)-1), torch.clamp(q_rb[..., N:], 0, x.size(3)-1)], dim=-1).long()
q_lb = torch.cat([q_lt[..., :N], q_rb[..., N:]], -1)
q_rt = torch.cat([q_rb[..., :N], q_lt[..., N:]], -1)
# (b, h, w, N)
mask = torch.cat([p[..., :N].lt(self.padding)+p[..., :N].gt(x.size(2)-1-self.padding),
p[..., N:].lt(self.padding)+p[..., N:].gt(x.size(3)-1-self.padding)], dim=-1).type_as(p)
mask = mask.detach()
floor_p = p - (p - torch.floor(p))
p = p*(1-mask) + floor_p*mask
p = torch.cat([torch.clamp(p[..., :N], 0, x.size(2)-1), torch.clamp(p[..., N:], 0, x.size(3)-1)], dim=-1)
# bilinear kernel (b, h, w, N)
g_lt = (1 + (q_lt[..., :N].type_as(p) - p[..., :N])) * (1 + (q_lt[..., N:].type_as(p) - p[..., N:]))
g_rb = (1 - (q_rb[..., :N].type_as(p) - p[..., :N])) * (1 - (q_rb[..., N:].type_as(p) - p[..., N:]))
g_lb = (1 + (q_lb[..., :N].type_as(p) - p[..., :N])) * (1 - (q_lb[..., N:].type_as(p) - p[..., N:]))
g_rt = (1 - (q_rt[..., :N].type_as(p) - p[..., :N])) * (1 + (q_rt[..., N:].type_as(p) - p[..., N:]))
# (b, c, h, w, N)
x_q_lt = self._get_x_q(x, q_lt, N)
x_q_rb = self._get_x_q(x, q_rb, N)
x_q_lb = self._get_x_q(x, q_lb, N)
x_q_rt = self._get_x_q(x, q_rt, N)
# (b, c, h, w, N)
x_offset = g_lt.unsqueeze(dim=1) * x_q_lt + \
g_rb.unsqueeze(dim=1) * x_q_rb + \
g_lb.unsqueeze(dim=1) * x_q_lb + \
g_rt.unsqueeze(dim=1) * x_q_rt
x_offset = self._reshape_x_offset(x_offset, ks)
out = self.conv_kernel(x_offset)
return out
def _get_p_n(self, N, dtype):
p_n_x, p_n_y = np.meshgrid(range(-(self.kernel_size-1)//2, (self.kernel_size-1)//2+1),
range(-(self.kernel_size-1)//2, (self.kernel_size-1)//2+1), indexing='ij')
# (2N, 1)
p_n = np.concatenate((p_n_x.flatten(), p_n_y.flatten()))
p_n = np.reshape(p_n, (1, 2*N, 1, 1))
p_n = Variable(torch.from_numpy(p_n).type(dtype), requires_grad=False)
return p_n
@staticmethod
def _get_p_0(h, w, N, dtype):
p_0_x, p_0_y = np.meshgrid(range(1, h+1), range(1, w+1), indexing='ij')
p_0_x = p_0_x.flatten().reshape(1, 1, h, w).repeat(N, axis=1)
p_0_y = p_0_y.flatten().reshape(1, 1, h, w).repeat(N, axis=1)
p_0 = np.concatenate((p_0_x, p_0_y), axis=1)
p_0 = Variable(torch.from_numpy(p_0).type(dtype), requires_grad=False)
return p_0
def _get_p(self, offset, dtype):
N, h, w = offset.size(1)//2, offset.size(2), offset.size(3)
# (1, 2N, 1, 1)
p_n = self._get_p_n(N, dtype)
# (1, 2N, h, w)
p_0 = self._get_p_0(h, w, N, dtype)
p = p_0 + p_n + offset
return p
def _get_x_q(self, x, q, N):
b, h, w, _ = q.size()
padded_w = x.size(3)
c = x.size(1)
# (b, c, h*w)
x = x.contiguous().view(b, c, -1)
# (b, h, w, N)
index = q[..., :N]*padded_w + q[..., N:] # offset_x*w + offset_y
# (b, c, h*w*N)
index = index.contiguous().unsqueeze(dim=1).expand(-1, c, -1, -1, -1).contiguous().view(b, c, -1)
x_offset = x.gather(dim=-1, index=index).contiguous().view(b, c, h, w, N)
return x_offset
@staticmethod
def _reshape_x_offset(x_offset, ks):
b, c, h, w, N = x_offset.size()
x_offset = torch.cat([x_offset[..., s:s+ks].contiguous().view(b, c, h, w*ks) for s in range(0, N, ks)], dim=-1)
x_offset = x_offset.contiguous().view(b, c, h*ks, w*ks)
return x_offset
可变形卷积在网络结构书写时,需要经过以下两步
self.conv3 = nn.Conv2d(64, 128, kernel_size=3, padding=1)
self.bn3 = nn.BatchNorm2d(128)
self.offsets = nn.Conv2d(128, 18, kernel_size=3, padding=1)
self.conv4 = DeformConv2D(128, 128, kernel_size=3, padding=1)