These are my PyTorch notes from August 3, organized into eleven chapters:
train_loader = torch.utils.data.DataLoader(
    datasets.MNIST('../data', train=True, download=True,
                   transform=transforms.Compose([
                       transforms.ToTensor(),
                       transforms.Normalize((0.1307,), (0.3081,))
                   ])),
    batch_size=batch_size, shuffle=True)
test_loader = torch.utils.data.DataLoader(
    datasets.MNIST('../data', train=False, transform=transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.1307,), (0.3081,))
    ])),
    batch_size=batch_size, shuffle=True)
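As a quick sanity check (a sketch, assuming batch_size is already defined), one batch can be pulled from the loader; since (0.1307,) and (0.3081,) are MNIST's global mean and std, the normalized data should have roughly zero mean and unit std:

data, target = next(iter(train_loader))
print(data.shape, target.shape)               # torch.Size([batch_size, 1, 28, 28]) torch.Size([batch_size])
print(data.mean().item(), data.std().item())  # roughly 0 and 1 after Normalize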
for epoch in range(epochs):
    for batch_idx, (data, target) in enumerate(train_loader):
        data = data.view(-1, 28*28)
        data, target = data.to(device), target.to(device)

        logits = net(data)
        loss = criteon(logits, target)

        optimizer.zero_grad()
        loss.backward()
        # print(w1.grad.norm(), w2.grad.norm())
        optimizer.step()

        if batch_idx % 100 == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, batch_idx * len(data), len(train_loader.dataset),
                100. * batch_idx / len(train_loader), loss.item()))
print('train: ', len(train_db), 'test: ', len(test_db))
train_db, val_db = torch.utils.data.random_split(train_db, [50000, 10000])
print('db1: ', len(train_db), 'db2: ', len(val_db))
train_loader = torch.utils.data.DataLoader(
    train_db,
    batch_size=batch_size, shuffle=True)
val_loader = torch.utils.data.DataLoader(
    val_db,
    batch_size=batch_size, shuffle=True)
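A minimal validation pass over val_loader might look like this (a sketch, reusing the net, criteon, and device objects from the training loop above):

net.eval()
val_loss, correct = 0, 0
with torch.no_grad():                      # no gradients needed for evaluation
    for data, target in val_loader:
        data = data.view(-1, 28*28).to(device)
        target = target.to(device)
        logits = net(data)
        val_loss += criteon(logits, target).item()
        pred = logits.argmax(dim=1)        # predicted class = index of max logit
        correct += pred.eq(target).sum().item()
print('val loss: {:.4f}, acc: {:.2%}'.format(
    val_loss / len(val_loader), correct / len(val_db)))
net.train()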
$$J(\theta) = -\frac{1}{m} \sum_{i=1}^{m} \left[ y_i \ln \hat{y}_i + (1 - y_i) \ln(1 - \hat{y}_i) \right] + \lambda \sum_{i=1}^{n} |\theta_i|$$
regularization_loss = 0
for param in model.parameters():
    regularization_loss += torch.sum(torch.abs(param))

classify_loss = criteon(logits, target)
loss = classify_loss + 0.01 * regularization_loss

optimizer.zero_grad()
loss.backward()
optimizer.step()
$$J(W; X, y) + \frac{1}{2}\lambda \cdot \|W\|^2$$
device = torch.device('cuda:0')
net = MLP().to(device)
optimizer = optim.SGD(net.parameters(), lr=learning_rate, weight_decay=0.01)
criteon = nn.CrossEntropyLoss().to(device)
from torch.optim.lr_scheduler import ReduceLROnPlateau

optimizer = torch.optim.SGD(model.parameters(), args.lr,
                            momentum=args.momentum,
                            weight_decay=args.weight_decay)
scheduler = ReduceLROnPlateau(optimizer, 'min')

for epoch in range(args.start_epoch, args.epochs):
    train(train_loader, model, criterion, optimizer, epoch)
    result_avg, loss_val = validate(val_loader, model, criterion, epoch)
    scheduler.step(loss_val)
# Assuming optimizer uses lr = 0.05 for all groups
# lr = 0.05   if epoch < 30
# lr = 0.005  if 30 <= epoch < 60
# lr = 0.0005 if 60 <= epoch < 90
# ……
from torch.optim.lr_scheduler import StepLR

scheduler = StepLR(optimizer, step_size=30, gamma=0.1)
for epoch in range(epochs):
    train(……)
    validate(……)
    scheduler.step()   # decay lr by gamma every step_size epochs
net_dropped = torch.nn.Sequential(
    torch.nn.Linear(784, 200),
    torch.nn.Dropout(0.5),
    torch.nn.ReLU(),
    torch.nn.Linear(200, 200),
    torch.nn.Dropout(0.5),
    torch.nn.ReLU(),
    torch.nn.Linear(200, 10)
)
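Note that torch.nn.Dropout is only active in training mode; before evaluation the network has to be switched over via the standard nn.Module API:

net_dropped.train()   # training: each Dropout layer zeroes activations with p=0.5
# ... training steps ...
net_dropped.eval()    # evaluation: dropout is disabled, activations pass through
# ... test/validation steps ...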
nn.Conv2d:
layer = nn.Conv2d(1, 3, kernel_size=3, stride=1, padding=0)
x = torch.rand(1, 1, 28, 28)
out = layer.forward(x)
out.shape
>>> torch.Size([1, 3, 26, 26])
layer = nn.Conv2d(1, 3, kernel_size=3, stride=2, padding=1)
out = layer.forward(x)
out.shape
>>> torch.Size([1, 3, 14, 14])
out = layer(x)
out.shape
>>> torch.Size([1, 3, 14, 14])
w = torch.rand(16, 3, 5, 5)
b = torch.rand(16)
x = torch.rand(1, 3, 28, 28)
out = F.conv2d(x, w, b, stride=1, padding=1)
out.shape
>>> torch.Size([1, 16, 26, 26])
out = F.conv2d(x, w, b, stride=2, padding=2)
out.shape
>>> torch.Size([1, 16, 14, 14])
x = out
x.shape
>>> torch.Size([1, 16, 14, 14])
layer = nn.MaxPool2d(2, stride=2)
out = layer(x)
out.shape
>>> torch.Size([1, 16, 7, 7])
out = F.avg_pool2d(x, 2, stride=2)
out.shape
>>> torch.Size([1, 16, 7, 7])
x = out
out = F.interpolate(x, scale_factor=2, mode='nearest')
out.shape
>>> torch.Size([1, 16, 14, 14])
out = F.interpolate(x, scale_factor=3, mode='nearest')
out.shape
>>> torch.Size([1, 16, 21, 21])
x.shape
>>> torch.Size([1, 16, 7, 7])
layer = nn.ReLU(inplace=True)
out = layer(x)
out.shape
>>> torch.Size([1, 16, 7, 7])
$$\tilde{z}^i = \frac{z^i - \mu}{\sigma}, \qquad \hat{z}^i = \gamma \odot \tilde{z}^i + \beta$$
x = torch.rand(100, 16, 784)
layer = nn.BatchNorm1d(16)
out = layer(x)
layer.running_mean
>>> tensor([0.0501, 0.0501, 0.0501, 0.0501, 0.0499, 0.0500, 0.0501, 0.0501, 0.0499, 0.0502, 0.0500, 0.0501, 0.0500, 0.0498, 0.0500, 0.0501])
layer.running_var
>>> tensor([0.9084, 0.9083, 0.9083, 0.9084, 0.9083, 0.9083, 0.9083, 0.9083, 0.9083, 0.9083, 0.9084, 0.9083, 0.9083, 0.9084, 0.9083, 0.9084])
x = torch.rand(1, 16, 7, 7)
x.shape
>>> torch.Size([1, 16, 7, 7])
layer = nn.BatchNorm2d(16)
out = layer(x)
out.shape
>>> torch.Size([1, 16, 7, 7])
layer.weight
Parameter containing:
>>> tensor([1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.], requires_grad=True)
layer.weight.shape
>>> torch.Size([16])
layer.bias.shape
>>> torch.Size([16])
vars(layer)
>>> {'training': True,
     '_parameters': OrderedDict([
         ('weight', Parameter containing:
          tensor([1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.],
                 requires_grad=True)),
         ('bias', Parameter containing:
          tensor([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
                 requires_grad=True))]),
     '_buffers': OrderedDict([
         ('running_mean', tensor([0.0443, 0.0511, 0.0542, 0.0462, 0.0566, 0.0538, 0.0524, 0.0507, 0.0521,
                                  0.0516, 0.0570, 0.0474, 0.0460, 0.0429, 0.0508, 0.0450])),
         ('running_var', tensor([0.9069, 0.9097, 0.9070, 0.9081, 0.9086, 0.9102, 0.9069, 0.9080, 0.9093,
                                 0.9101, 0.9081, 0.9092, 0.9083, 0.9079, 0.9072, 0.9071])),
         ('num_batches_tracked', tensor(1))]),
     '_non_persistent_buffers_set': set(), '_backward_hooks': OrderedDict(), '_is_full_backward_hook': None,
     '_forward_hooks': OrderedDict(), '_forward_pre_hooks': OrderedDict(), '_state_dict_hooks': OrderedDict(),
     '_load_state_dict_pre_hooks': OrderedDict(), '_load_state_dict_post_hooks': OrderedDict(),
     '_modules': OrderedDict(),
     'num_features': 16, 'eps': 1e-05, 'momentum': 0.1, 'affine': True, 'track_running_stats': True}
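As the 'training': True entry above shows, the layer is in training mode, where normalization uses the current batch statistics and the running estimates are updated. At test time the layer must be switched to eval mode so the accumulated running statistics are used instead:

layer.eval()    # use running_mean / running_var, stop updating them
out = layer(x)  # same output shape, normalized with the running statistics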
class ResBlk(nn.Module):
    def __init__(self, ch_in, ch_out):
        super(ResBlk, self).__init__()
        self.conv1 = nn.Conv2d(ch_in, ch_out, kernel_size=3, stride=1, padding=1)
        self.bn1 = nn.BatchNorm2d(ch_out)
        self.conv2 = nn.Conv2d(ch_out, ch_out, kernel_size=3, stride=1, padding=1)
        self.bn2 = nn.BatchNorm2d(ch_out)

        self.extra = nn.Sequential()
        if ch_out != ch_in:
            # [b, ch_in, h, w] ==> [b, ch_out, h, w]
            self.extra = nn.Sequential(
                nn.Conv2d(ch_in, ch_out, kernel_size=1, stride=1),
                nn.BatchNorm2d(ch_out)
            )

    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out))
        # shortcut: element-wise add of the (possibly 1x1-projected) input
        out = self.extra(x) + out
        return out
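A quick shape check of the block (a sketch, assuming torch, torch.nn as nn, and torch.nn.functional as F are imported):

blk = ResBlk(64, 128)              # channel count changes, so the 1x1 shortcut kicks in
tmp = torch.rand(2, 64, 32, 32)
out = blk(tmp)
print(out.shape)                   # torch.Size([2, 128, 32, 32])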
self.net = nn.Sequential(
    nn.Conv2d(1, 32, 5, 1, 1),
    nn.MaxPool2d(2, 2),
    nn.ReLU(True),
    nn.BatchNorm2d(32),

    nn.Conv2d(32, 64, 3, 1, 1),
    nn.ReLU(True),
    nn.BatchNorm2d(64),

    nn.Conv2d(64, 64, 3, 1, 1),
    nn.MaxPool2d(2, 2),
    nn.ReLU(True),
    nn.BatchNorm2d(64),

    nn.Conv2d(64, 128, 3, 1, 1),
    nn.ReLU(True),
    nn.BatchNorm2d(128)
)
net = nn.Sequential(nn.Linear(4, 2), nn.Linear(2, 2))
list(net.parameters())[0].shape
>>> torch.Size([2, 4])
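The same parameters can also be inspected by name; nn.Sequential names its children by index automatically:

for name, p in net.named_parameters():
    print(name, p.shape)
# 0.weight torch.Size([2, 4])
# 0.bias   torch.Size([2])
# 1.weight torch.Size([2, 2])
# 1.bias   torch.Size([2])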
class BasicNet(nn.Module):
    def __init__(self):
        super(BasicNet, self).__init__()
        self.net = nn.Linear(4, 3)

    def forward(self, x):
        return self.net(x)

class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.net = nn.Sequential(BasicNet(),
                                 nn.ReLU(),
                                 nn.Linear(3, 2))

    def forward(self, x):
        return self.net(x)
device = torch.device('cuda')
net = Net()
net.to(device)
device = torch.device('cuda')
net = Net()
net.to(device)
net.load_state_dict(torch.load('ckpt.mdl'))
# train…
torch.save(net.state_dict(), 'ckpt.mdl')
device = torch.device('cuda')
net = Net()
net.to(device)
net.load_state_dict(torch.load('ckpt.mdl'))
# train…
torch.save(net.state_dict(), 'ckpt.mdl')
# test
net.eval()
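If the checkpoint was saved on a different device (e.g. trained on GPU, loaded on CPU), torch.load accepts a map_location argument:

net.load_state_dict(torch.load('ckpt.mdl', map_location=device))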
train_loader = torch.utils.data.DataLoader(
    datasets.MNIST('../data', train=True, download=True,
                   transform=transforms.Compose([
                       transforms.RandomHorizontalFlip(),
                       transforms.RandomVerticalFlip(),
                       transforms.ToTensor(),
                   ])),
    batch_size=batch_size, shuffle=True
)
train_loader = torch.utils.data.DataLoader(
    datasets.MNIST('../data', train=True, download=True,
                   transform=transforms.Compose([
                       transforms.RandomHorizontalFlip(),
                       transforms.RandomVerticalFlip(),
                       transforms.RandomRotation((90, 270)),  # RandomRotation expects a (min, max) degree range
                       transforms.ToTensor(),
                   ])),
    batch_size=batch_size, shuffle=True
)
train_loader = torch.utils.data.DataLoader(
    datasets.MNIST('../data', train=True, download=True,
                   transform=transforms.Compose([
                       transforms.RandomHorizontalFlip(),
                       transforms.RandomVerticalFlip(),
                       transforms.RandomRotation((90, 270)),  # (min, max) degree range
                       transforms.Resize([32, 32]),
                       transforms.ToTensor(),
                   ])),
    batch_size=batch_size, shuffle=True
)
train_loader = torch.utils.data.DataLoader(
    datasets.MNIST('../data', train=True, download=True,
                   transform=transforms.Compose([
                       transforms.RandomHorizontalFlip(),
                       transforms.RandomVerticalFlip(),
                       transforms.RandomRotation((90, 270)),  # (min, max) degree range
                       transforms.Resize([32, 32]),
                       transforms.RandomCrop([28, 28]),
                       transforms.ToTensor(),
                   ])),
    batch_size=batch_size, shuffle=True
)
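To confirm the augmented pipeline produces the expected tensors, one batch can be pulled from the loader (a sketch, with batch_size as defined above):

data, target = next(iter(train_loader))
print(data.shape)    # torch.Size([batch_size, 1, 28, 28]) — Resize to 32x32, then RandomCrop back to 28x28
print(target.shape)  # torch.Size([batch_size])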