一个典型的神经网络的训练过程大致分为以下几个步骤:
1.首先定义神经网络的结构,并且定义各层的权重参数的规模和初始值。
2.然后将输入数据分成多个批次输入神经网络。
3.将输入数据通过整个网络进行计算。
4.每次迭代根据计算结果和真实结果的差值计算损失。
5.根据损失对权重参数进行反向求导传播。
6.更新权重值,更新过程使用下面的公式:
weight = weight - learning_rate * gradient   (梯度下降是沿负梯度方向更新,应为减号)
import torch
from torch.autograd import Variable
import torch.nn.functional as F
import matplotlib.pyplot as plt
# Toy regression dataset: 100 points of y = x^2 plus uniform noise in [0, 0.2).
x = torch.unsqueeze(torch.linspace(-1, 1, 100), dim=1)  # shape (100, 1)
y = x.pow(2) + 0.2 * torch.rand(x.size())
# NOTE(review): Variable is a no-op wrapper on PyTorch >= 0.4; kept for the book's era.
x, y = Variable(x), Variable(y)
# plt.scatter(x.data.numpy(), y.data.numpy())
# plt.show()
class Net(torch.nn.Module):
    """Two-layer fully connected regression network: Linear -> ReLU -> Linear."""

    def __init__(self, n_features, n_hidden, n_output):
        super(Net, self).__init__()
        # Hidden layer and output ("predict") layer.
        self.hidden = torch.nn.Linear(n_features, n_hidden)
        self.predict = torch.nn.Linear(n_hidden, n_output)

    def forward(self, x):
        hidden_out = F.relu(self.hidden(x))
        return self.predict(hidden_out)
# Train the regression net and animate the fit. (The source had lost all loop
# indentation; structure restored here.)
net = Net(1, 10, 1)
print(net)

plt.ion()   # interactive mode so the figure can be redrawn while training
plt.show()

optimizer = torch.optim.SGD(net.parameters(), lr=0.5)
loss_fc = torch.nn.MSELoss()

for i in range(100):
    prediction = net(x)             # forward pass
    loss = loss_fc(prediction, y)   # mean-squared error against the targets
    optimizer.zero_grad()           # clear gradients accumulated last step
    loss.backward()                 # back-propagate
    optimizer.step()                # apply the update
    if i % 5 == 0:
        # plot and show learning process
        plt.cla()
        plt.scatter(x.data.numpy(), y.data.numpy())
        plt.plot(x.data.numpy(), prediction.data.numpy(), 'r-', lw=5)
        plt.text(0.5, 0, 'Loss=%.4f' % loss.data, fontdict={'size': 20, 'color': 'red'})
        plt.pause(0.1)

# Turn interactive mode off BEFORE the final show(), otherwise the window
# flashes and closes instead of blocking.
plt.ioff()
plt.show()
import torch
from torch.autograd import Variable
import torch.nn.functional as F
import matplotlib.pyplot as plt
# Two Gaussian clusters for binary classification:
# class 0 centred at (+2, +2), class 1 centred at (-2, -2).
n_data = torch.ones(100, 2)
x0 = torch.normal(2*n_data, 1)
y0 = torch.zeros(100)
x1 = torch.normal(-2*n_data, 1)
y1 = torch.ones(100)
x = torch.cat((x0, x1), 0).type(torch.FloatTensor)  # (200, 2) features
y = torch.cat((y0, y1),).type(torch.LongTensor)     # (200,) integer labels for CrossEntropyLoss
x, y = Variable(x), Variable(y)
# plt.scatter(x.data.numpy(), y.data.numpy())
# plt.show()
class Net(torch.nn.Module):
    """Single-hidden-layer MLP used here as a 2-class classifier (outputs logits)."""

    def __init__(self, n_features, n_hidden, n_output):
        super(Net, self).__init__()
        self.hidden = torch.nn.Linear(n_features, n_hidden)   # input -> hidden
        self.predict = torch.nn.Linear(n_hidden, n_output)    # hidden -> logits

    def forward(self, x):
        x = self.predict(F.relu(self.hidden(x)))
        return x
# Train the classifier and animate the decision progress. (Loop indentation
# restored; F.softmax given an explicit dim to avoid the ambiguous default.)
net = Net(2, 10, 2)
print(net)

plt.ion()   # interactive mode: redraw the figure during training
plt.show()

optimizer = torch.optim.SGD(net.parameters(), lr=0.01)
loss_fc = torch.nn.CrossEntropyLoss()

for i in range(100):
    out = net(x)                # raw logits, shape (200, 2)
    loss = loss_fc(out, y)      # cross-entropy expects logits + integer labels
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    if i % 2 == 0:
        # plot and show learning process
        plt.cla()
        # argmax over the class dimension = predicted class index
        prediction = torch.max(F.softmax(out, dim=1), 1)[1]
        pred_y = prediction.numpy().squeeze()
        target_y = y.data.numpy()
        plt.scatter(x.data.numpy()[:, 0], x.data.numpy()[:, 1], c=pred_y, s=100, lw=0)
        accuracy = sum(pred_y == target_y) / 200
        plt.text(1.5, -4, 'Accuracy=%.4f' % accuracy, fontdict={'size': 20, 'color': 'red'})
        plt.pause(0.1)

# Turn interactive mode off before the final blocking show().
plt.ioff()
plt.show()
import torch
from torch.autograd import Variable
import torch.nn.functional as F
import matplotlib.pyplot as plt
# Same toy regression data as above: y = x^2 + noise.
x = torch.unsqueeze(torch.linspace(-1, 1, 100), dim=1)  # shape (100, 1)
y = x.pow(2) + 0.2 * torch.rand(x.size())
# Inputs/targets need no gradients; they are data, not parameters.
x, y = Variable(x, requires_grad=False), Variable(y, requires_grad=False)
# plt.scatter(x.data.numpy(), y.data.numpy())
# plt.show()
def save():
    """Train a small net on the global (x, y), save it two ways, and plot it.

    Writes 'net.pkl' (whole module) and 'net_params.pkl' (state_dict only).
    """
    net1 = torch.nn.Sequential(
        torch.nn.Linear(1, 10),
        torch.nn.ReLU(),
        torch.nn.Linear(10, 1)
    )
    optimizer = torch.optim.SGD(net1.parameters(), lr=0.5)
    loss_fc = torch.nn.MSELoss()
    for i in range(100):
        prediction = net1(x)
        loss = loss_fc(prediction, y)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    torch.save(net1, 'net.pkl')                      # save structure + parameters
    torch.save(net1.state_dict(), 'net_params.pkl')  # save only the parameters
    # plot result
    prediction = net1(x)
    plt.figure(1, figsize=(10, 3))
    plt.subplot(131)
    plt.title('net1')
    plt.scatter(x.data.numpy(), y.data.numpy())
    plt.plot(x.data.numpy(), prediction.data.numpy(), 'r-', lw=5)
def restore_net():
    """Reload the full module saved by save() and plot its predictions."""
    net2 = torch.load('net.pkl')
    prediction = net2(x)
    # plot result — BUG FIX: select the subplot BEFORE titling it; the original
    # called plt.title first, which titled the previous axes.
    plt.subplot(132)
    plt.title('net2')
    plt.scatter(x.data.numpy(), y.data.numpy())
    plt.plot(x.data.numpy(), prediction.data.numpy(), 'r-', lw=5)
def restore_params():
    """Rebuild the architecture, load only the saved parameters, and plot."""
    net3 = torch.nn.Sequential(
        torch.nn.Linear(1, 10),
        torch.nn.ReLU(),
        torch.nn.Linear(10, 1)
    )
    net3.load_state_dict(torch.load('net_params.pkl'))
    prediction = net3(x)
    # plot result — BUG FIX: subplot before title (same issue as restore_net).
    plt.subplot(133)
    plt.title('net3')
    plt.scatter(x.data.numpy(), y.data.numpy())
    plt.plot(x.data.numpy(), prediction.data.numpy(), 'r-', lw=5)
    plt.show()
# Run the three demos in order: save() must run first so the .pkl files exist.
# save net
save()
# restore entire net
restore_net()
# restore only net parameters
restore_params()
import torch
from torch.autograd import Variable
batch_n = 100       # samples per batch
hidden_layer = 100  # hidden layer width
input_data = 1000   # input feature dimension
output_data = 10    # output dimension
class Model(torch.nn.Module):
    """
    Module whose forward pass is written as explicit matrix operations;
    the weight tensors are passed in as arguments rather than stored as
    registered parameters.
    """

    def __init__(self):
        super(Model, self).__init__()

    def forward(self, input, w1, w2):
        """Return (input @ w1) -> ReLU -> (@ w2)."""
        hidden = torch.clamp(torch.mm(input, w1), min=0)  # clamp(min=0) == ReLU
        return torch.mm(hidden, w2)

    def backward(self):
        """Autograd derives the backward pass automatically; nothing custom here."""
        pass
model = Model()
# Inputs/targets do not need gradients — only the two weight matrices are
# optimized, so only w1/w2 set requires_grad=True.
x = Variable(torch.randn(batch_n, input_data), requires_grad=False)
y = Variable(torch.randn(batch_n, output_data), requires_grad=False)
w1 = Variable(torch.randn(input_data, hidden_layer), requires_grad=True)
w2 = Variable(torch.randn(hidden_layer, output_data), requires_grad=True)

epoch_n = 30          # number of training iterations
learning_rate = 1e-6  # manual SGD step size

for epoch in range(epoch_n):
    y_pred = model(x, w1, w2)            # forward pass
    loss = (y_pred - y).pow(2).sum()     # scalar (0-dim) squared-error loss
    print("Epoch:{}, Loss:{:.4f}".format(epoch, loss.data))
    loss.backward()                      # populate w1.grad / w2.grad
    # Manual SGD update on the raw tensors (bypasses autograd tracking).
    w1.data -= learning_rate*w1.grad.data
    w2.data -= learning_rate*w2.grad.data
    # Zero the gradients; backward() ACCUMULATES, so skipping this would
    # corrupt every later step.
    w1.grad.data.zero_()
    w2.grad.data.zero_()
# 搭建一个包含了卷积层、激活 函数、池化层、全连接层的卷积神经网络
# 各个部分的功能实现 依然是通过torch.nn中的类来完成的
# 卷积层使用 torch.nn.Conv2d 类方法来搭建;
# 激活层使用 torch.nn.ReLU 类方法来搭建;
# 池化层使用 torch.nn.MaxPool2d 类方法来搭建;
# 全连接层使用 torch.nn.Linear 类方法.
import torch
from torchvision import datasets, transforms
from torch.autograd import Variable
import time
# torchvision.transforms.Compose类看作一种容器,它能够同时对多种数据变换进行组合。
# 在torch.transforms中提供了丰富的类对载入的数据进行变换
# Compose chains several transforms: PIL image -> tensor, then normalize
# the single (grayscale) channel to mean 0.5 / std 0.5.
transform = transforms.Compose([transforms.ToTensor(),
transforms.Normalize([0.5],[0.5])])
# MNIST training split, downloaded to ./data/ on first use.
data_train = datasets.MNIST(root="./data/",
transform=transform,
train=True,
download=True)
"""
root用于指定数据集在下载之后的存放路径
transform用于指定导入数据集时需要对数据进行哪种变换操作
train用于指定在数据集下载完成后需要载入哪部分数据,True——训练集部分,False——测试集部分
"""
data_test = datasets.MNIST(root="./data/",
transform=transform,
train=False,
download=True)
"""
在数据下载完成并且载入后,我们还需要对数据进行装载。
可以将数据的载入理解为对图片的处理,在处理完成后,就需要将这些图片打包好送给我们的模型进行训练了,
而装载就是这个打包的过程
"""
# DataLoader batches (64 per batch) and shuffles the dataset each epoch.
data_loader_train = torch.utils.data.DataLoader(dataset=data_train,
batch_size=64,
shuffle=True)
data_loader_test = torch.utils.data.DataLoader(dataset=data_test,
batch_size=64,
shuffle=True)
class Model(torch.nn.Module):
    """
    Small CNN for 28x28 MNIST digits: two 3x3 conv layers (1->64->128)
    with ReLU and a 2x2 max-pool, then a 1024-unit fully connected layer
    with dropout and a 10-way linear output.
    """

    def __init__(self):
        super(Model, self).__init__()
        # Feature extractor: 28x28x1 -> 28x28x128 -> pooled to 14x14x128.
        self.conv1 = torch.nn.Sequential(
            torch.nn.Conv2d(1, 64, kernel_size=3, stride=1, padding=1),
            torch.nn.ReLU(),
            torch.nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1),
            torch.nn.ReLU(),
            torch.nn.MaxPool2d(stride=2, kernel_size=2)
        )
        # Classifier head over the flattened 14*14*128 feature map.
        self.dense = torch.nn.Sequential(
            torch.nn.Linear(14*14*128, 1024),
            torch.nn.ReLU(),
            torch.nn.Dropout(p=0.5),
            torch.nn.Linear(1024, 10)
        )

    def forward(self, x):
        features = self.conv1(x)
        flat = features.view(-1, 14*14*128)  # flatten to (batch, 25088)
        return self.dense(flat)
# 创建实例
# Instantiate the network, loss, and optimizer.
model = Model()
cost = torch.nn.CrossEntropyLoss()
# Adam adapts per-parameter learning rates; the default lr is used here.
optimizer = torch.optim.Adam(model.parameters())

n_epochs = 5  # hyper-parameter

for epoch in range(n_epochs):
    running_loss = 0.0
    running_correct = 0
    print("Epoch:{}/{}".format(epoch, n_epochs))
    print("-"*10)
    start_time = time.time()
    for data in data_loader_train:
        X_train, y_train = data
        X_train, y_train = Variable(X_train), Variable(y_train)
        outputs = model(X_train)
        _, pred = torch.max(outputs, 1)   # predicted class index per sample
        optimizer.zero_grad()             # reset gradients before backward
        loss = cost(outputs, y_train)
        loss.backward()
        optimizer.step()                  # apply the gradient update
        running_loss += loss.data
        running_correct += torch.sum(pred == y_train.data)
    testing_correct = 0
    for data in data_loader_test:
        X_test, y_test = data
        # BUG FIX: the original assigned the wrapped tensors to X_train/y_train,
        # leaving X_test/y_test unwrapped.
        X_test, y_test = Variable(X_test), Variable(y_test)
        outputs = model(X_test)
        _, pred = torch.max(outputs, 1)
        testing_correct += torch.sum(pred == y_test.data)
    end_time = time.time()
    # BUG FIX: elapsed time is end - start; the original printed a negative value.
    print("Time:{}".format(end_time - start_time))
    print("Loss is:{:.4f}, Train Accuary is:{:.4f}%, Test Accuracy is:{:.4f}%".format(
        running_loss/len(data_train), 100*running_correct/len(data_train), 100*testing_correct/len(data_test)
    ))
import torch
import torchvision
from torchvision import datasets, transforms, models
import os
import matplotlib.pyplot as plt
from torch.autograd import Variable
import time
# Data loading: expects DogsVSCats/train and DogsVSCats/valid folders with
# one subdirectory per class (ImageFolder convention).
data_dir = "DogsVSCats"
# NOTE(review): transforms.Scale is the deprecated (pre-0.2) name of
# transforms.Resize — keep only if running the book's torchvision version.
data_transform = {x:transforms.Compose([transforms.Scale([64, 64]),
transforms.ToTensor()])
for x in ["train", "valid"]}
image_datasets = {x:datasets.ImageFolder(root=os.path.join(data_dir,x),
transform=data_transform[x])
for x in ["train", "valid"]}
# Batch (16 per batch) and shuffle both splits.
dataloader = {x:torch.utils.data.DataLoader(dataset=image_datasets[x],
batch_size=16,
shuffle=True)
for x in ["train", "valid"]}
# Fetch one batch for preview/analysis.
X_example, y_example = next(iter(dataloader["train"]))
print(u"X_example个数{}".format(len(X_example)))
print(u"y_example个数{}".format(len(y_example)))
# 模型搭建和参数优化
class Models(torch.nn.Module):
    """Skeleton CNN for Dogs-vs-Cats; the `...` parts are placeholders to fill in."""

    def __init__(self):
        super(Models, self).__init__()
        # Feature extractor (placeholder layers).
        self.Conv = torch.nn.Sequential(torch.nn.Conv2d(...),
                                        ..., ..., ...)
        # Classifier head (placeholder layers).
        self.Classes = torch.nn.Sequential(torch.nn.Conv2d(...),
                                           ..., ..., ...)

    def forward(self, input):
        # BUG FIX: the original called self.COnv and x.voew (typos for Conv/view).
        x = self.Conv(input)
        # -1 lets the batch dimension be inferred; the rest flattens the features.
        x = self.Classes(x.view(-1, ... * ... * ...))
        return x
model = Models()  # instantiate the skeleton model
# print(model)
# Loss and optimizer; CrossEntropyLoss takes no constructor arguments here.
loss_f = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
epoch_n = 10  # hyper-parameter
time_open = time.time()

for epoch in range(epoch_n):
    print("Epoch:{}/{}".format(epoch, epoch_n - 1))
    print("-" * 10)
    for phase in ["train", "valid"]:
        if phase == "train":
            print("Training...")
            model.train(True)    # enable dropout/batch-norm training behaviour
        else:
            print("Validing...")
            model.train(False)   # evaluation mode for validation
        running_loss = 0.0
        running_corrects = 0
        for batch, data in enumerate(dataloader[phase], 1):
            X, y = data
            X, y = Variable(X), Variable(y)
            y_pred = model(X)
            # torch.max(t, 1) -> (row maxima, row argmax indices); we keep the indices.
            _, pred = torch.max(y_pred.data, 1)
            optimizer.zero_grad()  # zero gradients before backward
            loss = loss_f(y_pred, y)
            if phase == "train":
                loss.backward()
                optimizer.step()
            # NOTE(review): loss.data[0] is the PyTorch<=0.3 idiom; on >=0.4 use loss.item().
            running_loss += loss.data[0]
            running_corrects += torch.sum(pred == y.data)
            if batch % 500 == 0 and phase == "train":
                print("Batch:{}, Train Loss:{:.4f}, Train ACC:{:.4f}".format(
                    batch, running_loss / batch, 100 * running_corrects / (16 * batch)))
        epoch_loss = running_loss * 16 / len(image_datasets[phase])
        epoch_acc = 100 * running_corrects / len(image_datasets[phase])
        print("{} Loss:{:.4f} Acc:{:.4f}%".format(phase, epoch_loss, epoch_acc))

time_end = time.time() - time_open  # total wall-clock time
print(time_end)
# 模型的迁移 (model migration to GPU):
# 将在模型训练的过程中需要计算的参数全部迁移至 GPUs 上
# Sketch of moving training onto the GPU; `...` marks omitted code.
print(torch.cuda.is_available())  # True means a usable CUDA device is present
# BUG FIX: the original misspelled is_available as is_avaliable.
Use_gpu = torch.cuda.is_available()
if Use_gpu:
    model = model.cuda()

epoch_n = 10
time_open = time.time()
...
...
for batch, data in enumerate(dataloader[phase], 1):
    X, y = data
    if Use_gpu:
        # BUG FIX: the original wrote Variable(X,cuda()) — a comma where a dot belongs.
        X, y = Variable(X.cuda()), Variable(y.cuda())
    else:
        X, y = Variable(X), Variable(y)
    ...
...
time_end = time.time() - time_open
print(time_end)
import torch
import torchvision
from torchvision import datasets, transforms, models
import os
import matplotlib.pyplot as plt
from torch.autograd import Variable
import time
# Data loading: 224x224 RGB images normalized to [-1, 1] per channel.
data_dir = "DogsVSCats"
# NOTE(review): transforms.Scale is the deprecated name of transforms.Resize.
data_transform = {x:transforms.Compose([transforms.Scale([224, 224]),
transforms.ToTensor(),
transforms.Normalize(mean=[0.5, 0.5, 0.5],
std=[0.5, 0.5, 0.5])])
for x in ["train", "valid"]}
image_datasets = {x:datasets.ImageFolder(root=os.path.join(data_dir,x),
transform=data_transform[x])
for x in ["train", "valid"]}
# Batch (16) and shuffle both splits.
dataloader = {x:torch.utils.data.DataLoader(dataset=image_datasets[x],
batch_size=16,
shuffle=True)
for x in ["train", "valid"]}
# Preview one batch and the class <-> index mapping.
X_example, y_example = next(iter(dataloader["train"]))
example_classes = image_datasets["train"].classes
index_classes = image_datasets["train"].class_to_idx
print(u"X_example个数{}".format(len(X_example)))
print(u"y_example个数{}".format(len(y_example)))
# Load VGG16 pre-trained on ImageNet and freeze its feature extractor.
model = models.vgg16(pretrained=True)
for param in model.parameters():
    # Frozen: no gradients are computed, so these weights never update.
    param.requires_grad = False
# Replace the classifier head with a new, trainable 2-class head.
# BUG FIX: the original wrote torch.nn.Liner (typo for Linear).
model.classifier = torch.nn.Sequential(torch.nn.Linear(25088, 4096),
                                       torch.nn.ReLU(),
                                       torch.nn.Dropout(p=0.5),
                                       torch.nn.Linear(4096, 4096),
                                       torch.nn.ReLU(),
                                       torch.nn.Dropout(p=0.5),
                                       torch.nn.Linear(4096, 2))
Use_gpu = torch.cuda.is_available()
if Use_gpu:
    model = model.cuda()

# Loss and optimizer: only the new classifier's parameters are optimized
# (the backbone is frozen above).
cost = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.classifier.parameters())
# NOTE(review): the two lines below redefine the loss/optimizer; the
# lr=0.00001 optimizer is the one the training loop actually uses.
loss_f = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.classifier.parameters(), lr=0.00001)
epoch_n = 5  # hyper-parameter
time_open = time.time()
for epoch in range(epoch_n):
    print("Epoch:{}/{}".format(epoch, epoch_n - 1))
    print("-" * 10)
    for phase in ["train", "valid"]:
        if phase == "train":
            print("Training...")
            model.train(True)    # training mode (dropout active)
        else:
            print("Validing...")
            model.train(False)   # evaluation mode
        running_loss = 0.0
        running_corrects = 0
        for batch, data in enumerate(dataloader[phase], 1):
            X, y = data
            if Use_gpu:
                # BUG FIX: the original had Variable(y.cuda) — missing call
                # parentheses, which wraps the bound method, not a tensor.
                X, y = Variable(X.cuda()), Variable(y.cuda())
            else:
                X, y = Variable(X), Variable(y)
            y_pred = model(X)
            # Row-wise argmax of the logits = predicted class indices.
            _, pred = torch.max(y_pred.data, 1)
            optimizer.zero_grad()  # zero gradients before backward
            loss = loss_f(y_pred, y)
            if phase == "train":
                loss.backward()
                optimizer.step()
            # NOTE(review): loss.data[0] is the PyTorch<=0.3 idiom; use loss.item() on >=0.4.
            running_loss += loss.data[0]
            running_corrects += torch.sum(pred == y.data)
            if batch % 500 == 0 and phase == "train":
                print("Batch:{}, Train Loss:{:.4f}, Train ACC:{:.4f}".format(
                    batch, running_loss / batch, 100 * running_corrects / (16 * batch)))
        epoch_loss = running_loss * 16 / len(image_datasets[phase])
        epoch_acc = 100 * running_corrects / len(image_datasets[phase])
        print("{} Loss:{:.4f} Acc:{:.4f}%".format(phase, epoch_loss, epoch_acc))

time_end = time.time() - time_open  # total wall-clock time
print(time_end)
import torch
import torchvision
from torchvision import datasets, transforms, models
import os
import matplotlib.pyplot as plt
from torch.autograd import Variable
import time
path = "dog_vs_cat"
transform = transforms.Compose([transforms.CenterCrop(224),
                                transforms.ToTensor(),
                                transforms.Normalize(mean=[0.5, 0.5, 0.5],
                                                     std=[0.5, 0.5, 0.5])])
# Data loading
data_dir = "DogsVSCats"
# BUG FIX: the original resized to [224, 24]; ResNet50 expects 224x224 input.
# (transforms.Scale is the deprecated name of transforms.Resize.)
data_transform = {x:transforms.Compose([transforms.Scale([224, 224]),
                                        transforms.ToTensor(),
                                        transforms.Normalize(mean=[0.5, 0.5, 0.5],
                                                             std=[0.5, 0.5, 0.5])])
                  for x in ["train", "valid"]}
image_datasets = {x:datasets.ImageFolder(root=os.path.join(data_dir,x),
                                         transform=data_transform[x])
                  for x in ["train", "valid"]}
# Batch (16) and shuffle both splits.
dataloader = {x:torch.utils.data.DataLoader(dataset=image_datasets[x],
                                            batch_size=16,
                                            shuffle=True)
              for x in ["train", "valid"]}
# Preview one batch and the class <-> index mapping.
X_example, y_example = next(iter(dataloader["train"]))
example_classes = image_datasets["train"].classes
index_classes = image_datasets["train"].class_to_idx
print(u"X_example个数{}".format(len(X_example)))
print(u"y_example个数{}".format(len(y_example)))
# Pre-trained ResNet50; freeze the backbone, then replace the final fc layer.
model = models.resnet50(pretrained=True)
Use_gpu = torch.cuda.is_available()
for param in model.parameters():
    # Frozen backbone: no gradients, no updates.
    param.requires_grad = False
# New trainable 2-class output layer (ResNet50's fc input width is 2048).
model.fc = torch.nn.Linear(2048, 2)
if Use_gpu:
    model = model.cuda()
# Only the new fc layer's parameters are optimized.
cost = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.fc.parameters())
# NOTE(review): loss_f/optimizer below override the two lines above; the
# lr=0.00001 optimizer is the one the training loop uses.
loss_f = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.fc.parameters(), lr=0.00001)
epoch_n = 5  # hyper-parameter
time_open = time.time()
for epoch in range(epoch_n):
    print("Epoch:{}/{}".format(epoch, epoch_n - 1))
    print("-" * 10)
    for phase in ["train", "valid"]:
        if phase == "train":
            print("Training...")
            model.train(True)    # training mode
        else:
            print("Validing...")
            model.train(False)   # evaluation mode
        running_loss = 0.0
        running_corrects = 0
        for batch, data in enumerate(dataloader[phase], 1):
            X, y = data
            if Use_gpu:
                # BUG FIX: the original had Variable(y.cuda) — missing call parentheses.
                X, y = Variable(X.cuda()), Variable(y.cuda())
            else:
                X, y = Variable(X), Variable(y)
            y_pred = model(X)
            # Row-wise argmax of the logits = predicted class indices.
            _, pred = torch.max(y_pred.data, 1)
            optimizer.zero_grad()  # zero gradients before backward
            loss = loss_f(y_pred, y)
            if phase == "train":
                loss.backward()
                optimizer.step()
            # NOTE(review): loss.data[0] is the PyTorch<=0.3 idiom; use loss.item() on >=0.4.
            running_loss += loss.data[0]
            running_corrects += torch.sum(pred == y.data)
            if batch % 500 == 0 and phase == "train":
                print("Batch:{}, Train Loss:{:.4f}, Train ACC:{:.4f}".format(
                    batch, running_loss / batch, 100 * running_corrects / (16 * batch)))
        epoch_loss = running_loss * 16 / len(image_datasets[phase])
        epoch_acc = 100 * running_corrects / len(image_datasets[phase])
        print("{} Loss:{:.4f} Acc:{:.4f}%".format(phase, epoch_loss, epoch_acc))

time_end = time.time() - time_open  # total wall-clock time
print(time_end)
# -*- UTF-8 -*-
# 图片的内容损失
# 图片的风格损失
"""
首先,我们需要获取一张内容图片和一张风格图片;
然后定义两个 度量值,一个度量叫作内容度量值,另一个度量叫作风格度量值,
其中的内容度量值用于衡量图片之间的内容差异程度,风格度量值用于衡量图片之间的风格差异程度;
最后,建立神经网络模型,对内容图片中的 内容和风格图片的风格进行提取,以内容图片为基准将其输入建立的模型中,
并不断调整内容度量值和风格度量值,让它们趋近于最小,
最后输出的图片就是内容与风格融合的图片。
"""
import torch
# 定义图像的内容损失
class Content_loss(torch.nn.Module):
    """
    Transparent layer that measures content loss.

    weight controls how strongly content influences the synthesized image;
    target is the content image's feature map, detached so no gradients
    flow back into it.
    """

    def __init__(self, weight, target):
        super(Content_loss, self).__init__()
        self.weight = weight
        self.target = target.detach()*weight   # lock the target features
        self.loss_fn = torch.nn.MSELoss()      # content distance = mean squared error

    def forward(self, input):
        """Record MSE(input * weight, target) and pass the input through unchanged."""
        self.loss = self.loss_fn(input*self.weight, self.target)
        return input

    def backward(self):
        """Back-propagate the recorded loss (graph retained for reuse) and return it."""
        self.loss.backward(retain_graph=True)
        return self.loss
# 图像的风格损失
class Style_loss(torch.nn.Module):
    """
    Transparent layer measuring style loss as the MSE between the input's
    Gram matrix and a locked target Gram matrix. Depends on the Gram_matrix
    module defined in this file.
    """

    def __init__(self, weight, target):
        super(Style_loss, self).__init__()
        self.weight = weight
        self.target = target.detach()*weight  # locked style target (already a Gram matrix)
        self.loss_fn = torch.nn.MSELoss()
        self.gram = Gram_matrix()

    def forward(self, input):
        """Record the weighted Gram-matrix MSE and pass the input through unchanged."""
        self.Gram = self.gram(input.clone())
        self.Gram.mul_(self.weight)
        self.loss = self.loss_fn(self.Gram, self.target)
        return input

    def backward(self):
        """Back-propagate the recorded style loss and return it."""
        self.loss.backward(retain_graph=True)
        return self.loss

    # BUG FIX: the original only defined the misspelled name `backeard`.
    # Kept as an alias so existing call sites continue to work.
    backeard = backward
# 格拉姆矩阵(Gram matrix)
class Gram_matrix(torch.nn.Module):
    """
    Compute the normalized Gram matrix of a feature map.

    The inner product of the flattened feature channels amplifies the
    dominant responses, which is what makes it a useful style descriptor:
    the amplified style dominates the loss and therefore the final image.
    """

    def forward(self, input):
        a, b, c, d = input.size()        # batch, channels, height, width
        rows = input.view(a*b, c*d)      # one row per (batch, channel) pair
        gram = torch.mm(rows, rows.t())
        return gram.div(a*b*c*d)         # normalize by the element count
# Model construction and parameter optimization:
# reuse VGG16's feature-extraction layers for style transfer.
# NOTE(review): this snippet assumes `models`, `copy`, `use_gpu`,
# `content_img` and `style_img` are defined earlier; the complete,
# runnable version appears later in this file.
cnn = models.vgg16(pretrained=True).features
content_layer = ["Conv_3"]   # layer at which content is extracted
style_layer = ["conv_1", "Conv_2", "Conv_3", "Conv_4"]  # layers at which style is extracted
content_losses = []  # Content_loss layers inserted into the new model
style_losses = []    # Style_loss layers inserted into the new model
content_weight = 1   # relative influence of content on the result
style_weight = 1     # relative influence of style on the result

new_model = torch.nn.Sequential()
model = copy.deepcopy(cnn)
# BUG FIX: the original called gram_matrix() (undefined); the class is Gram_matrix.
gram = Gram_matrix()
if use_gpu:
    new_model = new_model.cuda()
    gram = gram.cuda()

index = 1
for layer in list(model)[:8]:  # only the first 8 layers of the extractor are used
    if isinstance(layer, torch.nn.Conv2d):
        name = "Conv_" + str(index)
        new_model.add_module(name, layer)  # grow the new model layer by layer
        if name in content_layer:
            target = new_model(content_img).clone()
            content_loss = Content_loss(content_weight, target)
            new_model.add_module("content_loss_"+str(index), content_loss)
            content_losses.append(content_loss)
        if name in style_layer:
            target = new_model(style_img).clone()
            target = gram(target)
            # BUG FIX: the original built a Content_loss here and appended it
            # to content_losses under a "content_loss_" name.
            style_loss = Style_loss(style_weight, target)
            new_model.add_module("style_loss_" + str(index), style_loss)
            style_losses.append(style_loss)
    if isinstance(layer, torch.nn.ReLU):
        name = "Relu_" + str(index)
        new_model.add_module(name, layer)
        index = index + 1
    if isinstance(layer, torch.nn.MaxPool2d):
        name = "MaxPool_" + str(index)
        new_model.add_module(name, layer)

# Parameter optimization: the image itself is the trainable parameter.
input_img = content_img.clone()
parameter = torch.nn.Parameter(input_img.data)
optimizer = torch.optim.LBFGS([parameter])

# Train: LBFGS re-evaluates the closure internally, so the whole step lives in it.
epoch_n = 300
epoch = [0]  # a list so the closure can mutate the counter
while epoch[0] <= epoch_n:
    def closure():
        optimizer.zero_grad()
        style_score = 0
        content_score = 0
        parameter.data.clamp_(0, 1)  # keep pixel values in [0, 1]
        new_model(parameter)         # populates .loss on every inserted loss layer
        for sl in style_losses:
            # Style_loss historically spells its backward method `backeard`.
            style_score += sl.backeard()
        for cl in content_losses:
            # BUG FIX: the original called cl.backeard(), which Content_loss never defined.
            content_score += cl.backward()
        epoch[0] += 1
        if epoch[0] % 50 == 0:
            print("Epoch:{}, Style Loss:{:.4f}, Content Loss:{:.4f}".
                  format(epoch[0], style_score.data[0], content_score.data[0]))
        return style_score + content_score
    optimizer.step(closure)
# 实现图像风格迁移的完整代码
import torch
import torchvision
from torchvision import transforms, models
from PIL import Image
import matplotlib.pyplot as plt
from torch.autograd import Variable
import copy
# Resize both images to 224x224 and convert to tensors.
# NOTE(review): transforms.Scale is the deprecated name of transforms.Resize.
transform = transforms.Compose([transforms.Scale([224, 224]),
                                transforms.ToTensor()])

def loading(path=None):
    """Load an image file and return it as a (1, C, H, W) tensor."""
    img = Image.open(path)
    img = transform(img)
    img = img.unsqueeze(0)  # add the batch dimension
    return img
# Load the content and style images and move them to the GPU.
content_img = loading("images/4.jpg")
content_img = Variable(content_img).cuda()
style_img = loading("images/1.jpg")
style_img = Variable(style_img).cuda()
class Content_loss(torch.nn.Module):
    """Pass-through module that records the content loss of its input."""

    def __init__(self, weight, target):
        super(Content_loss, self).__init__()
        self.weight = weight
        # Detach: the target is a fixed reference, not part of the graph.
        self.target = target.detach()*weight
        self.loss_fn = torch.nn.MSELoss()

    def forward(self, input):
        weighted = input*self.weight
        self.loss = self.loss_fn(weighted, self.target)
        return input

    def backward(self):
        self.loss.backward(retain_graph=True)
        return self.loss
class Gram_matrix(torch.nn.Module):
    """Normalized Gram matrix of a (batch, channels, height, width) feature map."""

    def forward(self, input):
        batch, channels, height, width = input.size()
        flattened = input.view(batch*channels, height*width)
        product = torch.mm(flattened, flattened.t())
        return product.div(batch*channels*height*width)
class Style_loss(torch.nn.Module):
    """
    Pass-through module recording style loss: MSE between the input's
    weighted Gram matrix and a locked target Gram matrix.
    """

    def __init__(self, weight, target):
        super(Style_loss, self).__init__()
        self.weight = weight
        self.target = target.detach()*weight  # locked target (already a Gram matrix)
        self.loss_fn = torch.nn.MSELoss()
        self.gram = Gram_matrix()

    def forward(self, input):
        self.Gram = self.gram(input.clone())
        self.Gram.mul_(self.weight)
        self.loss = self.loss_fn(self.Gram, self.target)
        return input

    def backward(self):
        """Back-propagate the recorded style loss and return it."""
        self.loss.backward(retain_graph=True)
        return self.loss

    # BUG FIX: the original only defined the misspelled `backeard`.
    # Kept as an alias so existing call sites continue to work.
    backeard = backward
use_gpu = torch.cuda.is_available()
# Feature-extraction half of a pre-trained VGG16.
cnn = models.vgg16(pretrained=True).features
if use_gpu:
    cnn = cnn.cuda()
model = copy.deepcopy(cnn)

content_layer = ["Conv_3"]   # where content is extracted
style_layer = ["conv_1", "Conv_2", "Conv_3", "Conv_4"]  # where style is extracted
content_losses = []
style_losses = []
content_weight = 1
style_weight = 1000  # style dominates the combined objective

new_model = torch.nn.Sequential()
# model = copy.deepcopy(cnn)
gram = Gram_matrix()
if use_gpu:
    new_model = new_model.cuda()
    gram = gram.cuda()

index = 1
for layer in list(model)[:8]:  # only the first 8 extractor layers are used
    if isinstance(layer, torch.nn.Conv2d):
        name = "Conv_" + str(index)
        new_model.add_module(name, layer)  # grow the new model layer by layer
        if name in content_layer:
            target = new_model(content_img).clone()
            content_loss = Content_loss(content_weight, target)
            new_model.add_module("content_loss_"+str(index), content_loss)
            content_losses.append(content_loss)
        if name in style_layer:
            target = gram(new_model(style_img).clone())
            # BUG FIX: the original used Content_loss here and appended the
            # style loss to content_losses under a "content_loss_" name.
            style_loss = Style_loss(style_weight, target)
            new_model.add_module("style_loss_" + str(index), style_loss)
            style_losses.append(style_loss)
    if isinstance(layer, torch.nn.ReLU):
        name = "Relu_" + str(index)
        new_model.add_module(name, layer)
        index = index + 1
    if isinstance(layer, torch.nn.MaxPool2d):
        name = "MaxPool_" + str(index)
        new_model.add_module(name, layer)

# Parameter optimization: the image itself is the trainable parameter.
input_img = content_img.clone()
parameter = torch.nn.Parameter(input_img.data)
optimizer = torch.optim.LBFGS([parameter])

# Train: LBFGS re-evaluates the closure internally, so the full step lives in it.
epoch_n = 300
epoch = [0]  # a list so the closure can mutate the counter
while epoch[0] <= epoch_n:
    def closure():
        optimizer.zero_grad()
        style_score = 0
        content_score = 0
        parameter.data.clamp_(0, 1)  # keep pixel values in [0, 1]
        new_model(parameter)         # populates .loss on every inserted loss layer
        for sl in style_losses:
            # Style_loss historically spells its backward method `backeard`.
            style_score += sl.backeard()
        for cl in content_losses:
            # BUG FIX: the original called the nonexistent cl.backeard().
            content_score += cl.backward()
        epoch[0] += 1
        if epoch[0] % 50 == 0:
            print("Epoch:{}, Style Loss:{:.4f}, Content Loss:{:.4f}".
                  format(epoch[0], style_score.data[0], content_score.data[0]))
        return style_score + content_score
    optimizer.step(closure)
# 训练代码 (training code)
# 通过线性变换实现自动编码器模型
# 线性变换的方式仅使用线性映射和激活函数作为神经网络结构的主要组成部分
import torch
import torchvision
from torchvision import datasets, transforms
from torch.autograd import Variable
import numpy as np
import matplotlib.pyplot as plt
import time
# PIL -> tensor, then normalize the grayscale channel to mean 0.5 / std 0.5.
transform = transforms.Compose([transforms.ToTensor(),
transforms.Normalize([0.5], [0.5])])
# MNIST training split, downloaded to ./data/ on first use.
dataset_train = datasets.MNIST(root="./data/",
transform=transform,
train=True,
download=True)
# root: where the dataset is stored after download
# transform: transforms applied while importing the dataset
# train: which split to load — True for the training set, False for the test set
dataset_test = datasets.MNIST(root="./data/",
transform=transform,
train=False,
download=True)
# Wrap both splits in DataLoaders (batch size 4, shuffled each epoch).
train_load = torch.utils.data.DataLoader(dataset=dataset_train,
batch_size=4,
shuffle=True)
test_load = torch.utils.data.DataLoader(dataset=dataset_test,
batch_size=4,
shuffle=True)
# iter/next fetch one batch of images + labels; make_grid tiles the batch
# into a single preview image.
images, label = next(iter(train_load))
print(images.shape)
images_example = torchvision.utils.make_grid(images)
images_example = images_example.numpy().transpose(1, 2,
0)  # to numpy and CHW -> HWC so Matplotlib can render it
mean = [0.5, 0.5, 0.5]
std = [0.5, 0.5, 0.5]
images_example = images_example * std + mean  # undo the normalization for display
plt.imshow(images_example)
plt.show()
shape = images_example.shape
noisy_images = images_example + np.random.randn(
*shape)  # shape is a tuple, so it must be unpacked: randn(shape) raises, randn(*shape) works
# (explicit numbers like np.random.randn(32, 32, 3) also work)
noisy_images = np.clip(noisy_images, 0., 1.)  # clamp back into valid pixel range
plt.imshow(noisy_images)
plt.show()
# 定义线性编码器
class AutoEncoder(torch.nn.Module):
    """
    Fully connected autoencoder for flattened 28x28 images:
    784 -> 128 -> 64 -> 32 (encoder) and 32 -> 64 -> 128 -> 784 (decoder).
    """

    def __init__(self):
        super(AutoEncoder, self).__init__()
        self.encoder = torch.nn.Sequential(
            torch.nn.Linear(28 * 28, 128),
            torch.nn.ReLU(),
            torch.nn.Linear(128, 64),
            torch.nn.ReLU(),
            torch.nn.Linear(64, 32),
            torch.nn.ReLU()
        )
        self.decoder = torch.nn.Sequential(
            torch.nn.Linear(32, 64),
            torch.nn.ReLU(),
            torch.nn.Linear(64, 128),
            torch.nn.ReLU(),
            torch.nn.Linear(128, 28 * 28)
        )

    def forward(self, input):
        code = self.encoder(input)   # compress to a 32-dim code
        return self.decoder(code)    # reconstruct the 784-dim image
model = AutoEncoder()
print(model)

# Train with a denoising objective: reconstruct the CLEAN image from a
# noisy copy. (Loop indentation restored from the mangled source.)
optimizer = torch.optim.Adam(model.parameters())
loss_f = torch.nn.MSELoss()
epoch_n = 5

for epoch in range(epoch_n):
    time_start = time.time()
    running_loss = 0.0
    print("Epoch:{}/{}".format(epoch, epoch_n))
    print("-" * 10)
    for data in train_load:
        X_train, _ = data  # labels are unused by an autoencoder
        # Corrupt the input with Gaussian noise, clamped back to [0, 1].
        noisy_X_train = X_train + 0.5 * torch.randn(*X_train.shape)
        noisy_X_train = torch.clamp(noisy_X_train, 0., 1.)
        # Flatten both to (batch, 784) for the fully connected layers.
        X_train, noisy_X_train = Variable(X_train.view(-1, 28 * 28)), Variable(noisy_X_train.view(-1, 28 * 28))
        train_pre = model(noisy_X_train)
        loss = loss_f(train_pre, X_train)  # compare against the clean image
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        running_loss += loss.data
    time_end = time.time() - time_start
    print("The time of Epoch:{}".format(time_end))
    print("Loss is:{:.4f}".format(running_loss / len(dataset_train)))
    print("-" * 20)
    if epoch % 4 == 0:
        """
        # 保存整个神经网络的结构和模型参数
        torch.save(mymodel, 'mymodel.pkl')
        # 只保存神经网络的模型参数
        torch.save(mymodel.state_dict(), 'mymodel_params.pkl')
        导入模型
        mymodel = torch.load('mymodel.pkl')
        """
        print("Save model")
        # Save the whole network (structure + parameters).
        torch.save(model, 'model.pkl')
# Run
"""
Epoch:0/5
----------
The time of Epoch:120.15813112258911
Loss is:0.0315
--------------------
Epoch:1/5
----------
The time of Epoch:115.89295601844788
Loss is:0.0228
--------------------
Epoch:2/5
----------
The time of Epoch:126.67610955238342
Loss is:0.0210
--------------------
Epoch:3/5
----------
The time of Epoch:141.03569293022156
Loss is:0.0202
--------------------
Epoch:4/5
----------
The time of Epoch:153.3457634449005
Loss is:0.0197
--------------------
Save model
"""
# 测试代码 (test code)
import torch
import torchvision
from torchvision import datasets, transforms
from torch.autograd import Variable
import numpy as np
import matplotlib.pyplot as plt
# PIL -> tensor, then normalize the grayscale channel to mean 0.5 / std 0.5.
transform = transforms.Compose([transforms.ToTensor(),
transforms.Normalize([0.5], [0.5])])
dataset_train = datasets.MNIST(root="./data/",
transform=transform,
train=True,
download=True)
# root: where the dataset is stored after download
# transform: transforms applied while importing the dataset
# train: which split to load — True for training, False for test
dataset_test = datasets.MNIST(root="./data/",
transform=transform,
train=False,
download=True)
# Wrap the test split in a DataLoader (batch size 4, shuffled).
test_load = torch.utils.data.DataLoader(dataset=dataset_test,
batch_size=4,
shuffle=True)
# Grab one batch and build a noisy preview image of it.
X_test, _ = next(iter(test_load))
img1 = torchvision.utils.make_grid(X_test)
img1 = img1.numpy().transpose(1, 2, 0)  # to numpy, CHW -> HWC for Matplotlib
mean = [0.5, 0.5, 0.5]
std = [0.5, 0.5, 0.5]
img1 = img1 * std + mean  # undo normalization for display
noisy_X_test = img1 + 0.5 * np.random.randn(*img1.shape)
noisy_X_test = np.clip(noisy_X_test, 0., 1.)
plt.figure()
plt.imshow(noisy_X_test)
# Build the actual model input: noisy, clamped, flattened to (batch, 784).
img2 = X_test + 0.5 * torch.randn(*X_test.shape)
img2 = torch.clamp(img2, 0., 1.)
img2 = Variable(img2.view(-1, 28*28))
# Load the trained autoencoder saved by the training script.
model = torch.load('model.pkl')
test_pred = model(img2)
# Reshape the reconstruction back to image form and display it.
img_test = test_pred.data.view(-1, 1, 28, 28)
img2 = torchvision.utils.make_grid(img_test)
img2 = img2.numpy().transpose(1, 2, 0)
img2 = img2 * std + mean
img2 = np.clip(img2, 0., 1.)
plt.figure()
plt.imshow(img2)
参考书籍《深度学习之PyTorch实战计算机视觉》