```python
import numpy as np
import torch
import matplotlib.pyplot as plt
import torch.nn.functional as F
from matplotlib.font_manager import FontProperties
from mpl_toolkits.mplot3d import Axes3D

# Ubuntu system font path (used for CJK labels in matplotlib figures)
font = FontProperties(fname='/usr/share/fonts/truetype/arphic/ukai.ttc')
PI = np.pi

class DynamicNet(torch.nn.Module):
    def __init__(self, D_in, H, D_out):
        super(DynamicNet, self).__init__()
        # Fully connected layers: input -> hidden and hidden -> output.
        self.input_linear = torch.nn.Linear(D_in, H)
        self.output_linear = torch.nn.Linear(H, D_out)

    def forward(self, x):
        # clamp(min=0) acts as a ReLU activation.
        h_relu = self.input_linear(x).clamp(min=0)
        y_pred = self.output_linear(h_relu)
        return y_pred

# N: number of samples; D_in: input dim; H: hidden dim; D_out: output dim.
N, D_in, H, D_out = 100, 1, 100, 1
# Training data: noisy samples of sin(x) on [-PI, PI].
x = torch.unsqueeze(torch.linspace(-PI, PI, 100), dim=1)
y = torch.sin(x) + 0.2 * torch.rand(x.size())
print("x size: {}".format(x.size()))
print("y size: {}".format(y.size()))
model = DynamicNet(D_in, H, D_out)
criterion = torch.nn.MSELoss(reduction="sum")
optimizer = torch.optim.SGD(model.parameters(), lr=1e-4, momentum=0.9)
print("model inside: {}".format(model))
print("Model's state_dict:")
for param_tensor in model.state_dict():
    print(param_tensor, "\t", model.state_dict()[param_tensor].size())
print("Optimizer's state_dict:")
for var_name in optimizer.state_dict():
    print(var_name, "\t", optimizer.state_dict()[var_name])
```
```
x size: torch.Size([100, 1])
y size: torch.Size([100, 1])
model inside: DynamicNet(
  (input_linear): Linear(in_features=1, out_features=100, bias=True)
  (output_linear): Linear(in_features=100, out_features=1, bias=True)
)
Model's state_dict:
input_linear.weight 	 torch.Size([100, 1])
input_linear.bias 	 torch.Size([100])
output_linear.weight 	 torch.Size([1, 100])
output_linear.bias 	 torch.Size([1])
Optimizer's state_dict:
state 	 {}
param_groups 	 [{'lr': 0.0001, 'momentum': 0.9, 'dampening': 0, 'weight_decay': 0, 'nesterov': False, 'params': [140069562969328, 140069562969400, 140069562969472, 140069562969544]}]
```
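In the optimizer's state_dict, `state` holds per-parameter running state (empty here because no optimization step has run yet), while `param_groups` carries the hyperparameters. These can also be read from the live optimizer; a minimal sketch, continuing from the snippet above:

```python
# Each parameter group is a dict holding that group's hyperparameters.
for group in optimizer.param_groups:
    print(group["lr"], group["momentum"], len(group["params"]))  # 0.0001 0.9 4
```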
(1) Network structure: the input has dimensions [100, 1], i.e., 100 samples with one value each, so the network takes a single input variable.
(2) Internal structure of the network:

| Parameter | Description |
|---|---|
| in_features | Number of inputs to the layer; here 1 and 100, i.e., a single input and 100 inputs (hidden layer) |
| out_features | Number of outputs of the layer; here 100 and 1, i.e., 100 outputs (hidden layer) and a single output |
| bias | Whether the layer has a bias term: True means yes, otherwise no |
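These attributes can be inspected directly on each `torch.nn.Linear` layer; a minimal standalone sketch using the same layer sizes as above:

```python
import torch

hidden = torch.nn.Linear(1, 100)   # in_features=1, out_features=100, bias=True
output = torch.nn.Linear(100, 1)   # in_features=100, out_features=1, bias=True

print(hidden.in_features, hidden.out_features)   # 1 100
print(output.weight.size(), output.bias.size())  # torch.Size([1, 100]) torch.Size([1])
```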
(3) Conventional neural network structure:

| Structure | Dimensions |
|---|---|
| Input layer | [100, 1]: 100 samples with one value each, i.e., a single input |
| Input layer → hidden layer | [1, 100]: the hidden layer has 100 nodes, i.e., 100 weights and 100 biases |
| Hidden layer → output layer | [100, 1]: the output dimension is 1, i.e., a single output with 1 bias |
(4) The same structure as PyTorch stores it:

| Structure | Dimensions |
|---|---|
| Input layer → hidden layer weights | [100, 1] |
| Input layer → hidden layer bias | [100] |
| Hidden layer → output layer weights | [1, 100] |
| Hidden layer → output layer bias | [1] |
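The loading code below expects a checkpoint file on disk; the saving step is not shown in this post. A minimal sketch of how such a checkpoint could be produced, continuing from the first snippet above (the training loop is illustrative; the path and the `model_state_dict` / `optimizer_state_dict` keys are chosen to match the load call below):

```python
import os

# Illustrative training loop: fit the model to the noisy sine data.
for t in range(500):
    y_pred = model(x)
    loss = criterion(y_pred, y)
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

# Save model and optimizer parameters as one checkpoint dictionary.
os.makedirs("./pytorch_models", exist_ok=True)
torch.save({
    "model_state_dict": model.state_dict(),
    "optimizer_state_dict": optimizer.state_dict(),
}, "./pytorch_models/pre_sin_state_dict.pth")
```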
```python
import numpy as np
import torch

PI = np.pi
N, D_in, H, D_out = 100, 1, 100, 1
x = torch.unsqueeze(torch.linspace(-PI, PI, 100), dim=1)
y = torch.sin(x) + 0.2 * torch.rand(x.size())

class DynamicNet(torch.nn.Module):
    def __init__(self, D_in, H, D_out):
        super(DynamicNet, self).__init__()
        self.input_linear = torch.nn.Linear(D_in, H)
        self.output_linear = torch.nn.Linear(H, D_out)

    def forward(self, x):
        h_relu = self.input_linear(x).clamp(min=0)
        y_pred = self.output_linear(h_relu)
        return y_pred

model = DynamicNet(D_in, H, D_out)
optimizer = torch.optim.SGD(model.parameters(), lr=1e-4, momentum=0.9)
# Restore the model parameters from the checkpoint dictionary.
checkpoint = torch.load("./pytorch_models/pre_sin_state_dict.pth")
model.load_state_dict(checkpoint["model_state_dict"])
# Note: model.modules() returns a generator, so printing it only shows
# the generator object, not the layers themselves.
print("model structure: {}".format(model.modules()))
print("model dict: {}".format(model.state_dict()))
for name in model.state_dict():
    print("model node name: {}".format(name))
```
```
model structure: <generator object Module.modules at 0x...>
model dict: OrderedDict([('input_linear.weight', tensor([[ 0.6214],
        [-0.5627],
        [ 0.0381],
        ...
        [ 0.0678]])), ('input_linear.bias', tensor([-0.3924,  0.7454,  0.8201,  0.2507,  0.6988,  0.8026,  0.1359, -0.0758,
         0.5269,  0.2195,  0.5729, -0.9007,  0.4940, -0.2502,  0.5416, -0.4938,
        ...
         0.2432, -0.1625, -0.8684, -0.0433])), ('output_linear.weight', tensor([[-0.0078, -0.1481, -0.0208, -0.0123, -0.1144,  0.1286, -0.0427, -0.1011,
          0.0800, -0.0469, -0.0094, -0.3129, -0.1281,  0.0780, -0.0076,  0.1603,
        ...
          0.0296,  0.0402, -0.0111,  0.0254]])), ('output_linear.bias', tensor([0.1300]))])
model node name: input_linear.weight
model node name: input_linear.bias
model node name: output_linear.weight
model node name: output_linear.bias
```
`state_dict()` returns the model parameters as an `OrderedDict`, i.e., a dictionary: each key is the name of a network component, such as the weight `input_linear.weight` or the bias `input_linear.bias`, and each value is a PyTorch `Tensor`.

```python
keys = model.state_dict().keys()
values = model.state_dict().values()
for key, value in zip(keys, values):
    print(key, value)
```
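The same dictionary can also be indexed by key, or walked with `items()`; a small sketch using the parameter names printed above:

```python
# Look up a single parameter tensor by its state_dict key.
w = model.state_dict()["input_linear.weight"]
print(w.size())  # torch.Size([100, 1])

# Equivalent iteration via items() instead of zipping keys and values.
for key, value in model.state_dict().items():
    print(key, value.size())
```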
(1) PyTorch builds the network the same way as the conventional description, but internally each layer's weight matrix is stored as the transpose of the conventional layout (verified in the sketch after this list).
(2) The PyTorch model parameters form a dictionary, so they can be retrieved with the usual dictionary traversal methods.
(3) The TensorFlow model structure, by contrast, is a list.
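A minimal standalone sketch confirming point (1), with hypothetical layer sizes: `torch.nn.Linear` stores its weight with shape [out_features, in_features] and computes y = x·Wᵀ + b, so a manual forward pass must use the transpose:

```python
import torch

linear = torch.nn.Linear(3, 5)  # conventional view: 3 inputs -> 5 outputs
print(linear.weight.size())     # torch.Size([5, 3]): the transposed layout

x = torch.randn(10, 3)
manual = x @ linear.weight.t() + linear.bias
print(torch.allclose(linear(x), manual))  # True
```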
TensorFlow comparison:
TensorFlow Basics: In-depth Analysis of Model Saving and Loading (II)
(I) Building a Neural Network with TensorFlow