Environment
TensorBoard 是 TensorFlow 中常用的可视化工具。从 1.1 版本开始,PyTorch 支持使用 TensorBoard。
使用 conda install tensorboard
安装(坑:若 import torch.utils.tensorboard
时提示没有 past 包,则需要安装 future 包(运行 conda install future
))
torch.utils.tensorboard.SummaryWriter
对象,调用一些实例方法,运行后需要可视化的指标将被记录到硬盘中的 event file。随后在命令行运行 $ tensorboard --logdir=./
读取 event file 中的内容,在网页端显示。TensorBoard 可视化都是通过 SummaryWriter
类的各种实例方法来实现的,一切从此开始
from torch.utils.tensorboard import SummaryWriter
# Create the SummaryWriter: the single entry point for all TensorBoard logging.
# (Parameter meanings below follow the PyTorch SummaryWriter docs — confirm against the installed version.)
writer = SummaryWriter(
log_dir=None,  # directory for event files; None means a default ./runs/... subfolder
comment='',  # suffix appended to the default log_dir name; ignored when log_dir is given
purge_step=None,  # when resuming, previously logged events with step >= purge_step are discarded
max_queue=10,  # size of the pending-event queue before a forced flush to disk
flush_secs=120,  # flush pending events to disk this often (seconds)
filename_suffix='',  # suffix appended to every event file name
)
实例方法 add_graph
将模型以计算图的形式进行可视化
# Visualize the model as a computational graph.
writer.add_graph(
model, # the model to visualize
input_to_model=None, # a sample input batch; a torch.randn tensor of the right shape suffices
verbose=False
)
实例方法 add_scalar
和 add_scalars
,前者记录单个标量,如可以记录训练损失函数值的变化;后者能够在一幅图中显示多个标量指标,如可以同时记录训练和验证的准确度变化。
# Log a single scalar per step, e.g. the training loss.
writer.add_scalar(
tag, # title of the curve chart
scalar_value, # the scalar to record: the y-axis value of the curve
global_step=None, # x-axis value of the curve, usually the iteration count
walltime=None
)
# Log several scalars in one chart, e.g. train vs. validation accuracy.
writer.add_scalars(
main_tag, # title of the curve chart
tag_scalar_dict, # names and values of the scalars to plot: {name1: val1, name2: val2, ...}
global_step=None, # x-axis value of the curves, usually the iteration count
walltime=None,
)
实例方法 add_histogram
绘制直方图,可以用来查看神经网络权重或梯度的分布。
# Plot a histogram per step, e.g. of network weights or gradients.
writer.add_histogram(
tag, # chart title
values, # the values whose distribution is plotted
global_step=None, # x-axis position, usually the iteration count
bins='tensorflow', # binning strategy
walltime=None,
max_bins=None,
)
例如可视化一个线性回归模型的训练过程
import os
import torch
from torch.nn import Module, Linear
from torch.nn import MSELoss
from torch.optim import SGD
from torch.utils.tensorboard import SummaryWriter
torch.manual_seed(0)  # fixed seed so the run is reproducible
# ========== data ==========
# 50 evenly spaced x-values in [-10, 10], as a single (1, 50) sample
inputs = torch.unsqueeze(torch.linspace(-10, 10, steps=50), 0)
# ground-truth line y = 2x + 1 with Gaussian noise added
target = 2. * inputs + 1. + 10 * torch.randn(50)
# ========== model ==========
class Net(Module):
    """A single linear layer mapping 50 input features to 50 outputs."""

    def __init__(self):
        super(Net, self).__init__()
        self.fc = Linear(50, 50)
        self._init_weights()

    def forward(self, x):
        """Apply the linear layer to *x* and return the result."""
        return self.fc(x)

    def _init_weights(self):
        """Initialize every Linear layer: weights ~ N(0, 1), biases to zero.

        Uses in-place ops under torch.no_grad() instead of the dated
        ``.data`` idiom, so the initialization is never tracked by autograd.
        """
        with torch.no_grad():
            for m in self.modules():
                if isinstance(m, Linear):
                    m.weight.normal_()
                    m.bias.zero_()
# ========== pre-train =========
net = Net()
criterion = MSELoss()
optimizer = SGD(net.parameters(), lr=0.0003, momentum=0.9)
# optimizer = SGD(net.parameters(), lr=0.0003, momentum=0.9, weight_decay=0.01) # try L2 regularization
# ========== train ==========
writer = SummaryWriter(log_dir=os.path.join(os.curdir, 'logs'))
# log the model graph once, using a random batch of the right shape
dummy_input = torch.randn(1, 50)
writer.add_graph(net, dummy_input)
for epoch in range(1, 11):
    optimizer.zero_grad()
    predictions = net(inputs)
    loss = criterion(predictions, target)
    loss.backward()
    optimizer.step()
    # record training loss
    writer.add_scalar('train_loss', loss.item(), epoch)
    # record distribution of the fc weights and their gradients
    for name, param in net.named_parameters():
        layer, _, attr = name.partition('.')
        if 'fc' in layer and 'weight' in attr:
            writer.add_histogram('{}_{}'.format(layer, attr), param, epoch)
            writer.add_histogram('{}_{}_grad'.format(layer, attr), param.grad, epoch)
writer.close()
logs user$ tensorboard --logdir=./
TensorFlow installation not found - running with reduced feature set.
Serving TensorBoard on localhost; to expose to the network, use a proxy or pass --bind_all
TensorBoard 2.0.0 at http://localhost:6006/ (Press CTRL+C to quit)
接着,将 http://localhost:6006/ 地址在浏览器打开
实例方法 add_image
记录 torch tensor 类型的图片数据
# Log image data given as a torch tensor.
writer.add_image(
tag,
img_tensor,
global_step=None,
walltime=None,
dataformats='CHW', # memory layout of img_tensor: 'CHW', 'HWC' or 'HW'
)
注意,对于传入的 tensor 图像数据 img_tensor
,若所有像素值都在 0 到 1 之间,则将自动乘以 255 放大至 0 到 255;若存在有 > 1 的像素值,则不缩放。
若需要可视化一组图片,如格式为 (B, C, H, W) 的一组 tensor,那么可以先使用 torchvision.utils.make_grid
函数,该函数能够将一个 batch 的图片拼接成子图网格的形式
# Tile a batch of images (B, C, H, W) into one grid image for add_image.
img_grid = torchvision.utils.make_grid(
tensor, # image data, shaped B x C x H x W
nrow=8, # number of images shown per row
padding=2, # spacing between images, in pixels
normalize=False, # whether to rescale pixel values; defaults to False. Network inputs often have small pixel values and need scaling to 0~255 before visualizing.
range=None, # clamp range. E.g. with pixels in 0~255, passing (100, 200) clips values below 100 up to 100 and above 200 down to 200.
# NOTE(review): newer torchvision versions renamed `range` to `value_range` — confirm against the installed version
scale_each=False, # whether to normalize each image separately; defaults to False
pad_value=0, # pixel value used for the padding between images
)
例如可视化输入CNN模型的图片
import os
import torch
import torchvision
from torch.nn import Module, Sequential, Conv2d, Linear, ReLU
from torch.nn import CrossEntropyLoss
from torch.optim import SGD
from torch.utils.tensorboard import SummaryWriter
# ========== data ==========
# Three constant 3x7x7 "images": all-black (0), bright (200), mid-gray (127).
inputs = torch.stack([
    torch.full((3, 7, 7), fill, dtype=torch.float32)
    for fill in (0., 200., 127.)
])
# one class label per image
targets = torch.tensor((0, 1, 2), dtype=torch.long)
# ========== model ==========
class Net(Module):
    """Tiny CNN: one conv+ReLU feature stage followed by a linear classifier."""

    def __init__(self, num_classes=3):
        super(Net, self).__init__()
        # 3-channel input -> 32 feature maps; padding=1 keeps the 7x7 spatial size
        self.features = Sequential(
            Conv2d(3, 32, kernel_size=3, stride=1, padding=1),
            ReLU(),
        )
        self.classifier = Linear(32 * 7 * 7, num_classes)

    def forward(self, x):
        """Extract features, flatten per sample, then classify."""
        feats = self.features(x)
        flat = torch.flatten(feats, 1)
        return self.classifier(flat)
# ========== pre-train ==========
net = Net()
criterion = CrossEntropyLoss()
optimizer = SGD(net.parameters(), lr=0.01, momentum=0.9)
# ========== train ==========
log_dir = os.path.join(os.curdir, 'logs')
writer = SummaryWriter(log_dir)
# The input batch never changes, so build the image grid once outside the
# loop (2 images per row, 2-pixel padding) instead of recomputing it each epoch.
img_grid = torchvision.utils.make_grid(tensor=inputs, nrow=2, padding=2)
for epoch in range(1, 4):
    # record the input images (still logged once per epoch, as before)
    writer.add_image('input_images', img_grid, epoch, dataformats='CHW')
    optimizer.zero_grad()
    outputs = net(inputs)
    loss = criterion(outputs, targets)
    loss.backward()
    optimizer.step()
writer.close()
接着,运行 tensorboard 然后到网页端查看。