使用PyTorch搭建并训练深度神经网络时,对训练过程进行可视化具有重要的作用。在实际使用中,TensorboardX比同样常与PyTorch搭配使用的visdom用的人更多。在网络上各种使用教程的指导下,我终于成功用上了TensorboardX。不过现在网络上各种教程的版本都较老,我在使用时遇到了各种Bug,为了方便同学们使用,特此总结了一下较新版本下的使用过程,希望能帮助到大家。我的PyTorch版本为1.1.0,TensorboardX版本为1.6。废话不多说,上教程:
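1.安装以下依赖(可使用pip install安装):
tensorflow
scipy
numpy
torchvision
注:下面的logger.py直接调用TensorFlow的summary接口写日志,其中用到了tf.summary.FileWriter(TensorFlow 1.x的接口)和scipy.misc.toimage(SciPy 1.2.0起已被移除),请安装与之兼容的版本。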
2.在PyCharm的当前项目目录中新建logger.py,内容如下:
import io

import numpy as np
import scipy.misc
import tensorflow as tf

try:
    from StringIO import StringIO  # Python 2.7
except ImportError:
    from io import BytesIO  # Python 3.x
class Logger(object):
    """Write scalar, image and histogram summaries to a TensorBoard log dir.

    Thin wrapper around the TensorFlow summary objects used below
    (``tf.summary.FileWriter``, ``tf.Summary``, ``tf.HistogramProto``).
    NOTE(review): these names look like the TensorFlow 1.x API -- confirm the
    installed TensorFlow version still exposes them.
    """

    def __init__(self, log_dir):
        """Create a summary writer logging to log_dir."""
        self.writer = tf.summary.FileWriter(log_dir)

    def scalar_summary(self, tag, value, step):
        """Log a scalar variable.

        Args:
            tag: Name under which the scalar is recorded.
            value: The scalar to record.
            step: Global step the value belongs to.
        """
        summary = tf.Summary(
            value=[tf.Summary.Value(tag=tag, simple_value=value)])
        self.writer.add_summary(summary, step)

    def image_summary(self, tag, images, step):
        """Log a list of images.

        Each image is PNG-encoded and stored under the tag ``"<tag>/<index>"``.

        Args:
            tag: Common tag prefix for the images.
            images: Iterable of array-like images; ``shape[0]`` is recorded as
                the height and ``shape[1]`` as the width.
            step: Global step the images belong to.
        """
        img_summaries = []
        for i, img in enumerate(images):
            # PNG data is binary, so always encode into a bytes buffer.
            # io.BytesIO exists on both Python 2 and 3, which removes the
            # need for the old "try StringIO / bare except" fallback.
            buf = io.BytesIO()
            # NOTE(review): scipy.misc.toimage was removed in SciPy 1.2 --
            # this call requires an older SciPy; confirm the pinned version.
            scipy.misc.toimage(img).save(buf, format="png")

            # Create an Image object
            img_sum = tf.Summary.Image(encoded_image_string=buf.getvalue(),
                                       height=img.shape[0],
                                       width=img.shape[1])
            # Create a Summary value
            img_summaries.append(
                tf.Summary.Value(tag='%s/%d' % (tag, i), image=img_sum))

        # Create and write Summary
        summary = tf.Summary(value=img_summaries)
        self.writer.add_summary(summary, step)

    def histo_summary(self, tag, values, step, bins=1000):
        """Log a histogram of the tensor of values.

        Args:
            tag: Name under which the histogram is recorded.
            values: Array-like of numbers (converted with ``np.asarray``).
            step: Global step the histogram belongs to.
            bins: Forwarded to ``np.histogram``.
        """
        # Accept lists/tuples as well as ndarrays; a no-op for ndarrays.
        values = np.asarray(values)

        # Create a histogram using numpy
        counts, bin_edges = np.histogram(values, bins=bins)

        # Fill the fields of the histogram proto
        hist = tf.HistogramProto()
        hist.min = float(np.min(values))
        hist.max = float(np.max(values))
        hist.num = int(np.prod(values.shape))
        hist.sum = float(np.sum(values))
        hist.sum_squares = float(np.sum(values ** 2))

        # Drop the start of the first bin so that there is exactly one
        # (upper) limit per bucket count.
        hist.bucket_limit.extend(bin_edges[1:].tolist())
        hist.bucket.extend(counts.tolist())

        # Create and write Summary
        summary = tf.Summary(value=[tf.Summary.Value(tag=tag, histo=hist)])
        self.writer.add_summary(summary, step)
        self.writer.flush()
3.新建测试文件test_tensorboardx.py,将以下内容复制到这个文件当中,运行这个文件。
import torch
import torch.nn as nn
import torchvision
from torchvision import transforms
from logger import Logger
# Device configuration: train on the GPU when CUDA is usable, else on the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

# MNIST training split, stored under ./data (downloaded if requested files
# are missing) and converted to tensors by the transform.
dataset = torchvision.datasets.MNIST(
    root='./data',
    train=True,
    transform=transforms.ToTensor(),
    download=True,
)

# Data loader yielding shuffled mini-batches of 100 samples.
data_loader = torch.utils.data.DataLoader(
    dataset,
    batch_size=100,
    shuffle=True,
)
# Fully connected neural network with one hidden layer
class NeuralNet(nn.Module):
    """Two-layer perceptron: Linear -> ReLU -> Linear.

    Args:
        input_size: Number of input features per sample.
        hidden_size: Width of the hidden layer.
        num_classes: Number of output scores per sample.
    """

    def __init__(self, input_size=784, hidden_size=500, num_classes=10):
        super(NeuralNet, self).__init__()
        # Attribute names are part of the parameter names reported by
        # named_parameters() (e.g. "fc1.weight"), so keep them stable.
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        """Return the raw (un-normalised) class scores for ``x``."""
        out = self.fc1(x)
        out = self.relu(out)
        out = self.fc2(out)
        return out
model = NeuralNet().to(device)
logger = Logger('./logs')

# Loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.00001)

data_iter = iter(data_loader)
total_step = 5000

# Start training
for step in range(total_step):
    # Fetch images and labels. Restart the loader only once it is really
    # exhausted, so no batch is dropped at the end of a pass over the data.
    try:
        images, labels = next(data_iter)
    except StopIteration:
        data_iter = iter(data_loader)
        images, labels = next(data_iter)

    # Flatten every image to a vector before feeding the linear layers.
    images = images.view(images.size(0), -1).to(device)
    labels = labels.to(device)

    # Forward pass
    outputs = model(images)
    loss = criterion(outputs, labels)

    # Backward and optimize
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    # Compute accuracy of this mini-batch
    _, argmax = torch.max(outputs, 1)
    accuracy = (labels == argmax.squeeze()).float().mean()

    # Report and log only every 100th step.
    if (step + 1) % 100 == 0:
        print('Step [{}/{}], Loss: {:.4f}, Acc: {:.2f}'
              .format(step + 1, total_step, loss.item(), accuracy.item()))

        # ================================================================== #
        #                        Tensorboard Logging                         #
        # ================================================================== #

        # 1. Log scalar values (scalar summary)
        info = {'loss': loss.item(), 'accuracy': accuracy.item()}
        for tag, value in info.items():
            logger.scalar_summary(tag, value, step + 1)

        # 2. Log values and gradients of the parameters (histogram summary)
        for tag, value in model.named_parameters():
            tag = tag.replace('.', '/')
            logger.histo_summary(tag, value.data.cpu().numpy(), step + 1)
            # A parameter that took no part in the backward pass has no
            # gradient to log.
            if value.grad is not None:
                logger.histo_summary(tag + '/grad',
                                     value.grad.data.cpu().numpy(), step + 1)

        # 3. Log training images (image summary): first 10 images of the
        # batch, reshaped back to 28x28.
        info = {'images': images.view(-1, 28, 28)[:10].cpu().numpy()}
        for tag, image_batch in info.items():
            logger.image_summary(tag, image_batch, step + 1)
4.运行结束之后,当前文件夹下会生成一个logs文件夹,日志信息都保存在这个文件夹当中。打开cmd命令行窗口,并进入logs文件夹所在的路径(即logs的上一级目录)。
输入以下命令(Windows的cmd不会去掉单引号,因此路径不要加引号):
tensorboard --logdir=./logs --port=6006
5.打开Chrome浏览器,进入http://localhost:6006
注:两个测试文件来自https://github.com/yunjey/pytorch-tutorial/tree/master/tutorials/04-utils/tensorboard