Because TensorBoard is used (and the logger.py below relies on the TensorFlow 1.x summary API, i.e. tf.summary.FileWriter and tf.Summary), TensorFlow has to be installed first.
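A typical install command (version 1.9.0 is only an example, chosen to match the TensorBoard 1.9.0 output shown at the end of this post; the exact version on your machine may differ):
pip install tensorflow==1.9.0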
The main program, main.py, is as follows:
import torch
import torch.nn as nn
import torchvision
from torchvision import transforms

from logger import Logger

# GPU setup
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Load the MNIST dataset
dataset = torchvision.datasets.MNIST(root='data',
                                     train=True,
                                     transform=transforms.ToTensor(),
                                     download=True)

data_loader = torch.utils.data.DataLoader(dataset=dataset,
                                          batch_size=100,
                                          shuffle=True)


# Define a fully connected network
class NeuralNet(nn.Module):
    def __init__(self, input_size=784, hidden_size=500, num_classes=10):
        super(NeuralNet, self).__init__()
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        out = self.fc1(x)
        out = self.relu(out)
        out = self.fc2(out)
        return out


model = NeuralNet().to(device)

logger = Logger('./logs')

# Define the loss function and the optimizer
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.00001)

data_iter = iter(data_loader)
iter_per_epoch = len(data_loader)
total_step = 50000

# Start training
for step in range(total_step):
    # Reset the iterator once an epoch has been consumed, then fetch the next batch
    if (step + 1) % iter_per_epoch == 0:
        data_iter = iter(data_loader)

    # Fetch the images and labels, flattening each image to a 784-dim vector
    images, labels = next(data_iter)
    images, labels = images.view(images.size(0), -1).to(device), labels.to(device)

    # Forward pass
    outputs = model(images)
    loss = criterion(outputs, labels)

    # Backward pass and parameter update
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    # Compute the accuracy on the current batch
    _, argmax = torch.max(outputs, 1)
    accuracy = (labels == argmax.squeeze()).float().mean()

    if (step + 1) % 100 == 0:
        print('Step [{}/{}], Loss: {:.4f}, Acc: {:.2f}'
              .format(step + 1, total_step, loss.item(), accuracy.item()))

        # ================================================================== #
        #                        Log to TensorBoard                          #
        # ================================================================== #

        # 1. Log scalar values
        info = {'loss': loss.item(), 'accuracy': accuracy.item()}
        for tag, value in info.items():
            logger.scalar_summary(tag, value, step + 1)

        # 2. Log the values and gradients of the parameters
        for tag, value in model.named_parameters():
            tag = tag.replace('.', '/')
            logger.histo_summary(tag, value.data.cpu().numpy(), step + 1)
            logger.histo_summary(tag + '/grad', value.grad.data.cpu().numpy(), step + 1)

        # 3. Log the images fetched in this iteration (first 10 of the batch)
        info = {'images': images.view(-1, 28, 28)[:10].cpu().numpy()}
        for tag, images in info.items():
            logger.image_summary(tag, images, step + 1)
Code walkthrough: main.py trains a two-layer fully connected network on MNIST with Adam. Every 100 steps it prints the current loss and batch accuracy, and then logs three kinds of data through the Logger class: scalar values (loss and accuracy), histograms of every parameter and its gradient, and the first 10 input images of the current batch.
Next, the logging code has to be filled in. Create a logger.py file in the same directory with the following content (note that it uses scipy.misc.toimage, which only exists in older SciPy releases):
import tensorflow as tf
import numpy as np
import scipy.misc

try:
    from StringIO import StringIO  # Python 2.7
except ImportError:
    from io import BytesIO         # Python 3.x


class Logger(object):

    def __init__(self, log_dir):
        """Create a summary writer logging to log_dir."""
        self.writer = tf.summary.FileWriter(log_dir)

    def scalar_summary(self, tag, value, step):
        """Log a scalar variable."""
        summary = tf.Summary(value=[tf.Summary.Value(tag=tag, simple_value=value)])
        self.writer.add_summary(summary, step)

    def image_summary(self, tag, images, step):
        """Log a list of images."""
        img_summaries = []
        for i, img in enumerate(images):
            # Write the image to a string
            try:
                s = StringIO()
            except:
                s = BytesIO()
            scipy.misc.toimage(img).save(s, format="png")

            # Create an Image object
            img_sum = tf.Summary.Image(encoded_image_string=s.getvalue(),
                                       height=img.shape[0],
                                       width=img.shape[1])
            # Create a Summary value
            img_summaries.append(tf.Summary.Value(tag='%s/%d' % (tag, i), image=img_sum))

        # Create and write Summary
        summary = tf.Summary(value=img_summaries)
        self.writer.add_summary(summary, step)

    def histo_summary(self, tag, values, step, bins=1000):
        """Log a histogram of the tensor of values."""
        # Create a histogram using numpy
        counts, bin_edges = np.histogram(values, bins=bins)

        # Fill the fields of the histogram proto
        hist = tf.HistogramProto()
        hist.min = float(np.min(values))
        hist.max = float(np.max(values))
        hist.num = int(np.prod(values.shape))
        hist.sum = float(np.sum(values))
        hist.sum_squares = float(np.sum(values ** 2))

        # Drop the start of the first bin
        bin_edges = bin_edges[1:]

        # Add bin edges and counts
        for edge in bin_edges:
            hist.bucket_limit.append(edge)
        for c in counts:
            hist.bucket.append(c)

        # Create and write Summary
        summary = tf.Summary(value=[tf.Summary.Value(tag=tag, histo=hist)])
        self.writer.add_summary(summary, step)
        self.writer.flush()
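Before wiring the Logger into main.py, it can be smoke-tested on its own. A minimal sketch with dummy values (the ./logs_test directory, the tags, and the random data are placeholders, not part of the program above):

import numpy as np
from logger import Logger

# Write a few dummy summaries so TensorBoard has something to display.
logger = Logger('./logs_test')   # placeholder log directory
for step in range(1, 11):
    logger.scalar_summary('demo/value', float(np.sin(step / 10.0)), step)   # one scalar curve
    logger.histo_summary('demo/weights', np.random.randn(500, 784), step)   # one histogram
    logger.image_summary('demo/images', np.random.rand(3, 28, 28), step)    # three grayscale images

Pointing TensorBoard at ./logs_test should then show one scalar curve, one histogram, and three images per step.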
While main.py is running, enter the following command in a cmd terminal to start TensorBoard (do not wrap the path in quotes in a Windows cmd shell, otherwise the quote characters become part of the path):
tensorboard --logdir=./logs --port=6006
Then follow the printed hint, for example: TensorBoard 1.9.0 at http://S5G50PGQ5D868G9:6006 (Press CTRL+C to quit)
Open http://S5G50PGQ5D868G9:6006 (or http://localhost:6006) in a browser to view the logged results.
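An optional alternative worth noting: recent PyTorch releases ship a built-in TensorBoard writer, torch.utils.tensorboard.SummaryWriter, which covers the same three kinds of logging without the TensorFlow-based Logger class. A minimal self-contained sketch with dummy data (the ./logs_builtin directory, the tags, and the values are placeholders):

import numpy as np
import torch
from torch.utils.tensorboard import SummaryWriter

writer = SummaryWriter('./logs_builtin')   # placeholder log directory
for step in range(1, 11):
    writer.add_scalar('loss', 1.0 / step, step)                            # like scalar_summary
    writer.add_histogram('fc1/weight', np.random.randn(500, 784), step)    # like histo_summary
    writer.add_images('images', torch.rand(10, 1, 28, 28), step)           # like image_summary
writer.close()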