7 PyTorch example: using TensorBoard

Since TensorBoard is used here, TensorFlow needs to be installed first.
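
For example, it can typically be installed with pip (the exact version depends on your environment; the prompt shown later in this section corresponds to TensorBoard 1.9.0, i.e. the TensorFlow 1.x line):

pip install tensorflow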

The main program, main.py, is as follows:

import torch
import torch.nn as nn
import torchvision
from torchvision import transforms
from logger import Logger


# Device configuration (use GPU if available)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Load the MNIST dataset
dataset = torchvision.datasets.MNIST(root='data', 
                                     train=True, 
                                     transform=transforms.ToTensor(),  
                                     download=True)

data_loader = torch.utils.data.DataLoader(dataset=dataset, 
                                          batch_size=100, 
                                          shuffle=True)


# Fully connected network with one hidden layer
class NeuralNet(nn.Module):
    def __init__(self, input_size=784, hidden_size=500, num_classes=10):
        super(NeuralNet, self).__init__()
        self.fc1 = nn.Linear(input_size, hidden_size) 
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(hidden_size, num_classes)  
    
    def forward(self, x):
        out = self.fc1(x)
        out = self.relu(out)
        out = self.fc2(out)
        return out

model = NeuralNet().to(device)

logger = Logger('./logs')

# Loss function and optimizer
criterion = nn.CrossEntropyLoss()  
optimizer = torch.optim.Adam(model.parameters(), lr=0.00001)  

data_iter = iter(data_loader)
iter_per_epoch = len(data_loader)
total_step = 50000

# Start training
for step in range(total_step):
    
    # Reset the iterator at the end of each epoch to get the next batch
    if (step+1) % iter_per_epoch == 0:
        data_iter = iter(data_loader)

    # Fetch images and labels
    images, labels = next(data_iter)
    images, labels = images.view(images.size(0), -1).to(device), labels.to(device)
    
    # Forward pass
    outputs = model(images)
    loss = criterion(outputs, labels)
    
    # Backward pass and parameter update
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    # Compute accuracy
    _, argmax = torch.max(outputs, 1)
    accuracy = (labels == argmax.squeeze()).float().mean()

    if (step+1) % 100 == 0:
        print ('Step [{}/{}], Loss: {:.4f}, Acc: {:.2f}' 
               .format(step+1, total_step, loss.item(), accuracy.item()))

        # ================================================================== #
        #                        TensorBoard Logging                         #
        # ================================================================== #

        # 1. Log scalar values (loss and accuracy)
        info = { 'loss': loss.item(), 'accuracy': accuracy.item() }

        for tag, value in info.items():
            logger.scalar_summary(tag, value, step+1)

        # 2. Log the values and gradients of the model parameters
        for tag, value in model.named_parameters():
            tag = tag.replace('.', '/')
            logger.histo_summary(tag, value.data.cpu().numpy(), step+1)
            logger.histo_summary(tag+'/grad', value.grad.data.cpu().numpy(), step+1)

        # 3. Log the first 10 images of the current training batch
        info = { 'images': images.view(-1, 28, 28)[:10].cpu().numpy() }

        for tag, images in info.items():
            logger.image_summary(tag, images, step+1)

Notes on the code

  • To be able to monitor the current accuracy and loss in real time during training, these values need to be written to TensorBoard (a sketch using PyTorch's built-in writer follows this list);
  • The code above implements a fully connected neural network with one hidden layer;
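
PyTorch 1.1+ also ships a built-in writer, torch.utils.tensorboard.SummaryWriter, which can log the same quantities without the custom Logger class used below. The following is only a minimal sketch of a helper that mirrors the three logging steps in main.py; the name log_step is illustrative and not part of the original code:

from torch.utils.tensorboard import SummaryWriter

writer = SummaryWriter('./logs')

def log_step(writer, model, images, loss, accuracy, step):
    # 1. scalars: same tags as the info dict in main.py
    writer.add_scalar('loss', loss.item(), step)
    writer.add_scalar('accuracy', accuracy.item(), step)
    # 2. parameter values and gradients
    for name, param in model.named_parameters():
        name = name.replace('.', '/')
        writer.add_histogram(name, param.data.cpu().numpy(), step)
        if param.grad is not None:
            writer.add_histogram(name + '/grad', param.grad.data.cpu().numpy(), step)
    # 3. first 10 images of the current batch (add_images expects NCHW input)
    writer.add_images('images', images.view(-1, 1, 28, 28)[:10].cpu(), step)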

Next, complete the logging code: create a logger.py file in the same directory with the following content:

import tensorflow as tf
import numpy as np
import scipy.misc 
try:
    from StringIO import StringIO  # Python 2.7
except ImportError:
    from io import BytesIO         # Python 3.x
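
# NOTE: this Logger uses the TensorFlow 1.x summary API (tf.Summary, tf.summary.FileWriter)
# and scipy.misc.toimage, which has been removed from recent SciPy releases, so it is
# intended for the TensorFlow 1.x environment assumed in this tutorial.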


class Logger(object):
    
    def __init__(self, log_dir):
        """Create a summary writer logging to log_dir."""
        self.writer = tf.summary.FileWriter(log_dir)

    def scalar_summary(self, tag, value, step):
        """Log a scalar variable."""
        summary = tf.Summary(value=[tf.Summary.Value(tag=tag, simple_value=value)])
        self.writer.add_summary(summary, step)

    def image_summary(self, tag, images, step):
        """Log a list of images."""

        img_summaries = []
        for i, img in enumerate(images):
            # Write the image to a string
            try:
                s = StringIO()
            except:
                s = BytesIO()
            scipy.misc.toimage(img).save(s, format="png")

            # Create an Image object
            img_sum = tf.Summary.Image(encoded_image_string=s.getvalue(),
                                       height=img.shape[0],
                                       width=img.shape[1])
            # Create a Summary value
            img_summaries.append(tf.Summary.Value(tag='%s/%d' % (tag, i), image=img_sum))

        # Create and write Summary
        summary = tf.Summary(value=img_summaries)
        self.writer.add_summary(summary, step)
        
    def histo_summary(self, tag, values, step, bins=1000):
        """Log a histogram of the tensor of values."""

        # Create a histogram using numpy
        counts, bin_edges = np.histogram(values, bins=bins)

        # Fill the fields of the histogram proto
        hist = tf.HistogramProto()
        hist.min = float(np.min(values))
        hist.max = float(np.max(values))
        hist.num = int(np.prod(values.shape))
        hist.sum = float(np.sum(values))
        hist.sum_squares = float(np.sum(values**2))

        # Drop the start of the first bin
        bin_edges = bin_edges[1:]

        # Add bin edges and counts
        for edge in bin_edges:
            hist.bucket_limit.append(edge)
        for c in counts:
            hist.bucket.append(c)

        # Create and write Summary
        summary = tf.Summary(value=[tf.Summary.Value(tag=tag, histo=hist)])
        self.writer.add_summary(summary, step)
        self.writer.flush()

While main.py is running, enter the following command in a cmd terminal:

tensorboard --logdir=./logs --port=6006

to start TensorBoard. It will then print a prompt such as: TensorBoard 1.9.0 at http://S5G50PGQ5D868G9:6006 (Press CTRL+C to quit)

Open a browser and visit http://S5G50PGQ5D868G9:6006 to view the results.
