参考网站:https://pytorch.org/tutorials/beginner/blitz/tensor_tutorial.html#sphx-glr-beginner-blitz-tensor-tutorial-py
2.1 pytorch测试
# coding=utf-8
# 这是一个用于练习的文档
from __future__ import print_function
import torch
# 5x3 tensor of samples drawn uniformly from [0, 1).
x = torch.rand((5, 3))
print("x={}".format(x))

# Other constructors: uninitialized storage, integer zeros, and a tensor
# built directly from a Python list.
print(torch.empty((5, 3)))
print(torch.zeros((5, 3), dtype=torch.int64))
print(torch.tensor([5.5, 3]))

# new_ones builds a tensor of ones; the dtype is overridden to float64 here.
x = x.new_ones((5, 3), dtype=torch.float64)
print("x={}".format(x))

# rand_like returns a tensor with the same size as its input, filled with
# uniform random numbers from [0, 1) (randn_like is the normal-distribution
# variant); the dtype is overridden to float32.
x = torch.rand_like(x, dtype=torch.float32)
print("x={}".format(x))
print(x.dtype)
print(x.shape)
2.2 tensor加法
# Tensor addition (PyTorch).
y = torch.rand((5, 3))
print("y={}".format(y))

# `+` adds element by element; torch.add(x, y) is the functional spelling.
print("x+y={}".format(x + y))
print("torch.add(x,y)={}".format(torch.add(x, y)))

# Addition that writes into a preallocated output tensor; `result` must have
# the same type as x and y.
result = torch.empty((5, 3))
torch.add(x, y, out=result)
print("result={}".format(result))
2.3 in-place方法
# In-place methods modify the tensor they are called on instead of
# producing a new one.
y.add_(x)  # y = y + x
print("y={}".format(y))

# Any PyTorch method with a trailing underscore works in place,
# e.g. x.copy_(y) and x.t_().
print("x={}".format(x))
x.copy_(y)
print("x={}".format(x))
x.t_()  # x is replaced by its own transpose
print("x={}".format(x))

# Resizing / reshaping a tensor with Tensor.view.
x = torch.rand((4, 4))
print("x={}".format(x))
y = x.view(16)
print("y=x.view(16)={}".format(y))

# A size of -1 is not a "count from the right" index here: it means
# "infer this dimension from the others".
print("x.view(-1,8)={}".format(x.view(-1, 8)))
# With 16 elements: other dim 8 -> -1 becomes 2; other dim 4 -> -1 becomes 4.
print("x.view(-1,4)={}".format(x.view(-1, 4)))
# x.view(-1, 5) raises
# RuntimeError: shape '[-1, 5]' is invalid for input of size 16
print("x.view(2,8)={}".format(x.view(2, 8)))
print("x.view(8,2)={}".format(x.view(8, 2)))
2.4 tensor resize/reshape
# Resizing / reshaping a tensor with Tensor.view.
x = torch.rand((4, 4))
print("x={}".format(x))
y = x.view(16)
print("y=x.view(16)={}".format(y))

# A size of -1 means "infer this dimension from the others": for 16 elements,
# (-1, 8) resolves to (2, 8) and (-1, 4) resolves to (4, 4).
# x.view(-1, 5) raises
# RuntimeError: shape '[-1, 5]' is invalid for input of size 16
for _label, _shape in (
    ("x.view(-1,8)={}", (-1, 8)),
    ("x.view(-1,4)={}", (-1, 4)),
    ("x.view(2,8)={}", (2, 8)),
    ("x.view(8,2)={}", (8, 2)),
):
    print(_label.format(x.view(*_shape)))
2.5 获取tensor某一element的值
# Reading one element: indexing yields a tensor, and .item() converts a
# one-element tensor into a plain Python number.
print("x[1][1]={}".format(x[1, 1]))
print("x[1][1].item()={}".format(x[1, 1].item()))

# The same works for a tensor that holds a single value.
x = torch.randn((1,))
print("x={}".format(x))
print("x.item()={}".format(x.item()))
print("x[0].item()={}".format(x[0].item()))
2.6 转换Torch tensor到numpy
# Converting a Torch tensor to a NumPy array.
a = torch.ones((6,))
print("a={},type is {}".format(a, type(a)))
b = a.numpy()
print("b=a.numpy()={},type is {}".format(b, type(b)))

# The array is changed by changing the tensor it was created from.
# ndarray has no add_ method (b.add_(2) raises
# AttributeError: 'numpy.ndarray' object has no attribute 'add_');
# b + 2 builds a new array instead.
print("b+2={}".format(b + 2))

# Updating the tensor in place is reflected when b is printed again.
a.add_(3)
print("a={},type is {}".format(a, type(a)))
print("b=a.numpy()={},type is {}".format(b, type(b)))
2.7 转换numpy到Torch tensor
# Converting a NumPy array to a Torch tensor.
import numpy as np

a = np.ones(2)
print("a={},type is {}".format(a, type(a)))
b = torch.from_numpy(a)
print("b=torch.from_numpy(a)={},type is {}".format(b, type(b)))

# Does changing the array change the tensor? Verified below: yes.
# The addition stores its result back into `a` itself.
a += 1
print("a={},type is {}".format(a, type(a)))
print("b={},type is {}".format(b, type(b)))
2.8 tensor传入GPU
# CUDA tensors: everything below runs only when a GPU is available.
if torch.cuda.is_available():
    device = torch.device("cuda")
    # Create a tensor of ones with the same size as x directly on the GPU.
    y = torch.ones_like(x, device=device)
    print("\nx={},dtype is {}".format(x, x.dtype))
    print("y=torch.ones_like(x,device=device)={},dtype is {}".format(y, y.dtype))
    # z = x + y at this point raises "RuntimeError: expected type
    # torch.FloatTensor but got torch.cuda.FloatTensor": one operand is on the
    # CPU and the other on the GPU, so they cannot be combined.
    x = x.to(device)  # move x to the GPU
    print("x=x.to(device)={},dtype is {}".format(x, x.dtype))
    z = x + y
    print("z=x+y={},dtype is {}".format(z, z.dtype))
3.1 requires_grad和grad_fn
# coding=utf-8
import torch
# A tensor created with requires_grad=True has its operations tracked.
x = torch.ones((2, 2), requires_grad=True)
print("x={}".format(x))

# Results of tracked operations carry a grad_fn describing how they were made.
y = x + 3
print("y={},y.grad_fn={}".format(y, y.grad_fn))  # y.grad_fn=<AddBackward0 ...>
z = x * y * 4
print("z={},z.grad_fn={}".format(z, z.grad_fn))  # z.grad_fn=<MulBackward0 ...>
3.2 反向传播backward(标量对向量求导)
要实现反向传播,求偏导的自变量(tensor) 必须满足条件:a.requires_grad=True(以下述snippet为例)
# coding=utf-8
import torch
a = torch.randn((2, 2))
a = a * 3 / (a - 1)
# requires_grad defaults to False when not given, so a plain tensor does not
# track gradients.
print("a.requires_grad is {}".format(a.requires_grad))
# Calling a.requires_grad_(True) here would enable tracking; it is left out on
# purpose to show that backward() cannot work without it.
b = torch.sum(a)
print("b=a.sum()={},b.grad_fn is {}".format(b, b.grad_fn))
# Backpropagation: out.backward() is equivalent to
# out.backward(torch.tensor(1.)). With a.requires_grad=False, b.backward()
# raises "RuntimeError: element 0 of tensors does not require grad and does
# not have a grad_fn".
正确例子如下:
# coding=utf-8
import torch
a = torch.randn(2, 2)
a = (a * 3 / (a - 1))
# requires_grad defaults to False when not given, so `a` does not track
# gradients yet.
print("a.requires_grad is {}".format(a.requires_grad))
# The tensor we differentiate with respect to must have requires_grad=True,
# otherwise backward() cannot run.
a.requires_grad_(True)
print("a.requires_grad is {}".format(a.requires_grad))  # now True
print("\na={}".format(a))

# Partial derivatives of a scalar with respect to a tensor.
c = a.mean()
print("c=a.mean()={}".format(c))
c.backward()  # backpropagate through the mean
# a.grad=tensor([[0.2500, 0.2500],[0.2500, 0.2500]])
print("a.grad={}".format(a.grad))

b = a.sum()
b.backward()
# a.grad=tensor([[1.2500, 1.2500],[1.2500, 1.2500]]) -- the second backward
# pass was added to the gradient already stored in a.grad.
print("a.grad={}".format(a.grad))
如果多次使用backward(),则a.grad会被累加运算!
3.3 范数求解
# Norm computation: keep doubling y until its norm reaches 10.
x = torch.randn(3, requires_grad=True)
y = x * 2
# norm() computes the 2-norm by default; torch.norm(y, 2) asks for it
# explicitly.
while y.data.norm() < 10:
    y = y * 2
print("\ny={},y.data={},y.data.norm()={},torch.norm(y,2)={}".format(
    y, y.data, y.data.norm(), torch.norm(y, 2)))
3.4 反向传播backward(向量对向量求导)
理论公式推导可参考矩阵求导
'''
# 向量(vector)对向量(vector)求偏导,backward(),数学上1*3的vector对1*3的vector求导会得到一个3*3的矩阵(vector-Jacobian product),但这里需要加一个向量v
# 可参考https://pytorch.org/tutorials/beginner/blitz/autograd_tutorial.html
'''
# Vector of weights supplied to backward() for the non-scalar output y.
v = torch.tensor([1.2, 0.4, 0.003], dtype=torch.float32)
# y.backward() with no argument is the scalar form and fails here with
# "RuntimeError: grad can be implicitly created only for scalar outputs".
# Passing v makes backward() compute the vector-Jacobian product.
y.backward(v)
print("x.grad={}".format(x.grad))
x=tensor([ 1.3471, -0.0893, -0.7166], requires_grad=True)
y=tensor([10.7771, -0.7143, -5.7330], grad_fn=<MulBackward0>)
x.grad=tensor([9.6000, 3.2000, 0.0240])
3.5 停止自动求导
# Turning autograd tracking off temporarily.
print("(x**2).requires_grad={}".format((x ** 2).requires_grad))  # True
with torch.no_grad():
    # Operations inside the block are not tracked.
    print("(x**2).requires_grad={}".format((x ** 2).requires_grad))  # False
# Tracking is back on once the block exits.
print("(x**2).requires_grad={}".format((x ** 2).requires_grad))  # True
公式推导可以参考神经网络
4.1 基本要求
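A typical training procedure for a neural network is as follows:
1. 定义含有可学习参数(权重)的神经网络;
2. 遍历输入数据集;
3. 将输入送入网络做前向计算;
4. 计算损失(输出与目标之间的差距);
5. 将梯度反向传播到网络的各个参数;
6. 更新网络权重,通常使用简单的更新规则:weight = weight - learning_rate * gradient
4.2 实现步骤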
# coding=utf-8
'''
卷积、全连接在torch.nn,池化在torch.nn.functional
'''
import torch
import torch.nn as nn
import torch.nn.functional as F
class Net(nn.Module):
    """Small CNN: two conv + ReLU + max-pool stages, then three linear layers.

    The first linear layer is sized for 1x32x32 inputs; the network returns
    three values per sample.
    """

    def __init__(self):
        super(Net, self).__init__()
        # in_channels=1, out_channels=3, kernel_size=2x2; remaining arguments
        # keep their defaults (stride=1, padding=0, dilation=1, groups=1,
        # bias=True).
        self.conv1 = nn.Conv2d(1, 3, 2)
        self.conv2 = nn.Conv2d(3, 3, 2)
        # Fully connected layers. For a 1x32x32 input the second pooling stage
        # yields 3 channels of 7x7 feature maps (32 -> 31 -> 15 -> 14 -> 7),
        # so fc1 receives 3*7*7 values.
        self.fc1 = nn.Linear(3 * 7 * 7, 5)
        self.fc2 = nn.Linear(5, 4)
        self.fc3 = nn.Linear(4, 3)
        # Layer sizes used by the official tutorial, kept for reference:
        # self.conv1 = nn.Conv2d(1, 6, 5)
        # self.conv2 = nn.Conv2d(6, 16, 5)
        # self.fc1 = nn.Linear(16 * 5 * 5, 120)  # 16 channels of 5x5 maps
        # self.fc2 = nn.Linear(120, 84)
        # self.fc3 = nn.Linear(84, 3)

    def forward(self, x):
        """Run the network on a mini-batch and return the fc3 output."""
        # 1. convolution; 2. ReLU; 3. 2x2 max pooling.
        x = F.max_pool2d(F.relu(self.conv1(x)), (2, 2))
        # A single number is enough when the pooling window is square.
        x = F.max_pool2d(F.relu(self.conv2(x)), 2)
        # Flatten everything except the batch dimension before the linear
        # layers.
        x = x.view(-1, self.num_flat_features(x))
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x

    def num_flat_features(self, x):
        """Return the number of values per sample (product of all non-batch dims)."""
        size = x.size()[1:]  # every dimension except the batch dimension
        num_features = 1
        for s in size:
            num_features *= s
        return num_features
net = Net()  # instantiate the network
print("net:{}".format(net))  # show the network structure
param = list(net.parameters())
# length is 10: each of the five layers contributes a weight and a bias.
print("length:{}\nparam[0].size()={}\nparam[1].size()={}\nparam[2].size()={}\nparam[3].size()={}\nparam[4].size()={}\nparam[5].size()={}\n".format(
    len(param), param[0].size(), param[1].size(), param[2].size(),
    param[3].size(), param[4].size(), param[5].size()))
'''
# input--The entire torch.nn package only supports inputs that are a mini-batch of samples, and not a single sample
# 就是需要实现对齐,举个例子:a single sample 可能就是3*32*32(nChannels*Height*Width),a mini-batch of samples就是1*3*32*32(nSamples*nChannels*Height*Width))
# 如果输入是a single sample,则需要转换为a mini-batch of samples(虚构一个nSamples的维度)
'''
input = torch.randn(1, 1, 32, 32)
print("input=torch.randn(1,1,32,32)={}".format(input))
out = net(input)  # calling the module runs forward()
print("out=net(input)={},size={}".format(out, out.size()))
net.zero_grad()  # set the gradients of all model parameters to zero
# out.backward(torch.randn(1, 3), retain_graph=True)
target = torch.tensor([0.8, 0.1, 0.1], dtype=torch.float)
print("target变换前:{},size={}".format(target, target.size()))
target = target.view(1, -1)  # give target the same dimensions as out
print("target变换后={},size={}".format(target, target.size()))

# Loss function: mean-squared error via nn.MSELoss, i.e.
# 1/3*((y0-t0)^2+(y1-t1)^2+(y2-t2)^2) with y the fc3 output and t the target.
mse_loss = nn.MSELoss()
# Argument order is (out, target).
loss = mse_loss(out, target)
print("loss={}\nloss.grad_fn:{}\nloss.grad_fn.next_functions[0][0]={}\nloss.grad_fn.next_functions[0][0].next_functions[0][0]={}".format(
    loss, loss.grad_fn, loss.grad_fn.next_functions[0][0],
    loss.grad_fn.next_functions[0][0].next_functions[0][0]))

# Backpropagation.
print("\nbefore zero_grad---net.conv1.bias.grad={}".format(net.conv1.bias.grad))
net.zero_grad()
print("\nbefore backprop---net.conv1.bias.grad={}".format(net.conv1.bias.grad))
'''
# pytorch构建的一个graph中,只能进行一次backward,如果上述过程已经使用过一次,则会报错:RuntimeError: Trying to backward through the graph a second time, but the buffers have already been freed. Specify retain_graph=True when calling backward the first time.
'''
# If backward was already called on this graph, that earlier call needs
# retain_graph=True for this one to succeed.
loss.backward()
print("\nafter backprop---net.conv1.bias.grad={}".format(net.conv1.bias.grad))

# Method 1: hand-written gradient descent, repeated until the loss is small.
learning_rate = 0.01
iter_count = 0
# Safety cap so the loop still terminates if the loss never drops below the
# threshold.
max_iters = 100000
while loss > 0.00001 and iter_count < max_iters:
    net.zero_grad()
    output = net(input)
    loss = mse_loss(output, target)
    loss.backward()
    # Update the weights without recording the update in the autograd graph:
    # weight = weight - learning_rate * gradient
    with torch.no_grad():
        for f in net.parameters():
            f.sub_(f.grad * learning_rate)
    iter_count = iter_count + 1
    if iter_count % 30 == 0:
        print("第{}次迭代,loss:{}".format(iter_count, loss))
'''
# torch.optim优化,试一下多次迭代!!!! Method 2
iter_count = 0
import torch.optim as optim
while loss>0.000000001:
opt = optim.SGD(net.parameters(),lr=0.01)
opt.zero_grad() # 每一次迭代都需要将梯度缓存改为0,否则会导致梯度叠加问题
output = net(input)
loss = mse_loss(output,target)
loss.backward()
opt.step()
iter_count = iter_count + 1
if iter_count%30 == 0:
print("第{}次迭代,loss:{}".format(iter_count,loss))
'''
5.1 基本步骤
torchvision 下载并载入cifar10的数据
5.2 实现方法
# coding=utf-8
import torch
import torchvision
import torchvision.transforms as transforms
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
])
'''
#Compose组合tensor到一起,transforms.ToTensor()--转换一个PIL图像到tensor,Convert a PIL Image or numpy.ndarray to tensor;
#transforms.Normalize(mean,std)--规范化一个tensor图像,input[channel] = (input[channel] - mean[channel]) / std[channel]
'''
# Read CIFAR10 from the root directory (download=True would download it
# first). train=True selects the training split, otherwise the test split is
# read; samples are returned after passing through `transform`.
traindatasets = torchvision.datasets.CIFAR10(
    root="./data", train=True, download=False, transform=transform)
# num_workers is the number of subprocesses used for data loading.
trainloader = torch.utils.data.DataLoader(
    traindatasets, batch_size=4, shuffle=True, num_workers=2)
testdatasets = torchvision.datasets.CIFAR10(
    root="./data", train=False, download=False, transform=transform)
testLoader = torch.utils.data.DataLoader(
    testdatasets, batch_size=4, shuffle=True, num_workers=2)
classes = (
    "plane", "car", "bird", "cat", "deer",
    "dog", "frog", "horse", "ship", "truck",
)
import matplotlib.pyplot as plt
import numpy as np
def imshow(img, save_path="1.jpg"):
    """Undo the 0.5/0.5 normalization of an image tensor, plot it and save it.

    Args:
        img: image tensor with the channel axis first (it is moved last for
            plotting).
        save_path: file the figure is written to; defaults to "1.jpg".
    """
    # Loading applied (input - mean) / std with mean = std = 0.5, so the
    # inverse is input * std + mean.
    img = img / 2 + 0.5
    np_img = img.numpy()  # matplotlib works on NumPy arrays
    # Reorder the axes from (C, H, W) to (H, W, C).
    plt.imshow(np.transpose(np_img, (1, 2, 0)))
    plt.savefig(save_path)
#dataiter = iter(trainloader)
#images,labels = dataiter.next()
#
##imshow(torchvision.utils.make_grid(images)) # make a grid of images图像网格,images是一个tensor,所以imshow函数里面需要转换为numpy格式的
#print(' '.join("%5s"% classes[labels[i]] for i in range(4)))
# Load CIFAR10 again, this time with batch_size=1 for the training loader so
# the network is trained on one image at a time.
# download=True would fetch the data first; train=True selects the training
# split, otherwise the test split is read.
traindatasets = torchvision.datasets.CIFAR10(
    root="./data", train=True, download=False, transform=transform)
# num_workers is the number of subprocesses used for data loading.
trainloader = torch.utils.data.DataLoader(
    traindatasets, batch_size=1, shuffle=True, num_workers=2)
testdatasets = torchvision.datasets.CIFAR10(
    root="./data", train=False, download=False, transform=transform)
testloader = torch.utils.data.DataLoader(
    testdatasets, batch_size=4, shuffle=True, num_workers=2)
classes = (
    "plane", "car", "bird", "cat", "deer",
    "dog", "frog", "horse", "ship", "truck",
)
import torch.nn as nn
import torch.nn.functional as F
class ClassifyNet(nn.Module):
    """CIFAR10 classifier: two 3x3 convolutions, one max-pool, three linear layers.

    The first linear layer is sized for 3x32x32 inputs; the network returns
    ten scores per sample.
    """

    def __init__(self):
        super(ClassifyNet, self).__init__()
        # input channels = 3, output channels = 6, kernel = 3x3
        self.conv1 = nn.Conv2d(3, 6, 3)
        self.conv2 = nn.Conv2d(6, 10, 3)
        self.pool = nn.MaxPool2d(2, 2)
        # 1960 = 10 * 14 * 14 = channels * height * width per sample
        # (32 -> 30 -> 28 after the two convolutions, 14 after pooling).
        self.fc1 = nn.Linear(1960, 120)
        # torch.nn.Linear(in_features, out_features, bias=True)
        self.fc2 = nn.Linear(120, 60)
        self.fc3 = nn.Linear(60, 10)

    def forward(self, x):
        """Return the ten class scores for every sample in the mini-batch."""
        x = F.relu(self.conv1(x))
        x = F.relu(self.conv2(x))
        x = self.pool(x)
        # Flatten the feature maps so each row holds one sample.
        x = x.view(-1, 1960)
        x = self.fc1(x)
        x = self.fc2(x)
        x = self.fc3(x)
        return x
①这里重新载入数据集,采用batch_size=1,即训练过程保持单张图训练,速度慢。但学习阶段,需要慢慢搞懂每一步!
②fc1的定义需要计算图像计算到这一层的[batchsize,channels,height,width],然后设置当前层的in_features,即输入神经元个数。
③前向传播forward计算到fc1时,需要平铺卷积高维torch tensors。
定义损失函数:
net = ClassifyNet()  # instantiate the network
print(net)
# Define the loss.
loss_cross = nn.CrossEntropyLoss()
import torch.optim as optim
opt = optim.SGD(net.parameters(), lr=0.001)
running_loss = 0
# Training step: enumerate pairs each batch with an index starting at 0, and
# the loop goes through every batch in trainloader once.
for i, data in enumerate(trainloader, 0):
    inputs, labels = data
    opt.zero_grad()
    outputs = net(inputs)
    loss = loss_cross(outputs, labels)
    loss.backward()
    opt.step()
    running_loss += loss.item()
    # Report the average loss over the last 2000 iterations, then restart
    # the running total.
    if i % 2000 == 1999:
        print("第{}次迭代,loss:{}".format(i + 1, running_loss / 2000))
        running_loss = 0
print("Finish Training")
训练图片50000张,迭代50000次:
……
第46000次迭代,loss:0.000776898443698883
第48000次迭代,loss:0.001786381721496582
第50000次迭代,loss:0.0005364646911621094
# Test step on a single batch.
testdataiter = iter(testloader)
# Use the built-in next(); the iterator's .next() method is Python-2 only.
images, labels = next(testdataiter)
imshow(torchvision.utils.make_grid(images))
print("GT:", ' '.join("%5s" % classes[label] for label in labels))
# One 10-dimensional output per image in the batch.
outputs = net(images)
# torch.max along dimension 1 returns the maximum of each row and its index;
# the index is the predicted class.
_, predicts = torch.max(outputs, 1)
print(predicts)
print("Predicts:", ' '.join("%5s" % classes[predict] for predict in predicts))
这里仅仅测试了4张图,预测都是正确的。
GT: car bird frog dog
Predicts: car bird frog dog
在测试集上测试:(正确率0.4979)
# Accuracy over the whole test set.
correct = 0
total = 0
with torch.no_grad():  # no gradients are needed for evaluation
    for images, labels in testloader:
        outputs = net(images)
        _, predicts = torch.max(outputs, 1)
        total += labels.size(0)
        # predicts == labels is 1 wherever the prediction is right, so the
        # sum is the number of correct predictions in the batch.
        correct = correct + (predicts == labels).sum().item()
# float() keeps this a true division on Python 2 as well.
print("The accuracy of classifyNet on {} test images:{}".format(total, float(correct) / total))
# Training step on the GPU when one is available, otherwise on the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print("device:{}".format(device))
# (1) Move the network to the device once, before the loop starts.
net.to(device)
# The loop goes through every batch in trainloader once, index starting at 0.
for i, data in enumerate(trainloader, 0):
    inputs, labels = data
    # (2) Move the batch to the same device as the network.
    inputs, labels = inputs.to(device), labels.to(device)
    opt.zero_grad()
    outputs = net(inputs)
    loss = loss_cross(outputs, labels)
    loss.backward()
    opt.step()
    running_loss += loss.item()
    # Report the average loss over the last 2000 iterations, then restart
    # the running total.
    if i % 2000 == 1999:
        print("第{}次迭代,loss:{}".format(i + 1, running_loss / 2000))
        running_loss = 0
print("Finish Training")
①将网络net放到GPU上;
②将需要训练的数据放到GPU上。
在GPU上训练所需时间:----real 4m8.954s----user 5m22.688s----sys 0m49.932s----
在CPU上训练所需时间:----real 2m15.048s----user 9m12.332s----sys 15m32.658s----
CPU更快!!!奇怪不奇怪!!!官网解释:Why don't I notice MASSIVE speedup compared to CPU? Because your network is realllly small.
6.1 数据构成
数据来源:ImageNet
数据类别:dog 和 cat
数据放置:train文件夹下放2个文件夹(cat 和 dog),每个文件夹分别放各自的图片。val文件夹做同样的操作。但是train和val中放置的图片一般不能有重复的图片。
6.2 数据读取
参考pytorch官网(github)给出的一个例子,数据会被很规范的读入,类似CIFAR10一样,train和val下面的文件夹名字自然会被分为0和1两类:
# Data loading code: the training and validation images live in the 'train'
# and 'val' sub-directories of args.data.
traindir = os.path.join(args.data, 'train')
valdir = os.path.join(args.data, 'val')
normalize = transforms.Normalize(
    mean=[0.485, 0.456, 0.406],
    std=[0.229, 0.224, 0.225],
)

# Training images are randomly cropped to 224, randomly flipped horizontally,
# converted to tensors and then normalized.
train_dataset = datasets.ImageFolder(
    root=traindir,
    transform=transforms.Compose([
        transforms.RandomResizedCrop(224),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        normalize,
    ]),
)
CIFAR10 | myDatasets | |
function | traindatasets=torchvision.datasets.CIFAR10() | mytraindatasets = torchvision.datasets.ImageFolder() |
pytorch structure | Dataset CIFAR10 |
Dataset ImageFolder Number of datapoints: 1876 Root Location: ./data/mydatasets/datasets/train Transforms (if any): Compose( RandomResizedCrop(size=(224, 224), scale=(0.08, 1.0), ratio=(0.75, 1.3333), interpolation=PIL.Image.BILINEAR) RandomHorizontalFlip(p=0.5) ToTensor() Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) ) Target Transforms (if any): None |
DataLoader | torch.utils.data.DataLoader(mytraindatasets, batch_size=1, shuffle=True, num_workers=0) 官网说法:Combines a dataset and a sampler, and provides single- or multi-process iterators over the dataset. |
|
获取其中一个数据traindatasets.__getitem__(0) 矩阵是图像,6是类别标签 |
(tensor([[[-0.5373, -0.6627, -0.6078, ..., 0.2392, 0.1922, 0.1608], |
pip install -i https://pypi.tuna.tsinghua.edu.cn/simple tensorflow tensorboard tensorboardX
因为TensorBoard默认运行端口在6006,如果在docker下直接运行,则使用浏览器访问TensorBoard时,无法访问docker容器下的TensorBoard服务器,只能访问主机的TensorBoard。因此需要把docker容器的6006端口映射到主机,进而访问主机的TensorBoard服务器时,间接访问docker容器的6006端口服务资源。(-p 6006:6006)
sudo nvidia-docker run --rm -it -v /media/lab/873821cf-d234-44cf-bd63-4372eac823a1/pytorch/:/home/pytorch -p 6006:6006 pytorch:v0 bash
# coding=utf-8
import torch
import torchvision
import torchvision.transforms as transforms
from visualization import visualize
import torch.nn as nn
import torch.nn.functional as F
class ClassifyNet(nn.Module):
    """Two-class classifier: one 3x3 convolution, one max-pool, two linear layers.

    The first linear layer is sized for 3x224x224 inputs; the network returns
    two scores per sample.
    """

    def __init__(self):
        super(ClassifyNet, self).__init__()
        # input channels = 3, output channels = 4, kernel = 3x3
        self.conv1 = nn.Conv2d(3, 4, 3)
        self.pool = nn.MaxPool2d(2, 2)
        # 49284 = 4 * 111 * 111 = channels * height * width per sample
        # (224 -> 222 after the convolution, 111 after pooling).
        self.fc1 = nn.Linear(49284, 60)
        self.fc2 = nn.Linear(60, 2)

    def forward(self, x):
        """Return the two class scores for every sample in the mini-batch."""
        x = F.relu(self.conv1(x))
        x = self.pool(x)
        # Flatten the feature maps so each row holds one sample.
        x = x.view(-1, 49284)
        x = self.fc1(x)
        x = self.fc2(x)
        return x
net = ClassifyNet()
print(net)
'''
visualization
method:tensorbordX
'''
from tensorboardX import SummaryWriter
# Record the network graph, traced with a random 1x3x224x224 input; the
# writer is closed when the with-block exits.
with SummaryWriter(comment="Net") as w:
    w.add_graph(net, (torch.rand(1, 3, 224, 224),))
程序运行之后,当前程序所在目录下会生成一个runs目录
tensorboard --logdir=runs/
host_addr:6006