In PyTorch, torch.Tensor is the main tool for storing and transforming data.
import torch
# create an uninitialized 5x3 tensor
x = torch.empty(5, 3)
# create a randomly initialized 5x3 tensor
x = torch.rand(5, 3)
# create a 5x3 long tensor of all zeros
x = torch.zeros(5, 3, dtype=torch.long)
# create a tensor directly from data
x = torch.tensor([5, 3])
# arithmetic operations
x = torch.rand(5, 3)
y = torch.rand(5, 3)
print(torch.add(x, y))  # equivalent to x + y
# indexing: the result shares memory with the original data, so modifying one also modifies the other
y = x[0, :]
y += 1
print(y)
print(x[0, :])  # the source tensor is changed as well
# reshaping
y = x.view(15)
z = x.view(-1, 5)  # the dimension marked -1 is inferred from the other dimensions
print(x.size(),y.size(),z.size())
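# broadcasting: when the operands have different shapes, elements are first copied along the size-1 dimensions so the shapes match, then the operation is applied element-wise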
x = torch.arange(1,3).view(1,2)
y = torch.arange(1,4).view(3,1)
print(x + y)
# Tensor to NumPy (the two share the same underlying memory)
a = torch.ones(5)
b = a.numpy()
print(a,b)
# NumPy to Tensor
import numpy as np
a = np.ones(5)
b = torch.from_numpy(a)
print(a,b)
if torch.cuda.is_available():
    device = torch.device("cuda")          # GPU
    y = torch.ones_like(x, device=device)  # create a tensor directly on the GPU
    x = x.to(device)                       # equivalent to .to("cuda")
    z = x + y
    print(z)
    print(z.to("cpu", torch.double))       # to() can also change the dtype at the same time
Linear regression outputs a continuous value, so it is suitable for regression problems.
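The DataLoader below assumes that `features` and `labels` already exist; a minimal sketch that generates a synthetic regression dataset (the true_w/true_b values here are just illustrative, and torch is imported as above) could be:
num_inputs, num_examples = 2, 1000
true_w, true_b = [2, -3.4], 4.2                          # illustrative ground-truth parameters
features = torch.randn(num_examples, num_inputs)         # random input features
labels = true_w[0] * features[:, 0] + true_w[1] * features[:, 1] + true_b
labels += 0.01 * torch.randn(labels.size())              # add a little Gaussian noise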
import torch.utils.data as Data
batch_size = 10
# combine the features and labels of the training data
dataset = Data.TensorDataset(features,labels)
# read mini-batches in random order
data_iter = Data.DataLoader(dataset,batch_size,shuffle=True)
from torch import nn
class LinearNet(nn.Module):
    def __init__(self, n_feature):
        super(LinearNet, self).__init__()
        self.linear = nn.Linear(n_feature, 1)
    # forward defines the forward pass
    def forward(self, x):
        y = self.linear(x)
        return y
net = LinearNet(num_inputs)
print(net)  # print shows the structure of the network
from torch.nn import init
init.normal_(net.linear.weight, mean=0, std=0.01)  # net[0] only works with nn.Sequential; here access the layer by name
init.constant_(net.linear.bias, val=0)
loss = nn.MSELoss()
import torch.optim as optim
optimizer = optim.SGD(net.parameters(),lr=0.03)
print(optimizer)
num_epochs = 3
for epoch in range(1, num_epochs + 1):
    for x, y in data_iter:
        output = net(x)
        l = loss(output, y.view(-1, 1))
        optimizer.zero_grad()
        l.backward()
        optimizer.step()
    print('epoch %d, loss: %f' % (epoch, l.item()))
In softmax regression the number of output units grows from one to several, and the softmax operation is introduced so that the outputs are better suited to predicting and training on discrete values.
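The softmax operation maps each row of outputs to a probability distribution; a minimal sketch, for illustration only (nn.CrossEntropyLoss used below already applies softmax internally):
def softmax(X):
    X_exp = X.exp()                              # exponentiate each element
    partition = X_exp.sum(dim=1, keepdim=True)   # row-wise normalization constant
    return X_exp / partition                     # each row now sums to 1 (broadcasting)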
num_inputs = 784
num_outputs = 10
class LinearNet(nn.Module):
    def __init__(self, num_inputs, num_outputs):
        super(LinearNet, self).__init__()
        self.linear = nn.Linear(num_inputs, num_outputs)
    def forward(self, x):
        y = self.linear(x.view(x.shape[0], -1))
        return y
net = LinearNet(num_inputs,num_outputs)
# randomly initialize the model's weight parameters from a normal distribution with mean 0 and standard deviation 0.01
init.normal_(net.linear.weight,mean=0,std=0.01)
init.constant_(net.linear.bias,val=0)
loss = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(net.parameters(),lr=0.1)
num_epochs,lr = 5,0.1
def train_ch3(net, train_iter, test_iter, loss, num_epochs, batch_size,
              params=None, lr=None, optimizer=None):
    for epoch in range(num_epochs):
        train_l_sum, train_acc_sum, n = 0.0, 0.0, 0
        for x, y in train_iter:
            y_hat = net(x)
            l = loss(y_hat, y).sum()
            # zero the gradients
            if optimizer is not None:
                optimizer.zero_grad()
            elif params is not None and params[0].grad is not None:
                for param in params:
                    param.grad.data.zero_()
            l.backward()
            if optimizer is None:
                d2l.sgd(params, lr, batch_size)
            else:
                optimizer.step()
            train_l_sum += l.item()
            train_acc_sum += (y_hat.argmax(dim=1) == y).sum().item()
            n += y.shape[0]
        test_acc = evaluate_accuracy(test_iter, net)
        print('epoch %d, loss %.4f, train acc %.3f, test acc %.3f' %
              (epoch + 1, train_l_sum / n, train_acc_sum / n, test_acc))
train_ch3(net, train_iter, test_iter, loss, num_epochs, batch_size, None, None, optimizer)
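train_ch3 relies on an evaluate_accuracy helper (provided by the d2l package in the book); a minimal sketch of what it computes, assuming the usual (X, y) mini-batches, could be:
def evaluate_accuracy(data_iter, net):
    acc_sum, n = 0.0, 0
    for X, y in data_iter:
        acc_sum += (net(X).argmax(dim=1) == y).float().sum().item()  # count correct predictions
        n += y.shape[0]
    return acc_sum / n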
A multilayer perceptron adds one or more hidden layers on top of a single-layer neural network; the hidden layers sit between the input layer and the output layer.
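With a single hidden layer and an activation function, the forward computation is H = ReLU(X W1 + b1), O = H W2 + b2; a minimal from-scratch sketch (the parameter names here are only illustrative) could be:
def mlp_forward(X, W1, b1, W2, b2):
    H = torch.relu(torch.matmul(X, W1) + b1)  # hidden layer with ReLU activation
    return torch.matmul(H, W2) + b2           # output layer (no activation here)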
num_inputs,num_outputs,num_hiddens=784,10,256
net = nn.Sequential(
    d2l.FlattenLayer(),
    nn.Linear(num_inputs, num_hiddens),
    nn.ReLU(),
    nn.Linear(num_hiddens, num_outputs),
)
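d2l.FlattenLayer reshapes each example into a 1D vector before the first linear layer; a minimal sketch of such a layer could be:
class FlattenLayer(nn.Module):
    def forward(self, x):
        return x.view(x.shape[0], -1)  # keep the batch dimension, flatten the rest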
for params in net.parameters():
    init.normal_(params, mean=0, std=0.01)
batch_size = 256
train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size)
loss = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(net.parameters(),lr=0.5)
num_epochs = 5
d2l.train_ch3(net,train_iter,test_iter,loss,num_epochs,batch_size,None,None,optimizer)
import torch
from torch import nn
# the MLP class does not need to define a backward function; autograd automatically generates the backward function needed for backpropagation
class MLP(nn.Module):
    # declare the layers with model parameters; here two fully connected layers
    def __init__(self, **kwargs):
        # call the constructor of the parent class nn.Module to do the necessary initialization,
        # so that other arguments can still be specified when constructing an instance
        super(MLP, self).__init__(**kwargs)
        self.hidden = nn.Linear(784, 256)  # hidden layer
        self.act = nn.ReLU()
        self.output = nn.Linear(256, 10)   # output layer
    # define the forward computation: how to produce the required output from the input x
    def forward(self, x):
        a = self.act(self.hidden(x))
        return self.output(a)
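A quick forward pass with a randomly generated batch (2 samples of 784 features, as in the book's example) can be used to check the module:
x = torch.rand(2, 784)  # a random batch of 2 flattened images
net = MLP()
print(net)
net(x)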
The Sequential class
# MySequential below implements the same functionality as the Sequential class,
# which helps make the way Sequential works clearer
from collections import OrderedDict
class MySequential(nn.Module):
    def __init__(self, *args):
        super(MySequential, self).__init__()
        # if a single OrderedDict is passed in, add each module under its key
        if len(args) == 1 and isinstance(args[0], OrderedDict):
            for key, module in args[0].items():
                self.add_module(key, module)
        else:
            # otherwise add the modules one by one, using their index as the name
            for idx, module in enumerate(args):
                self.add_module(str(idx), module)
    def forward(self, input):
        # self._modules is an OrderedDict, so members are traversed in the order they were added
        for module in self._modules.values():
            input = module(input)
        return input
# use MySequential to build the MLP described above and run a forward pass with the randomly initialized model
net = MySequential(
    nn.Linear(784, 256),
    nn.ReLU(),
    nn.Linear(256, 10),
)
print(net)
net(x)
The ModuleList class
net = nn.ModuleList([nn.Linear(784,256),nn.ReLU()])
net.append(nn.Linear(256, 10))  # append works like list.append
print(net[-1])                  # indexing works like a list
print(net)
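Note that ModuleList only stores modules and does not define a forward function itself, so it is normally used inside a custom Module; a minimal sketch:
class MyModule(nn.Module):
    def __init__(self):
        super(MyModule, self).__init__()
        self.linears = nn.ModuleList([nn.Linear(10, 10) for _ in range(3)])
    def forward(self, x):
        for layer in self.linears:  # a ModuleList can be iterated like a regular list
            x = layer(x)
        return x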
The ModuleDict class
net = nn.ModuleDict({
    'linear': nn.Linear(784, 256),
    'act': nn.ReLU(),
})
net['output'] = nn.Linear(256, 10)  # add a module
print(net['linear'])                # access by key
print(net.output)                   # or as an attribute
print(net)
class FancyMLP(nn.Module):
    def __init__(self, **kwargs):
        super(FancyMLP, self).__init__(**kwargs)
        # non-trainable parameter (a constant parameter)
        self.rand_weight = torch.rand((20, 20), requires_grad=False)
        self.linear = nn.Linear(20, 20)
    def forward(self, x):
        x = self.linear(x)
        # use the constant parameter, plus the relu and mm functions from nn.functional
        x = nn.functional.relu(torch.mm(x, self.rand_weight.data) + 1)
        # reuse the fully connected layer, which is equivalent to two layers sharing parameters
        x = self.linear(x)
        # control flow: call item() to get a Python scalar for the comparison
        while x.norm().item() > 1:
            x /= 2
        if x.norm().item() < 0.8:
            x *= 10
        return x.sum()
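A quick check with a random input batch matching the layer's 20 input features (as in the book's example):
X = torch.rand(2, 20)
net = FancyMLP()
print(net)
net(X)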