The core of all neural networks in PyTorch is the autograd package, which provides automatic differentiation for every operation on tensors. torch.Tensor is the central class of this package: if you set .requires_grad to True, every operation on that tensor is tracked. Once the computation is finished, calling .backward() computes all the gradients automatically, and they are accumulated into the tensor's .grad attribute.
import torch

x = torch.ones(2, 2, requires_grad=True)   # track every operation on x
y = x + 2
z = y * y * 3
out = z.mean()
out.backward()                              # compute d(out)/dx and accumulate it into x.grad
print(x.grad)
"""
tensor([[4.5000, 4.5000],
        [4.5000, 4.5000]])
"""
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        # two convolutional layers followed by three fully connected layers (LeNet-style)
        self.conv1 = nn.Conv2d(1, 6, 5)
        self.conv2 = nn.Conv2d(6, 16, 5)
        self.fc1 = nn.Linear(16 * 5 * 5, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        x = F.max_pool2d(F.relu(self.conv1(x)), (2, 2))
        x = F.max_pool2d(F.relu(self.conv2(x)), 2)   # a single number means a square pooling window
        x = x.view(-1, self.num_flat_features(x))    # flatten everything except the batch dimension
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x

    def num_flat_features(self, x):
        size = x.size()[1:]     # all dimensions except the batch dimension
        num_features = 1
        for s in size:
            num_features *= s
        return num_features
net = Net()
print(net)

params = list(net.parameters())
#print(params)

input = torch.randn(1, 1, 32, 32)
output = net(input)
target = torch.randn(10)
target = target.view(1, -1)          # reshape the target to the same shape as the output
criterion = nn.MSELoss()
loss = criterion(output, target)

net.zero_grad()                      # clear the gradients of all parameters
print('\nconv1.bias.grad before backward')
print(net.conv1.bias.grad)
loss.backward()
print('conv1.bias.grad after backward')
print(net.conv1.bias.grad)
optimizer = optim.SGD(net.parameters(), lr=0.01)
# Note: calling zero_grad() here, after backward(), wipes the gradients that were just
# computed, so the following step() leaves the parameters unchanged (see the output below).
optimizer.zero_grad()
print('\n', net.conv1.bias)
optimizer.step()
print(net.conv1.bias)
"""
Net(
(conv1): Conv2d(1, 6, kernel_size=(5, 5), stride=(1, 1))
(conv2): Conv2d(6, 16, kernel_size=(5, 5), stride=(1, 1))
(fc1): Linear(in_features=400, out_features=120, bias=True)
(fc2): Linear(in_features=120, out_features=84, bias=True)
(fc3): Linear(in_features=84, out_features=10, bias=True)
)
conv1.bias.grad before backward
None
conv1.bias.grad after backward
tensor([ 0.0152, 0.0140, 0.0029, 0.0044, -0.0089, -0.0060])
Parameter containing:
tensor([-0.1782, -0.0445, -0.0111, 0.0683, 0.1568, 0.1192],
requires_grad=True)
Parameter containing:
tensor([-0.1782, -0.0445, -0.0111, 0.0683, 0.1568, 0.1192],
requires_grad=True)
"""
Take conv2d as an example: in nn it is a class (nn.Conv2d), while torch.nn.functional provides it as a function, and the nn version internally calls the conv2d function from torch.nn.functional. Anything that carries trainable parameters, such as convolutional and fully connected layers, is implemented through nn modules; otherwise you would have to define the weight and bias yourself every time you wanted a convolution, so nn does that repetitive work up front and still calls functional underneath. Operations that need no trainable parameters, such as relu and max pooling, live in functional and can be used directly.
So both forms are needed; nn is essentially a wrapper on top of functional, as the short example after the links below illustrates.
https://blog.csdn.net/qq_21578849/article/details/85240797
https://blog.csdn.net/weixin_40087578/article/details/87186613
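A minimal sketch of the relationship (the layer sizes here are arbitrary): an nn.Conv2d module creates and owns its weight and bias as Parameters, and calling it gives the same result as passing those tensors to the functional form yourself.

import torch
import torch.nn as nn
import torch.nn.functional as F

x = torch.randn(1, 1, 8, 8)

conv = nn.Conv2d(1, 4, kernel_size=3)                  # a Module: it creates and owns weight/bias
y_module = conv(x)
y_functional = F.conv2d(x, conv.weight, conv.bias)     # the stateless function: parameters passed in

print(torch.allclose(y_module, y_functional))          # True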
torch 1.0.0
torchvision 0.20
Downloading the MNIST data requires a proxy to get around the network restriction. Alternatively, download the four files separately and point the data-loading part of the script at the folder that holds them; the data can also be found in my GitHub:
https://github.com/AishuaiYao/PyTorch
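A minimal sketch of that substitution (the folder name ./mnist_local is only an illustration; the subfolder layout expected under that root differs between torchvision versions, so place the four files wherever your version looks for them):

from torchvision import datasets, transforms

# hypothetical local path holding the already-downloaded MNIST files
local_root = './mnist_local'
train_data = datasets.MNIST(local_root, train=True, download=False,
                            transform=transforms.Compose([transforms.ToTensor(),
                                                          transforms.Normalize((0.1307,), (0.3081,))]))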
mnist.py
import torch
import torch.nn.functional as F
import torch.optim as optim
from torch.utils import data
from torchvision import datasets,transforms
from torchsummary import summary
import myNet
batch_size = 128
epochs = 20
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
train_data = datasets.MNIST('./data', train=True, download=True,
                            transform=transforms.Compose([transforms.ToTensor(),
                                                          transforms.Normalize((0.1307,), (0.3081,))]))
test_data = datasets.MNIST('./data', train=False,
                           transform=transforms.Compose([transforms.ToTensor(),
                                                         transforms.Normalize((0.1307,), (0.3081,))]))
train_loader = torch.utils.data.DataLoader(train_data,batch_size=batch_size, shuffle=True)
test_loader = torch.utils.data.DataLoader(test_data,batch_size=batch_size, shuffle=True)
# model = myNet.ConvNet().to(device)
model = myNet.ResNet().to(device)
summary(model,(1,28,28))
optimizer = optim.Adam(model.parameters())
def train(model, device, train_loader, optimizer, epoch):
    model.train()
    for batch_idx, (data, target) in enumerate(train_loader):
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()        # gradients accumulate by default, so clear them every batch
        output = model(data)
        loss = F.cross_entropy(output, target)
        loss.backward()
        optimizer.step()
        if batch_idx % 30 == 0:
            print('train {} epoch : {}/{} \t loss : {:.6f}'.format(
                epoch, batch_idx * len(data), len(train_loader.dataset), loss.item()))
def test(model, device, test_loader):
    model.eval()
    test_loss = 0
    correct = 0
    # If requires_grad=True but you do not want autograd to track the computation,
    # wrap it in a with torch.no_grad() block.
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            test_loss += F.cross_entropy(output, target, reduction='sum').item()
            pred = output.max(1, keepdim=True)[1]           # index of the max log-probability
            correct += pred.eq(target.view_as(pred)).sum().item()
    test_loss /= len(test_loader.dataset)
    print('\nvalid loss : {:.4f} \t accuracy : {:.3f}%\n'.format(
        test_loss, 100. * correct / len(test_loader.dataset)))
for epoch in range(epochs):
    train(model, device, train_loader, optimizer, epoch)
    test(model, device, test_loader)
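The torch.no_grad() comment in test() can be seen on a small tensor: inside the block, operations are not recorded by autograd, so the results carry requires_grad=False and build no graph.

import torch

a = torch.ones(2, requires_grad=True)
print((a * 2).requires_grad)         # True: the multiplication is tracked as usual
with torch.no_grad():
    print((a * 2).requires_grad)     # False: autograd does not record this computation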
myNet.py
import torch
import torch.nn as nn
import torch.nn.functional as F
class ConvNet(nn.Module):
    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(1, 10, 5)
        self.conv2 = nn.Conv2d(10, 20, 3)
        self.fc1 = nn.Linear(20 * 10 * 10, 500)
        self.fc2 = nn.Linear(500, 10)

    def forward(self, x):
        in_size = x.size(0)
        out = self.conv1(x)
        out = F.relu(out)
        out = F.max_pool2d(out, 2, 2)
        out = self.conv2(out)
        out = F.relu(out)
        out = out.view(in_size, -1)   # torch.view reshapes a tensor, similar to NumPy's reshape
        out = self.fc1(out)
        out = F.relu(out)
        out = self.fc2(out)
        out = F.log_softmax(out, dim=1)
        return out
class BasicBlock(nn.Module):
    def __init__(self, inchannel, outchannel, s=1):
        super().__init__()
        self.left = nn.Sequential(
            nn.Conv2d(inchannel, outchannel, kernel_size=3, stride=s, padding=1),
            nn.BatchNorm2d(outchannel),
            nn.ReLU(inplace=True),
            nn.Conv2d(outchannel, outchannel, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(outchannel)
        )
        # use a 1x1 convolution on the shortcut whenever the residual changes shape
        self.shortcut = nn.Sequential()
        if s != 1 or inchannel != outchannel:
            self.shortcut = nn.Sequential(
                nn.Conv2d(inchannel, outchannel, kernel_size=1, stride=s),
                nn.BatchNorm2d(outchannel)
            )

    def forward(self, x):
        out = self.left(x)
        out += self.shortcut(x)
        out = F.relu(out)
        return out
class ResNet(nn.Module):
    def __init__(self, residualBlock=BasicBlock, n_class=10):
        super().__init__()
        self.inchannel = 64
        self.conv1 = nn.Sequential(
            nn.Conv2d(1, 64, kernel_size=7, stride=2, padding=3),
            nn.BatchNorm2d(64),
            nn.ReLU(inplace=True),
        )
        self.pooling = nn.Sequential(nn.MaxPool2d(kernel_size=3, stride=2, padding=1))
        self.layer1 = self.maker_layer(residualBlock, 64, 2, s=1)
        self.layer2 = self.maker_layer(residualBlock, 128, 2, s=2)
        self.layer3 = self.maker_layer(residualBlock, 256, 2, s=2)
        self.layer4 = self.maker_layer(residualBlock, 512, 2, s=2)
        self.fc = nn.Linear(512, n_class)

    def maker_layer(self, block, channels, n_blocks, s):
        # the first block in each stage may downsample (stride s); the remaining blocks keep stride 1
        strides = [s] + [1] * (n_blocks - 1)
        layers = []
        for stride in strides:
            layers.append(block(self.inchannel, channels, stride))
            self.inchannel = channels
        return nn.Sequential(*layers)
    def forward(self, x):
        out = self.conv1(x)
        out = self.pooling(out)
        out = self.layer1(out)
        out = self.layer2(out)
        out = self.layer3(out)
        out = self.layer4(out)
        # For a 28x28 MNIST input the feature map is already 1x1 here (28->14->7->7->4->2->1),
        # so pool over whatever spatial size remains instead of a fixed kernel of 4.
        out = F.avg_pool2d(out, out.size(2))
        out = out.view(out.size(0), -1)
        out = self.fc(out)
        return out
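A quick sanity check that could be appended to myNet.py (a sketch; the batch size of 4 is arbitrary), confirming that both networks map MNIST-sized input to 10 scores per image:

if __name__ == '__main__':
    x = torch.randn(4, 1, 28, 28)
    print(ConvNet()(x).shape)    # expected: torch.Size([4, 10])
    print(ResNet()(x).shape)     # expected: torch.Size([4, 10])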