2020.2.5pytorch学习笔记
1.与神经元连接的空间大小叫做神经元的感受野(receptive field),其实就是滤波器的宽和高,在深度方向上,其大小总是和输入的深度相等。对待空间维度(宽和高)和深度维度是不同的,连接在空间上是局部的。
2.卷积层的输出深度是一个超参数,它与使用的滤波器数量一致,每种滤波器所做的就是在输入数据中寻找一种特征。
3.输出尺寸计算公式
K = (w-f+2p)/s+1
其中K表示输出尺寸,w表示输入尺寸,f是卷积核大小,p是边界填充0的数量,s表示步长。
4.参数共享之所以能够有效,是因为一个特征在不同位置表现是相同的。
5.图片特征具有不变性,通过下采样不会丢失图片拥有的特征。
6.尽可能使用小尺寸的滤波器。
7.零填充的使用可以让卷积层的输入和输出在空间上的维度保持一致。如果不使用填充,那么数据的尺寸会略微减小,在不断卷积的过程中,图像的边缘信息会过快损失掉。
8.torch.nn.Conv2d(in_channels,out_channels,kernel_size, stride=1,padding=0,dilation=1,groups=1,bias=True)
in_channels:卷积层输入深度
out_channels:卷积层输出深度,也就是卷积核个数
kernel_size:卷积核大小
bias:是否使用偏置
9.多层卷积层mnist数字识别
import torch
from torch import nn, optim
from torch.autograd import Variable
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
class CNN(nn.Module):
    """Four-stage convolutional classifier for 28x28 images (e.g. MNIST digits).

    Every stage is ``Conv2d(kernel_size=3, no padding) -> BatchNorm2d -> ReLU``;
    stages 2 and 4 additionally end with a 2x2 max-pool of stride 2.  For a
    28x28 input the spatial size therefore goes
    28 -> 26 -> 24 -> 12 -> 10 -> 8 -> 4, which is why the fully connected
    head expects ``128 * 4 * 4`` input features.

    Args:
        in_channels: number of channels in the input images (default 1).
        num_classes: number of output logits (default 10).
    """

    def __init__(self, in_channels=1, num_classes=10):
        super(CNN, self).__init__()
        self.layer1 = nn.Sequential(
            nn.Conv2d(in_channels, 16, kernel_size=3),
            nn.BatchNorm2d(16),
            nn.ReLU(inplace=True),
        )
        self.layer2 = nn.Sequential(
            nn.Conv2d(16, 32, kernel_size=3),
            nn.BatchNorm2d(32),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
        )
        self.layer3 = nn.Sequential(
            nn.Conv2d(32, 64, kernel_size=3),
            nn.BatchNorm2d(64),
            nn.ReLU(inplace=True),
        )
        self.layer4 = nn.Sequential(
            nn.Conv2d(64, 128, kernel_size=3),
            nn.BatchNorm2d(128),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
        )
        # The 128 * 4 * 4 input width is tied to 28x28 inputs (see class docstring).
        self.fc = nn.Sequential(
            nn.Linear(128 * 4 * 4, 1024),
            nn.ReLU(inplace=True),
            nn.Linear(1024, 128),
            nn.ReLU(inplace=True),
            nn.Linear(128, num_classes),
        )

    def forward(self, x):
        """Return class logits of shape ``(batch, num_classes)`` for image batch ``x``."""
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)
        # Flatten everything except the batch dimension for the linear head.
        x = x.view(x.size(0), -1)
        x = self.fc(x)
        return x
# Hyperparameters for the run.
batch_size = 64
learning_rate = 1e-2
num_epoches = 20

# Convert images to tensors, then normalise with mean 0.1307 and std 0.3081.
data_tf = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.1307,), std=(0.3081,)),
])

# MNIST splits stored under ./data; only the training split requests a download.
train_dataset = datasets.MNIST(
    root='./data', train=True, transform=data_tf, download=True)
test_dataset = datasets.MNIST(
    root='./data', train=False, transform=data_tf)

# Training batches are reshuffled every epoch; evaluation keeps dataset order.
train_loader = DataLoader(
    dataset=train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(
    dataset=test_dataset, batch_size=batch_size, shuffle=False)

# Swap in a different network class here to try another architecture.
model = CNN()
if torch.cuda.is_available():
    model = model.cuda()

# Cross-entropy loss optimised with plain stochastic gradient descent.
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(params=model.parameters(), lr=learning_rate)
# Training: one pass over the training loader.
def train(model, train_loader, optimizer):
    """Run one training epoch over ``train_loader``.

    Puts ``model`` into training mode and, for every ``(img, label)`` batch,
    computes the loss with the module-level ``criterion``, back-propagates it
    and applies one ``optimizer`` step.  The current batch loss is printed
    every 100 batches.

    Args:
        model: network to optimise; each batch is moved to the device of its
            parameters.
        train_loader: iterable yielding ``(img, label)`` tensor pairs.
        optimizer: optimizer constructed over ``model``'s parameters.
    """
    model.train()
    # Follow the model's own device instead of re-checking CUDA availability,
    # so a CPU model still works on a machine that happens to have a GPU.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device('cpu')
    for batch_idx, (img, label) in enumerate(train_loader):
        # Inputs stay ordinary tensors (no volatile/no-grad wrapping) so that
        # autograd records the graph needed by loss.backward().
        img = img.to(device)
        label = label.to(device)
        optimizer.zero_grad()
        out = model(img)
        loss = criterion(out, label)
        loss.backward()
        optimizer.step()
        if (batch_idx + 1) % 100 == 0:
            print('loss: ', loss.item())
# Full training schedule: one call to train() per epoch, announcing each epoch first.
for epoch in range(num_epoches):
    print('train epoch: ', epoch)
    train(model, train_loader, optimizer)
def test(model, test_loader):
    """Evaluate ``model`` on ``test_loader`` and print mean loss and accuracy.

    The loss is computed with the module-level ``criterion``.  Both metrics
    are averaged over the number of samples actually yielded by
    ``test_loader``.

    Args:
        model: network to evaluate; each batch is moved to the device of its
            parameters.
        test_loader: iterable yielding ``(img, label)`` tensor pairs.

    Raises:
        ValueError: if ``test_loader`` yields no samples.
    """
    model.eval()
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device('cpu')
    eval_loss = 0.0
    num_correct = 0
    num_samples = 0
    # No gradients are needed for evaluation; skipping them saves memory and time.
    with torch.no_grad():
        for img, label in test_loader:
            # Images keep their (N, C, H, W) layout because the model is convolutional.
            img = img.to(device)
            label = label.to(device)
            out = model(img)
            loss = criterion(out, label)
            batch_count = label.size(0)
            # criterion returns a per-batch mean, so weight it by the batch size.
            eval_loss += loss.item() * batch_count
            _, pred = torch.max(out, 1)
            num_correct += (pred == label).sum().item()
            num_samples += batch_count
    if num_samples == 0:
        raise ValueError('test_loader yielded no samples')
    # Report the average loss and accuracy over all evaluated samples.
    print('test loss:{:.6f}, acc:{:.6f}'.format(
        eval_loss / num_samples, num_correct / num_samples))
# Evaluate the model once on the test loader after the training loop has finished.
test(model, test_loader)
训练时间明显增长
test loss:0.023492, acc:0.991500
准确率达到99.15%!!!