Because an SNN (the simple fully connected network from the previous exercise) cannot take the spatial structure of image data into account, its prediction accuracy is limited. In this chapter we use a CNN (convolutional neural network) for handwritten digit recognition, introducing convolutional and pooling layers to improve accuracy, and we switch the optimizer to the Adam method for updating the weights.
For the competition details and data, see the main page of Pytorch入门练习1 (PyTorch beginner exercise 1).
Check the directory structure
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory
import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))
# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All"
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session
import pandas as pd
import numpy as np
data_df = pd.read_csv("/kaggle/input/digit-recognizer/train.csv")
data_df.head()
The training set has 42000 rows and 785 columns: the first column is the label, and columns 2 through 785 hold the 784 grayscale pixel values.
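Before splitting the data, a quick optional sanity check confirms the shape and that the ten digit classes are roughly balanced:
# Optional sanity check of the raw table
print(data_df.shape)                                 # (42000, 785)
print(data_df["label"].value_counts().sort_index())  # counts for digits 0-9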
label_df = data_df["label"]
feature_df = data_df.drop("label", axis=1)
# Normalize pixel values to [0, 1]
feature_df = feature_df/255.0
# Reshape each flat row into a 1*28*28 (C*H*W) image
feature_df = feature_df.apply(lambda x:x.values.reshape(1,28,28), axis=1)
feature_df.head()
Define a custom Dataset to make the later processing easier.
import torch
from torch.nn import Module
from torch.utils.data import Dataset,DataLoader
from torchvision import transforms
class DigitRecognizerDataset(Dataset):
    def __init__(self, label_df, feature_df, transform=None, target_transforms=None):
        self.label_df = label_df
        self.images = feature_df
        self.transform = transform
        self.target_transforms = target_transforms

    def __len__(self):
        return len(self.label_df)

    def __getitem__(self, item):
        image = self.images.iloc[item]
        label = self.label_df.iloc[item]
        if self.transform:
            # assign the result back -- transforms return a new object
            image = self.transform(image)
        if self.target_transforms:
            label = self.target_transforms(label)
        return label, image
# The rows were already reshaped above into (1, 28, 28) float arrays, so convert
# them to tensors directly; transforms.ToTensor() would not work here because it
# expects H*W*C input and would permute the axes.
drDataset = DigitRecognizerDataset(label_df, feature_df,
                                   transform=lambda img: torch.as_tensor(img, dtype=torch.float32))
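It is worth pulling one sample out of the new dataset to confirm that __getitem__ returns what we expect; the shapes below follow from the preprocessing above:
# Fetch a single sample to verify the Dataset wiring
label, image = drDataset[0]
print(label)        # an integer label in 0..9
print(image.shape)  # torch.Size([1, 28, 28])
print(image.dtype)  # torch.float32, thanks to the transform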
Visualize a few sample images.
import matplotlib.pyplot as plt
figure = plt.figure(figsize=(8, 8))
cols, rows = 3, 3
for i in range(1, cols * rows + 1):
    sample_idx = torch.randint(len(drDataset), size=(1,)).item()
    label, img = drDataset[sample_idx]
    figure.add_subplot(rows, cols, i)
    plt.title(label)
    plt.axis("off")  # hide the axes
    # squeeze() drops size-1 dimensions: it turns the (1, 28, 28) image into the
    # 2-D (28, 28) matrix that imshow expects for a grayscale plot
    plt.imshow(img.squeeze(), cmap="gray")
plt.show()
The network built here has two convolutional layers, one pooling layer, and three fully connected layers.
nn.Conv1d is for convolving text data: it convolves along the width only.
nn.Conv2d is for convolving image data: it convolves over both height and width.
Conv2d(in_channels, out_channels, kernel_size)
Note: in a pooling layer, padding (the number of rings of zeros added around the image) must satisfy padding <= kernel_size / 2.
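To make the size arithmetic concrete, here is a small optional sketch that traces a dummy input through the exact layers used below; with no padding, a convolution shrinks each side to H - kernel_size + 1, and max pooling divides it by kernel_size:
import torch
from torch import nn
# trace the shapes through the layers of the upcoming network
t = torch.zeros(1, 1, 28, 28)              # N*C*H*W dummy batch
t = nn.Conv2d(1, 10, kernel_size=5)(t)     # 28-5+1 = 24 -> (1, 10, 24, 24)
print(t.shape)
t = nn.MaxPool2d(kernel_size=4)(t)         # 24/4 = 6    -> (1, 10, 6, 6)
print(t.shape)
t = nn.Conv2d(10, 20, kernel_size=3)(t)    # 6-3+1 = 4   -> (1, 20, 4, 4)
print(t.shape)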
from torch import nn
class CNN(Module):
    def __init__(self):
        super(CNN, self).__init__()
        # convolution + pooling layers
        self.convd_relu_stack = nn.Sequential(
            # after this convolution: 10*24*24
            nn.Conv2d(in_channels=1, out_channels=10, kernel_size=5),
            nn.ReLU(),
            # after pooling: 10*6*6
            nn.MaxPool2d(kernel_size=4),
            # after this convolution: 20*4*4
            nn.Conv2d(10, 20, 3),
            nn.ReLU()
        )
        # fully connected layers
        self.linear_relu_stack = nn.Sequential(
            nn.Linear(20*4*4, 160),
            nn.ReLU(),
            nn.Linear(160, 20),
            nn.ReLU(),
            # output raw logits; no Softmax here, because the
            # nn.CrossEntropyLoss used below applies log-softmax internally
            nn.Linear(20, 10)
        )

    def forward(self, x):
        batch_size = x.size(0)  # number of samples in this batch
        convd_result = self.convd_relu_stack(x)
        # flatten (batch_size, 20, 4, 4) to (batch_size, 20*4*4) for the linear layers
        convd_result = convd_result.view(batch_size, -1)
        result_ts = self.linear_relu_stack(convd_result)
        return result_ts
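A quick smoke test on a dummy batch catches shape mistakes in the flatten step before any training happens (the parameter count is just informational):
# smoke-test the full network on a dummy batch of 2
_m = CNN()
out = _m(torch.zeros(2, 1, 28, 28))
print(out.shape)                                # torch.Size([2, 10]), one logit per class
print(sum(p.numel() for p in _m.parameters()))  # total number of trainable parameters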
Split the dataset into training and test sets at a ratio of 8:2.
from torch.utils.data import random_split
train_size = int(0.8*len(drDataset))
# take the remainder so the two lengths always sum to len(drDataset)
test_size = len(drDataset) - train_size
# train : test ≈ 8 : 2
train_dataset, test_dataset = random_split(drDataset, [train_size, test_size])
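If you want the split to be reproducible across runs, random_split also accepts a seeded generator (optional; the seed value 42 here is arbitrary):
# optional: seed the split so train/test membership is stable between runs
g = torch.Generator().manual_seed(42)
train_dataset, test_dataset = random_split(drDataset, [train_size, test_size], generator=g)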
Convert the datasets into iterable DataLoaders.
# batch_size is the number of samples per batch; set it to taste -- I habitually use 64
# shuffle controls whether the order is re-randomized each epoch; this data has no
# temporal ordering, so it is safe (and beneficial) to set it to True for training
train_dataloader = DataLoader(train_dataset, batch_size=64, shuffle=True)
# shuffling the test set has no effect on the metrics, so it can stay False
test_dataloader = DataLoader(test_dataset, batch_size=64, shuffle=False)
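Pulling one batch out of the loader is a cheap way to confirm the shapes the model will actually see (labels come first because __getitem__ returns (label, image)):
# inspect a single batch
y, x = next(iter(train_dataloader))
print(y.shape, x.shape)  # torch.Size([64]) torch.Size([64, 1, 28, 28])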
n_epochs = 10        # number of training epochs
learn_rate = 0.001   # learning rate
device = 'cuda' if torch.cuda.is_available() else "cpu"
model = CNN().to(device)
import torch.optim as optim
# define the cross-entropy loss
loss_fn = nn.CrossEntropyLoss()
# define the Adam optimizer
optimizer = optim.Adam(model.parameters(), lr=learn_rate)
def train_loop(dataloader, model, loss_fn, optimizer):
    size = len(dataloader.dataset)  # number of training samples (33600 here)
    model.train()  # switch to training mode
    for n, (y, x) in enumerate(dataloader):
        # cast to float so the input dtype matches the weights
        x = x.float().to(device)
        y = y.to(device)
        pred = model(x)
        loss = loss_fn(pred, y)
        # zero the accumulated gradients
        optimizer.zero_grad()
        # backpropagate to compute the gradients
        loss.backward()
        # update the weights
        optimizer.step()
        # print the loss every 100 batches
        if n % 100 == 0:
            loss, current = loss.item(), (n + 1) * len(x)
            print(f"loss: {loss:>7f} [{current:>5d}/{size:>5d}]")
def test_loop(dataloader, model, loss_fn):
    test_loss, correct = 0, 0
    size = len(dataloader.dataset)
    batchNum = len(dataloader)
    model.eval()  # switch to evaluation mode
    with torch.no_grad():
        for y, x in dataloader:
            x = x.float().to(device)
            y = y.to(device)
            pred_y = model(x)
            test_loss += loss_fn(pred_y, y).item()
            # argmax(1) maps the per-class scores back to a plain label (0, 1, 2, ...);
            # .type(torch.float) turns the resulting booleans into 1.0/0.0 so that
            # summing them counts the correct predictions
            correct += (pred_y.argmax(1) == y).type(torch.float).sum().item()
    # average loss: total loss divided by the number of batches
    avg_loss = test_loss / batchNum
    # accuracy
    correct_rate = correct / size
    print(f"Test Describe:\nAccuracy: {(100*correct_rate):>0.1f}%, Avg loss: {avg_loss:>8f} \n")
# train the model
for n in range(n_epochs):
    print(f"======{n}======")
    train_loop(train_dataloader, model, loss_fn, optimizer)
    test_loop(test_dataloader, model, loss_fn)
print("Done!")
In the end the model reaches about 97.8% accuracy, a large improvement over the 82% achieved by the earlier SNN.
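At this point the trained weights exist only in memory. A minimal sketch of saving and restoring them, assuming an arbitrary filename under /kaggle/working:
# save the trained weights (the filename is arbitrary)
torch.save(model.state_dict(), "/kaggle/working/cnn_mnist.pt")
# later, restore them into a fresh model
restored = CNN().to(device)
restored.load_state_dict(torch.load("/kaggle/working/cnn_mnist.pt"))
restored.eval()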
Load the prediction data.
pred_df = pd.read_csv("/kaggle/input/digit-recognizer/test.csv")
pred_df.head()
Apply the same preprocessing to the prediction data.
# Reshape the prediction data to match the training input shape, normalize by 255
# in the same way, and make sure the dtype is float to match the weights
pred_ts = (torch.from_numpy(pred_df.values).float() / 255.0).reshape(len(pred_df), 1, 28, 28)
print(pred_ts.shape)
model.eval()
with torch.no_grad():
    # move the input to the same device as the model before the forward pass
    pred_y = model(pred_ts.to(device))
# take the class with the highest score as the predicted label
pred_y = pred_y.argmax(1)
# move the tensor back to the CPU and convert it to numpy
pred_y_np = pred_y.cpu().numpy()
# wrap it in a DataFrame for easy inspection and saving; index+1 matches the
# ImageId column of sample_submission.csv
pred_y_pd = pd.DataFrame({"Label":pred_y_np.tolist()}, index=pred_df.index+1)
pred_y_pd
pred_y_pd.index.name = "ImageId"
pred_y_pd.to_csv("/kaggle/working/submission.csv")
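As a final optional check, re-reading the file confirms it matches the ImageId/Label layout expected by sample_submission.csv:
# verify the submission format
check_df = pd.read_csv("/kaggle/working/submission.csv")
print(check_df.head())  # columns: ImageId, Label
print(len(check_df))    # 28000 rows, one per test image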