Implementing Softmax Regression from Scratch with PyTorch to Classify the Fashion-MNIST Dataset
Someone has implemented Dive into Deep Learning (《动手学深度学习》) in PyTorch (the address of the full book is in the reference links at the end of this post). While working through it I found that the code has some small problems in my environment (PyTorch 0.4.0), perhaps because the dataset I read manually has a different format from what PyTorch's torchvision.datasets produces. I corrected and tidied the code; posting it here may be helpful to others.
The original content of this section is here: Dive into Deep Learning (PyTorch), 3.6 "Implementation of Softmax Regression from Scratch".
Compared with the original, the code here makes the following changes:
- Fixed the incorrect accuracy calculation.
- Removed the dependency on the d2lzh module. With the basic environment installed, you can copy the code to your machine and run it as-is, without downloading any extra modules.
import torch
import torchvision
import numpy as np
import sys
print("torch.__version__:", torch.__version__)
print("torchvision.__version__:", torchvision.__version__)
Output:
torch.__version__: 0.4.0
torchvision.__version__: 0.2.1
Downloading the dataset with the functions PyTorch provides is very slow (because of the firewall), so I downloaded the dataset manually from GitHub and read it by hand.
Fashion-MNIST dataset GitHub download address: just download the contents of data/fashion. I also uploaded a copy of the dataset to CSDN; if GitHub is too slow for you as well, you can get it from the Fashion-MNIST dataset CSDN download address.
def load_mnist(path, kind='train'):
    """Load the manually downloaded Fashion-MNIST data from `path`."""
    import os
    import gzip
    import numpy as np
    labels_path = os.path.join(path, '%s-labels-idx1-ubyte.gz' % kind)
    images_path = os.path.join(path, '%s-images-idx3-ubyte.gz' % kind)
    with gzip.open(labels_path, 'rb') as lbpath:
        labels = np.frombuffer(lbpath.read(), dtype=np.uint8,
                               offset=8)
    with gzip.open(images_path, 'rb') as imgpath:
        images = np.frombuffer(imgpath.read(), dtype=np.uint8,
                               offset=16).reshape(len(labels), 784)
    return images, labels
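As a quick sanity check (a sketch, not in the original; the path is the same one assumed in the loader below, so adjust it to wherever you unpacked data/fashion), load_mnist should return flattened images and labels as NumPy uint8 arrays:
# Hypothetical path; change it to your own download location
X_train, y_train = load_mnist('/home/user_name/Datasets/FashionMNIST/raw', kind='train')
print(X_train.shape, y_train.shape)  # expected: (60000, 784) (60000,)
print(X_train.dtype, y_train.dtype)  # expected: uint8 uint8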
def load_data_fashion_mnist(batch_size, resize=None, root='~/Datasets/FashionMNIST'):
    """Download the Fashion-MNIST dataset and then load it into memory."""
    # Original method: downloading is extremely slow because of the firewall
    """
    trans = []
    if resize:
        trans.append(torchvision.transforms.Resize(size=resize))
    trans.append(torchvision.transforms.ToTensor())
    transform = torchvision.transforms.Compose(trans)
    mnist_train = torchvision.datasets.FashionMNIST(root=root, train=True, download=True, transform=transform)
    mnist_test = torchvision.datasets.FashionMNIST(root=root, train=False, download=True, transform=transform)
    """
    # My method: use the dataset downloaded manually
    X_train, y_train = load_mnist('/home/user_name/Datasets/FashionMNIST/raw', kind='train')
    X_test, y_test = load_mnist('/home/user_name/Datasets/FashionMNIST/raw', kind='t10k')
    X_train_tensor = torch.from_numpy(X_train).to(torch.float32).view(-1, 1, 28, 28) * (1 / 255.0)
    X_test_tensor = torch.from_numpy(X_test).to(torch.float32).view(-1, 1, 28, 28) * (1 / 255.0)
    y_train_tensor = torch.from_numpy(y_train).to(torch.int64).view(-1, 1)
    y_test_tensor = torch.from_numpy(y_test).to(torch.int64).view(-1, 1)
    import torch.utils.data as Data
    mnist_train = Data.TensorDataset(X_train_tensor, y_train_tensor)
    mnist_test = Data.TensorDataset(X_test_tensor, y_test_tensor)
    # ================================================================
    if sys.platform.startswith('win'):
        num_workers = 0  # 0 means no extra worker processes are used to speed up data loading
    else:
        num_workers = 4
    train_iter = torch.utils.data.DataLoader(mnist_train, batch_size=batch_size, shuffle=True, num_workers=num_workers)
    test_iter = torch.utils.data.DataLoader(mnist_test, batch_size=batch_size, shuffle=False, num_workers=num_workers)
    return train_iter, test_iter
batch_size = 256
train_iter, test_iter = load_data_fashion_mnist(batch_size)
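To confirm that the iterators produce what the rest of the code expects, here is a quick check (not in the original). Given how the tensors are built above, each image batch should be (batch_size, 1, 28, 28) and each label batch (batch_size, 1):
for X, y in train_iter:
    print(X.shape, y.shape)  # expected: torch.Size([256, 1, 28, 28]) torch.Size([256, 1])
    break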
num_inputs = 784
num_outputs = 10
W = torch.tensor(np.random.normal(0, 0.01, (num_inputs, num_outputs)), dtype=torch.float)
b = torch.zeros(num_outputs, dtype=torch.float)
W.requires_grad_(requires_grad=True);
b.requires_grad_(requires_grad=True);
def softmax(X):
    X_exp = X.exp()
    partition = X_exp.sum(dim=1, keepdim=True)
    return X_exp / partition  # broadcasting is applied here
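A quick check that softmax behaves as expected: every entry of the output is non-negative and each row sums to 1 (the exact numbers depend on the random input):
X = torch.rand((2, 5))
X_prob = softmax(X)
print(X_prob, X_prob.sum(dim=1))  # each row of X_prob sums to 1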
def net(X):
    return softmax(torch.mm(X.view((-1, num_inputs)), W) + b)
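Since W has shape (784, 10), net maps a batch of images to one row of 10 class probabilities per sample. A quick shape check (not in the original), reusing one batch from train_iter:
for X, y in train_iter:
    print(net(X).shape)  # expected: torch.Size([256, 10])
    break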
Cross entropy:

$$H\left(\boldsymbol{y}^{(i)}, \boldsymbol{\hat y}^{(i)}\right) = -\sum_{j=1}^q y_j^{(i)} \log \hat y_j^{(i)},$$

When each image has only one label, this simplifies to:

$$H\left(\boldsymbol{y}^{(i)}, \boldsymbol{\hat y}^{(i)}\right) = -\log \hat y_{y^{(i)}}^{(i)}$$

Assuming the training set has $n$ samples, the cross-entropy loss function is defined as

$$\ell(\boldsymbol{\Theta}) = \frac{1}{n} \sum_{i=1}^n H\left(\boldsymbol{y}^{(i)}, \boldsymbol{\hat y}^{(i)}\right).$$
# Define the cross-entropy loss
def cross_entropy(y_hat, y):
    return - torch.log(y_hat.gather(1, y.view(-1, 1)))
# An example of what tensor.gather does
y_hat = torch.tensor([[0.1, 0.3, 0.6], [0.3, 0.2, 0.5]])
y = torch.LongTensor([0, 2])
y_hat.gather(1, y.view(-1, 1))
Output:
tensor([[ 0.1000],
[ 0.5000]])
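Applying cross_entropy to the same y_hat and y picks out exactly these probabilities and takes their negative log; since -log(0.1) ≈ 2.3026 and -log(0.5) ≈ 0.6931, the result should be approximately:
cross_entropy(y_hat, y)
Output (approximately):
tensor([[ 2.3026],
        [ 0.6931]])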
# Compute the classification accuracy of the predictions y_hat against the labels y
def accuracy(y_hat, y):
    return (y_hat.argmax(dim=1) == y).float().mean().item()
print(accuracy(y_hat, y))
Output:
0.5
# Evaluate the accuracy of model net on dataset data_iter
def evaluate_accuracy(data_iter, net):
    acc_sum, n = 0.0, 0
    for X, y in data_iter:
        # Original method:
        # With PyTorch 0.4.0 the accuracy comes out wrong (other versions untested). The reason is
        # that the two sides of "==" have different shapes, so the comparison is broadcast.
        # acc_sum += (net(X).argmax(dim=1) == y).float().sum().item()
        # My method:
        acc_sum += (net(X).argmax(dim=1) == y.squeeze(1)).float().sum().item()
        n += y.shape[0]
    return acc_sum / n
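A minimal illustration (with made-up tensors, not from the original) of the broadcasting pitfall mentioned in the comments above: comparing an (n,) prediction vector with an (n, 1) label column broadcasts to an (n, n) matrix instead of performing an element-wise comparison:
pred = torch.tensor([1, 0, 2])          # shape (3,), like net(X).argmax(dim=1)
label = torch.tensor([[1], [0], [1]])   # shape (3, 1), like the labels from TensorDataset
print((pred == label).shape)             # torch.Size([3, 3]) -- broadcast comparison, wrong counts
print((pred == label.squeeze(1)).shape)  # torch.Size([3]) -- element-wise, as intended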
# Because the model `net` is randomly initialized, its accuracy should be close to
# the reciprocal of the number of classes (10), i.e. about 0.1.
print("init evaluate_accuracy: ", evaluate_accuracy(test_iter, net))
Output:
init evaluate_accuracy: 0.0931
def sgd(params, lr, batch_size):
    """
    The gradients computed here by the autograd machinery are the sum over a batch of samples.
    To stay consistent with the original book we divide by batch_size; strictly speaking this is
    unnecessary, because PyTorch loss functions usually already average over the batch dimension.
    """
    for param in params:
        # Note that param.data is updated here, so the update is not recorded by `autograd`
        # and does not interfere with gradient backpropagation
        param.data -= lr * param.grad / batch_size
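A tiny toy example (made up, not from the book) of what a single sgd step does: with a gradient of 2 everywhere, lr=0.1 and batch_size=1, each parameter moves from 1.0 to 1.0 - 0.1 * 2 = 0.8:
p = torch.ones(2, requires_grad=True)
(2 * p.sum()).backward()   # d(2*sum(p))/dp = [2., 2.]
sgd([p], lr=0.1, batch_size=1)
print(p)                   # expected: tensor([0.8000, 0.8000], requires_grad=True)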
num_epochs, lr = 5, 0.1
def train_ch3(net, train_iter, test_iter, loss, num_epochs, batch_size,
              params=None, lr=None, optimizer=None):
    """Train the network."""
    for epoch in range(num_epochs):
        train_l_sum, train_acc_sum, n = 0.0, 0.0, 0
        for X, y in train_iter:
            y_hat = net(X)
            l = loss(y_hat, y).sum()
            # Zero the gradients
            if optimizer is not None:
                optimizer.zero_grad()
            elif params is not None and params[0].grad is not None:
                for param in params:
                    param.grad.data.zero_()
            l.backward()
            if optimizer is None:
                sgd(params, lr, batch_size)
            else:
                optimizer.step()  # used in the "Concise Implementation of Softmax Regression" section
            train_l_sum += l.item()
            # Original method:
            # With PyTorch 0.4.0 the accuracy comes out wrong (other versions untested). The reason is
            # that the two sides of "==" have different shapes, so the comparison is broadcast.
            # train_acc_sum += (y_hat.argmax(dim=1) == y).sum().item()
            # My method:
            train_acc_sum += (y_hat.argmax(dim=1) == y.squeeze(1)).sum().item()
            n += y.shape[0]
        test_acc = evaluate_accuracy(test_iter, net)
        print('epoch %d, loss %.4f, train acc %.3f, test acc %.3f'
              % (epoch + 1, train_l_sum / n, train_acc_sum / n, test_acc))
# Start training
train_ch3(net, train_iter, test_iter, cross_entropy, num_epochs, batch_size, [W, b], lr)
Output:
epoch 1, loss 0.7856, train acc 0.751, test acc 0.789
epoch 2, loss 0.5702, train acc 0.813, test acc 0.811
epoch 3, loss 0.5257, train acc 0.826, test acc 0.821
epoch 4, loss 0.5011, train acc 0.833, test acc 0.819
epoch 5, loss 0.4852, train acc 0.837, test acc 0.821
def get_fashion_mnist_labels(labels):
    """Convert numeric labels to the corresponding text labels."""
    text_labels = ['t-shirt', 'trouser', 'pullover', 'dress', 'coat',
                   'sandal', 'shirt', 'sneaker', 'bag', 'ankle boot']
    return [text_labels[int(i)] for i in labels]
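For example (the values follow directly from the text_labels list above):
print(get_fashion_mnist_labels([0, 2, 9]))  # ['t-shirt', 'pullover', 'ankle boot']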
from IPython import display
from matplotlib import pyplot as plt
def use_svg_display():
    """Use the svg format to display plots in Jupyter."""
    display.set_matplotlib_formats('svg')
def show_fashion_mnist(images, labels):
    """Plot several images and their labels in one row."""
    use_svg_display()
    # The underscore denotes a variable we ignore (do not use)
    _, figs = plt.subplots(1, len(images), figsize=(12, 12))
    for f, img, lbl in zip(figs, images, labels):
        f.imshow(img.view((28, 28)).numpy())
        f.set_title(lbl)
        f.axes.get_xaxis().set_visible(False)
        f.axes.get_yaxis().set_visible(False)
    plt.show()
# Prediction
X, y = next(iter(test_iter))
true_labels = get_fashion_mnist_labels(y.numpy())
pred_labels = get_fashion_mnist_labels(net(X).argmax(dim=1).numpy())
titles = [true + '\n' + pred for true, pred in zip(true_labels, pred_labels)]
# Plot the first 9 prediction results (true label on top, predicted label below)
show_fashion_mnist(X[0:9], titles[0:9])
References:
Dive into Deep Learning (PyTorch), 3.6 Implementation of Softmax Regression from Scratch (《动手学深度学习 PyTorch》3.6 softmax回归的从零开始实现)
Dive into Deep Learning (《动手学深度学习》)
Fashion-MNIST dataset download address (download the contents of data/fashion)