softmax回归python实现

from d2l import torch as d2lutils
import torch
import torchvision
from torch.utils import data
from torchvision import transforms

class Accumulator:
    """Keep running totals for `n` quantities side by side."""

    def __init__(self, n):
        # One float slot per tracked quantity, all starting at zero.
        self.data = [0.0 for _ in range(n)]

    def add(self, *args):
        """Add each positional argument to the slot at the same position.

        Every argument is converted with `float()` first. Slots and
        arguments are paired with `zip`, so the stored list ends up as long
        as the shorter of the two.
        """
        updated = []
        for total, increment in zip(self.data, args):
            updated.append(total + float(increment))
        self.data = updated

    def reset(self):
        """Set every slot back to 0.0 while keeping the number of slots."""
        self.data = [0.0 for _ in self.data]

    def __getitem__(self, idx):
        """Return the running total stored at position `idx`."""
        return self.data[idx]


# ①准备数据集
def load_data_fashion_mnist(batch_size, resize=None, root="../data",
                            num_workers=4):
    """Download Fashion-MNIST and return a (train, test) pair of loaders.

    Args:
        batch_size: Number of samples per mini-batch for both loaders.
        resize: Optional size handed to `transforms.Resize`, applied before
            the tensor conversion. A falsy value skips resizing.
        root: Directory the dataset is read from / downloaded into.
        num_workers: Number of worker processes used by each loader.

    Returns:
        A tuple `(train_loader, test_loader)`. Only the training loader
        shuffles its samples.
    """
    # ToTensor converts a PIL image or numpy.ndarray into a tensor.
    steps = [transforms.ToTensor()]
    if resize:
        # Resizing has to run first, i.e. before the tensor conversion.
        steps.insert(0, transforms.Resize(resize))
    # Chain the individual transforms into a single callable.
    trans = transforms.Compose(steps)

    mnist_train = torchvision.datasets.FashionMNIST(
        root=root, train=True, transform=trans, download=True)
    mnist_test = torchvision.datasets.FashionMNIST(
        root=root, train=False, transform=trans, download=True)

    train_loader = torch.utils.data.DataLoader(
        mnist_train, batch_size, shuffle=True, num_workers=num_workers)
    test_loader = torch.utils.data.DataLoader(
        mnist_test, batch_size, shuffle=False, num_workers=num_workers)
    return train_loader, test_loader

# Mini-batch size used by both data loaders.
batch_size = 256
# Build the iterators over the training set and the test set.
train_iter, test_iter = load_data_fashion_mnist(batch_size=batch_size)

# (2) Every image is flattened into a vector of length 784 (28 * 28). The
# dataset has 10 classes, so the model produces 10 outputs per sample.
num_inputs = 28 * 28
num_outputs = 10
# Weight matrix with 784 rows and 10 columns, drawn from a Gaussian with
# mean 0 and standard deviation 0.01; gradients are tracked for it.
W = torch.normal(mean=0.0, std=0.01, size=(num_inputs, num_outputs),
                 requires_grad=True)
# Bias vector of length 10, initialised to zero; gradients are tracked too.
b = torch.zeros((num_outputs,), requires_grad=True)

# ③实现softmax
def softmax(X):
    """Apply a numerically stable softmax along dimension 1.

    Args:
        X: Tensor whose dimension 1 holds the scores to normalise
            (one row of scores per sample).

    Returns:
        A tensor of the same shape as `X` in which every row is
        non-negative and sums to 1.
    """
    # Softmax is invariant to adding a constant to a row, so shift each row
    # by its maximum: the largest exponent becomes exp(0) = 1, which keeps
    # torch.exp from overflowing to inf (and the ratio from becoming NaN).
    row_max = X.max(dim=1, keepdim=True).values
    X_exp = torch.exp(X - row_max)
    # Sum over dimension 1, keeping it so the division broadcasts per row.
    partition = X_exp.sum(1, keepdim=True)
    return X_exp / partition


# ④实现softmax回归模型
def net(X):
    """Softmax regression model: flatten, apply the affine map, normalise.

    `X` is reshaped to two dimensions with `W.shape[0]` columns (the row
    count is inferred), multiplied by the weight matrix `W`, shifted by the
    bias `b`, and the result is passed through `softmax`.
    """
    # -1 lets reshape infer the number of rows from the total element count.
    flattened = X.reshape((-1, W.shape[0]))
    # (rows, W.shape[0]) @ (W.shape[0], W.shape[1]) -> (rows, W.shape[1]);
    # `b` is added to every row by broadcasting.
    scores = flattened @ W + b
    return softmax(scores)


# ⑤实现交叉熵损失函数
def cross_entropy(y_hat, y):
    """Return the per-sample cross-entropy loss.

    For every row of `y_hat` the entry at the column given by `y` is
    selected and its negative logarithm is returned.

    Example: with y = [0, 2] and
    y_hat = [[0.1, 0.3, 0.6], [0.3, 0.2, 0.5]] the selected entries are
    y_hat[0, 0] = 0.1 and y_hat[1, 2] = 0.5, so the result is
    -log([0.1, 0.5]).
    """
    # One row index per sample: 0, 1, ..., len(y_hat) - 1.
    row_ids = range(y_hat.shape[0])
    # Pair each row index with the label for that row.
    picked = y_hat[row_ids, y]
    return torch.log(picked).neg()


# ⑥将预测类别与真实 y 元素进行比较
def accuracy(y_hat, y):
    """Return the number of correct predictions as a float.

    Args:
        y_hat: Either a tensor with more than one dimension and more than
            one column, in which case the index of the largest entry along
            dimension 1 is taken as the predicted class, or a tensor that
            already holds the predicted classes.
        y: Tensor with the true classes.

    Returns:
        The count of positions where the prediction equals the label.
    """
    # Only reduce with argmax when y_hat actually has a class dimension.
    if len(y_hat.shape) > 1 and y_hat.shape[1] > 1:
        y_hat = y_hat.argmax(axis=1)
    # Cast predictions to the label dtype so the comparison is like-for-like.
    cmp = y_hat.type(y.dtype) == y
    # True counts as 1 and False as 0 once cast, so the sum is the hit count.
    return float(cmp.type(y.dtype).sum())


# ⑦评估在任意模型 net 的准确率
def evaluate_accuracy(net, data_iter):
    """Compute the accuracy of `net` over all batches of `data_iter`.

    Args:
        net: Callable mapping a batch of inputs to predictions. If it is a
            `torch.nn.Module` it is switched to evaluation mode first.
        data_iter: Iterable yielding `(X, y)` batches.

    Returns:
        Number of correct predictions divided by the number of labels seen.

    Raises:
        ZeroDivisionError: If `data_iter` yields no batches.
    """
    if isinstance(net, torch.nn.Module):
        net.eval()
    # Slot 0: number of correct predictions, slot 1: number of labels seen.
    metric = Accumulator(2)
    # Evaluation needs no gradients, so skip building the autograd graph.
    with torch.no_grad():
        for X, y in data_iter:
            # Run the forward pass exactly once per batch.
            metric.add(accuracy(net(X), y), y.numel())
    return metric[0] / metric[1]



# ⑨定义一个函数来训练一个迭代周期
def train_epoch_ch3(net, train_iter, loss, updater):  # @save
    """Train `net` for one pass over `train_iter`.

    Args:
        net: Callable model; a `torch.nn.Module` is put into training mode.
        train_iter: Iterable yielding `(X, y)` batches.
        loss: Callable returning the loss tensor for `(y_hat, y)`.
        updater: Either a `torch.optim.Optimizer`, or a callable that takes
            the batch size and updates the parameters itself.

    Returns:
        A tuple `(average loss per sample, training accuracy)`.
    """
    if isinstance(net, torch.nn.Module):
        net.train()

    uses_torch_optimizer = isinstance(updater, torch.optim.Optimizer)
    # Slots: summed training loss, number of correct predictions, samples.
    totals = Accumulator(3)
    for features, labels in train_iter:
        predictions = net(features)
        batch_loss = loss(predictions, labels)
        if not uses_torch_optimizer:
            # Hand-written updater: the loss is a vector, so sum it before
            # back-propagating, then pass the batch size to the updater.
            batch_loss.sum().backward()
            updater(features.shape[0])
        else:
            # Built-in optimizer: clear old gradients, back-propagate the
            # mean loss, then take an optimisation step.
            updater.zero_grad()
            batch_loss.mean().backward()
            updater.step()
        totals.add(float(batch_loss.sum()),
                   accuracy(predictions, labels),
                   labels.numel())

    loss_sum, num_correct, num_samples = totals[0], totals[1], totals[2]
    return loss_sum / num_samples, num_correct / num_samples


def train_ch3(net, train_iter, test_iter, loss, num_epochs, updater):
    """Train `net` for `num_epochs` epochs and report the final metrics.

    After the last epoch the training loss, training accuracy and test
    accuracy are printed and checked against fixed thresholds.

    Args:
        net: Model passed through to `train_epoch_ch3` / `evaluate_accuracy`.
        train_iter: Iterable of training batches.
        test_iter: Iterable of test batches.
        loss: Loss callable passed through to `train_epoch_ch3`.
        num_epochs: Number of passes over the training data; must be >= 1.
        updater: Optimizer or update callable passed to `train_epoch_ch3`.

    Raises:
        ValueError: If `num_epochs` is smaller than 1.
        AssertionError: If the final loss is not below 0.5 or either
            accuracy is not within (0.7, 1].
    """
    # Without at least one epoch there are no metrics to report below.
    if num_epochs < 1:
        raise ValueError(
            "num_epochs must be at least 1, got %r" % (num_epochs,))

    for _ in range(num_epochs):
        # One training pass; returns (average loss, training accuracy).
        train_metrics = train_epoch_ch3(net, train_iter, loss, updater)
        # Accuracy on the test set after this epoch.
        test_acc = evaluate_accuracy(net, test_iter)

    # Only the metrics of the final epoch are reported and checked.
    train_loss, train_acc = train_metrics
    print("训练损失:" + str(train_loss))
    print("训练精度:" + str(train_acc))
    print("测试精度:" + str(test_acc))
    assert train_loss < 0.5, train_loss
    assert train_acc <= 1 and train_acc > 0.7, train_acc
    assert test_acc <= 1 and test_acc > 0.7, test_acc

# Learning rate used by the SGD update below.
lr = 0.1


def updater(batch_size):
    """Run `d2lutils.sgd` on the parameters `W` and `b`.

    The learning rate `lr` and the given `batch_size` are passed through.
    """
    params = [W, b]
    return d2lutils.sgd(params, lr, batch_size)

# Run the training only when the file is executed as a script.
if __name__ == '__main__':
    num_epochs = 10
    train_ch3(net, train_iter, test_iter, cross_entropy,
              num_epochs=num_epochs, updater=updater)


训练10轮后的损失与精度如下:

训练损失:0.44705355275472003
训练精度:0.8476166666666667
测试精度:0.8222

你可能感兴趣的:(回归,python,数据挖掘)