pytorch实现逻辑回归(代码正确,注释完整)

用PyTorch实现逻辑回归,它与线性回归实现的差别只在于loss的定义

本文同时讲述了如何处理并使用自己的数据集

数据集
https://www.kaggle.com/c/digit-recognizer/data

import torch
import numpy as np
import torch.nn as nn
from torch.autograd import Variable
import pandas as pd
from sklearn.model_selection import train_test_split

# Read every column as float32. The original author's note: without the
# float conversion, the later gradient computation raises an error.
train = pd.read_csv('digit-recognizer/train.csv', dtype=np.float32)

# Split the frame into labels (y) and the remaining columns (x); x is
# divided by 255.
y = train['label'].values
x = train.drop(columns='label').values / 255

# train_test_split returns its results in exactly this order:
# train features, test features, train labels, test labels.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=2019,
)

# Wrap the arrays as tensors. The labels must be int64 (long) tensors
# because they are used as targets of the cross-entropy loss.
x_train = torch.from_numpy(x_train)
x_test = torch.from_numpy(x_test)
y_train = torch.from_numpy(y_train).long()
y_test = torch.from_numpy(y_test).long()

# Mini-batch size and number of passes over the training set.
batch_size = 128
iteration_num = 1000


# TensorDataset pairs each feature row with its label.
train = torch.utils.data.TensorDataset(x_train, y_train)
test = torch.utils.data.TensorDataset(x_test, y_test)

# DataLoader adds batching and shuffling on top of a dataset (it can also
# load data in parallel with worker processes, which is not used here).
train_loader = torch.utils.data.DataLoader(
    train, batch_size=batch_size, shuffle=True
)
test_loader = torch.utils.data.DataLoader(
    test, batch_size=batch_size, shuffle=True
)



class LogisticRegressionModel(nn.Module):
    """Logistic regression classifier built from a single linear layer.

    The module returns the raw output of the linear layer; no softmax or
    sigmoid is applied inside the model.
    """

    def __init__(self, input_dim, output_dim):
        """Create the linear layer.

        Args:
            input_dim: Number of features in each input row.
            output_dim: Number of output values produced per input row.
        """
        super().__init__()
        self.linear = nn.Linear(in_features=input_dim, out_features=output_dim)

    def forward(self, x):
        """Apply the linear layer to ``x`` and return the result."""
        logits = self.linear(x)
        return logits
    
# Length of the flat input vector (28 * 28 values per sample).
input_dim = 28 * 28
# Ten classes, so ten outputs per sample.
output_dim = 10

# Training losses are appended here by the training loop.
loss_list = list()

learning_rate = 0.001

model = LogisticRegressionModel(input_dim=input_dim, output_dim=output_dim)

# Cross-entropy is the loss that distinguishes this setup from linear
# regression; it consumes the model's raw outputs and integer class labels.
CrossEntropyLoss = nn.CrossEntropyLoss()

optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)


def _evaluate_accuracy(net, loader):
    """Return the percentage of samples in ``loader`` that ``net`` gets right.

    Args:
        net: Model that maps a batch of flattened images to per-class scores.
        loader: Iterable yielding ``(images, labels)`` batches.

    Returns:
        Accuracy as a plain Python float in the range 0-100.

    Raises:
        ZeroDivisionError: If ``loader`` yields no samples.
    """
    correct = 0
    total = 0
    # Evaluation needs no gradients; disabling autograd avoids building a
    # computation graph for every test batch.
    with torch.no_grad():
        for eval_images, eval_labels in loader:
            scores = net(eval_images.view(-1, 28*28))
            # torch.max(input, dim) returns (max values, max indices);
            # dim=1 takes the max across each row, and the index of that max
            # is the predicted class label.
            prediction = torch.max(scores, 1)[1]

            total += len(eval_labels)
            # .item() turns the count into a Python int so the accuracy is a
            # true float division rather than tensor arithmetic.
            correct += (prediction == eval_labels).sum().item()

    return 100 * correct / float(total)


for iteration in range(iteration_num):
    for j, (images, labels) in enumerate(train_loader):
        # Reshape the batch into rows of 28*28 values. A separate name is used
        # so the module-level `train` dataset is not overwritten.
        batch = images.view(-1, 28*28)

        # Gradients accumulate by default, so clear them before each step.
        optimizer.zero_grad()

        outputs = model(batch)
        loss = CrossEntropyLoss(outputs, labels)
        loss.backward()
        optimizer.step()

        # Every 50 batches (including the first batch of each epoch), measure
        # accuracy on the held-out split and record the current training loss
        # as a plain float.
        if j % 50 == 0:
            accuracy = _evaluate_accuracy(model, test_loader)
            loss_list.append(loss.item())

    # Report progress every 50 epochs, using the last batch's loss and the
    # most recently computed accuracy.
    if iteration % 50 == 0:
        print('Epoch:{} Loss:{} Accuracy:{}'.format(iteration, loss.item(), accuracy))

你可能感兴趣的:(Pytorch)