# 数据构建和加载,重构线性回归

import torch
from torch import nn

#线性层组件
def test01():
    """Demonstrate the nn.Linear layer component.

    nn.Linear(in_features, out_features) builds a fully-connected layer;
    here it maps 20 input features to 30 output features.
    """
    linear = nn.Linear(20, 30)
    # A batch of 100 samples, 20 features each.
    batch = torch.randn(100, 20)
    output = linear(batch)
    print(output.shape)


# 优化器
# Optimizer demo
def test02():
    """Run one forward/backward/step cycle with nn.Linear + MSELoss + SGD.

    Prints the batch loss and the model's (updated) weight and bias.
    """
    # Learning rate for gradient descent.
    lr = 0.1

    # Linear model: 20 input features -> 10 outputs.
    model = nn.Linear(in_features=20, out_features=10)
    # Mean-squared-error loss function.
    criterion = nn.MSELoss()
    # SGD optimizer performs the gradient-descent update of the parameters.
    optimizer = torch.optim.SGD(model.parameters(), lr=lr)

    # Random input batch: 100 samples x 20 features.
    x = torch.randn(100, 20)
    # Random regression targets: 100 samples x 10 outputs.
    y = torch.randn(100, 10)
    # Forward pass: model predictions.
    y_pred = model(x)
    # Loss between predictions and targets.
    loss = criterion(y_pred, y)

    # Backward pass: populate parameter gradients.
    loss.backward()

    # Gradient-descent step: update the model parameters.
    optimizer.step()

    # Clear gradients so they do not accumulate into a later backward pass.
    optimizer.zero_grad()

    print(loss.item())
    print(model.weight)
    print(model.bias)

# Entry point for the first demo section: run the optimizer example.
# NOTE(review): test01/test02 are re-defined further down this file; since a
# module executes top-down, this guard calls the definitions above it.
if __name__=='__main__':
    test02()
import torch
from torch.utils.data import Dataset, TensorDataset, DataLoader

#自定义数据构建类

# Fix the RNG seed so the random tensors created below are reproducible.
torch.manual_seed(0)

#自定义数据结构类
#1.继承Dataset
#2.必须实现
class MyDataset(Dataset):
    """Minimal custom Dataset pairing feature rows with their labels.

    Subclasses torch's Dataset and implements the two required methods:
    __len__ and __getitem__.
    """

    def __init__(self, data, label):
        # Keep references to the feature container and the label container.
        self.data = data
        self.label = label

    def __len__(self):
        # The number of samples is the length of the feature container.
        return len(self.data)

    def __getitem__(self, index):
        # Return the (features, label) pair at the given position.
        return self.data[index], self.label[index]

def test01():
    """Build a MyDataset from random tensors and print the dataset object."""
    features = torch.randn(100, 10)
    targets = torch.randn(100, 1)
    dataset = MyDataset(features, targets)
    print(dataset)


#TensorDataset():是Dataset的一个实现类,可以将data和label作为参数传给TensorDataset生成数据集
def test02():
    """Wrap random data/label tensors in a TensorDataset; print sample 0."""
    features = torch.randn(100, 10)
    targets = torch.randn(100, 1)
    dataset = TensorDataset(features, targets)
    # Indexing a TensorDataset yields the (data, label) tuple for that sample.
    print(dataset[0])

#数据加载器:DataLoader,是一个迭代器
#参数:
#batch_size:批量数据,将多个样本打包成一个批量,加载器每次加载一个批量的数据
#shuffle:为True表示打乱数据顺序

def test03():
    """Iterate a DataLoader over MyDataset in shuffled batches of 4."""
    features = torch.randn(100, 10)
    targets = torch.randn(100, 1)
    loader = DataLoader(MyDataset(features, targets), batch_size=4, shuffle=True)
    # Each iteration yields one batch of (data, label) tensors.
    for batch_data, batch_label in loader:
        print(batch_data)
        print(batch_label)


# Entry point for the dataset demo section.
# NOTE(review): these calls resolve to the MyDataset/TensorDataset versions
# of test01/test02 defined just above, which shadow the earlier definitions.
if __name__=='__main__':
    test02()
    test01()

from torch import nn, optim
from torch.utils.data import TensorDataset, DataLoader
from sklearn.datasets import make_regression
import torch

# Number of input features per sample, shared by make_regression and nn.Linear.
input_features = 5


def build_data(noise=0.1, bias=0):
    """Generate a synthetic linear-regression dataset with sklearn.

    Args:
        noise: standard deviation of the Gaussian noise added to the targets.
        bias: intercept of the underlying linear model.

    Returns:
        (x, y, coef, bias): features, targets, the true coefficients
        (weights), and the bias value that was used.
    """
    features, targets, true_coef = make_regression(
        n_samples=1000,             # number of samples
        n_features=input_features,  # features per sample
        n_targets=1,                # a single regression target
        coef=True,                  # also return the true coefficients
        random_state=42,            # fixed seed for reproducibility
        noise=noise,                # noise level
        bias=bias,                  # intercept offset
    )
    return features, targets, true_coef, bias


def train():
    """Train a linear model on make_regression data and report the fit.

    Prints the mean batch loss per epoch, then compares the learned weight
    and bias against the true coefficients returned by build_data.
    """
    x, y, coef, bias = build_data()
    # Convert the numpy arrays to float32 tensors.
    x = torch.tensor(x, dtype=torch.float32)
    y = torch.tensor(y, dtype=torch.float32)
    # Wrap the tensors in a dataset and serve shuffled mini-batches of 16.
    dataset = TensorDataset(x, y)
    dataloader = DataLoader(dataset, batch_size=16, shuffle=True)

    # Linear model producing one output per sample.
    model = nn.Linear(input_features, out_features=1)
    # Mean-squared-error loss.
    criterion = nn.MSELoss()
    # Plain SGD over the model parameters.
    optimizer = optim.SGD(model.parameters(), lr=0.1)

    for epoch in range(100):
        loss_sum = 0
        for batch_x, batch_y in dataloader:
            prediction = model(batch_x)
            # Targets arrive with shape (batch,); reshape to (batch, 1)
            # so they match the model output.
            loss = criterion(prediction, batch_y.view(-1, 1))
            # Backward pass, parameter update, then gradient reset.
            loss.backward()
            optimizer.step()
            optimizer.zero_grad()
            # Accumulate the batch loss for the per-epoch average.
            loss_sum += loss.item()
        print(f'epoch:{epoch}, loss:{loss_sum / len(dataloader)}')

    w = model.weight.data.view(-1).numpy()
    b = model.bias.data.item()
    print(f'真实w={coef}, 训练w={w}')
    print(f'真实偏置:{bias}, 训练偏置b={b}')


# Script entry point: train the regression model and print the results.
if __name__ == '__main__':
    train()

# 你可能感兴趣的:(重构,线性回归,pytorch)