深度学习数据集生成函数的创建与使用

目录

数据生成:

回归模型(手动创建):

快速实现(调库):

二分类:

快速实现:

多分类

快速实现


Dataset 和 DataLoader:通过数据生成器或保存数据的映射关系(索引)来避免数据的重复存储。

数据生成:

我们先自己生成含有一定规律的数据集,代码如下:

import random
import numpy as np
from torch import nn,optim
from torch.utils.data import Dataset,TensorDataset,DataLoader
import torch.nn.functional as F
from torch.utils.data import random_split
# from torch.utils.tensorboard import SummaryWriter
import torch


def tensorDataGenRe(num_features=1000, w=(2, 1, -1), bias=True, belta=0.1, bag=1):
    """Generate a synthetic regression data set.

    The labels follow ``y = (x ** bag) @ weights (+ intercept) + belta * noise``
    where ``x`` and ``noise`` are drawn from a standard normal distribution.

    :param num_features: number of samples (rows) to generate; despite the
        name this is NOT the number of feature columns
    :param w: true coefficients; when ``bias`` is truthy the last entry is the
        intercept and the remaining entries are the feature weights
    :param bias: whether ``w`` carries an intercept as its last entry; when it
        does, a column of ones is appended to the returned features
    :param belta: scale of the standard-normal noise added to the labels
    :param bag: exponent applied element-wise to the features before weighting
    :return: ``(features, labels)``; ``features`` has one column per feature
        weight (plus the ones column when ``bias`` is truthy) and ``labels``
        has shape ``(num_features, 1)``
    """
    # The default is an immutable tuple so it can never be shared and mutated
    # between calls; lists, tuples and tensors are all accepted.
    if bias:
        weights, intercept = w[:-1], float(w[-1])
    else:
        weights, intercept = w, None

    input_num = len(weights)
    # Explicit row count (instead of -1) keeps the reshape valid even when
    # ``w`` only holds an intercept and there are zero feature columns.
    w_true = torch.as_tensor(weights, dtype=torch.float32).reshape(input_num, 1)

    raw_features = torch.randn(num_features, input_num, dtype=torch.float32)
    # A single matrix product also covers the one-feature case, where it is
    # the same as the element-wise product of an (n, 1) and a (1, 1) tensor.
    labels_true = torch.mm(torch.pow(raw_features, bag), w_true)

    if intercept is None:
        features = raw_features
    else:
        labels_true = labels_true + intercept
        # The ones column lets a single weight vector learn the intercept.
        features = torch.cat((raw_features, torch.ones(num_features, 1)), 1)

    labels = labels_true + torch.randn(labels_true.shape) * belta
    return features, labels


def tensorDataGenCla(feature_count=500, feature_class=2, class_count=3, big_size=(4, 2), bais=False):
    """Generate a synthetic classification data set of Gaussian clusters.

    Class ``i`` is drawn from a normal distribution whose mean is
    ``i * big_size[0]`` shifted so that the class means are centred on zero,
    and whose standard deviation is ``big_size[1]``.

    :param feature_count: number of samples generated for each class
    :param feature_class: number of feature columns
    :param class_count: number of classes
    :param big_size: ``(mean_step, std)``: distance between neighbouring class
        means and the standard deviation shared by all classes
    :param bais: when truthy, append a column of ones to the features
    :return: ``(feature, label)``; ``feature`` has
        ``feature_count * class_count`` rows and ``label`` is a float column
        of the same length holding the class index of each row
    """
    mean_step, std_ = big_size[0], big_size[1]
    # Shift that centres the class means around zero.
    offset = mean_step * (class_count - 1) / 2

    feature_parts = []
    label_parts = []
    for class_index in range(class_count):
        feature_parts.append(
            torch.normal(class_index * mean_step - offset, std_,
                         size=(feature_count, feature_class))
        )
        # Build the label column directly instead of copying the shape of an
        # uninitialised scratch tensor.
        label_parts.append(torch.full((feature_count, 1), float(class_index)))

    feature = torch.cat(feature_parts).float()
    label = torch.cat(label_parts).float()
    if bais:
        feature = torch.cat((feature, torch.ones(len(feature), 1)), 1)
    return feature, label


def data_iter(features, bach_size, labels):
    """Split a data set into randomly shuffled mini-batches.

    :param features: feature tensor, indexed along dimension 0
    :param bach_size: number of samples per batch (the last batch may be smaller)
    :param labels: label tensor aligned row-by-row with ``features``
    :return: list of ``[batch_features, batch_labels]`` pairs
    """
    sample_total = len(features)
    order = list(range(sample_total))
    # Shuffling uses Python's ``random`` module, not the torch generator.
    random.shuffle(order)

    batches = []
    for start in range(0, sample_total, bach_size):
        # Slicing clamps at the end of the list, so the tail batch is short.
        picked = torch.tensor(order[start:start + bach_size])
        batches.append([features.index_select(0, picked), labels.index_select(0, picked)])
    return batches


if __name__ == '__main__':
    # Nothing runs when this module is executed directly; the commented-out
    # lines below are manual smoke checks for the generators defined above.
    pass
    # a,b = tensorDataGenRe()
    # bach_size = 10
    # data_iter(a, bach_size, b)
    # print(data_iter(a,bach_size,b))
    # print(a)
    # print(b)
    # tensorDataGenCla()

上面三个函数分别用于生成回归数据集、生成分类数据集,以及把数据集随机切分成小批量;下面利用这些数据实现一些简单的模型。

回归模型(手动创建):

import torch
from torchData import tensorDataGenRe, data_iter
from torch.utils.tensorboard import SummaryWriter

# get data
# Module-level regression data (default arguments) read by train_modle below.
features, lables = tensorDataGenRe()


# select Model
def select_model(X, w):
    """Linear model: multiply the feature matrix by the weight matrix.

    :param X: 2-D feature matrix
    :param w: 2-D weight matrix whose row count matches the columns of ``X``
    :return: the matrix product ``X @ w``
    """
    return X.mm(w)


# select objective function (mse)
def select_func(y_hat, y):
    """Mean squared error between predictions and targets.

    Both inputs are flattened to column vectors before they are compared.

    :param y_hat: predicted values
    :param y: true values
    :return: sum of squared differences divided by the number of elements of ``y``
    """
    residual = y_hat.reshape(-1, 1) - y.reshape(-1, 1)
    squared_total = torch.sum(residual ** 2)
    return squared_total / y.numel()


# select optimization algorithm (sgd)
def optim_algorithm(params, lr):
    """Apply one in-place gradient-descent step and reset the gradient.

    The update ``params -= lr * params.grad`` must not be recorded by
    autograd: if it were, ``params`` would stop being a plain leaf tensor of
    the graph. The step therefore runs inside ``torch.no_grad()``, which
    pauses tracking while still modifying the same tensor object. This is
    preferred over assigning through ``params.data``, because ``.data``
    bypasses autograd's safety checks.

    :param params: leaf tensor with ``requires_grad=True`` whose ``.grad``
        has already been populated by ``backward()``
    :param lr: learning rate
    :return: None; ``params`` is updated in place and its gradient is zeroed
    """
    with torch.no_grad():
        params -= lr * params.grad
    # Gradients accumulate across backward() calls, so clear them for the next step.
    params.grad.zero_()


# train modle
def train_modle(bach_size, num_epochs, lr, w):
    """Train the linear model with mini-batch SGD and log the loss per epoch.

    Reads the module-level ``features`` and ``lables`` tensors and writes one
    scalar per epoch under the tag ``'mul'`` to the ``reg_loss`` log directory.

    :param bach_size: number of samples per mini-batch
    :param num_epochs: number of passes over the whole data set
    :param lr: learning rate
    :param w: weight tensor (``requires_grad=True``), updated in place
    :return: loss on the full data set after the last epoch, or ``None`` when
        ``num_epochs`` is 0
    """
    wirter = SummaryWriter(log_dir="reg_loss")
    # NOTE(review): this seeds torch's generator only; a data_iter that
    # shuffles with Python's ``random`` module is not made reproducible by it.
    torch.random.manual_seed(929)
    train_loss = None  # stays None if no epoch runs, instead of being unbound
    try:
        for epochs in range(num_epochs):
            for X, y in data_iter(features, bach_size, lables):
                loss = select_func(select_model(X, w), y)
                loss.backward()
                optim_algorithm(w, lr)
            # Evaluation only: no graph is needed for the logged value.
            with torch.no_grad():
                train_loss = select_func(select_model(features, w), lables)
            wirter.add_scalar('mul', train_loss, epochs)
    finally:
        # Flush pending events and release the log file even if training fails.
        wirter.close()
    return train_loss

if __name__ == '__main__':
    # Hyper-parameters for the manual regression example.
    bach_size = 10
    num_epochs = 3
    lr = 0.01
    # Three weights, initialised to zero.
    # NOTE(review): presumably two feature weights plus the ones/bias column
    # produced by tensorDataGenRe's defaults -- confirm against torchData.
    w = torch.zeros(3, 1, requires_grad=True)
    # Training is disabled here; uncomment one of the calls below to run it.
    # print(train_modle(bach_size, num_epochs, lr, w))
    # train_modle(bach_size, num_epochs, lr, w)

快速实现(调库):

from torch.utils.data import Dataset, DataLoader,TensorDataset
import torch
from torchData import tensorDataGenRe
import torch.nn as nn
import torch.optim as optim
from torch.utils.tensorboard import SummaryWriter

features, lables = tensorDataGenRe()
bach_size = 10
# NOTE(review): the seed is set after the data has already been generated, so
# it cannot make the generated data reproducible -- confirm this is intended.
torch.manual_seed(929)
epochs = 3
# create data
# Drop the last feature column.
# NOTE(review): presumably the ones column appended by tensorDataGenRe, which
# is redundant because nn.Linear has its own bias -- confirm.
features = features[:, :-1]
# Shuffled mini-batch loader over the (features, lables) pairs.
data = DataLoader(TensorDataset(features, lables), batch_size=bach_size, shuffle=True)


# create modle
class LR(nn.Module):
    """Linear regression model: a single fully connected layer."""

    def __init__(self, in_feature=2, out_feature=1):
        """Create the layer mapping ``in_feature`` inputs to ``out_feature`` outputs."""
        super().__init__()
        self.linear = nn.Linear(in_feature, out_feature)

    def forward(self, x):
        """Return the linear layer's output for the batch ``x``."""
        return self.linear(x)


# Model instance with the default sizes (2 inputs, 1 output).
LR_model = LR()

# criterion function
# Mean squared error between predictions and targets.
criterion = nn.MSELoss()

# optimization algorithm
# Plain stochastic gradient descent over the model's parameters.
optimizer = optim.SGD(LR_model.parameters(), lr=0.03)

# TensorBoard writer used by fit(); events go to the "reg_loss" directory.
wirter = SummaryWriter(log_dir="reg_loss")

def fit(net,criterion,optimizer,data,epochs):
    """Train ``net`` and log the last mini-batch loss of every epoch.

    The loss is written under the tag ``'loss'`` with the module-level
    TensorBoard writer ``wirter``.

    :param net: model to train
    :param criterion: objective function called as ``criterion(prediction, target)``
    :param optimizer: optimizer that updates the parameters of ``net``
    :param data: iterable yielding ``(X, y)`` mini-batches
    :param epochs: number of passes over ``data``
    :return: None
    """
    for epoch in range(epochs):
        loss = None
        for X, y in data:
            # Call the module itself rather than .forward() so that any
            # registered hooks run as well.
            y_hat = net(X)
            loss = criterion(y_hat, y)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
        # Log once per epoch (previously this ran only once, after the last
        # epoch); skip it when ``data`` yielded no batch.
        if loss is not None:
            wirter.add_scalar('loss', loss.item(), global_step=epoch)

if __name__ == '__main__':
    # Train the linear regression model with the objects configured above.
    fit(net=LR_model,criterion=criterion,optimizer=optimizer,data=data,epochs=epochs)

二分类:

import torch
from torchData import tensorDataGenCla, data_iter


def logistic(X, w):
    """Logistic-regression forward pass: ``sigma`` applied to the product ``X @ w``."""
    z_hat = X.mm(w)
    return sigma(z_hat)


def sigma(z_hat):
    """Element-wise sigmoid ``1 / (1 + exp(-z_hat))``.

    Uses the library implementation instead of a hand-written formula.

    :param z_hat: tensor of raw scores (logits)
    :return: tensor of the same shape with values in ``[0, 1]``
    """
    return torch.sigmoid(z_hat)


def cla(sigma, p=0.5):
    """Threshold probabilities into 0/1 class labels.

    :param sigma: tensor of probabilities
    :param p: decision threshold; values ``>= p`` become 1.0, others 0.0
    :return: float tensor of 0.0 / 1.0 with the shape of ``sigma``
    """
    is_positive = torch.ge(sigma, p)
    return is_positive.float()


def accuracy(sigma, y):
    """Fraction of samples whose thresholded prediction equals the label.

    :param sigma: predicted probabilities (thresholded with ``cla``)
    :param y: true labels
    :return: scalar tensor holding the mean of the element-wise matches
    """
    predicted = cla(sigma).flatten()
    hits = predicted == y.flatten()
    return hits.float().mean()


def cross_entropy(sigma, y, eps=1e-7):
    """Binary cross-entropy averaged over all elements of ``y``.

    :param sigma: predicted probabilities of the positive class
    :param y: true labels (0 or 1), same shape as ``sigma``
    :param eps: probabilities are clamped to ``[eps, 1 - eps]`` before the
        logarithm is taken
    :return: scalar loss tensor
    """
    # A probability that saturates to exactly 0 or 1 would give log(0) = -inf,
    # and 0 * -inf evaluates to NaN; clamping keeps the loss finite.
    prob = torch.clamp(sigma, min=eps, max=1 - eps)
    per_sample = y * torch.log(prob) + (1 - y) * torch.log(1 - prob)
    return -torch.sum(per_sample) / y.numel()


def sgd(params, lr):
    """Apply one in-place gradient-descent step and reset the gradient.

    :param params: leaf tensor with ``requires_grad=True`` whose ``.grad``
        has already been populated by ``backward()``
    :param lr: learning rate
    :return: None; ``params`` is updated in place and its gradient is zeroed
    """
    # Update without recording the operation in the autograd graph;
    # torch.no_grad() is preferred over writing through ``params.data``.
    with torch.no_grad():
        params -= lr * params.grad
    # Gradients accumulate across backward() calls, so clear them.
    params.grad.zero_()


if __name__ == '__main__':
    # Two-class data with an extra column of ones (bais=True).
    features, labels = tensorDataGenCla(class_count=2, bais=True)
    # print(features.shape)
    # print(labels.shape)
    # NOTE(review): the seed is set after the data was generated, so the data
    # itself is not reproducible -- confirm this is intended.
    torch.manual_seed(929)
    lr = 0.03
    batch_size = 10
    # Three weights, initialised to one.
    w = torch.ones(3, 1, requires_grad=True)
    epochs = 3
    for epoch in range(epochs):
        # One SGD step per mini-batch.
        for X, y in data_iter(features, batch_size, labels):
            loss = cross_entropy(logistic(X, w), y)
            loss.backward()
            sgd(w, lr)
        # Accuracy on the full training set after each epoch. accuracy()
        # applies cla() itself, so the explicit cla() here thresholds twice.
        train_acc = accuracy(cla(logistic(features, w)), labels)
        print(train_acc)
    print(w)

快速实现:

import torch
from  torchData import tensorDataGenCla,data_iter
from torch.utils.data import DataLoader,TensorDataset
import torch.nn as nn
import torch.optim as optim

# Hyper-parameters for the library-based logistic regression example.
lr = 0.03
batch_size = 10
epochs = 3
# Two-class data without a bias column.
features,lables = tensorDataGenCla(class_count=2 )
lables = lables.float()
# Shuffled mini-batch loader. Note that fit() below iterates with data_iter
# over features/lables and does not read this loader.
data = DataLoader(TensorDataset(features,lables),batch_size=batch_size,shuffle=True)

class Logistic(nn.Module):
    """Logistic regression model that outputs raw scores (no sigmoid applied)."""

    def __init__(self, in_features=2, out_features=1):
        """Create the layer mapping ``in_features`` inputs to ``out_features`` outputs."""
        super().__init__()
        self.linear = nn.Linear(in_features, out_features)

    def forward(self, x):
        """Return the linear layer's output for the batch ``x``."""
        return self.linear(x)
# Model instance with the default sizes (2 inputs, 1 output).
net = Logistic()

# This loss applies the sigmoid internally, so the network outputs raw scores.
criterion = nn.BCEWithLogitsLoss() # have sigmoid

# Plain stochastic gradient descent over the model's parameters.
optimiter = optim.SGD(net.parameters(),lr)

def fit(net,batch_size,epochs,criterion,optimiter):
    """Train ``net`` with mini-batch gradient descent.

    Batches are produced by ``data_iter`` from the module-level ``features``
    and ``lables`` tensors.

    :param net: model to train
    :param batch_size: number of samples per mini-batch
    :param epochs: number of passes over the data
    :param criterion: objective function called as ``criterion(prediction, target)``
    :param optimiter: optimizer that updates the parameters of ``net``
    :return: None
    """
    for _ in range(epochs):
        for X, y in data_iter(features, batch_size, lables):
            # Call the module itself rather than .forward() so that any
            # registered hooks run as well.
            loss = criterion(net(X), y)
            optimiter.zero_grad()
            loss.backward()
            optimiter.step()

def sigmoid(z_hat):
    """Element-wise sigmoid ``1 / (1 + exp(-z_hat))``.

    Uses the library implementation instead of a hand-written formula.

    :param z_hat: tensor of raw scores (logits)
    :return: tensor of the same shape with values in ``[0, 1]``
    """
    return torch.sigmoid(z_hat)

def acc_zhat(zhat,y):
    """Accuracy of raw model scores against the labels.

    The scores go through ``sigmoid`` and are thresholded with ``cla``
    before being compared to ``y`` by ``accuracy``.

    :param zhat: raw model outputs (logits)
    :param y: true labels
    :return: the value returned by ``accuracy``
    """
    return accuracy(cla(sigmoid(zhat)), y)

def cla(sigma, p=0.5):
    """Threshold probabilities into 0/1 class labels.

    :param sigma: tensor of probabilities
    :param p: decision threshold; values ``>= p`` become 1.0, others 0.0
    :return: float tensor of 0.0 / 1.0 with the shape of ``sigma``
    """
    is_positive = torch.ge(sigma, p)
    return is_positive.float()


def accuracy(sigma, y):
    """Fraction of samples whose thresholded prediction equals the label.

    :param sigma: predicted probabilities (thresholded with ``cla``)
    :param y: true labels
    :return: scalar tensor holding the mean of the element-wise matches
    """
    predicted = cla(sigma).flatten()
    hits = predicted == y.flatten()
    return hits.float().mean()

if __name__ == '__main__':
    # NOTE(review): the model and data above were created before this seed is
    # set, so it does not affect them -- confirm this is intended.
    torch.manual_seed(292)
    fit(net=net,batch_size=batch_size,epochs=epochs,criterion=criterion,optimiter=optimiter)
    # Accuracy of the trained model on the full training set.
    print(acc_zhat(net(features), lables))

多分类

import torch
from torchData import tensorDataGenCla,data_iter

# Module-level classification data with an extra column of ones (bais=True);
# the remaining arguments keep tensorDataGenCla's defaults.
feature,labels = tensorDataGenCla(bais=True,big_size=[6,2])

def softMax(X,w):
    """Softmax-regression forward pass: row-wise softmax of ``X @ w``.

    :param X: 2-D feature matrix, one sample per row
    :param w: 2-D weight matrix with one column per class
    :return: tensor of class probabilities; every row sums to 1
    """
    logits = torch.mm(X, w)
    # torch.softmax is numerically stable: a hand-written exp(z) / sum(exp(z))
    # overflows for large scores and yields inf / inf = NaN.
    return torch.softmax(logits, dim=1)

def m_cross_entropy(soft_z,y,eps=1e-12):
    """Multi-class cross-entropy averaged over the samples.

    :param soft_z: class probabilities, one row per sample
    :param y: class index of every sample, either as a column or a flat tensor
    :param eps: lower bound applied to the selected probabilities before the
        logarithm is taken
    :return: scalar loss tensor
    """
    # gather needs an integer index with one column per picked entry, so flat
    # labels are reshaped into a column as well.
    target = y.long().reshape(-1, 1)
    # For every row, pick the probability predicted for the true class.
    prob_real = torch.gather(soft_z, 1, target)
    # Summing logs replaces multiplying probabilities, which would lose
    # precision; the clamp keeps log(0) from producing an infinite loss.
    log_likelihood = torch.log(prob_real.clamp_min(eps)).sum()
    return -log_likelihood / target.numel()


def sgd(params,lr):
    """Apply one in-place gradient-descent step and reset the gradient.

    :param params: leaf tensor with ``requires_grad=True`` whose ``.grad``
        has already been populated by ``backward()``
    :param lr: learning rate
    :return: None; ``params`` is updated in place and its gradient is zeroed
    """
    # Update without recording the operation in the autograd graph;
    # torch.no_grad() is preferred over writing through ``params.data``.
    with torch.no_grad():
        params -= lr * params.grad
    # Gradients accumulate across backward() calls, so clear them.
    params.grad.zero_()

def accuracy(soft_z,y):
    """Fraction of samples whose most probable class equals the label.

    :param soft_z: class scores or probabilities, one row per sample
    :param y: true class indices
    :return: scalar tensor holding the mean of the element-wise matches
    """
    predicted = soft_z.argmax(dim=1).flatten()
    hits = predicted == y.flatten()
    return hits.float().mean()

if __name__ == '__main__':
    torch.manual_seed(929)
    epochs = 3
    lr = 0.03
    bach_size = 10
    # 3 x 3 weight matrix, randomly initialised.
    # NOTE(review): presumably (2 features + ones column) x 3 classes, matching
    # tensorDataGenCla's defaults -- confirm against torchData.
    w = torch.randn(3,3,requires_grad=True)
    for epoch in range(epochs):
        for x,y in data_iter(feature,bach_size,labels):
            # Each mini-batch is only used to update the weights; the updated
            # weights are then evaluated on the whole feature set below.
            loss = m_cross_entropy(softMax(x,w),y)
            loss.backward()
            sgd(w,lr)
        # Accuracy on the full training set after each epoch.
        print(accuracy(softMax(feature, w), labels))

    



快速实现

import torch.nn as nn
import torch.optim as optim
from torchData import tensorDataGenCla,data_iter
from torch.utils.data import DataLoader,TensorDataset #
import torch
import torch.nn.functional as F

# Hyper-parameters for the library-based softmax regression example.
bach_size = 10
epochs = 3
lr = 0.03

# Classification data without a bias column; big_size is passed as [6, 4].
features,labels = tensorDataGenCla(big_size=[6,4])
labels = labels.float()
# Shuffled mini-batch loader over the (features, labels) pairs.
data = DataLoader(TensorDataset(features,labels),batch_size=bach_size,shuffle=True)


class SoftMaxR(nn.Module):
    """Softmax regression model that outputs raw class scores (no softmax applied)."""

    def __init__(self, in_feature=2, out_feature=3):
        """Create the layer mapping ``in_feature`` inputs to ``out_feature`` class scores."""
        super().__init__()
        self.linear = nn.Linear(in_feature, out_feature)

    def forward(self, x):
        """Return the linear layer's output for the batch ``x``."""
        return self.linear(x)

def fit(net,epochs,data,criterion,optimizer):
    """Train ``net`` and print the training accuracy after every epoch.

    The accuracy is computed with ``m_accuracy`` on the module-level
    ``features`` and ``labels`` tensors.

    :param net: model to train; it must output one score per class
    :param epochs: number of passes over ``data``
    :param data: iterable yielding ``(x, y)`` mini-batches
    :param criterion: objective function called as ``criterion(scores, target)``
    :param optimizer: optimizer that updates the parameters of ``net``
    :return: None
    """
    for _ in range(epochs):
        for x, y in data:
            # Call the module itself rather than .forward() so that any
            # registered hooks run as well.
            zhat = net(x)
            # The labels are flattened and cast to integer class indices
            # before being handed to the criterion.
            target = y.flatten().long()
            loss = criterion(zhat, target)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
        # Evaluation only: no autograd graph is needed for the printed value.
        with torch.no_grad():
            print(m_accuracy(F.softmax(net(features),1),labels))
def m_accuracy(zhat,y):
    """Fraction of samples whose highest-scoring class equals the label.

    :param zhat: class scores or probabilities, one row per sample
    :param y: true class indices
    :return: scalar tensor holding the mean of the element-wise matches
    """
    predicted = zhat.argmax(dim=1).flatten()
    hits = predicted == y.flatten()
    return hits.float().mean()


if __name__ == '__main__':
    # Model with the default sizes (2 inputs, 3 class scores).
    net = SoftMaxR()
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(net.parameters(), lr=lr)
    fit(net=net,epochs=epochs,data=data,criterion=criterion,optimizer=optimizer)
    # Author's note: the accuracy only reaches about 80% at this point; a
    # better result requires adjusting the model structure.
    print(torch.cuda.is_available())

你可能感兴趣的:(深度学习,深度学习,pytorch,python)