LSTM_Train

LSTM_Train.py

# Train the models using the optimal hyperparameters

import time
import math

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
from sklearn.preprocessing import MinMaxScaler

fm = pd.read_excel(r"D:\python\代码\data1-excel.xlsx")  # raw data; path kept from the original environment
print(fm)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)


'''
# Transformer model (this whole block is kept commented out; the LSTM below is the model actually trained)
class PositionalEncoding(nn.Module):  # standard sinusoidal positional encoding
    def __init__(self, d_model, max_len=5000):  # max_len=5000 is just a preallocated table size; it has a default so the class is easy to reuse
        super(PositionalEncoding, self).__init__()  # initialise the nn.Module base class
        pe = torch.zeros(max_len, d_model)
        position = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1)
        div_term = torch.exp(torch.arange(0, d_model, 2).float() * (-math.log(10000.0) / d_model))
        pe[:, 0::2] = torch.sin(position * div_term)
        pe[:, 1::2] = torch.cos(position * div_term)
        pe = pe.unsqueeze(0).transpose(0, 1)
        self.register_buffer('pe', pe)
    def forward(self, x):
        return x + self.pe[:x.size(0), :]

# Basic structure of the Transformer
class TransAm(nn.Module):
    def __init__(self, feature_size=250, num_layers=1, dropout=0.1):  # dropout=0.1 zeroes 10% of activations per layer; the defaults are just convenient presets
        super(TransAm, self).__init__()
        self.model_type = 'Transformer'
        self.src_mask = None
        self.pos_encoder = PositionalEncoding(feature_size)
        self.encoder_layer = nn.TransformerEncoderLayer(d_model=feature_size, nhead=10, dropout=dropout)  # one encoder layer; the encoder stacks num_layers copies of it
        self.transformer_encoder = nn.TransformerEncoder(self.encoder_layer, num_layers=num_layers)  # the full encoder built from the layer above
        self.decoder = nn.Linear(feature_size, 1)
        self.init_weights()

    def init_weights(self):  # weight initialisation
        initrange = 0.1
        self.decoder.bias.data.zero_()
        self.decoder.weight.data.uniform_(-initrange, initrange)

    def forward(self, src):
        if self.src_mask is None or self.src_mask.size(0) != len(src):  # the causal mask hides future positions from each time step
            device = src.device
            mask = self._generate_square_subsequent_mask(len(src)).to(device)
            self.src_mask = mask

        src = self.pos_encoder(src)
        output = self.transformer_encoder(src, self.src_mask)
        output = self.decoder(output)
        return output

    def _generate_square_subsequent_mask(self, sz):
        mask = (torch.triu(torch.ones(sz, sz)) == 1).transpose(0, 1)
        mask = mask.float().masked_fill(mask == 0, float('-inf')).masked_fill(mask == 1, float(0.0))
        return mask

def create_inout_sequences(input_data, tw):  # Pair each window of tw points with the same window shifted by output_window; the model learns to predict that shifted trend (the shift here is 1 step)
    inout_seq = []
    L = len(input_data)
    for i in range(L - tw):
        train_seq = input_data[i:i + tw]
        train_label = input_data[i + output_window:i + tw + output_window]
        inout_seq.append((train_seq, train_label))
    return torch.FloatTensor(np.array(inout_seq))  # convert the list of (window, label) pairs to a float tensor


def get_data(data, n):
    series = data
    scaler = MinMaxScaler(feature_range=(-1, 1))
    series = scaler.fit_transform(series.values.reshape(-1, 1)).reshape(-1)   # scale the series to [-1, 1]
    train_samples = int(n * len(series))
    train_data = series[:train_samples]
    test_data = series[train_samples:]
    train_sequence = create_inout_sequences(train_data, input_window)  # build (sequence, label) pairs for training
    train_sequence = train_sequence[:-output_window]  # drop the last output_window pairs, whose labels would overrun the series
    test_data = create_inout_sequences(test_data, input_window)
    test_data = test_data[:-output_window]
    return train_sequence.to(device), test_data.to(device)


def get_batch(source, i, batch_size):
    seq_len = min(batch_size, len(source) - 1 - i)
    data = source[i:i + seq_len]
    input = torch.stack(torch.stack([item[0] for item in data]).chunk(input_window, 1))  # reshape to (input_window, batch, 1)
    target = torch.stack(torch.stack([item[1] for item in data]).chunk(input_window, 1))
    return input, target


def train(train_data, epochs):  # training loop
    for epoch in range(1, epochs + 1):   # one epoch = one full pass over the training set
        model.train()   # enable training mode: dropout active, batch-norm uses per-batch statistics
        lnn = enumerate(range(0, len(train_data) - 1, batch_size))  # yields (batch_index, start offset)
        start_time2 = time.time()
        for batch_index, i in lnn:   # iterate over the mini-batches
            data, targets = get_batch(train_data, i, batch_size)  # fetch one mini-batch
            optimizer.zero_grad()   # clear gradients from the previous step
            output = model(data)   # forward pass
            loss = criterion(output, targets)   # compare predictions with the targets
            loss.backward()   # backward pass: compute gradients
            torch.nn.utils.clip_grad_norm_(model.parameters(), 0.7)   # clip the global gradient norm at 0.7 to curb exploding gradients
            optimizer.step()   # update the parameters

        scheduler.step()   # decay the learning rate once per epoch
        end_time2 = time.time()
        print("epoch: {}  elapsed (s): {}".format(epoch, end_time2 - start_time2))

def evaluate(eval_model, data_source):  # batched evaluation (not used in this script)
    eval_model.eval()
    total_loss = 0
    eval_batch_size = 1000
    with torch.no_grad():  # no gradients needed during evaluation
        for i in range(0, len(data_source) - 1, eval_batch_size):
            data, targets = get_batch(data_source, i, eval_batch_size)
            output = eval_model(data)
            total_loss += len(data[0]) * criterion(output, targets).cpu().item()
    return total_loss / len(data_source)


def plot_and_loss(eval_model, data_source, epoch, name):  # evaluate on the held-out set and plot
    eval_model.eval()  # evaluation mode: dropout off, batch-norm statistics frozen
    total_loss = 0.
    test_result = torch.Tensor(0)  # empty tensors to collect predictions and ground truth
    truth = torch.Tensor(0)
    with torch.no_grad():
        for i in range(0, len(data_source) - 1):
            data, target = get_batch(data_source, i, 1)
            output = eval_model(data)
            total_loss += criterion(output, target).item()   # item() extracts the Python scalar from a one-element tensor
            test_result = torch.cat((test_result, output[-1].view(-1).cpu()), 0)  # append this window's last-step prediction
            truth = torch.cat((truth, target[-1].view(-1).cpu()), 0)  # view(-1) flattens to 1-D; cat along dim 0
    line1, = plt.plot(test_result, color="red")
    line2, = plt.plot(truth, color="blue")
    line3, = plt.plot(truth - test_result, color="green")
    plt.legend([line1, line2, line3], ['test-set prediction', 'test-set ground truth', 'prediction - truth'], loc='upper right')
    plt.xlabel(u"sample index (s)")
    plt.ylabel(u"trend")
    plt.grid(True, which='both')
    plt.axhline(y=0, color='k')
    plt.savefig('./' + name + '_train_epoch%d.png' % epoch)
    plt.close()
    return total_loss / i  # average per-window loss (the caller ignores this value)

def residuals_prediction(eval_model, data_source, epoch, name):  # prediction on live data
    eval_model.eval()
    total_loss = 0.
    test_result = torch.Tensor(0)
    truth = torch.Tensor(0)
    with torch.no_grad():
        for i in range(0, len(data_source) - 1):
            data, target = get_batch(data_source, i, 1)
            output = eval_model(data)
            total_loss += criterion(output, target).item()
            test_result = torch.cat((test_result, output[-1].view(-1).cpu()), 0)
            truth = torch.cat((truth, target[-1].view(-1).cpu()), 0)
    plt.plot(test_result, color="red")
    plt.plot(truth, color="blue")
    plt.plot(truth - test_result, color="yellow")
    plt.grid(True, which='both')
    plt.axhline(y=0, color='k')
    plt.savefig('./' + name + '_predict_epoch%d.png' % epoch)
    plt.close()
    return test_result, truth

# Model training section
# Hyperparameters
window_h = 20
batch_h = 512
epochs_h = 100

# training-set slicing
N_Training = 190000
N_reality_Begin = 190000
N_reality_end = 210000

input_window = int(window_h)
output_window = 1
batch_size = int(batch_h)

names = ['Coal','electric','Inlet flow','outlet temperature','differential pressure','Outlet flow']

for i in range(0, 6):
    name = names[i]
    print("---------------- {}: training started --------------------".format(name))
    fm1 = fm[fm['类别'] == 0]  # keep only rows of category 0 ('类别' is the category column in the spreadsheet)
    fm1 = fm1[0:N_Training]
    data = fm1[name]
    train_data, val_data = get_data(data, 0.7)  # get_data already moves both splits to the device
    model = TransAm().to(device)
    criterion = nn.MSELoss()
    lr = 0.01
    epochs = int(epochs_h)
    optimizer = torch.optim.AdamW(model.parameters(), lr=lr)   # optimizer
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer, 1, gamma=0.95)  # multiply the learning rate by gamma=0.95 after every scheduler.step(), i.e. once per epoch
    train(train_data, epochs)
    plot_and_loss(model, val_data, epochs, name)
    torch.save(model, 'model_' + name)
    print("---------------- {}: training finished --------------------".format(name))


print('------------------------ model training finished ---------------------------------')'''
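# A quick illustration (added here, not in the original script) of the causal
# mask the commented-out Transformer builds: each position may attend to itself
# and to earlier steps (0.0), while future steps are blocked with -inf. It is
# re-implemented inline because the classes above live inside the docstring.
_sz = 4
_mask = (torch.triu(torch.ones(_sz, _sz)) == 1).transpose(0, 1)
_mask = _mask.float().masked_fill(_mask == 0, float('-inf')).masked_fill(_mask == 1, 0.0)
print(_mask)  # lower triangle (incl. diagonal) 0.0, upper triangle -inf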








class LSTM(nn.Module):
    def __init__(self, input_size=1, hidden_layer_size=200, output_size=1):
        super().__init__()
        self.hidden_layer_size = hidden_layer_size

        self.lstm = nn.LSTM(input_size, hidden_layer_size)

        self.linear = nn.Linear(hidden_layer_size, output_size)

        # (h_0, c_0); lazily allocated in forward() so the batch size and
        # device always match the incoming mini-batch
        self.hidden_cell = None

    def forward(self, input_seq):
        # input_seq arrives from get_batch as (seq_len, batch, input_size)
        if self.hidden_cell is None or self.hidden_cell[0].size(1) != input_seq.size(1):
            zeros = torch.zeros(1, input_seq.size(1), self.hidden_layer_size, device=input_seq.device)
            self.hidden_cell = (zeros, zeros.clone())
        lstm_out, self.hidden_cell = self.lstm(input_seq, self.hidden_cell)
        predictions = self.linear(lstm_out)  # one prediction per time step: (seq_len, batch, 1)
        return predictions
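# Optional sanity check (a minimal sketch added for illustration, not part of
# the original post): a dummy batch of 8 windows of length 20 should yield one
# prediction per time step and per window.
_m = LSTM().to(device)
_x = torch.randn(20, 8, 1, device=device)
print(_m(_x).shape)  # expected: torch.Size([20, 8, 1])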


def get_data(data, n):
    series = data
    scaler = MinMaxScaler(feature_range=(-1, 1))
    series = scaler.fit_transform(series.values.reshape(-1, 1)).reshape(-1)   # scale the series to [-1, 1]
    train_samples = int(n * len(series))
    train_data = series[:train_samples]
    test_data = series[train_samples:]
    train_sequence = create_inout_sequences(train_data, input_window)  # build (sequence, label) pairs for training
    train_sequence = train_sequence[:-output_window]  # drop the last output_window pairs, whose labels would overrun the series
    test_data = create_inout_sequences(test_data, input_window)
    test_data = test_data[:-output_window]
    return train_sequence.to(device), test_data.to(device)



def get_batch(source, i, batch_size):
    seq_len = min(batch_size, len(source) - 1 - i)
    data = source[i:i + seq_len]
    # reshape to (input_window, batch, 1): one column per sample in the batch
    inputs = torch.stack(torch.stack([item[0] for item in data]).chunk(input_window, 1))
    target = torch.stack(torch.stack([item[1] for item in data]).chunk(input_window, 1))
    return inputs, target


def create_inout_sequences(input_data, tw):
    # Pair each window of length tw with the same window shifted by
    # output_window, as in the Transformer variant above; a fixed-length
    # label keeps the stacked array rectangular
    inout_seq = []
    L = len(input_data)
    for i in range(L - tw):
        train_seq = input_data[i:i + tw]
        train_label = input_data[i + output_window:i + tw + output_window]
        inout_seq.append((train_seq, train_label))
    return torch.FloatTensor(np.array(inout_seq))  # shape: (num_samples, 2, tw)



def train(train_data, epochs):  # training loop
    for epoch in range(1, epochs + 1):   # one epoch = one full pass over the training set
        model.train()   # enable training mode: dropout active, batch-norm uses per-batch statistics
        lnn = enumerate(range(0, len(train_data) - 1, batch_size))  # yields (batch_index, start offset)
        start_time2 = time.time()
        for batch_index, i in lnn:   # iterate over the mini-batches
            data, targets = get_batch(train_data, i, batch_size)  # fetch one mini-batch
            optimizer.zero_grad()   # clear gradients from the previous step

            model.hidden_cell = None  # reset the LSTM state; forward() re-allocates it to match the batch

            output = model(data)   # forward pass
            loss = criterion(output, targets)   # compare predictions with the targets
            loss.backward()   # backward pass: compute gradients
            torch.nn.utils.clip_grad_norm_(model.parameters(), 0.7)   # clip the global gradient norm at 0.7 to curb exploding gradients
            optimizer.step()   # update the parameters

        scheduler.step()   # decay the learning rate once per epoch
        end_time2 = time.time()
        print("epoch: {}  elapsed (s): {}".format(epoch, end_time2 - start_time2))
        if epoch % 25 == 1:
            print(f'epoch: {epoch:3} loss: {loss.item():10.8f}')


def plot_and_loss(eval_model, data_source, epoch, name):  # evaluate on the held-out set and plot
    eval_model.eval()  # evaluation mode: dropout off, batch-norm statistics frozen
    total_loss = 0.
    test_result = torch.Tensor(0)  # empty tensors to collect predictions and ground truth
    truth = torch.Tensor(0)
    with torch.no_grad():
        for i in range(0, len(data_source) - 1):
            data, target = get_batch(data_source, i, 1)

            eval_model.hidden_cell = None  # reset the LSTM state before each window

            output = eval_model(data)
            total_loss += criterion(output, target).item()   # item() extracts the Python scalar from a one-element tensor
            test_result = torch.cat((test_result, output[-1].view(-1).cpu()), 0)  # append this window's last-step prediction
            truth = torch.cat((truth, target[-1].view(-1).cpu()), 0)  # view(-1) flattens to 1-D; cat along dim 0
    line1, = plt.plot(test_result, color="red")
    line2, = plt.plot(truth, color="blue")
    line3, = plt.plot(truth - test_result, color="green")
    plt.legend([line1, line2, line3], ['test-set prediction', 'test-set ground truth', 'prediction - truth'], loc='upper right')
    plt.xlabel(u"sample index (s)")
    plt.ylabel(u"trend")
    plt.grid(True, which='both')
    plt.axhline(y=0, color='k')
    plt.savefig('./' + name + '_train_epoch%d.png' % epoch)
    plt.close()
    return total_loss / i  # average per-window loss (the caller ignores this value)


# Hyperparameters
window_h = 20
batch_h = 512
epochs_h = 100

output_window = 1
input_window = int(window_h)
batch_size = int(batch_h)

# training-set slicing
N_Training = 190000
N_reality_Begin = 190000
N_reality_end = 210000
names = ['Coal','electric','Inlet flow','outlet temperature','differential pressure','Outlet flow']
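# Optional shape check (a sketch added for illustration, not in the original
# post): on a toy sine series, each sample pairs a 20-step window with the
# same window shifted by output_window, and get_batch stacks them into
# (window, batch, feature) tensors.
_toy = np.sin(np.linspace(0, 10, 200))
_seq = create_inout_sequences(_toy, input_window)
print(_seq.shape)               # (200 - 20, 2, 20)
_xb, _yb = get_batch(_seq, 0, batch_size)
print(_xb.shape, _yb.shape)     # both (20, 179, 1): batch capped at len(_seq) - 1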

for i in range(0, 6):
    name = names[i]
    print("---------------- {}: training started --------------------".format(name))
    fm1 = fm[fm['类别'] == 0]  # keep only rows of category 0 ('类别' is the category column in the spreadsheet)
    fm1 = fm1[0:N_Training]
    data = fm1[name]
    train_data, val_data = get_data(data, 0.7)  # get_data already moves both splits to the device
    model = LSTM().to(device)
    criterion = nn.MSELoss()
    lr = 0.01
    epochs = int(epochs_h)
    optimizer = torch.optim.AdamW(model.parameters(), lr=lr)   # optimizer
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer, 1, gamma=0.95)  # multiply the learning rate by gamma=0.95 after every scheduler.step(), i.e. once per epoch
    train(train_data, epochs)
    plot_and_loss(model, val_data, epochs, name)
    torch.save(model, 'model_' + name)  # save the full module (architecture + weights)
    print("---------------- {}: training finished --------------------".format(name))


print('------------------------ model training finished ---------------------------------')

print(model)  # architecture of the last trained model
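# A hedged sketch of how one of the saved models could be reloaded later for
# inference. torch.save above stored the full pickled module, so torch.load
# restores it as long as the LSTM class is importable; 'Coal' is simply the
# first of the trained series names. Note that recent PyTorch versions (2.6+)
# default torch.load to weights_only=True, which rejects full pickled modules;
# pass weights_only=False there.
reloaded = torch.load('model_Coal', map_location=device)
reloaded.eval()  # evaluation mode for inference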



