LSTM:利用LSTM进行序列预测

文章目录

  • 1. 命令行参数解析
  • 2. K折验证
  • 3. 数据准备
  • 4. 模型准备
  • 5. 训练函数
  • 6. 测试函数

1. 命令行参数解析

main.py:

if __name__ == '__main__':
    def _str2bool(value):
        """Convert a command-line token to a bool.

        ``type=bool`` would call ``bool()`` on the raw string, which is True
        for every non-empty value, including ``"False"``.
        """
        token = str(value).strip().lower()
        if token in ('true', 't', 'yes', 'y', '1'):
            return True
        if token in ('false', 'f', 'no', 'n', '0'):
            return False
        raise argparse.ArgumentTypeError(f"expected a boolean value, got {value!r}")

    # Used when --config is not given, so running the script without
    # arguments still loads this configuration.
    default_config_path = './experiment/config/HuJangWord/lstm.json'

    parser = argparse.ArgumentParser(description="Run LSTM(LSTM Network)")
    parser.add_argument('-m', '--mode', type=str, choices=['train', 'test'], default='train',
                        help='Run mode(train/test)')
    parser.add_argument('-c', '--config', type=str, default=default_config_path, help='Model config Path')
    parser.add_argument('--cuda', type=_str2bool, default=False, help='CUDA')
    # Parse the real command line so the options above actually take effect.
    args = parser.parse_args()

    config_path = args.config
    config = get_config(config_path)  # note: config_path is relative to main.py

    if config["K_Flod"] == 0:  # no k-fold training requested: single train/test split
        start_time = datetime.datetime.now()
        df_train, df_test, data_frame = No_Flod()
        model, train_avg_loss, df_train_loss = train(df_train, data_frame, config["num_epochs"],
                                                     config["learning_rate"], config['weight_decay'],
                                                     config["batch_size"])
        test_avg_loss, _ = test(model, df_test, data_frame, config["batch_size"], df_train_loss)

        end_time = datetime.datetime.now()
        # total_seconds() includes whole days; timedelta.seconds alone wraps every 24 hours.
        minutes, seconds = divmod(int((end_time - start_time).total_seconds()), 60)
        print(f"LSTM模型训练所用时间为{minutes}分钟{seconds}秒"
              f"平均训练损失为{train_avg_loss},平均测试损失为{test_avg_loss}")
    else:  # k-fold training
        K_Flod(config["K_Flod"], num_EPOCHS=config["num_epochs"], Learning_rate=config["learning_rate"],
               Weight_decay=config['weight_decay'], batch_size=config["batch_size"])

2. K折验证

def K_Flod(k, num_EPOCHS=20, Learning_rate=0.01, Weight_decay=0.1, batch_size=256):
    """Train and evaluate the model once per fold and print the averaged losses.

    :param k: number of folds.
    :param num_EPOCHS: number of epochs per fold, forwarded to ``train``.
    :param Learning_rate: learning rate forwarded to ``train``.
    :param Weight_decay: weight decay forwarded to ``train``.
    :param batch_size: batch size forwarded to ``train`` and ``test``.
    :return: None; the timing and the fold-averaged losses are printed.
    """
    data_frame = pd.read_csv(os.path.join(file_root, file_name))
    start_time = datetime.datetime.now()

    train_avg_loss, test_avg_loss = 0, 0
    # Passed into and returned from test() so the running minimum survives across folds.
    min_test_loss = float('inf')
    for i in range(k):
        print(f"接下来进入第{i+1}折训练:")
        df_fold_train, df_fold_test = get_k_flod_data(k, i)  # partitions for fold i
        model, train_k_loss, df_train_loss = train(df_fold_train, data_frame, num_EPOCHS, Learning_rate,
                                                   Weight_decay, batch_size)

        test_k_loss, min_test_loss = test(model, df_fold_test, data_frame, batch_size, df_train_loss,
                                          min_test_loss)

        train_avg_loss += train_k_loss
        test_avg_loss += test_k_loss
    train_avg_loss /= k
    test_avg_loss /= k

    cur_time = datetime.datetime.now()
    # total_seconds() includes whole days; timedelta.seconds alone wraps every 24 hours.
    minutes, seconds = divmod(int((cur_time - start_time).total_seconds()), 60)
    print(f"LSTM模型{k}折所用时间为{minutes}分钟{seconds}秒"
          f"平均训练损失为{train_avg_loss},平均测试损失为{test_avg_loss}")

3. 数据准备

def get_k_flod_data(k, i):
    """Return the train/test partitions for fold ``i`` of a ``k``-fold split.

    The CSV at ``file_root/temporary_data_file_name`` is cut into contiguous
    folds of ``rows // k`` rows. Fold ``i`` is held out as the test partition;
    every other row, including remainder rows past the last full fold, goes
    to the training partition.

    :param k: number of folds; must be greater than 1.
    :param i: zero-based index of the fold to hold out.
    :return: ``(df_train, df_test)``, both re-indexed from 0.
    """
    assert k > 1

    temp_data_frame = pd.read_csv(os.path.join(file_root, temporary_data_file_name))

    n_rows = temp_data_frame.shape[0]
    fold_size = n_rows // k  # rows per fold: total rows / folds, rounded down
    test_start = fold_size * i
    test_stop = fold_size * (i + 1)

    # Both slices need an explicit stop: np.r_ hands an open-ended slice to
    # np.arange with stop=None, which produces 0..start-1 rather than
    # start..end and would put the held-out fold into the training rows.
    train_positions = np.r_[0:test_start, test_stop:n_rows]

    df_train = temp_data_frame.iloc[train_positions].reset_index(drop=True)
    # Re-index the test partition as well so both partitions start at 0.
    df_test = temp_data_frame.iloc[test_start:test_stop].reset_index(drop=True)

    return df_train, df_test
def No_Flod():
    """Randomly split the temporary data file 80/20 when k-fold training is not used.

    Reads ``file_root/file_name`` and ``file_root/temporary_data_file_name``,
    shuffles the row positions of the latter with the ``random`` module and
    assigns the first 20% (rounded down) to the test partition.

    :return: ``(df_train, df_test, data_frame)`` where the two partitions are
        re-indexed from 0 and ``data_frame`` is the CSV read from ``file_name``.
    """
    data_frame = pd.read_csv(os.path.join(file_root, file_name))
    temp_data_frame = pd.read_csv(os.path.join(file_root, temporary_data_file_name))

    n_rows = temp_data_frame.shape[0]
    # Shuffle a private list of row positions instead of the frame's own
    # index buffer, so the DataFrame itself is never mutated.
    positions = list(range(n_rows))
    random.shuffle(positions)

    # Slice bounds must be integers; 0.2 * n_rows is a float.
    cri_poi = int(0.2 * n_rows)

    df_train = temp_data_frame.iloc[positions[cri_poi:]].reset_index(drop=True)
    df_test = temp_data_frame.iloc[positions[:cri_poi]].reset_index(drop=True)

    return df_train, df_test, data_frame
class EduDataSet(Dataset):
    """Dataset that expands each row of ``df_part`` into a 6-step feature sequence.

    Columns 2..7 of a ``df_part`` row are read as row positions into
    ``data_frame``; column 1 is returned alongside the sequence.
    """

    def __init__(self, df_part, data_frame):
        self.data_frame = data_frame
        self.temp_data_frame = df_part

    def __len__(self):
        return self.temp_data_frame.shape[0]

    def __getitem__(self, item):
        row = self.temp_data_frame.iloc[item].tolist()
        steps = []
        for column in range(2, 8):
            source_row = int(row[column])
            if source_row == -1:
                # -1 marks a missing step: use a row of nine zeros.
                features = [0] * 9
            else:
                # Drop the first two columns of the referenced row.
                features = self.data_frame.iloc[source_row].tolist()[2:]
            steps.append(features)
        return torch.tensor(steps), row[1]

4. 模型准备

这里的模型只包含一个 LSTM 层和一个线性层:取 LSTM 最后一个时间步的输出,经线性层映射为一个预测值。

class LSTMpred(nn.Module):
    """One ``nn.LSTM`` followed by a linear layer that maps the last time step to one value.

    The recurrent state is kept on ``self.hidden`` and is overwritten by every
    forward pass; callers that want a fresh state assign ``init_hidden()`` to it.
    """

    def __init__(self, input_size, hidden_dim):
        """
        :param input_size: number of features per time step.
        :param hidden_dim: size of the LSTM hidden state.
        """
        super(LSTMpred, self).__init__()
        self.input_dim = input_size
        self.hidden_dim = hidden_dim
        self.lstm = nn.LSTM(input_size, hidden_dim)
        self.hidden2out = nn.Linear(hidden_dim, 1)
        self.hidden = self.init_hidden()

    def init_hidden(self, batch_size=256, dtype=torch.double, device=None):
        """Return a zero ``(h_0, c_0)`` pair of shape ``(1, batch_size, hidden_dim)``.

        The defaults reproduce the previously hard-coded state (batch size 256,
        double precision), so existing ``init_hidden()`` calls are unaffected.

        :param batch_size: size of the batch dimension of the state.
        :param dtype: dtype of the state tensors.
        :param device: device for the state tensors; ``None`` uses the default device.
        :return: tuple ``(h_0, c_0)``.
        """
        shape = (1, batch_size, self.hidden_dim)
        return (torch.zeros(shape, requires_grad=True, dtype=dtype, device=device),
                torch.zeros(shape, requires_grad=True, dtype=dtype, device=device))

    def forward(self, x):
        """Run the LSTM over ``x`` and project the last time step to one value.

        :param x: input laid out as ``(seq_len, batch, input_size)``, since the
            LSTM is built without ``batch_first``.
        :return: the linear layer's output for the last time step.
        """
        h_0 = self.hidden[0]
        # nn.LSTM rejects a state whose batch size, dtype or device differs
        # from the input, so rebuild the state whenever the stored one cannot
        # be used with this batch.
        if h_0.size(1) != x.size(1) or h_0.dtype != x.dtype or h_0.device != x.device:
            self.hidden = self.init_hidden(x.size(1), dtype=x.dtype, device=x.device)

        lstm_out, self.hidden = self.lstm(x, self.hidden)
        # lstm_out[-1] is the output at the last time step; squeeze(0) only has
        # an effect when the batch dimension is 1.
        x = lstm_out[-1].squeeze(0)
        x = self.hidden2out(x)
        return x

5. 训练函数

def train(df_train, data_frame, num_EPOCHS, Learning_rate, Weight_decay, batch_size):
    """Train a fresh ``LSTMpred(9, 6)`` model with SGD and a step learning-rate schedule.

    :param df_train: rows used to build the training ``EduDataSet``.
    :param data_frame: feature table passed to ``EduDataSet``.
    :param num_EPOCHS: number of passes over the training loader.
    :param Learning_rate: initial SGD learning rate.
    :param Weight_decay: SGD weight decay.
    :param batch_size: batch size of the training loader (incomplete batches are dropped).
    :return: ``(model, min_train_loss, df)`` where ``min_train_loss`` is the
        lowest per-epoch average loss and ``df`` holds, one row per epoch, the
        loss sampled every 100 batches.
    """
    model = LSTMpred(9, 6).double()
    optimizer = optime.SGD(model.parameters(), lr=Learning_rate, weight_decay=Weight_decay)
    # The learning rate is multiplied by 0.2 every 2 epochs.
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=2, gamma=0.2)
    loss_arr = []

    train_sets = EduDataSet(df_train, data_frame)
    train_loader = DataLoader(train_sets, batch_size=batch_size, shuffle=True, num_workers=20, drop_last=True,
                              pin_memory=True)

    start_time = datetime.datetime.now()
    min_train_loss = float('inf')
    for epo in range(num_EPOCHS):
        temp_loss_arr = []
        temp_loss_epo = 0
        for index, (inputs, labels) in enumerate(train_loader):
            # The loader yields (batch, seq, features); the LSTM takes (seq, batch, features).
            inputs = torch.transpose(inputs, 0, 1)

            # Start every batch from a fresh recurrent state.
            # NOTE(review): init_hidden() is called without a batch size here —
            # confirm the state it returns matches ``batch_size``.
            model.hidden = model.init_hidden()

            out = model(inputs.double())

            loss = loss_function(out.squeeze(-1), labels)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            step_loss = loss.item()
            temp_loss_epo += step_loss
            if index % 100 == 0:
                # Record the plain float: storing the tensor would keep its
                # autograd graph alive and put tensor objects in the DataFrame.
                temp_loss_arr.append(step_loss)
                cur_time = datetime.datetime.now()
                # total_seconds() includes whole days; timedelta.seconds wraps every 24 hours.
                elapsed = int((cur_time - start_time).total_seconds())
                print(f"当前训练循环为{epo}:{index}, 目前耗时为{elapsed},损失为{loss}")
        # Average loss of this epoch; keep the smallest one seen so far.
        temp_loss_epo /= len(train_loader)
        if min_train_loss > temp_loss_epo:
            min_train_loss = temp_loss_epo

        scheduler.step()
        loss_arr.append(temp_loss_arr)

    df = pd.DataFrame(loss_arr)  # loss history of this run, one row per epoch

    return model, min_train_loss, df

6. 测试函数

def test(model, df_test, data_frame, batch_size, df_train_loss, min_test_loss=float('inf')):
    """Evaluate ``model`` on ``df_test`` and persist it when it beats ``min_test_loss``.

    :param model: trained model exposing ``hidden`` and ``init_hidden()``.
    :param df_test: rows used to build the test ``EduDataSet``.
    :param data_frame: feature table passed to ``EduDataSet``.
    :param batch_size: batch size of the test loader (incomplete batches are dropped).
    :param df_train_loss: training-loss history written to CSV when this run is the best so far.
    :param min_test_loss: best test loss seen so far.
    :return: ``(test_loss, min_test_loss)`` as Python floats: the average loss
        over the test batches and the updated minimum.
    """
    test_sets = EduDataSet(df_test, data_frame)
    test_loader = DataLoader(test_sets, batch_size=batch_size, shuffle=False, num_workers=20, drop_last=True,
                             pin_memory=True)

    with torch.no_grad():
        test_loss = 0
        for index, (inputs, labels) in enumerate(test_loader):
            # The loader yields (batch, seq, features); the LSTM takes (seq, batch, features).
            inputs = torch.transpose(inputs, 0, 1)

            # Reset the recurrent state for every batch, exactly as the
            # training loop does, so evaluation never starts from the state
            # left behind by the previous batch.
            model.hidden = model.init_hidden()

            out = model(inputs.double())
            # Accumulate a plain float so the returned loss is a number, like
            # the training loss, rather than a tensor.
            test_loss += loss_function(out.squeeze(-1), labels).item()

            if index % 100 == 0:
                print(f"当前测试循环为{index},损失为{test_loss}")
        test_loss = test_loss / len(test_loader)
        print(f"测试完成,平均损失值为{test_loss}")

    if min_test_loss > test_loss:
        min_test_loss = test_loss
        # NOTE(review): ``file_name`` is also the name other functions read the
        # dataset from; confirm that in this module it points at a separate
        # loss-history file, otherwise this write replaces the dataset.
        df_train_loss.to_csv(os.path.join(file_root, file_name), index=False, encoding="utf-8")
        torch.save(model.state_dict(), os.path.join(file_root, model_name))

        with open(os.path.join(file_root, file_log), 'a') as f:
            curtime = datetime.datetime.now()
            f.write(f"当前时间为:{curtime}, test_min_loss: {test_loss}, train_loss_decline: {file_name}\n")
    return test_loss, min_test_loss

你可能感兴趣的:(深度学习,神经网络,pytorch)