Data
| index | date | uv |
| --- | --- | --- |
| 0 | 20180601/01:00 | 716466 |
| 1 | 20180601/02:00 | 998545 |
| 2 | 20180601/03:00 | 1172553 |
| 3 | 20180601/04:00 | 1308438 |
| 4 | 20180601/05:00 | 1464275 |
| ··· | ··· | ··· |
| 1441 | 20190630/00:00 | 12678528 |
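Before modelling, it is worth a quick sanity check that the file parses as expected and that the uv series trends the way the table suggests. This is a minimal sketch, not part of the original code, assuming the same `data/uvdata18192.csv` file with `date` and `uv` columns that the script below reads:

```python
import pandas as pd
import matplotlib.pyplot as plt

df = pd.read_csv('data/uvdata18192.csv')   # columns: date, uv
print(df.shape)                            # row / column count
print(df.head())

# eyeball the raw series before any scaling
df.plot(x='date', y='uv', figsize=(20, 5))
plt.show()
```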
Python code
```python
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
from keras.models import Sequential
from keras.layers import LSTM, Dense, Activation
def load_data(file_name, sequence_length=10, split=0.883):
    df = pd.read_csv(file_name, sep=',')
    datedata = pd.DataFrame(df['date'])
    #timedata = pd.DataFrame(df['time'])
    uvdata = pd.DataFrame(df['uv'])
    # Process uv: cast to float, then scale to [0, 1]
    uvdata_arr = np.array(uvdata).astype(float)
    #timedata_arr = np.array(timedata).astype(int)
    scaler = MinMaxScaler()
    uvdata_scaler = scaler.fit_transform(uvdata_arr)
    # Build sliding windows of length sequence_length + 1
    uv_tmp = []
    for i in range(len(uvdata_scaler) - sequence_length):
        uv_tmp.append(uvdata_scaler[i: i + sequence_length + 1])
    reshaped_uvdata = np.array(uv_tmp).astype('float64')
    #np.random.shuffle(reshaped_uvdata)  # optionally shuffle the windows
    x = reshaped_uvdata[:, :-1]  # steps 1-10: model input
    y = reshaped_uvdata[:, -1]   # step 11: prediction target
    time = datedata[sequence_length:]  # dates aligned with y, used for plotting
    # Split into train and test sets
    split_boundary = int(reshaped_uvdata.shape[0] * split)
    train_x = x[: split_boundary]
    test_x = x[split_boundary:]
    train_y = y[: split_boundary]
    test_y = y[split_boundary:]
    test_time = time[split_boundary:]
    return df, x, y, train_x, train_y, test_x, test_y, test_time, scaler
def build_model():
    # input_dim is the last dimension of train_x, whose shape is (n_samples, time_steps, input_dim)
    model = Sequential()
    model.add(LSTM(input_dim=1, output_dim=50, return_sequences=True))
    print(model.layers)
    model.add(LSTM(100, return_sequences=False))
    model.add(Dense(output_dim=1))
    model.add(Activation('linear'))
    model.compile(loss='mse', optimizer='rmsprop')
    return model
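
# NOTE (not from the original post): input_dim / output_dim above, and nb_epoch
# in model.fit() below, are legacy Keras 1.x keyword arguments. On Keras 2 the
# rough equivalent would be:
#   model.add(LSTM(50, input_shape=(10, 1), return_sequences=True))
#   model.add(Dense(1))
#   model.fit(..., epochs=300, ...)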
def train_model(train_x, train_y, test_x, test_y):
    model = build_model()
    try:
        model.fit(train_x, train_y, batch_size=256, nb_epoch=300, validation_split=0.1)
        predict = model.predict(test_x)
        predict = np.reshape(predict, (predict.size, ))
    except KeyboardInterrupt:
        print(predict)
        print(test_y)
    #print(predict)
    #print(test_y)
    try:
        # Figure 1: comparison in the scaled [0, 1] space
        plt.figure(1, figsize=(20, 10))
        plt.plot(predict, 'r:')
        plt.plot(test_y, 'g-')
        plt.xlabel('date')
        plt.ylabel('uv_scaler')
        plt.legend(['predict', 'true'])
    except Exception as e:
        print(e)
    return predict, test_y
if __name__ == '__main__':
    df, x, y, train_x, train_y, test_x, test_y, test_time, scaler = load_data('data/uvdata18192.csv')
    # LSTM expects input of shape (n_samples, time_steps, input_dim)
    train_x = np.reshape(train_x, (train_x.shape[0], train_x.shape[1], 1))
    test_x = np.reshape(test_x, (test_x.shape[0], test_x.shape[1], 1))
    predict_y, test_y = train_model(train_x, train_y, test_x, test_y)
    # Map the scaled predictions back to the original uv range
    predict_y = scaler.inverse_transform([[i] for i in predict_y])
    test_y = scaler.inverse_transform(test_y)
    n = len(test_time)
    x = range(1, n + 1, 4)
    # Figure 2: comparison on the original uv scale, with dates on the x-axis
    plt.figure(2, figsize=(20, 10))
    plt.plot(predict_y, 'r:')
    plt.plot(test_y, 'g-')
    plt.xticks(x, test_time['date'][::4], rotation=60)
    plt.xlabel('date')
    plt.ylabel('uv')
    plt.legend(['predict', 'true'])
    plt.show()
```
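To put a number on the gap between the dotted prediction curve and the true series, one option is to score the inverse-transformed values with standard regression metrics. The following is a minimal sketch, not part of the original script; it assumes `predict_y` and `test_y` from the `__main__` block above (both of shape `(n_test, 1)` after `scaler.inverse_transform`) are still in scope.

```python
from sklearn.metrics import mean_absolute_error, mean_squared_error

# predict_y / test_y are already back on the original uv scale
mae = mean_absolute_error(test_y, predict_y)
rmse = np.sqrt(mean_squared_error(test_y, predict_y))
mape = np.mean(np.abs((test_y - predict_y) / test_y)) * 100
print('MAE: %.0f  RMSE: %.0f  MAPE: %.2f%%' % (mae, rmse, mape))
```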
Training log:

```
Train on 1135 samples, validate on 127 samples
Epoch 1/300
1135/1135 [==============================] - 19s 17ms/step - loss: 0.0209 - val_loss: 0.0311
Epoch 2/300
1135/1135 [==============================] - 0s 371us/step - loss: 0.0144 - val_loss: 0.0297
Epoch 3/300
1135/1135 [==============================] - 0s 373us/step - loss: 0.0135 - val_loss: 0.0292
Epoch 4/300
1135/1135 [==============================] - 0s 376us/step - loss: 0.0133 - val_loss: 0.0252
Epoch 5/300
1135/1135 [==============================] - 0s 381us/step - loss: 0.0115 - val_loss: 0.0226
······
Epoch 300/300
1135/1135 [==============================] - 0s 396us/step - loss: 0.0034 - val_loss: 0.0344
```
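Note that the validation loss reaches its best values within the first few dozen epochs and has drifted back up to 0.0344 by epoch 300 while the training loss keeps falling, which points to overfitting. One way to handle this, not used in the original post, is Keras's `EarlyStopping` callback (keyword support varies slightly across Keras versions); a sketch of a drop-in change to `train_model`:

```python
from keras.callbacks import EarlyStopping

# stop once val_loss has not improved for 20 consecutive epochs
early_stop = EarlyStopping(monitor='val_loss', patience=20)

# replaces the model.fit(...) call inside train_model above
model.fit(train_x, train_y, batch_size=256, nb_epoch=300,
          validation_split=0.1, callbacks=[early_stop])
```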