LSTM之时间序列预测

#-*- coding: utf-8 -*-

import os
import sys
import time
import numpy as np
from keras.models import Sequential
from keras.models import load_model
from numpy import newaxis
import matplotlib.pyplot as plt
from keras.layers import LSTM,Dense,Activation,Dropout

# Python 2 only: force the process-wide implicit str<->unicode codec to UTF-8.
# `reload(sys)` is needed there because site.py removes `setdefaultencoding`
# from the `sys` module at interpreter start-up.  Python 3 has no `reload`
# builtin and already defaults to UTF-8, so the hack is skipped to keep the
# module importable on both major versions.
if sys.version_info[0] == 2:
    reload(sys)  # noqa: F821 -- builtin on Python 2 only
    sys.setdefaultencoding('utf-8')


def load_data(path='data.txt', sequence_length=50):
    """Build sliding-window train/test arrays from a one-value-per-line file.

    Every run of ``sequence_length + 1`` consecutive values forms one sample:
    the first ``sequence_length`` values are the input window and the final
    value is the target.  The first 90% of the samples (in file order) become
    the training set, whose rows are then shuffled; the rest is the test set
    and keeps its original order.

    Args:
        path: Text file with one numeric value per line.  Blank lines
            (including the one produced by a trailing newline) are ignored.
        sequence_length: Number of time steps in each input window.

    Returns:
        Tuple ``(x_train, y_train, x_test, y_test)`` of float arrays.  The
        ``x_*`` arrays have shape ``(samples, sequence_length, 1)`` and the
        ``y_*`` arrays have shape ``(samples,)``.

    Raises:
        ValueError: If a non-blank line is not numeric, or if the file holds
            too few values to form a single sample.
    """
    # The context manager guarantees the handle is closed even on error.
    with open(path, 'r') as handle:
        values = [float(line) for line in handle.read().split('\n')
                  if line.strip()]

    if len(values) <= sequence_length:
        raise ValueError(
            'need more than %d values to build a window, got %d'
            % (sequence_length, len(values)))

    windows = [values[start:start + sequence_length + 1]
               for start in range(len(values) - sequence_length)]
    result = np.array(windows, dtype=float)
    print(result.shape)

    row = int(round(0.9 * result.shape[0]))
    train = result[:row, :]
    # `train` is a view, so this shuffles only the training rows, in place.
    np.random.shuffle(train)

    x_train = train[:, :-1]
    y_train = train[:, -1]
    x_test = result[row:, :-1]
    y_test = result[row:, -1]

    # Add the trailing feature axis: (samples, time steps, 1).
    x_train = np.reshape(x_train, (x_train.shape[0], x_train.shape[1], 1))
    x_test = np.reshape(x_test, (x_test.shape[0], x_test.shape[1], 1))
    return x_train, y_train, x_test, y_test


def build_model(layers=(1, 50, 100, 1), seq_len=50):
    """Assemble and compile a stacked two-layer LSTM regression network.

    The network is: LSTM (returning the full sequence) -> Dropout(0.2) ->
    LSTM (returning only the last step) -> Dropout(0.2) -> Dense -> linear
    activation.  It is compiled with mean-squared-error loss and the RMSprop
    optimizer.

    Args:
        layers: Sequence of four sizes.  ``layers[1]`` and ``layers[2]`` are
            the unit counts of the first and second LSTM layers and
            ``layers[3]`` is the width of the Dense output layer.
            ``layers[0]`` is not read by this function.
        seq_len: Number of time steps per input sample; the model's input
            shape is ``(seq_len, 1)``.

    Returns:
        The compiled Keras ``Sequential`` model.
    """
    model = Sequential()
    model.add(LSTM(
        input_shape=(seq_len, 1),
        units=layers[1],
        return_sequences=True))
    model.add(Dropout(0.2))

    model.add(LSTM(
        units=layers[2],
        return_sequences=False))
    model.add(Dropout(0.2))

    model.add(Dense(
        units=layers[3]))
    model.add(Activation("linear"))

    start = time.time()
    model.compile(loss="mse", optimizer="rmsprop")
    # Single pre-formatted string so the output is the same on Python 2 and 3.
    print("Compilation Time :  %s" % (time.time() - start))
    return model

def predict_point_by_point(model, data):
    """Predict directly on the given samples and flatten the result.

    Args:
        model: Object exposing ``predict(data)`` that returns an array.
        data: Batch of input samples, passed to ``model.predict`` unchanged.

    Returns:
        One-dimensional array holding every element of the model output.
    """
    raw_output = model.predict(data)
    flat_shape = (raw_output.size,)
    return np.reshape(raw_output, flat_shape)

def predict_sequence_full(model, data, window_size):
    """Forecast ``len(data)`` steps by feeding predictions back into the input.

    Starting from the first frame of ``data``, each step predicts one value,
    drops the oldest element of the frame and appends the new prediction as
    the frame's last element, so later predictions are based on earlier ones.

    Args:
        model: Object exposing ``predict(batch)``; element ``[0, 0]`` of its
            output is taken as the predicted value.
        data: Array of frames indexed as ``data[i]``; each frame is a 2-D
            array of (time steps, features).
        window_size: Kept for backward compatibility.  The insertion position
            is now derived from the frame itself, because the former index
            ``window_size - 2`` placed the prediction before the last element
            whenever ``window_size`` equalled the frame length.

    Returns:
        List with one predicted value per frame in ``data`` (empty when
        ``data`` is empty).
    """
    if len(data) == 0:
        return []

    curr_frame = data[0]
    predicted = []
    for _ in range(len(data)):
        next_value = model.predict(curr_frame[np.newaxis, :, :])[0, 0]
        predicted.append(next_value)
        # Slide the window: discard the oldest step, append the prediction.
        tail = curr_frame[1:]
        curr_frame = np.insert(tail, tail.shape[0], next_value, axis=0)
    return predicted

def predict_sequences_multiple(model, data, window_size, prediction_len):
    """Forecast several short sequences, re-seeding from real data each time.

    ``data`` is consumed in strides of ``prediction_len`` frames.  Each stride
    starts from the true frame ``data[i * prediction_len]``; within the stride
    every prediction is appended as the last element of the frame (after
    dropping the oldest element) and used as input for the next prediction.

    Args:
        model: Object exposing ``predict(batch)``; element ``[0, 0]`` of its
            output is taken as the predicted value.
        data: Array of frames indexed as ``data[i]``; each frame is a 2-D
            array of (time steps, features).
        window_size: Kept for backward compatibility.  The insertion position
            is now derived from the frame itself, because the former index
            ``window_size - 2`` placed the prediction before the last element
            whenever ``window_size`` equalled the frame length.
        prediction_len: Number of consecutive steps to predict per sequence.

    Returns:
        List of ``len(data) // prediction_len`` lists, each holding
        ``prediction_len`` predicted values.
    """
    prediction_seqs = []
    for seq_index in range(len(data) // prediction_len):
        curr_frame = data[seq_index * prediction_len]
        predicted = []
        for _ in range(prediction_len):
            next_value = model.predict(curr_frame[np.newaxis, :, :])[0, 0]
            predicted.append(next_value)
            # Slide the window: discard the oldest step, append the prediction.
            tail = curr_frame[1:]
            curr_frame = np.insert(tail, tail.shape[0], next_value, axis=0)
        prediction_seqs.append(predicted)
    return prediction_seqs


def plot_results(predicted_data, true_data):
    """Draw the true series and the predicted series on one set of axes.

    A new white-background figure is created, both series are plotted with a
    legend ('True Data' and 'Prediction'), and the figure is shown.

    Args:
        predicted_data: Sequence of predicted values.
        true_data: Sequence of ground-truth values.
    """
    figure = plt.figure(facecolor='white')
    axes = figure.add_subplot(111)
    axes.plot(true_data, label='True Data')
    # The subplot just added is the current axes, so plotting on it directly
    # is the same as going through the pyplot state machine.
    axes.plot(predicted_data, label='Prediction')
    axes.legend()
    plt.show()


if __name__=='__main__':
    # Script entry point: load the data, train the LSTM, then report and plot
    # the point-by-point predictions on the held-out test windows.
    epochs = 5  # number of passes over the training set

    print('Loading data... ')
    X_train, y_train, X_test, y_test = load_data()
    print('\nData Loaded. Compiling...\n')

    model = build_model([1, 50, 100, 1])
    model.fit(
        X_train,
        y_train,
        batch_size=512,
        epochs=epochs,
        validation_split=0.05)
    # To persist / restore the trained network instead of retraining:
    #   model.save('./lstm_time_series.model')
    #   model = load_model('./lstm_time_series.model')

    predicted = predict_point_by_point(model, X_test)

    # Residuals are computed in float64 regardless of the dtype the loader
    # returned for the targets.
    residuals = (np.asarray(predicted).astype('float64')
                 - np.asarray(y_test).astype('float64'))
    # Euclidean (L2) norm of the residual vector.  Vectorised because
    # np.sum() over a lazy map() object does not work on Python 3.
    print(np.sqrt(np.sum(residuals ** 2)))
    plot_results(predicted, y_test)

你可能感兴趣的:(数据挖掘,机器学习)