import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
from sklearn import preprocessing
from sklearn.metrics import mean_squared_error
# Load the input table; one of its columns is the series that gets modelled.
dataset = pd.read_csv('D:/demo/result/imfs.csv',encoding='utf8')
index = 0 # column index of the series to select from the dataset
timespan = 3 # time span: number of consecutive values in each input window
size = 7 # number of records to predict (the trailing rows held out as the test set)
def createDataset(dataset, timespan, column=None):
    """Build sliding-window samples from one column of ``dataset``.

    Row ``i`` of ``x`` holds ``timespan`` consecutive values of the selected
    column starting at position ``i``; ``y[i]`` is the value that immediately
    follows that window.

    Args:
        dataset: Table exposing a 2-D ``.values`` array (e.g. a DataFrame).
        timespan: Number of consecutive values in each window.
        column: Positional index of the column to use. When ``None`` the
            module-level ``index`` setting is used, so existing two-argument
            calls behave as before.

    Returns:
        Tuple ``(x, y)`` of float arrays with shapes ``(n, timespan)`` and
        ``(n,)``, where ``n`` is the column length minus ``timespan`` (0 when
        the column is too short to yield a single sample).
    """
    if column is None:
        # Fall back to the script-wide column selection.
        column = index
    arr = dataset.values[:, column].astype(float)
    n_samples = max(len(arr) - timespan, 0)
    # Index grid whose row i is [i, i+1, ..., i+timespan-1]. Indexing with it
    # gathers every window at once and keeps x two-dimensional even when
    # there are no samples at all.
    window_idx = np.arange(n_samples)[:, None] + np.arange(timespan)[None, :]
    x = arr[window_idx]
    # Copy so the targets do not alias the intermediate column array.
    y = arr[timespan:timespan + n_samples].copy()
    return x, y
# Build the supervised samples: windows of `timespan` values and the value
# that follows each window.
x, y = createDataset(dataset, timespan)
# Normalization. The windows and the targets are values of the same series,
# so the scaler is fitted once (on the targets) and that same mapping is
# applied to the windows. Refitting one scaler on x and then on y would scale
# each lag column with its own min/max and silently discard the first fit.
# The scaler stays fitted on a single column, which is what the later
# inverse_transform calls on the (n, 1) predictions rely on.
# NOTE(review): min/max are computed over all samples, including the last
# `size` rows that are later held out for testing -- fit on the training
# portion only if a leakage-free evaluation is required.
scaler = preprocessing.MinMaxScaler(feature_range=(0, 1))
Y = scaler.fit_transform(y.reshape(-1, 1))
X = scaler.transform(x.reshape(-1, 1)).reshape(x.shape)
def dividedDataset(X, Y, size):
    """Split the samples in order, holding out the last ``size`` rows.

    Args:
        X: 2-D array of input rows.
        Y: 2-D array of target rows.
        size: Number of trailing rows that go into the test portion.

    Returns:
        Tuple ``(x_train, x_test, y_train, y_test)``.
    """
    # The split position is derived from the number of targets.
    split = len(Y) - size
    head = (slice(0, split), slice(None))
    tail = (slice(split, None), slice(None))
    return X[head], X[tail], Y[head], Y[tail]
x_train, x_test, y_train, y_test = dividedDataset(X, Y, size)

# Convert to the input layout used by the LSTM layer below: every sample is
# presented as one time step carrying `timespan` features.
trainX = x_train.reshape(x_train.shape[0], 1, x_train.shape[1])
testX = x_test.reshape(x_test.shape[0], 1, x_test.shape[1])

# Train the model: one LSTM layer followed by a single-unit dense output,
# optimised with Adam on mean squared error.
inputnum = timespan
hiddennum = 10
model = Sequential([
    LSTM(hiddennum, input_shape=(1, inputnum)),
    Dense(1),
])
model.compile(optimizer='adam', loss='mean_squared_error')
model.fit(trainX, y_train, epochs=100, batch_size=1, verbose=2)
# Model evaluation: predict on both portions, map predictions and targets
# back to the original scale, then report the RMSE of each portion.
ptrain = model.predict(trainX)
ptest = model.predict(testX)

predict_train, actual_train = (
    scaler.inverse_transform(scaled) for scaled in (ptrain, y_train)
)
predict_test, actual_test = (
    scaler.inverse_transform(scaled) for scaled in (ptest, y_test)
)

train_rmse = np.sqrt(mean_squared_error(actual_train, predict_train))
test_rmse = np.sqrt(mean_squared_error(actual_test, predict_test))

print('---结果如下---')
print(
    'train RMSE = ', round(train_rmse, 4),
    ' test RMSE = ', round(test_rmse, 4),
)
# Visualise the results: the raw series overlaid with the training-portion
# and test-portion predictions, each placed at the positions it predicts.
data = dataset.values[:, index].astype(float)

# The first `timespan` points have no prediction; training predictions start
# right after them and test predictions follow the training ones.
train_start = timespan
train_stop = timespan + len(predict_train)

# NaN-filled arrays leave gaps in the plot wherever a curve has no prediction.
trainplot = np.full_like(data, np.nan)
trainplot[train_start:train_stop] = predict_train.ravel()

testplot = np.full_like(data, np.nan)
testplot[train_stop:] = predict_test.ravel()

fig, ax = plt.subplots(figsize=(15, 6))
for series in (data, trainplot, testplot):
    ax.plot(series)
plt.show()