Data
| index | date | uv |
| --- | --- | --- |
| 0 | 20180601/01:00 | 716466 |
| 1 | 20180601/02:00 | 998545 |
| 2 | 20180601/03:00 | 1172553 |
| 3 | 20180601/04:00 | 1308438 |
| 4 | 20180601/05:00 | 1464275 |
| ··· | ··· | ··· |
| 1441 | 20190630/00:00 | 12678528 |
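Before modelling, it is worth a quick sanity check that the file parses as expected and that the uv series trends the way the table suggests. This is a minimal sketch, not part of the original code, assuming the same `data/uvdata18192.csv` file with `date` and `uv` columns that the script below reads:

```python
import pandas as pd
import matplotlib.pyplot as plt

df = pd.read_csv('data/uvdata18192.csv')   # columns: date, uv
print(df.shape)                            # row / column count
print(df.head())

# eyeball the raw series before any scaling
df.plot(x='date', y='uv', figsize=(20, 5))
plt.show()
```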
Python code
```python
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
from keras.models import Sequential
from keras.layers import LSTM, Dense, Activation
def load_data(file_name, sequence_length=10, split=0.883):
    df = pd.read_csv(file_name, sep=',')
    datedata = pd.DataFrame(df['date'])
    #timedata = pd.DataFrame(df['time'])
    uvdata = pd.DataFrame(df['uv'])
    # Process uv: cast to float, then scale to [0, 1]
    uvdata_arr = np.array(uvdata).astype(float)
    #timedata_arr = np.array(timedata).astype(int)
    scaler = MinMaxScaler()
    uvdata_scaler = scaler.fit_transform(uvdata_arr)
    # Build sliding windows of length sequence_length + 1
    uv_tmp = []
    for i in range(len(uvdata_scaler) - sequence_length):
        uv_tmp.append(uvdata_scaler[i: i + sequence_length + 1])
    reshaped_uvdata = np.array(uv_tmp).astype('float64')
    #np.random.shuffle(reshaped_uvdata)  # optionally shuffle the windows
    x = reshaped_uvdata[:, :-1]  # steps 1-10: model input
    y = reshaped_uvdata[:, -1]   # step 11: prediction target
    time = datedata[sequence_length:]  # dates aligned with y, used for plotting
    # Split into train and test sets
    split_boundary = int(reshaped_uvdata.shape[0] * split)
    train_x = x[: split_boundary]
    test_x = x[split_boundary:]
    train_y = y[: split_boundary]
    test_y = y[split_boundary:]
    test_time = time[split_boundary:]
    return df, x, y, train_x, train_y, test_x, test_y, test_time, scaler
def build_model():
    # input_dim is the last dimension of train_x, whose shape is (n_samples, time_steps, input_dim)
    model = Sequential()
    model.add(LSTM(input_dim=1, output_dim=50, return_sequences=True))
    print(model.layers)
    model.add(LSTM(100, return_sequences=False))
    model.add(Dense(output_dim=1))
    model.add(Activation('linear'))
    model.compile(loss='mse', optimizer='rmsprop')
    return model
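
# NOTE (not from the original post): input_dim / output_dim above, and nb_epoch
# in model.fit() below, are legacy Keras 1.x keyword arguments. On Keras 2 the
# rough equivalent would be:
#   model.add(LSTM(50, input_shape=(10, 1), return_sequences=True))
#   model.add(Dense(1))
#   model.fit(..., epochs=300, ...)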
def train_model(train_x, train_y, test_x, test_y):
    model = build_model()
    try:
        model.fit(train_x, train_y, batch_size=256, nb_epoch=300, validation_split=0.1)
        predict = model.predict(test_x)
        predict = np.reshape(predict, (predict.size, ))
    except KeyboardInterrupt:
        print(predict)
        print(test_y)
    #print(predict)
    #print(test_y)
    try:
        # Figure 1: comparison in the scaled [0, 1] space
        plt.figure(1, figsize=(20, 10))
        plt.plot(predict, 'r:')
        plt.plot(test_y, 'g-')
        plt.xlabel('date')
        plt.ylabel('uv_scaler')
        plt.legend(['predict', 'true'])
    except Exception as e:
        print(e)
    return predict, test_y
if __name__ == '__main__':
    df, x, y, train_x, train_y, test_x, test_y, test_time, scaler = load_data('data/uvdata18192.csv')
    # LSTM expects input of shape (n_samples, time_steps, input_dim)
    train_x = np.reshape(train_x, (train_x.shape[0], train_x.shape[1], 1))
    test_x = np.reshape(test_x, (test_x.shape[0], test_x.shape[1], 1))
    predict_y, test_y = train_model(train_x, train_y, test_x, test_y)
    # Map the scaled predictions back to the original uv range
    predict_y = scaler.inverse_transform([[i] for i in predict_y])
    test_y = scaler.inverse_transform(test_y)
    n = len(test_time)
    x = range(1, n + 1, 4)
    # Figure 2: comparison on the original uv scale, with dates on the x-axis
    plt.figure(2, figsize=(20, 10))
    plt.plot(predict_y, 'r:')
    plt.plot(test_y, 'g-')
    plt.xticks(x, test_time['date'][::4], rotation=60)
    plt.xlabel('date')
    plt.ylabel('uv')
    plt.legend(['predict', 'true'])
    plt.show()
```
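To put a number on the gap between the dotted prediction curve and the true series, one option is to score the inverse-transformed values with standard regression metrics. The following is a minimal sketch, not part of the original script; it assumes `predict_y` and `test_y` from the `__main__` block above (both of shape `(n_test, 1)` after `scaler.inverse_transform`) are still in scope.

```python
from sklearn.metrics import mean_absolute_error, mean_squared_error

# predict_y / test_y are already back on the original uv scale
mae = mean_absolute_error(test_y, predict_y)
rmse = np.sqrt(mean_squared_error(test_y, predict_y))
mape = np.mean(np.abs((test_y - predict_y) / test_y)) * 100
print('MAE: %.0f  RMSE: %.0f  MAPE: %.2f%%' % (mae, rmse, mape))
```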
Training log:

```
Train on 1135 samples, validate on 127 samples
Epoch 1/300
1135/1135 [==============================] - 19s 17ms/step - loss: 0.0209 - val_loss: 0.0311
Epoch 2/300
1135/1135 [==============================] - 0s 371us/step - loss: 0.0144 - val_loss: 0.0297
Epoch 3/300
1135/1135 [==============================] - 0s 373us/step - loss: 0.0135 - val_loss: 0.0292
Epoch 4/300
1135/1135 [==============================] - 0s 376us/step - loss: 0.0133 - val_loss: 0.0252
Epoch 5/300
1135/1135 [==============================] - 0s 381us/step - loss: 0.0115 - val_loss: 0.0226
······
Epoch 300/300
1135/1135 [==============================] - 0s 396us/step - loss: 0.0034 - val_loss: 0.0344
```
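Note that the validation loss reaches its best values within the first few dozen epochs and has drifted back up to 0.0344 by epoch 300 while the training loss keeps falling, which points to overfitting. One way to handle this, not used in the original post, is Keras's `EarlyStopping` callback (keyword support varies slightly across Keras versions); a sketch of a drop-in change to `train_model`:

```python
from keras.callbacks import EarlyStopping

# stop once val_loss has not improved for 20 consecutive epochs
early_stop = EarlyStopping(monitor='val_loss', patience=20)

# replaces the model.fit(...) call inside train_model above
model.fit(train_x, train_y, batch_size=256, nb_epoch=300,
          validation_split=0.1, callbacks=[early_stop])
```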