Kaggle 地址: https://www.kaggle.com/
import os
# List every file available under the Kaggle input directory.
for root, _subdirs, names in os.walk('/kaggle/input'):
    for name in names:
        print(os.path.join(root, name))
# -*- coding: utf-8 -*-
import datetime
import numpy as np
import tensorflow as tf
from tensorflow.keras.layers import Dropout, Dense, SimpleRNN
import matplotlib.pyplot as plt
import os
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error, mean_absolute_error
import math
# Normalization.
# Module-level scaler: fitted on the training prices in get_stock_data and
# reused in stock_predict to map scaled values back to the original range.
sc = MinMaxScaler(feature_range=(0, 1))  # scale values into the (0, 1) range
def _make_windows(scaled, time_steps):
    """Slice a scaled (n, 1) series into sliding input windows and next-step targets."""
    inputs = [scaled[i - time_steps:i, 0] for i in range(time_steps, len(scaled))]
    targets = [scaled[i, 0] for i in range(time_steps, len(scaled))]
    return inputs, targets


def get_stock_data(file_path, time_steps=60, split_index=2426 - 300):
    """Load a price CSV and build scaled sliding-window train/test sets.

    The column at position 2 of the CSV is used as the series (presumably
    the opening price -- confirm against the data file). Rows before
    ``split_index`` form the training set and the remaining rows the test
    set. The module-level scaler ``sc`` is fitted on the training rows only
    and then applied to the test rows.

    Args:
        file_path: Path of the CSV file passed to ``pandas.read_csv``.
        time_steps: Number of consecutive values in each input window; the
            value that follows the window is its target. Defaults to 60.
        split_index: Row position where the test set starts. Defaults to
            ``2426 - 300``, the split used originally.

    Returns:
        ``((x_train, y_train), (x_test, y_test))`` where the ``x`` arrays
        have shape ``(samples, time_steps, 1)`` and the ``y`` arrays are
        one-dimensional. Only the training pairs are shuffled.
    """
    maotai = pd.read_csv(file_path)
    training_set = maotai.iloc[0:split_index, 2:3].values
    test_set = maotai.iloc[split_index:, 2:3].values

    # Fit on the training data only, then reuse the same scaling for the test data.
    training_set_scaled = sc.fit_transform(training_set)
    test_set_scaled = sc.transform(test_set)

    x_train, y_train = _make_windows(training_set_scaled, time_steps)
    # Reseeding with the same value before each shuffle applies the same
    # permutation to both equal-length lists, so inputs stay paired with
    # their targets.
    np.random.seed(7)
    np.random.shuffle(x_train)
    np.random.seed(7)
    np.random.shuffle(y_train)
    x_train = np.array(x_train)
    y_train = np.array(y_train)
    # Reshape to (samples, time_steps, features) with a single feature.
    x_train = np.reshape(x_train, (x_train.shape[0], time_steps, 1))

    # The test windows keep their chronological order (no shuffling).
    x_test, y_test = _make_windows(test_set_scaled, time_steps)
    x_test = np.array(x_test)
    y_test = np.array(y_test)
    x_test = np.reshape(x_test, (x_test.shape[0], time_steps, 1))
    return (x_train, y_train), (x_test, y_test)
def load_local_model(model_path):
    """Return the model saved under ``model_path``, or build a new RNN.

    If ``saved_model.pb`` exists inside ``model_path`` the current time is
    printed and the saved model is loaded. Otherwise a new Sequential model
    (SimpleRNN(80) -> Dropout(0.2) -> SimpleRNN(100) -> Dropout(0.2) ->
    Dense(1)) is created and compiled with Adam (learning rate 0.001) and
    mean squared error loss.

    Args:
        model_path: Directory that holds (or will hold) the saved model.

    Returns:
        The loaded or newly built model.
    """
    # os.path.join instead of string concatenation with '/'.
    if os.path.exists(os.path.join(model_path, 'saved_model.pb')):
        print(datetime.datetime.now())
        local_model = tf.keras.models.load_model(model_path)
    else:
        local_model = tf.keras.Sequential([
            SimpleRNN(80, return_sequences=True),
            Dropout(0.2),
            SimpleRNN(100),
            Dropout(0.2),
            Dense(1),
        ])
        # NOTE(review): the pasted source lost its indentation, so it is
        # unclear whether compile() originally ran for loaded models too.
        # It is applied only to the new model here, on the assumption that
        # load_model restores the saved compile state -- confirm.
        local_model.compile(optimizer=tf.keras.optimizers.Adam(0.001),
                            loss='mean_squared_error')  # mean squared error loss
    return local_model
def show_train_line(history):
    """Plot the training and validation loss curves recorded in ``history``.

    Args:
        history: Object whose ``history`` mapping contains the ``'loss'``
            and ``'val_loss'`` series (as returned by ``model.fit`` in this
            file).
    """
    recorded = history.history
    # Look up both series before drawing anything.
    curves = [
        (recorded['loss'], 'Training Loss'),
        (recorded['val_loss'], 'Validation Loss'),
    ]
    for values, caption in curves:
        plt.plot(values, label=caption)
    plt.title('Training and Validation Loss')
    plt.legend()
    plt.show()
def stock_predict(model, x_test, y_test):
    """Predict on the test set, plot predictions against real prices, print errors.

    Predictions and targets are mapped back from the (0, 1) range to the
    original price range with the module-level scaler ``sc``. The comparison
    chart is saved to ``./model/rnn/compare.jpg`` and then displayed, and the
    MSE, RMSE and MAE between predicted and real prices are printed.

    Args:
        model: Model whose ``predict`` method is called on ``x_test``.
        x_test: Test inputs passed to ``model.predict``.
        y_test: Scaled true targets; must expose ``shape[0]``.
    """
    # Run the test inputs through the model and undo the (0, 1) scaling.
    predicted_stock_price = sc.inverse_transform(model.predict(x_test))
    # Undo the scaling of the true values; the scaler expects a 2-D column.
    real_stock_price = sc.inverse_transform(np.reshape(y_test, (y_test.shape[0], 1)))

    # Draw the real and predicted curves on the same chart.
    plt.plot(real_stock_price, color='red', label='MaoTai Stock Price')
    plt.plot(predicted_stock_price, color='blue', label='Predicted MaoTai Stock Price')
    plt.title('MaoTai Stock Price Prediction')
    plt.xlabel('Time')
    plt.ylabel('MaoTai Stock Price')
    plt.legend()

    # Save BEFORE show(): once the shown figure is closed, a later savefig()
    # would typically write a blank image.
    figure_path = './model/rnn/compare.jpg'
    os.makedirs(os.path.dirname(figure_path), exist_ok=True)
    plt.savefig(figure_path)
    plt.show()

    # Arguments follow the (y_true, y_pred) convention; the values are the
    # same either way because these metrics are symmetric.
    mse = mean_squared_error(real_stock_price, predicted_stock_price)
    rmse = math.sqrt(mse)
    mae = mean_absolute_error(real_stock_price, predicted_stock_price)
    print('均方误差: %.6f' % mse)
    print('均方根误差: %.6f' % rmse)
    print('平均绝对误差: %.6f' % mae)
if __name__ == '__main__':
    # Locations of the saved model and of the input data set.
    model_path = "./model/rnn"
    file_path = '/kaggle/input/databases/SH600519.csv'

    (x_train, y_train), (x_test, y_test) = get_stock_data(file_path)
    model = load_local_model(model_path)

    # Train, validating against the test split after every epoch.
    history = model.fit(
        x_train,
        y_train,
        batch_size=265,
        epochs=100,
        validation_data=(x_test, y_test),
        validation_freq=1,
    )
    show_train_line(history)
    model.summary()

    # Persist the trained model, then evaluate it on the test split.
    model.save(model_path, save_format="tf")
    stock_predict(model, x_test, y_test)
下载训练后的模型
(1) 保存(File -> Save Version)
选择 Save & Run All (Commit)，一定要选这个
Advanced Settings -> Always save output 或者 Save output for this version
(下面加速自己选择)
保存后等它运行完
找到刚才保存的项目
下载完成