预测外汇汇率可以使用很多不同的机器学习或深度学习模型。这里以英镑对美元为例,采用最简单的线性回归模型,作为将来更复杂模型的基准。
本文的全部代码可以在我的 GitHub 上找到。
import os
import sys
import uuid
import time
import json
from pathlib import Path
##### Import my module build from python file
module_path = str(Path.cwd() / "py")
debug_data_path = str(Path.cwd() / "debug_data")
if module_path not in sys.path:
    sys.path.append(module_path)
import build as build
import utils
from utils import get_module_version
log = utils.setup_logging(__name__)
##### Import common machine learning modules
import numpy as np
import pandas as pd
import pickle
log.debug('numpy version: %s' % np.__version__)
##### Import sklearn
import sklearn
from sklearn.linear_model import LinearRegression
from sklearn import metrics
from sklearn import preprocessing, svm
from sklearn.model_selection import train_test_split
log.debug('sklearn version: %s' % sklearn.__version__)
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import StandardScaler
# Window size handed to build.get_feature_label_sklearn as default_window_size.
MODEL_WINDOW_SIZE = 60

# Share of the samples that train_test_split holds out as the test set.
SKLEARN_TEST_SIZE = 0.2

# Tab-separated price history export that the training data is read from.
debug_data_csv = "debug_data/EURUSD_M1_202012150519_202103251444.csv"
def get_training_float_data_from_exported_csv(csv_path=None, price_column='<CLOSE>'):
    """Load one column of a tab-separated history export as a Python list.

    Args:
        csv_path: Path of the tab-separated CSV file to read. When omitted,
            the module-level ``debug_data_csv`` is used.
        price_column: Header of the column to return.
            NOTE(review): the lookup key in the published code was an empty
            string, which looks like a ``<...>`` header that was lost when the
            post was rendered as HTML. ``'<CLOSE>'`` is assumed because the
            file name resembles a MetaTrader history export -- confirm
            against the actual CSV header.

    Returns:
        The values of ``price_column`` converted with ``tolist()``.

    Raises:
        KeyError: If ``price_column`` is not a column of the file. The message
            lists the columns that are available.
    """
    if csv_path is None:
        csv_path = debug_data_csv

    history_dataset = pd.read_csv(csv_path, sep='\t')

    # Fail with a message that names the available headers instead of a bare
    # KeyError, so a wrong column name is easy to diagnose.
    if price_column not in history_dataset.columns:
        raise KeyError(
            "column %r not found in %s; available columns: %s"
            % (price_column, csv_path, list(history_dataset.columns))
        )

    values = history_dataset[price_column].values.tolist()
    log.debug(type(values))
    return values
# Build the (features, label) samples from the exported price series.
float_data = get_training_float_data_from_exported_csv()
X, y = build.get_feature_label_sklearn(float_data, default_window_size=MODEL_WINDOW_SIZE)
print("Features: " + str(len(X[-1])))

# Splitting the data into training and testing data.
# shuffle=False keeps the original sample order, so the test set is the
# trailing SKLEARN_TEST_SIZE share of the samples.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=SKLEARN_TEST_SIZE, random_state=None, shuffle=False
)
log.debug(type(X_test[0]))
print(X_test[0])
print(pd.DataFrame(X_test))

# Scale the features and fit the regression as a single estimator.
scaler = MinMaxScaler()
model = LinearRegression()
pipeline = Pipeline([('scaler', scaler), ('lr', model)])
pipeline.fit(X_train, y_train)

# Score through the pipeline: the regression was fitted on scaled features,
# so calling model.score on the raw X_test would evaluate it on inputs it was
# never trained for.
log.debug("Linear regression score: %f" % pipeline.score(X_test, y_test))
# The coefficients and intercept below apply to the scaled features.
log.debug("Coeffience: ")
log.debug(model.coef_)
log.debug("Intercept: %f" % model.intercept_)

# Save the whole pipeline, not just the regression step; otherwise the fitted
# scaler is lost and the stored model cannot be applied to raw features.
# NOTE(review): assumes build.save_sklearn_model / load_sklearn_model can
# round-trip any estimator object -- confirm in the build module.
sklearn_model_file_pickle = 'saved_model/ff_sklearn_pickle.pkl'
build.save_sklearn_model(pipeline, sklearn_model_file_pickle)

# Load the pipeline back and predict with the reloaded object, so the saved
# artifact itself is what gets evaluated.
loaded_pipeline = build.load_sklearn_model(sklearn_model_file_pickle)

# Predict X_test, all test samples
y_pred = loaded_pipeline.predict(X_test)
print('Mean Absolute Error:', metrics.mean_absolute_error(y_test, y_pred))
print('Mean Squared Error:', metrics.mean_squared_error(y_test, y_pred))
print('Root Mean Squared Error:', np.sqrt(metrics.mean_squared_error(y_test, y_pred)))
print('R2 score:', metrics.r2_score(y_test, y_pred))
样例输出为:
Mean Absolute Error: 8.40318366615625e-05
Mean Squared Error: 1.6048633406592367e-08
Root Mean Squared Error: 0.00012668320096442294
R2 score: 0.9990856998275862