参考论文:https://www.nature.com/articles/srep01684
import pandas as pd
import numpy as np
import datetime as dt
import matplotlib.pyplot as plt
import seaborn as sns
import quandl
%matplotlib inline
import warnings; warnings.simplefilter('ignore') #忽略可能会出现的警告信息,警告并不是错误,可以忽略
1. 策略思想
1.如果当周的谷歌Debt搜索量 > 过去三周平均,则做空道琼斯指数,持仓一周;
2.如果当周的谷歌Debt搜索量 < 过去三周平均,则做多道琼斯指数,持仓一周;
2. 数据整理
2.1 读取论文数据
# Read the paper's data set; fields in this file are separated by single spaces.
# The path is a raw string because it contains Windows backslashes: in a normal
# literal, "\m" and "\p" are invalid escape sequences (a warning today, an
# error in future Python versions). The resulting path value is unchanged.
paper = pd.read_csv(r'D:\material\paper_data.csv',
                    sep=' ',
                    parse_dates=True)
# Preview the first rows to check that the columns were split as expected.
paper.head()
# Build the working frame from the paper's columns under shorter names.
# Both date columns are parsed to datetimes here because the later merge and
# re-indexing steps need real datetime values rather than strings.
data = pd.DataFrame({
    'Google_week': pd.to_datetime(paper['Google End Date']),
    'Debt': paper['debt'].astype(np.float64),
    'Date': pd.to_datetime(paper['DJIA Date']),
    'DJClose': paper['DJIA Closing Price'].astype(np.float64),
})
2.2 读取我们自己下载的谷歌搜索指数数据
注意:论文作者对其数据做过归一化(normalize)处理,因此虽然与我们自己下载的数据相关性非常高,但两者仍然存在差别;
# Load the Google Trends export that we downloaded ourselves.
trends_download = pd.read_csv('data/debt_google_trend.csv')

# Keep only the last space-separated token of each 'Week' entry and parse it
# as a date (presumably the end date of the week range -- confirm against the
# CSV) so it can be matched against 'Google_week'.
trends_download['Week'] = trends_download['Week'].apply(
    lambda week: pd.to_datetime(week.split(' ')[-1]))

# Join the paper data with the download on the week date, drop the now
# redundant join key, index by the DJIA date, and label each search-volume
# series by where it came from.
all_data = (
    pd.merge(data, trends_download, left_on='Google_week', right_on='Week')
    .drop('Week', axis=1)
    .set_index('Date')
    .rename(columns={'Debt': 'Debt_paper',
                     'debt': 'Debt_download'})
)

# The authors' series is normalized; check how closely it tracks the series
# downloaded from Google Trends by looking at their correlation.
both_trends = all_data.set_index('Google_week')[['Debt_paper', 'Debt_download']]
both_trends.corr()
3. 交易信号和交易逻辑
# Re-index by the Google Trends week so the rolling windows run over search weeks.
all_data = all_data.reset_index().set_index('Google_week')

# Baseline for each week: the mean search volume of the three preceding weeks.
# shift(1) keeps the current week out of its own average.
all_data['MA_p'] = all_data['Debt_paper'].shift(1).rolling(window=3).mean()
all_data['MA_d'] = all_data['Debt_download'].shift(1).rolling(window=3).mean()

# Trading signal: -1 (short) when this week's volume is above the baseline,
# otherwise +1 (long).
all_data['signal_p'] = np.where(all_data['Debt_paper'] > all_data['MA_p'], -1, 1)
all_data['signal_d'] = np.where(all_data['Debt_download'] > all_data['MA_d'], -1, 1)

# The first three rows have no baseline yet (NaN). A comparison against NaN is
# False, so np.where marked them as long; reset them to 0 (no position).
# This must be positional: the index now holds Google_week dates, so an integer
# label slice through .loc does not address the first rows.
all_data.iloc[:3, all_data.columns.get_indexer(['signal_p', 'signal_d'])] = 0
4. 计算策略收益并可视化
# Period-over-period simple return of the DJIA close.
weekly_return = all_data['DJClose'].pct_change()
all_data['pct_change'] = weekly_return

# Strategy return: the position decided in the previous row (signal shifted by
# one) earns the current row's index return.
for signal_col, ret_col in (('signal_p', 'ret_p'), ('signal_d', 'ret_d')):
    all_data[ret_col] = weekly_return * all_data[signal_col].shift(1)

# Cumulative return: running product of (1 + per-period return).
for ret_col, cum_col in (('ret_p', 'cumret_p'), ('ret_d', 'cumret_d')):
    all_data[cum_col] = all_data[ret_col].add(1).cumprod()

# Show the last ten values, then plot both cumulative-return curves.
equity_curves = all_data[['cumret_p', 'cumret_d']]
equity_curves.tail(10)
equity_curves.plot(figsize=(12, 6));