配对交易策略 Pair Trading
0. 引库
import pandas as pd
import numpy as np
import tushare as ts
import seaborn
from matplotlib import pyplot as plt
# Matplotlib 3.6 renamed the bundled seaborn styles to 'seaborn-v0_8*' and the
# bare 'seaborn' alias was removed in later releases; prefer the new name when
# this Matplotlib provides it and fall back to the legacy name otherwise.
plt.style.use('seaborn-v0_8' if 'seaborn-v0_8' in plt.style.available else 'seaborn')
%matplotlib inline
# Read the price table and use its 'date' column as the row index.
data = pd.read_csv('pair-trade-data.csv').set_index('date')
data.head()
|
000568 |
000858 |
date |
|
|
2010/1/4 |
27.488118 |
26.117536 |
2010/1/5 |
27.335123 |
26.391583 |
2010/1/6 |
26.941707 |
25.694008 |
2010/1/7 |
26.388011 |
24.913389 |
2010/1/8 |
26.825140 |
24.863562 |
# Line chart of every column in `data` against the date index; the trailing ';'
# suppresses the notebook's echo of the returned Axes object.
data.plot(figsize=(8, 6));
2. 策略开发思路
# Raw price spread per row: column 000568 minus column 000858.
data['priceDelta'] = data['000568'].sub(data['000858'])
data.head()
|
000568 |
000858 |
priceDelta |
date |
|
|
|
2010/1/4 |
27.488118 |
26.117536 |
1.370582 |
2010/1/5 |
27.335123 |
26.391583 |
0.943540 |
2010/1/6 |
26.941707 |
25.694008 |
1.247699 |
2010/1/7 |
26.388011 |
24.913389 |
1.474622 |
2010/1/8 |
26.825140 |
24.863562 |
1.961578 |
# Spread over time, with a horizontal reference line at its full-sample mean.
data['priceDelta'].plot(figsize=(8, 6));
plt.ylabel('Spread')
plt.axhline(data['priceDelta'].mean());
# Standardise the spread using the mean and np.std of the whole sample
# (np.std uses NumPy's default ddof=0).
data['zscore'] = data['priceDelta'].sub(np.mean(data['priceDelta'])).div(np.std(data['priceDelta']))
data.head()
|
000568 |
000858 |
priceDelta |
zscore |
date |
|
|
|
|
2010/1/4 |
27.488118 |
26.117536 |
1.370582 |
0.569895 |
2010/1/5 |
27.335123 |
26.391583 |
0.943540 |
0.500520 |
2010/1/6 |
26.941707 |
25.694008 |
1.247699 |
0.549932 |
2010/1/7 |
26.388011 |
24.913389 |
1.474622 |
0.586796 |
2010/1/8 |
26.825140 |
24.863562 |
1.961578 |
0.665903 |
# Number of rows whose z-score is above the +1.5 band.
int((data['zscore'] > 1.5).sum())
17
# Translate z-score bands into a target position for the first leg (000568):
#   |z| < 0.5 -> 0 (flat),  z < -1.5 -> +1,  z > 1.5 -> -1,  anything else -> NaN.
# The three bands are disjoint, so the order in which they are tested is irrelevant.
data['position_1'] = np.select(
    [data['zscore'].abs() < 0.5, data['zscore'] < -1.5, data['zscore'] > 1.5],
    [0, 1, -1],
    default=np.nan,
)
data.head()
|
000568 |
000858 |
priceDelta |
zscore |
position_1 |
date |
|
|
|
|
|
2010/1/4 |
27.488118 |
26.117536 |
1.370582 |
0.569895 |
NaN |
2010/1/5 |
27.335123 |
26.391583 |
0.943540 |
0.500520 |
NaN |
2010/1/6 |
26.941707 |
25.694008 |
1.247699 |
0.549932 |
NaN |
2010/1/7 |
26.388011 |
24.913389 |
1.474622 |
0.586796 |
NaN |
2010/1/8 |
26.825140 |
24.863562 |
1.961578 |
0.665903 |
NaN |
产生交易信号
# Carry the most recent signal forward across NaN rows; rows before the first
# signal have nothing to carry and are set to 0 (flat).
data['position_1'] = data['position_1'].ffill().fillna(0)
data['position_1'].plot(ylim=[-1.1, 1.1], figsize=(10, 6));
# The second leg always holds the opposite sign of the first leg
# (negating a 0.0 position yields -0.0, which is what the table output shows).
data['position_2'] = -np.sign(data['position_1'])
data['position_2'].plot(ylim=[-1.1, 1.1], figsize=(10, 6));
3. 计算策略年化收益并可视化
# Log return of each ticker relative to the previous row; the first row has no
# predecessor, so its NaN is replaced with 0.
data['returns_1'] = np.log(data['000568'].div(data['000568'].shift())).fillna(0)
data['returns_2'] = np.log(data['000858'].div(data['000858'].shift())).fillna(0)
data.head(10)
|
000568 |
000858 |
priceDelta |
zscore |
position_1 |
position_2 |
returns_1 |
returns_2 |
date |
|
|
|
|
|
|
|
|
2010/1/4 |
27.488118 |
26.117536 |
1.370582 |
0.569895 |
0.0 |
-0.0 |
0.000000 |
0.000000 |
2010/1/5 |
27.335123 |
26.391583 |
0.943540 |
0.500520 |
0.0 |
-0.0 |
-0.005581 |
0.010438 |
2010/1/6 |
26.941707 |
25.694008 |
1.247699 |
0.549932 |
0.0 |
-0.0 |
-0.014497 |
-0.026787 |
2010/1/7 |
26.388011 |
24.913389 |
1.474622 |
0.586796 |
0.0 |
-0.0 |
-0.020766 |
-0.030852 |
2010/1/8 |
26.825140 |
24.863562 |
1.961578 |
0.665903 |
0.0 |
-0.0 |
0.016430 |
-0.002002 |
2010/1/11 |
25.936311 |
24.631037 |
1.305274 |
0.559285 |
0.0 |
-0.0 |
-0.033696 |
-0.009396 |
2010/1/12 |
26.409867 |
25.336916 |
1.072951 |
0.521543 |
0.0 |
-0.0 |
0.018094 |
0.028255 |
2010/1/13 |
26.577433 |
25.137609 |
1.439824 |
0.581143 |
0.0 |
-0.0 |
0.006325 |
-0.007897 |
2010/1/14 |
28.420660 |
26.109231 |
2.311428 |
0.722738 |
0.0 |
-0.0 |
0.067054 |
0.037924 |
2010/1/15 |
28.253094 |
26.208885 |
2.044209 |
0.679327 |
0.0 |
-0.0 |
-0.005913 |
0.003810 |
# Each leg earns the position held on the previous row times this row's log
# return; the two legs are weighted 50/50.
data['strategy'] = (
    data['position_1'].shift(1).mul(data['returns_1']).mul(0.5)
    + data['position_2'].shift(1).mul(data['returns_2']).mul(0.5)
)
# Cumulative log returns exponentiated give the growth of one unit; show the last row only.
data[['returns_1', 'returns_2', 'strategy']].dropna().cumsum().apply(np.exp).tail(1)
|
returns_1 |
returns_2 |
strategy |
date |
|
|
|
2019/4/8 |
2.470158 |
3.837651 |
0.986754 |
# Plot the growth curves (exp of cumulative log returns) of both tickers and the strategy.
data[['returns_1','returns_2','strategy']].dropna().cumsum().apply(np.exp).plot(figsize=(10, 6));
Pair trading 策略 - 小范围时间(2013.6-2014.12)
# Read the shorter-window price table and use its 'date' column as the row index.
data2 = pd.read_csv('pair-trade-data2.csv').set_index('date')
data2.head()
|
000568 |
000858 |
date |
|
|
2013/6/3 |
20.719056 |
20.343053 |
2013/6/4 |
20.357220 |
20.060867 |
2013/6/5 |
20.514540 |
20.274644 |
2013/6/6 |
20.113374 |
20.172031 |
2013/6/7 |
19.704342 |
19.667508 |
# Line chart of both price columns for the shorter window.
data2.plot(figsize=(8, 6));
# Compute the spread from data2's own columns so this section does not depend
# on another DataFrame being loaded and index-aligned on 'date'.
data2['priceDelta'] = data2['000568'] - data2['000858']
data2.head()
|
000568 |
000858 |
priceDelta |
date |
|
|
|
2013/6/3 |
20.719056 |
20.343053 |
0.376004 |
2013/6/4 |
20.357220 |
20.060867 |
0.296353 |
2013/6/5 |
20.514540 |
20.274644 |
0.239896 |
2013/6/6 |
20.113374 |
20.172031 |
-0.058657 |
2013/6/7 |
19.704342 |
19.667508 |
0.036833 |
# Spread over time, with a horizontal reference line at its sample mean.
data2['priceDelta'].plot(figsize=(8, 6));
plt.ylabel('Spread')
plt.axhline(data2['priceDelta'].mean());
# Standardise the spread using the mean and np.std of the whole window
# (np.std uses NumPy's default ddof=0).
data2['zscore'] = data2['priceDelta'].sub(np.mean(data2['priceDelta'])).div(np.std(data2['priceDelta']))
data2.head()
|
000568 |
000858 |
priceDelta |
zscore |
date |
|
|
|
|
2013/6/3 |
20.719056 |
20.343053 |
0.376004 |
0.048513 |
2013/6/4 |
20.357220 |
20.060867 |
0.296353 |
0.000596 |
2013/6/5 |
20.514540 |
20.274644 |
0.239896 |
-0.033369 |
2013/6/6 |
20.113374 |
20.172031 |
-0.058657 |
-0.212979 |
2013/6/7 |
19.704342 |
19.667508 |
0.036833 |
-0.155532 |
# Number of rows whose z-score is above the +1.5 band.
int((data2['zscore'] > 1.5).sum())
40
# Number of rows whose z-score is below the -1.5 band.
int((data2['zscore'] < -1.5).sum())
16
# Translate z-score bands into a target position for the first leg (000568):
#   |z| < 0.5 -> 0 (flat),  z < -1.5 -> +1,  z > 1.5 -> -1,  anything else -> NaN.
# The three bands are disjoint, so the order in which they are tested is irrelevant.
data2['position_1'] = np.select(
    [data2['zscore'].abs() < 0.5, data2['zscore'] < -1.5, data2['zscore'] > 1.5],
    [0, 1, -1],
    default=np.nan,
)
data2.head()
|
000568 |
000858 |
priceDelta |
zscore |
position_1 |
date |
|
|
|
|
|
2013/6/3 |
20.719056 |
20.343053 |
0.376004 |
0.048513 |
0.0 |
2013/6/4 |
20.357220 |
20.060867 |
0.296353 |
0.000596 |
0.0 |
2013/6/5 |
20.514540 |
20.274644 |
0.239896 |
-0.033369 |
0.0 |
2013/6/6 |
20.113374 |
20.172031 |
-0.058657 |
-0.212979 |
0.0 |
2013/6/7 |
19.704342 |
19.667508 |
0.036833 |
-0.155532 |
0.0 |
# Hold the latest signal until the next one arrives; rows before the first signal are flat (0).
data2['position_1'] = data2['position_1'].ffill().fillna(0)
data2['position_1'].plot(ylim=[-1.1, 1.1], figsize=(10, 6));
# Second leg mirrors the first with the opposite sign.
data2['position_2'] = np.negative(np.sign(data2['position_1']))
data2['position_2'].plot(ylim=[-1.1, 1.1], figsize=(10, 6));
# Log return of each ticker relative to the previous row; the first row's NaN becomes 0.
data2['returns_1'] = np.log(data2['000568'].div(data2['000568'].shift())).fillna(0)
data2['returns_2'] = np.log(data2['000858'].div(data2['000858'].shift())).fillna(0)
data2.head(10)
|
000568 |
000858 |
priceDelta |
zscore |
position_1 |
position_2 |
returns_1 |
returns_2 |
date |
|
|
|
|
|
|
|
|
2013/6/3 |
20.719056 |
20.343053 |
0.376004 |
0.048513 |
0.0 |
-0.0 |
0.000000 |
0.000000 |
2013/6/4 |
20.357220 |
20.060867 |
0.296353 |
0.000596 |
0.0 |
-0.0 |
-0.017618 |
-0.013968 |
2013/6/5 |
20.514540 |
20.274644 |
0.239896 |
-0.033369 |
0.0 |
-0.0 |
0.007698 |
0.010600 |
2013/6/6 |
20.113374 |
20.172031 |
-0.058657 |
-0.212979 |
0.0 |
-0.0 |
-0.019749 |
-0.005074 |
2013/6/7 |
19.704342 |
19.667508 |
0.036833 |
-0.155532 |
0.0 |
-0.0 |
-0.020546 |
-0.025329 |
2013/6/13 |
19.562754 |
19.012515 |
0.550239 |
0.153334 |
0.0 |
-0.0 |
-0.007212 |
-0.033871 |
2013/6/14 |
19.617816 |
19.012515 |
0.605301 |
0.186459 |
0.0 |
-0.0 |
0.002811 |
0.000000 |
2013/6/17 |
19.255979 |
18.720423 |
0.535556 |
0.144501 |
0.0 |
-0.0 |
-0.018616 |
-0.015482 |
2013/6/18 |
19.405434 |
18.853192 |
0.552241 |
0.154539 |
0.0 |
-0.0 |
0.007731 |
0.007067 |
2013/6/19 |
19.956054 |
19.269202 |
0.686852 |
0.235521 |
0.0 |
-0.0 |
0.027979 |
0.021826 |
# Each leg earns the position held on the previous row times this row's log
# return; the two legs are weighted 50/50.
data2['strategy'] = (
    data2['position_1'].shift(1).mul(data2['returns_1']).mul(0.5)
    + data2['position_2'].shift(1).mul(data2['returns_2']).mul(0.5)
)
# Cumulative log returns exponentiated give the growth of one unit; show the last row only.
data2[['returns_1', 'returns_2', 'strategy']].dropna().cumsum().apply(np.exp).tail(1)
|
returns_1 |
returns_2 |
strategy |
date |
|
|
|
2014/12/31 |
0.892955 |
0.97347 |
1.12623 |
# Growth curves (exp of cumulative log returns) of both tickers and the strategy.
data2[['returns_1', 'returns_2', 'strategy']].dropna().cumsum().apply(np.exp).plot(figsize=(10, 6));
# Mean per-row log return scaled by 252 to annualise it.
data2[['returns_1', 'returns_2', 'strategy']].dropna().mean().mul(252)
returns_1 -0.073915
returns_2 -0.017554
strategy 0.077608
dtype: float64
# Per-row standard deviation of log returns scaled by sqrt(252) to annualise it.
data2[['returns_1', 'returns_2', 'strategy']].dropna().std().mul(252 ** 0.5)
returns_1 0.300306
returns_2 0.280425
strategy 0.057016
dtype: float64
# Growth of one unit under the strategy, and the running peak of that curve.
data2['cumret'] = np.exp(data2['strategy'].dropna().cumsum())
data2['cummax'] = data2['cumret'].cummax()
# Gap between the running peak and the current value, measured in absolute
# units of the curve (it is not divided by the peak); report the largest gap.
drawdown = data2['cummax'].sub(data2['cumret'])
drawdown.max()
0.03645280148896235
Pair trading 策略 - 考虑时间序列平稳性
import pandas as pd
import numpy as np
import tushare as ts
import seaborn
from matplotlib import pyplot as plt
# Matplotlib 3.6 renamed the bundled seaborn styles to 'seaborn-v0_8*' and the
# bare 'seaborn' alias was removed in later releases; prefer the new name when
# this Matplotlib provides it and fall back to the legacy name otherwise.
plt.style.use('seaborn-v0_8' if 'seaborn-v0_8' in plt.style.available else 'seaborn')
%matplotlib inline
1. 数据准备
# Read the price table for this section and use its 'date' column as the row index.
data3 = pd.read_csv('pair-trade-data2.csv').set_index('date')
data3.head()
|
000568 |
000858 |
date |
|
|
2013/6/3 |
20.719056 |
20.343053 |
2013/6/4 |
20.357220 |
20.060867 |
2013/6/5 |
20.514540 |
20.274644 |
2013/6/6 |
20.113374 |
20.172031 |
2013/6/7 |
19.704342 |
19.667508 |
# Line chart of both price columns in `data3`; the trailing ';' suppresses the notebook echo.
data3.plot(figsize=(8,6));
2. 策略开发思路
# Pairwise Pearson correlation between the columns of data3 (pandas' default method, spelled out).
data3.corr(method='pearson')
|
000568 |
000858 |
000568 |
1.000000 |
0.552409 |
000858 |
0.552409 |
1.000000 |
# Scatter of the two price series that are regressed below. The points come
# from data3 so the picture shows the same sample the fit is computed on.
plt.figure(figsize=(10, 8))
plt.title('Stock Correlation')
plt.plot(data3['000568'], data3['000858'], '.');
plt.xlabel('000568')
plt.ylabel('000858')
# Drop incomplete rows from the frame that is actually fitted; np.polyfit
# cannot handle NaN inputs.
data3.dropna(inplace=True)
# Degree-1 least-squares fit of the second column on the first column;
# both coefficients are rounded to 2 decimals before being used.
slope, intercept = np.polyfit(data3.iloc[:, 0], data3.iloc[:, 1], 1).round(2)
slope, intercept
(0.51, 7.82)
# Regression residual: second column minus the fitted line evaluated on the first column.
data3['spread'] = data3.iloc[:, 1].sub(data3.iloc[:, 0].mul(slope).add(intercept))
data3.head()
|
000568 |
000858 |
spread |
date |
|
|
|
2013/6/3 |
20.719056 |
20.343053 |
1.956334 |
2013/6/4 |
20.357220 |
20.060867 |
1.858684 |
2013/6/5 |
20.514540 |
20.274644 |
1.992228 |
2013/6/6 |
20.113374 |
20.172031 |
2.094210 |
2013/6/7 |
19.704342 |
19.667508 |
1.798294 |
# Residual spread over time.
data3['spread'].plot(figsize=(10, 8), title='Price Spread');
# Standardise the spread with its sample mean and pandas' .std() (default ddof=1).
data3['zscore'] = data3['spread'].sub(data3['spread'].mean()).div(data3['spread'].std())
data3.head()
|
000568 |
000858 |
spread |
zscore |
date |
|
|
|
|
2013/6/3 |
20.719056 |
20.343053 |
1.956334 |
1.452385 |
2013/6/4 |
20.357220 |
20.060867 |
1.858684 |
1.382488 |
2013/6/5 |
20.514540 |
20.274644 |
1.992228 |
1.478078 |
2013/6/6 |
20.113374 |
20.172031 |
2.094210 |
1.551075 |
2013/6/7 |
19.704342 |
19.667508 |
1.798294 |
1.339261 |
# Z-score over time with horizontal reference lines at +1.5, 0 and -1.5
# (+/-1.5 are the entry thresholds used when building position_1).
data3['zscore'].plot(figsize = (10,8),title = 'Z-score')
plt.axhline(1.5)
plt.axhline(0)
plt.axhline(-1.5)
产生交易信号
# Translate z-score bands into a target position for the first leg (000568).
# The spread here is 000858 minus the fitted value from 000568, so the signs
# are the mirror image of a plain price-difference signal:
#   |z| < 0.5 -> 0 (flat),  z < -1.5 -> -1,  z > 1.5 -> +1,  anything else -> NaN.
# The bands are disjoint, so the order in which they are tested is irrelevant.
data3['position_1'] = np.select(
    [data3['zscore'].abs() < 0.5, data3['zscore'] < -1.5, data3['zscore'] > 1.5],
    [0, -1, 1],
    default=np.nan,
)
# Hold the latest signal until the next one arrives; rows before the first signal are flat (0).
data3['position_1'] = data3['position_1'].ffill().fillna(0)
data3['position_1'].plot(ylim=[-1.1, 1.1], figsize=(10, 6), title='Trading Signal_Uptrade');
# Second leg mirrors the first with the opposite sign.
data3['position_2'] = np.negative(np.sign(data3['position_1']))
data3['position_2'].plot(ylim=[-1.1, 1.1], figsize=(10, 6), title='Trading Signal_Downtrade');
3. 计算策略年化收益并可视化
# Log return of each ticker relative to the previous row (the first row stays NaN).
data3['returns_1'] = np.log(data3['000568'].div(data3['000568'].shift()))
data3['returns_2'] = np.log(data3['000858'].div(data3['000858'].shift()))
# Each leg earns the position held on the previous row times this row's log
# return; the two legs are weighted 50/50.
data3['strategy'] = (
    data3['position_1'].shift(1).mul(data3['returns_1']).mul(0.5)
    + data3['position_2'].shift(1).mul(data3['returns_2']).mul(0.5)
)
# Cumulative log returns exponentiated give the growth of one unit; show the last row only.
data3[['returns_1', 'returns_2', 'strategy']].dropna().cumsum().apply(np.exp).tail(1)
|
returns_1 |
returns_2 |
strategy |
date |
|
|
|
2014/12/31 |
0.892955 |
0.97347 |
1.174494 |
# Growth curves (exp of cumulative log returns) of both tickers and the strategy.
data3[['returns_1', 'returns_2', 'strategy']].dropna().cumsum().apply(np.exp).plot(figsize=(10, 8), title='Strategy_Backtesting');
# Mean per-row log return scaled by 252 to annualise it.
data3[['returns_1', 'returns_2', 'strategy']].dropna().mean().mul(252)
returns_1 -0.073915
returns_2 -0.017554
strategy 0.105002
dtype: float64
# Per-row standard deviation of log returns scaled by sqrt(252) to annualise it.
data3[['returns_1', 'returns_2', 'strategy']].dropna().std().mul(252 ** 0.5)
returns_1 0.300306
returns_2 0.280425
strategy 0.068639
dtype: float64
# Growth of one unit under the strategy, and the running peak of that curve.
data3['cumret'] = np.exp(data3['strategy'].dropna().cumsum())
data3['cummax'] = data3['cumret'].cummax()
# Gap between the running peak and the current value, measured in absolute
# units of the curve (it is not divided by the peak); report the largest gap.
drawdown = data3['cummax'].sub(data3['cumret'])
drawdown.max()
0.038159777097367176
策略的思考
- 对多只ETF进行配对交易,是很多实盘量化基金的交易策略;
策略的风险和问题:
- Spread不回归的风险:当市场结构发生重大改变时,用过去历史数据回归出来的Spread可能不再回归,这是策略的重大风险;
- 中国市场做空受到限制,策略中有部分做空的收益是无法获得的;
- 回归系数需要Rebalancing;
- 策略没有考虑交易成本和其他成本;