$$R_{it}-R_{ft}=\alpha_{i}+\beta_{i}(R_{mt}-R_{ft})+s_{i}SMB_{t}+h_{i}HMI_{t}+\epsilon_{it}$$
,我们选取市净率的倒数 $1/pb$ 作为 $HMI$、总市值 total_mv 作为 $SMB$,和资产历史收益率 $R_{it}$ 作为三因子进行回归。
随便选一只股票,查看它们之间的相关关系:
# coding=utf-8
import math
import tushare as ts
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import talib
import pandas as pd
from datetime import datetime, date
matplotlib.rcParams['axes.unicode_minus']=False
plt.rcParams['font.sans-serif']=['SimHei']
# Register the tushare API token (the value here is a placeholder -- use your own).
ts.set_token('。。。。。')
pro = ts.pro_api()
# Fetch the daily valuation indicators of one arbitrarily chosen stock.
# NOTE(review): the accompanying text talks about 1/pb, but the field requested
# here is `ps` -- confirm which ratio is intended.
df = pro.query('daily_basic', ts_code='600300.SH', fields='close,ts_code,ps,total_mv')
# `ts_code` is a string column. Correlating only the numeric columns avoids the
# error that DataFrame.corr raises on non-numeric data in newer pandas versions,
# and printing makes the matrix visible when this runs as a plain script.
print(df.select_dtypes(include='number').corr())
运行代码前需要先获取 token 码,并填入 ts.set_token 中。
完整代码
# coding=utf-8
import math
import tushare as ts
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import talib
import pandas as pd
from datetime import datetime, date
from sklearn import datasets
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
# Plot configuration: disable the Unicode minus glyph and use SimHei as the
# sans-serif font (the figure labels below are Chinese).
plt.rcParams.update({
    'axes.unicode_minus': False,
    'font.sans-serif': ['SimHei'],
})
# Register the tushare token, then create the client used by every query below.
ts.set_token('f3e00efb72token 码11477')
pro = ts.pro_api()
############################读取数据类###################################
class readData:
    """Thin wrapper around the module-level tushare client ``pro``.

    Every method forwards its arguments to a single tushare endpoint and
    returns that endpoint's result unchanged.
    """

    def read_index_daily(self, code, star, end):
        """Return daily index quotes for *code* between *star* and *end*.

        Only the ``ts_code``, ``trade_date``, ``close`` and ``change`` fields
        are requested.
        """
        dsb = pro.index_daily(ts_code=code, start_date=star, end_date=end,
                              fields='ts_code,trade_date,close,change')
        return dsb

    def read_daily(self, code, star, end):
        """Return daily stock quotes (``ts_code``, ``trade_date``, ``close``)."""
        dsc1 = pro.daily(ts_code=code, start_date=star, end_date=end,
                         fields='ts_code,trade_date,close')
        return dsc1

    def read_CPI(self, star, end):
        """Return monthly CPI (``month``, ``nt_yoy``); months look like '201609'."""
        dc = pro.cn_cpi(start_m=star, end_m=end, fields='month,nt_yoy')
        return dc

    def read_GDP(self, star, end):
        """Return quarterly GDP (``quarter``, ``gdp_yoy``); quarters look like '2016Q4'."""
        df1 = pro.cn_gdp(start_q=star, end_q=end, fields='quarter,gdp_yoy')
        return df1

    def read_bond(self, code, star, end):
        """Return the ``cb_daily`` quotes for *code* between *star* and *end*."""
        df = pro.cb_daily(ts_code=code, start_date=star, end_date=end)
        # The original fetched the data but never returned it, so callers
        # always received None.
        return df

    def read_base(self, code):
        """Return the ``daily_basic`` indicators for *code*.

        No date range is passed; the requested fields are ``close``,
        ``ts_code``, ``pb``, ``total_mv`` and ``trade_date``.
        """
        df = pro.query('daily_basic', ts_code=code,
                       fields='close,ts_code,pb,total_mv,trade_date')
        return df
#####################################################################
# Back-test window as YYYYMMDD strings.
# (Original note: GDP is published with a delay, so a one-month lag is used,
# i.e. first-quarter GDP is only released in April.)
start_time, end_time = '20200110', "20200331"
# Three separate reader handles, created in the order dc, dsc1, dsb1.
dc, dsc1, dsb1 = (readData() for _ in range(3))
def alpha_fun(code):
    """Fit a three-regressor linear model for one stock and derive its "alpha".

    The ``daily_basic`` rows of *code* are merged on ``trade_date`` with the
    ``000300.SH`` index quotes for the module-level ``start_time`` ..
    ``end_time`` window. A ``LinearRegression`` is then fitted on a 70/30
    train/test split (``random_state=0``).

    Regressors, in column order:
        1. index ``change`` divided by the next row's index close, minus ``4/252``
        2. ``1 / pb`` (NaN replaced by 0)
        3. ``total_mv``

    Returns:
        ``(alpha, intercept, coefficients, test_score, merged_df)`` where
        ``alpha`` is the fitted intercept minus ``4/252``.

    NOTE(review): the dependent variable is the stock's close price
    (``close_y``), not a return -- confirm this is intended.
    NOTE(review): ``4/252`` looks like a 4% annual risk-free rate spread over
    252 trading days -- confirm the units.
    """
    def column(values):
        # Same conversion as the original: wrap, then flatten to an (n, 1) array.
        return np.reshape(np.array([values]), (-1, 1))

    stock = dsb1.read_base(code).fillna(0)
    index = dsc1.read_index_daily('000300.SH', start_time, end_time)
    for frame in (index, stock):
        frame.set_index(['trade_date'], inplace=True)
    # Overlapping column names get suffixes: *_x from the index, *_y from the stock.
    df = pd.merge(index, stock, on='trade_date').fillna(0)

    target = column(df.close_y)
    market = column((df.change / df.close_x.shift(-1)).fillna(0))
    hmi = column((1 / df.pb).fillna(0))
    smb = column(df.total_mv)
    features = np.hstack((market - 4 / 252, hmi, smb))

    X_train, X_test, y_train, y_test = train_test_split(
        features, target, test_size=0.3, random_state=0)
    model = LinearRegression()
    model.fit(X_train, y_train)
    alpha = model.intercept_ - 4 / 252
    return alpha, model.intercept_, model.coef_, model.score(X_test, y_test), df
def Sy_function(df1, star, end):
    """Return the cumulative and daily simple returns of one stock.

    Args:
        df1: stock code, forwarded as ``ts_code`` (despite the name it is not
            a DataFrame).
        star: start date, forwarded as ``start_date``.
        end: end date, forwarded as ``end_date``.

    Returns:
        ``(cum, ret)``: two Series indexed by trading date in ascending order.
        ``ret`` is ``change`` divided by the previous trading day's close;
        ``cum`` is the compounded return ``cumprod(1 + ret) - 1``. NaN values
        are replaced by 0 in both.
    """
    df = pro.query('daily', ts_code=df1, start_date=star, end_date=end, fields='')
    # Dates in this file are YYYYMMDD strings, so the format must say so; the
    # previous '%Y-%m-%d' does not match and can raise ValueError on strict
    # pandas versions.
    df.index = pd.to_datetime(df.trade_date, format='%Y%m%d')
    # Sort by date, not by the default integer index, so that the previous row
    # really is the previous trading day and compounding runs forward in time.
    df = df.sort_index()
    ret = df.change / df.close.shift(1)
    cum = np.cumprod(1 + ret) - 1
    return cum.fillna(0), ret.fillna(0)
# Universe: every stock code with a daily_basic record on 2020-02-03.
co = pro.query('daily_basic', ts_code="", trade_date="20200203", fields='ts_code')
code_list = []  # codes that passed the alpha screen
N = 300  # stock pool: only the first N codes are screened
k = 0  # number of stocks actually added to the portfolio
ret = 0  # running sum of the selected stocks' daily returns
cum = 0
for i in co.ts_code.values[0:N]:
    try:
        if alpha_fun(i)[0] < -10:
            daily_ret = Sy_function(str(i), start_time, end_time)[1]
            # Align on date and treat a missing day as a 0 return; plain `+`
            # would put NaN on every date that any single stock lacks.
            ret = daily_ret.add(ret, fill_value=0)
            # Count the stock only after its returns were fetched successfully,
            # otherwise a failure here would inflate the divisor below.
            k += 1
            code_list.append(i)
    except ValueError:
        # Best effort: a stock whose data cannot be regressed or parsed is skipped.
        continue
if k == 0:
    raise RuntimeError("no stock in the pool passed the alpha screen (alpha < -10)")
ret = ret.sort_index(axis=0, ascending=True)
# Equal-weight portfolio return: the average, not the sum, of the k stocks.
RET = ret / k
# Compound the averaged return so that `cum` and `RET` describe the same portfolio.
cum = np.cumprod(1 + RET) - 1
####################计算收益率函数,如沪深300#####################################
def JZ_function(code, star, end):
    """Return the cumulative simple return of an index (e.g. the CSI 300).

    Args:
        code: index code, forwarded as ``ts_code``.
        star: start date, forwarded as ``start_date``.
        end: end date, forwarded as ``end_date``.

    Returns:
        A Series indexed by trading date in ascending order holding
        ``cumprod(1 + ret) - 1``, where ``ret`` is ``change`` divided by the
        previous trading day's close. The first day, which has no previous
        close, is reported as 0.
    """
    df12 = pro.index_daily(ts_code=code, start_date=star, end_date=end)
    # Dates in this file are YYYYMMDD strings; '%Y-%m-%d' does not match them.
    df12.index = pd.to_datetime(df12.trade_date, format='%Y%m%d')
    # Put the rows in chronological order (the original comment asked for the
    # order to be reversed, but sorting the default integer index changed nothing).
    df12 = df12.sort_index()
    ret12 = df12.change / df12.close.shift(1)
    cum12 = np.cumprod(1 + ret12) - 1
    return cum12.fillna(0)
#############################策略的年化统计######################################
def Tongji(RET, cum):
    """Print annualised performance statistics for a strategy.

    Args:
        RET: Series of daily returns (presumably fractions, since they are
            multiplied by 100 to obtain percentages).
        cum: Series of cumulative returns for the same strategy.

    Prints the annualised return, Sharpe ratio, annualised volatility and
    maximum drawdown. Returns None.

    Raises:
        ValueError: if ``cum`` has no non-NaN observation.
    """
    valid_cum = cum.dropna()
    if valid_cum.empty:
        raise ValueError("cum contains no valid observations")
    # Use the last valid cumulative return, selected by position. The previous
    # `cum[-2]` relied on integer keys falling back to positions on a
    # date-indexed Series and skipped the final observation.
    NH = valid_cum.iloc[-1] * 100 * 252 / len(RET.index)
    BD = np.std(RET) * 100 * np.sqrt(252)
    # NOTE(review): 400/252 is subtracted from an annualised percentage --
    # confirm the intended risk-free rate and its units.
    SR = (NH - 400 / 252) / BD
    # Maximum drawdown: the largest relative fall of the wealth curve (1 + cum)
    # from its running peak. The previous version only measured the fall after
    # the global peak and divided by the cumulative return instead of wealth.
    wealth = 1 + cum
    running_peak = wealth.cummax()
    MHC = ((running_peak - wealth) / running_peak).max() * 100
    print("年化收益率:{:.2f}%:,年化夏普率:{:.2f},波动率为:{:.2f}%,最大回撤:{:.2f}%".format(NH, SR, BD, MHC))
############################################################################
if __name__ == "__main__":
    # Benchmark curve: cumulative return of the 000300.SH index over the window.
    cum12 = JZ_function('000300.SH', start_time, end_time)
    Tongji(RET, cum)
    plt.plot(cum12, label="沪深300", color='b')
    plt.plot(cum, label="股票组合", color='r')
    plt.title("alpha股+指期对冲策略")
    plt.legend()
    # Without show() no figure appears when this runs as a plain script.
    plt.show()
    # Optional single-stock check of the regression output:
    # a = alpha_fun('600300.SH')
    # print("alpha: {}".format(a[0]))
    # print("intercept: {}".format(a[1]))
    # print(f"coefficients: {a[2]}")
    # print(f"score: {a[3]:.4f}")
FF三因子(改进代码+结果)
参考资料:
https://baike.so.com/doc/4284549-4487889.html