IC:
因子暴露度:因子的值,一般是经过了数据处理之后的值
收益率
API:
import scipy.stats as st
st.spearmanr(因子, 收益率)
示例: 手动计算因子的IC值
import scipy.stats as st
import pandas as pd
import numpy as np
# 导入去极值,标准化函数
# 去极值
def med_method(factor, n=3):
    """Clip outliers using the median-absolute-deviation (MAD) rule.

    Values outside ``median +/- n * 1.4826 * MAD`` are replaced by the
    nearest bound. NaN entries are skipped when the median and MAD are
    computed and are returned unchanged, so a single missing value no
    longer turns the clipping into a no-op.

    Args:
        factor: 1-D array-like of numeric factor values.
        n: Half-width of the accepted band, in scaled-MAD units. Defaults
            to 3, the value that used to be hard-coded.

    Returns:
        A float ``numpy.ndarray`` with the same length as ``factor``.
    """
    values = np.asarray(factor, dtype=float)
    # 1. Median of the data and median absolute deviation around it.
    center = np.nanmedian(values)
    mad = np.nanmedian(np.abs(values - center))
    # 2. Scaled MAD (the notes call this MAD_e).
    mad_e = 1.4826 * mad
    # 3-4. Clamp everything outside [center - n*mad_e, center + n*mad_e].
    return np.clip(values, center - n * mad_e, center + n * mad_e)
# 自实现标准化
# (x - mean) / std
def stand_method(factor):
    """Standardize ``factor`` as ``(x - mean) / std``.

    The mean and standard deviation come from ``np.mean`` and ``np.std``
    with their default arguments.

    Args:
        factor: Numeric array or pandas Series of factor values.

    Returns:
        The standardized values, produced by arithmetic on ``factor``
        itself. When the standard deviation is exactly zero (constant
        input) the centred values, all zeros, are returned instead of the
        NaN that a division by zero would give.
    """
    mean = np.mean(factor)
    std = np.std(factor)
    if std == 0:
        # Constant input carries no cross-sectional information; avoid 0/0.
        return factor - mean
    return (factor - mean) / std
# 1. Factor exposure
# 1) Fetch pe_ratio for the stocks returned by industry('C39').
stocks = industry('C39')
q = query(fundamentals.eod_derivative_indicator.pe_ratio).filter(fundamentals.stockcode.in_(stocks))
fund = get_fundamentals(q, entry_date="20170105")[:,0,:]
# print("fund的值为:\n", fund)
# 2) Drop rows with missing factor values.
fund = fund.dropna()
# 3) Winsorize, then standardize.
fund['pe_ratio'] = med_method(fund['pe_ratio'])
fund['pe_ratio'] = stand_method(fund['pe_ratio'])
# print(fund['pe_ratio'])
# 2. Returns for the same stocks
# 1) Close prices for the two days; transposed so that (presumably) each row
#    is a stock and each column a date - the iloc column access below relies
#    on that layout.
price = get_price(list(fund.index),start_date="20170105", end_date='20170106', fields='close').T
# 2) Drop stocks that lack a price on either day.
price = price.dropna()
# 3) Return from the first column's close to the second column's close.
return_rate = price.iloc[:,1]/price.iloc[:,0] - 1
# 4) Align factor and return on the stocks present in BOTH series. The default
#    outer join would re-introduce the stocks dropped above as NaN rows, and
#    spearmanr would then return NaN.
data = pd.concat([fund['pe_ratio'], return_rate], axis=1, join="inner")
# 3. Rank correlation between factor exposure and return (the IC).
pe_ratio_ic = st.spearmanr(data['pe_ratio'], data[0])
使用Alphalens 因子分析工具
alphalens.utils.get_clean_factor_and_forward_returns(factor, prices,...)
factor:因子数据
prices:价格数据
准备Alphalens通用数据格式的流程
代码:
使用Alphalens对某个因子在某一年的每天的IC进行分析
import pandas as pd
import numpy as np
# 去极值
def med_method(factor, n=3):
    """Clip outliers using the median-absolute-deviation (MAD) rule.

    Values outside ``median +/- n * 1.4826 * MAD`` are replaced by the
    nearest bound. NaN entries are skipped when the median and MAD are
    computed and are returned unchanged, so a single missing value no
    longer turns the clipping into a no-op.

    Args:
        factor: 1-D array-like of numeric factor values.
        n: Half-width of the accepted band, in scaled-MAD units. Defaults
            to 3, the value that used to be hard-coded.

    Returns:
        A float ``numpy.ndarray`` with the same length as ``factor``.
    """
    values = np.asarray(factor, dtype=float)
    # 1. Median of the data and median absolute deviation around it.
    center = np.nanmedian(values)
    mad = np.nanmedian(np.abs(values - center))
    # 2. Scaled MAD (the notes call this MAD_e).
    mad_e = 1.4826 * mad
    # 3-4. Clamp everything outside [center - n*mad_e, center + n*mad_e].
    return np.clip(values, center - n * mad_e, center + n * mad_e)
# 自实现标准化
# (x - mean) / std
def stand_method(factor):
    """Standardize ``factor`` as ``(x - mean) / std``.

    The mean and standard deviation come from ``np.mean`` and ``np.std``
    with their default arguments.

    Args:
        factor: Numeric array or pandas Series of factor values.

    Returns:
        The standardized values, produced by arithmetic on ``factor``
        itself. When the standard deviation is exactly zero (constant
        input) the centred values, all zeros, are returned instead of the
        NaN that a division by zero would give.
    """
    mean = np.mean(factor)
    std = np.std(factor)
    if std == 0:
        # Constant input carries no cross-sectional information; avoid 0/0.
        return factor - mean
    return (factor - mean) / std
# Analysis window: the trading days between 2017-01-01 and 2018-01-01.
date = get_trading_dates(start_date="20170101", end_date="20180101")
# 1) Build the factor data. Target format: Series with a (date, stock) MultiIndex.
#    Factor analysed: earnings_per_share.
#    Each daily cross-section is tagged with its trading date, the pieces are
#    stacked vertically, and the factor is then winsorized and standardized.
frames = []
for day in date:
    q = query(fundamentals.financial_indicator.earnings_per_share)
    fund = get_fundamentals(q, entry_date=day)[:, 0, :]
    # Tag the cross-section with its trading date.
    fund["date"] = day
    frames.append(fund)
# Stack all cross-sections with a single concat instead of re-copying the
# accumulated frame on every iteration.
all_data = pd.concat(frames)
all_data = all_data.set_index(["date", all_data.index])
# Drop missing factor values first: med_method takes a plain median, which is
# NaN when any input is NaN, and that would disable the outlier clipping.
all_data = all_data.dropna()
# Factor processing: winsorize, then standardize.
all_data["earnings_per_share"] = med_method(all_data["earnings_per_share"])
all_data["earnings_per_share"] = stand_method(all_data["earnings_per_share"])
# Factor series is ready.
factor = all_data["earnings_per_share"]
# 2) Build the price data. Target format: pd.DataFrame.
#    Universe: everything returned by all_instruments().
stocks = all_instruments()
stocks = stocks["order_book_id"]
prices = get_price(list(stocks), start_date="20170101", end_date="20180101", fields="close")
# Import alphalens.
from alphalens import tears, performance, plotting, utils
factor_data = utils.get_clean_factor_and_forward_returns(factor, prices)
IC = performance.factor_information_coefficient(factor_data)
# Plot the IC time series with its moving average.
plotting.plot_ic_ts(IC)
# Plot the IC histogram.
plotting.plot_ic_hist(IC)
# Mean IC per month.
mean_monthly_ic = performance.mean_information_coefficient(factor_data, by_time="1m")
# Heat map of the monthly mean IC.
plotting.plot_monthly_ic_heatmap(mean_monthly_ic)
# Full information tear sheet.
tears.create_information_tear_sheet(factor_data)
# Share of days whose IC (first column) exceeds 0.02.
a = IC.iloc[:, 0][IC.iloc[:, 0] > 0.02]
rate = len(a) / len(IC)
IC综合分析
因子收益率分析
因子分析好了,接下来干嘛?
单个因子的回测表现
示例:分组测试单个因子的年化收益情况
import numpy as np
# 流程分析:
# 目标:按分位数将该因子(earnings_per_share)的股票分成5组,分别进行回测,看年化收益
# 确定区间:2017-01-01~2018-01-01
# 确定选股范围:全部A股
# 确定调仓周期:按月
# 对因子进行处理:缺失值处理、去极值、标准化
# 分组选股:5组分别进行回测
def init(context):
    """Initialise the strategy.

    Stores the quantile group to back-test on ``context.group`` (read by
    ``select_stocks``) and registers ``select_stocks`` with the scheduler.

    Args:
        context: Strategy context object supplied by the platform.
    """
    # Group to back-test; select_stocks treats any value other than 1-4 as
    # the top group.
    context.group = 2
    # Monthly timer (tradingday=1 is presumably the first trading day of the
    # month - confirm against the platform docs).
    scheduler.run_monthly(select_stocks, tradingday=1)
def select_stocks(context, bar_dict):
    """Pick one quantile group of earnings_per_share and rebalance into it.

    The factor is fetched, cleaned (missing values dropped, winsorized,
    standardized) and split into five quantile groups. The group chosen by
    ``context.group`` is stored on ``context.stocks``. Holdings outside the
    group are sold and the group is bought with equal weights.

    Args:
        context: Strategy context; reads ``context.group`` and
            ``context.portfolio.positions``, writes ``context.stocks``.
        bar_dict: Bar data supplied by the scheduler (unused).
    """
    # Fetch the factor.
    q = query(fundamentals.financial_indicator.earnings_per_share)
    fund = get_fundamentals(q).T

    # Factor processing: drop missing values, winsorize, standardize.
    fund = fund.dropna()
    fund["earnings_per_share"] = med_method(fund["earnings_per_share"])
    fund["earnings_per_share"] = stand_method(fund["earnings_per_share"])
    # print(fund)
    factor = fund["earnings_per_share"]

    # Quantile grouping. Groups 1-4 are selected explicitly; any other value
    # falls through to the top group, as before.
    n_groups = 5
    group = context.group if context.group in (1, 2, 3, 4) else n_groups
    lower = factor.quantile((group - 1) / n_groups)
    upper = factor.quantile(group / n_groups)
    # Half-open intervals [lower, upper), with the top group closed above, so
    # a stock sitting exactly on a quantile boundary belongs to exactly one
    # group. Winsorized values tie at the clipping bound, so a strict ">"
    # could leave the top group empty.
    if group == 1:
        data = factor[factor < upper]
    elif group == n_groups:
        data = factor[factor >= lower]
    else:
        data = factor[(factor >= lower) & (factor < upper)]

    # Selected stocks.
    context.stocks = data.index
    print("选出来的股票为:\n", context.stocks)

    # Rebalance - sell: close held positions that are no longer selected.
    # Iterate over a snapshot in case ordering mutates the positions mapping.
    for stock in list(context.portfolio.positions.keys()):
        held = context.portfolio.positions[stock].quantity > 0
        if held and stock not in context.stocks:
            order_target_percent(stock, 0)

    # Rebalance - buy: equal weight across the selected stocks. A fixed 1/20
    # weight over-allocates the portfolio when the group has more than 20
    # members.
    if len(context.stocks) > 0:
        weight = 1.0 / len(context.stocks)
        for stock in context.stocks:
            order_target_percent(stock, weight)
# before_trading此函数会在每天策略交易开始前被调用,当天只会被调用一次
def before_trading(context):
    """Hook called once per day before trading starts; intentionally a no-op.

    Args:
        context: Strategy context object (unused).
    """
# 你选择的证券的数据更新将会触发此段逻辑,例如日或分钟历史数据切片或者是实时数据切片更新
def handle_bar(context, bar_dict):
    """Hook triggered by data updates for the selected securities; a no-op.

    Args:
        context: Strategy context object (unused).
        bar_dict: Bar data for the update (unused).
    """
# after_trading函数会在每天交易结束后被调用,当天只会被调用一次
def after_trading(context):
    """Hook called once per day after trading ends; intentionally a no-op.

    Args:
        context: Strategy context object (unused).
    """
# 去极值
def med_method(factor, n=3):
    """Clip outliers using the median-absolute-deviation (MAD) rule.

    Values outside ``median +/- n * 1.4826 * MAD`` are replaced by the
    nearest bound. NaN entries are skipped when the median and MAD are
    computed and are returned unchanged, so a single missing value no
    longer turns the clipping into a no-op.

    Args:
        factor: 1-D array-like of numeric factor values.
        n: Half-width of the accepted band, in scaled-MAD units. Defaults
            to 3, the value that used to be hard-coded.

    Returns:
        A float ``numpy.ndarray`` with the same length as ``factor``.
    """
    values = np.asarray(factor, dtype=float)
    # 1. Median of the data and median absolute deviation around it.
    center = np.nanmedian(values)
    mad = np.nanmedian(np.abs(values - center))
    # 2. Scaled MAD (the notes call this MAD_e).
    mad_e = 1.4826 * mad
    # 3-4. Clamp everything outside [center - n*mad_e, center + n*mad_e].
    return np.clip(values, center - n * mad_e, center + n * mad_e)
# 自实现标准化
# (x - mean) / std
def stand_method(factor):
    """Standardize ``factor`` as ``(x - mean) / std``.

    The mean and standard deviation come from ``np.mean`` and ``np.std``
    with their default arguments.

    Args:
        factor: Numeric array or pandas Series of factor values.

    Returns:
        The standardized values, produced by arithmetic on ``factor``
        itself. When the standard deviation is exactly zero (constant
        input) the centred values, all zeros, are returned instead of the
        NaN that a division by zero would give.
    """
    mean = np.mean(factor)
    std = np.std(factor)
    if std == 0:
        # Constant input carries no cross-sectional information; avoid 0/0.
        return factor - mean
    return (factor - mean) / std
方法
示例:多因子合成
# 合成以下因子:
# earnings_per_share;fully_diluted_earnings_per_share;return_on_equity
# 1、获取这三个因子的横截面数据
# 2、因子处理、缺失值、去极值、标准化
# 3、PCA流程
import pandas as pd
# 1. Collect the daily cross-sections of the three factors.
date = get_trading_dates(start_date="20170101", end_date="20180101")
frames = []
for day in date:
    q = query(fundamentals.financial_indicator.fully_diluted_earnings_per_share,
              fundamentals.financial_indicator.earnings_per_share,
              fundamentals.financial_indicator.return_on_equity)
    fund = get_fundamentals(q, entry_date=day)[:, 0, :]
    frames.append(fund)
# Stack every trading day's cross-section vertically with a single concat
# instead of re-copying the accumulated frame on every iteration.
all_data = pd.concat(frames)
# Drop rows with missing values.
all_data = all_data.dropna()
import numpy as np
def med_method(factor, n=3):
    """Clip outliers using the median-absolute-deviation (MAD) rule.

    Values outside ``median +/- n * 1.4826 * MAD`` are replaced by the
    nearest bound. NaN entries are skipped when the median and MAD are
    computed and are returned unchanged, so a single missing value no
    longer turns the clipping into a no-op.

    Args:
        factor: 1-D array-like of numeric factor values.
        n: Half-width of the accepted band, in scaled-MAD units. Defaults
            to 3, the value that used to be hard-coded.

    Returns:
        A float ``numpy.ndarray`` with the same length as ``factor``.
    """
    values = np.asarray(factor, dtype=float)
    # 1. Median of the data and median absolute deviation around it.
    center = np.nanmedian(values)
    mad = np.nanmedian(np.abs(values - center))
    # 2. Scaled MAD (the notes call this MAD_e).
    mad_e = 1.4826 * mad
    # 3-4. Clamp everything outside [center - n*mad_e, center + n*mad_e].
    return np.clip(values, center - n * mad_e, center + n * mad_e)
# 自实现标准化
# (x - mean) / std
def stand_method(factor):
    """Standardize ``factor`` as ``(x - mean) / std``.

    The mean and standard deviation come from ``np.mean`` and ``np.std``
    with their default arguments.

    Args:
        factor: Numeric array or pandas Series of factor values.

    Returns:
        The standardized values, produced by arithmetic on ``factor``
        itself. When the standard deviation is exactly zero (constant
        input) the centred values, all zeros, are returned instead of the
        NaN that a division by zero would give.
    """
    mean = np.mean(factor)
    std = np.std(factor)
    if std == 0:
        # Constant input carries no cross-sectional information; avoid 0/0.
        return factor - mean
    return (factor - mean) / std
# Factor columns to clean, in the order they are processed.
factor_columns = (
    "fully_diluted_earnings_per_share",
    "earnings_per_share",
    "return_on_equity",
)
# First pass: winsorize every column.
for column in factor_columns:
    all_data[column] = med_method(all_data[column])
# Second pass: standardize every (already winsorized) column.
for column in factor_columns:
    all_data[column] = stand_method(all_data[column])
from sklearn.decomposition import PCA
# PCA dimensionality reduction: build a transformer that keeps a single
# component, then fit it and project the factor table in one call.
transfer = PCA(n_components=1)
data = transfer.fit_transform(all_data)