方差分析一般步骤:
方差分析原假设为:
不同因子水平下反应变量的均值是相同的。
TSS = ESS + FSS,其中 TSS 为总离差平方和,ESS 为组内(误差)平方和,FSS 为组间(因子)平方和。
# One-way ANOVA sums of squares computed by hand for the HAM1/HAM3/HAM4
# return series read from managers.csv.
import pandas as pd  # first use of `pd` in the file; must be in scope here

managers = pd.read_csv('managers.csv', index_col='Date')
MANA = managers[['HAM1', 'HAM3', 'HAM4']]
print(MANA)

# Within-group (error) sum of squares: squared deviations of every
# observation from the mean of its own series, summed per series.
ess1 = ((MANA.HAM1 - MANA.HAM1.mean()) ** 2).sum()
ess3 = ((MANA.HAM3 - MANA.HAM3.mean()) ** 2).sum()
ess4 = ((MANA.HAM4 - MANA.HAM4.mean()) ** 2).sum()
ess = ess1 + ess3 + ess4

# Grand mean, taken as the mean of the three column means.
# NOTE(review): this equals the overall mean only for a balanced design,
# i.e. when no column contains missing values -- confirm for this dataset.
t_mean = MANA.mean().mean()

# Between-group (factor) sum of squares: observations per group times the
# squared deviation of each group mean from the grand mean. The group size
# is taken from the data instead of being hard-coded.
n_obs = len(MANA)
fss = n_obs * ((MANA.mean() - t_mean) ** 2).sum()

# Total sum of squares from the decomposition TSS = FSS + ESS.
tss = fss + ess
φ = MSF / MSE = (FSS / (M-1)) / (ESS / (N-M)) ~ F(M-1, N-M)
其中 M 为因子水平数(组数),N 为样本总数;MSF 为组间均方,MSE 为组内均方。
import pandas as pd
import statsmodels.stats.anova as anova
from statsmodels.formula.api import ols
# Reshape the wide table (one column per fund) into long format: one row per
# observation with the value in 'Return' and the originating fund in 'Class'.
# The three series are stacked in order, so the original Date index repeats.
fund_names = ['HAM1', 'HAM3', 'HAM4']
returns = pd.concat([MANA[name] for name in fund_names]).to_frame(name='Return')

# Label each stacked row with its fund; the repeat count comes from the data
# so the labels always line up with the concatenated rows.
returns['Class'] = [name for name in fund_names for _ in range(len(MANA))]
print(returns)

# One-way ANOVA: regress Return on the categorical factor Class and print
# the resulting ANOVA table.
model = ols('Return~C(Class)', data=returns).fit()
table1 = anova.anova_lm(model)
print(table1)
# The formulas below use a 'Year' factor, so it has to exist as a column of
# `returns` before fitting; it is derived from the Date index.
# NOTE(review): assumes the Date index values are parseable by
# pd.to_datetime -- confirm against the format used in managers.csv.
returns['Year'] = pd.to_datetime(returns.index).year

# Two-way ANOVA with main effects only (Class and Year, no interaction).
model = ols('Return~C(Class)+C(Year)', data=returns).fit()
table2 = anova.anova_lm(model)
print(table2)

# Two-way ANOVA with main effects plus the Class x Year interaction
# (the `*` operator in the formula expands to both).
model = ols('Return~C(Class)*C(Year)', data=returns).fit()
table3 = anova.anova_lm(model)
print(table3)