Python 实习:自变量选择(复决定系数、AIC、BIC、Cp 统计量)

 

import itertools
import math

import pandas as pd
import statsmodels.api as sm
from sklearn import model_selection
from statsmodels.stats.outliers_influence import variance_inflation_factor

# Best-subset selection for the regression of y on the candidate predictors:
# fit every non-empty subset by OLS and tabulate R^2, adjusted R^2, AIC, BIC
# and Mallows' Cp for each one.
#
# NOTE(review): `consume` is expected to be a DataFrame with columns
# y, x1, x2, x3 that is loaded outside this section -- confirm.
candidates = ['x1', 'x2', 'x3']

# Enumerate subsets by increasing size (x1, x2, x3, x1+x2, ..., x1+x2+x3) and
# derive both the display label and the model formula from the same tuple so
# the two can never get out of step.
subsets = [
    combo
    for size in range(1, len(candidates) + 1)
    for combo in itertools.combinations(candidates, size)
]
k = {'自变量子集': [','.join(combo) for combo in subsets]}
formulas = ['y~' + '+'.join(combo) for combo in subsets]

# Full model: its residual sum of squares provides the error-variance estimate
# SSE_m / (n - m - 1) that Cp compares every sub-model against.
model_m = sm.formula.ols('y~' + '+'.join(candidates), data=consume).fit()
n = int(model_m.nobs)      # observations actually used in the fit
m = int(model_m.df_model)  # number of predictors in the full model
sse_full = model_m.ssr
sst = model_m.centered_tss  # total sum of squares of y; same for all subsets

metrics = {'R2': [], 'Ra2': [], 'AIC': [], 'BIC': [], 'Cp': []}
for formula in formulas:
    model = sm.formula.ols(formula, data=consume).fit()
    p = int(model.df_model)  # predictors in this sub-model
    sse = model.ssr          # residual sum of squares

    R2 = 1 - sse / sst
    # Adjusted R^2 penalises R^2 for the number of predictors.
    Ra2 = 1 - (n - 1) * (1 - R2) / (n - p - 1)
    # SSE-based forms of the information criteria (constants dropped).
    AIC = n * math.log(sse) + 2 * p
    BIC = n * math.log(sse) + math.log(n) * p
    # Mallows' Cp with sigma^2 estimated from the full model.
    Cp = (n - m - 1) * sse / sse_full - n + 2 * p

    metrics['R2'].append(R2)
    metrics['Ra2'].append(Ra2)
    metrics['AIC'].append(AIC)
    metrics['BIC'].append(BIC)
    metrics['Cp'].append(Cp)

# One row per subset: label column first, then the five criteria.
df2 = pd.DataFrame({**k, **metrics})
df2

你可能感兴趣的:(python)