Import the packages
import pandas as pd
import numpy as np
from scipy.optimize import minimize
from scipy import stats
Read in the data
data=pd.read_csv(r'C:\Users\lenovo\Documents\计量\作业5\womenwk.csv')
data.head()
| | age | education | married | children | work | cons |
|---|---|---|---|---|---|---|
| 0 | 22 | 10 | 1 | 0 | 0 | 1 |
| 1 | 36 | 10 | 1 | 0 | 1 | 1 |
| 2 | 28 | 10 | 1 | 0 | 0 | 1 |
| 3 | 37 | 10 | 1 | 0 | 0 | 1 |
| 4 | 39 | 10 | 1 | 1 | 1 | 1 |
Before reading in the data, a column of ones named cons was added manually to the csv file to serve as the constant (intercept) term.
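If editing the file by hand is inconvenient, an equivalent approach is to create the column after loading (a minimal sketch; the column name cons matches the one used below):

data['cons'] = 1  # column of ones acting as the intercept term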
Define X and y
X=data.drop(['work'],axis=1)
y=data['work']
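Because the coefficient vector below is interpreted purely by position, it is worth confirming the column order of X before estimating (a quick sanity check):

print(list(X.columns))   # expected: ['age', 'education', 'married', 'children', 'cons']
print(X.shape, y.shape)  # X should be n x 5 and y of length n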
Define the functions used in the optimization
theta = np.zeros([1, 5])  # initial value for theta
def sigma(z):
    return 1 / (1 + np.exp(-z))  # logistic CDF, maps z into (0, 1)
def model(X, theta):
    return sigma(np.dot(X, theta.T))  # predicted probability of work = 1 for each observation
def MLE(theta):
    # negative log-likelihood of the logit model (to be minimized)
    return -np.sum(y * np.log(model(X, theta)) + (1 - y) * np.log(1 - model(X, theta)))
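If the optimizer steps into a region where a predicted probability reaches exactly 0 or 1, np.log returns -inf and the objective breaks down; a slightly more robust variant of the same objective (a sketch, with MLE_safe as an illustrative name) clips the probabilities first:

def MLE_safe(theta):
    # minimize passes theta as a flat 1-D array, so model(X, theta) returns one probability per row
    prob = np.clip(model(X, theta), 1e-10, 1 - 1e-10)  # keep probabilities strictly inside (0, 1)
    return -np.sum(y * np.log(prob) + (1 - y) * np.log(1 - prob))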
Maximizing the likelihood is the same as minimizing the negative log-likelihood
beta = minimize(MLE, theta, method='BFGS')  # quasi-Newton (BFGS) optimization
The estimated coefficients (in the column order of X) are
beta_h=np.array([ 0.05793019, 0.09825122, 0.74177801, 0.76448814, -4.15924236])
These are essentially the same as the textbook results.
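As an independent check (assuming statsmodels is available; it is not imported above), the same logit can be fit with a canned routine and the output compared with the hand-rolled estimates:

import statsmodels.api as sm
logit_res = sm.Logit(y, X).fit()  # X already contains the cons column, so no add_constant is needed
print(logit_res.params)           # coefficients, should be close to beta_h
print(logit_res.bse)              # standard errors, for comparison with std computed below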
With BFGS, minimize also returns an approximation to the inverse Hessian of the objective (the negative log-likelihood), so the estimated covariance matrix can be read off directly
Hes = beta.hess_inv   # approximate inverse Hessian of the negative log-likelihood
I = np.diagonal(Hes)  # estimated variances of the coefficients
std = np.sqrt(I)      # standard errors
Output the standard errors
array([0.01049844, 0.02006933, 0.32907809, 0.05084056, 0.30591416])
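The BFGS hess_inv is only a quasi-Newton approximation; for the logit model the observed information has the closed form X' diag(p(1-p)) X, so the covariance matrix can also be computed directly from the estimates (a sketch using the arrays above):

Xa = X.to_numpy(dtype=float)     # design matrix as a plain array
p_hat = model(Xa, beta_h)        # fitted probabilities at the estimates
W = p_hat * (1 - p_hat)          # logit weights p(1 - p)
info = Xa.T @ (Xa * W[:, None])  # information matrix X' diag(W) X
std_analytic = np.sqrt(np.diag(np.linalg.inv(info)))  # should be close to std above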
Define the Wald statistic and compute the p-values
Wald = (beta_h / std) ** 2       # Wald statistic for each coefficient: (coef / std err)^2, ~ chi2(1) under H0
p = 1 - stats.chi2.cdf(Wald, 1)  # p-value from the chi2(1) distribution
Output p
array([0., 0., 0., 0., 0.])
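Because a chi-squared(1) Wald test on a single coefficient is just the square of a two-sided z-test, the same p-values can also be recovered from the z-ratios (an equivalence check):

z = beta_h / std                    # z-ratio: coefficient over its standard error
p_z = 2 * stats.norm.sf(np.abs(z))  # two-sided normal p-value, identical to the chi2(1) p-value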
Collect the results into a table
| | coef | std | p |
|---|---|---|---|
| education | 0.09825122 | 0.02006933 | 0 |
| married | 0.74177801 | 0.32907809 | 0 |
| children | 0.76448814 | 0.05084056 | 0 |
| cons | -4.15924236 | 0.30591416 | 0 |
| age | 0.05793019 | 0.01049844 | 0 |
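The summary table can also be assembled directly from the arrays rather than typed by hand (a sketch; the rows then follow the column order of X):

results = pd.DataFrame({'coef': beta_h, 'std': std, 'p': p}, index=X.columns)
print(results)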