import pandas as pd
from statsmodels.formula.api import ols
# Simple linear regression of average card spend (avg_exp) on Income.
#
# Steps: load the data, split rows by whether the target is present, inspect
# the relationship with a scatter plot, fit an OLS model on the labelled rows,
# then score every row of the original frame.
#
# NOTE: the bare expressions below (`.head()`, `.summary()`) only render
# output in an interactive / notebook session; they are kept for that use.

# Load the raw data; skipinitialspace drops blanks that follow the delimiter.
cred = pd.read_csv("creditcard_exp.csv", skipinitialspace=True)
cred.head()

# Data cleaning for modelling: compute the "target is present" mask once and
# derive both subsets from it so they are guaranteed to be complementary.
has_target = cred['avg_exp'].notnull()
cred2 = cred[has_target].copy()   # rows with a known avg_exp -> used to fit
cred3 = cred[~has_target].copy()  # rows with a missing avg_exp -> to be predicted
cred2.head()

# Correlation analysis: scatter plot of Income against avg_exp.
cred2.plot(x='Income', y='avg_exp', kind='scatter')

# Linear regression. The response y must not contain missing values when
# fitting, which is why the model is trained on cred2 rather than cred.
lm_s = ols('avg_exp ~ Income', data=cred2).fit()
print(lm_s.params)
lm_s.summary()

# Prediction: score the original (unfiltered) data so that every record gets
# a fitted value computed from the regression coefficients, i.e.
#   avg_exp = Intercept + slope * Income
# The original author's note recorded avg_exp = 258.04 + 97.72 * Income; the
# actual coefficients depend on the data, so consult lm_s.params.
pre = lm_s.predict(cred)
cred['pre'] = pre
cred.head()