简单线性回归

%matplotlib inline
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import statsmodels.api as sm

from statsmodels.formula.api import ols

# Load the credit-card expenditure data set; skipinitialspace=True makes the
# parser ignore spaces that follow a delimiter.
cred = pd.read_csv("creditcard_exp.csv", skipinitialspace=True)

cred.head()

# Data cleaning for modelling: split the rows by whether avg_exp is present.
# cred2 holds the rows with a known avg_exp (used to fit the model);
# cred3 holds the rows where avg_exp is missing.
cred2 = cred.loc[cred['avg_exp'].notna()].copy()
cred3 = cred.loc[cred['avg_exp'].isna()].copy()

cred2.head()

# Correlation analysis: scatter plot of avg_exp (y-axis) against Income (x-axis)
# using only the rows that have an avg_exp value.

cred2.plot.scatter(x='Income', y='avg_exp')

# Simple linear regression of avg_exp on Income. The model is fitted on cred2,
# i.e. only on the rows whose response avg_exp is not missing.
lm_s = ols(formula='avg_exp ~ Income', data=cred2).fit()

# Estimated intercept and slope.
print(lm_s.params)

# Full regression report (rendered when this is the last expression of a
# notebook cell).
lm_s.summary()

# Prediction on the original, unfiltered data: every record gets a predicted
# avg_exp computed from the fitted coefficients.
# NOTE(review): the original author recorded the fitted line as
# avg_exp = 258.04 + 97.72 * Income; the values depend on the data file, so
# confirm them against lm_s.params.
pre = lm_s.predict(exog=cred)
cred['pre'] = pre
cred.head()

你可能感兴趣的:(python)