Boston 房价数据集线性回归——sklearn & statsmodels 比较

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn import datasets, linear_model, metrics
import statsmodels.api as sm

# Load the Boston housing data once at import time. The returned object is
# dict-like; its fields (data, target, feature_names) are read as attributes
# by the functions below.
# NOTE(review): load_boston was deprecated in scikit-learn 1.0 and removed in
# 1.2 -- confirm the installed scikit-learn version still provides it.
boston = datasets.load_boston()

def skl_func():
    """Fit scikit-learn's LinearRegression on a training split and print it.

    Reads the module-level ``boston`` dataset, holds out 30% of the rows
    (``random_state=1`` keeps the split reproducible) and fits the model on
    the remaining 70%.  Only ``coef_`` is printed; the intercept is not
    part of that array and is therefore not shown.

    Returns:
        None. The results are written to standard output.
    """
    # ``sklearn.cross_validation`` was removed in scikit-learn 0.20; the
    # splitter now lives in ``sklearn.model_selection``.  Fall back to the
    # old location only for very old installations that lack the new module.
    try:
        from sklearn.model_selection import train_test_split
    except ImportError:
        from sklearn.cross_validation import train_test_split

    # ``boston.feature_names`` and ``boston.DESCR`` hold the column names and
    # the dataset description, should they be needed for inspection.
    X = boston.data
    y = boston.target

    # The held-out test portion is never used here, so it is discarded.
    x_train, _, y_train, _ = train_test_split(
        X, y, test_size=0.3, random_state=1
    )

    lm = linear_model.LinearRegression()
    lm.fit(x_train, y_train)
    print("Scikit-learn's Results")
    print(lm.coef_, '\n')

def sm_func():
    """Fit a statsmodels OLS regression on the Boston data and print it.

    Builds labelled DataFrames from the module-level ``boston`` dataset
    (feature columns named after ``boston.feature_names``, target column
    named ``MEDV``), fits ordinary least squares on all rows and prints
    the model summary.

    Returns:
        None. The summary is written to standard output.
    """
    target = pd.DataFrame(boston.target, columns=['MEDV'])
    features = pd.DataFrame(boston.data, columns=boston.feature_names)

    # statsmodels' OLS does not include an intercept term on its own, so a
    # constant column of ones is added to the features first.
    design = sm.add_constant(features)

    ols = sm.OLS(target, design)
    fitted = ols.fit()

    print("Statsmodels's Results")
    print(fitted.summary())

# Run both fits back to back so their printed results can be compared.
skl_func()
sm_func()

你可能感兴趣的:(数据挖掘,人工智能,机器学习)