Python与数据分析教材代码-第十周

# -*- coding: utf-8 -*-
import numpy as np
import pandas as pd
from numpy import *
###线性回归####
#读取数据
data = pd.read_csv('http://www-bcf.usc.edu/~gareth/ISL/Advertising.csv', index_col=0)

data.head()

data.tail()

#画散点图
import seaborn as sns
import matplotlib

%matplotlib inline

sns.pairplot(data, x_vars=['TV','Radio','Newspaper'], y_vars='Sales', size=7, aspect=0.8)

sns.pairplot(data, x_vars=['TV','Radio','Newspaper'], y_vars='Sales', size=7, aspect=0.8, kind='reg')

#计算相关系数矩阵
data.corr()

#构建X、Y数据集
X = data[['TV', 'Radio', 'Newspaper']]
X.head()

y = data['Sales']
y.head()

## Solve directly with the normal-equation (coefficient matrix) formula
def standRegres(xArr,yArr):
    """Return the ordinary-least-squares weights of ``yArr`` on ``xArr``.

    Solves the normal equations ``(X^T X) w = X^T y``.

    Args:
        xArr: 2-D array-like of shape (n_samples, n_features). No intercept
            column is added here; include a column of ones if one is wanted.
        yArr: 1-D array-like with one target value per sample.

    Returns:
        A ``numpy.matrix`` of shape (n_features, 1) holding the weights, or
        ``None`` (after printing a message) when ``X^T X`` is singular.
    """
    # `asmatrix` is what the `mat` alias referred to; `mat` itself was
    # removed in NumPy 2.0.
    xMat = asmatrix(xArr)
    yMat = asmatrix(yArr).T
    # X^T X is singular exactly when X lacks full column rank.  A rank test
    # is reliable where `det(...) == 0.0` is not: rounding usually leaves a
    # tiny non-zero determinant for a singular matrix.
    if linalg.matrix_rank(xMat) < xMat.shape[1]:
        print("This matrix is singular, cannot do inverse")
        return None
    xTx = xMat.T * xMat
    # Solving the linear system avoids forming the explicit inverse.
    return asmatrix(linalg.solve(xTx, xMat.T * yMat))


# Solve for the regression equation coefficients
# Work on a copy: `X2 = X` would only alias X, so adding the intercept column
# would also add it to the feature matrix used by the later model fits.
X2 = X.copy()
# A scalar is broadcast to every row, so no row count needs to be hard-coded.
X2['intercept'] = 1
standRegres(X2,y)


## Solve the same regression with an existing library (scikit-learn)
from sklearn.linear_model import LinearRegression
linreg = LinearRegression()

linreg.fit(X, y)

# print(...) with a single argument produces the same output on Python 2
# and Python 3, unlike the bare print statement.
print(linreg.intercept_)
print(linreg.coef_)
# zip() is lazy on Python 3; materialise it so the pairs are actually shown.
print(list(zip(['TV','Radio','Newspaper'], linreg.coef_)))

## Build the training and test sets
# sklearn.cross_validation was removed in scikit-learn 0.20; prefer
# sklearn.model_selection and fall back only on very old installations.
try:
    from sklearn.model_selection import train_test_split
except ImportError:
    from sklearn.cross_validation import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=1)
linreg.fit(X_train, y_train)
# Results
print(linreg.intercept_)
print(linreg.coef_)
# zip() is lazy on Python 3; materialise it so the pairs are actually shown.
print(list(zip(['TV','Radio','Newspaper'], linreg.coef_)))

# Predict
y_pred = linreg.predict(X_test)

# Error evaluation
from sklearn import metrics

# Each message is formatted into one string so the output is identical on
# Python 2 and Python 3 (print("a", b) would print a tuple on Python 2).

# calculate MAE using scikit-learn
print("MAE: %s" % metrics.mean_absolute_error(y_test,y_pred))


# calculate MSE using scikit-learn
print("MSE: %s" % metrics.mean_squared_error(y_test,y_pred))


# calculate RMSE using scikit-learn
print("RMSE: %s" % np.sqrt(metrics.mean_squared_error(y_test,y_pred)))

## Model comparison: refit using only the TV and Radio features
feature_cols = ['TV', 'Radio']

X = data[feature_cols]
y = data.Sales

X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=1)

linreg.fit(X_train, y_train)

y_pred = linreg.predict(X_test)


# Each message is formatted into one string so the output is identical on
# Python 2 and Python 3 (print("a", b) would print a tuple on Python 2).

# calculate MAE using scikit-learn
print("MAE: %s" % metrics.mean_absolute_error(y_test,y_pred))


# calculate MSE using scikit-learn
print("MSE: %s" % metrics.mean_squared_error(y_test,y_pred))


# calculate RMSE using scikit-learn
print("RMSE: %s" % np.sqrt(metrics.mean_squared_error(y_test,y_pred)))

 

你可能感兴趣的:(Python教程)