2019-11-17

1、数据探索和预测

import numpy as np

import matplotlib.pyplot as plt

from sklearn import datasets

# Load the Boston housing data set and print its bundled description.
boston = datasets.load_boston()
print(boston.DESCR)

# Bare expression: has no visible effect when run as a plain script.
boston.feature_names

# Feature: every row of the sixth column (number of rooms).
# Labels: the data set's target values.
x = boston.data[:, 5]
y = boston.target

# Scatter plot of the raw feature against the target.
plt.scatter(x, y)
plt.show()

# Bare expression: has no visible effect when run as a plain script.
np.max(y)

# Keep only the samples whose target is below 50.0, then plot again.
# Both right-hand sides are evaluated before either name is rebound, so the
# mask is always built from the unfiltered y.
x, y = x[y < 50.0], y[y < 50.0]
plt.scatter(x, y)
plt.show()

平均房价对应的档数

2.2 简单线性回归预测

# Project-local imports. Both are placed before first use: the original
# imported SimpleLinearRegression only after instantiating it, and both
# import statements were missing the space before/after `import`.
from myAlgorithm.model_selection import train_test_split
from myAlgorithm.SimpleLinearRegression import SimpleLinearRegression

# Split the samples into training and test parts.
# NOTE(review): seed=666 is presumably there to make the split reproducible;
# confirm against myAlgorithm.model_selection.train_test_split.
x_train, x_test, y_train, y_test = train_test_split(x, y, seed=666)

print(x_train.shape)
print(y_train.shape)
print(x_test.shape)
print(y_test.shape)

# Fit the project's simple linear regression on the training split.
reg = SimpleLinearRegression()
reg.fit(x_train, y_train)

print(reg.a_)  # 7.8608543562689555
print(reg.b_)  # -27.459342806705543

# Training samples with the fitted line drawn in red.
plt.scatter(x_train, y_train)
plt.plot(x_train, reg.predict(x_train), color='r')
plt.show()

# Predict on the held-out test split.
y_predict = reg.predict(x_test)
print(y_predict)

3、封装以及调用

import numpy as np

from math import sqrt

def accuracy_score(y_true, y_predict):
    """Return the fraction of positions where y_true equals y_predict.

    Args:
        y_true: array of reference labels (must expose ``.shape``).
        y_predict: array of predicted labels with the same length.

    Returns:
        The number of element-wise matches divided by ``len(y_true)``.

    Raises:
        AssertionError: if the two arrays differ in length.
    """
    # The original check used `!=`, which rejected every valid pair of
    # equal-length inputs and let mismatched ones through.
    assert y_true.shape[0] == y_predict.shape[0], \
        "the size of y_true must be equal to the size of y_predict"

    return np.sum(y_true == y_predict) / len(y_true)

def mean_squared_error(y_true, y_predict):
    """Return the mean squared error (MSE) between y_true and y_predict.

    The squared element-wise differences are summed and divided by
    ``len(y_true)``.

    Raises:
        AssertionError: if the two inputs differ in length.
    """
    assert len(y_true) == len(y_predict), \
        "the size of y_true must be equal to the size of y_predict"

    residuals = y_true - y_predict
    squared_total = np.sum(residuals ** 2)
    return squared_total / len(y_true)

def root_mean_squared_error(y_true, y_predict):
    """Return the root mean squared error (RMSE) between y_true and y_predict.

    This is the square root of ``mean_squared_error(y_true, y_predict)``.
    """
    mse = mean_squared_error(y_true, y_predict)
    return sqrt(mse)

def mean_absolute_error(y_true, y_predict):
    """Return the mean absolute error (MAE) between y_true and y_predict.

    The absolute element-wise differences are summed and divided by
    ``len(y_predict)``.

    Raises:
        AssertionError: if the two inputs differ in length.
    """
    assert len(y_true) == len(y_predict), \
        "the size of y_true must be equal to the size of y_predict"

    abs_errors = np.absolute(y_predict - y_true)
    return np.sum(abs_errors) / len(y_predict)

3.2调用以及结果输出

from myAlgorithm.metrics import (
    mean_squared_error,
    root_mean_squared_error,
    mean_absolute_error,
)

# Score the test-set predictions with the project's own metric functions.
# The values in the comments are the outputs recorded by the author.
mean_squared_error(y_test, y_predict)
# Output: 24.156602134387438

root_mean_squared_error(y_test, y_predict)
# Output: 4.914936635846635

mean_absolute_error(y_test, y_predict)
# Output: 3.5430974409463873

3.3模型评价R方

# These imports rebind mean_squared_error / mean_absolute_error to
# scikit-learn's implementations for comparison with the project versions.
from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import r2_score

mean_squared_error(y_test, y_predict)  # Output: 24.156602134387438

mean_absolute_error(y_test, y_predict)  # Output: 3.5430974409463873

# R squared (coefficient of determination): the metric this section is
# about, which the original snippet never computed.
r2_score(y_test, y_predict)

总结：

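线性回归的评价指标与分类的评价指标有很大的不同。本篇介绍了均方误差MSE（预测值与真实值之差的平方和，再除以样本量）、均方根误差RMSE（将MSE开方，使误差的量纲与目标值保持一致）、平均绝对误差MAE（预测值与真实值之差的绝对值之和，再除以样本量），以及非常重要的、效果非常好的R方：$R^2 = 1 - \frac{\sum_i (\hat{y}_i - y_i)^2}{\sum_i (\bar{y} - y_i)^2}$。其中分子是我们的模型预测产生的错误（较少），分母是直接用均值 $\bar{y}$ 作为预测（基准模型）产生的错误（较多）；因此用1减去较少的错误除以较多的错误，实际上是衡量了我们的模型拟合住数据的程度，即没有产生错误的相应指标。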

2019-11-17

你可能感兴趣的:(2019-11-17)