1、数据探索和预测
import numpy as np
import matplotlib.pyplot as plt
from sklearn import datasets
# Load the Boston housing dataset and print its bundled description.
# NOTE(review): `load_boston` is no longer available in scikit-learn >= 1.2;
# this snippet needs an older release to run -- confirm the pinned version.
boston = datasets.load_boston()
print(boston.DESCR)
boston.feature_names  # notebook-style display of the feature names

# Feature column index 5 (the room count, per the original note) for every
# row, paired with the sample targets.
x, y = boston.data[:, 5], boston.target
plt.scatter(x, y)  # scatter plot of the raw data
plt.show()

np.max(y)  # notebook-style display of the largest target value

# Keep only the samples whose target is below 50.0, then plot again.
# (Presumably 50.0 is a cap on the recorded target -- confirm.)
below_cap = y < 50.0
x, y = x[below_cap], y[below_cap]
plt.scatter(x, y)
plt.show()
2.2 简单线性回归预测
# Both helpers come from the project-local ``myAlgorithm`` package and are
# imported before their first use.
from myAlgorithm.model_selection import train_test_split
from myAlgorithm.SimpleLinearRegression import SimpleLinearRegression

# Split the samples into training and test subsets. seed=666 is forwarded to
# the project helper (presumably to make the split reproducible -- confirm).
x_train, x_test, y_train, y_test = train_test_split(x, y, seed=666)
print(x_train.shape)
print(y_train.shape)
print(x_test.shape)
print(y_test.shape)

# Fit the single-feature linear model on the training subset.
reg = SimpleLinearRegression()
reg.fit(x_train, y_train)
print(reg.a_)  # 7.8608543562689555
print(reg.b_)  # -27.459342806705543

# Plot the training points together with the model's predictions for them.
plt.scatter(x_train, y_train)
plt.plot(x_train, reg.predict(x_train), color='r')
plt.show()

# Predict the targets of the test subset.
y_predict = reg.predict(x_test)
print(y_predict)
3、封装以及调用
import numpy as np
from math import sqrt
def accuracy_score(y_true, y_predict):
    """Return the fraction of positions where y_true equals y_predict.

    Args:
        y_true: Array-like of reference labels.
        y_predict: Array-like of predicted labels, same length as y_true.

    Returns:
        The number of element-wise matches divided by len(y_true).

    Raises:
        AssertionError: If the two inputs differ in length.
    """
    y_true = np.asarray(y_true)
    y_predict = np.asarray(y_predict)
    # The lengths must MATCH; the previous `!=` check rejected every valid
    # call and let mismatched inputs through.
    assert len(y_true) == len(y_predict), \
        "the size of y_true must be equal to the size of y_predict"
    return np.sum(y_true == y_predict) / len(y_true)
def mean_squared_error(y_true, y_predict):
    """Return the mean squared error (MSE) between y_true and y_predict.

    Args:
        y_true: Array-like of reference values.
        y_predict: Array-like of predicted values, same length as y_true.

    Returns:
        The sum of squared differences divided by len(y_true).

    Raises:
        AssertionError: If the two inputs differ in length.
    """
    # Coerce to arrays so plain Python sequences support element-wise math.
    y_true = np.asarray(y_true)
    y_predict = np.asarray(y_predict)
    assert len(y_true) == len(y_predict), \
        "the size of y_true must be equal to the size of y_predict"
    return np.sum((y_true - y_predict) ** 2) / len(y_true)
def root_mean_squared_error(y_true, y_predict):
    """Return the root mean squared error (RMSE) between y_true and y_predict.

    Args:
        y_true: Reference values.
        y_predict: Predicted values, same length as y_true.

    Returns:
        The square root of mean_squared_error(y_true, y_predict).
    """
    # Length validation is delegated to mean_squared_error.
    return sqrt(mean_squared_error(y_true, y_predict))
def mean_absolute_error(y_true, y_predict):
    """Return the mean absolute error (MAE) between y_true and y_predict.

    Args:
        y_true: Array-like of reference values.
        y_predict: Array-like of predicted values, same length as y_true.

    Returns:
        The sum of absolute differences divided by len(y_predict).

    Raises:
        AssertionError: If the two inputs differ in length.
    """
    # Coerce to arrays so plain Python sequences support element-wise math.
    y_true = np.asarray(y_true)
    y_predict = np.asarray(y_predict)
    assert len(y_true) == len(y_predict), \
        "the size of y_true must be equal to the size of y_predict"
    return np.sum(np.absolute(y_predict - y_true)) / len(y_predict)
3.2调用以及结果输出
from myAlgorithm.metrics import mean_squared_error
from myAlgorithm.metrics import root_mean_squared_error
from myAlgorithm.metrics import mean_absolute_error
# Evaluate the test-set predictions with the project-local metric helpers.
# The values below are the outputs recorded in the original write-up.
mean_squared_error(y_test, y_predict)
# Output: 24.156602134387438
root_mean_squared_error(y_test, y_predict)
# Output: 4.914936635846635
mean_absolute_error(y_test, y_predict)
# Output: 3.5430974409463873
3.3 模型评价:scikit-learn 中的 MSE、MAE(R方的含义见文末总结)
from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_absolute_error
# Same predictions, evaluated with scikit-learn's metric functions; the
# recorded outputs match the values noted for the project-local helpers.
mean_squared_error(y_test, y_predict)  # Output: 24.156602134387438
mean_absolute_error(y_test, y_predict)  # Output: 3.5430974409463873
总结:
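线性回归的评价指标与分类的评价指标有很大的不同。本篇介绍了均方误差MSE(预测值与真实值之差的平方和,再除以样本量)、均方根误差RMSE(将MSE开方,使误差的量纲与目标值保持一致)、平均绝对误差MAE(预测值与真实值之差的绝对值之和,再除以样本量),以及非常重要、效果非常好的R方:$R^2 = 1 - \frac{\sum_i (\hat{y}_i - y_i)^2}{\sum_i (\bar{y} - y_i)^2}$。其中分子是我们的模型预测产生的错误(较少),分母是直接用均值 $\bar{y}$ 作为预测的基准模型产生的错误(较多);用1减去两者之比,实际上衡量的是我们的模型拟合住数据的程度,即没有产生错误的那部分所占的比例。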