20. 日月光华 Python数据分析 - 机器学习 - 一元线性回归

import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

# Build a toy dataset for simple (one-variable) linear regression:
# 20 evenly spaced x values on [0, 30], and y = x plus Gaussian noise
# (standard-normal samples scaled by 3).
# NOTE: no random seed is set in the visible code, so the values recorded
# below come from one particular run and will differ when re-executed.
x = np.linspace(0,30,20)
y = x + 3*np.random.randn(20)
x
# array([ 0.        ,  1.57894737,  3.15789474,  4.73684211,  6.31578947,
#         7.89473684,  9.47368421, 11.05263158, 12.63157895, 14.21052632,
#        15.78947368, 17.36842105, 18.94736842, 20.52631579, 22.10526316,
#        23.68421053, 25.26315789, 26.84210526, 28.42105263, 30.        ])

y
# array([-2.63866254,  0.59460722, -2.71828905,  2.49031593,  8.0935728 ,
#         3.74772581,  8.38599842,  8.57329823, 12.38546919, 18.17834314,
#        16.66062374, 17.78108955, 23.23214217, 22.98644755, 22.57692316,
#        24.38489051, 28.83051157, 25.62039455, 32.72456454, 28.93943828])

# Scatter plot of the raw samples on a 10x8 inch figure.
plt.figure(figsize=(10,8))
plt.scatter(x,y)
[图:x 与 y 的散点图]
from sklearn.linear_model import LinearRegression

# Create an unfitted scikit-learn ordinary-least-squares estimator.
model = LinearRegression()
x
# array([ 0.        ,  1.57894737,  3.15789474,  4.73684211,  6.31578947,
#         7.89473684,  9.47368421, 11.05263158, 12.63157895, 14.21052632,
#        15.78947368, 17.36842105, 18.94736842, 20.52631579, 22.10526316,
#        23.68421053, 25.26315789, 26.84210526, 28.42105263, 30.        ])

y
# array([-2.63866254,  0.59460722, -2.71828905,  2.49031593,  8.0935728 ,
#         3.74772581,  8.38599842,  8.57329823, 12.38546919, 18.17834314,
#        16.66062374, 17.78108955, 23.23214217, 22.98644755, 22.57692316,
#        24.38489051, 28.83051157, 25.62039455, 32.72456454, 28.93943828])

# scikit-learn expects X as a 2-D (n_samples, n_features) array, so the
# 1-D data is reshaped into single-column arrays before fitting.
X = x.reshape(-1,1)      # -1 lets NumPy infer the row count; exactly 1 column
Y = y.reshape(-1,1)
model.fit(X, Y)
# Predict y for one new sample, x = 40 (beyond the fitted x range of 0..30).
model.predict([[40]])
# array([[44.23825314]])
# array([[44.23825314]])

# Re-display the original 1-D inputs and targets for reference.
x
# array([ 0.        ,  1.57894737,  3.15789474,  4.73684211,  6.31578947,
#         7.89473684,  9.47368421, 11.05263158, 12.63157895, 14.21052632,
#        15.78947368, 17.36842105, 18.94736842, 20.52631579, 22.10526316,
#        23.68421053, 25.26315789, 26.84210526, 28.42105263, 30.        ])

y
# array([-2.63866254,  0.59460722, -2.71828905,  2.49031593,  8.0935728 ,
#         3.74772581,  8.38599842,  8.57329823, 12.38546919, 18.17834314,
#        16.66062374, 17.78108955, 23.23214217, 22.98644755, 22.57692316,
#        24.38489051, 28.83051157, 25.62039455, 32.72456454, 28.93943828])

# Overlay the fitted regression line on the scatter of the samples.
plt.figure(figsize = (6,4))
plt.scatter(X,Y)
x1 = np.linspace(0,45).reshape(-1,1)    # grid on [0, 45] (linspace default: 50 points), reshaped to one column for predict()
plt.plot(x1,model.predict(x1))
[图:样本散点图与拟合直线]

1. 准备数据  2. 初始化模型  3. 训练模型(fit)  4. 预测  5. 评价

# Number of samples (rows) in the column-shaped input array.
len(X)
Y
# array([[-2.63866254],
#       [ 0.59460722],
#       [-2.71828905],
#       [ 2.49031593],
#       [ 8.0935728 ],
#       [ 3.74772581],
#       [ 8.38599842],
#       [ 8.57329823],
#       [12.38546919],
#       [18.17834314],
#       [16.66062374],
#       [17.78108955],
#       [23.23214217],
#       [22.98644755],
#       [22.57692316],
#       [24.38489051],
#       [28.83051157],
#       [25.62039455],
#       [32.72456454],
#       [28.93943828]])

# Predictions of the fitted model on the same data it was trained on.
Y_PRE = model.predict(X)
np.sum(np.square(Y_PRE - Y))    # loss: sum of squared differences between predictions and observed Y
model.intercept_    # fitted intercept
# array([-2.4765995])

model.coef_      # fitted slope
# array([[1.16787132]])

# Same loss for a deliberately perturbed line (slope + 0.1, same intercept).
# Ordinary least squares minimises this sum on the training data, so the
# perturbed line cannot score lower than the fitted one.
Y_PRE2 = (model.coef_ + 0.1)*X + model.intercept_
np.sum(np.square(Y_PRE2 - Y))      # sum of squared errors of the perturbed line against observed Y
# 165.7419197589115

客观地评价模型

# Hold-out evaluation: the first 15 rows are used for training and the
# remaining rows for testing. The split is positional (no shuffling), and
# since x is ascending the test rows are the largest x values.
X_train, X_test = X[:15], X[15:]
Y_train, Y_test = Y[:15], Y[15:]
X_train
# array([[ 0.        ],
#       [ 1.57894737],
#       [ 3.15789474],
#       [ 4.73684211],
#       [ 6.31578947],
#       [ 7.89473684],
#       [ 9.47368421],
#       [11.05263158],
#       [12.63157895],
#       [14.21052632],
#       [15.78947368],
#       [17.36842105],
#       [18.94736842],
#       [20.52631579],
#       [22.10526316]])

model = LinearRegression()   # fresh estimator; rebinds `model`, replacing the one fitted on all rows
model.fit(X_train, Y_train)
# Sum of squared errors on the held-out test rows.
np.sum(np.square(model.predict(X_test) - Y_test))  
# 64.39851406470771

model.coef_
# array([[1.2674295]])

model.intercept_
# array([-3.31979091])

# Test-set loss of the same line shifted up by 0.5, for comparison with the
# fitted line's test loss above.
Y_PRE3 = model.coef_*X_test + model.intercept_ + 0.5
np.sum(np.square(Y_PRE3 - Y_test))
# 78.6521397131842

# Plot all samples, the line fitted on the training rows, and the
# shifted (+0.5) line in red.
plt.scatter(X,Y)
plt.plot(X,model.predict(X))
plt.plot(X , model.coef_*X + model.intercept_ + 0.5, color = 'r')
[图:样本散点图、训练集拟合直线,以及截距上移 0.5 的红色直线]

你可能感兴趣的:(20. 日月光华 Python数据分析 - 机器学习 - 一元线性回归)