import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
# Synthetic 1-D regression data: 20 evenly spaced abscissae on [0, 30] and
# targets equal to x plus standard-normal noise scaled by 3.
# The generator is not seeded, so re-running draws different noise than the
# values recorded in the output comments below.
x = np.linspace(0, 30, num=20)
y = x + np.random.randn(x.size) * 3
x
# array([ 0. , 1.57894737, 3.15789474, 4.73684211, 6.31578947,
# 7.89473684, 9.47368421, 11.05263158, 12.63157895, 14.21052632,
# 15.78947368, 17.36842105, 18.94736842, 20.52631579, 22.10526316,
# 23.68421053, 25.26315789, 26.84210526, 28.42105263, 30. ])
y
# array([-2.63866254, 0.59460722, -2.71828905, 2.49031593, 8.0935728 ,
# 3.74772581, 8.38599842, 8.57329823, 12.38546919, 18.17834314,
# 16.66062374, 17.78108955, 23.23214217, 22.98644755, 22.57692316,
# 24.38489051, 28.83051157, 25.62039455, 32.72456454, 28.93943828])
# Scatter plot of the noisy samples on a 10x8 figure.
plt.figure(figsize=(10, 8))
plt.scatter(x, y)
from sklearn.linear_model import LinearRegression
# Linear-regression estimator constructed with its default arguments.
model = LinearRegression()
x
# array([ 0. , 1.57894737, 3.15789474, 4.73684211, 6.31578947,
# 7.89473684, 9.47368421, 11.05263158, 12.63157895, 14.21052632,
# 15.78947368, 17.36842105, 18.94736842, 20.52631579, 22.10526316,
# 23.68421053, 25.26315789, 26.84210526, 28.42105263, 30. ])
y
# array([-2.63866254, 0.59460722, -2.71828905, 2.49031593, 8.0935728 ,
# 3.74772581, 8.38599842, 8.57329823, 12.38546919, 18.17834314,
# 16.66062374, 17.78108955, 23.23214217, 22.98644755, 22.57692316,
# 24.38489051, 28.83051157, 25.62039455, 32.72456454, 28.93943828])
# Turn each flat vector into a single column (any number of rows, 1 column);
# -1 lets NumPy infer the row count.
X = np.reshape(x, (-1, 1))
Y = np.reshape(y, (-1, 1))
model.fit(X, Y)
# Query the fitted line at x = 40, beyond the sampled interval [0, 30].
model.predict([[40]])
# array([[44.23825314]])
x
# array([ 0. , 1.57894737, 3.15789474, 4.73684211, 6.31578947,
# 7.89473684, 9.47368421, 11.05263158, 12.63157895, 14.21052632,
# 15.78947368, 17.36842105, 18.94736842, 20.52631579, 22.10526316,
# 23.68421053, 25.26315789, 26.84210526, 28.42105263, 30. ])
y
# array([-2.63866254, 0.59460722, -2.71828905, 2.49031593, 8.0935728 ,
# 3.74772581, 8.38599842, 8.57329823, 12.38546919, 18.17834314,
# 16.66062374, 17.78108955, 23.23214217, 22.98644755, 22.57692316,
# 24.38489051, 28.83051157, 25.62039455, 32.72456454, 28.93943828])
# Draw the samples and overlay the model's predictions over [0, 45].
plt.figure(figsize=(6, 4))
plt.scatter(X, Y)
# 50 evenly spaced points (linspace's default count) shaped as one column.
x1 = np.reshape(np.linspace(0, 45, num=50), (-1, 1))
plt.plot(x1, model.predict(x1))
1.准备数据 2.初始化模型 3.训练(拟合)模型 4.预测 5.评价
# Number of samples (rows) in the column matrix X.
X.shape[0]
Y
# array([[-2.63866254],
# [ 0.59460722],
# [-2.71828905],
# [ 2.49031593],
# [ 8.0935728 ],
# [ 3.74772581],
# [ 8.38599842],
# [ 8.57329823],
# [12.38546919],
# [18.17834314],
# [16.66062374],
# [17.78108955],
# [23.23214217],
# [22.98644755],
# [22.57692316],
# [24.38489051],
# [28.83051157],
# [25.62039455],
# [32.72456454],
# [28.93943828]])
# Model predictions for every sample in X.
Y_PRE = model.predict(X)
# Loss: sum of squared differences between the predictions and the targets Y.
np.square(Y_PRE - Y).sum()
model.intercept_  # intercept of the fitted line
# array([-2.4765995])
model.coef_  # slope of the fitted line
# array([[1.16787132]])
# Same line with the slope increased by 0.1, evaluated by hand for comparison.
Y_PRE2 = X * (model.coef_ + 0.1) + model.intercept_
# Loss (sum of squared differences to Y) of the perturbed line.
np.square(Y_PRE2 - Y).sum()
# 165.7419197589115
客观地评价模型:划分训练集与测试集,用训练时未使用的数据计算损失
# Hold-out split: the first 15 rows are used for fitting, the remaining rows
# for evaluation.
# NOTE(review): X comes from an ascending linspace, so the hold-out rows are
# the largest x values and the loss below measures extrapolation; confirm a
# shuffled split was not intended.
X_train = X[:15]
X_test = X[15:]
Y_train = Y[:15]
Y_test = Y[15:]
X_train
# array([[ 0. ],
# [ 1.57894737],
# [ 3.15789474],
# [ 4.73684211],
# [ 6.31578947],
# [ 7.89473684],
# [ 9.47368421],
# [11.05263158],
# [12.63157895],
# [14.21052632],
# [15.78947368],
# [17.36842105],
# [18.94736842],
# [20.52631579],
# [22.10526316]])
# Fresh estimator, fitted on the training rows only.
model = LinearRegression()
model.fit(X_train, Y_train)
# Sum of squared errors of the predictions on the held-out rows.
np.square(model.predict(X_test) - Y_test).sum()
# 64.39851406470771
model.coef_
# array([[1.2674295]])
model.intercept_
# array([-3.31979091])
# The fitted line shifted upward by 0.5, evaluated on the held-out rows.
Y_PRE3 = X_test * model.coef_ + model.intercept_ + 0.5
np.square(Y_PRE3 - Y_test).sum()
# 78.6521397131842
# All samples, the fitted line, and the shifted line (red) for comparison.
plt.scatter(X, Y)
plt.plot(X, model.predict(X))
plt.plot(X, X * model.coef_ + model.intercept_ + 0.5, color='r')