import numpy as np
from scipy import stats
import matplotlib.pyplot as plt
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
# Ten sample data points
data = np.array([[ -2.95507616, 10.94533252],
[ -0.44226119, 2.96705822],
[ -2.13294087, 6.57336839],
[ 1.84990823, 5.44244467],
[ 0.35139795, 2.83533936],
[ -1.77443098, 5.6800407 ],
[ -1.8657203 , 6.34470814],
[ 1.61526823, 4.77833358],
[ -2.38043687, 8.51887713],
[ -1.40513866, 4.18262786]])
m = data.shape[0]  # number of samples
X = data[:, 0].reshape(-1, 1)  # reshape the 1-D array into a column vector (matrix)
y = data[:, 1].reshape(-1, 1)
plt.plot(X, y, "b.")
plt.xlabel('X')
plt.ylabel('y')
plt.show()
# Linear regression
lin_reg = LinearRegression()
lin_reg.fit(X, y)
print(lin_reg.intercept_, lin_reg.coef_)  # linear regression parameters: [ 4.97857827] [[-0.92810463]]
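# i.e. the fitted line is approximately y ≈ 4.979 - 0.928 * x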
X_plot = np.linspace(-3, 3, 1000).reshape(-1, 1)
# linspace(-3, 3, 1000) generates 1000 evenly spaced values between -3 and 3;
# reshape(-1, 1) turns the array into a column vector
y_plot = np.dot(X_plot, lin_reg.coef_.T) + lin_reg.intercept_  # evaluate the line y = Xw + b; dot() is the matrix product, .T the transpose
# lin_reg.coef_ and lin_reg.intercept_ are the parameters w and b
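# Equivalently, the prediction can be delegated to the model itself; a minimal
# sketch (predict() applies the same w*x + b internally, so the results match):
# y_plot = lin_reg.predict(X_plot)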
plt.plot(X_plot, y_plot, 'g-')
plt.plot(X, y, 'b.')
plt.xlabel('X')
plt.ylabel('y')
plt.savefig('regu-2.png', dpi=200)  # save the figure; it stays open, so later plot calls draw on the same axes
h = np.dot(X, lin_reg.coef_.T) + lin_reg.intercept_  # predictions of the linear model on the training samples
# print(mean_squared_error(y, h))  # ≈ 3.34, the linear model's training MSE
# Polynomial regression
poly_features = PolynomialFeatures(degree=2, include_bias=False)  # construct degree-2 polynomial features
X_poly = poly_features.fit_transform(X)  # fit_transform() first fits, then transforms:
# fit() learns whatever the transformer needs from the training data (for
# PolynomialFeatures, just the number of input features), and transform()
# applies the mapping, here appending each sample's squared term x**2 as an extra column
#print(X_poly)
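# For example, with degree=2 and include_bias=False a single feature column [x]
# is expanded to [x, x**2]; a quick illustrative check (input value assumed here):
# poly_features.transform(np.array([[2.0]]))  # -> array([[2., 4.]])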
lin_reg = LinearRegression()
lin_reg.fit(X_poly, y)
print(lin_reg.intercept_, lin_reg.coef_)  # model parameters: [ 2.60996757] [[-0.12759678  0.9144504 ]]
X_plot = np.linspace(-3, 3, 1000).reshape(-1, 1)
X_plot_poly = poly_features.transform(X_plot)  # reuse the feature mapping already fitted on X; no need to refit
y_plot = np.dot(X_plot_poly, lin_reg.coef_.T) + lin_reg.intercept_  # evaluate the fitted quadratic model
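# For comparison with the linear model's ≈ 3.34, the quadratic model's training
# error can be checked the same way (a sketch; the exact value depends on these
# ten samples, but it should come out noticeably smaller if the quadratic fits better):
# print(mean_squared_error(y, lin_reg.predict(X_poly)))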
plt.plot(X_plot, y_plot, 'r-')
plt.plot(X, y, 'b.')
plt.show()
Output: the figure shows the sample points (blue dots) with the linear fit (green line) and the quadratic fit (red curve).
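As a side note, the feature construction and the regression can be chained into a single estimator; a minimal sketch using sklearn's make_pipeline (not part of the original script):

from sklearn.pipeline import make_pipeline
model = make_pipeline(PolynomialFeatures(degree=2, include_bias=False),
                      LinearRegression())
model.fit(X, y)          # fits the feature mapping and the regression together
# model.predict(X_plot)  # same curve as the manual dot-product above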