现有两组数据 X 和 Y,用线性回归求 y=a*x+b 中的系数 a 和截距 b。
X = [12.46, 0.25, 5.22, 11.3, 6.81, 4.59, 0.66, 14.53, 15.49, 14.43, 2.19, 1.35, 10.02, 12.93, 5.93, 2.92, 12.81, 4.88, 13.11, 5.8]
Y = [29.01, 4.7, 22.33, 24.99, 18.85, 14.89, 10.58, 36.84, 42.36, 39.73, 11.92, 7.45, 22.9, 36.62, 16.04, 16.56, 31.55, 20.04, 35.26, 23.59]
from sklearn.linear_model import LinearRegression
import matplotlib.pyplot as plt
import numpy as np
if __name__ == '__main__':
    # Sample observations: X holds the inputs, Y the corresponding responses.
    X = [
        12.46, 0.25, 5.22, 11.3, 6.81, 4.59, 0.66, 14.53, 15.49, 14.43,
        2.19, 1.35, 10.02, 12.93, 5.93, 2.92, 12.81, 4.88, 13.11, 5.8,
    ]
    Y = [
        29.01, 4.7, 22.33, 24.99, 18.85, 14.89, 10.58, 36.84, 42.36, 39.73,
        11.92, 7.45, 22.9, 36.62, 16.04, 16.56, 31.55, 20.04, 35.26, 23.59,
    ]

    # LinearRegression wants column-shaped data, so turn each flat list into
    # an n-row, one-column ndarray, e.g. [[12.46], [0.25], [5.22], ...].
    x_column = np.asarray(X).reshape(-1, 1)
    y_column = np.asarray(Y).reshape(-1, 1)

    # Create the linear model and train it on the samples in one step.
    model = LinearRegression().fit(x_column, y_column)

    # Run the trained model on the training inputs to get the fitted values.
    fitted = model.predict(x_column)

    # coef_ holds the coefficient(s) and intercept_ the intercept; with a
    # single feature and a column-shaped target, the slope is the only
    # entry of coef_ and the intercept the only entry of intercept_.
    slope = model.coef_[0, 0]
    offset = model.intercept_[0]
    print("y=%.4f*x+%.4f" % (slope, offset))

    # Score the model. The training set is reused here for simplicity; in
    # practice a separate test set is often used instead.
    print("得分", model.score(x_column, y_column))

    # Quick visual check: raw samples in blue, fitted line in red.
    plt.scatter(X, Y, c="blue")
    plt.plot(x_column, fitted, c="red")
    plt.show()
结果:
y=2.0532*x+7.1234
得分 0.9149096589144883
这里随机创建X1,X2,X3,Y四个数组,使Y=2*X1-3*X2+X3+8。然后加入一些干扰噪声,再尝试做线性回归。
from sklearn.linear_model import LinearRegression
import numpy as np
import random
def format_equation(coefficients, intercept):
    """Render a fitted linear model as a string such as ``y=2.0000*x1-3.0000*x2+8.0000``.

    Args:
        coefficients: Sequence of fitted coefficients; element ``i`` is the
            multiplier of variable ``x{i+1}``.
        intercept: The constant term of the model.

    Returns:
        The equation text with every number formatted to four decimals.
    """
    # "%+.4f" always emits an explicit sign, so terms concatenate directly.
    terms = "".join(
        "%+.4f*x%d" % (coef, index)
        for index, coef in enumerate(coefficients, start=1)
    )
    if not terms:
        return "y=%.4f" % intercept
    # Drop only a leading "+"; a leading "-" is part of the first
    # coefficient and must be kept (blindly slicing [1:] would lose it).
    if terms.startswith("+"):
        terms = terms[1:]
    # The signed format also avoids printing "+-..." for a negative intercept.
    return "y=%s%+.4f" % (terms, intercept)


if __name__ == '__main__':
    n_samples = 50

    # Randomly create X1, X2, X3 and derive Y so that Y = 2*X1 - 3*X2 + X3 + 8.
    X1 = [random.randint(0, 100) for _ in range(n_samples)]
    X2 = [random.randint(0, 50) for _ in range(n_samples)]
    X3 = [random.randint(0, 25) for _ in range(n_samples)]
    Y = [2 * x1 - 3 * x2 + x3 + 8 for x1, x2, x3 in zip(X1, X2, X3)]

    # Combine X1, X2, X3 into an n-row, 3-column feature matrix
    # (one column per variable) and make Y an n-row, 1-column target.
    X_train = np.column_stack((X1, X2, X3))
    Y_train = np.array(Y).reshape((len(Y), 1))

    # Add noise; subtracting its mean keeps the noise centred on zero so
    # that it does not shift the intercept.
    noise = np.random.randn(len(Y), 1)
    noise = noise - np.mean(noise)
    Y_train = Y_train + noise

    # Create a linear regression model and train it on the data.
    lineModel = LinearRegression()
    lineModel.fit(X_train, Y_train)

    # Use the trained model to predict on the training inputs.
    Y_predict = lineModel.predict(X_train)

    # coef_ holds the coefficients, intercept_ the intercept. The target is
    # a single column, so row 0 of coef_ contains all three coefficients.
    a_arr = lineModel.coef_[0]
    b = lineModel.intercept_[0]
    f = format_equation(a_arr, b)
    print("拟合方程", f)

    # Score the model. The training set is reused here for simplicity; in
    # practice a separate test set is often used instead.
    print("得分", lineModel.score(X_train, Y_train))
结果:
拟合方程 y=1.9972*x1-3.0115*x2+1.0597*x3+7.7271
得分 0.9997880910740103
拟合结果与预先设定的 Y=2*X1-3*X2+X3+8 相近。由于样本和噪声都是随机产生的,每次运行的结果不尽相同。
由于自变量是多维的,不方便画图展示。