1、导包
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
plt.rcParams["font.sans-serif"] = ['SimHei']
2、局部加权线性回归
# Locally weighted linear regression.
# A Gaussian kernel down-weights training samples that are far from the query
# point, so effectively only nearby samples determine the coefficients.
def lrlw(test_point, x, y, k):
    """Predict the target at ``test_point`` with locally weighted regression.

    The model has no intercept term: it solves the weighted normal equations
    ``(X W X^T) coef = X W y`` and returns ``test_point^T coef``.

    Args:
        test_point: Query point; a scalar for one feature, or a sequence of
            ``d`` feature values.
        x: Training features. Either a 1-D sequence of ``m`` samples (one
            feature) or a 2-D array of shape ``(d, m)`` with one sample per
            column.
        y: Training targets, a 1-D sequence of length ``m``.
        k: Gaussian kernel bandwidth; smaller values make the fit more local.

    Returns:
        A 1x1 ``numpy.matrix`` holding the prediction, or the integer ``0``
        (after printing a message) when the weighted normal matrix has a
        zero determinant.
    """
    # np.asmatrix replaces np.mat, which was removed in NumPy 2.0.
    xMat = np.asmatrix(x, dtype=float)          # d x m, samples in columns
    yMat = np.asmatrix(y, dtype=float).T        # m x 1
    point = np.asmatrix(test_point, dtype=float).reshape(-1, 1)  # d x 1

    # Squared Euclidean distance from the query point to every sample.
    diff = xMat - point
    sq_dist = np.asarray(np.sum(np.multiply(diff, diff), axis=0)).ravel()

    # Diagonal matrix of Gaussian kernel weights, one per sample.
    weights = np.asmatrix(np.diag(np.exp(sq_dist / (-2.0 * k ** 2))))

    xtx = (xMat * weights) * xMat.T
    if np.linalg.det(xtx) == 0:
        print("the matrix is singular, cannot do inverse")
        return 0

    # Keep the coefficients in their own name instead of overwriting ``y``.
    coef = xtx.I * xMat * weights * yMat
    return point.T * coef
# Compute the locally weighted estimate for every training point.
def lrlwTest(xd, yd, k):
    """Evaluate ``lrlw`` at each sample of ``xd``.

    Args:
        xd: 1-D array of training feature values.
        yd: Training targets, same length as ``xd``.
        k: Gaussian kernel bandwidth forwarded to ``lrlw``.

    Returns:
        A 1-D float array with one prediction per entry of ``xd``.
    """
    n_samples = xd.shape[0]
    ytest = np.zeros(n_samples)
    for i in range(n_samples):
        # lrlw hands back a size-1 matrix (or a plain 0 on the singular
        # path); extract the scalar explicitly, because assigning an array
        # with ndim > 0 to a single element is deprecated in NumPy >= 1.25.
        ytest[i] = np.asarray(lrlw(xd[i], xd, yd, k)).item()
    return ytest
def lrlw_display(xd, yd, K):
    """Plot the locally weighted regression fit over the raw samples.

    Args:
        xd: 1-D array of feature values.
        yd: 1-D array of targets.
        K: Gaussian kernel bandwidth; also shown in the figure title.
    """
    ytest = lrlwTest(xd, yd, K)
    # Indices that order xd ascending, so the fitted line is drawn left to
    # right instead of jumping between unsorted samples.
    sorted_index = xd.argsort(0)
    x_sorted = xd[sorted_index]
    plt.plot(x_sorted, ytest[sorted_index])  # fitted curve
    plt.scatter(xd, yd, color='red')         # raw samples
    # The keyword must be lowercase ``fontsize``: matplotlib property names
    # are case-sensitive in current releases.
    plt.title("局部加权线性回归 K = {}".format(K), fontproperties="SimHei", fontsize=16)
    plt.show()
3、岭回归
# Ridge regression.
# Coefficients: ws = (X X^T + lam * I)^-1 * X y, with samples stored in columns.
def ridgeRegres(testPoint, xMat, yMat, lam):
    """Predict the target at ``testPoint`` with ridge regression.

    Args:
        testPoint: Query point as a ``d x 1`` column (a 1x1 matrix for a
            single feature); a flat sequence of ``d`` values is also
            accepted.
        xMat: Training features of shape ``(d, m)``, one sample per column.
        yMat: Training targets of shape ``(m, 1)``.
        lam: Ridge penalty added to the diagonal of ``X X^T``.

    Returns:
        A 1x1 ``numpy.matrix`` with the prediction, or ``None`` (after
        printing a message) when the regularised matrix has a zero
        determinant.
    """
    xMat = np.asmatrix(xMat)
    yMat = np.asmatrix(yMat)
    point = np.asmatrix(testPoint).reshape(-1, 1)

    xTx = xMat * xMat.T
    denom = xTx + np.eye(np.shape(xMat)[0]) * lam
    if np.linalg.det(denom) == 0.0:
        print("矩阵不可逆")
        return
    ws = denom.I * (xMat * yMat)
    # ws is d x 1, so the prediction is ws^T * point; the untransposed
    # product is only conformable when there is a single feature.
    return ws.T * point
def ridgeTest(xArr, yArr, lam):
    """Evaluate the ridge regression prediction at every training sample.

    Args:
        xArr: Feature matrix with one sample per column.
        yArr: Target column, one row per sample.
        lam: Ridge penalty forwarded to ``ridgeRegres``.

    Returns:
        A 1-D float array with one prediction per column of ``xArr``; an
        entry is NaN when ``ridgeRegres`` returned ``None`` for that sample.
    """
    n_samples = xArr.shape[1]
    yhat = np.zeros(n_samples)
    for i in range(n_samples):  # one prediction per sample (column)
        pred = ridgeRegres(xArr[:, i], xArr, yArr, lam)
        if pred is None:
            # ridgeRegres reported a non-invertible system.
            yhat[i] = np.nan
        else:
            # Extract the scalar explicitly: assigning an array with
            # ndim > 0 to a single element is deprecated in NumPy >= 1.25.
            yhat[i] = np.asarray(pred).item()
    return yhat
def ridge_display(xd, yd, lam):
    """Plot the ridge regression fit over the raw samples.

    Args:
        xd: 1-D array of feature values.
        yd: 1-D array of targets.
        lam: Ridge penalty; also shown in the figure title.
    """
    # np.asmatrix replaces np.mat, which was removed in NumPy 2.0.
    xmat = np.asmatrix(xd)      # 1 x m, samples in columns
    ymat = np.asmatrix(yd).T    # m x 1
    ridge = ridgeTest(xmat, ymat, lam)
    # Order the samples by x so the fitted line is drawn left to right.
    sorted_index = xd.argsort(0)
    x_sorted = xd[sorted_index]
    plt.plot(x_sorted, ridge[sorted_index])
    # The keyword must be lowercase ``fontsize``: matplotlib property names
    # are case-sensitive in current releases.
    plt.title("岭回归 lam = {}".format(lam), fontproperties="SimHei", fontsize=16)
    plt.scatter(xd, yd, color='red')
    plt.show()
4、LASSO回归
def LASSO(xd, yd, lam, learning_rate=0.01, epochs=2000):
    """Fit a one-feature LASSO model by (sub)gradient descent and plot it.

    The feature is standardised, then ``w`` and ``b`` are updated for
    ``epochs`` iterations on the mean squared error plus an L1 penalty
    ``lam * |w|``. The intercept ``b`` is not penalised.

    Args:
        xd: 1-D array of feature values.
        yd: 1-D array of targets.
        lam: L1 penalty strength; also shown in the figure title.
        learning_rate: Gradient descent step size.
        epochs: Number of gradient descent iterations.
    """
    # Standardise the feature to zero mean and unit variance.
    x_normal = (xd - xd.mean()) / xd.std()
    x_raw = x_normal.reshape(-1, 1)
    y_raw = yd.reshape(-1, 1)
    num_train = x_raw.shape[0]

    w = np.random.randn(x_raw.shape[1], 1)
    b = 0
    for _ in range(epochs):
        residual = np.dot(x_raw, w) + b - y_raw
        # The subgradient of lam * |w| is lam * sign(w); a constant ``lam``
        # would push w downwards regardless of its sign.
        w -= learning_rate * (np.dot(x_raw.T, residual) / num_train + lam * np.sign(w))
        b -= learning_rate * (np.sum(residual) / num_train)

    plt.plot(xd, w * x_raw + b)
    # The keyword must be lowercase ``fontsize``: matplotlib property names
    # are case-sensitive in current releases.
    plt.title("LASSO lam = {}".format(lam), fontproperties="SimHei", fontsize=16)
    plt.scatter(xd, yd, color='green')
    plt.show()
5、主函数
if __name__ == "__main__":
    # Load the dataset as a plain 2-D array: column 0 holds the feature,
    # column 1 holds the target.
    data = pd.read_csv("E:/data/regression.csv").values
    xd, yd = data[:, 0], data[:, 1]

    # Locally weighted regression for several kernel bandwidths.
    for bandwidth in (0.01, 0.05, 0.1, 1):
        lrlw_display(xd, yd, bandwidth)

    # Ridge regression for two penalty strengths.
    for penalty in (3, 0.1):
        ridge_display(xd, yd, penalty)

    # LASSO for two penalty strengths.
    for penalty in (0.15, 0.33):
        LASSO(xd, yd, penalty)
6、数据集
链接:https://pan.baidu.com/s/1JyaeeAPRhkYsN7DGzsBuWQ?pwd=4S6g
提取码:4S6g