局部加权线性回归、岭回归、LASSO回归

1、导包

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
plt.rcParams["font.sans-serif"] = ['SimHei']

2、局部加权线性回归

def lrlw(test_point, x, y, k):
    """Predict the target at ``test_point`` by locally weighted linear regression.

    Every training sample gets a Gaussian-kernel weight
    ``exp(-||x_i - test_point||^2 / (2 * k^2))`` so that samples close to the
    query dominate the fit. A weighted least-squares model without an
    intercept term is then solved and evaluated at the query.

    Args:
        test_point: Query location; a scalar for single-feature data or a
            1-D feature vector.
        x: Training features. A flat sequence is read as one feature per
            sample; a 2-D array is read as ``(n_samples, n_features)``.
        y: Training targets, one per sample.
        k: Kernel bandwidth; smaller values make the fit more local.
            Must be non-zero.

    Returns:
        A 1x1 ``np.matrix`` holding the prediction, or ``0`` (after printing
        a message) when the weighted normal matrix is singular.
    """
    features = np.asarray(x, dtype=float)
    if features.ndim == 1:
        # A flat sequence means a single feature per sample.
        features = features.reshape(-1, 1)
    targets = np.asarray(y, dtype=float).reshape(-1)
    query = np.asarray(test_point, dtype=float).reshape(-1)

    # Gaussian kernel weight of each training sample relative to the query.
    sq_dist = np.sum(np.square(features - query), axis=1)
    weights = np.exp(sq_dist / (-2.0 * k ** 2))

    # Weighted normal equations: (X^T W X) coef = X^T W y.
    weighted_t = (features * weights[:, np.newaxis]).T
    xtwx = np.dot(weighted_t, features)
    if np.linalg.det(xtwx) == 0:
        print("the matrix is singular, cannot do inverse")
        return 0
    coef = np.linalg.solve(xtwx, np.dot(weighted_t, targets))

    # Keep the historical 1x1-matrix return shape for existing callers.
    return np.asmatrix(np.reshape(np.dot(query, coef), (1, 1)))


def lrlwTest(xd, yd, k):
    """Evaluate the locally weighted regression at every training sample.

    Args:
        xd: Training features; iterated along its first axis, one query per
            entry.
        yd: Training targets, one per sample.
        k: Kernel bandwidth forwarded to ``lrlw``.

    Returns:
        1-D float array with one prediction per entry of ``xd``.
    """
    samples = np.asarray(xd)
    ytest = np.zeros(samples.shape[0])
    for i, point in enumerate(samples):
        # lrlw hands back either a 1x1 matrix or the plain scalar 0; pull out
        # the single value explicitly because NumPy deprecates storing a
        # size-1 array with ndim > 0 into a scalar slot.
        ytest[i] = np.asarray(lrlw(point, samples, yd, k)).reshape(-1)[0]
    return ytest


def lrlw_display(xd, yd, K):
    """Plot the locally weighted regression curve over the raw samples.

    Args:
        xd: 1-D array of feature values.
        yd: 1-D array of targets, aligned with ``xd``.
        K: Kernel bandwidth forwarded to ``lrlwTest`` and shown in the title.
    """
    ytest = lrlwTest(xd, yd, K)
    # Draw the fitted curve left to right, so order the points by feature value.
    order = xd.argsort(0)
    plt.plot(xd[order], ytest[order])
    plt.scatter(xd, yd, color='red')
    # Matplotlib property names are case-sensitive: the keyword is `fontsize`.
    plt.title("局部加权线性回归 K = {}".format(K), fontproperties="SimHei", fontsize=16)
    plt.show()

3、岭回归

def ridgeRegres(testPoint, xMat, yMat, lam):
    """Predict the target at ``testPoint`` with ridge regression.

    The weights are ``ws = (X X^T + lam * I)^-1 X y`` where ``X`` holds one
    sample per column, and the prediction is ``ws^T * testPoint``.

    Args:
        testPoint: Query features; a scalar, a sequence of ``n_features``
            values, or an ``(n_features, 1)`` matrix.
        xMat: Training features laid out as ``(n_features, n_samples)``.
            A flat sequence is read as a single feature row.
        yMat: Training targets, one per sample (column vector or flat).
        lam: Ridge penalty added to the diagonal of ``X X^T``.

    Returns:
        A 1x1 ``np.matrix`` holding the prediction, or ``None`` (after
        printing a message) when the regularised matrix is singular.
    """
    # Coerce inputs so plain arrays/lists work as well as np.matrix objects.
    features = np.asmatrix(xMat)
    targets = np.asmatrix(yMat).reshape(-1, 1)
    query = np.asmatrix(testPoint).reshape(-1, 1)

    denom = features * features.T + np.eye(features.shape[0]) * lam
    if np.linalg.det(denom) == 0.0:
        print("矩阵不可逆")
        return
    ws = denom.I * (features * targets)
    # ws is (n_features, 1); transpose it so the product also conforms when
    # there is more than one feature.
    return ws.T * query


def ridgeTest(xArr, yArr, lam):
    """Evaluate the ridge regression at every training sample.

    Args:
        xArr: Training features with one sample per column.
        yArr: Training targets, forwarded unchanged to ``ridgeRegres``.
        lam: Ridge penalty forwarded to ``ridgeRegres``.

    Returns:
        1-D float array with one prediction per column of ``xArr``; entries
        are NaN where ``ridgeRegres`` reports a singular system.
    """
    n_samples = xArr.shape[1]
    yhat = np.zeros(n_samples)
    for i in range(n_samples):  # one prediction per sample (column)
        pred = ridgeRegres(xArr[:, i], xArr, yArr, lam)
        # ridgeRegres yields a 1x1 matrix, or None for a singular system.
        # Extract the scalar explicitly because NumPy deprecates storing a
        # size-1 array with ndim > 0 into a scalar slot.
        yhat[i] = np.nan if pred is None else np.asarray(pred).reshape(-1)[0]
    return yhat


def ridge_display(xd, yd, lam):
    """Plot the ridge regression fit over the raw samples.

    Args:
        xd: 1-D array of feature values.
        yd: 1-D array of targets, aligned with ``xd``.
        lam: Ridge penalty forwarded to ``ridgeTest`` and shown in the title.
    """
    # np.mat was removed in NumPy 2.0; np.asmatrix is the supported spelling.
    xmat = np.asmatrix(xd)      # 1 x n: a single feature row
    ymat = np.asmatrix(yd).T    # n x 1 column of targets
    ridge = ridgeTest(xmat, ymat, lam)
    # Draw the fitted line left to right, so order the points by feature value.
    order = xd.argsort(0)
    plt.plot(xd[order], ridge[order])
    # Matplotlib property names are case-sensitive: the keyword is `fontsize`.
    plt.title("岭回归 lam = {}".format(lam), fontproperties="SimHei", fontsize=16)
    plt.scatter(xd, yd, color='red')
    plt.show()

4、LASSO回归

def _lasso_fit(x_col, y_col, lam, learning_rate, epochs):
    """Fit ``y ~ x.w + b`` by subgradient descent with an L1 penalty on ``w``."""
    num_train = x_col.shape[0]
    w = np.random.randn(x_col.shape[1], 1)
    b = 0.0
    for _ in range(epochs):
        residual = np.dot(x_col, w) + b - y_col
        # The subgradient of lam * |w| is lam * sign(w). A constant `+ lam`
        # would push w downward regardless of its sign instead of shrinking
        # it toward zero.
        grad_w = np.dot(x_col.T, residual) / num_train + lam * np.sign(w)
        w -= learning_rate * grad_w
        b -= learning_rate * (np.sum(residual) / num_train)
    return w, b


def LASSO(xd, yd, lam, learning_rate=0.01, epochs=2000):
    """Fit a single-feature LASSO model by gradient descent and plot it.

    The feature is standardised (zero mean, unit standard deviation) before
    fitting. The weight is initialised randomly, so runs are not bit-for-bit
    reproducible.

    Args:
        xd: 1-D array of feature values.
        yd: 1-D array of targets, aligned with ``xd``.
        lam: Strength of the L1 penalty on the weight.
        learning_rate: Gradient-descent step size.
        epochs: Number of gradient-descent iterations.
    """
    # Standardise the feature, then shape both inputs as column vectors.
    x_raw = ((xd - xd.mean()) / xd.std()).reshape(-1, 1)
    y_raw = yd.reshape(-1, 1)
    w, b = _lasso_fit(x_raw, y_raw, lam, learning_rate, epochs)

    plt.plot(xd, w * x_raw + b)
    # Matplotlib property names are case-sensitive: the keyword is `fontsize`.
    plt.title("LASSO lam = {}".format(lam), fontproperties="SimHei", fontsize=16)
    plt.scatter(xd, yd, color='green')
    plt.show()

5、主函数

if __name__ == "__main__":
    # Load the dataset as a plain 2-D array: column 0 is passed on as the
    # feature, column 1 as the target.
    data = pd.read_csv("E:/data/regression.csv").values
    xd, yd = data[:, 0], data[:, 1]

    # Locally weighted regression with progressively wider kernels.
    for bandwidth in (0.01, 0.05, 0.1, 1):
        lrlw_display(xd, yd, bandwidth)

    # Ridge regression with a strong and a weak penalty.
    for ridge_lam in (3, 0.1):
        ridge_display(xd, yd, ridge_lam)

    # LASSO with two penalty strengths.
    for lasso_lam in (0.15, 0.33):
        LASSO(xd, yd, lasso_lam)

6、数据集
链接:https://pan.baidu.com/s/1JyaeeAPRhkYsN7DGzsBuWQ?pwd=4S6g 
提取码:4S6g 

你可能感兴趣的:(python,机器学习)