Python: Solving a Linear Regression Problem with cvxpy

The optimization objective is to minimize the sum of squared residuals. This is a convex problem, and with cvxpy the whole process becomes almost trivially simple.

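Written out, the problem handed to cvxpy is the unconstrained least-squares objective over the slope w and intercept b, for data points (x_i, y_i), i = 1..n:

    \min_{w,\,b} \; \sum_{i=1}^{n} \left( w x_i + b - y_i \right)^2
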
    ##------ Data generation ----------##
    import numpy as np
    import cvxpy as cvx
    import matplotlib.pyplot as plt

    np.random.seed(101)
    X = np.random.uniform(low=1.0, high=10.0, size=300)
    y = X * 20 + 10 + np.random.normal(loc=0.0, scale=10.0, size=300)
    ##------ Convex optimization with cvxpy ----------##
    w = cvx.Variable()
    b = cvx.Variable()
    objective = cvx.Minimize(cvx.sum_squares((w * X + b) - y))
    prob = cvx.Problem(objective)   # no constraints: plain least squares
    prob.solve()
    ##------ Result visualization ----------##
    print(w.value)
    print(b.value)
    show_data(X, y, w.value, b.value)   # show_data is defined in the full script below
    plt.show()
    print("Loss=", np.mean(((w.value * X + b.value) - y) ** 2))


But rely on cvxpy alone and you are sunk: you won't learn anything!

So here is a comparison against the same fit computed by gradient descent.

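The gradient-descent code below minimizes the same objective scaled by 1/n (i.e. the mean squared error L), using the gradients

    \frac{\partial L}{\partial w} = \frac{2}{n}\sum_{i=1}^{n}(w x_i + b - y_i)\,x_i,
    \qquad
    \frac{\partial L}{\partial b} = \frac{2}{n}\sum_{i=1}^{n}(w x_i + b - y_i)

and the update rule

    w \leftarrow w - \eta\,\frac{\partial L}{\partial w},
    \qquad
    b \leftarrow b - \eta\,\frac{\partial L}{\partial b}

where η is the learning rate. These are exactly the expressions implemented in cal_gradient and gradient_descent below.
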
'''
Univariate linear regression.
The code runs correctly and the results look good.
'''
import numpy as np
import cvxpy as cvx
import matplotlib.pyplot as plt

class LinearRegression(object):
    def __init__(self, learning_rate=0.01, max_iter=100, seed=101):
        np.random.seed(seed)            # make the random initialization reproducible
        self.w = np.random.randn(1)[0]  # initial slope
        self.b = np.random.randn(1)[0]  # initial intercept
        self.lr = learning_rate
        self.max_iter = max_iter
        self.loss_arr = []              # training loss recorded after each iteration

    def fit(self, X, y):
        self.X = X
        self.y = y
        for i in range(self.max_iter):
            self.gradient_descent()            # one parameter update per iteration
            self.loss_arr.append(self.loss())  # record the loss after the update

    def model(self, X):
        return self.w * X + self.b             # prediction of the linear model

    def loss(self, y_true=None, y_pred=None):
        # By default, the sum of squared errors on the training data.
        if y_true is None or y_pred is None:
            y_true = self.y
            y_pred = self.model(self.X)
        return np.sum((y_true - y_pred) ** 2)

    def cal_gradient(self):
        # Gradients of the mean squared error: dividing by len(self.X) averages over
        # the samples, so the step size does not depend on how many points there are.
        d_w = np.sum(2 * (self.model(self.X) - self.y) * self.X) / len(self.X)
        d_b = np.sum(2 * (self.model(self.X) - self.y)) / len(self.X)
        return d_w, d_b

    def gradient_descent(self):
        d_w, d_b = self.cal_gradient()
        self.w -= self.lr * d_w   # step against the gradient
        self.b -= self.lr * d_b

def show_data(X,y,w=None,b=None):
    plt.scatter(X,y,marker='.')
    if w is not None and b is not None:
        plt.plot(X, X*w + b, c='r')


if __name__ == '__main__':
    seed = 101
    np.random.seed(seed)   # reproducible synthetic data
    X = np.random.uniform(low=1.0, high=10.0, size=300)
    y = X * 20 + 10 + np.random.normal(loc=0.0, scale=10.0, size=300)
    regr = LinearRegression(learning_rate=0.01, max_iter=100, seed=101)
    regr.fit(X,y)

    # plt.plot(np.arange(len(regr.loss_arr)),regr.loss_arr,marker='o',c='green')
    w = cvx.Variable()
    b = cvx.Variable()
    objective = cvx.Minimize(cvx.sum_squares((w * X + b) - y))
    prob = cvx.Problem(objective)   # unconstrained least-squares problem
    prob.solve()
    show_data(X, y, w.value, b.value)   # cvxpy fit
    show_data(X, y, regr.w, regr.b)     # gradient-descent fit (the two red lines nearly coincide)
    plt.show()
    print("cvxpy Loss =", np.mean(((w.value * X + b.value) - y) ** 2))
    print("GD Loss    =", regr.loss_arr[-1] / len(X))


Manually tuning the number of gradient-descent iterations:

max_iter=100

The larger the iteration count, the closer the gradient-descent result gets to the cvxpy optimum.
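
A minimal way to check this, reusing the LinearRegression class and the imports from the script above (the data and the cvxpy baseline are regenerated here so that only that class is assumed to be in scope):

    # Sketch: compare gradient descent to the cvxpy optimum for several iteration counts.
    # Assumes numpy/cvxpy are imported and the LinearRegression class above is defined.
    np.random.seed(101)
    X = np.random.uniform(low=1.0, high=10.0, size=300)
    y = X * 20 + 10 + np.random.normal(loc=0.0, scale=10.0, size=300)

    w = cvx.Variable()
    b = cvx.Variable()
    cvx.Problem(cvx.Minimize(cvx.sum_squares((w * X + b) - y))).solve()
    print("cvxpy Loss =", np.mean((w.value * X + b.value - y) ** 2))

    for n_iter in (10, 100, 1000, 10000):
        regr = LinearRegression(learning_rate=0.01, max_iter=n_iter, seed=101)
        regr.fit(X, y)
        print("max_iter =", n_iter, "GD Loss =", np.mean((regr.w * X + regr.b - y) ** 2))

With these settings the gradient-descent loss should move toward the cvxpy value as max_iter grows.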
