线性回归(Linear Regression) Python实现

1. 精简版本

import numpy as np
import matplotlib.pyplot as plt

# 1.计算cost 
def cost_function(data, x, y, theta0, theta1):
    """Return the mean-squared-error cost J = (1/2m) * sum((theta0 + theta1*x - y)^2)."""
    m = data.shape[0]  # number of training samples
    residuals = theta0 + theta1 * x - y
    return np.dot(residuals, residuals) / (2 * m)

# 2.梯度下降 batch gradient descend
# 2. One step of batch gradient descent
def BGD(data, x, y, theta0, theta1, alpha):
    """Perform ONE batch-gradient-descent step and return the new (theta0, theta1).

    data   : array whose first dimension is the sample count m
    x, y   : 1-D feature / target arrays of length m
    alpha  : learning rate

    Bug fix: the original updated theta0 first and then used the NEW theta0
    when computing theta1's gradient. Batch gradient descent requires a
    SIMULTANEOUS update — both gradients must be evaluated at the same
    (old) parameter values.
    """
    m = data.shape[0]
    error = theta0 + theta1 * x - y  # residuals at the current parameters
    new_theta0 = theta0 - alpha * np.sum(error) / m
    new_theta1 = theta1 - alpha * np.sum(error * x) / m
    return new_theta0, new_theta1
    
def linear_regression():
    """Fit y = theta0 + theta1 * x to data.csv by batch gradient descent and plot the result."""
    data = np.loadtxt('data.csv',dtype=np.float32,delimiter=',')  # read the CSV file
    x, y = data[:, 0], data[:, 1]  # first column = feature, second = target
    # Hyper-parameters and initial guesses.
    count = 10000
    theta0, theta1 = 10, 10
    alpha = 0.01
    # Iterate the gradient-descent step `count` times.
    for _ in range(count):
        theta0, theta1 = BGD(data, x, y, theta0, theta1, alpha)
    print(f'Final parameters:θ0 = {theta0:.4f}  θ1 = {theta1:.4f}  cost = {cost_function(data,x,y,theta0,theta1):.4f}')
    # Scatter the samples and overlay the fitted line in red.
    plt.scatter(x, y)
    plt.plot(x, theta0 + theta1 * x, 'r')
    plt.show()
    
# Run the regression only when executed as a script (not on import).
if __name__ == '__main__':
    linear_regression()

2. 详细版本

import numpy as np
import matplotlib.pyplot as plt

# 1.计算cost function
# 1. Cost function
def cost_function(data, x, y, theta0, theta1):
    """Mean-squared-error cost: J(theta0, theta1) = sum((h(x_i) - y_i)^2) / (2m)."""
    m = data.shape[0]  # sample count
    err = (theta0 + theta1 * x) - y
    return np.sum(err * err) / (2 * m)

# 2.梯度下降
# 2. Gradient descent (one step)
def batch_gredient_descend(data, x, y, theta0, theta1, alpha):
    """Perform ONE batch-gradient-descent step and return the new (theta0, theta1).

    Bug fix: the original updated theta0 first and then computed theta1's
    gradient with the already-updated theta0. A correct batch step updates
    both parameters SIMULTANEOUSLY from the same (old) values.

    (Name kept as-is — "gredient" is a typo, but multi_BGD calls it by
    this name.)
    """
    m = data.shape[0]
    error = theta0 + theta1 * x - y  # residuals at the current parameters
    new_theta0 = theta0 - alpha * np.sum(error) / m
    new_theta1 = theta1 - alpha * np.sum(error * x) / m
    return new_theta0, new_theta1
    
# 3.多次迭代(梯度下降)
# 3. Repeated gradient descent
def multi_BGD(data, x, y, theta0, theta1, alpha, count, cost_list):
    """Run `count` iterations of batch gradient descent.

    Side effects: appends the cost of every iteration to `cost_list`
    (mutated in place, for the later convergence plot) and prints a
    progress line every 100 iterations.

    Returns the final (theta0, theta1).
    """
    for i in range(count):
        theta0, theta1 = batch_gredient_descend(data, x, y, theta0, theta1, alpha)
        # Compute the cost once per iteration and reuse it for both the
        # progress report and the history (the original called
        # cost_function twice on reporting iterations).
        cost = cost_function(data, x, y, theta0, theta1)
        if i % 100 == 0:
            # Report progress every 100 iterations.
            print(f'Epoch {i}: θ0 = {theta0:.4f}    θ1 = {theta1:.4f}    cost = {cost:.4f}')
        cost_list.append(cost)
    return theta0, theta1

# 4.绘制拟合曲线
# 4. Plot the fitted line
def plot_linear(x, y, theta0, theta1):
    """Scatter the samples and overlay the fitted line y = theta0 + theta1*x in red."""
    plt.title('Results Of Iterations')
    plt.xlabel('x')
    plt.ylabel('y')
    plt.scatter(x, y)
    plt.plot(x, theta0 + theta1 * x, 'r')
    plt.show()

def linear_regression():
    """Load data.csv, fit a line by gradient descent, then plot the cost curve and fit."""
    data = np.loadtxt('data.csv',dtype=np.float32,delimiter=',')  # read the CSV file
    x = data[:, 0]  # first column: feature
    y = data[:, 1]  # second column: target
    # Parameter initialisation and hyper-parameters.
    theta0 = 10
    theta1 = 10
    alpha = 0.01
    count = 10000   # number of gradient-descent iterations
    cost_list = []  # per-iteration cost history, for the convergence plot
    # Report the starting parameters.
    print(f'Begin para:θ0 = {theta0:.4f}    θ1 = {theta1:.4f}    alpha = {alpha:.4f}')
    theta0, theta1 = multi_BGD(data, x, y, theta0, theta1, alpha, count, cost_list)
    # Report the fitted parameters.
    print(f'End para:θ0 = {theta0:.4f}    θ1 = {theta1:.4f}    alpha = {alpha:.4f}')
    # Show the cost history (convergence check), then the fitted line.
    plt.plot(cost_list)
    plt.show()
    plot_linear(x, y, theta0, theta1)
    
# Script entry point: run the full regression pipeline.
if __name__ == '__main__':
    linear_regression()

data.csv下载
数据来源:流年若逝

3. 进阶版本

可参考以下地址:(含单变量线性回归、多变量线性回归及正规方程)
GitHub地址

你可能感兴趣的:(AI,python,线性回归,机器学习)