Tips:采用梯度下降法实现线性回归,同时可批量绘制散点图和拟合曲线以及损失图。
样例数据:
拟合曲线图:
损失曲线:
Python代码:
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
Created on Fri Jan 22 15:05:16 2021
E-mail: [email protected]
@author: xiao_gf
"""
import math
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
plt.rcParams['font.sans-serif']=['Simhei'] #解决中文字符乱码问题
# 读取Excel文件
def read_data(excel_path):
    """Load a two-column regression data set from an Excel workbook.

    Only the first worksheet is read. Its first column is used as the
    feature and its second column as the target.

    Args:
        excel_path: Location of the workbook; forwarded unchanged to
            ``pandas.read_excel``.

    Returns:
        A tuple ``(nrows, x1, x, y)``:

        * ``nrows`` -- number of data rows (samples).
        * ``x1`` -- the feature as an ``(nrows, 1)`` column.
        * ``x`` -- the ``(nrows, 2)`` design matrix ``[1, x1]``.
        * ``y`` -- the target as an ``(nrows, 1)`` column.
    """
    df = pd.read_excel(excel_path, sheet_name=0)  # sheet_name=0 -> first worksheet
    nrows = df.shape[0]  # number of samples
    x1 = np.array(df.iloc[:, 0]).reshape(nrows, 1)
    y = np.array(df.iloc[:, 1]).reshape(nrows, 1)
    # Prepend a column of ones so the intercept b is handled as an ordinary
    # coefficient in the matrix product.
    x0 = np.ones((nrows, 1))
    x = np.hstack((x0, x1))
    return nrows, x1, x, y
# Cost function (half mean squared error): J = 1/(2*m) * sum((y_pred - y_true)**2)
def cost_function(theta, X, Y, m):
    """Return the half mean-squared-error cost of a linear model.

    Evaluates ``J = 1 / (2 * m) * diff.T @ diff`` with
    ``diff = X @ theta - Y``.

    Args:
        theta: Model coefficients, one row per column of ``X``.
        X: Design matrix with one row per sample.
        Y: Targets, shaped like ``X @ theta``.
        m: Number of samples used to normalise the cost.

    Returns:
        The scaled product ``diff.T @ diff``; this is a ``(1, 1)`` array when
        ``theta`` and ``Y`` are column vectors.
    """
    diff = np.dot(X, theta) - Y  # residuals of the current prediction
    return (1 / (2 * m)) * np.dot(diff.transpose(), diff)
# 定义代价函数对应的梯度函数
def gradient_function(theta, X, Y, m):
    """Return the gradient of the half mean-squared-error cost w.r.t. ``theta``.

    Evaluates ``(1 / m) * X.T @ (X @ theta - Y)``.

    Args:
        theta: Model coefficients, one row per column of ``X``.
        X: Design matrix with one row per sample.
        Y: Targets, shaped like ``X @ theta``.
        m: Number of samples used to normalise the gradient.

    Returns:
        The gradient, shaped like ``theta`` when ``theta`` and ``Y`` are
        column vectors.
    """
    diff = np.dot(X, theta) - Y  # residuals of the current prediction
    return (1 / m) * np.dot(X.transpose(), diff)
# 梯度下降迭代
def gradient_descent(X, Y, alpha, m, tol=1e-5, max_iter=None):
    """Fit linear-regression coefficients by batch gradient descent.

    Starting from all-ones coefficients, the function repeatedly steps
    against the gradient of the half mean-squared-error cost
    ``J = 1 / (2 * m) * sum((X @ theta - Y) ** 2)`` until every gradient
    component is at most ``tol`` in absolute value.

    Args:
        X: Design matrix of shape ``(samples, coefficients)``.
        Y: Targets; reshaped internally to a single column.
        alpha: Learning rate (step size).
        m: Number of samples, used to normalise gradient and cost.
        tol: Convergence threshold on the absolute gradient components.
        max_iter: Optional upper bound on the number of updates. ``None``
            (the default) means iterate until convergence.

    Returns:
        A tuple ``(theta, num, err)``:

        * ``theta`` -- fitted coefficients as a column vector.
        * ``num`` -- iteration numbers ``1, 2, ...`` (one per update).
        * ``err`` -- the cost ``J`` after each update, as plain floats.

    Raises:
        FloatingPointError: If the gradient becomes NaN or infinite, which
            happens when ``alpha`` is too large or the data contain
            non-finite values. Without this check the loop would never end,
            because a NaN gradient can never satisfy the tolerance test.
    """
    X = np.asarray(X, dtype=float)
    Y = np.asarray(Y, dtype=float).reshape(-1, 1)
    num, err = [], []
    i = 0
    theta = np.ones((X.shape[1], 1))  # initial guess: every coefficient is 1

    # The residual is shared by the gradient and the cost, so compute it once.
    residual = np.dot(X, theta) - Y
    gradient = np.dot(X.T, residual) / m
    while not np.all(np.abs(gradient) <= tol):
        if not np.all(np.isfinite(gradient)):
            raise FloatingPointError(
                "gradient descent diverged (non-finite gradient); "
                "try a smaller learning rate or check the data for NaN/inf"
            )
        if max_iter is not None and i >= max_iter:
            break
        theta = theta - alpha * gradient
        residual = np.dot(X, theta) - Y
        gradient = np.dot(X.T, residual) / m
        i += 1
        num.append(i)  # iteration counter
        err.append(float(np.sum(residual ** 2)) / (2 * m))  # cost after this step
    return theta, num, err
def main():
    """Fit the sample workbook, then draw and save the two figures.

    Reads the data with ``read_data``, fits it with ``gradient_descent``,
    computes RMSE and the squared correlation coefficient, and saves a
    scatter-plus-fitted-line figure and a per-iteration curve figure as JPG
    files before showing each of them.
    """
    path = r"D:\test2\test.xlsx"
    nrows, x1, x, y = read_data(path)
    alpha = 0.001
    optimal, num, err = gradient_descent(x, y, alpha, nrows)

    # Index down to scalars explicitly: calling float() on a one-element
    # array is deprecated in recent NumPy releases.
    intercept = float(optimal[0, 0])
    slope = float(optimal[1, 0])

    # Fitted line evaluated at the sample points.
    y1 = intercept + slope * x1
    # Mean squared error and its root.
    MSE = np.sum((y1 - y) ** 2) / nrows
    RMSE = math.sqrt(MSE)
    # Pearson correlation coefficient between feature and target.
    R = np.corrcoef(x1.T, y.T)[0, 1]

    # --- Figure 1: scatter plot with the fitted line ---
    # A fresh figure per plot: subplot(111) would reuse the current figure
    # (and its axes) whenever show() does not close it.
    plt.figure()
    plt.scatter(x1, y, s=20, c="b", marker="o")
    plt.xlabel("X")
    plt.ylabel("Y")
    # Render the sign explicitly so a negative slope is not shown as "+-".
    sign = "+" if slope >= 0 else "-"
    fit_label = " $Y = {}{}{}*X$\n $R^2 = {}$\n $RMSE = {}$".format(
        round(intercept, 3),
        sign,
        round(abs(slope), 3),
        round(R ** 2, 3),
        round(RMSE, 3),
    )
    # Attach the label to the line itself rather than relying on the order
    # in which legend() pairs positional labels with artists.
    plt.plot(x1, y1, linewidth=1, color='red', label=fit_label)
    plt.legend(fontsize=10)
    plt.savefig('D:/test2/拟合曲线.jpg', dpi=600)
    plt.show()

    # --- Figure 2: per-iteration values returned by gradient_descent ---
    plt.figure()
    plt.plot(num, err, linewidth=1, color='red', label=u'损失曲线')
    plt.xlabel(u'次数')
    plt.ylabel(u'损失值')
    plt.legend(loc='upper right', fontsize=12)
    plt.savefig('D:/test2/损失曲线.jpg', dpi=600)
    plt.show()


if __name__ == '__main__':
    main()