# 数据源:https://pan.baidu.com/s/1Y7py5uSZ03SbNtL3dTKBkg
# 简单线性回归(最小二乘法)
import numpy as np
import matplotlib.pyplot as plt
# Load the sample points from the CSV file (comma-separated values).
points = np.genfromtxt(r'data/data.csv', delimiter=',')
# Column 0 is used as x and column 1 as y.
x, y = points[:, 0], points[:, 1]
# Draw the raw data as a scatter plot and display the figure.
plt.scatter(x, y)
plt.show()
# Loss function: mean squared error of the line y = w * x + b over the data.
def computer_cost(w, b, points):
    """Return the mean squared error of the line ``y = w * x + b``.

    Args:
        w: Slope of the line.
        b: Intercept of the line.
        points: Array-like of shape (M, 2+); column 0 is read as x and
            column 1 as y.

    Returns:
        The average of ``(y - w * x - b) ** 2`` over all M rows.

    Raises:
        ValueError: If ``points`` is empty or is not a 2-D table with at
            least two columns.
    """
    data = np.asarray(points, dtype=float)
    if data.ndim != 2 or data.shape[0] == 0 or data.shape[1] < 2:
        raise ValueError("points must be a non-empty (M, 2) array of x, y pairs")
    # One vectorised expression instead of a per-row Python loop.
    residuals = data[:, 1] - w * data[:, 0] - b
    return np.mean(residuals ** 2)
# Fitting helpers start here; first, a function that computes a mean.
def average(data):
    """Return the arithmetic mean of ``data``.

    Args:
        data: Sized iterable of numbers (e.g. a list or a 1-D NumPy array).

    Returns:
        The sum of the elements divided by their count.

    Raises:
        ZeroDivisionError: If ``data`` is empty.
    """
    # Use the built-in sum() rather than a hand-rolled accumulator that
    # shadowed it; the left-to-right summation order is unchanged.
    return sum(data) / len(data)
# Core fitting routine: closed-form least squares for y = w * x + b.
def fit(point):
    """Fit the line ``y = w * x + b`` to the data by least squares.

    Args:
        point: Array-like of shape (M, 2+); column 0 is read as x and
            column 1 as y.

    Returns:
        Tuple ``(w, b)`` holding the slope and the intercept.

    Raises:
        ValueError: If ``point`` is empty, is not a 2-D table with at least
            two columns, or all x values are identical (slope undefined).
    """
    # Work on the argument, not on a module-level variable, so the function
    # fits whatever data the caller passes in.
    data = np.asarray(point, dtype=float)
    if data.ndim != 2 or data.shape[0] == 0 or data.shape[1] < 2:
        raise ValueError("point must be a non-empty (M, 2) array of x, y pairs")
    x = data[:, 0]
    y = data[:, 1]
    if np.all(x == x[0]):
        raise ValueError("cannot fit a line: all x values are identical")

    x_bar = np.mean(x)
    centered = x - x_bar
    # sum((x - x_bar)^2) equals sum(x^2) - M * x_bar^2 algebraically, but
    # avoids subtracting two large, nearly equal numbers.
    w = np.sum(y * centered) / np.sum(centered ** 2)
    # The intercept is the mean of what is left after removing the slope term.
    b = np.mean(y - w * x)
    return w, b
# Demo: fit the line to the loaded data and print the parameters and the loss.
w, b = fit(points)
print('w is :', w)
print('b is :', b)
cost = computer_cost(w, b, points)
print(cost)
# Overlay the fitted line (red) on a scatter plot of the raw data.
plt.scatter(x, y)
# Predicted y for every x, evaluated in a single array expression.
pred_y = b + w * x
plt.plot(x, pred_y, c='r')
plt.show()
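# 1. 样本数据 (xi, yi) 是已知的,只需求出 w、b 两个参数即可。
# 2. 求出平均损失值(对每个点的平方误差求和,再除以总数 M):
#    L(w, b) = (1/M) * Σ (yi - w*xi - b)^2
#    yi:指的是实际值
#    xi:是输入(自变量)值;w*xi + b 才是预测值
# 3. 求出 x 的平均值 x̄(对 x 进行累加,除以总数 M)
# 4. 最后求出 w、b:
#    w = Σ yi*(xi - x̄) / (Σ xi^2 - M*x̄^2)
#    b = (1/M) * Σ (yi - w*xi)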