import numpy as np
import matplotlib.pyplot as plt
# 1.计算cost
def cost_function(data, x, y, theta0, theta1):
    """Return the mean-squared-error cost J = sum((theta0 + theta1*x - y)^2) / (2m)."""
    sample_count = data.shape[0]  # number of training samples
    residuals = theta0 + theta1 * x - y
    return np.sum(residuals ** 2) / (2 * sample_count)
# 2.梯度下降 batch gradient descend
def BGD(data, x, y, theta0, theta1, alpha):
    """Perform ONE step of batch gradient descent for y ≈ theta0 + theta1*x.

    Args:
        data: (m, 2) sample array, used only for the sample count m.
        x, y: feature and target columns (length m).
        theta0, theta1: current intercept and slope.
        alpha: learning rate.

    Returns:
        (theta0, theta1) after one simultaneous update.
    """
    m = data.shape[0]
    # BUG FIX: compute the residuals once with the *current* parameters.
    # The original updated theta0 first and then reused the new theta0 when
    # computing theta1's gradient, violating the simultaneous-update rule of
    # batch gradient descent and biasing the theta1 step.
    error = theta0 + theta1 * x - y
    theta0 = theta0 - alpha * np.sum(error) / m
    theta1 = theta1 - alpha * np.sum(error * x) / m
    return theta0, theta1
def linear_regression():
    """Fit y = theta0 + theta1*x to data.csv by batch gradient descent and plot the fit."""
    data = np.loadtxt('data.csv', dtype=np.float32, delimiter=',')  # CSV: col 0 = x, col 1 = y
    x, y = data[:, 0], data[:, 1]
    # hyper-parameters and initial guesses
    iterations = 10000
    theta0 = theta1 = 10
    alpha = 0.01
    # iterate gradient descent
    for _ in range(iterations):
        theta0, theta1 = BGD(data, x, y, theta0, theta1, alpha)
    final_cost = cost_function(data, x, y, theta0, theta1)
    print(f'Final parameters:θ0 = {theta0:.4f} θ1 = {theta1:.4f} cost = {final_cost:.4f}')
    # scatter the samples and overlay the fitted line
    plt.scatter(x, y)
    plt.plot(x, theta0 + theta1 * x, 'r')
    plt.show()

if __name__ == '__main__':
    linear_regression()
import numpy as np
import matplotlib.pyplot as plt
# 1.计算cost function
def cost_function(data, x, y, theta0, theta1):
    """Mean-squared-error cost of the line theta0 + theta1*x over all m samples."""
    num_samples = data.shape[0]  # m, the sample count
    squared_errors = (theta0 + theta1 * x - y) ** 2
    return np.sum(squared_errors) / (2 * num_samples)
# 2.梯度下降
def batch_gredient_descend(data, x, y, theta0, theta1, alpha):
    """One simultaneous batch-gradient-descent update of (theta0, theta1).

    Args:
        data: (m, 2) sample array, used only for the sample count m.
        x, y: feature and target columns (length m).
        theta0, theta1: current intercept and slope.
        alpha: learning rate.

    Returns:
        (theta0, theta1) after one update step.
    """
    m = data.shape[0]
    # BUG FIX: evaluate the residuals with the current parameters *before*
    # either update. The original overwrote theta0 first, so theta1's
    # gradient used the new theta0 — not a simultaneous update.
    error = theta0 + theta1 * x - y
    theta0 = theta0 - alpha * np.sum(error) / m
    theta1 = theta1 - alpha * np.sum(error * x) / m
    return theta0, theta1
# 3.多次迭代(梯度下降)
def multi_BGD(data, x, y, theta0, theta1, alpha, count, cost_list):
    """Run `count` batch-gradient-descent steps, appending each step's cost to cost_list.

    Prints a progress line every 100 iterations and returns the final (theta0, theta1).
    """
    for step in range(count):
        theta0, theta1 = batch_gredient_descend(data, x, y, theta0, theta1, alpha)
        if step % 100 == 0:
            # log progress every 100 iterations
            print(f'Epoch {step}: θ0 = {theta0:.4f} θ1 = {theta1:.4f} cost = {cost_function(data,x,y,theta0,theta1):.4f}')
        # record every iteration's cost so convergence can be plotted later
        cost_list.append(cost_function(data, x, y, theta0, theta1))
    return theta0, theta1
# 4.绘制拟合曲线
def plot_linear(x, y, theta0, theta1):
    """Scatter the samples and overlay the fitted line theta0 + theta1*x."""
    plt.title('Results Of Iterations')
    plt.xlabel('x')
    plt.ylabel('y')
    plt.scatter(x, y)
    plt.plot(x, theta0 + theta1 * x, 'r')
    plt.show()
def linear_regression():
    """Single-variable linear regression on data.csv via batch gradient descent.

    Prints the parameters before and after training, plots the cost history
    (to check convergence), then plots the fitted line over the data.
    """
    data = np.loadtxt('data.csv', dtype=np.float32, delimiter=',')  # CSV: col 0 = x, col 1 = y
    x = data[:, 0]
    y = data[:, 1]
    # initial parameters and hyper-parameters
    theta0 = theta1 = 10
    alpha = 0.01
    count = 10000  # number of gradient-descent iterations
    cost_list = []
    # report the starting parameters
    print(f'Begin para:θ0 = {theta0:.4f} θ1 = {theta1:.4f} alpha = {alpha:.4f}')
    theta0, theta1 = multi_BGD(data, x, y, theta0, theta1, alpha, count, cost_list)
    # report the trained parameters
    print(f'End para:θ0 = {theta0:.4f} θ1 = {theta1:.4f} alpha = {alpha:.4f}')
    # cost-vs-iteration curve: should flatten out if training converged
    plt.plot(cost_list)
    plt.show()
    plot_linear(x, y, theta0, theta1)

if __name__ == '__main__':
    linear_regression()
# data.csv download
# Data source: 流年若逝
# See also (covers single-variable linear regression, multivariate linear regression, and the normal equation):
# GitHub link