No introduction to the basic concepts here; this post just records the formula derivation, the code implementation, and a comparison with least squares.
Andrew Ng's course packs both parameters into a single variable theta. I never quite got the shapes of the intermediate variables to line up in that version, so I also implemented a version that keeps the two parameters separate.
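In short, the derivation the code below follows: for y = wx + b with m samples, the cost is

$$J(w,b) = \frac{1}{2m}\sum_{i=1}^{m}\bigl(wx_i + b - y_i\bigr)^2,$$

and differentiating gives

$$\frac{\partial J}{\partial w} = \frac{1}{m}\sum_{i=1}^{m}\bigl(wx_i + b - y_i\bigr)x_i, \qquad
\frac{\partial J}{\partial b} = \frac{1}{m}\sum_{i=1}^{m}\bigl(wx_i + b - y_i\bigr),$$

so each iteration takes the steps $w \leftarrow w - \alpha\,\partial J/\partial w$ and $b \leftarrow b - \alpha\,\partial J/\partial b$, where $\alpha$ is the learning rate (rate in the code).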
Code:
import time
import numpy as np
import pandas
import matplotlib.pyplot as plt
# random seed
rd = np.random.RandomState(round(time.time()))
# print a 5x5 identity matrix and a random matrix
arr1 = np.eye(5)
arr2 = rd.randint(-10, 10, (2, 3))
print('5x5 identity matrix:')
print(arr1)
print('2x3 random int matrix:')
print(arr2)
# randomly generate scattered data points
Population = rd.uniform(1, 50, 100)  # uniform distribution
Profit = rd.rand(100) * 1000         # also uniform, scaled to [0, 1000)
# sort both arrays to fake a roughly linear relationship; for a numpy array use its sort() method
Population.sort()
Profit.sort()
# draw the scatter plot
fig = plt.figure(figsize=(16, 9), dpi=40)
plt.scatter(Population, Profit)  # scatter plot
plt.grid(color='r', linestyle='--', linewidth=1, alpha=0.3)
plt.show()
# Cost function: use the MSE as the error estimate; to keep the derivative clean the objective is MSE/2m (m = number of samples)
# For linear regression y = wx + b there are two parameters, w and b, packed together into "theta"
def costFunction(x, y, theta):
    tmp = np.matmul(x, theta)
    cost = np.power(tmp - y, 2)
    return np.sum(cost) / (2 * len(x))
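# Quick sanity check (illustrative only; x_demo/y_demo are made-up points, not part of the original exercise):
# with a perfect fit the cost should be 0, and with theta = [0, 0] it should equal sum(y^2)/(2m).
x_demo = np.array([[1.0, 1.0], [2.0, 1.0], [3.0, 1.0]])  # design matrix with a column of ones: [x, 1]
y_demo = np.array([[3.0], [5.0], [7.0]])                 # exactly y = 2x + 1
print(costFunction(x_demo, y_demo, np.array([[2.0], [1.0]])))  # expected: 0.0
print(costFunction(x_demo, y_demo, np.array([[0.0], [0.0]])))  # expected: (9 + 25 + 49) / 6 ≈ 13.83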
# Gradient descent (single-theta version: w and b both live in theta)
def gradientDescent(x, y, theta, rate, iters):
    X = np.array(x).reshape(len(x), 1)
    Y = np.array(y).reshape(len(y), 1)
    theta = np.array(theta, dtype=float).reshape(2, 1)
    # append a column of ones to X so that X @ theta = w*x + b
    X = np.insert(X, 1, np.ones(len(x)), axis=1)
    cost = np.zeros(iters)
    for i in range(iters):
        error = np.dot(X, theta) - Y      # (m,1) residuals
        term = error * X[:, [0]]          # keep the column shape so the product stays (m,1)
        theta[0, 0] = theta[0, 0] - (rate / len(x)) * np.sum(term)   # step for w
        theta[1, 0] = theta[1, 0] - (rate / len(x)) * np.sum(error)  # step for b
        cost[i] = costFunction(X, Y, theta)
        if cost[i] < 1e-5:                # stop early once the cost is small enough
            break
    return theta, cost
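# Illustrative alternative (a sketch, not used below): the same update written fully vectorized,
# which sidesteps tracking the shapes of the individual terms.
def gradientDescentVectorized(x, y, theta, rate, iters):
    X = np.insert(np.array(x).reshape(len(x), 1), 1, np.ones(len(x)), axis=1)  # design matrix [x, 1]
    Y = np.array(y).reshape(len(y), 1)
    theta = np.array(theta, dtype=float).reshape(2, 1)
    cost = np.zeros(iters)
    for i in range(iters):
        error = X @ theta - Y                          # (m,1) residuals
        theta = theta - rate / len(x) * (X.T @ error)  # update w and b in one step
        cost[i] = costFunction(X, Y, theta)
    return theta, cost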
# Two-parameter versions of costFunction and gradientDescent
def costFun(x, y, theta_w, theta_b):
    x = x.reshape(len(x), 1)
    y = y.reshape(len(y), 1)
    cost = (np.dot(x, theta_w) + theta_b) - y
    return np.sum(np.power(cost, 2)) / (len(x) * 2)
def gradientDescent2(x, y, rate, iters, theta_w, theta_b):
    x = np.array(x).reshape(len(x), 1)
    y = np.array(y).reshape(len(y), 1)
    cost = np.zeros(iters)
    i = 0
    while i < iters:
        y_hat = np.dot(x, theta_w) + theta_b
        error = y_hat - y
        dw = 2 * np.vdot(x, error)  # derivative of the summed squared error w.r.t. w
        db = 2 * np.sum(error)      # derivative of the summed squared error w.r.t. b
        # update the parameters (the 1/(2m) factor matches the MSE/2m cost)
        theta_w = theta_w - rate / (len(x) * 2) * dw
        theta_b = theta_b - rate / (len(x) * 2) * db
        cost[i] = costFun(x, y, theta_w, theta_b)  # record the cost each iteration to check that it keeps decreasing
        if cost[i] < 1e-5:          # stop early once the cost is small enough
            break
        i += 1
    Theta = [theta_w, theta_b]
    return Theta, cost
# initialization; the inputs must be numpy arrays (convert first if they come from pandas)
# x = np.matrix(Population.values)
# y = np.matrix(Profit.values)
# hyperparameters
rate = 0.001  # learning rate
iters = 1500  # number of iterations
theta = np.matrix(np.array([10, 5]))  # initial values for w and b
result2 = gradientDescent2(Population, Profit, rate,iters,1,1)
print(result2)
# visualize the training result
plt.title('Linear Regression with 1D Feature')
X = np.linspace(np.min(Population),np.max(Population*1.1),len(Population)*100)
Y = X*result2[0][0] + result2[0][1]
ax1 = plt.scatter(Population, Profit, marker= 'o', s=50)
plt.plot(X,Y,'r', label='Gradient Descending')
plt.grid(color='r', linestyle='--', linewidth=1,alpha=0.3)
plt.xlabel('Input_value(x)')
plt.ylabel('Output_value(y)')
plt.show()
# Least-squares fit vs. gradient descent =============================================================================
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.datasets import make_regression
# generate a random 1-D regression dataset (kept for reference, unused here)
# X,Y = make_regression(n_samples=10,n_features=1,n_informative=1,\
# n_targets=1,noise=0.1,random_state=0)
Population = Population.reshape(np.size(Population),1)
Profit = Profit.reshape(np.size(Profit),1)
# split the data
X_train,X_test,Y_train,Y_test = train_test_split(Population, Profit,test_size=1/4,\
random_state=0)
# least squares
linreg = LinearRegression().fit(X_train,Y_train)
print('model coefficient w: {}\nmodel intercept b: {}\ntrain-set R^2: {}\ntest-set R^2: {:.3f}'
      .format(linreg.coef_, linreg.intercept_, linreg.score(X_train, Y_train),
              linreg.score(X_test, Y_test)))
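# For reference (an illustrative sketch; X_design / wb_closed are just local names for this check):
# the same least-squares fit in closed form, min ||[x, 1] @ [w; b] - y||^2 via np.linalg.lstsq.
# It should agree with the LinearRegression results above.
X_design = np.hstack([X_train, np.ones((len(X_train), 1))])
wb_closed, *_ = np.linalg.lstsq(X_design, Y_train, rcond=None)
print('closed-form least squares w: {}, b: {}'.format(wb_closed[0], wb_closed[1]))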
ax2 = plt.scatter(Population, Profit, marker= 'o', s=10,label='Samples')
X_ols = np.linspace(np.min(Population),np.max(Population*1.5),len(Population)*100)
Y_ols = X_ols * linreg.coef_+linreg.intercept_
Y_ols = Y_ols.reshape(np.size(Y_ols),1)
print(Y_ols.shape)
ax3 = plt.plot(X_ols,Y_ols,'g',label='Least Sum of Squares')
print("ax3:")
print(ax3)
# gradient descent
result = gradientDescent2(np.array(X_train), np.array(Y_train), rate,iters,1,1)
print(result)
X_d = np.linspace(np.min(Population), np.max(Population * 1.5), len(Population) * 100)
Y_d = X_d*result[0][0] + result[0][1]
ax4 = plt.plot(X_d,Y_d,'b',label='gradientDescent')
plt.legend()
# the single-theta call, left commented out (this is where the shapes originally didn't line up)..
# result1 = gradientDescent(Population, Profit, theta, rate, iters)
# print(result1)
Output:
As the saying goes, what is learned on paper always feels shallow..
References:
吴恩达机器学习ex1 (Andrew Ng Machine Learning ex1) - Heywhale.com
一文看懂简单线性回归:梯度下降法和最小二乘法(代码实现及数学公式详解) (Simple linear regression explained: gradient descent and least squares, with code and formula walkthrough) - Maxxi Chen, CSDN blog