Gradient Descent
1. Data Preprocessing
In practice, the data is usually normalized before running gradient descent, so that all features are on a comparable scale.
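A minimal sketch of that step, assuming plain z-score standardization with NumPy (the array used here is illustrative only):

import numpy as np

x_raw = np.array([1.0, 5.0, 10.0, 50.0, 100.0])
# Standardize to zero mean and unit variance so the feature scale does not
# distort the gradient steps.
x_scaled = (x_raw - x_raw.mean()) / x_raw.std()
print(x_scaled)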
2. Solving Linear Regression with Gradient Descent: A Simple Example
import numpy as np

# Example hyper-parameters and starting values.
maxgen = 100          # maximum number of iterations
alpha0 = 0.01         # base learning rate
seta = np.zeros(1)    # slope parameter (theta), initialized to zero
b = 0.0               # intercept, initialized to zero

x = np.arange(-2, 2, 0.1)
y = 2 * x + np.random.random(len(x))
x = x.reshape((len(x), 1))
y = y.reshape((len(x), 1))

for i in range(maxgen):  # iterate
    alpha = 1 / float(i + 1) + alpha0                      # decaying learning rate
    e = np.dot(x, seta.reshape((len(seta), 1))) + b - y    # residuals, a 2-D column vector
    mse = np.linalg.norm(e)
    delta_seta = np.dot(e.T, x)[0] / len(x)                # gradient w.r.t. seta, averaged over samples to keep the step stable
    delta_seta_norm = np.linalg.norm(delta_seta)
    b = b - alpha * np.mean(e)                             # update the intercept
    seta = seta - alpha * delta_seta                       # update the slope
    print('iteration:', i)
    print('gradient norm:', delta_seta_norm, 'seta:', seta, 'b:', b, 'mse:', mse)
    print('alpha:', alpha, 'mean(e):', np.mean(e))
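As a quick sanity check (an added sketch, assuming the x and y arrays from the snippet above are still in scope), the learned seta and b can be compared with NumPy's closed-form least-squares fit, which should give a slope near 2 and an intercept near 0.5, the mean of the uniform noise:

# polyfit returns the fitted coefficients from highest degree to lowest: [slope, intercept].
slope, intercept = np.polyfit(x.ravel(), y.ravel(), 1)
print('least-squares slope:', slope, 'intercept:', intercept)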
3. Case Study
import numpy as np

def fit_gd(self, X_train, y_train, eta=0.01, n_iters=1e4):
    """Train a Linear Regression model on the training set X_train, y_train using gradient descent."""
    assert X_train.shape[0] == y_train.shape[0], \
        "the size of X_train must be equal to the size of y_train"

    def J(theta, X_b, y):
        # Mean squared error; if it overflows (e.g. eta is too large), treat it as infinite.
        try:
            return np.sum((y - X_b.dot(theta)) ** 2) / len(y)
        except Exception:
            return float('inf')

    def dJ(theta, X_b, y):
        # Vectorized gradient of the MSE loss with respect to theta.
        return X_b.T.dot(X_b.dot(theta) - y) * 2. / len(y)

    def gradient_descent(X_b, y, initial_theta, eta, n_iters=1e4, epsilon=1e-8):
        theta = initial_theta
        cur_iter = 0
        while cur_iter < n_iters:
            gradient = dJ(theta, X_b, y)
            last_theta = theta
            theta = theta - eta * gradient
            # Stop when the loss no longer changes by more than epsilon.
            if abs(J(theta, X_b, y) - J(last_theta, X_b, y)) < epsilon:
                break
            cur_iter += 1
        return theta

    # Prepend a column of ones so theta[0] acts as the intercept, then run
    # gradient descent from an all-zero starting point. The attribute names
    # below (_theta, intercept_, coef_) are assumed for this class.
    X_b = np.hstack([np.ones((len(X_train), 1)), X_train])
    initial_theta = np.zeros(X_b.shape[1])
    self._theta = gradient_descent(X_b, y_train, initial_theta, eta, n_iters)
    self.intercept_ = self._theta[0]
    self.coef_ = self._theta[1:]
    return self
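A minimal usage sketch, assuming fit_gd is defined at module level as above and attached to a bare LinearRegression class (the class name, data, and expected values below are illustrative, not part of the original code):

import numpy as np

class LinearRegression:
    pass

# Attach the fit_gd method defined above to the class.
LinearRegression.fit_gd = fit_gd

# Synthetic data: y = 3x + 4 plus Gaussian noise.
np.random.seed(0)
x = 2 * np.random.random(size=100)
y = 3. * x + 4. + np.random.normal(size=100)
X = x.reshape(-1, 1)

reg = LinearRegression()
reg.fit_gd(X, y)
print(reg.coef_, reg.intercept_)  # roughly [3.] and 4.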
Appendix
The simple example draws on the following link:
Original post: https://blog.csdn.net/just_do_it_123/article/details/51056260