(一) Stochastic gradient descent:
The previous post introduced the gradient descent algorithm. In practice, gradient descent is usually implemented in one of two flavors: stochastic gradient descent or batch gradient descent.
The basic idea of stochastic gradient descent: update the parameters once per training sample rather than once per full sweep, roughly

for j in range(numIter):    # repeat for several passes (epochs)
    for i in range(m):      # one parameter update per training sample
        update the weights using only sample i

Each update touches a single data point instead of the whole training set, so an individual step is less accurate than full batch gradient descent and the path may wander, but the overall trend still moves toward the minimum. Because every step is so cheap, this saves a great deal of time and the algorithm runs much faster.
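To make this concrete, here is a minimal, self-contained sketch of SGD for least-squares linear regression (the function name, toy data, and step size are illustrative, not from the original post):

import numpy as np

def sgd_least_squares(X, y, alpha=0.01, epochs=50):
    m, n = X.shape
    w = np.zeros(n)
    for _ in range(epochs):
        for i in np.random.permutation(m):      # visit samples in random order
            grad = (X[i].dot(w) - y[i]) * X[i]  # gradient from one sample only
            w -= alpha * grad                   # one cheap update per sample
    return w

# toy usage: recover y = 2*x
X = np.array([[1.0], [2.0], [3.0]])
y = np.array([2.0, 4.0, 6.0])
print(sgd_least_squares(X, y))   # approaches [2.0]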
(二) Implementation (adapted from Machine Learning in Action)
from numpy import *

def sigmoid(inX):
    # logistic function, used by both routines below
    # (missing from the original listing)
    return 1.0 / (1.0 + exp(-inX))

def loadDataSet():
    dataMat = []; labelMat = []
    fr = open('testSet.txt')
    for line in fr.readlines():
        lineArr = line.strip().split()
        # prepend a constant 1.0 so the first weight acts as the bias
        dataMat.append([1.0, float(lineArr[0]), float(lineArr[1])])
        labelMat.append(int(lineArr[2]))
    return dataMat, labelMat
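loadDataSet expects each line of the book's testSet.txt to carry two feature values and a 0/1 class label separated by whitespace, along these lines (values illustrative):

-0.017612   14.053064   0
-1.395634    4.662541   1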
def stocGradAscent0(dataMatrix, classLabels):
    m, n = shape(dataMatrix)
    alpha = 0.01
    weights = ones(n)   # initialize to all ones
    for i in range(m):  # one update per training sample
        h = sigmoid(sum(dataMatrix[i] * weights))
        error = classLabels[i] - h              # scalar, not a vector
        weights = weights + alpha * error * dataMatrix[i]
    return weights
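A quick way to exercise it (a usage sketch; following the book, the data list is converted to a NumPy array so the arithmetic above is elementwise):

dataArr, labelMat = loadDataSet()
weights = stocGradAscent0(array(dataArr), labelMat)
print(weights)   # three weights: bias, x1, x2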
def stocGradAscent1(dataMatrix, classLabels, numIter=150):
    m, n = shape(dataMatrix)
    weights = ones(n)   # initialize to all ones
    for j in range(numIter):
        dataIndex = list(range(m))  # list() so del works under Python 3
        for i in range(m):
            # alpha decreases with iteration but, thanks to the constant
            # 0.0001, never reaches 0
            alpha = 4 / (1.0 + j + i) + 0.0001
            # pick a random remaining sample (without replacement per pass)
            randIndex = int(random.uniform(0, len(dataIndex)))
            # the book indexes dataMatrix with randIndex directly, a known
            # erratum; dataIndex[randIndex] is the sample actually drawn
            sample = dataIndex[randIndex]
            h = sigmoid(sum(dataMatrix[sample] * weights))
            error = classLabels[sample] - h
            weights = weights + alpha * error * dataMatrix[sample]
            del(dataIndex[randIndex])
    return weights
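Compared with stocGradAscent0, this version (a) decays the learning rate so later updates perturb the weights less, and (b) draws the samples of each pass in random order to damp the periodic oscillations a fixed visiting order can cause. Usage mirrors the simpler routine (a sketch):

dataArr, labelMat = loadDataSet()
weights = stocGradAscent1(array(dataArr), labelMat, numIter=150)
print(weights)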
(三) A worked example of stochastic gradient descent in sklearn:
from sklearn.linear_model import SGDClassifier
help(SGDClassifier)
X = [[0., 0.], [1., 1.]]
y = [0, 1]
clf = SGDClassifier(loss="hinge", penalty="l2")
'''
Default parameters, as echoed by the classifier's repr:
SGDClassifier(alpha=0.0001, average=False, class_weight=None, epsilon=0.1,
              eta0=0.0, fit_intercept=True, l1_ratio=0.15,
              learning_rate='optimal', loss='hinge', max_iter=None,
              n_iter=None, n_jobs=1, penalty='l2', power_t=0.5,
              random_state=None, shuffle=True, tol=None, verbose=0,
              warm_start=False)
'''
clf.fit(X, y)                       # learn a linear separator with hinge loss
clf.predict([[1., .8]])             # predicted class label
clf.coef_                           # learned weights, shape (1, n_features)
clf.intercept_                      # bias term
clf.decision_function([[2., 2.]])   # signed distance to the hyperplane
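For intuition (a hypothetical check, not part of the original example): in this binary problem the sign of decision_function determines predict, with a positive score mapping to class 1:

scores = clf.decision_function([[2., 2.], [-1., -1.]])
labels = clf.predict([[2., 2.], [-1., -1.]])
print(scores > 0, labels)   # positive score <=> predicted class 1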
clf = SGDClassifier(loss="log", penalty="l2").fit(X, y)
# predict_proba is available for probabilistic losses such as "log"
# (note: recent sklearn releases renamed this loss to "log_loss")
print("predict_proba", clf.predict_proba([[3., 3.]]))
clf = SGDClassifier(loss="log", penalty="elasticnet").fit(X, y)
clf.predict_proba([[3., 3.]])
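The elasticnet penalty blends L1 and L2 regularization through l1_ratio (default 0.15, visible in the defaults listed above); a sketch of setting it explicitly (the value 0.5 is illustrative):

clf = SGDClassifier(loss="log", penalty="elasticnet", l1_ratio=0.5).fit(X, y)
clf.predict_proba([[3., 3.]])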