Python implementations for Chapter 2 (the perceptron) of 《统计学习方法》.

Example 2.1, solved by stochastic gradient descent. The perceptron update rule: a sample (x_i, y_i) is misclassified when y_i * (omega . x_i + b) <= 0, and each misclassified sample triggers the update omega <- omega + alpha * y_i * x_i, b <- b + alpha * y_i, where alpha is the learning rate.
#!/usr/bin/env python
# -*- coding: UTF-8 -*-
"""
@author: XiangguoSun
@contact: sunxiangguodut@qq.com
@file: mine0304.py
@time: 2017/3/4 9:32
@software: PyCharm
"""
import numpy as np

# Example 2.1: the perceptron in primal form, trained by stochastic
# gradient descent with learning rate alpha = 1.
x_train = np.array([[3, 3], [4, 3], [1, 1]])
y_train = np.array([1, 1, -1])
alpha = 1
omega = np.zeros(2)
b = 0
i = 0
while i < len(x_train):
    # a point is misclassified when y * (omega . x + b) <= 0
    if (x_train[i].dot(omega) + b) * y_train[i] <= 0:
        omega = omega + alpha * y_train[i] * x_train[i]
        b = b + alpha * y_train[i]
        i = 0  # rescan from the first sample after every update
    else:
        i = i + 1
print(i, omega, b)
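As a quick sanity check (my own addition, not from the book), the following lines can be appended to the script above. With alpha = 1 and zero initialization, this run ends at omega = (1, 1), b = -3, the solution given in the book.

# Every training point should sit strictly on the correct side
# of the learned hyperplane, i.e. have a positive functional margin.
margins = y_train * (x_train.dot(omega) + b)
print("margins:", margins)  # all entries positive
assert (margins > 0).all()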
#!/usr/bin/env python
# -*- coding: UTF-8 -*-
"""
@author: XiangguoSun
@contact: sunxiangguodut@qq.com
@file: SGD.py
@time: 2017/3/4 11:41
@software: PyCharm
"""
import math

import numpy as np
import matplotlib.pyplot as plt


def prodata(omega1=2, omega2=-3, b=5, seed=1, size=30):
    """Generate linearly separable data around the line omega . x + b = 0."""
    np.random.seed(seed)
    omega = np.array([omega1, omega2])
    x1 = np.arange(0, size)
    t = np.random.normal(loc=0, scale=5, size=size)
    # boundary points, displaced vertically by the Gaussian noise t
    x2 = t - (b + omega[0] * x1) / omega[1]
    x_train = np.column_stack((x1, x2))
    # label each point by the side of the true hyperplane it falls on
    y_train = np.where(x_train.dot(omega) + b >= 0, 1, -1)
    return x_train, y_train


def SGD(x_train, y_train):
    """Stochastic gradient descent: update on one misclassified point at a time."""
    alpha = 0.01
    omega = np.zeros(2)
    b, i = 0, 0
    while i < len(x_train):
        if (x_train[i].dot(omega) + b) * y_train[i] <= 0:
            omega = omega + alpha * y_train[i] * x_train[i]
            b = b + alpha * y_train[i]
            i = 0
        else:
            i = i + 1
    return omega, b


if __name__ == '__main__':
    # generate the synthetic data
    omega1, omega2, b = 2, -3, 5
    size = 30
    x_train, y_train = prodata(omega1, omega2, b, 1, size)
    # estimate the parameters by stochastic gradient descent
    omega_estimate, b_estimate = SGD(x_train, y_train)
    # print the results; the true parameters are normalized because the
    # perceptron only recovers the hyperplane up to a scale factor
    print("actual is:", omega1, omega2, b)
    n = math.sqrt(omega1 ** 2 + omega2 ** 2 + b ** 2)
    print("normalized:", omega1 / n, omega2 / n, b / n)
    print("estimate is:", omega_estimate[0], omega_estimate[1], b_estimate)
    # plot
    fig = plt.figure()
    ax1 = fig.add_subplot(111)
    plt.xlabel('X1')
    plt.ylabel('X2')
    x1 = np.arange(0, 35, 34)  # just the two endpoints 0 and 34
    x2 = -(b + omega1 * x1) / omega2
    ax1.plot(x1, x2, c="black")   # the true line, in black
    x2 = -(b_estimate + omega_estimate[0] * x1) / omega_estimate[1]
    ax1.plot(x1, x2, c="red")     # the fitted line, in red
    for i in range(len(x_train)):
        if y_train[i] > 0:
            ax1.scatter(x_train[i, 0], x_train[i, 1], c="r", marker='o')
        else:
            ax1.scatter(x_train[i, 0], x_train[i, 1], c="b", marker='^')
    plt.show()
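A small follow-up (my own addition): the perceptron only determines the separating hyperplane up to a positive scale factor, which is why the script normalizes the true parameters before printing; the estimate can be normalized the same way for a fair comparison.

# Run after the script above: put both parameter vectors on the unit sphere.
est = np.array([omega_estimate[0], omega_estimate[1], b_estimate])
print("normalized estimate:", est / np.linalg.norm(est))
true = np.array([omega1, omega2, b], dtype=float)
print("normalized truth:   ", true / np.linalg.norm(true))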
#!/usr/bin/env python
# -*- coding: UTF-8 -*-
"""
@author: XiangguoSun
@contact: sunxiangguodut@qq.com
@file: SGD2.py
@time: 2017/3/4 11:41
@software: PyCharm
"""
import math

import numpy as np
import matplotlib.pyplot as plt


def prodata(omega1=2, omega2=-3, b=5, seed=1, size=30):
    """Generate linearly separable data around the line omega . x + b = 0."""
    np.random.seed(seed)
    omega = np.array([omega1, omega2])
    x1 = np.arange(0, size)
    t = np.random.normal(loc=0, scale=5, size=size)
    x2 = t - (b + omega[0] * x1) / omega[1]
    x_train = np.column_stack((x1, x2))
    y_train = np.where(x_train.dot(omega) + b >= 0, 1, -1)
    return x_train, y_train


def SGD(x_train, y_train):
    """Stochastic gradient descent: update on one misclassified point at a time."""
    alpha = 0.01
    omega = np.zeros(2)
    b, i = 0, 0
    while i < len(x_train):
        if (x_train[i].dot(omega) + b) * y_train[i] <= 0:
            omega = omega + alpha * y_train[i] * x_train[i]
            b = b + alpha * y_train[i]
            i = 0
        else:
            i = i + 1
    return omega, b


def SGD2(x_train, y_train, epsilon=1, alpha=0.01):
    """Batch gradient descent on the perceptron loss
    L(omega, b) = -sum over misclassified i of y_i * (omega . x_i + b)."""
    omega, b = np.zeros(2), 0

    def get_sample(omega, b, x_train, y_train):
        # collect the currently misclassified samples
        x, y = [], []
        for i in range(len(x_train)):
            if (x_train[i].dot(omega) + b) * y_train[i] <= 0:
                x.append(x_train[i])
                y.append(y_train[i])
        return np.array(x), np.array(y)

    def gradient_sum(x, y):
        # minus the gradient of L: (sum of y_i * x_i, sum of y_i)
        return (y.reshape(-1, 1) * x).sum(0), y.sum()

    def target(omega, b, x_train, y_train):
        x, y = get_sample(omega, b, x_train, y_train)
        if len(x) != 0:
            return -(y * (x.dot(omega) + b)).sum(), x, y
        else:
            return 0, x, y

    old, x, y = target(omega, b, x_train, y_train)
    now = old - (epsilon + 1)  # force at least one pass through the loop
    while abs(old - now) > epsilon and len(x) != 0:
        omega = omega + alpha * gradient_sum(x, y)[0]
        b = b + alpha * gradient_sum(x, y)[1]
        old = now
        now, x, y = target(omega, b, x_train, y_train)
        print("omega, b, now:", omega, b, now)
    return omega, b, old - now


if __name__ == '__main__':
    # generate the synthetic data
    omega1, omega2, b = 2, -3, 5
    size = 30
    x_train, y_train = prodata(omega1, omega2, b, 1, size)
    # estimate the parameters by batch gradient descent
    omega_estimate, b_estimate, error = SGD2(x_train, y_train)
    # omega_estimate, b_estimate = SGD(x_train, y_train)
    # print the results
    print("actual is:", omega1, omega2, b)
    n = math.sqrt(omega1 ** 2 + omega2 ** 2 + b ** 2)
    print("normalized:", omega1 / n, omega2 / n, b / n)
    print("estimate is:", omega_estimate[0], omega_estimate[1], b_estimate)
    print("error:", error)
    # plot
    fig = plt.figure()
    ax1 = fig.add_subplot(111)
    plt.xlabel('X1')
    plt.ylabel('X2')
    x1 = np.arange(0, 35, 34)  # just the two endpoints 0 and 34
    x2 = -(b + omega1 * x1) / omega2
    ax1.plot(x1, x2, c="black")   # the true line, in black
    x2 = -(b_estimate + omega_estimate[0] * x1) / omega_estimate[1]
    ax1.plot(x1, x2, c="red")     # the fitted line, in red
    for i in range(len(x_train)):
        if y_train[i] > 0:
            ax1.scatter(x_train[i, 0], x_train[i, 1], c="r", marker='o')
        else:
            ax1.scatter(x_train[i, 0], x_train[i, 1], c="b", marker='^')
    plt.show()
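To make the objective in SGD2 concrete, here is a worked single step (my own sketch, reusing the Example 2.1 data): at omega = (0, 0), b = 0 every point has margin 0 and counts as misclassified, so the batch update uses the sums over all three points.

import numpy as np

# One batch gradient step of SGD2 by hand, at omega = (0, 0), b = 0.
x = np.array([[3, 3], [4, 3], [1, 1]])
y = np.array([1, 1, -1])
grad_w = (y.reshape(-1, 1) * x).sum(0)   # sum of y_i * x_i = (6, 5)
grad_b = y.sum()                         # sum of y_i = 1
print(grad_w, grad_b)  # the step is omega += alpha * (6, 5), b += alpha * 1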
Learning-rate update: an annealing schedule (see the code below). At iteration k the step size is alpha_k = 1 / (1 + 0.001 * k), so early updates are large and later updates shrink toward zero.
def SGD3(x_train, y_train, epsilon=1):
    iterator = 0
    alpha = 1.0 / (1 + iterator * 0.001)  # annealing schedule
    omega, b = np.zeros(2), 0

    def get_sample(omega, b, x_train, y_train):
        # collect the currently misclassified samples
        x, y = [], []
        for i in range(len(x_train)):
            if (x_train[i].dot(omega) + b) * y_train[i] <= 0:
                x.append(x_train[i])
                y.append(y_train[i])
        return np.array(x), np.array(y)

    def gradient_sum(x, y):
        return (y.reshape(-1, 1) * x).sum(0), y.sum()

    def target(omega, b, x_train, y_train):
        x, y = get_sample(omega, b, x_train, y_train)
        if len(x) != 0:
            return -(y * (x.dot(omega) + b)).sum(), x, y
        else:
            return 0, x, y

    old, x, y = target(omega, b, x_train, y_train)
    now = old - (epsilon + 1)
    while abs(old - now) > epsilon and len(x) != 0:
        omega = omega + alpha * gradient_sum(x, y)[0]
        b = b + alpha * gradient_sum(x, y)[1]
        old = now
        now, x, y = target(omega, b, x_train, y_train)
        print("omega, b, now, alpha:", omega, b, now, alpha)
        iterator = iterator + 1
        alpha = 1.0 / (1 + iterator * 0.001)  # anneal the learning rate
    return omega, b, old - now
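To see what this schedule does on its own, here is a tiny illustration (my own addition, not from the original post):

# The annealing schedule in isolation: alpha_k = 1 / (1 + 0.001 * k)
# decays slowly from 1 toward 0 as the iteration counter k grows.
for k in (0, 100, 1000, 10000):
    print(k, 1.0 / (1 + 0.001 * k))
# prints 1.0, then about 0.909, 0.5, and 0.0909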
#!/usr/bin/env python
# -*- coding: UTF-8 -*-
"""
@author: XiangguoSun
@contact: sunxiangguodut@qq.com
@file: SGD2.py
@time: 2017/3/4 11:41
@software: PyCharm
"""
import numpy as np


def SGD4(x_train, y_train):
    """Dual form of the perceptron: learn one coefficient alpha_i per sample,
    with omega = sum_i alpha_i * y_i * x_i, using the precomputed Gram matrix."""
    N = len(x_train)
    alpha = np.zeros((N,))

    def Gram(x_train):
        gram = np.zeros((N, N))
        for i in range(N):
            for j in range(N):
                gram[i, j] = x_train[i].dot(x_train[j])
        return gram

    b, i, theta = 0, 0, 1  # theta is the learning rate
    gram = Gram(x_train)
    print(gram)
    while i < len(x_train):
        # sample i is misclassified when
        # y_i * (sum_j alpha_j * y_j * (x_j . x_i) + b) <= 0
        if ((alpha * y_train).dot(gram[:, i]) + b) * y_train[i] <= 0:
            alpha[i] = alpha[i] + theta
            b = b + theta * y_train[i]
            i = 0
        else:
            i = i + 1
        print(i, alpha, b)
    return alpha, b


if __name__ == '__main__':
    x_train = np.array([[3, 3], [4, 3], [1, 1]])
    y_train = np.array([1, 1, -1])
    print(SGD4(x_train, y_train))
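A follow-up sketch (my own addition, not part of the original post): the dual coefficients convert back to the primal parameters via omega = sum_i alpha_i * y_i * x_i. For Example 2.1, SGD4 ends at alpha = (2, 0, 5) and b = -3, which recovers the primal solution omega = (1, 1), b = -3 given in the book.

import numpy as np

# Hypothetical helper: recover (omega, b) from a dual solution.
def dual_to_primal(alpha, b, x_train, y_train):
    omega = ((alpha * y_train).reshape(-1, 1) * x_train).sum(axis=0)
    return omega, b

x_train = np.array([[3, 3], [4, 3], [1, 1]])
y_train = np.array([1, 1, -1])
alpha = np.array([2.0, 0.0, 5.0])  # the dual solution SGD4 reaches here
omega, b = dual_to_primal(alpha, -3, x_train, y_train)
print(omega, b)  # [1. 1.] -3 -- matches the primal result of Example 2.1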
Exercise 2.1 is straightforward: generate an XOR dataset yourself and feed it to the methods above. XOR is not linearly separable, so the perceptron keeps updating forever; see the sketch after these notes.
Exercise 2.2 has already been answered above (see the extension of Example 2.1 with the generated dataset).
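Here is a minimal sketch of Exercise 2.1 (my own, assuming the usual encoding of XOR labels as +1/-1): it reuses the update loop from Example 2.1, with an iteration cap added because the loop provably never terminates on data that is not linearly separable.

import numpy as np

# XOR dataset: y = x1 XOR x2, mapped to {-1, +1}. Not linearly separable.
x_train = np.array([[0, 0], [0, 1], [1, 0], [1, 1]])
y_train = np.array([-1, 1, 1, -1])

omega, b, alpha = np.zeros(2), 0.0, 1.0
updates, max_updates = 0, 1000  # cap, since training cannot converge on XOR
i = 0
while i < len(x_train) and updates < max_updates:
    if (x_train[i].dot(omega) + b) * y_train[i] <= 0:
        omega = omega + alpha * y_train[i] * x_train[i]
        b = b + alpha * y_train[i]
        updates += 1
        i = 0
    else:
        i += 1
print("updates performed:", updates, "omega:", omega, "b:", b)
# after max_updates the perceptron is still misclassifying some point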
If you found this useful, feel free to leave a tip. Thanks!