本文以sklearn里的Boston数据集为例,尝试用批量梯度下降算法训练并预测数据。
一.以下是编写的类函数(路径为linear/test1):
import numpy as np
#采用批量梯度下降算法(BGD)解多元线性回归算法
class gd:
    """Multivariate linear regression solved by batch gradient descent (BGD)."""

    def __init__(self):
        self.core = None   # intercept term theta_0 (set after fitting)
        self.th = None     # feature weights theta_1..theta_m (set after fitting)
        self.theat = None  # full parameter vector [theta_0, ..., theta_m]

    def gra(self, x_train, y_train, eta, elison=1e-8, n=1e5):
        """Fit the model by batch gradient descent.

        Parameters
        ----------
        x_train : array-like, shape (n_samples, n_features)
        y_train : array-like, shape (n_samples,)
        eta : float
            Learning rate.
        elison : float, optional
            Convergence tolerance on the change of the loss J between steps
            (was hard-coded; exposed as a backward-compatible keyword).
        n : int, optional
            Maximum number of iterations (was hard-coded).

        Returns
        -------
        numpy.ndarray
            All parameter vectors visited, one row per step.
        """
        x_train = np.array(x_train)
        y_train = np.array(y_train)
        x_train = x_train.reshape((len(y_train), -1))
        if x_train.shape[0] != len(y_train):
            raise ValueError("x_train and y_train must have the same number of samples")
        # Prepend a column of ones so theta[0] acts as the intercept.
        self.x_b = np.hstack([np.ones((len(y_train), 1)), x_train])
        ini_theta = np.zeros(self.x_b.shape[1])

        def J(theta):
            # Mean squared error loss (the objective being minimized).
            residual = y_train - self.predict(self.x_b, theta)
            return residual.dot(residual) / len(y_train)

        def dJ(theta):
            # Analytic gradient of J: (2/m) * X_b^T (X_b theta - y).
            return (self.x_b.T).dot(self.x_b.dot(theta) - y_train) * 2 / len(y_train)

        def gradient_descent():
            theta = ini_theta
            theta_list = [theta]
            i = 0
            while i < n:
                last_theta = theta
                theta = theta - dJ(theta) * eta
                theta_list.append(theta)
                # Stop once the loss improvement falls below the tolerance.
                if abs(J(theta) - J(last_theta)) < elison:
                    break
                i += 1
            self.theat = theta
            # BUG FIX: the original stored theta_list[0] (the all-zero starting
            # vector) in self.core, so `core` was always zeros. The learned
            # intercept is theta[0].
            self.core = theta[0]
            self.th = theta[1:]
            return theta_list

        return np.array(gradient_descent())

    def predict(self, x, theat):
        """Return x @ theat.

        `x` must already include the leading bias column, i.e. have shape
        (n_samples, len(theat)).
        """
        x = np.array(x)
        if x.shape[1] != len(theat):
            raise ValueError("number of columns of x must equal len(theat)")
        return np.array(x.dot(theat))

    def __repr__(self):
        return 'gd()'
二.以下是进行数据测试(Boston):
# Demo: fit the Boston housing data with the BGD linear regressor.
# NOTE(review): sklearn.datasets.load_boston was removed in scikit-learn 1.2;
# running this demo requires scikit-learn < 1.2.
import numpy as np                      # was missing: np.argsort below raised NameError
from sklearn import datasets            # was missing: `datasets` below raised NameError
from sklearn.preprocessing import StandardScaler

from linear.test1 import gd

data = datasets.load_boston()
t = data.data
r = data.target
# Keep only samples with target < 50 (the Boston target is clipped at 50).
x = t[r < 50]
y = r[r < 50]
scaler = StandardScaler()
scaler.fit(x)
x = scaler.transform(x)  # standardize features so a single learning rate works
model = gd()
model.gra(x, y, 0.01)  # batch gradient descent
print(model.theat)
# Feature names ordered by learned weight: most negative first, most positive last.
print(data.feature_names[np.argsort(model.th)])
三.以下是运行结果:
其中feature_names输出中越靠后的特征(权重为正值)同房价的正相关程度越高:结合上图结果,即"RM""RAD""ZN"等特征数据越大,房价越高;越靠前的特征(权重为负值)同房价的负相关程度越高:同样结合上图可知,"DIS""LSTAT""TAX"等特征数据越大,房价越低。
初学机器学习算法,程序设计如有不妥之处,还请大佬们多多指教,谢谢了。