Suppose the sample space has $d$ dimensions, and let $\mathbf{x} = \{x_1, x_2, x_3, \ldots, x_d\}$ denote a sample point. The goal of linear regression is to fit the mapping from inputs to outputs with $d+1$ parameters. It is customary to extend $\mathbf{x}$ to the $(d+1)$-dimensional vector $\mathbf{x} = \{x_0, x_1, x_2, x_3, \ldots, x_d\}$, with $x_0$ fixed at 1 to act as the bias term. The linear regression model is:

$$f(\mathbf{x}) = \mathbf{w}^{\top}\mathbf{x} = \sum_{i=0}^{d} w_i x_i$$
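Training means choosing the weights $\mathbf{w}$ that minimize the mean squared error over the $m$ training samples. As a brief sketch of the full-batch gradient-descent step the code below implements (the $\frac{1}{2}$ factor is a convention that cancels during differentiation, and $\alpha$ is the learning rate):

$$L(\mathbf{w}) = \frac{1}{2m}\sum_{j=1}^{m}\left(\mathbf{w}^{\top}\mathbf{x}^{(j)} - y^{(j)}\right)^{2}, \qquad \mathbf{w} \leftarrow \mathbf{w} - \frac{\alpha}{m}\,X^{\top}(X\mathbf{w} - \mathbf{y})$$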
Once the basic principles of linear regression and gradient descent are clear, writing them up in Python takes only about ten minutes:
import numpy as np
from sklearn.datasets import load_breast_cancer
from sklearn.preprocessing import scale
from sklearn.model_selection import train_test_split
class LinearRegression(object):
    def __init__(self, training_way='gd'):
        # 'gd' trains by gradient descent; any other value falls back
        # to the closed-form normal equation.
        self.way = training_way
        self.weight = np.array([])

    def gradientDescent(self, X, Y, alpha, epoch):
        # Initialize the weights randomly, then take `epoch` full-batch
        # steps: W <- W - alpha * X^T (XW - Y) / m
        W = np.random.normal(0, 1, size=(X.shape[1],))
        for i in range(epoch):
            W -= alpha * (X.T).dot(X.dot(W) - Y) / X.shape[0]
        return W

    def fit(self, train_data, train_target, alpha=0.1, epoch=300):
        # Append a constant column of ones so that the last weight acts
        # as the bias term (the x_0 component from the text).
        X = np.ones((train_data.shape[0], train_data.shape[1] + 1))
        X[:, 0:-1] = train_data
        Y = train_target
        if self.way == 'gd':
            self.weight = self.gradientDescent(X, Y, alpha, epoch)
        else:
            # Normal equation: W = (X^T X)^{-1} X^T Y
            self.weight = np.linalg.inv((X.T).dot(X)).dot(X.T).dot(Y)

    def predict(self, test_data):
        # Augment the test data with the same bias column used in fit().
        X = np.ones((test_data.shape[0], test_data.shape[1] + 1))
        X[:, 0:-1] = test_data
        return X.dot(self.weight)

    def evaluate(self, predict_target, test_target):
        # Threshold the regression output at 0.5 to turn it into a
        # binary classifier, then report accuracy.
        predict_target[predict_target >= 0.5] = 1
        predict_target[predict_target < 0.5] = 0
        return sum(predict_target == test_target) / len(predict_target)
if __name__ == "__main__":
cancer = load_breast_cancer()
xtr, xval, ytr, yval = train_test_split(cancer.data, cancer.target, \
test_size=0.2, random_state=7)
linear = LinearRegression(training_way = 'gd')
linear.fit(xtr, ytr, alpha = 0.05, epoch = 1000)
predict = linear.predict(xval)
print('linear regression accruacy:',linear.evaluate(predict, yval))
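Since any `training_way` other than 'gd' falls through to the closed-form normal equation, the gradient-descent result is easy to sanity-check against the exact solution. A minimal sketch, appended to the `__main__` block above (the label 'normal' is arbitrary; only 'gd' is special-cased):

    # Closed-form solution for comparison: on this dataset the two
    # training modes should land at nearly the same accuracy.
    exact = LinearRegression(training_way='normal')
    exact.fit(xtr, ytr)
    print('normal equation accuracy:', exact.evaluate(exact.predict(xval), yval))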