Using Python
All of this code was written from the theory alone, which was no small feat. The excitement when the results finally came out!
Linear regression theory: https://blog.csdn.net/jk_chen_acmer/article/details/102973678
import numpy as np
import matplotlib.pyplot as plt

# Generate random data
# X: m*n, first column fixed to 1
# Y: m*1
# y = 5 + 3.14x, with up to ±1 unit of noise
def getData():
    m = 40
    n = 2
    X = np.mat(np.ones([m, n], float))
    Y = np.mat(np.ones([m, 1], float))
    X = X.T
    for i in range(1, n):
        X[i] = np.random.random(m) * 10
    X = X.T
    for i in range(0, m):
        Y[i, 0] = 5 + 3.14 * X[i, 1]
        Y[i, 0] += 1 - np.random.rand(1)[0] * 2
    return [X, Y]

# Plot the data points
def draw(X, Y):
    m = X.shape[0]
    fig = plt.figure()
    ax = fig.add_subplot(1, 1, 1)
    ax.set(xlim=[0, 10], ylim=[0, 40],
           title='Data', ylabel='y', xlabel='x')
    ax.xaxis.set_ticks_position('bottom')
    ax.spines['bottom'].set_position(('data', 0))  # put the x axis at y=0
    for i in range(0, m):
        ax.plot(X[i, 1], Y[i, 0], '.k')
    plt.show()

if __name__ == '__main__':
    data = getData()
    draw(data[0], data[1])
Gradient descent based on vectorized operations
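For reference, the quantities the code below implements (see the theory link above) are the hypothesis $h_\theta(x) = \theta^T x$, the cost

$$J(\theta) = \frac{1}{2m}\sum_{i=1}^{m}\bigl(h_\theta(x^{(i)}) - y^{(i)}\bigr)^2,$$

and the batch update $\theta := \theta - \frac{\alpha}{m}\sum_{i=1}^{m}\bigl(h_\theta(x^{(i)}) - y^{(i)}\bigr)x^{(i)}$, where $\alpha$ is the learning rate (`rate` in the code).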
import numpy as np
import matplotlib.pyplot as plt
import copy

# Generate random data
# X: m*n, first column fixed to 1
# Y: m*1
# y = 5 + 3.14x, with up to ±1 unit of noise
def getData():
    m = 40
    n = 2
    X = np.mat(np.ones([m, n], float))
    Y = np.mat(np.ones([m, 1], float))
    X = X.T
    for i in range(1, n):
        X[i] = np.random.random(m) * 10
    X = X.T
    for i in range(0, m):
        Y[i, 0] = 5 + 3.14 * X[i, 1]
        Y[i, 0] += 1 - np.random.rand(1)[0] * 2
    return [X, Y]

# Prediction for one sample: Th: 1*n, X: 1*n
def getH(Th, X):
    return (Th * X.T)[0, 0]

# Gradient descent
# Data set: X: m*n, Y: m*1
def gradientDescent(X, Y):
    m = X.shape[0]
    n = X.shape[1]
    rate = 5e-3  # learning rate
    Th = np.mat(np.ones([1, n], float))  # parameter vector Th: 1*n
    preJ = 0  # cost J computed from Th and X
    for i in range(0, m):
        preJ += (getH(Th, X[i]) - Y[i, 0]) ** 2
    preJ /= 2 * m
    while 1:
        Old = copy.copy(Th)
        for i in range(0, m):
            Th -= rate / m * (getH(Old, X[i]) - Y[i, 0]) * X[i]  # gradient step
        J = 0  # recompute the cost J from the updated Th
        for i in range(0, m):
            J += (getH(Th, X[i]) - Y[i, 0]) ** 2
        J /= 2 * m
        if abs(J - preJ) < 1e-5:  # converged once the cost barely changes
            break
        print(J)
        preJ = J
    return Th  # parameter vector of the fitted line

# Plot the data points and the fitted line
def draw(X, Y, Th=None):
    m = X.shape[0]
    fig = plt.figure()
    ax = fig.add_subplot(1, 1, 1)
    ax.set(xlim=[0, 10], ylim=[0, 40],
           title='Data', ylabel='y', xlabel='x')
    ax.xaxis.set_ticks_position('bottom')
    ax.spines['bottom'].set_position(('data', 0))  # put the x axis at y=0
    for i in range(0, m):
        ax.plot(X[i, 1], Y[i, 0], '.k')
    if Th is not None:
        x = np.linspace(0, 10, 100)
        y = []
        for i in range(0, 100):
            y.append(getH(Th, np.mat([1, x[i]])))  # full feature vector
        ax.plot(x, y)
    plt.show()

if __name__ == '__main__':
    data = getData()
    Th = gradientDescent(data[0], data[1])
    print(Th)
    draw(data[0], data[1], Th)
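The update and cost above still loop over the samples in Python; both collapse into single matrix expressions. Here is a minimal sketch of the fully vectorized equivalent (same np.mat conventions as above; gradientDescentVec is a name introduced here, not part of the original code):

import numpy as np

def gradientDescentVec(X, Y, rate=5e-3, tol=1e-5):
    m = X.shape[0]
    n = X.shape[1]
    Th = np.mat(np.ones([1, n], float))
    # J(Th) = ||X*Th.T - Y||^2 / (2m) in one expression
    def cost(Th):
        E = X * Th.T - Y  # m*1 residual vector
        return (E.T * E)[0, 0] / (2 * m)
    preJ = cost(Th)
    while True:
        # whole-batch gradient: (1/m) * (X*Th.T - Y).T * X, shape 1*n
        Th = Th - rate / m * ((X * Th.T - Y).T * X)
        J = cost(Th)
        if abs(J - preJ) < tol:
            break
        preJ = J
    return Th

Mathematically this takes exactly the same steps as the looped version (the inner loop over i with a fixed Old is just the sum in the batch update), but each iteration is a constant number of Python-level operations.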
Adding feature scaling. The squared feature would span [0, 100] while x only spans [0, 10], so the code stores x²/10 in the third column to keep both features on a comparable scale.
import numpy as np
import matplotlib.pyplot as plt
import copy

# Generate random data
# X: m*n, first column fixed to 1
# Y: m*1
# y = 3(x-5)^2 - 5, with up to ±1 unit of noise
def getData():
    m = 40
    n = 3
    X = np.mat(np.ones([m, n], float))
    Y = np.mat(np.ones([m, 1], float))
    X = X.T
    for i in range(1, n):
        X[i] = np.random.random(m) * 10
    X = X.T
    for i in range(0, m):
        Y[i, 0] = 3 * (X[i, 1] - 5) ** 2 - 5
        X[i, 2] = X[i, 1] ** 2 / 10  # scaled squared feature
        Y[i, 0] += 1 - np.random.rand(1)[0] * 2
    return [X, Y]

# Prediction for one sample: Th: 1*n, X: 1*n
def getH(Th, X):
    return (Th * X.T)[0, 0]

# Gradient descent
# Data set: X: m*n, Y: m*1
def gradientDescent(X, Y):
    m = X.shape[0]
    n = X.shape[1]
    rate = 3e-3  # learning rate
    Th = np.mat(np.ones([1, n], float))  # parameter vector Th: 1*n
    preJ = 0  # cost J computed from Th and X
    for i in range(0, m):
        preJ += (getH(Th, X[i]) - Y[i, 0]) ** 2
    preJ /= 2 * m
    while 1:
        Old = copy.copy(Th)
        for i in range(0, m):
            Th -= rate / m * (getH(Old, X[i]) - Y[i, 0]) * X[i]  # gradient step
        J = 0  # recompute the cost J from the updated Th
        for i in range(0, m):
            J += (getH(Th, X[i]) - Y[i, 0]) ** 2
        J /= 2 * m
        if abs(J - preJ) < 1e-5:  # converged once the cost barely changes
            break
        print(J)
        preJ = J
    return Th  # parameter vector of the fitted curve

# Plot the data points and the fitted curve
def draw(X, Y, Th=None):
    m = X.shape[0]
    fig = plt.figure()
    ax = fig.add_subplot(1, 1, 1)
    ax.set(xlim=[0, 10], ylim=[-7, 40],
           title='Data', ylabel='y', xlabel='x')
    ax.xaxis.set_ticks_position('bottom')
    ax.spines['bottom'].set_position(('data', 0))  # put the x axis at y=0
    for i in range(0, m):
        ax.plot(X[i, 1], Y[i, 0], '.k')
    if Th is not None:
        x = np.linspace(0, 10, 100)
        y = []
        for i in range(0, 100):
            y.append(getH(Th, np.mat([1, x[i], x[i] ** 2 / 10])))  # full feature vector
        ax.plot(x, y)
    plt.show()

if __name__ == '__main__':
    data = getData()
    Th = gradientDescent(data[0], data[1])
    print(Th)
    draw(data[0], data[1], Th)
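The x²/10 division above is a hand-tuned rescaling. A more general sketch (not part of the original code; featureScale is a name introduced here) standardizes every non-constant column to zero mean and unit variance before running gradient descent:

import numpy as np

def featureScale(X):
    # z-score each feature column, leaving the constant first column at 1
    # (assumes non-constant columns, i.e. sigma > 0)
    X = X.copy()
    for j in range(1, X.shape[1]):
        mu = X[:, j].mean()
        sigma = X[:, j].std()
        X[:, j] = (X[:, j] - mu) / sigma
    return X

Parameters learned on scaled features only make sense for scaled inputs, so the same mu and sigma must be applied to any point passed to getH later.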
Multiple features: $y = 5 + 3x_1 + 6x_2 - 5x_3 + 10x_4 - 12x_5$ (with up to ±1 unit of noise)
import numpy as np
import copy

# Generate random data
# X: m*n, first column fixed to 1
# Y: m*1
# y = 5 + 3x1 + 6x2 - 5x3 + 10x4 - 12x5, with up to ±1 unit of noise
def getData():
    m = 40
    n = 6
    X = np.mat(np.ones([m, n], float))
    Y = np.mat(np.ones([m, 1], float))
    X = X.T
    for i in range(1, n):
        X[i] = np.random.random(m) * 10
    X = X.T
    for i in range(0, m):
        Y[i, 0] = 5 + 3 * X[i, 1] + 6 * X[i, 2] - 5 * X[i, 3] + 10 * X[i, 4] - 12 * X[i, 5]
        Y[i, 0] += 1 - np.random.random(1)[0] * 2
    return [X, Y]

# Prediction for one sample: Th: 1*n, X: 1*n
def getH(Th, X):
    return (Th * X.T)[0, 0]

# Gradient descent
# Data set: X: m*n, Y: m*1
def gradientDescent(X, Y):
    m = X.shape[0]
    n = X.shape[1]
    rate = 5e-3  # learning rate
    Th = np.mat(np.ones([1, n], float))  # parameter vector Th: 1*n
    preJ = 0  # cost J computed from Th and X
    for i in range(0, m):
        preJ += (getH(Th, X[i]) - Y[i, 0]) ** 2
    preJ /= 2 * m
    while 1:
        Old = copy.copy(Th)
        for i in range(0, m):
            Th -= rate / m * (getH(Old, X[i]) - Y[i, 0]) * X[i]  # gradient step
        J = 0  # recompute the cost J from the updated Th
        for i in range(0, m):
            J += (getH(Th, X[i]) - Y[i, 0]) ** 2
        J /= 2 * m
        if abs(J - preJ) < 1e-5:  # converged once the cost barely changes
            break
        print(J)
        preJ = J
    return Th  # parameter vector of the fitted model

if __name__ == '__main__':
    data = getData()
    Th = gradientDescent(data[0], data[1])
    print(Th)
Output:
This shows that linear regression with multiple parameters works just as well. The convergence threshold can even be tightened for a closer fit, but then the program runs for a long time.
[[ 6.46117406 2.98766795 5.95135828 -5.01541145 9.98035538
-11.97913789]]
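A quick way to back up this result, assuming `data` and `Th` from the run above are still in scope, is to check that the residuals stay within the injected ±1 noise band (a sketch):

resid = data[0] * Th.T - data[1]  # m*1 vector of prediction errors
print(float(abs(resid).max()))    # should be on the order of 1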
The normal equation: remarkably, it computes the optimal solution directly, with no iteration needed.
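For reference, setting the gradient of the cost $J(\theta)$ to zero yields the closed-form solution

$$\theta = (X^T X)^{-1} X^T y.$$

The code below uses np.linalg.pinv (the Moore-Penrose pseudoinverse) instead of a plain inverse, so it still returns a least-squares solution even when $X^T X$ is singular, for example when features are redundant.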
import numpy as np
import matplotlib.pyplot as plt

# Generate random data
# X: m*n, first column fixed to 1
# Y: m*1
# y = 3(x-5)^2 - 5, with up to ±1 unit of noise
def getData():
    m = 40
    n = 3
    X = np.mat(np.ones([m, n], float))
    Y = np.mat(np.ones([m, 1], float))
    X = X.T
    for i in range(1, n):
        X[i] = np.random.random(m) * 10
    X = X.T
    for i in range(0, m):
        Y[i, 0] = 3 * (X[i, 1] - 5) ** 2 - 5
        X[i, 2] = X[i, 1] ** 2 / 10  # scaled squared feature
        Y[i, 0] += 1 - np.random.rand(1)[0] * 2
    return [X, Y]

# Prediction for one sample: Th: 1*n, X: 1*n
def getH(Th, X):
    return (Th * X.T)[0, 0]

# Normal equation solution
# Data set: X: m*n, Y: m*1
def ChEquation(X, Y):
    return (np.linalg.pinv(X.T * X) * X.T * Y).T

# Plot the data points and the fitted curve
def draw(X, Y, Th=None):
    m = X.shape[0]
    fig = plt.figure()
    ax = fig.add_subplot(1, 1, 1)
    ax.set(xlim=[0, 10], ylim=[-7, 40],
           title='Data', ylabel='y', xlabel='x')
    ax.xaxis.set_ticks_position('bottom')
    ax.spines['bottom'].set_position(('data', 0))  # put the x axis at y=0
    for i in range(0, m):
        ax.plot(X[i, 1], Y[i, 0], '.k')
    if Th is not None:
        x = np.linspace(0, 10, 100)
        y = []
        for i in range(0, 100):
            y.append(getH(Th, np.mat([1, x[i], x[i] ** 2 / 10])))  # full feature vector
        ax.plot(x, y)
    plt.show()

if __name__ == '__main__':
    data = getData()
    Th = ChEquation(data[0], data[1])
    print(Th)
    draw(data[0], data[1], Th)
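As a sanity check on the printed Th: expanding the generating function gives $3(x-5)^2 - 5 = 3x^2 - 30x + 70$, and since the third feature is $x^2/10$, the recovered parameter vector should land near $[70, -30, 30]$, up to the ±1 noise.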