It is a convex function: its lowest point is the minimum of the cost function, i.e., the optimal parameter values, which give the best performance.
The least squares method derives from maximum likelihood estimation and is a theoretical approach to parameter estimation. The general steps are:
1. Assume the observation noise is Gaussian, so each label satisfies $y = \theta^T x + \epsilon$ with $\epsilon \sim N(0, \sigma^2)$.
2. Write down the likelihood of the data and take its logarithm.
3. Maximizing the log-likelihood is then equivalent to minimizing the sum of squared errors.
4. Set the derivative of the squared-error objective to zero and solve, which yields the normal equation $\theta = (X^T X)^{-1} X^T y$.
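A minimal numpy sketch of the closed-form least squares solution via the normal equation (the data and values here are illustrative, not from the original notes):

import numpy as np

# Illustrative data: y = 3x + 2 plus Gaussian noise
rng = np.random.default_rng(0)
x = rng.uniform(-3, 3, size=100)
X = np.column_stack([np.ones_like(x), x])  # add a bias column
y = 3 * x + 2 + rng.normal(0, 1, size=100)

# Normal equation: theta = (X^T X)^{-1} X^T y
theta = np.linalg.inv(X.T @ X) @ X.T @ y
print(theta)  # approximately [2, 3] (intercept, slope)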
Gradient descent: an algorithm for finding the minimum of a function. We will use gradient descent to find the minimum of the cost function $J(\theta_0, \theta_1)$.
$\theta_j := \theta_j - \alpha \frac{\partial}{\partial \theta_j} J(\theta_0, \theta_1)$
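For example, with $J(\theta) = \theta^2$, learning rate $\alpha = 0.1$, and a starting point $\theta = 4$, one update step gives $\theta := 4 - 0.1 \cdot 2 \cdot 4 = 3.2$, moving toward the minimum at $\theta = 0$.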
'''
Steps:
1. Choose the step size (learning rate) and an initial value for theta
2. Compute the value of the cost function
3. Compute the derivative of the cost function
4. Update the theta parameter
'''
import numpy as np
import random
import matplotlib.pyplot as plt

def GD():
    # Pick a random initial value for theta
    init_theta = random.randint(-10, 10)
    x = init_theta
    # Pick a random learning rate
    alpha = random.randint(1, 999) / 10000
    # Maximum number of iterations
    max_iter = 10
    # Iterative solution
    X = []  # record the generated theta values
    Y = []  # record the cost function values
    y = f(x)
    X.append(x)
    Y.append(y)
    y_change = 1
    i = 0
    while y_change > 1e-10 and i < max_iter:
        x -= alpha * g(x)  # gradient step: theta -= alpha * J'(theta)
        pre_y, y = y, f(x)
        y_change = np.abs(pre_y - y)
        i += 1
        X.append(x)
        Y.append(y)
    # Plot the cost curve over a range covering all visited points
    min_x, max_x = np.min(X), np.max(X)
    dist = np.max([np.abs(min_x - x), np.abs(max_x - x)]) + 0.1
    min_x = x - dist
    max_x = x + dist
    X2 = np.arange(min_x, max_x, 0.05)
    Y2 = list(map(lambda t: f(t), X2))
    plt.plot(X2, Y2)
    plt.plot(X, Y, 'bo--')
    plt.show()
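GD() assumes a cost function f and its derivative g are defined elsewhere in the notes; a minimal sketch, assuming a simple quadratic cost (these two definitions are illustrative, not from the original):

def f(x):
    # Example cost function: J(theta) = theta^2
    return x ** 2

def g(x):
    # Its derivative: J'(theta) = 2 * theta
    return 2 * x

GD()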
'''
3D visualization
'''
import numpy as np
import random
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
def GD_3D():
    # Pick random initial values for the two parameters
    init_theta1 = random.randint(-10, 10)
    init_theta2 = random.randint(-10, 10)
    x1 = init_theta1
    x2 = init_theta2
    # Pick a random learning rate
    alpha = random.randint(1, 999) / 10000
    # Maximum number of iterations
    max_iter = 100
    X1 = []  # record the generated theta1 values
    X2 = []  # record the generated theta2 values
    Y = []   # record the cost function values
    i = 0
    y = f1(x1, x2)
    X1.append(x1)
    X2.append(x2)
    Y.append(y)
    error = 1
    while error > 1e-10 and i < max_iter:
        # Update each parameter along its own partial derivative
        x1 -= alpha * g_x1(x1)
        x2 -= alpha * g_x2(x2)
        pre_y, y = y, f1(x1, x2)
        error = np.abs(pre_y - y)
        i += 1
        X1.append(x1)
        X2.append(x2)
        Y.append(y)
    # Plot the cost surface over a range covering all visited points
    max_x1 = np.max(np.abs(X1))
    max_x2 = np.max(np.abs(X2))
    X12 = np.arange(-max_x1, max_x1, 0.05)
    X22 = np.arange(-max_x2, max_x2, 0.05)
    X12, X22 = np.meshgrid(X12, X22)
    Y2 = np.array(list(map(lambda t: f1(t[0], t[1]), zip(X12.flatten(), X22.flatten())))).reshape(X12.shape)
    fig = plt.figure()
    # Axes3D(fig) is deprecated in recent matplotlib; add a 3D subplot instead
    ax = fig.add_subplot(projection='3d')
    ax.plot_surface(X12, X22, Y2, rstride=1, cstride=1, cmap='rainbow')
    ax.plot(X1, X2, Y, 'ro--')
    plt.show()
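As above, f1, g_x1, and g_x2 are assumed to be defined elsewhere; a minimal sketch, assuming the bowl-shaped cost f1(x1, x2) = x1^2 + x2^2, whose partial derivatives each depend on only one variable (matching the g_x1(x1) / g_x2(x2) signatures used above):

def f1(x1, x2):
    # Example cost function: J(theta1, theta2) = theta1^2 + theta2^2
    return x1 ** 2 + x2 ** 2

def g_x1(x1):
    # Partial derivative with respect to theta1
    return 2 * x1

def g_x2(x2):
    # Partial derivative with respect to theta2
    return 2 * x2

GD_3D()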
Why can L1 regularization produce a sparse model (many parameters equal to 0), while L2 regularization rarely drives parameters exactly to 0? Intuitively, the L1 penalty's contour is a diamond whose corners lie on the coordinate axes, so the loss contours tend to first touch it at a corner, where some coordinates are exactly zero; the L2 penalty's contour is a circle with no corners, so the touching point generally has no zero coordinate. Equivalently, the gradient of the L1 penalty keeps a constant magnitude near zero and can push a small weight all the way to 0, whereas the L2 penalty's gradient shrinks in proportion to the weight, making it smaller but never exactly zero.
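A quick way to see this empirically is to fit sklearn's Lasso (L1) and Ridge (L2) on the same data and count zero coefficients; a minimal sketch (the data here is synthetic and illustrative):

import numpy as np
from sklearn.linear_model import Lasso, Ridge

rng = np.random.default_rng(0)
X = rng.normal(size=(100, 20))
# Only the first 3 features actually matter
y = 2 * X[:, 0] - 3 * X[:, 1] + X[:, 2] + rng.normal(0, 0.1, size=100)

lasso = Lasso(alpha=0.1).fit(X, y)
ridge = Ridge(alpha=0.1).fit(X, y)
print('Lasso zero coefficients:', np.sum(lasso.coef_ == 0))  # most are exactly 0
print('Ridge zero coefficients:', np.sum(ridge.coef_ == 0))  # typically 0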
import numpy as np
import random
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures
def poly_reg():
    # Generate noisy quadratic data: y = 0.5x^2 + x + 2 + noise
    x = np.random.uniform(-3, 3, size=100)
    X = x.reshape(-1, 1)
    y = 0.5 * x**2 + x + 2 + np.random.normal(0, 1, size=100)
    plt.scatter(x, y)
    plt.show()
    # Linear regression fit and plot
    lin_reg = LinearRegression()
    lin_reg.fit(X, y)
    y_predict = lin_reg.predict(X)
    plt.scatter(x, y)
    plt.plot(x, y_predict, color='r')
    plt.show()
    # Polynomial regression fit and plot (squared feature added by hand)
    X2 = np.hstack([X, X**2])
    lin_reg2 = LinearRegression()
    lin_reg2.fit(X2, y)
    y_predict2 = lin_reg2.predict(X2)
    plt.scatter(x, y)
    # Sort by x so the curve is drawn left to right
    x1, y_predict2 = np.sort(x), y_predict2[np.argsort(x)]
    plt.plot(x1, y_predict2, color='r')
    plt.show()
    # Feature expansion with sklearn's preprocessing module
    poly = PolynomialFeatures(degree=2, include_bias=False)
    poly.fit(X)
    X3 = poly.transform(X)
    lin_reg3 = LinearRegression()
    lin_reg3.fit(X3, y)
    y_predict3 = lin_reg3.predict(X3)
    plt.scatter(x, y)
    x_sorted, y_predict3 = np.sort(x), y_predict3[np.argsort(x)]
    plt.plot(x_sorted, y_predict3, color='r')
    plt.show()
    # Print the weights and the intercept
    print(lin_reg3.coef_, lin_reg3.intercept_)
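Since the data was generated from y = 0.5x^2 + x + 2 plus unit-variance noise, the printed coefficients should come out close to [1, 0.5] (for the x and x^2 columns) and the intercept close to 2.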
import numpy as np
import random
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
def pipeline_study():
    # Generate the same noisy quadratic data as before
    x = np.random.uniform(-3, 3, size=100)
    X = x.reshape(-1, 1)
    y = 0.5 * x**2 + x + 2 + np.random.normal(0, 1, size=100)
    degree = 2
    # Pipeline takes a single list of (name, estimator) tuples, applied in order
    poly_reg = Pipeline([
        ('poly', PolynomialFeatures(degree=degree)),
        ('std_scaler', StandardScaler()),
        ('lin_reg', LinearRegression())
    ])
    poly_reg.fit(X, y)
    y_predict = poly_reg.predict(X)
    plt.scatter(x, y)
    # Sort by x so the curve is drawn left to right
    x1, y_predict2 = np.sort(x), y_predict[np.argsort(x)]
    plt.plot(x1, y_predict2, color='r')
    plt.show()
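The pipeline makes it easy to vary the polynomial degree; a minimal sketch (the degree choices are illustrative) comparing training error for an underfit, well-fit, and overfit model:

import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures, StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.metrics import mean_squared_error

x = np.random.uniform(-3, 3, size=100)
X = x.reshape(-1, 1)
y = 0.5 * x**2 + x + 2 + np.random.normal(0, 1, size=100)

for degree in [1, 2, 20]:
    model = Pipeline([
        ('poly', PolynomialFeatures(degree=degree)),
        ('std_scaler', StandardScaler()),
        ('lin_reg', LinearRegression())
    ])
    model.fit(X, y)
    mse = mean_squared_error(y, model.predict(X))
    # Higher degree always lowers training MSE, but degree 20 is fitting the noise
    print(degree, mse)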