对于给定样本 x⃗ ,假定其有n维特征,则, x⃗ =(x1,x2,x3,…,xn)T 。线性模型可以表示为:
对于可导函数 h ,令 h(y)=w⃗ Tx⃗ +b ,就得到了广义线性模型。
例如,对数线性回归:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression, Lasso, Ridge, ElasticNet
data = np.loadtxt('LinearRegressionData.txt', delimiter=',')
X = data[:,0]
X = X.reshape(-1,1)
y = data[:,1]
#scikit-learn中线性回归有多种正则化方法,下面的LinearRegression可换成Lasso, Ridge, ElasticNet
regr = LinearRegression()
regr.fit(X,y)
#显示结果
fig = plt.figure()
ax = fig.add_subplot(111)
ax.set_xlim(0,25)
ax.set_ylim(-5,30)
ax.set_title('Linear Regression',fontsize=14,fontweight='bold')
ax.set_xlabel('X',fontsize=14,fontweight='bold')
ax.set_ylabel('y',fontsize=14,fontweight='bold')
ax.annotate('fitted regression line', xy=(16, 16*regr.coef_+regr.intercept_),
xytext=(13,1.5), arrowprops=dict(facecolor='black',shrink=0.03,width=1,headwidth=8,headlength=10),
horizontalalignment='left',verticalalignment='top',fontsize=12)
ax.grid()
plt.scatter(X,y,color='lime',marker='*',linewidth=2)
fit_x = np.linspace(0,25)
fit_y = regr.coef_ * fit_x + regr.intercept_
plt.plot(fit_x, fit_y,color='r',linewidth=2)
plt.show()
scikit-learn实现逻辑回归:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import LogisticRegression
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
data = np.loadtxt('LogisticRegressionData.txt',delimiter=',')
X = data[:,0:2]
X = X.reshape(-1,2)
y = data[:,2]
#分别获取两类样本的索引
negative = data[:,2] == 0
positive = data[:,2] == 1
#scikit-learn中逻辑回归除了线性模型中的LogisticRegression外还可以用判别分析中的LinearDiscriminantAnalysis来实现,
#即下面的LogisticRegression可换位LinearDiscriminantAnalysis
regr = LogisticRegression()
regr.fit(X,y)
#显示结果
fig = plt.figure()
ax = fig.add_subplot(111)
ax.set_xlim(25,105)
ax.set_ylim(25,105)
ax.set_xlabel("X",fontsize=14,fontweight='bold')
ax.set_ylabel("y",fontsize=14,fontweight='bold')
ax.set_title('Logistic Regression',fontsize=14,fontweight='bold')
ax.grid()
plt.scatter(data[negative][:,0], data[negative][:,1],marker='x',c='r')
plt.scatter(data[positive][:,0], data[positive][:,1],marker='+',c='b')
x_min,x_max = X[:,0].min(),X[:,0].max()
y_min,y_max = X[:,1].min(),X[:,1].max()
h = 0.01
xx,yy = np.meshgrid(np.arange(x_min,x_max,h), np.arange(y_min,y_max,h))
Z = regr.predict(np.c_[xx.ravel(), yy.ravel()])
Z = Z.reshape(xx.shape)
plt.contour(xx,yy,Z,[0.5],colors='g')
plt.savefig('Logistic Regression',dpi=100)
plt.show()
附件:
如需使用本文所用的数据LinearRegressionData.txt
和LogisticRegressionData.txt
,请移步https://github.com/GarryLau/MachineLearning/tree/master/LinearModel进行下载。