Linear Models: Regularization of Linear Regression

import matplotlib.pyplot as plt
import numpy as np
from sklearn import datasets,linear_model
from sklearn.model_selection import train_test_split

Loading the data

def load_data():
    # Load the diabetes dataset and hold out 25% of it as a test set
    diabetes=datasets.load_diabetes()
    return train_test_split(diabetes.data,diabetes.target,
                            test_size=0.25,random_state=0)

Ridge Regression

Model prototype
class sklearn.linear_model.Ridge(alpha=1.0,fit_intercept=True,
normalize=False,copy_X=True,max_iter=None,tol=0.001,
solver='auto',random_state=None)
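
In scikit-learn's formulation, Ridge minimizes the ordinary least-squares loss plus an L2 penalty on the weight vector:

||y - Xw||^2_2 + alpha * ||w||^2_2

so alpha directly scales how strongly the weights are shrunk toward zero.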
Parameters

  • alpha: regularization strength; the larger its value, the heavier the regularization term weighs in the objective
  • fit_intercept
  • normalize
  • copy_X
  • max_iter: maximum number of iterations
  • tol: threshold for deciding whether the iteration has converged
  • solver: algorithm used to solve the optimization problem (a comparison sketch follows this list)
    • auto: select the algorithm automatically
    • svd: compute the regression coefficients via singular value decomposition
    • cholesky: solve with the scipy.linalg.solve function
    • sparse_cg: solve with the scipy.sparse.linalg.cg function
    • lsqr: solve with the scipy.sparse.linalg.lsqr function (fast)
    • sag: use Stochastic Average Gradient descent
  • random_state
    • integer: seed for the random number generator
    • RandomState instance: the random number generator to use
    • None: use the default random number generator
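
Since all of these solvers target the same objective, a quick sanity check is to fit the identical model with each one and compare test scores; on a small dense dataset such as diabetes they should agree closely. A minimal sketch (it assumes the X_train/X_test/y_train/y_test split from load_data(); note that sag converges best on standardized features):

# Fit the same Ridge model with each solver and compare test scores
for solver in ['svd','cholesky','lsqr','sparse_cg','sag']:
    regr=linear_model.Ridge(alpha=1.0,solver=solver,random_state=0)
    regr.fit(X_train,y_train)
    print('%-9s score:%.4f'%(solver,regr.score(X_test,y_test)))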

Attributes

  • coef_
  • intercept_
  • n_iter_: actual number of iterations

Methods

  • fit(X,y[,sample_weight])
  • predict(X)
  • score(X,y[,sample_weight])

Using Ridge

def test_Ridge(*data):
    X_train,X_test,y_train,y_test=data
    regr=linear_model.Ridge()
    regr.fit(X_train,y_train)
    print('Coefficients:%s,\nintercept %.2f'%(regr.coef_,regr.intercept_))
    print('Mean squared error:%.2f'%np.mean((regr.predict(X_test)-y_test)**2))
    print('Score:%.2f'%regr.score(X_test,y_test))

X_train,X_test,y_train,y_test=load_data()
test_Ridge(X_train,X_test,y_train,y_test)

Effect of the alpha coefficient on prediction performance

def test_Ridge_alpha(*data):
    X_train,X_test,y_train,y_test=data
    alphas=[0.01,0.02,0.05,0.1,0.2,0.5,1,2,5,10,20,50,100,200,500,1000]
    scores=[]
    for alpha in alphas:
        regr=linear_model.Ridge(alpha=alpha)
        regr.fit(X_train,y_train)
        scores.append(regr.score(X_test,y_test))
    # plot the test score as a function of alpha
    fig=plt.figure()
    ax=fig.add_subplot(1,1,1)
    ax.plot(alphas,scores)
    ax.set_xlabel(r"$\alpha$")
    ax.set_ylabel(r"score")
    ax.set_xscale('log')
    ax.set_title("Ridge")
    plt.show()

X_train,X_test,y_train,y_test=load_data()
test_Ridge_alpha(X_train,X_test,y_train,y_test)
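
Sweeping alpha by hand as above is instructive, but scikit-learn also provides RidgeCV, which selects alpha by cross-validation. A minimal sketch (the candidate alphas are illustrative):

# RidgeCV picks the best alpha from a candidate list via cross-validation
regr_cv=linear_model.RidgeCV(alphas=[0.01,0.1,1.0,10.0,100.0])
regr_cv.fit(X_train,y_train)
print('chosen alpha:%s'%regr_cv.alpha_)
print('test score:%.2f'%regr_cv.score(X_test,y_test))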

Lasso Regression

Model prototype
class sklearn.linear_model.Lasso(alpha=1.0,fit_intercept=True,
normalize=False,precompute=False,copy_X=True,
max_iter=1000,tol=0.0001,warm_start=False,positive=False,
random_state=None,selection='cyclic')
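
In scikit-learn's formulation, Lasso minimizes a scaled least-squares loss plus an L1 penalty:

(1 / (2 * n_samples)) * ||y - Xw||^2_2 + alpha * ||w||_1

and it is the L1 term that drives individual coefficients exactly to zero.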
Parameters

  • alpha: L1 regularization strength (see the sparsity sketch after this list)
  • fit_intercept
  • normalize
  • precompute: whether to precompute the Gram matrix to speed up the calculation
  • copy_X
  • max_iter
  • tol
  • warm_start: whether to continue training from the result of the previous fit
  • positive: if True, force all components of the weight vector to be positive
  • random_state
  • selection
    • random: at each update, pick a random component of the weight vector to update
    • cyclic: at each update, sweep through the components of the weight vector in order
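
The practical effect of the L1 penalty is sparsity: as alpha grows, more and more coefficients are driven exactly to zero. A minimal sketch that counts the zeroed coefficients (the alpha values are illustrative; it assumes the split from load_data()):

# Count how many coefficients Lasso zeroes out at each alpha
for alpha in [0.01,0.1,1.0,10.0]:
    regr=linear_model.Lasso(alpha=alpha)
    regr.fit(X_train,y_train)
    print('alpha=%-5s zeroed %d of %d coefficients'%(alpha,np.sum(regr.coef_==0),len(regr.coef_)))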

Attributes

  • coef_
  • intercept_
  • n_iter_

Methods

  • fit(X,y[,sample_weight])
  • predict(X)
  • score(X,y[,sample_weight])

Using Lasso

def test_Lasso(*data):
    X_train,X_test,y_train,y_test=data
    regr=linear_model.Lasso()
    regr.fit(X_train,y_train)
    print('Coefficients:%s,\nintercept %.2f'%(regr.coef_,regr.intercept_))
    print('Mean squared error:%.2f'%np.mean((regr.predict(X_test)-y_test)**2))
    print('Score:%.2f'%regr.score(X_test,y_test))

X_train,X_test,y_train,y_test=load_data()
test_Lasso(X_train,X_test,y_train,y_test)

Effect of the alpha coefficient on prediction performance

def test_Lasso_alpha(*data):
    X_train,X_test,y_train,y_test=data
    alphas=[0.01,0.02,0.05,0.1,0.2,0.5,1,2,5,10,20,50,100,200,500,1000]
    scores=[]
    for alpha in alphas:
        regr=linear_model.Lasso(alpha=alpha)
        regr.fit(X_train,y_train)
        scores.append(regr.score(X_test,y_test))
    # plot the test score as a function of alpha
    fig=plt.figure()
    ax=fig.add_subplot(1,1,1)
    ax.plot(alphas,scores)
    ax.set_xlabel(r'$\alpha$')
    ax.set_ylabel(r'score')
    ax.set_xscale('log')
    ax.set_title("Lasso")
    plt.show()

X_train,X_test,y_train,y_test=load_data()
test_Lasso_alpha(X_train,X_test,y_train,y_test)
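
As with Ridge, the manual sweep can be automated: LassoCV searches a regularization path and selects alpha by cross-validation. A minimal sketch:

# LassoCV selects alpha along a regularization path via cross-validation
regr_cv=linear_model.LassoCV(cv=5)
regr_cv.fit(X_train,y_train)
print('chosen alpha:%.4f'%regr_cv.alpha_)
print('test score:%.2f'%regr_cv.score(X_test,y_test))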

ElasticNet Regression

Model prototype
class sklearn.linear_model.ElasticNet(alpha=1.0,l1_ratio=0.5,
fit_intercept=True,normalize=False,precompute=False,
copy_X=True,max_iter=1000,tol=0.0001,warm_start=False,
positive=False,random_state=None,selection='cyclic')
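
In scikit-learn's formulation, ElasticNet combines both penalties, with l1_ratio (the ρ below) controlling the mix:

1 / (2 * n_samples) * ||y - Xw||^2_2 + alpha * l1_ratio * ||w||_1 + 0.5 * alpha * (1 - l1_ratio) * ||w||^2_2

so l1_ratio=1 recovers Lasso, while l1_ratio near 0 behaves like Ridge.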
Parameters

  • alpha
  • l1_ratio: the ρ value, i.e. the mix between the L1 and L2 penalties (see the sketch after this list)
  • fit_intercept
  • normalize
  • precompute
  • copy_X
  • max_iter
  • tol
  • warm_start
  • positive
  • random_state
  • selection
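
The mixing behavior is easy to observe by holding alpha fixed and varying l1_ratio: the closer it is to 1, the more Lasso-like the fit and the more coefficients are zeroed. A minimal sketch (values are illustrative; l1_ratio=0 is avoided because the coordinate-descent solver is not recommended there):

# Watch the number of zeroed coefficients shrink toward the L2 end
for l1_ratio in [1.0,0.5,0.1]:
    regr=linear_model.ElasticNet(alpha=1.0,l1_ratio=l1_ratio)
    regr.fit(X_train,y_train)
    print('l1_ratio=%.1f zeroed %d coefficients'%(l1_ratio,np.sum(regr.coef_==0)))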

Attributes

  • coef_
  • intercept_
  • n_iter_

Methods

  • fit(X,y[,sample_weight])
  • predict(X)
  • score(X,y[,sample_weight])

Using ElasticNet

def test_ElasticNet(*data):
    X_train,X_test,y_train,y_test=data
    regr=linear_model.ElasticNet()
    regr.fit(X_train,y_train)
    print('Coefficients:%s,\nintercept %.2f'%(regr.coef_,regr.intercept_))
    print('Mean squared error:%.2f'%np.mean((regr.predict(X_test)-y_test)**2))
    print('Score:%.2f'%regr.score(X_test,y_test))

X_train,X_test,y_train,y_test=load_data()
test_ElasticNet(X_train,X_test,y_train,y_test)

Effect of the alpha and rho coefficients on prediction performance

def test_ElasticNet_alpha_rho(*data):
    X_train,X_test,y_train,y_test=data
    alphas=np.logspace(-2,2)
    rhos=np.linspace(0.01,1)
    scores=[]
    for alpha in alphas:
        for rho in rhos:
            regr=linear_model.ElasticNet(alpha=alpha,l1_ratio=rho)
            regr.fit(X_train,y_train)
            scores.append(regr.score(X_test,y_test))
    # plot the score surface over (alpha, rho)
    from mpl_toolkits.mplot3d import Axes3D  # registers the 3d projection on older matplotlib
    from matplotlib import cm
    # scores were collected with alpha in the outer loop (rho varies fastest),
    # so reshape to (n_alpha,n_rho) and transpose to match the meshgrid layout
    scores=np.array(scores).reshape(len(alphas),len(rhos)).T
    alphas,rhos=np.meshgrid(alphas,rhos)
    fig=plt.figure()
    ax=fig.add_subplot(1,1,1,projection='3d')
    surf=ax.plot_surface(alphas,rhos,scores,rstride=1,cstride=1,cmap=cm.jet,linewidth=0,antialiased=False)
    fig.colorbar(surf,shrink=0.5,aspect=5)
    ax.set_xlabel(r"$\alpha$")
    ax.set_ylabel(r"$\rho$")
    ax.set_zlabel("score")
    ax.set_title("ElasticNet")
    plt.show()

X_train,X_test,y_train,y_test=load_data()
test_ElasticNet_alpha_rho(X_train,X_test,y_train,y_test)
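
Here too a cross-validated variant exists: ElasticNetCV searches over alpha and l1_ratio at once. A minimal sketch (the l1_ratio candidates are illustrative):

# ElasticNetCV tunes both alpha and l1_ratio by cross-validation
regr_cv=linear_model.ElasticNetCV(l1_ratio=[0.1,0.5,0.9,1.0],cv=5)
regr_cv.fit(X_train,y_train)
print('chosen alpha:%.4f, chosen l1_ratio:%.1f'%(regr_cv.alpha_,regr_cv.l1_ratio_))
print('test score:%.2f'%regr_cv.score(X_test,y_test))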
