model

Most of the theory behind this chapter is already familiar; the main goal here is getting familiar with the packages and functions.

linear regression:

from sklearn.linear_model import LinearRegression

model = LinearRegression()  # the normalize= argument was removed in scikit-learn 1.2; scale features separately (see the sketch below)

model = model.fit(train_X, train_y)
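A minimal end-to-end sketch (assuming the train_X/train_y from earlier steps), using StandardScaler in a Pipeline to stand in for the removed normalize= option:

from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

pipe = make_pipeline(StandardScaler(), LinearRegression())
pipe = pipe.fit(train_X, train_y)
print(pipe.named_steps['linearregression'].intercept_)  # fitted intercept
print(pipe.named_steps['linearregression'].coef_)       # fitted coefficients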

cross validation:

from sklearn.model_selection import cross_val_score

from sklearn.metrics import mean_absolute_error, make_scorer

scores = cross_val_score(model, X=train_X, y=train_y, verbose=1, cv=5,
                         scoring=make_scorer(mean_absolute_error))  # per-fold raw MAE
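For a single summary number, average the folds. Note that the built-in 'neg_mean_absolute_error' scorer is the more common idiom: it negates MAE so that larger is better, which is what sklearn's search utilities expect:

print(scores.mean())
neg_scores = cross_val_score(model, X=train_X, y=train_y, cv=5,
                             scoring='neg_mean_absolute_error')
print(-neg_scores.mean())  # back to a positive MAE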

Learning curves and validation curves

from sklearn.model_selection import learning_curve, validation_curve
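A minimal sketch of both calls (the cv and grid values here are illustrative choices, assuming the train_X/train_y from above): learning_curve varies the training-set size, while validation_curve varies one hyperparameter:

import numpy as np
from sklearn.linear_model import Ridge

train_sizes, lc_train, lc_valid = learning_curve(
    model, train_X, train_y, cv=5,
    train_sizes=np.linspace(0.1, 1.0, 5),
    scoring='neg_mean_absolute_error')

vc_train, vc_valid = validation_curve(
    Ridge(), train_X, train_y, cv=5,
    param_name='alpha', param_range=[0.1, 1.0, 10.0],
    scoring='neg_mean_absolute_error')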

L1,L2 regularization

from sklearn.linear_model import Ridge  # L2 penalty

from sklearn.linear_model import Lasso  # L1 penalty
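A minimal sketch contrasting the two penalties (the alpha values are illustrative): Lasso drives coefficients exactly to zero, while Ridge only shrinks them:

ridge = Ridge(alpha=1.0).fit(train_X, train_y)
lasso = Lasso(alpha=0.1).fit(train_X, train_y)
print((ridge.coef_ == 0).sum(), (lasso.coef_ == 0).sum())  # Lasso usually zeroes far more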

Nonlinear models

from sklearn.svm import SVR  # SVC is the classification variant; SVR is its regression counterpart

from sklearn.tree import DecisionTreeRegressor

from sklearn.ensemble import RandomForestRegressor

from sklearn.ensemble import GradientBoostingRegressor

from sklearn.neural_network import MLPRegressor

from xgboost.sklearn import XGBRegressor

from lightgbm.sklearn import LGBMRegressor
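A quick out-of-the-box comparison of these models (a sketch, assuming the earlier train_X/train_y; default hyperparameters, 5-fold MAE, with max_iter raised on the MLP so it has a chance to converge):

models = [SVR(), DecisionTreeRegressor(), RandomForestRegressor(),
          GradientBoostingRegressor(), MLPRegressor(max_iter=500),
          XGBRegressor(), LGBMRegressor()]
for m in models:
    mae = -cross_val_score(m, X=train_X, y=train_y, cv=5,
                           scoring='neg_mean_absolute_error').mean()
    print(type(m).__name__, mae)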

Hyperparameter tuning:

Greedy search: tune one parameter at a time in sequence, fixing each at its local optimum as a stand-in for a global search (see the sketch just below).
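A minimal sketch of the greedy idea (the candidate lists mirror the grid below; cv_mae is a hypothetical helper, not from the original):

def cv_mae(**params):
    # hypothetical helper: mean 5-fold MAE for an LGBMRegressor with these params
    return -cross_val_score(LGBMRegressor(**params), X=train_X, y=train_y, cv=5,
                            scoring='neg_mean_absolute_error').mean()

best = {}
best['num_leaves'] = min([3, 5, 10, 15, 20, 40, 55], key=lambda v: cv_mae(num_leaves=v))
best['max_depth'] = min([3, 5, 10, 15, 20, 40, 55],
                        key=lambda v: cv_mae(max_depth=v, **best))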

Grid search: step through each parameter range at a fixed interval and try every combination; little effort to set up, but time-consuming as the grid grows.

from sklearn.model_selection import GridSearchCV

objective = ['regression', 'regression_l1', 'mape', 'huber', 'fair']
num_leaves = [3, 5, 10, 15, 20, 40, 55]
max_depth = [3, 5, 10, 15, 20, 40, 55]

parameters = {'objective': objective, 'num_leaves': num_leaves, 'max_depth': max_depth}
model = LGBMRegressor()
clf = GridSearchCV(model, parameters, cv=5)
clf = clf.fit(train_X, train_y)
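After fitting, the winning combination and its cross-validated score sit on the fitted search object:

print(clf.best_params_)
print(clf.best_score_)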

Bayesian optimization: build a probability model from the results of previous evaluations and use it to choose the next parameter settings to try.

Several Bayesian optimization libraries exist in Python, differing in the surrogate they fit to the objective function: Hyperopt uses the Tree Parzen Estimator (TPE), Spearmint uses a Gaussian-process surrogate, and SMAC uses random-forest regression. The code below uses the bayes_opt package (BayesianOptimization), which is Gaussian-process based.

from bayes_opt import BayesianOptimization

def rf_cv(num_leaves, max_depth, subsample, min_child_samples):
    # mean 5-fold MAE on train_y_ln, the log-transformed target from earlier steps
    val = cross_val_score(
        LGBMRegressor(objective='regression_l1',
                      num_leaves=int(num_leaves),
                      max_depth=int(max_depth),
                      subsample=subsample,
                      min_child_samples=int(min_child_samples)),
        X=train_X, y=train_y_ln, verbose=0, cv=5,
        scoring=make_scorer(mean_absolute_error)
    ).mean()
    # BayesianOptimization maximizes, so return a value that grows as MAE shrinks;
    # 1 - val is a convenient choice when MAE on the log target stays below 1
    return 1 - val

rf_bo = BayesianOptimization(
    rf_cv,
    {
        'num_leaves': (2, 100),
        'max_depth': (2, 100),
        'subsample': (0.1, 1),
        'min_child_samples': (2, 100)
    }
)

rf_bo.maximize()
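The best result found so far is exposed as a dict on the optimizer (bayes_opt stores it under .max, with 'target' and 'params' keys):

print(rf_bo.max['target'])  # best 1 - MAE observed
print(rf_bo.max['params'])  # the hyperparameters that produced it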
