基于 Python 的 XGBoost 模型建立和调参

1. 创建模型类(关于预测问题)

class XGB(object):
    """Wrapper that trains an XGBoost regressor on standardized data.

    Features and target are each standardized with their own
    ``StandardScaler`` before training. Both fitted scalers are attached to
    the returned booster (``model.scaler_x`` / ``model.scaler_y``) so that
    ``predict`` can scale new inputs and map predictions back to the
    original target units.
    """

    # Default booster parameters used by ``fit`` when no overrides are given.
    # NOTE(review): 'lambda' (L2) and 'alpha' (L1) are set very high for a
    # standardized target; confirm these values are intentional.
    DEFAULT_PARAMS = {
        'booster': 'gbtree',
        'seed': 0,
        'colsample_bytree': 0.8,
        'subsample': 0.8,
        'learning_rate': 0.08,
        'objective': 'reg:squarederror',
        'max_depth': 3,
        'eval_metric': 'rmse',
        'lambda': 1500,
        'alpha': 1000,
    }

    def __init__(self):
        # Imported here and kept on the instance, so every method must reach
        # the library through ``self.xgb`` rather than a module-level name.
        import xgboost as xgb
        from sklearn.preprocessing import StandardScaler
        self.StandardScaler = StandardScaler
        self.xgb = xgb

    @staticmethod
    def _as_2d(data):
        """Return ``data.values`` reshaped to ``(n_rows, -1)``.

        ``data`` must expose ``.values`` and ``.shape`` (e.g. a pandas
        DataFrame or Series).
        """
        return data.values.reshape(data.shape[0], -1)

    def fit(self, X, y, params=None, num_boost_round=800):
        """Standardize ``X`` and ``y``, train a booster and return it.

        Args:
            X: Feature table exposing ``.values`` and ``.shape``.
            y: Target exposing ``.values`` and ``.shape``.
            params: Optional dict of booster parameters merged on top of
                ``DEFAULT_PARAMS``. ``None`` keeps the defaults.
            num_boost_round: Number of boosting rounds (default 800).

        Returns:
            The trained booster, with the fitted ``scaler_x`` and
            ``scaler_y`` attached as attributes.
        """
        features = self._as_2d(X)
        target = self._as_2d(y)

        scaler_x = self.StandardScaler().fit(features)
        scaler_y = self.StandardScaler().fit(target)
        X_scaled = scaler_x.transform(features)
        y_scaled = scaler_y.transform(target)

        # Use self.xgb: a bare ``xgb`` is not defined in this scope.
        xgb_train = self.xgb.DMatrix(X_scaled, label=y_scaled)

        train_params = dict(self.DEFAULT_PARAMS)
        if params:
            train_params.update(params)

        model = self.xgb.train(
            params=train_params,
            dtrain=xgb_train,
            verbose_eval=100,
            num_boost_round=num_boost_round,
        )
        # Keep the scalers with the model so predict() can reuse them.
        model.scaler_x = scaler_x
        model.scaler_y = scaler_y
        return model

    def predict(self, X_predict, model):
        """Predict targets for ``X_predict`` in the original target units.

        Args:
            X_predict: Feature table exposing ``.values`` and ``.shape``.
            model: A booster returned by ``fit`` (must carry ``scaler_x``
                and ``scaler_y``).

        Returns:
            2-D array of shape ``(n_rows, -1)`` holding the predictions after
            ``scaler_y.inverse_transform``.
        """
        X_scaled = model.scaler_x.transform(self._as_2d(X_predict))
        dmatrix = self.xgb.DMatrix(X_scaled)
        y_scaled = model.predict(dmatrix)
        # inverse_transform expects a 2-D array, so restore the column axis.
        return model.scaler_y.inverse_transform(
            y_scaled.reshape(y_scaled.shape[0], -1)
        )

2. 模型调参

这里使用 sklearn.model_selection.GridSearchCV 进行调参。

调参时,先初始化一些值:

learning_rate: 0.1
n_estimators: 500
max_depth: 5
min_child_weight: 1
subsample: 0.8
colsample_bytree: 0.8
gamma: 0
reg_alpha: 0
reg_lambda: 1

# Grid-search one hyperparameter (n_estimators) of an XGBRegressor while the
# other parameters stay fixed. X_train and y_train must already be defined
# before this snippet runs.
import xgboost as xgb
from sklearn.model_selection import GridSearchCV

## Parameters to tune in this round
cv_params = {'n_estimators': [400, 500, 600, 700, 800]}
## Remaining base parameters, held fixed during the search
other_params = {'learning_rate': 0.1,
                'n_estimators': 500,
                'max_depth': 5,
                'min_child_weight': 1,
                'seed': 0,
                'subsample': 0.8,
                'colsample_bytree': 0.8,
                'gamma': 0, 'reg_alpha': 0,
                'reg_lambda': 1}

model = xgb.XGBRegressor(**other_params)  ## the ** unpacking is required
optimized_GBM = GridSearchCV(estimator=model, param_grid=cv_params, scoring='r2', cv=5, verbose=1, n_jobs=4)
optimized_GBM.fit(X_train, y_train)
# `grid_scores_` no longer exists in current scikit-learn; build the same
# per-candidate summary (params, mean score, std) from `cv_results_`.
cv_results = optimized_GBM.cv_results_
evalute_result = list(zip(cv_results['params'],
                          cv_results['mean_test_score'],
                          cv_results['std_test_score']))
print('每轮迭代运行结果:{0}'.format(evalute_result))
print('参数的最佳取值:{0}'.format(optimized_GBM.best_params_))
print('最佳模型得分:{0}'.format(optimized_GBM.best_score_))

你可能感兴趣的:(模型,python,机器学习)