1. 创建模型类(关于预测问题)
class XGB(object):
    """Regression wrapper that standardises inputs/targets around ``xgboost.train``.

    ``fit`` scales both the features and the target with ``StandardScaler``,
    trains a booster on the scaled data and attaches the two fitted scalers
    to the returned booster.  ``predict`` reuses those scalers to transform
    new features and to map predictions back to the original target scale.
    """

    def __init__(self):
        # Imported here and stored on the instance; every method must reach
        # the libraries through ``self`` because no module-level ``xgb`` name
        # is guaranteed to exist.
        import xgboost as xgb
        from sklearn.preprocessing import StandardScaler

        self.StandardScaler = StandardScaler
        self.xgb = xgb

    def fit(self, X, y):
        """Train a booster on standardised ``X`` and ``y``.

        Args:
            X: Feature table exposing ``.values`` and ``.shape``
                (presumably a pandas DataFrame -- confirm against callers).
            y: Target exposing ``.values`` and ``.shape``
                (presumably a pandas Series/DataFrame).

        Returns:
            The trained booster, with the fitted scalers attached as
            ``scaler_x`` and ``scaler_y`` attributes.
        """
        # Flatten everything after the first axis so each sample is one row.
        features = X.values.reshape(X.shape[0], -1)
        targets = y.values.reshape(y.shape[0], -1)

        scaler_x = self.StandardScaler().fit(features)
        scaler_y = self.StandardScaler().fit(targets)
        features_scaled = scaler_x.transform(features)
        targets_scaled = scaler_y.transform(targets)

        # Use the module stored on the instance: a bare ``xgb`` is undefined
        # in this scope and would raise NameError.
        xgb_train = self.xgb.DMatrix(features_scaled, targets_scaled)
        params = {
            'booster': 'gbtree',
            'seed': 0,
            'colsample_bytree': 0.8,
            'subsample': 0.8,
            'learning_rate': 0.08,
            'objective': 'reg:squarederror',
            'max_depth': 3,
            'eval_metric': 'rmse',
            'lambda': 1500,
            'alpha': 1000,
        }
        model = self.xgb.train(
            params=params,
            dtrain=xgb_train,
            verbose_eval=100,
            num_boost_round=800,
        )
        # Keep the scalers with the model so predict() can undo the scaling.
        model.scaler_x = scaler_x
        model.scaler_y = scaler_y
        return model

    def predict(self, X_predict, model):
        """Predict targets for ``X_predict`` in the original target scale.

        Args:
            X_predict: Feature table exposing ``.values`` and ``.shape``.
            model: Booster returned by :meth:`fit` (must carry ``scaler_x``
                and ``scaler_y``).

        Returns:
            2-D array of shape ``(n_samples, -1)`` holding the predictions
            after ``scaler_y.inverse_transform``.
        """
        features = X_predict.values.reshape(X_predict.shape[0], -1)
        features_scaled = model.scaler_x.transform(features)
        dmatrix = self.xgb.DMatrix(features_scaled)
        y = model.predict(dmatrix)
        # The scaler expects a 2-D array, so reshape before inverting.
        y_inverse = model.scaler_y.inverse_transform(y.reshape(y.shape[0], -1))
        return y_inverse
2. 模型调参
这里使用 sklearn.model_selection.GridSearchCV 进行调参。
调参时,先为各参数设定如下初始值,其余参数保持不变,每次只对待调参数做网格搜索:
learning_rate: 0.1
n_estimators: 500
max_depth: 5
min_child_weight: 1
subsample: 0.8
colsample_bytree: 0.8
gamma: 0
reg_alpha: 0
reg_lambda: 1
# Grid-search a single hyper-parameter (n_estimators) of an XGBRegressor
# while every other parameter stays fixed at its initial value.
# X_train / y_train must already be defined by the surrounding code.
import xgboost as xgb
from sklearn.model_selection import GridSearchCV

# Parameters to tune in this round.
cv_params = {'n_estimators': [400, 500, 600, 700, 800]}
# Remaining base parameters, held fixed during the search.
other_params = {
    'learning_rate': 0.1,
    'n_estimators': 500,
    'max_depth': 5,
    'min_child_weight': 1,
    'seed': 0,
    'subsample': 0.8,
    'colsample_bytree': 0.8,
    'gamma': 0,
    'reg_alpha': 0,
    'reg_lambda': 1,
}
# The ** is required: it unpacks the dict into keyword arguments.
model = xgb.XGBRegressor(**other_params)
optimized_GBM = GridSearchCV(
    estimator=model,
    param_grid=cv_params,
    scoring='r2',
    cv=5,
    verbose=1,
    n_jobs=4,
)
optimized_GBM.fit(X_train, y_train)
# ``grid_scores_`` is not available in current scikit-learn releases; the
# per-candidate results live in ``cv_results_``.  Summarise them as
# (params, mean test score, std of test score) tuples.
cv_results = optimized_GBM.cv_results_
evalute_result = list(zip(
    cv_results['params'],
    cv_results['mean_test_score'],
    cv_results['std_test_score'],
))
print('每轮迭代运行结果:{0}'.format(evalute_result))
print('参数的最佳取值:{0}'.format(optimized_GBM.best_params_))
print('最佳模型得分:{0}'.format(optimized_GBM.best_score_))