随机森林算法的超参数调优

超参数调优

  • RandomSearch方法
  • GridSearchCV调参

RandomSearch方法(使用 sklearn 的 RandomizedSearchCV 进行随机搜索)

from sklearn.model_selection import RandomizedSearchCV
 
# Search space for the random-forest hyperparameters.
# Number of trees in the forest.
n_estimators = [int(x) for x in np.linspace(start=1, stop=1000, num=100)]
# Number of features considered when looking for the best split.
# 1.0 (use all features) replaces the legacy 'auto' option: for
# RandomForestRegressor it meant the same thing, but 'auto' was deprecated in
# scikit-learn 1.1 and removed in 1.3, where it makes the fit fail.
max_features = [1.0, 'sqrt']
# Maximum depth of each tree; None lets the trees grow without a depth limit.
max_depth = [int(x) for x in np.linspace(10, 200, num=20)]
max_depth.append(None)
# Minimum number of samples required to split an internal node.
min_samples_split = [2, 5, 10]
# Minimum number of samples required at a leaf node.
min_samples_leaf = [1, 2, 4]
# Whether each tree is trained on a bootstrap sample of the training data.
bootstrap = [True, False]

# Random seeds for the forest itself; None means an unseeded (non-reproducible) fit.
# NOTE(review): the seed is searched like a hyperparameter here, so the "best"
# seed partly reflects noise rather than a better model -- confirm this is intended.
random_state = [int(x) for x in np.linspace(10, 200, num=15)]
random_state.append(None)

# Assemble the distributions that RandomizedSearchCV samples from.
random_grid = {
    'random_state': random_state,
    'n_estimators': n_estimators,
    'max_features': max_features,
    'max_depth': max_depth,
    'min_samples_split': min_samples_split,
    'min_samples_leaf': min_samples_leaf,
    'bootstrap': bootstrap,
}

# Base model whose hyperparameters are being tuned.
model_RandomForestRegressor = ensemble.RandomForestRegressor()
# Randomized search: 100 sampled combinations, 3-fold cross validation,
# scored by negative mean absolute error, using all available cores.
rf_random = RandomizedSearchCV(
    estimator=model_RandomForestRegressor,
    param_distributions=random_grid,
    n_iter=100,
    scoring='neg_mean_absolute_error',
    cv=3,
    verbose=2,
    random_state=2,
    n_jobs=-1,
)

# Run the search on the training data.
rf_random.fit(x_train, y_train)

# Report the best parameter combination found (printed so it is also visible
# when this runs as a plain script rather than in a notebook cell).
print(rf_random.best_params_)

GridSearchCV调参(注意:以下示例调优的模型是 MLPRegressor 神经网络,而非随机森林)

from sklearn.model_selection import GridSearchCV, KFold

# Parameter grid configuration.
# Candidate seeds: one fixed seed (0) and None (unseeded). This is the same list
# the original built via np.linspace(0, 10, num=1) followed by append(None).
random_state = [0, None]

# Only hidden_layer_sizes and random_state actually vary; the remaining entries
# are single-value lists that pin the other settings.
# NOTE(review): per the scikit-learn docs, learning_rate_init is only used by the
# 'sgd' and 'adam' solvers, so it presumably has no effect with 'lbfgs' -- confirm.
parameters = {
    'hidden_layer_sizes': [(4, 4), (5, 5), (6, 6), (7, 7), (8, 8), (10,), (100,), (1000,)],
    'random_state': random_state,
    'activation': ['logistic'],
    'solver': ['lbfgs'],
    'alpha': [0.05],
    'max_iter': [20000],
    'learning_rate_init': [0.0001],
}

# 10-fold cross validation splitter.
kfold = KFold(n_splits=10)

ann = MLPRegressor()
# Exhaustive grid search on the training set, scored by R^2.
grid = GridSearchCV(ann, parameters, scoring='r2', cv=kfold)

grid = grid.fit(x_train, y_train)

# Inspect the best estimator and the parameters that produced it.
reg = grid.best_estimator_
print('best score: %f' % grid.best_score_)
print('best parameters:')
best_params = reg.get_params()
for key in parameters:
    # Values are tuples, strings, floats or None, so format with %s; the
    # original %d raised TypeError on any non-numeric value.
    print('%s: %s' % (key, best_params[key]))

print('test score: %f' % reg.score(x_test, y_test))

# Alternative: pass the best parameters into a fresh estimator to rebuild the best model.
#ann = MLPRegressor(hidden_layer_sizes=reg.get_params()['hidden_layer_sizes'], random_state=reg.get_params()['random_state'])

你可能感兴趣的:(Python数学算法,算法,随机森林)