With pip:
$ pip install bayesian-optimization
With conda (from the conda-forge channel):
$ conda install -c conda-forge bayesian-optimization
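A quick way to confirm the installation succeeded is to import the package:
$ python -c "import bayes_opt; print('ok')"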
# Imports
from sklearn import metrics
import numpy as np
import matplotlib.pyplot as plt
import catboost as cb
from bayes_opt import BayesianOptimization

class optimizationClass():
    # Pass the dataset in at construction time
    def __init__(self, df):
        self.df = df

    # Compute regression metrics to judge model quality
    def reg_calculate(self, true, prediction):
        mse = metrics.mean_squared_error(true, prediction)
        rmse = np.sqrt(mse)
        mae = metrics.mean_absolute_error(true, prediction)
        mape = np.mean(np.abs((true - prediction) / true)) * 100
        r2 = metrics.r2_score(true, prediction)
        # Note: mean_squared_log_error requires non-negative values
        rmsle = np.sqrt(metrics.mean_squared_log_error(true, prediction))
        return {"mse": mse, "rmse": rmse, "mae": mae, "mape": mape, "r2": r2, "rmsle": rmsle}

    # Objective function: its arguments are the hyperparameters to tune.
    # CatBoost is used as the example model here.
    def optimization_function(self, iterations, learning_rate, depth, l2_leaf_reg):
        # bayes_opt passes every parameter as a float, so cast the
        # integer-valued ones back to int before building the model
        parameterDict = {"iterations": int(iterations),
                         "learning_rate": float(learning_rate),
                         "depth": int(depth),
                         "l2_leaf_reg": float(l2_leaf_reg),
                         "task_type": "CPU",
                         "logging_level": "Silent"}
        CB_Regressor = cb.CatBoostRegressor(**parameterDict)
        CB_Regressor.fit(self.df["X_train"], self.df["Y_train"])
        Y_pre = CB_Regressor.predict(self.df["X_test"])
        resDict = self.reg_calculate(self.df["Y_test"], Y_pre)
        return resDict["r2"]  # the quantity the optimizer maximizes

    # Set up the optimizer over the search space and run it
    def run(self, init_points=2, n_iter=3):
        cb_bo = BayesianOptimization(
            self.optimization_function,
            {'iterations': (200, 5000),
             'learning_rate': (1e-6, 1e-2),
             'depth': (2, 15),
             'l2_leaf_reg': (0, 5)})
        cb_bo.maximize(init_points=init_points, n_iter=n_iter)
        print("Final result:", cb_bo.max)
Usage:
Opt = optimizationClass(df={"X_train": X_Train, "X_test": X_Test, "Y_train": Y_Train, "Y_test": Y_Test})
Opt.run()
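As written, run() only prints cb_bo.max. To actually reuse the best parameters, one option is to rebuild the optimizer at top level (mirroring what run() does) and read cb_bo.max yourself; a minimal sketch under that assumption, remembering that the optimizer reports every parameter as a float:

cb_bo = BayesianOptimization(
    Opt.optimization_function,
    {'iterations': (200, 5000), 'learning_rate': (1e-6, 1e-2),
     'depth': (2, 15), 'l2_leaf_reg': (0, 5)})
cb_bo.maximize(init_points=2, n_iter=3)
best = cb_bo.max["params"]                   # best hyperparameters found so far
final_model = cb.CatBoostRegressor(
    iterations=int(best["iterations"]),      # cast integer-valued params back to int
    depth=int(best["depth"]),
    learning_rate=best["learning_rate"],
    l2_leaf_reg=best["l2_leaf_reg"],
    task_type="CPU", logging_level="Silent")
final_model.fit(X_Train, Y_Train)            # retrain on the training set with the best params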
Boston Housing Dataset Example:
from sklearn import datasets                    # import the datasets module
boston = datasets.load_boston()                 # load the Boston housing data (removed in scikit-learn 1.2, see the note below)
print(boston.keys())                            # keys: ['data', 'target', 'feature_names', 'DESCR', 'filename']
print(boston.data.shape, boston.target.shape)   # data shapes: (506, 13) (506,)
print(boston.feature_names)                     # the 13 feature names
print(boston.DESCR)                             # description of the dataset
print(boston.filename)                          # path to the data file
from sklearn.model_selection import train_test_split
# check data shape
print("boston.data.shape %s , boston.target.shape %s" % (boston.data.shape, boston.target.shape))
train = boston.data        # samples
target = boston.target     # targets
# split into 80% training set and 20% test set
X_train, X_test, Y_train, Y_test = train_test_split(train, target, test_size=0.2)
op = optimizationClass({"X_train": X_train, "X_test": X_test, "Y_train": Y_train, "Y_test": Y_test})
op.run(init_points=20, n_iter=30)
Output:
| iter | target | depth | iterat... | l2_lea... | learni... |
-------------------------------------------------------------------------
| 1 | 0.9282 | 11.81 | 4.533e+0 | 3.429 | 0.004794 |
| 2 | 0.9276 | 5.078 | 4.344e+0 | 3.845 | 0.002252 |
| 3 | 0.9346 | 7.365 | 2.657e+0 | 3.246 | 0.006347 |
| 4 | 0.9209 | 11.48 | 2.92e+03 | 3.517 | 0.002591 |
| 5 | 0.499 | 10.03 | 419.6 | 4.096 | 0.001719 |
| 6 | 0.9343 | 8.34 | 2.694e+0 | 4.258 | 0.007294 |
| 7 | 0.9249 | 8.519 | 854.2 | 2.287 | 0.006147 |
| 8 | 0.9332 | 9.853 | 3.582e+0 | 2.197 | 0.007308 |
| 9 | 0.9279 | 4.312 | 4.274e+0 | 1.338 | 0.007123 |
| 10 | 0.5959 | 11.42 | 1.607e+0 | 0.1317 | 0.000427 |
| 11 | 0.8569 | 4.669 | 331.7 | 0.8752 | 0.006876 |
| 12 | 0.9156 | 14.84 | 4.485e+0 | 3.21 | 0.005824 |
| 13 | 0.4344 | 2.786 | 1.692e+0 | 1.453 | 0.000322 |
| 14 | 0.9264 | 6.568 | 4.434e+0 | 4.968 | 0.001957 |
| 15 | 0.5893 | 14.47 | 344.0 | 0.09192 | 0.002032 |
| 16 | 0.9308 | 5.575 | 3.734e+0 | 4.577 | 0.004715 |
| 17 | 0.9091 | 14.26 | 2.902e+0 | 0.823 | 0.003099 |
| 18 | 0.9212 | 12.85 | 2.764e+0 | 0.6054 | 0.0057 |
| 19 | 0.9308 | 8.671 | 4.675e+0 | 1.319 | 0.008707 |
| 20 | 0.8202 | 11.81 | 1.814e+0 | 2.959 | 0.001198 |
| 21 | 0.9159 | 10.66 | 4.572e+0 | 4.217 | 0.001212 |
| 22 | 0.9269 | 10.75 | 840.0 | 0.2402 | 0.008043 |
| 23 | 0.9254 | 11.77 | 2.675e+0 | 1.939 | 0.003613 |
| 24 | 0.743 | 3.133 | 287.7 | 3.102 | 0.00504 |
| 25 | 0.8311 | 9.778 | 2.659e+0 | 3.956 | 0.000840 |
| 26 | 0.8424 | 6.952 | 2.146e+0 | 3.603 | 0.001016 |
| 27 | 0.8028 | 7.485 | 201.4 | 3.991 | 0.009137 |
| 28 | 0.7884 | 12.78 | 4.139e+0 | 0.01309 | 0.000319 |
| 29 | 0.8163 | 6.678 | 2.703e+0 | 0.9174 | 0.000598 |
| 30 | 0.9211 | 3.304 | 2.852e+0 | 3.181 | 0.00489 |
| 31 | 0.9315 | 10.15 | 4.572e+0 | 4.742 | 0.009919 |
| 32 | 0.9153 | 2.532 | 2.65e+03 | 2.52 | 0.006542 |
| 33 | 0.9266 | 11.07 | 2.685e+0 | 1.156 | 0.004197 |
| 34 | 0.9246 | 3.542 | 4.578e+0 | 1.338 | 0.004215 |
| 35 | 0.9176 | 3.253 | 4.567e+0 | 1.612 | 0.002075 |
| 36 | 0.9258 | 3.051 | 2.911e+0 | 0.6876 | 0.007575 |
| 37 | 0.9319 | 7.894 | 4.546e+0 | 5.0 | 0.01 |
| 38 | 0.9141 | 2.334 | 2.683e+0 | 4.422 | 0.006926 |
| 39 | 0.9252 | 11.3 | 3.595e+0 | 0.0 | 0.009926 |
| 40 | 0.7408 | 2.598 | 4.687e+0 | 3.924 | 0.000349 |
| 41 | 0.9133 | 13.61 | 4.663e+0 | 0.165 | 0.002419 |
| 42 | 0.8717 | 2.679 | 4.664e+0 | 2.919 | 0.000942 |
| 43 | 0.9046 | 2.163 | 3.59e+03 | 4.122 | 0.003341 |
| 44 | 0.9187 | 2.043 | 2.663e+0 | 0.9121 | 0.006893 |
| 45 | 0.9034 | 3.795 | 3.72e+03 | 1.496 | 0.001378 |
| 46 | 0.9202 | 2.537 | 4.259e+0 | 2.654 | 0.008496 |
| 47 | 0.8981 | 14.64 | 4.267e+0 | 1.603 | 0.001094 |
| 48 | 0.9271 | 7.045 | 4.358e+0 | 2.492 | 0.00139 |
| 49 | 0.9319 | 6.785 | 4.449e+0 | 3.364 | 0.005274 |
| 50 | 0.856 | 9.51 | 4.467e+0 | 3.623 | 0.000571 |
=========================================================================
Final result: {'target': 0.9346381784461701, 'params': {'depth': 7.3647831970412785, 'iterations': 2657.0839817870524, 'l2_leaf_reg': 3.245530918527948, 'learning_rate': 0.006347041199650392}}
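Compatibility note: load_boston was deprecated in scikit-learn 1.0 and removed in 1.2, so the example above only runs on older versions. A minimal sketch of the same workflow on a current scikit-learn, using fetch_california_housing as a stand-in dataset (the numbers will differ from the table above):

from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split
housing = fetch_california_housing()   # 20640 samples, 8 features, positive targets
X_train, X_test, Y_train, Y_test = train_test_split(housing.data, housing.target, test_size=0.2)
op = optimizationClass({"X_train": X_train, "X_test": X_test, "Y_train": Y_train, "Y_test": Y_test})
op.run(init_points=20, n_iter=30)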
References:
Official documentation of the BayesianOptimization library
https://blog.csdn.net/ningyanggege/article/details/89003698