import logging
import sys

import optuna
def objective(trial):
    """Return the value of (x - 2)^2 for the ``x`` suggested by ``trial``.

    Args:
        trial: Object providing ``suggest_float``; Optuna passes one in for
            every evaluation of the objective.

    Returns:
        The squared distance of the sampled ``x`` from 2.
    """
    # Sample x from the closed interval [-10, 10].
    x = trial.suggest_float("x", -10, 10)
    return (x - 2) ** 2
# Create a study (spelling out the default direction) and evaluate the
# objective 100 times.
study = optuna.create_study(direction="minimize")
study.optimize(func=objective, n_trials=100)
trial: 目标函数的单次调用
study: 优化会话,由一组试验(trial)组成
parameter: 一个要优化其值的变量
方法create_study()返回一个研究对象
import optuna
def objective(trial):
    """Demonstrate each kind of ``suggest_*`` call available on a trial.

    The function only samples hyperparameters; it does not train anything and
    therefore returns ``None``.

    Args:
        trial: Object providing ``suggest_categorical``, ``suggest_int`` and
            ``suggest_float``.
    """
    # Categorical parameter.
    optimizer = trial.suggest_categorical("optimizer", ["MomentumSGD", "Adam"])
    # Integer parameter.
    num_layers = trial.suggest_int("num_layers", 1, 3)
    # Integer parameter sampled in the log domain. Every parameter needs its
    # own name, so this one must not reuse "num_layers".
    num_channels = trial.suggest_int("num_channels", 1, 3, log=True)
    # Integer parameter restricted to multiples of the step.
    num_units = trial.suggest_int("num_units", 10, 100, step=5)
    # Floating point parameter.
    dropout_rate = trial.suggest_float("dropout_rate", 0.0, 1.0)
    # Floating point parameter sampled in the log domain.
    learning_rate = trial.suggest_float("learning_rate", 1e-5, 1e-2, log=True)
    # Floating point parameter restricted to multiples of the step.
    drop_path_rate = trial.suggest_float("drop_path_rate", 0.0, 1.0, step=0.1)
optuna提供的采样算法:
GridSampler 网格搜索
RandomSampler 随机搜索
TPESampler 树结构的Parzen Estimator # optuna的默认采样算法
CmaEsSampler 基于CMA-ES
PartialFixedSampler 部分固定参数
NSGAIISampler 非支配排序遗传算法
QMCSampler 准蒙特卡洛采样算法
要开启剪枝功能(pruning feature),就需要在每次训练后调用report()和should_prune()功能。
def objective(trial):
    """Fit an SGD classifier incrementally, reporting each step for pruning.

    Reads the module-level ``x`` and ``y`` and expects ``sklearn``,
    ``train_test_split`` and ``optuna`` to be in scope.
    NOTE(review): assumes ``y`` is a 1-D sequence of hashable class labels —
    confirm against the data actually used.

    Args:
        trial: Optuna trial used to sample ``alpha`` and to report
            intermediate values.

    Returns:
        The final error rate (1 - accuracy) on the held-out split.

    Raises:
        optuna.TrialPruned: When the trial reports that it should be pruned.
    """
    # For two input arrays train_test_split returns
    # (x_train, x_test, y_train, y_test), in that order.
    x_train, x_test, y_train, y_test = train_test_split(
        x, y, test_size=0.5, random_state=4399
    )
    # partial_fit must be told the complete label set on its first call.
    classes = sorted(set(y))

    alpha = trial.suggest_float("alpha", 1e-5, 1e-1, log=True)
    clf = sklearn.linear_model.SGDClassifier(alpha=alpha)

    for step in range(100):
        clf.partial_fit(x_train, y_train, classes=classes)

        # Report the current error so the pruner can compare trials.
        intermediate_value = 1.0 - clf.score(x_test, y_test)
        trial.report(intermediate_value, step)

        # Abandon the trial as soon as the pruner asks for it.
        if trial.should_prune():
            raise optuna.TrialPruned()

    return 1.0 - clf.score(x_test, y_test)
# Show Optuna's log messages on stdout.
optuna.logging.get_logger("optuna").addHandler(logging.StreamHandler(sys.stdout))
# Create a study that uses MedianPruner and run 20 trials of the objective.
study = optuna.create_study(pruner=optuna.pruners.MedianPruner())
study.optimize(objective, n_trials=20)
一般而言,如果使用RandomSampler,则MedianPruner是最好的;如果使用TPESampler,则HyperbandPruner是最好的。
optuna提供的可视化有7种
from optuna.visualization import plot_contour
from optuna.visualization import plot_edf
from optuna.visualization import plot_intermediate_values
from optuna.visualization import plot_optimization_history
from optuna.visualization import plot_parallel_coordinate
from optuna.visualization import plot_param_importances
from optuna.visualization import plot_slice
举例:
# Parameters shown when a plot is restricted to a subset.
selected_params = ['bagging_freq', 'bagging_fraction']

plot_optimization_history(study)  # optimization history (important)
plot_intermediate_values(study)  # learning curves (important)
plot_parallel_coordinate(study)  # high-dimensional parameter curves
plot_parallel_coordinate(study, params=selected_params)  # chosen parameters only
plot_contour(study)  # relationships between parameters (important)
plot_contour(study, params=selected_params)  # chosen parameters only
plot_slice(study)  # per-parameter slice plot of the tuning process
plot_slice(study, params=selected_params)  # chosen parameters only
plot_param_importances(study)  # parameter importances (important)
plot_edf(study)  # empirical distribution function
通过调用create_study()来创建持久性研究,研究使用SQLite文件进行自动记录
import optuna
# Send Optuna's log messages to stdout so they are visible.
optuna_logger = optuna.logging.get_logger("optuna")
optuna_logger.addHandler(logging.StreamHandler(sys.stdout))

# The study name is also used to build the SQLite storage URL.
study_name = "example-study"
storage_name = "sqlite:///{}.db".format(study_name)
study = optuna.create_study(storage=storage_name, study_name=study_name)
创建研究
def objective(trial):
    """Return the value of (x - 2)^2 for the ``x`` suggested by ``trial``.

    Args:
        trial: Object providing ``suggest_float``; Optuna passes one in for
            every evaluation of the objective.

    Returns:
        The squared distance of the sampled ``x`` from 2.
    """
    x = trial.suggest_float("x", -10, 10)
    return (x - 2) ** 2
# Run three trials of the objective on the study.
study.optimize(func=objective, n_trials=3)
恢复研究
# Re-open the stored study: with load_if_exists=True an existing study of the
# same name is returned instead of raising a duplicate-name error.
study = optuna.create_study(
    study_name=study_name, storage=storage_name, load_if_exists=True
)
# Continue the optimization with three more trials.
study.optimize(objective, n_trials=3)
保存试验
# Load the stored study and export the selected trial attributes as a DataFrame.
study = optuna.create_study(
    study_name=study_name, storage=storage_name, load_if_exists=True
)
df = study.trials_dataframe(attrs=("number", "value", "params", "state"))
def define_model(trial):
    """Build the network for one trial (construction code elided in the notes).

    Args:
        trial: The Optuna trial; presumably used by the elided code to sample
            the architecture — confirm against the full implementation.

    Returns:
        An ``nn.Sequential`` wrapping the entries of ``layers``.
    """
    # Placeholders for the omitted code that is expected to fill `layers`.
    XXXXX
    XXXXX
    return nn.Sequential(*layers)


# Backward-compatible alias for the original, misspelled name.
define_mode = define_model
def train_model():
    """Train the model (skeleton only; the body is elided in the notes)."""
    # Placeholder for the omitted training code.
    XXXXXX
def eval_model(model, val_loader):
    """Evaluate ``model`` and return ``(flops, accuracy)``.

    The evaluation code is elided in the notes. The parameters match the call
    ``eval_model(model, val_loader)`` made by the multi-objective ``objective``.

    Args:
        model: The model to evaluate.
        val_loader: Source of validation data — presumably a data loader;
            confirm against the full implementation.

    Returns:
        A ``(flops, accuracy)`` tuple.
    """
    # Placeholders for the omitted code that computes `flops` and `accuracy`.
    XXXXXX
    XXXXXX
    return flops, accuracy
## begin
def objective(trial):
    """Multi-objective target: return ``(flops, accuracy)`` for one trial.

    Dataset and loader setup is elided in the notes (``XXX`` placeholders).

    Args:
        trial: The Optuna trial, forwarded to ``define_model``.

    Returns:
        The ``(flops, accuracy)`` pair produced by ``eval_model``.
    """
    # Placeholders: dataset construction is omitted in the notes.
    train_dataset = XXX
    val_dataset = XXXX
    # NOTE(review): `val_loader` used below is never built in the visible
    # code — presumably a loader over `val_dataset`; confirm.
    model = define_model(trial).to(DEVICE)
    # Adam needs the parameters it should optimize; it cannot be constructed
    # without them.
    optimizer = torch.optim.Adam(model.parameters())
    # NOTE(review): no training step is invoked before evaluation —
    # presumably elided; confirm.
    flops, accuracy = eval_model(model, val_loader)
    return flops, accuracy
# Two objectives: the first returned value is minimized, the second maximized.
study = optuna.create_study(directions=["minimize", "maximize"])
# Budget: at most 30 trials, with a timeout of 300.
study.optimize(func=objective, n_trials=30, timeout=300)
finished_trials = study.trials
print("Number of finished trials:", len(finished_trials))
检查:
# Plot the Pareto front, labelling the two objectives.
optuna.visualization.plot_pareto_front(
    study,
    target_names=["FLOPS", "accuracy"],
)
获取试验次数
# best_trials holds the trials on the Pareto front of the study.
print(f"Number of trials on the Pareto front: {len(study.best_trials)}")

# `values` follows the objective's return order (flops, accuracy), so index 1
# is the accuracy.
trial_with_highest_accuracy = max(study.best_trials, key=lambda t: t.values[1])
print("Trial with highest accuracy:")
print(f"\tnumber:{trial_with_highest_accuracy.number}")
print(f"\tparams:{trial_with_highest_accuracy.params}")
print(f"\tvalues:{trial_with_highest_accuracy.values}")
了解哪些超参数的重要性最大
# Parameter importances with respect to the first objective (values[0], the
# FLOPS returned by the objective).
optuna.visualization.plot_param_importances(
    study, target=lambda t: t.values[0], target_name="flops"
)