神经网络调参--使用hyperopt调整超参数

本文记录如何使用 hyperopt 对 Keras 神经网络进行超参数调优:以验证集上的 F1 作为优化目标,搜索隐藏层单元数(units)、学习率(learning_rate)和批大小(batch_size)。

# 划分验证集
from sklearn.model_selection import train_test_split
# Hold out 20% of the training data as a validation set; the split is
# stratified on the labels and seeded so it is reproducible.
x_train, x_val, y_train, y_val = train_test_split(
    seq_train,
    train_label,
    test_size=0.2,
    random_state=1234,
    stratify=train_label,
)
# Print the shape of each of the four resulting arrays on one line.
print(*(part.shape for part in (x_train, x_val, y_train, y_val)))

## 使用hyperopt调整超参数
from hyperopt import fmin,hp,Trials,space_eval,rand,tpe,anneal, STATUS_OK
from hyperopt.early_stop import no_progress_loss
from tensorflow.keras import layers, models, metrics
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau

# Embedding vocabulary size (input_dim in f_model): the number of indexed
# words plus one, capped at TOP_K.
top_words = min(TOP_K, len(word_indexs) + 1)
# Sequence length handed to the Embedding layer as input_length.
max_words = 100
# Dimension of each embedding vector (output_dim of the Embedding layer).
embed_dim = 128

def f_model(params):
    """Build, train and score one candidate network for hyperopt.

    Args:
        params: Mapping with the keys ``'units'`` (width of the hidden Dense
            layer), ``'learning_rate'`` (learning rate given to Adam) and
            ``'batch_size'`` (batch size passed to ``fit``).

    Returns:
        dict with ``'loss'`` (the negative validation-set F1, since hyperopt
        minimises the loss), ``'status'`` (``STATUS_OK``) and ``'model'``
        (the trained Keras model).
    """
    # Imported locally so the function does not depend on a module-level
    # ``tf`` name; only ``tensorflow.keras`` submodules are imported nearby.
    from tensorflow.keras import optimizers

    model = models.Sequential()
    model.add(layers.Embedding(input_dim=top_words, output_dim=embed_dim, input_length=max_words))
    model.add(layers.Dropout(rate=0.2))
    model.add(layers.Flatten())
    model.add(layers.Dense(params['units'], activation="relu"))
    model.add(layers.Dropout(rate=0.2))
    model.add(layers.Dense(1, activation='sigmoid'))
    model.summary()

    # Adam with the learning rate proposed for this trial.
    optimizer = optimizers.Adam(params['learning_rate'])
    model.compile(
        optimizer=optimizer,
        loss="binary_crossentropy",
        # The metric order fixes the order of the values returned by
        # model.evaluate below: loss, accuracy, precision, recall.
        metrics=["accuracy", metrics.Precision(), metrics.Recall()],
    )

    # Stop training once val_loss has not improved for 2 epochs.
    early_stop = EarlyStopping(
        monitor='val_loss',
        patience=2,
        verbose=1,
        mode='auto',
    )
    model.fit(
        x_train,
        y_train,
        epochs=5,
        batch_size=params['batch_size'],
        validation_data=(x_val, y_val),
        callbacks=[early_stop],
    )

    # The search objective is the F1 score on the validation set (not the
    # test set, and not the loss value itself).
    _, accuracy, precision, recall = model.evaluate(x_val, y_val)
    # The small epsilon avoids division by zero when precision and recall
    # are both 0.
    f1 = 2 * (precision * recall) / (precision + recall + 1e-10)
    print('验证集Accuracy:', accuracy, '验证集Precision:', precision, '验证集Recall:', recall)
    print('验证集f1:', f1)
    # hyperopt minimises 'loss', so the F1 score is negated.
    return {'loss': -f1, 'status': STATUS_OK, 'model': model}

# Search space definition.
# Both *_range sequences stay at module level because the index returned by
# fmin for an hp.choice parameter is looked up in them after the search.
batch_size_range = [32, 64, 128]
units_range = range(32, 128)
spaces = dict(
    # Hidden layer width: one of the integers 32..127.
    units=hp.choice("units", units_range),
    # Adam learning rate, sampled uniformly from [0.0001, 0.001].
    learning_rate=hp.uniform("learning_rate", 1e-4, 1e-3),
    # Training batch size: one of the listed values.
    batch_size=hp.choice("batch_size", batch_size_range),
)

# Hyper-parameter search entry point.
def param_hyperopt(max_evals=100):
    """Search ``spaces`` with TPE, minimising the loss reported by ``f_model``.

    Args:
        max_evals: Upper bound on the number of trials to run.

    Returns:
        A ``(best, history)`` tuple: the best assignment exactly as returned
        by ``fmin`` and the ``Trials`` object that recorded the search.
    """
    history = Trials()
    best = fmin(
        fn=f_model,
        space=spaces,
        algo=tpe.suggest,
        max_evals=max_evals,
        trials=history,
        # Abort the search early after 20 trials without improvement.
        early_stop_fn=no_progress_loss(20),
    )
    print('best params:', best)
    return best, history
    
# Run the hyper-parameter search, retrain with the best setting and report
# the results on the test set.
from sklearn.metrics import classification_report, confusion_matrix

params_best, trials = param_hyperopt(10)

# fmin reports hp.choice parameters as option indices. space_eval resolves
# the whole assignment back to real values against the search space, so no
# per-parameter lookup has to be kept in sync with `spaces` by hand.
params_best = space_eval(spaces, params_best)
print(params_best)

# Train a fresh model with the best parameters and keep it.
dic = f_model(params=params_best)
model = dic['model']
model.summary()

# Predict on the test set with the tuned model.
print('测试集结果:')
pred = model.predict(seq_test)
# Each prediction row holds one sigmoid output; label 1 when it exceeds 0.5.
pred_label = [int(row[0] > 0.5) for row in pred]
print(confusion_matrix(test_label, pred_label))
print(classification_report(test_label, pred_label))

你可能感兴趣的:(深度学习,神经网络,tensorflow,hyperopt,调参)