In Keras, the simplest way to keep the best model according to a validation metric is the built-in ModelCheckpoint callback, for example:
from keras.callbacks import ModelCheckpoint

checkpoint = ModelCheckpoint(filepath='../best_model.weights',
                             monitor='val_acc',
                             verbose=1,
                             save_best_only=True)  # only overwrite when val_acc improves
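The checkpoint is then passed to fit like any other callback. A minimal usage sketch (x_train, y_train, x_test, y_test are placeholder names; monitoring val_acc requires validation data and metrics=['accuracy'] at compile time):

model.fit(x_train, y_train,
          epochs=10,
          validation_data=(x_test, y_test),
          callbacks=[checkpoint])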
Simple as this is, it has an obvious drawback: the monitored quantity is determined by the metrics passed to compile, and a custom metric in Keras must be written entirely in tensor operations. If the metric you actually want cannot be expressed as a tensor computation (BLEU, for instance), then it cannot be wrapped as a metric function, and this approach no longer works.
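For contrast, here is the shape a legitimate custom metric has to take; everything inside must be a backend op (this toy top-1 accuracy is an illustrative sketch, not from the original post):

import keras.backend as K

def my_acc(y_true, y_pred):
    # every operation here must be a backend (tensor) op
    matches = K.equal(K.argmax(y_true, axis=-1), K.argmax(y_pred, axis=-1))
    return K.mean(K.cast(matches, K.floatx()))

Something like BLEU, which needs arbitrary Python over decoded strings, simply cannot be written this way.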
Hence a fully general solution: write your own callback and compute whatever you like. For example:
import numpy as np
from keras.callbacks import Callback

def evaluate():  # evaluation function
    pred = model.predict(x_test)
    # compute whatever you like here (accuracy, BLEU, F1, ...)
    return np.mean(pred.argmax(axis=1) == y_test)

# define the callback: compute validation acc and keep the best model
class Evaluate(Callback):
    def __init__(self):
        self.accs = []
        self.highest = 0.
    def on_epoch_end(self, epoch, logs=None):
        # inside a Callback, self.model also refers to the model being trained
        acc = evaluate()
        self.accs.append(acc)
        if acc >= self.highest:  # save the best weights
            self.highest = acc
            model.save_weights('best_model.weights')
        # print/compute anything else you need
        print('acc: %s, highest: %s' % (acc, self.highest))
evaluator = Evaluate()
model.fit(x_train, y_train,
          epochs=10,
          callbacks=[evaluator])
During training you may also want to fine-tune hyperparameters on the fly. The most common case is adjusting the learning rate by epoch, which is easily done with LearningRateScheduler, itself another callback.
from keras.callbacks import LearningRateScheduler

def lr_schedule(epoch):
    # return a different learning rate depending on the epoch
    if epoch < 50:
        lr = 1e-2
    elif epoch < 80:
        lr = 1e-3
    else:
        lr = 1e-4
    return lr

lr_scheduler = LearningRateScheduler(lr_schedule)
model.fit(x_train, y_train,
          epochs=100,  # the schedule above only makes sense with enough epochs
          callbacks=[evaluator, lr_scheduler])
What about other hyperparameters, say the lamb of the earlier center loss, or a similar regularization weight? In that case we set the hyperparameter as a backend Variable and write a custom callback that reassigns it dynamically. Take, for example, this loss defined by Su Jianlin (苏神):
import keras.backend as K

def mycrossentropy(y_true, y_pred, e=0.1):
    # nb_classes (the number of classes) is defined elsewhere
    loss1 = K.categorical_crossentropy(y_true, y_pred)
    loss2 = K.categorical_crossentropy(K.ones_like(y_pred) / nb_classes, y_pred)
    return (1 - e) * loss1 + e * loss2
To change e dynamically, rewrite this as:
e = K.variable(0.1)  # a backend variable rather than a Python constant

def mycrossentropy(y_true, y_pred):  # note: e is no longer a default argument
    loss1 = K.categorical_crossentropy(y_true, y_pred)
    loss2 = K.categorical_crossentropy(K.ones_like(y_pred) / nb_classes, y_pred)
    return (1 - e) * loss1 + e * loss2

model.compile(loss=mycrossentropy,
              optimizer='adam')
class callback4e(Callback):
    def __init__(self, e):
        self.e = e
    def on_epoch_end(self, epoch, logs=None):
        if epoch > 100:  # set e to 0.01 after 100 epochs
            K.set_value(self.e, 0.01)

model.fit(x_train, y_train,
          epochs=200,  # must exceed 100 for the reassignment to kick in
          callbacks=[callback4e(e)])
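To confirm the reassignment actually took effect, you can read the variable back with K.get_value, the counterpart of K.set_value:

print(K.get_value(e))  # 0.1 before epoch 100, 0.01 afterwards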
Note that the Callback class supports six hooks that fire at different stages: on_epoch_begin, on_epoch_end, on_batch_begin, on_batch_end, on_train_begin and on_train_end. Each runs at the stage its name suggests, and they can be combined to implement quite complex behavior. One example is warmup: rather than training at the nominal learning rate from the very start, the learning rate is increased linearly from zero to its nominal value over the first few epochs, which can be understood as easing the model into a better initialization. Reference code:
class Evaluate(Callback):
    def __init__(self):
        self.num_passed_batchs = 0
        self.warmup_epochs = 10
    def on_batch_begin(self, batch, logs=None):
        # self.params is filled in automatically by Keras
        if self.params['steps'] is None:
            self.steps_per_epoch = np.ceil(1. * self.params['samples'] / self.params['batch_size'])
        else:
            self.steps_per_epoch = self.params['steps']
        if self.num_passed_batchs < self.steps_per_epoch * self.warmup_epochs:
            # during the first 10 epochs, raise the lr linearly from zero to 0.001
            K.set_value(self.model.optimizer.lr,
                        0.001 * (self.num_passed_batchs + 1) / self.steps_per_epoch / self.warmup_epochs)
        self.num_passed_batchs += 1
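It is attached like any other callback (a minimal usage sketch with placeholder data names):

warmup = Evaluate()
model.fit(x_train, y_train,
          epochs=100,
          callbacks=[warmup])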
Another example appears in Su Jianlin's subject-extraction routine (BERT for subject extraction):
import re
import numpy as np
import keras.backend as K
from tqdm import tqdm
from keras.callbacks import Callback

learning_rate = 5e-5
min_learning_rate = 1e-5

# tokenizer, classes, additional_chars, softmax, model, train_model and
# dev_data are all defined elsewhere in the original script
def extract_entity(text_in, c_in):
    """Extract the entity of category c_in from text_in."""
    if c_in not in classes:
        return 'NaN'
    text_in = u'___%s___%s' % (c_in, text_in)  # prepend the category as a prompt
    text_in = text_in[:510]                    # respect BERT's sequence-length limit
    _tokens = tokenizer.tokenize(text_in)
    _x1, _x2 = tokenizer.encode(first=text_in)
    _x1, _x2 = np.array([_x1]), np.array([_x2])
    _ps1, _ps2 = model.predict([_x1, _x2])
    _ps1, _ps2 = softmax(_ps1[0]), softmax(_ps2[0])
    # suppress start positions that fall on punctuation-like tokens
    for i, _t in enumerate(_tokens):
        if len(_t) == 1 and re.findall(u'[^\u4e00-\u9fa5a-zA-Z0-9\*]', _t) and _t not in additional_chars:
            _ps1[i] -= 10
    start = _ps1.argmax()
    # the end position is searched only up to the next punctuation-like token
    for end in range(start, len(_tokens)):
        _t = _tokens[end]
        if len(_t) == 1 and re.findall(u'[^\u4e00-\u9fa5a-zA-Z0-9\*]', _t) and _t not in additional_chars:
            break
    end = _ps2[start:end + 1].argmax() + start
    a = text_in[start - 1: end]
    return a
class Evaluate(Callback):
    def __init__(self):
        self.ACC = []
        self.best = 0.
        self.passed = 0
    def on_batch_begin(self, batch, logs=None):
        """First epoch: warmup; second epoch: decay the lr down to its minimum.
        """
        if self.passed < self.params['steps']:
            lr = (self.passed + 1.) / self.params['steps'] * learning_rate
            K.set_value(self.model.optimizer.lr, lr)
            self.passed += 1
        elif self.params['steps'] <= self.passed < self.params['steps'] * 2:
            lr = (2 - (self.passed + 1.) / self.params['steps']) * (learning_rate - min_learning_rate)
            lr += min_learning_rate
            K.set_value(self.model.optimizer.lr, lr)
            self.passed += 1
    def on_epoch_end(self, epoch, logs=None):
        acc = self.evaluate()
        self.ACC.append(acc)
        if acc > self.best:
            self.best = acc
            train_model.save_weights('best_model.weights')
        print('acc: %.4f, best acc: %.4f\n' % (acc, self.best))
    def evaluate(self):
        A = 1e-10  # tiny constant to avoid division by zero
        F = open('dev_pred.json', 'w')
        for d in tqdm(iter(dev_data)):
            R = extract_entity(d[0], d[1])
            if R == d[2]:
                A += 1
            s = ', '.join(d + (R,))
            F.write(s + '\n')
        F.close()
        return A / len(dev_data)
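Training then attaches the callback as before (a sketch; the data arguments are placeholders for whatever the surrounding script feeds the model):

evaluator = Evaluate()
train_model.fit(train_x, train_y,
                epochs=10,
                callbacks=[evaluator])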
Reference: Su Jianlin's series "让Keras更酷一些" ("Make Keras a bit cooler").