# Reference: https://kexue.fm/archives/8454
#! -*- coding: utf-8 -*-
import os
os.environ['TF_KERAS'] = '1'
from bert4keras.backend import keras
from bert4keras.models import build_transformer_model
from bert4keras.tokenizers import Tokenizer
'''
Environment used when generating the saved-model:
bert4keras==0.10.6
keras==2.3.1
tensorflow-gpu==1.15.4
tensorflow-hub==0.12.0
h5py==2.10.0
'''
'''
After generating it, convert the saved-model to ONNX. I simply ran pip install tensorflow==2.5.0,
but creating a fresh environment is recommended:
bert4keras==0.10.6
keras==2.3.1
tensorflow==2.5.0
h5py==3.1.0
keras-nightly==2.5.0.dev2021032900
tensorflow-estimator==2.5.0
tf2onnx==1.9.1
onnx==1.9.0
onnxruntime==1.8.0
Then run:
# python -m tf2onnx.convert --saved-model encoder_model_tf --output encoder_simbert.onnx --opset 13
# python -m tf2onnx.convert --saved-model generate_model_tf --output generate_simbert.onnx --opset 13
'''
os.environ["CUDA_VISIBLE_DEVICES"] = "0"
# Basic settings
maxlen = 128
# Model configuration
model_file = 'chinese_roformer-sim-char_L-12_H-768_A-12'
config_path = '{}/bert_config.json'.format(model_file)
# checkpoint_path = '{}/bert_model.ckpt'.format(model_file)
checkpoint_path = None
dict_path = '{}/vocab.txt'.format(model_file)
# Build the tokenizer
tokenizer = Tokenizer(dict_path, do_lower_case=True)
roformer = build_transformer_model(
config_path,
checkpoint_path,
model='roformer',
application='unilm',
with_pool='linear'
)
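# With application='unilm' and with_pool='linear', the model has two outputs:
# roformer.outputs[0] is the pooled sentence vector (used by the encoder below),
# roformer.outputs[1] is the per-token vocabulary distribution (used by seq2seq).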
# Optionally load your own fine-tuned weights instead of the original checkpoint
output_best_model = 'model_path/best_model.weights'
if checkpoint_path is None:
roformer.load_weights(output_best_model)
# Sentence-vector (encoder) model (uncomment to export it)
# encoder = keras.models.Model(roformer.inputs, roformer.outputs[0])
# encoder.save('model_path/encoder_model_tf', save_format='tf')
# Decoder (seq2seq) model
seq2seq = keras.models.Model(roformer.inputs, roformer.outputs[1])
outputs = [
    keras.layers.Lambda(lambda x: x[:, -1])(output)  # keep only the last position's token distribution
for output in seq2seq.outputs
]
generate_model = keras.models.Model(seq2seq.inputs, outputs)
generate_model.save('model_path/generate_model_tf', save_format='tf')
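Before converting, a quick sanity check that the SavedModel loads and exposes a serving signature (a minimal sketch, run in the TF 2.5 environment; paths as above):
import tensorflow as tf
loaded = tf.saved_model.load('model_path/generate_model_tf')
infer = loaded.signatures['serving_default']
print(infer.structured_input_signature)  # expects Input-Token / Input-Segment
print(infer.structured_outputs)          # the last-position token distribution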
# import keras2onnx
# onnx_model = keras2onnx.convert_keras(encoder)
# keras2onnx.save_model(onnx_model, 'bert-sim.onnx')
After obtaining the saved_model, convert it to ONNX.
'''
Verified: this conversion step requires tensorflow==2.5.0
(install with pip install tensorflow==2.5.0)
'''
python -m tf2onnx.convert --saved-model encoder_model_tf --output simbert.onnx --opset 13
# Inference still uses tensorflow-gpu==1.15.4
If you do everything inside a single environment, having both TensorFlow versions installed can break bert4keras;
uninstall tensorflow and then reinstall the 1.15 stack, and mind the h5py version as well:
pip uninstall keras-nightly
pip uninstall tensorflow
pip uninstall -y tensorflow-gpu
pip install keras==2.3.1
pip install tensorflow-gpu==1.15.4
pip install h5py==2.10.0
--opset 10 also converts and saves successfully, but inference then fails.
Inspecting simbert.onnx with Netron shows:
INPUTS
name: Input-Segment
type: float32[unk__3503,unk__3504]
name: Input-Token
type: float32[unk__3505,unk__3506]
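The same information can be read programmatically. Since the graph declares float32 inputs with dynamic shapes, the feeds below must be float32 arrays; a minimal check (assuming the converted file is in the working directory):
import onnxruntime
sess = onnxruntime.InferenceSession('simbert.onnx')
for inp in sess.get_inputs():
    print(inp.name, inp.type, inp.shape)
# e.g. Input-Segment tensor(float) ['unk__3503', 'unk__3504']
#      Input-Token   tensor(float) ['unk__3505', 'unk__3506']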
import onnxruntime
import numpy as np
from model_path import tokenization  # Google-style BERT tokenization module, copied into model_path
from model_path.bert_input_process import *

class SimBertONNX:
    def __init__(self, weights, max_seg_len=128, vocab_file='chinese_roformer-sim-char_L-12_H-768_A-12/vocab.txt'):
        self.sess = onnxruntime.InferenceSession(weights)
        self.max_seg_len = max_seg_len
        self.tokenizer = tokenization.FullTokenizer(vocab_file=vocab_file, do_lower_case=True)

    def predict(self, sent):
        # Build padded features, then strip the padding so the dynamic-shape
        # ONNX graph sees only the real tokens.
        feature = format_ner_input(sent, max_seq_length=self.max_seg_len, tokenizer=self.tokenizer)
        token = [i for i in feature['input_ids'][0] if i != 0]
        # The exported graph declares float32 inputs (see the Netron dump above),
        # so the feeds are cast accordingly.
        input_dict = {
            'Input-Token': np.array([token], dtype=np.float32),
            'Input-Segment': np.array([[0] * len(token)], dtype=np.float32),
        }
        # e.g. {'Input-Token': [[101, 4636, 1668, 4686, 6994, 102]], 'Input-Segment': [[0, 0, 0, 0, 0, 0]]}
        result = self.sess.run(None, input_dict)[0][0]  # pooled CLS vector
        return result
pb_path = "model_path/simbert13.onnx"
simbert = SimBertONNX(pb_path)
print(simbert.predict('科学空间'))
# Output (truncated):
# [-1.11289009e-01 -5.35200179e-01  1.08341195e-01 -1.98119685e-01
#  -5.59085608e-02  4.03575003e-01  1.67680249e-01 -2.41547719e-01
#   4.21358049e-01 -2.02915221e-01 -1.85670257e-01 -2.22553447e-01
#  ...
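RoFormer-Sim vectors are built for similarity retrieval, so the CLS vectors are typically L2-normalized and compared with cosine similarity; a small sketch on top of the class above (the second sentence is just an illustrative input):
def cosine_sim(a, b):
    a = a / np.linalg.norm(a)
    b = b / np.linalg.norm(b)
    return float(np.dot(a, b))

v1 = simbert.predict('科学空间')
v2 = simbert.predict('科学空间是一个博客')
print(cosine_sim(v1, v2))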
# bert_input_process.py (the input-formatting helpers imported above)
import numpy as np
def _truncate_seq_pair(tokens_a, tokens_b, max_length):
    # Trim the longer sequence one token at a time until the pair fits the budget.
    while True:
total_length = len(tokens_a) + len(tokens_b)
if total_length <= max_length:
break
if len(tokens_a) > len(tokens_b):
tokens_a.pop()
else:
tokens_b.pop()
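For example, a budget of 6 tokens pops only from the longer side:
a, b = list('abcdefg'), list('xy')
_truncate_seq_pair(a, b, 6)
print(a, b)  # ['a', 'b', 'c', 'd'] ['x', 'y']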
class InputFeatures(object):
def __init__(self, input_ids, input_mask, segment_ids, label_id, is_real_example=True):
self.input_ids = input_ids
self.input_mask = input_mask
self.segment_ids = segment_ids
self.label_id = label_id
self.is_real_example = is_real_example
class PaddingInputExample(object):
pass
class InputExample(object):
def __init__(self, guid, text_a, text_b=None, label=None):
self.guid = guid
self.text_a = text_a
self.text_b = text_b
self.label = label
def convert_ner_example(example, max_seq_length, tokenizer):
if isinstance(example, PaddingInputExample):
return InputFeatures(
input_ids=[0] * max_seq_length,
input_mask=[0] * max_seq_length,
segment_ids=[0] * max_seq_length,
label_id=[0] * max_seq_length,
is_real_example=False)
tokens_a = tokenizer.tokenize(example.text_a)
tokens_b = None
if example.text_b:
tokens_b = tokenizer.tokenize(example.text_b)
if tokens_b:
_truncate_seq_pair(tokens_a, tokens_b, max_seq_length - 3)
else:
if len(tokens_a) > max_seq_length - 2:
tokens_a = tokens_a[0:(max_seq_length - 2)]
tokens = []
segment_ids = []
label_id = [0] * max_seq_length
tokens.append("[CLS]")
segment_ids.append(0)
for token in tokens_a:
tokens.append(token)
segment_ids.append(0)
tokens.append("[SEP]")
segment_ids.append(0)
if tokens_b:
for token in tokens_b:
tokens.append(token)
segment_ids.append(1)
tokens.append("[SEP]")
segment_ids.append(1)
input_ids = tokenizer.convert_tokens_to_ids(tokens)
label_id[0] = input_ids[0]
label_id[len(tokens) - 1] = input_ids[len(tokens) - 1]
input_mask = [1] * len(input_ids)
while len(input_ids) < max_seq_length:
input_ids.append(0)
input_mask.append(0)
segment_ids.append(0)
assert len(input_ids) == max_seq_length
assert len(input_mask) == max_seq_length
assert len(segment_ids) == max_seq_length
feature = InputFeatures(
input_ids=input_ids,
input_mask=input_mask,
segment_ids=segment_ids,
label_id=label_id,
is_real_example=True)
return feature
def format_ner_input(text_a, text_b=None, gid="id", max_seq_length=128, tokenizer=None):
predict_example = InputExample(gid, text_a, text_b, None)
feature = convert_ner_example(predict_example, max_seq_length, tokenizer)
return {
"input_ids": [feature.input_ids],
"input_mask": [feature.input_mask],
"segment_ids": [feature.segment_ids],
"label_ids": [feature.label_id],
}
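A quick usage sketch (assuming a FullTokenizer built from the same vocab as above; all ids except [CLS]=101 and [SEP]=102 depend on the vocabulary):
tokenizer = tokenization.FullTokenizer(
    vocab_file='chinese_roformer-sim-char_L-12_H-768_A-12/vocab.txt',
    do_lower_case=True)
feats = format_ner_input('科学空间', max_seq_length=8, tokenizer=tokenizer)
print(feats['input_ids'])    # [[101, ..., 102, 0, 0]] padded to max_seq_length
print(feats['input_mask'])   # [[1, 1, 1, 1, 1, 1, 0, 0]]
print(feats['segment_ids'])  # [[0, 0, 0, 0, 0, 0, 0, 0]]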
def format_simbert_input(text_a_list, max_seq_length=128, tokenizer=None):
input_ids = []
segment_ids = []
for text_a in text_a_list:
predict_example = InputExample(guid="id", text_a=text_a, text_b=None, label=None)
feature = convert_ner_example(predict_example, max_seq_length, tokenizer)
input_ids.append(feature.input_ids)
segment_ids.append(feature.segment_ids)
return input_ids, segment_ids
def softmax(x, axis=-1):
x = x - x.max(axis=axis, keepdims=True)
x = np.exp(x)
return x / x.sum(axis=axis, keepdims=True)
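A quick sanity check of the helper:
x = np.array([[1.0, 2.0, 3.0]])
print(softmax(x))        # [[0.09003057 0.24472847 0.66524096]]
print(softmax(x).sum())  # 1.0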
class AutoRegressiveDecoder(object):
def __init__(self, start_id, end_id, maxlen, minlen=1):
self.start_id = start_id
self.end_id = end_id
self.maxlen = maxlen
self.minlen = minlen
self.models = {}
if start_id is None:
self.first_output_ids = np.empty((1, 0), dtype=int)
else:
self.first_output_ids = np.array([[self.start_id]])
@staticmethod
def wraps(default_rtype='probas', use_states=False):
def actual_decorator(predict):
def new_predict(self, inputs, output_ids, states, temperature=1, rtype=default_rtype):
assert rtype in ['probas', 'logits']
prediction = predict(self, inputs, output_ids, states)
if not use_states:
prediction = (prediction, None)
if default_rtype == 'logits':
prediction = (softmax(prediction[0] / temperature), prediction[1])
elif temperature != 1:
probas = np.power(prediction[0], 1.0 / temperature)
probas = probas / probas.sum(axis=-1, keepdims=True)
prediction = (probas, prediction[1])
if rtype == 'probas':
return prediction
else:
return np.log(prediction[0] + 1e-12), prediction[1]
return new_predict
return actual_decorator
def predict(self, inputs, output_ids, states=None):
raise NotImplementedError
    def random_sample(self, inputs, n, topp=None, states=None, temperature=1, min_ends=1):
        inputs = [np.array([i]) for i in inputs]
        output_ids = self.first_output_ids
        results = []
        for step in range(self.maxlen):
            probas, states = self.predict(
                inputs, output_ids, states, temperature, 'probas'
            )  # probabilities for the current step
            probas /= probas.sum(axis=1, keepdims=True)  # make sure rows are normalized
            if step == 0:  # after the first step, replicate everything n times
                probas = np.repeat(probas, n, axis=0)
                inputs = [np.repeat(i, n, axis=0) for i in inputs]
                output_ids = np.repeat(output_ids, n, axis=0)
            if topp is not None:
                p_indices = probas.argsort(axis=1)[:, ::-1]  # sort high to low
                probas = np.take_along_axis(probas, p_indices, axis=1)  # sorted probabilities
                cumsum_probas = np.cumsum(probas, axis=1)  # cumulative probabilities
                flag = np.roll(cumsum_probas >= topp, 1, axis=1)  # mark the tail beyond topp
                flag[:, 0] = False  # combined with np.roll, shifts the mask one position
                probas[flag] = 0  # zero out everything past the nucleus
                probas /= probas.sum(axis=1, keepdims=True)  # renormalize
            sample_func = lambda p: np.random.choice(len(p), p=p)  # sample by probability
            sample_ids = np.apply_along_axis(sample_func, 1, probas)  # draw one id per row
            sample_ids = sample_ids.reshape((-1, 1))  # align shapes
            if topp is not None:
                sample_ids = np.take_along_axis(p_indices, sample_ids, axis=1)  # map back to original ids
            output_ids = np.concatenate([output_ids, sample_ids], 1)  # extend the outputs
            end_counts = (output_ids == self.end_id).sum(1)  # count end tokens seen so far
            if output_ids.shape[1] >= self.minlen:  # minimum-length check
                flag = (end_counts == min_ends)  # mark finished sequences
                if flag.any():  # if any sequence has finished
                    for ids in output_ids[flag]:  # store the finished sequences
                        results.append(ids)
                    flag = (flag == False)  # mark the unfinished sequences
                    inputs = [i[flag] for i in inputs]  # keep only unfinished inputs
                    output_ids = output_ids[flag]  # keep only unfinished candidates
                    end_counts = end_counts[flag]  # keep only unfinished end counts
                    if len(output_ids) == 0:
                        break
        # any sequences still unfinished go straight into the results
        for ids in output_ids:
            results.append(ids)
        # return all sampled sequences
        return results
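To actually generate with generate_simbert.onnx, subclass the decoder and implement predict so the growing output ids are fed back through the ONNX session. A hedged sketch (the class name and defaults are mine; end_id=102 assumes the standard BERT [SEP] id, and the float32 casts match the exported graph):
class OnnxSeq2Seq(AutoRegressiveDecoder):
    def __init__(self, onnx_path, end_id=102, maxlen=32):
        super().__init__(start_id=None, end_id=end_id, maxlen=maxlen)
        self.sess = onnxruntime.InferenceSession(onnx_path)

    @AutoRegressiveDecoder.wraps(default_rtype='probas')
    def predict(self, inputs, output_ids, states):
        token_ids, segment_ids = inputs
        # UniLM decoding: generated ids are appended with segment id 1
        all_tokens = np.concatenate([token_ids, output_ids], axis=1)
        all_segments = np.concatenate([segment_ids, np.ones_like(output_ids)], axis=1)
        feed = {
            'Input-Token': all_tokens.astype(np.float32),
            'Input-Segment': all_segments.astype(np.float32),
        }
        # generate_model already slices the last position, so this is (batch, vocab)
        return self.sess.run(None, feed)[0]
Usage would look like OnnxSeq2Seq('generate_simbert.onnx').random_sample([token_ids, segment_ids], n=5, topp=0.95), with token_ids/segment_ids produced as in SimBertONNX.predict.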