from utils.data_processor import DataProcessor, \
convert_examples_to_features, BuildDataSet, \
HMMDataProcessor
import torch
def trans_features(input_text, word2id, max_seq_length=64):
    """Convert a text into a padded list of vocabulary ids.

    Each element produced by iterating ``input_text`` (single characters when
    it is a ``str``) is looked up in ``word2id``. Elements that are not in the
    vocabulary are mapped to the unknown-token id instead of ``None``, so the
    result is always a list of ids that can be turned into a tensor.

    Args:
        input_text: Iterable of tokens to encode.
        word2id: Mapping from token to id. It must contain the padding and
            unknown-token keys used below.
        max_seq_length: Length the result is padded to (maximum sentence
            length). Inputs longer than this are returned unpadded and are
            NOT truncated, so every input element keeps its own id.

    Returns:
        A list of ids: one per input element, followed by padding ids up to
        ``max_seq_length``.

    Raises:
        KeyError: If the padding or unknown token is missing from ``word2id``.
    """
    # NOTE(review): both special tokens are empty strings here; they may
    # originally have been markers such as '<unk>' / '<pad>' -- confirm
    # against the vocabulary file before changing them.
    unk_token = ''
    pad_token = ''

    pad_id = word2id[pad_token]
    unk_id = word2id[unk_token]

    # Fall back to the unknown-token id for out-of-vocabulary elements.
    token_ids = [word2id.get(word, unk_id) for word in input_text]

    # A negative padding length yields an empty list, i.e. no padding.
    padding_length = max_seq_length - len(token_ids)
    return token_ids + [pad_id] * padding_length
def conver_index2tag(input_index):
    """Translate a sequence of label indices into their tag names.

    Args:
        input_index: Iterable of indices into the fixed label table below.

    Returns:
        A list with the tag string for every index, in the same order.
    """
    label_list = [
        'O', 'B-NAME', 'E-NAME', 'B-CONT', 'I-CONT', 'E-CONT', 'B-RACE',
        'E-RACE', 'B-TITLE', 'I-TITLE', 'E-TITLE', 'B-EDU', 'I-EDU', 'E-EDU',
        'B-ORG', 'I-ORG', 'E-ORG', 'I-NAME', 'B-PRO', 'I-PRO', 'E-PRO',
        'S-RACE', 'S-NAME', 'B-LOC', 'I-LOC', 'E-LOC', 'I-RACE', 'S-ORG',
    ]
    return [label_list[label_id] for label_id in input_index]
def show(input_text, tag):
    """Pair every element of the text with the tag at the same position.

    Args:
        input_text: Iterable of tokens (characters when it is a ``str``).
        tag: Indexable sequence holding at least one tag per input element.

    Returns:
        A list of ``[element, tag]`` two-item lists in input order.
    """
    return [[char, tag[position]] for position, char in enumerate(input_text)]
if __name__ == '__main__':
    # Demo: encode one hard-coded sentence, run the saved model on it and
    # print each character next to its predicted tag.
    processor = DataProcessor(data_dir='./data', do_lower_case=True)
    word2id = processor.get_vocab()
    vocab_size = len(word2id)
    input_text = "张三,1978年6月14日出生于中国台湾省台北市,祖籍山东省临沂市城县,毕业于华冈艺校戏剧科"
    token_list = trans_features(input_text, word2id)
    print(token_list)

    # SECURITY: torch.load unpickles the file, which can execute arbitrary
    # code -- only load checkpoints from a trusted source.
    # NOTE(review): recent PyTorch releases default to weights_only=True,
    # which rejects fully pickled models -- confirm against the installed
    # version.
    model = torch.load('model-bilstm-2.pt', map_location='cpu')
    # Switch the model to evaluation mode.
    model.eval()

    # Build the int64 id tensor directly on the CPU.
    input_tensor_ids = torch.tensor(token_list, dtype=torch.long, device='cpu')

    # Run inference without recording an autograd graph.
    with torch.no_grad():
        output = model(input_tensor_ids)

    # NOTE(review): assumes output[0] is 2-D with the tag scores along
    # dim 1 -- confirm against the model definition.
    predict_index = torch.argmax(output[0], dim=1)
    # Keep only the positions that belong to real input, dropping padding.
    outcome = predict_index[:len(input_text)]
    print(input_text)
    print(outcome)

    tag = conver_index2tag(outcome)
    info = show(input_text, tag)

    # Print the [character, tag] pairs, breaking the line after every tenth.
    for count, pair in enumerate(info, start=1):
        print(pair, end="")
        if count % 10 == 0:
            print(" ")
# Result