哈工大pyltp命名实体识别调用

# -*- coding: utf-8 -*-
import sys, os
import pyltp
from pyltp import SentenceSplitter,Segmentor, Postagger, Parser, NamedEntityRecognizer, SementicRoleLabeller

# Demo script: run the pyltp pipeline (sentence splitting, word segmentation,
# POS tagging, dependency parsing, named entity recognition) on a sample
# paragraph and print the result of each stage.

# Directory that holds the LTP model files; change it here instead of
# editing every load() call.
LTP_DATA_DIR = "ltp_data_v3.4.0"

paragraph = '叙利亚东古塔地区7日发生疑似化学武器袭击事件,导致70余人丧生。报道一出,叙利亚反对派、美国、英国、法国等纷纷指责叙政府军使用化学武器袭击无辜平民。但叙利亚坚决否认,并指责西方和叙反对派造谣,目的是保护被围困的恐怖分子。俄外交部则认为,该谣言旨在袒护恐怖分子,并为外部势力发动打击寻找借口。'

# Only the first sentence is printed; the stages below deliberately keep
# operating on the whole paragraph, as the original script did.
sentence = SentenceSplitter.split(paragraph)[0]
print(sentence)

# Create every component up front so the finally-clause below can always
# reference all four names, no matter which stage fails.
segmentor = Segmentor()
postagger = Postagger()
parser = Parser()
recognizer = NamedEntityRecognizer()

try:
    # Word segmentation.
    segmentor.load(os.path.join(LTP_DATA_DIR, "cws.model"))
    words = segmentor.segment(paragraph)
    print(" ".join(words))

    # Part-of-speech tagging of the segmented words.
    postagger.load(os.path.join(LTP_DATA_DIR, "pos.model"))
    postags = postagger.postag(words)
    print(" ".join(postags))

    # Dependency parsing; each arc is printed as "<head>:<relation>".
    parser.load(os.path.join(LTP_DATA_DIR, "parser.model"))
    arcs = parser.parse(words, postags)
    print(" ".join("%d:%s" % (arc.head, arc.relation) for arc in arcs))

    # Named entity recognition on the paragraph: print each word whose tag
    # is not 'O' as "word/tag", then the full tag sequence.
    recognizer.load(os.path.join(LTP_DATA_DIR, "ner.model"))
    netag = recognizer.recognize(words, postags)
    for word, ntag in zip(words, netag):
        if ntag != 'O':
            print(word + '/' + ntag)
    print(" ".join(netag))

    # Named entity recognition on a hand-segmented, hand-tagged sentence.
    word_list = ['欧几里得', '是', '西元前', '三', '世纪', '的', '希腊', '数学家', '。']
    postags_list = ['nh', 'v', 'nt', 'm', 'n', 'u', 'ns', 'n', 'wp']
    nertags = recognizer.recognize(word_list, postags_list)
    for word, ntag in zip(word_list, nertags):
        if ntag != 'O':
            print(word + '/' + ntag)
    print(' '.join(nertags))
finally:
    # Release the models even when a stage above raised, so a failure does
    # not leave them loaded.
    # NOTE(review): release() is also reached for components whose load()
    # never ran; confirm pyltp treats that as a no-op.
    segmentor.release()
    postagger.release()
    parser.release()
    recognizer.release()
哈工大pyltp命名实体识别调用_第1张图片
欢迎关注人工智能见闻公众号

你可能感兴趣的:(哈工大pyltp命名实体识别调用)