pipeline是Hugging Face的一个基本工具,可以理解为一个端到端(end-to-end)的一键调用Transformer模型的工具。它包含了数据预处理、模型处理、模型输出后处理等步骤,可以直接输入原始数据,给出预测结果,十分方便。
from transformers import pipeline
# Text classification (sentiment analysis). No model is named, so the
# library falls back to its default checkpoint for this task.
classifier = pipeline("sentiment-analysis")
for text in ("I hate you", "I love you"):
    # The pipeline returns a list of predictions; take the single entry.
    result = classifier(text)[0]
    print(result)
{'label': 'NEGATIVE', 'score': 0.9991129040718079}
{'label': 'POSITIVE', 'score': 0.9998656511306763}
from transformers import pipeline
# Translation: English to German.
translator = pipeline(task="translation_en_to_de")
sentence = "Hugging Face is a technology company based in New York and Paris"
# max_length is passed through to generation to bound the translated output.
translator(sentence, max_length=40)
[{'translation_text': 'Hugging Face ist ein Technologieunternehmen mit Sitz in New York und Paris.'}]
from transformers import pipeline
# Text generation: let the model continue a prompt.
text_generator = pipeline("text-generation")
prompt = "As far as I am concerned, I will"
# do_sample=False disables sampling, so the continuation is deterministic;
# max_length bounds the total length of the generated text.
text_generator(prompt, max_length=50, do_sample=False)
[{'generated_text': 'As far as I am concerned, I will be the first to admit that I am not a fan of the idea of a "free market." I think that the idea of a free market is a bit of a stretch. I think that the idea'}]
from transformers import pipeline
# Fill-mask (cloze test): predict the word hidden behind the mask token.
unmasker = pipeline("fill-mask")
# The input must contain the model's mask token, otherwise the pipeline has
# nothing to fill in. Reading the token from the tokenizer (instead of
# hard-coding e.g. "<mask>" or "[MASK]") keeps this working for whichever
# default model the pipeline loaded.
sentence = f'HuggingFace is creating a {unmasker.tokenizer.mask_token} that the community uses to solve NLP tasks.'
unmasker(sentence)
[{'score': 0.17927546799182892, 'token': 3944, 'token_str': ' tool', 'sequence': 'HuggingFace is creating a tool that the community uses to solve NLP tasks.'},
{'score': 0.1134939193725586, 'token': 7208, 'token_str': ' framework', 'sequence': 'HuggingFace is creating a framework that the community uses to solve NLP tasks.'},
{'score': 0.052435602992773056, 'token': 5560, 'token_str': ' library', 'sequence': 'HuggingFace is creating a library that the community uses to solve NLP tasks.'},
{'score': 0.034935541450977325, 'token': 8503, 'token_str': ' database', 'sequence': 'HuggingFace is creating a database that the community uses to solve NLP tasks.'},
{'score': 0.028602560982108116, 'token': 17715, 'token_str': ' prototype', 'sequence': 'HuggingFace is creating a prototype that the community uses to solve NLP tasks.'}]
from transformers import pipeline
# Reading comprehension (extractive question answering): the answer is a
# span taken from `context`.
question_answerer = pipeline("question-answering")
context = r"""
Extractive Question Answering is the task of extracting an answer from a text given a question. An example of a
question answering dataset is the SQuAD dataset, which is entirely based on that task. If you would like to fine-tune
a model on a SQuAD task, you may leverage the examples/pytorch/question-answering/run_squad.py script.
"""
questions = (
    "What is extractive question answering?",
    "What is a good example of a question answering dataset?",
)
# Ask each question against the same context and print the prediction.
for question in questions:
    result = question_answerer(question=question, context=context)
    print(result)
{'score': 0.6177281141281128, 'start': 34, 'end': 95, 'answer': 'the task of extracting an answer from a text given a question'}
{'score': 0.5152307152748108, 'start': 148, 'end': 161, 'answer': 'SQuAD dataset'}
from transformers import pipeline
# Named entity recognition (NER).
ner_pipe = pipeline("ner")
sequence = """Hugging Face Inc. is a company based in New York City. Its headquarters are in DUMBO,
therefore very close to the Manhattan Bridge which is visible from the window."""
# The pipeline yields one dict per tagged token (word pieces such as
# "##gging" are reported separately); print them one per line.
for entity in ner_pipe(sequence):
    print(entity)
{'entity': 'I-ORG', 'score': 0.99957865, 'index': 1, 'word': 'Hu', 'start': 0, 'end': 2}
{'entity': 'I-ORG', 'score': 0.9909764, 'index': 2, 'word': '##gging', 'start': 2, 'end': 7}
{'entity': 'I-ORG', 'score': 0.9982224, 'index': 3, 'word': 'Face', 'start': 8, 'end': 12}
{'entity': 'I-ORG', 'score': 0.9994879, 'index': 4, 'word': 'Inc', 'start': 13, 'end': 16}
{'entity': 'I-LOC', 'score': 0.9994344, 'index': 11, 'word': 'New', 'start': 40, 'end': 43}
{'entity': 'I-LOC', 'score': 0.99931955, 'index': 12, 'word': 'York', 'start': 44, 'end': 48}
{'entity': 'I-LOC', 'score': 0.9993794, 'index': 13, 'word': 'City', 'start': 49, 'end': 53}
{'entity': 'I-LOC', 'score': 0.98625815, 'index': 19, 'word': 'D', 'start': 79, 'end': 80}
{'entity': 'I-LOC', 'score': 0.95142686, 'index': 20, 'word': '##UM', 'start': 80, 'end': 82}
{'entity': 'I-LOC', 'score': 0.9336589, 'index': 21, 'word': '##BO', 'start': 82, 'end': 84}
{'entity': 'I-LOC', 'score': 0.9761654, 'index': 28, 'word': 'Manhattan', 'start': 114, 'end': 123}
{'entity': 'I-LOC', 'score': 0.9914629, 'index': 29, 'word': 'Bridge', 'start': 124, 'end': 130}