demo.py / NGnews / WNUT_17

基于 Classifier 的例子


from flair.data import Corpus
from flair.datasets import WNUT_17, NEWSGROUPS
from typing import List
from flair.embeddings import FastTextEmbeddings, ELMoEmbeddings, TokenEmbeddings, WordEmbeddings, FlairEmbeddings, BertEmbeddings, PooledFlairEmbeddings
from flair.embeddings import DocumentRNNEmbeddings, DocumentPoolEmbeddings
# Label type this script is about. It is a plain constant, so it is declared
# first; nothing in the corpus loading below depends on it.
tag_type = 'class'

# Load the NEWSGROUPS corpus and print its summary.
corpus: Corpus = NEWSGROUPS()
print(corpus)

# Build the label dictionary from the corpus and print it.
label_dictionary = corpus.make_label_dictionary()
print(label_dictionary)

# 4. initialize embeddings
# Locations of optional pre-trained models. These are plain strings, so
# defining them loads nothing; a model is only loaded when it is constructed.
news_backward = "/home/huyufeng/flair/flair/checkpoints/news-backward-0.4.1.pt"
bert_path = "/home/huyufeng/glove/uncased_L-12_H-768_A-12"

# The only token embedding this script actually feeds into the document RNN.
fast_text_embedding = WordEmbeddings(".flair/embeddings/en-fasttext-news-300d-1M")

embeddings: List[TokenEmbeddings] = [
    fast_text_embedding,
    # Uncomment an entry to stack it on top of the fastText vectors. They are
    # constructed here, inside the list, so that an unused model is never
    # loaded and a missing local path cannot break the run.
    # WordEmbeddings('glove'),
    # BertEmbeddings(bert_path, layers='-1'),
    # FlairEmbeddings(news_backward),  # presumably what news_backward is for - confirm
]

# Alternative document embedding (pooling instead of an RNN). `embeddings` is
# already a list, so it is passed as-is rather than wrapped in another list.
# document_embeddings = DocumentPoolEmbeddings(embeddings, fine_tune_mode='nonlinear')
document_embeddings = DocumentRNNEmbeddings(
    embeddings,
    hidden_size=128,
    rnn_layers=1,
    bidirectional=True,
    rnn_type='LSTM',
)

# 5. initialize the text classifier
import flair
from flair.models import TextClassifier

# Place the model on flair.device instead of calling .cuda() unconditionally:
# .cuda() raises on machines without CUDA, while flair.device is the device
# flair itself selected for this process.
classifier: TextClassifier = TextClassifier(
    document_embeddings=document_embeddings,
    label_dictionary=label_dictionary,
    # label_type=tag_type,  # NOTE(review): may be required by newer flair releases - confirm
).to(flair.device)

# 6. initialize trainer
from flair.trainers import ModelTrainer

trainer: ModelTrainer = ModelTrainer(classifier, corpus)

# 7. start training
# Output directory: "resources/taggers/test/".
root = "resources/taggers/"
file_root = f"{root}test/"

# Hyper-parameters collected in one place and forwarded as keyword arguments.
training_options = {
    "learning_rate": 0.1,
    "mini_batch_size": 32,
    "max_epochs": 150,
}
trainer.train(file_root, **training_options)

# 8. plot weight traces (optional)
from flair.visual.training_curves import Plotter

plotter = Plotter()
# weights.txt is looked up directly under the training output directory.
weights_path = file_root + 'weights.txt'
plotter.plot_weights(weights_path)

基于 Tagging 模型的例子


from flair.data import Corpus
from flair.datasets import WNUT_17
from flair.embeddings import TokenEmbeddings, WordEmbeddings, StackedEmbeddings
from typing import List

# 1. get the corpus, then downsample it with a factor of 0.1
corpus: Corpus = WNUT_17()
corpus = corpus.downsample(0.1)
print(corpus)

# 2. the tag type to predict
tag_type = 'ner'

# 3. build the tag dictionary for that tag type from the corpus
tag_dictionary = corpus.make_tag_dictionary(tag_type=tag_type)
print(tag_dictionary)

# 4. initialize embeddings
# GloVe word vectors are the only embedding enabled by default.
glove_vectors = WordEmbeddings('glove')
embedding_types: List[TokenEmbeddings] = [glove_vectors]

# Optional extras - uncomment a line to add that embedding to the stack:
# embedding_types.append(CharacterEmbeddings())
# embedding_types.append(FlairEmbeddings('news-forward'))
# embedding_types.append(FlairEmbeddings('news-backward'))

# Combine everything in embedding_types into a single stacked embedding.
embeddings: StackedEmbeddings = StackedEmbeddings(embeddings=embedding_types)

# 5. initialize sequence tagger
from flair.models import SequenceTagger

# Constructor arguments gathered in one mapping, then unpacked as keywords.
tagger_options = dict(
    hidden_size=256,
    embeddings=embeddings,
    tag_dictionary=tag_dictionary,
    tag_type=tag_type,
    use_crf=True,
)
tagger: SequenceTagger = SequenceTagger(**tagger_options)

# 6. initialize trainer
from flair.trainers import ModelTrainer

trainer: ModelTrainer = ModelTrainer(tagger, corpus)

# 7. start training; everything is written under this output directory
output_dir = 'resources/taggers/example-ner'
trainer.train(output_dir, learning_rate=0.1, mini_batch_size=32, max_epochs=150)

# 8. plot weight traces (optional)
from flair.visual.training_curves import Plotter

# weights.txt inside the NER example's output directory.
weights_file = 'resources/taggers/example-ner/weights.txt'
plotter = Plotter()
plotter.plot_weights(weights_file)

你可能感兴趣的:(demo.py / NGnews / WNUT_17)