bert 新闻分类系统

import torch
# Pick the compute device once; later cells move tensors to `device`.
if not torch.cuda.is_available():
    # CPU fallback when CUDA is not usable.
    print('No GPU available, using the CPU instead.')
    device = torch.device("cpu")
else:
    device = torch.device("cuda")
    print('There are %d GPU(s) available.' % torch.cuda.device_count())
    print('We will use the GPU:', torch.cuda.get_device_name(0))

There are 1 GPU(s) available.
We will use the GPU: GeForce GTX 1070
import os
import transformers
from transformers import (
    DataProcessor,
    InputExample,
    BertForSequenceClassification,
    BertTokenizer,
    glue_convert_examples_to_features,
)

本任务有14个类别

# Load the category names (one per line) from the class list file.
# The context manager closes the handle, which the plain open() never did.
with open("cn-classification/THUCNews/classes.txt", "r", encoding="utf-8") as file:
    line = file.read()
# Keep only the first 14 entries (presumably this drops a trailing empty
# entry left by the final newline -- verify against the file).
label_types = line.split("\n")[0:14]
label_types
['财经',
 '股票',
 '科技',
 '社会',
 '游戏',
 '星座',
 '时政',
 '时尚',
 '教育',
 '房产',
 '彩票',
 '家居',
 '娱乐',
 '体育']

查看一下数据

# Peek at the first record of the dev split; each record is "<label>,<text>".
with open("./cn-classification/dev.txt", "r", encoding="utf-8") as fo:
    line = fo.readline()

# Split on the first comma only, so multi-digit labels (10-14) are parsed
# correctly and commas inside the article stay part of the text.
label_str, content = line.split(",", 1)

print("新闻内容:\n", content)
# Labels in the data are 1-based (1..14) while `label_types` is a 0-based
# list, so subtract one to get the matching category name.
print("新闻类型:", label_types[int(label_str) - 1])
新闻内容:
 死叉+跌停 后市操作紧跟三大信号   广州万隆   今日消息面总体偏空。受强震加核爆的利空影响日本股市昨日暴跌6.18%,今日早盘再度大幅低开跌幅也高达6%,受此影响A股早盘也弱势低开9.7点,开盘后直接展开5浪杀跌,并击穿2900点,表明市场情绪还处于恐慌中。板块方面,西藏板块一枝独秀,维持红盘,其余板块全线尽墨,昨日强势的水泥、触摸屏概念跌幅居前,煤炭板块明显领跌,其中600971恒源煤电跌停,这在近段时间首次出现跌停的情况,说明市场人气明显偏弱。目前股指下跌1.5%,下跌43点,成交量昨日小幅萎缩,个股普跌。   今日有三个信号要重点关注:1. 要重点注意今日调整时的力度和量能,如果调整时缩量则属正常;如果是放量下杀无力则继续需观望;2. 注意大盘反弹时银行、地产等蓝筹股的表现,如强力护盘可说明下跌幅度也不大;如表现疲软就继续观望。3. 经过早盘的大幅杀跌,5日线已经和10日线形成死叉,目前暂时在30日线上止跌,后市可重点关注30日线的支撑以及5日线和10日线的死叉何时解开。  欢迎发表评论  我要评论

新闻类型: 社会

bert 文本分类processor

class SstProcessor(DataProcessor):
    """Processor for the 14-class news classification data set.

    Each data file (``train.txt`` / ``dev.txt`` / ``test.txt``) holds one
    example per line in the form ``<label>,<text>``, where ``<label>`` is
    an integer between 1 and 14.
    """

    def read_files(self,file_path):
        """Return every line of the UTF-8 file ``file_path``, newlines kept.

        Also prints the name of the file being read.
        """
        # The context manager guarantees the handle is closed on errors too.
        with open(file_path, "r", encoding="utf-8") as fo:
            print("文件名为: ", fo.name)
            return fo.readlines()

    def get_train_examples(self, data_dir):
        """Build the training examples from ``<data_dir>/train.txt``."""
        return self._create_examples(self.read_files(os.path.join(data_dir, "train.txt")), "train")

    def get_dev_examples(self, data_dir):
        """Build the validation examples from ``<data_dir>/dev.txt``."""
        return self._create_examples(self.read_files(os.path.join(data_dir,  "dev.txt")), "dev")

    def get_test_examples(self, data_dir):
        """Build the test examples from ``<data_dir>/test.txt``."""
        return self._create_examples(self.read_files(os.path.join(data_dir,  "test.txt")), "test")

    def get_labels(self):
        """Return the list of valid labels, the integers 1 through 14."""
        # A stray trailing comma previously turned this into a 1-tuple
        # wrapping the list instead of the list itself.
        return list(range(1, 15))

    def _create_examples(self, lines, set_type):
        """Turn raw ``<label>,<text>`` lines into ``InputExample`` objects.

        Args:
            lines: raw lines as returned by ``read_files``.
            set_type: split name used as the guid prefix
                ("train", "dev" or "test").

        Returns:
            A list of ``InputExample`` with an integer ``label`` and the
            article text in ``text_a``.

        Raises:
            ValueError: if a non-blank line has no comma separator or its
                label is not an integer.
        """
        examples = []
        for (i, line) in enumerate(lines):
            line = line.rstrip("\r\n")
            if not line.strip():
                # Skip blank lines (e.g. a trailing empty line in the file).
                continue
            guid = "%s-%s" % (set_type, i)
            # Split on the first comma only: the article itself may contain
            # commas, and splitting on all of them kept just the first clause.
            label_str, sep, text_a = line.partition(',')
            if not sep:
                raise ValueError("%s: expected '<label>,<text>', got %r" % (guid, line))
            label = int(label_str)
            examples.append(InputExample(guid=guid, text_a=text_a, text_b=None, label=label))
        return examples

读取数据

# Directory that holds train.txt, dev.txt and test.txt.
file_path = "./"
myProcessor = SstProcessor()
# Read the three splits in train -> dev -> test order (the processor prints
# each file name as it is opened).
train_examples, dev_examples, test_examples = (
    load_split(file_path)
    for load_split in (
        myProcessor.get_train_examples,
        myProcessor.get_dev_examples,
        myProcessor.get_test_examples,
    )
)
文件名为:  ./train.txt
文件名为:  ./dev.txt
文件名为:  ./test.txt

本任务我们选择 bert-base-chinese 预训练模型

# Load the tokenizer that matches the `bert-base-chinese` checkpoint
# (downloaded on first use, hence the progress message).
print('下载 BERT tokenizer...')
tokenizer = BertTokenizer.from_pretrained('bert-base-chinese')
下载 BERT tokenizer...

把新闻 text 转换成 features。参考资料:https://github.com/huggingface/transformers/blob/master/src/transformers/data/processors/glue.py#L158

print("start input features")
# Convert only the first 500 training examples into model-ready features.
# Labels are the integers 1..14; sequences are padded/truncated to 128 tokens.
train_features = glue_convert_examples_to_features(
    examples=train_examples[:500],
    tokenizer=tokenizer,
    max_length=128,
    task=None,
    label_list=list(range(1, 15)),
    output_mode="classification",
)
print("finish train_features.....")
start input features
finish train_features.....
# Convert only the first 500 validation examples, with the same settings as
# the training split (labels 1..14, max length 128).
dev_features = glue_convert_examples_to_features(
    examples=dev_examples[:500],
    tokenizer=tokenizer,
    max_length=128,
    task=None,
    label_list=list(range(1, 15)),
    output_mode="classification",
)
print("finish dev_features.....")
finish dev_features.....
# Convert only the first 500 test examples, with the same settings as the
# other splits (labels 1..14, max length 128).
test_features = glue_convert_examples_to_features(
    examples=test_examples[:500],
    tokenizer=tokenizer,
    max_length=128,
    task=None,
    label_list=list(range(1, 15)),
    output_mode="classification",
)
print("finish test_examples.....")
finish test_examples.....

设计 dataset

import torch
import numpy
from torch.utils.data import TensorDataset, random_split
def build_dataset(features):
    """Pack converted features into a ``TensorDataset``.

    Args:
        features: iterable of objects exposing ``input_ids``,
            ``attention_mask``, ``token_type_ids`` and ``label``.

    Returns:
        A ``TensorDataset`` whose four int64 tensors are, in order:
        input ids, attention mask, token type ids and labels.
    """
    # Read the four fields of every feature in a single pass.
    rows = [
        (item.input_ids, item.attention_mask, item.token_type_ids, item.label)
        for item in features
    ]
    # Column by column: Python lists -> numpy array -> int64 tensor.
    columns = [
        torch.from_numpy(numpy.array([row[idx] for row in rows])).long()
        for idx in range(4)
    ]
    return TensorDataset(*columns)

把features 放入data set中

# Wrap each split's features in a TensorDataset (train, dev, test order).
train_set, dev_set, test_set = map(
    build_dataset, (train_features, dev_features, test_features)
)

把dataset 装入dataloader中

from torch.utils.data import TensorDataset, DataLoader
# Shuffle only the training data. Evaluation accuracy is computed as the mean
# of per-batch accuracies, so with a smaller final batch a random order makes
# the validation/test metrics vary between runs; a fixed order keeps them
# reproducible.
train_dataloader = DataLoader(train_set, batch_size=8, shuffle=True)
validation_dataloader = DataLoader(dev_set, batch_size=8, shuffle=False)
test_dataloader = DataLoader(test_set, batch_size=8, shuffle=False)

导入预训练模型

# Load the pretrained encoder with a fresh 14-way classification head.
bert_model = BertForSequenceClassification.from_pretrained(
    "bert-base-chinese", # The 12-layer Chinese BERT checkpoint.
    num_labels = 14, # One output per news category (14 classes).
    output_attentions = False, # Do not return attention weights.
    output_hidden_states = False, # Do not return all hidden states.
)
# Move the model to the device chosen at the top of the file. A hard-coded
# .cuda() would crash on machines without a GPU even though `device` already
# falls back to the CPU.
bert_model.to(device)
Some weights of the model checkpoint at bert-base-chinese were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPretraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-chinese and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.





BertForSequenceClassification(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(21128, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0): BertLayer(
          (attention): BertAttention(
            (self): BertSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
          )
          (intermediate): BertIntermediate(
            (dense): Linear(in_features=768, out_features=3072, bias=True)
          )
          (output): BertOutput(
            (dense): Linear(in_features=3072, out_features=768, bias=True)
            (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
            (dropout): Dropout(p=0.1, inplace=False)
          )
        )
        (1): BertLayer(
          (attention): BertAttention(
            (self): BertSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
          )
          (intermediate): BertIntermediate(
            (dense): Linear(in_features=768, out_features=3072, bias=True)
          )
          (output): BertOutput(
            (dense): Linear(in_features=3072, out_features=768, bias=True)
            (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
            (dropout): Dropout(p=0.1, inplace=False)
          )
        )
        (2): BertLayer(
          (attention): BertAttention(
            (self): BertSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
          )
          (intermediate): BertIntermediate(
            (dense): Linear(in_features=768, out_features=3072, bias=True)
          )
          (output): BertOutput(
            (dense): Linear(in_features=3072, out_features=768, bias=True)
            (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
            (dropout): Dropout(p=0.1, inplace=False)
          )
        )
        (3): BertLayer(
          (attention): BertAttention(
            (self): BertSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
          )
          (intermediate): BertIntermediate(
            (dense): Linear(in_features=768, out_features=3072, bias=True)
          )
          (output): BertOutput(
            (dense): Linear(in_features=3072, out_features=768, bias=True)
            (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
            (dropout): Dropout(p=0.1, inplace=False)
          )
        )
        (4): BertLayer(
          (attention): BertAttention(
            (self): BertSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
          )
          (intermediate): BertIntermediate(
            (dense): Linear(in_features=768, out_features=3072, bias=True)
          )
          (output): BertOutput(
            (dense): Linear(in_features=3072, out_features=768, bias=True)
            (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
            (dropout): Dropout(p=0.1, inplace=False)
          )
        )
        (5): BertLayer(
          (attention): BertAttention(
            (self): BertSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
          )
          (intermediate): BertIntermediate(
            (dense): Linear(in_features=768, out_features=3072, bias=True)
          )
          (output): BertOutput(
            (dense): Linear(in_features=3072, out_features=768, bias=True)
            (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
            (dropout): Dropout(p=0.1, inplace=False)
          )
        )
        (6): BertLayer(
          (attention): BertAttention(
            (self): BertSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
          )
          (intermediate): BertIntermediate(
            (dense): Linear(in_features=768, out_features=3072, bias=True)
          )
          (output): BertOutput(
            (dense): Linear(in_features=3072, out_features=768, bias=True)
            (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
            (dropout): Dropout(p=0.1, inplace=False)
          )
        )
        (7): BertLayer(
          (attention): BertAttention(
            (self): BertSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
          )
          (intermediate): BertIntermediate(
            (dense): Linear(in_features=768, out_features=3072, bias=True)
          )
          (output): BertOutput(
            (dense): Linear(in_features=3072, out_features=768, bias=True)
            (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
            (dropout): Dropout(p=0.1, inplace=False)
          )
        )
        (8): BertLayer(
          (attention): BertAttention(
            (self): BertSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
          )
          (intermediate): BertIntermediate(
            (dense): Linear(in_features=768, out_features=3072, bias=True)
          )
          (output): BertOutput(
            (dense): Linear(in_features=3072, out_features=768, bias=True)
            (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
            (dropout): Dropout(p=0.1, inplace=False)
          )
        )
        (9): BertLayer(
          (attention): BertAttention(
            (self): BertSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
          )
          (intermediate): BertIntermediate(
            (dense): Linear(in_features=768, out_features=3072, bias=True)
          )
          (output): BertOutput(
            (dense): Linear(in_features=3072, out_features=768, bias=True)
            (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
            (dropout): Dropout(p=0.1, inplace=False)
          )
        )
        (10): BertLayer(
          (attention): BertAttention(
            (self): BertSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
          )
          (intermediate): BertIntermediate(
            (dense): Linear(in_features=768, out_features=3072, bias=True)
          )
          (output): BertOutput(
            (dense): Linear(in_features=3072, out_features=768, bias=True)
            (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
            (dropout): Dropout(p=0.1, inplace=False)
          )
        )
        (11): BertLayer(
          (attention): BertAttention(
            (self): BertSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
          )
          (intermediate): BertIntermediate(
            (dense): Linear(in_features=768, out_features=3072, bias=True)
          )
          (output): BertOutput(
            (dense): Linear(in_features=3072, out_features=768, bias=True)
            (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
            (dropout): Dropout(p=0.1, inplace=False)
          )
        )
      )
    )
    (pooler): BertPooler(
      (dense): Linear(in_features=768, out_features=768, bias=True)
      (activation): Tanh()
    )
  )
  (dropout): Dropout(p=0.1, inplace=False)
  (classifier): Linear(in_features=768, out_features=14, bias=True)
)

设置模型参数

from transformers import AdamW

# AdamW here is the Hugging Face implementation; the "W" stands for the
# "Weight Decay fix".
optimizer = AdamW(
    bert_model.parameters(),
    lr=2e-5,   # args.learning_rate - the default is 5e-5
    eps=1e-8,  # args.adam_epsilon - the default is 1e-8; keeps the denominator away from zero
)
from transformers import get_linear_schedule_with_warmup

# 2 to 4 epochs are recommended when fine-tuning BERT.
epochs = 4

# Number of training steps: [number of batches] x [number of epochs].
total_steps = epochs * len(train_dataloader)

# Learning-rate scheduler built over the optimizer, with no warm-up steps.
scheduler = get_linear_schedule_with_warmup(
    optimizer,
    num_warmup_steps=0,  # Default value in run_glue.py
    num_training_steps=total_steps,
)

计算模型准确率

# Import numpy here so the function does not depend on an import that only
# happens much later in the file.
import numpy as np
def flat_accuracy(preds, labels):
    """Return the fraction of rows whose arg-max prediction equals the label.

    Args:
        preds: 2-D numpy array of per-class scores, one row per example.
        labels: numpy array of integer class ids, one per example.

    Returns:
        The number of correct predictions divided by the number of labels.
    """
    pred_flat = np.argmax(preds, axis=1).flatten()
    labels_flat = labels.flatten()
    return np.sum(pred_flat == labels_flat) / len(labels_flat)

计算模型运行时间

import time
import datetime
def format_time(elapsed):
    """Format a duration given in seconds as an ``h:mm:ss`` string.

    The value is rounded to the nearest whole second before formatting.
    """
    whole_seconds = int(round(elapsed))
    delta = datetime.timedelta(seconds=whole_seconds)
    return str(delta)

模型训练函数

def training(train_dataloader,model):
    """Run one training epoch over ``train_dataloader`` and print its metrics.

    Uses the module-level ``device``, ``optimizer`` and ``scheduler`` objects
    and the ``flat_accuracy`` / ``format_time`` helpers.

    Args:
        train_dataloader: yields batches laid out as
            (input_ids, attention_mask, token_type_ids, labels).
        model: the sequence-classification model to train. It is called with
            ``labels`` so that it returns the loss first and the logits second.

    Returns:
        A tuple ``(avg_train_accuracy, avg_train_loss)``, both averaged over
        the number of batches.
    """
    # Time the epoch locally so the progress log does not rely on a global t0.
    t0 = time.time()
    total_train_loss = 0
    total_train_accuracy = 0
    model.train()

    for step, batch in enumerate(train_dataloader):

        # Report the elapsed time every 40 batches (skipping the first one).
        if step % 40 == 0 and not step == 0:
            elapsed = format_time(time.time() - t0)
            print('  Batch {:>5,}  of  {:>5,}.    Elapsed: {:}.'.format(step, len(train_dataloader), elapsed))

        # `batch` holds 4 tensors:
        #   [0]: input ids
        #   [1]: attention masks
        #   [2]: token type ids (not passed to the model below)
        #   [3]: labels
        b_input_ids = batch[0].to(device)
        b_input_mask = batch[1].to(device)
        b_labels = batch[3].to(device)

        # Clear gradients on the model that was passed in, not on a global.
        model.zero_grad()

        # Forward pass.
        # See https://huggingface.co/transformers/v2.2.0/model_doc/bert.html#transformers.BertForSequenceClassification
        outputs = model(b_input_ids,
                        token_type_ids=None,
                        attention_mask=b_input_mask,
                        labels=b_labels)
        # Index instead of tuple-unpacking: this works for plain tuples and
        # for the dict-like output objects of newer transformers releases.
        loss, logits = outputs[0], outputs[1]

        total_train_loss += loss.item()

        # Backward pass: compute the gradients.
        loss.backward()

        # Rescale the gradients so their overall norm is at most 1.0, to
        # guard against exploding gradients.
        torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)

        # Update the model parameters.
        optimizer.step()

        # Update the learning rate.
        scheduler.step()

        logit = logits.detach().cpu().numpy()
        label_id = b_labels.to('cpu').numpy()
        # Accumulate this batch's accuracy.
        total_train_accuracy += flat_accuracy(logit, label_id)

    # Average loss per batch.
    avg_train_loss = total_train_loss / len(train_dataloader)

    # Average of the per-batch training accuracies.
    avg_train_accuracy = total_train_accuracy / len(train_dataloader)

    print("  训练准确率: {0:.2f}".format(avg_train_accuracy))
    print("  平均训练损失 loss: {0:.2f}".format(avg_train_loss))
    return avg_train_accuracy,avg_train_loss

模型测试函数

def train_evalution(test_dataloader,model):
    """Evaluate ``model`` on ``test_dataloader`` without updating weights.

    Uses the module-level ``device`` and the ``flat_accuracy`` helper.

    Args:
        test_dataloader: yields batches laid out as
            (input_ids, attention_mask, token_type_ids, labels).
        model: the sequence-classification model to evaluate. It is called
            with ``labels`` so that it returns the loss first and the logits
            second.

    Returns:
        A tuple ``(avg_accuracy, total_loss, num_batches)``: the mean of the
        per-batch accuracies, the summed (not averaged) loss, and
        ``len(test_dataloader)``.
    """
    total_eval_accuracy = 0
    total_eval_loss = 0
    model.eval()

    for batch in test_dataloader:

        # `batch` holds 4 tensors:
        #   [0]: input ids
        #   [1]: attention masks
        #   [2]: token type ids (not passed to the model below)
        #   [3]: labels
        b_input_ids = batch[0].to(device)
        b_input_mask = batch[1].to(device)
        b_labels = batch[3].to(device)

        # Evaluation only: no gradients are tracked.
        with torch.no_grad():

            # See https://huggingface.co/transformers/v2.2.0/model_doc/bert.html#transformers.BertForSequenceClassification
            outputs = model(b_input_ids,
                            token_type_ids=None,
                            attention_mask=b_input_mask,
                            labels=b_labels)
        # Index instead of tuple-unpacking: this works for plain tuples and
        # for the dict-like output objects of newer transformers releases.
        loss, logits = outputs[0], outputs[1]

        # Accumulate the loss.
        total_eval_loss += loss.item()
        logit = logits.detach().cpu().numpy()
        label_id = b_labels.to('cpu').numpy()

        # Accumulate this batch's accuracy.
        total_eval_accuracy += flat_accuracy(logit, label_id)
    # Average over the loader that was actually evaluated. Dividing by the
    # global validation loader's length gave a wrong result whenever a
    # different-sized loader (e.g. the test set) was passed in.
    avg_val_accuracy = total_eval_accuracy / len(test_dataloader)
    return avg_val_accuracy,total_eval_loss,len(test_dataloader)

训练模型

import os
import random
import numpy as np
from transformers import WEIGHTS_NAME, CONFIG_NAME
from torch.utils.tensorboard import SummaryWriter

output_dir = "./models/"
# Create the checkpoint directory up front; torch.save does not create
# missing parent directories.
os.makedirs(output_dir, exist_ok=True)
output_model_file = os.path.join(output_dir, WEIGHTS_NAME)
output_config_file = os.path.join(output_dir, CONFIG_NAME)
# Based on https://github.com/huggingface/transformers/blob/5bfcd0485ece086ebcbed2d008813037968a9e58/examples/run_glue.py#L128
writer = SummaryWriter("./log_models/")
# Seed every random number generator in use.
# NOTE(review): the model was already created above, so this seed does not
# influence the initial classifier weights.
seed_val = 42
random.seed(seed_val)
np.random.seed(seed_val)
torch.manual_seed(seed_val)
torch.cuda.manual_seed_all(seed_val)

# Start of the whole run, for the total-time report at the end.
total_t0 = time.time()
best_val_accuracy = 0

for epoch_i in range(0, epochs):
    print('Epoch {:} / {:}'.format(epoch_i + 1, epochs))

    # ========================================
    #               training
    # ========================================
    t0 = time.time()
    avg_train_accuracy,avg_train_loss = training(train_dataloader,bert_model)
    # Time spent on this training epoch.
    training_time = format_time(time.time() - t0)
    print("  训练时间: {:}".format(training_time))

    # ========================================
    #               Validation
    # ========================================

    t0 = time.time()

    avg_val_accuracy,total_eval_loss,valid_dataloader_length = train_evalution(validation_dataloader,bert_model)

    print("")
    print("  测试准确率: {0:.2f}".format(avg_val_accuracy))

    # Keep only the checkpoint with the best validation accuracy so far.
    if avg_val_accuracy > best_val_accuracy:
        best_val_accuracy = avg_val_accuracy
        torch.save(bert_model.state_dict(),output_model_file)
        bert_model.config.to_json_file(output_config_file)
        tokenizer.save_vocabulary(output_dir)

    # Average validation loss per batch.
    avg_val_loss = total_eval_loss / valid_dataloader_length

    # Time spent on validation.
    validation_time = format_time(time.time() - t0)

    print("  平均测试损失 Loss: {0:.2f}".format(avg_val_loss))
    print("  测试时间: {:}".format(validation_time))

    # Log the three curves under one TensorBoard tag, indexed by epoch.
    writer.add_scalars('Acc/Loss', {
        'Training Loss': avg_train_loss,
        'Valid Loss': avg_val_loss,
        'Valid Accur': avg_val_accuracy
    }, epoch_i+1)


print("训练一共用了 {:} (h:mm:ss)".format(format_time(time.time()-total_t0)))
writer.close()
Epoch 1 / 4
  Batch    40  of     63.    Elapsed: 0:00:13.
  训练准确率: 0.98
  平均训练损失 loss: 0.20
  训练时间: 0:00:21

  测试准确率: 0.89
  平均测试损失 Loss: 0.45
  测试时间: 0:00:05
Epoch 2 / 4
  Batch    40  of     63.    Elapsed: 0:00:13.
  训练准确率: 0.98
  平均训练损失 loss: 0.19
  训练时间: 0:00:20

  测试准确率: 0.89
  平均测试损失 Loss: 0.45
  测试时间: 0:00:04
Epoch 3 / 4
  Batch    40  of     63.    Elapsed: 0:00:12.
  训练准确率: 0.99
  平均训练损失 loss: 0.18
  训练时间: 0:00:19

  测试准确率: 0.89
  平均测试损失 Loss: 0.46
  测试时间: 0:00:04
Epoch 4 / 4
  Batch    40  of     63.    Elapsed: 0:00:12.
  训练准确率: 0.98
  平均训练损失 loss: 0.20
  训练时间: 0:00:19

  测试准确率: 0.89
  平均测试损失 Loss: 0.46
  测试时间: 0:00:04
训练一共用了 0:01:35 (h:mm:ss)

(原文此处有一张图片 attachment:image.png,因外链图片转存失败未能显示。)

在测试集合上预测

# Evaluate the model on the test split; only the accuracy (the first of the
# three returned values) is kept. Note the variable keeps its "val" name even
# though it now holds the test accuracy.
avg_val_accuracy,_,_ = train_evalution(test_dataloader,bert_model)
# Bare expression: presumably shows the value when run as a notebook cell.
avg_val_accuracy
0.8988095238095238

你可能感兴趣的:(bert 新闻分类系统)