import torch

if torch.cuda.is_available():
    device = torch.device("cuda")
    print('There are %d GPU(s) available.' % torch.cuda.device_count())
    print('We will use the GPU:', torch.cuda.get_device_name(0))
else:
    print('No GPU available, using the CPU instead.')
    device = torch.device("cpu")
There are 1 GPU(s) available.
We will use the GPU: GeForce GTX 1070
import os
import transformers
from transformers import (
    DataProcessor,
    InputExample,
    BertForSequenceClassification,
    BertTokenizer,
    glue_convert_examples_to_features,
)
file = open("cn-classification/THUCNews/classes.txt", "r", encoding="utf-8")
line = file.read()
file.close()
label_types = line.split("\n")[0:14]
label_types
['财经',
'股票',
'科技',
'社会',
'游戏',
'星座',
'时政',
'时尚',
'教育',
'房产',
'彩票',
'家居',
'娱乐',
'体育']
fo = open("./cn-classification/dev.txt", "r", encoding="utf-8")
line = fo.readline()
fo.close()
print("新闻内容:\n",line[2:])
print("新闻类型:",label_types[int(line[0])+1])
News content:
死叉+跌停 后市操作紧跟三大信号 广州万隆 今日消息面总体偏空。受强震加核爆的利空影响日本股市昨日暴跌6.18%,今日早盘再度大幅低开跌幅也高达6%,受此影响A股早盘也弱势低开9.7点,开盘后直接展开5浪杀跌,并击穿2900点,表明市场情绪还处于恐慌中。板块方面,西藏板块一枝独秀,维持红盘,其余板块全线尽墨,昨日强势的水泥、触摸屏概念跌幅居前,煤炭板块明显领跌,其中600971恒源煤电跌停,这在近段时间首次出现跌停的情况,说明市场人气明显偏弱。目前股指下跌1.5%,下跌43点,成交量昨日小幅萎缩,个股普跌。 今日有三个信号要重点关注:1. 要重点注意今日调整时的力度和量能,如果调整时缩量则属正常;如果是放量下杀无力则继续需观望;2. 注意大盘反弹时银行、地产等蓝筹股的表现,如强力护盘可说明下跌幅度也不大;如表现疲软就继续观望。3. 经过早盘的大幅杀跌,5日线已经和10日线形成死叉,目前暂时在30日线上止跌,后市可重点关注30日线的支撑以及5日线和10日线的死叉何时解开。 欢迎发表评论 我要评论
News category: 社会
class SstProcessor(DataProcessor):
    """Processor for the THUCNews classification data set (adapted from the GLUE SST-2 processor)."""

    def read_files(self, file_path):
        fo = open(file_path, "r", encoding="utf-8")
        print("File name:", fo.name)
        line = fo.readline()
        lines = []
        while line:
            lines.append(line)
            line = fo.readline()
        fo.close()
        return lines

    def get_train_examples(self, data_dir):
        """See base class."""
        return self._create_examples(self.read_files(os.path.join(data_dir, "train.txt")), "train")

    def get_dev_examples(self, data_dir):
        """See base class."""
        return self._create_examples(self.read_files(os.path.join(data_dir, "dev.txt")), "dev")

    def get_test_examples(self, data_dir):
        """See base class."""
        return self._create_examples(self.read_files(os.path.join(data_dir, "test.txt")), "test")

    def get_labels(self):
        """See base class."""
        return [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14]

    def _create_examples(self, lines, set_type):
        """Creates examples for the training and dev sets."""
        examples = []
        for (i, line) in enumerate(lines):
            guid = "%s-%s" % (set_type, i)
            # Split only on the first comma so commas inside the news text are preserved.
            text_a = line.split(',', 1)[1]
            label = int(line.split(',', 1)[0])
            examples.append(InputExample(guid=guid, text_a=text_a, text_b=None, label=label))
        return examples
file_path = "./"
myProcessor = SstProcessor()
train_examples = myProcessor.get_train_examples(file_path)
dev_examples = myProcessor.get_dev_examples(file_path)
test_examples = myProcessor.get_test_examples(file_path)
File name: ./train.txt
File name: ./dev.txt
File name: ./test.txt
print('Downloading the BERT tokenizer...')
tokenizer = BertTokenizer.from_pretrained('bert-base-chinese')
Downloading the BERT tokenizer...
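As a quick optional check (not in the original post, and the sentence is made up), you can look at how bert-base-chinese splits text; it tokenizes Chinese mostly character by character:

# Optional check: bert-base-chinese tokenizes Chinese text roughly character by character.
sample = "日本股市昨日暴跌"            # made-up example sentence
print(tokenizer.tokenize(sample))      # ['日', '本', '股', '市', ...]
print(tokenizer.encode(sample))        # token ids with [CLS] and [SEP] added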
print("start input features")
train_features = glue_convert_examples_to_features(
    examples=train_examples[0:500],
    tokenizer=tokenizer,
    max_length=128,
    task=None,
    label_list=[1,2,3,4,5,6,7,8,9,10,11,12,13,14],
    output_mode="classification",
)
print("finish train_features.....")
start input features
finish train_features.....
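Before building tensors, it can help to peek at one converted example (an optional check, not in the original); each feature should carry input_ids, attention_mask, and token_type_ids padded to max_length, plus the mapped label index:

# Optional check: every feature should be padded/truncated to max_length=128.
f = train_features[0]
print(len(f.input_ids), len(f.attention_mask), len(f.token_type_ids))  # expected: 128 128 128
print(f.label)  # label index after mapping label_list [1..14] to [0..13]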
dev_features = glue_convert_examples_to_features(
    examples=dev_examples[0:500],
    tokenizer=tokenizer,
    max_length=128,
    task=None,
    label_list=[1,2,3,4,5,6,7,8,9,10,11,12,13,14],
    output_mode="classification",
)
print("finish dev_features.....")
finish dev_features.....
test_features = glue_convert_examples_to_features(
    examples=test_examples[0:500],
    tokenizer=tokenizer,
    max_length=128,
    task=None,
    label_list=[1,2,3,4,5,6,7,8,9,10,11,12,13,14],
    output_mode="classification",
)
print("finish test_examples.....")
finish test_examples.....
import torch
import numpy
from torch.utils.data import TensorDataset, random_split

def build_dataset(features):
    input_ids = []
    attention_mask = []
    token_type_ids = []
    train_y = []
    for feature in features:
        input_ids.append(feature.input_ids)
        attention_mask.append(feature.attention_mask)
        token_type_ids.append(feature.token_type_ids)
        train_y.append(feature.label)
    input_ids = torch.from_numpy(numpy.array(input_ids)).long()
    attention_mask = torch.from_numpy(numpy.array(attention_mask)).long()
    token_type_ids = torch.from_numpy(numpy.array(token_type_ids)).long()
    train_y = torch.from_numpy(numpy.array(train_y)).long()
    dataset = TensorDataset(input_ids, attention_mask, token_type_ids, train_y)
    return dataset

train_set = build_dataset(train_features)
dev_set = build_dataset(dev_features)
test_set = build_dataset(test_features)
from torch.utils.data import TensorDataset, DataLoader
train_dataloader = DataLoader(train_set, batch_size=8, shuffle=True)
validation_dataloader = DataLoader(dev_set, batch_size=8, shuffle=True)
test_dataloader = DataLoader(test_set, batch_size=8, shuffle=True)
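A quick way to confirm the dataloaders are wired up as expected (optional, not part of the original notebook) is to pull one batch and print the tensor shapes:

# Optional check: a batch should hold four tensors --
# input_ids, attention_mask, token_type_ids of shape [8, 128] and labels of shape [8].
batch = next(iter(train_dataloader))
for t in batch:
    print(t.shape)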
bert_model = BertForSequenceClassification.from_pretrained(
    "bert-base-chinese",          # use the 12-layer Chinese BERT model.
    num_labels = 14,              # this multi-class task has 14 output labels.
    output_attentions = False,    # do not return attention weights.
    output_hidden_states = False, # do not return all hidden states.
)
bert_model.cuda()
Some weights of the model checkpoint at bert-base-chinese were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPretraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-chinese and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
BertForSequenceClassification(
(bert): BertModel(
(embeddings): BertEmbeddings(
(word_embeddings): Embedding(21128, 768, padding_idx=0)
(position_embeddings): Embedding(512, 768)
(token_type_embeddings): Embedding(2, 768)
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
(dropout): Dropout(p=0.1, inplace=False)
)
(encoder): BertEncoder(
(layer): ModuleList(
(0): BertLayer(
(attention): BertAttention(
(self): BertSelfAttention(
(query): Linear(in_features=768, out_features=768, bias=True)
(key): Linear(in_features=768, out_features=768, bias=True)
(value): Linear(in_features=768, out_features=768, bias=True)
(dropout): Dropout(p=0.1, inplace=False)
)
(output): BertSelfOutput(
(dense): Linear(in_features=768, out_features=768, bias=True)
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
(dropout): Dropout(p=0.1, inplace=False)
)
)
(intermediate): BertIntermediate(
(dense): Linear(in_features=768, out_features=3072, bias=True)
)
(output): BertOutput(
(dense): Linear(in_features=3072, out_features=768, bias=True)
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
(dropout): Dropout(p=0.1, inplace=False)
)
)
(1): BertLayer(
(attention): BertAttention(
(self): BertSelfAttention(
(query): Linear(in_features=768, out_features=768, bias=True)
(key): Linear(in_features=768, out_features=768, bias=True)
(value): Linear(in_features=768, out_features=768, bias=True)
(dropout): Dropout(p=0.1, inplace=False)
)
(output): BertSelfOutput(
(dense): Linear(in_features=768, out_features=768, bias=True)
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
(dropout): Dropout(p=0.1, inplace=False)
)
)
(intermediate): BertIntermediate(
(dense): Linear(in_features=768, out_features=3072, bias=True)
)
(output): BertOutput(
(dense): Linear(in_features=3072, out_features=768, bias=True)
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
(dropout): Dropout(p=0.1, inplace=False)
)
)
(2): BertLayer(
(attention): BertAttention(
(self): BertSelfAttention(
(query): Linear(in_features=768, out_features=768, bias=True)
(key): Linear(in_features=768, out_features=768, bias=True)
(value): Linear(in_features=768, out_features=768, bias=True)
(dropout): Dropout(p=0.1, inplace=False)
)
(output): BertSelfOutput(
(dense): Linear(in_features=768, out_features=768, bias=True)
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
(dropout): Dropout(p=0.1, inplace=False)
)
)
(intermediate): BertIntermediate(
(dense): Linear(in_features=768, out_features=3072, bias=True)
)
(output): BertOutput(
(dense): Linear(in_features=3072, out_features=768, bias=True)
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
(dropout): Dropout(p=0.1, inplace=False)
)
)
(3): BertLayer(
(attention): BertAttention(
(self): BertSelfAttention(
(query): Linear(in_features=768, out_features=768, bias=True)
(key): Linear(in_features=768, out_features=768, bias=True)
(value): Linear(in_features=768, out_features=768, bias=True)
(dropout): Dropout(p=0.1, inplace=False)
)
(output): BertSelfOutput(
(dense): Linear(in_features=768, out_features=768, bias=True)
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
(dropout): Dropout(p=0.1, inplace=False)
)
)
(intermediate): BertIntermediate(
(dense): Linear(in_features=768, out_features=3072, bias=True)
)
(output): BertOutput(
(dense): Linear(in_features=3072, out_features=768, bias=True)
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
(dropout): Dropout(p=0.1, inplace=False)
)
)
(4): BertLayer(
(attention): BertAttention(
(self): BertSelfAttention(
(query): Linear(in_features=768, out_features=768, bias=True)
(key): Linear(in_features=768, out_features=768, bias=True)
(value): Linear(in_features=768, out_features=768, bias=True)
(dropout): Dropout(p=0.1, inplace=False)
)
(output): BertSelfOutput(
(dense): Linear(in_features=768, out_features=768, bias=True)
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
(dropout): Dropout(p=0.1, inplace=False)
)
)
(intermediate): BertIntermediate(
(dense): Linear(in_features=768, out_features=3072, bias=True)
)
(output): BertOutput(
(dense): Linear(in_features=3072, out_features=768, bias=True)
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
(dropout): Dropout(p=0.1, inplace=False)
)
)
(5): BertLayer(
(attention): BertAttention(
(self): BertSelfAttention(
(query): Linear(in_features=768, out_features=768, bias=True)
(key): Linear(in_features=768, out_features=768, bias=True)
(value): Linear(in_features=768, out_features=768, bias=True)
(dropout): Dropout(p=0.1, inplace=False)
)
(output): BertSelfOutput(
(dense): Linear(in_features=768, out_features=768, bias=True)
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
(dropout): Dropout(p=0.1, inplace=False)
)
)
(intermediate): BertIntermediate(
(dense): Linear(in_features=768, out_features=3072, bias=True)
)
(output): BertOutput(
(dense): Linear(in_features=3072, out_features=768, bias=True)
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
(dropout): Dropout(p=0.1, inplace=False)
)
)
(6): BertLayer(
(attention): BertAttention(
(self): BertSelfAttention(
(query): Linear(in_features=768, out_features=768, bias=True)
(key): Linear(in_features=768, out_features=768, bias=True)
(value): Linear(in_features=768, out_features=768, bias=True)
(dropout): Dropout(p=0.1, inplace=False)
)
(output): BertSelfOutput(
(dense): Linear(in_features=768, out_features=768, bias=True)
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
(dropout): Dropout(p=0.1, inplace=False)
)
)
(intermediate): BertIntermediate(
(dense): Linear(in_features=768, out_features=3072, bias=True)
)
(output): BertOutput(
(dense): Linear(in_features=3072, out_features=768, bias=True)
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
(dropout): Dropout(p=0.1, inplace=False)
)
)
(7): BertLayer(
(attention): BertAttention(
(self): BertSelfAttention(
(query): Linear(in_features=768, out_features=768, bias=True)
(key): Linear(in_features=768, out_features=768, bias=True)
(value): Linear(in_features=768, out_features=768, bias=True)
(dropout): Dropout(p=0.1, inplace=False)
)
(output): BertSelfOutput(
(dense): Linear(in_features=768, out_features=768, bias=True)
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
(dropout): Dropout(p=0.1, inplace=False)
)
)
(intermediate): BertIntermediate(
(dense): Linear(in_features=768, out_features=3072, bias=True)
)
(output): BertOutput(
(dense): Linear(in_features=3072, out_features=768, bias=True)
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
(dropout): Dropout(p=0.1, inplace=False)
)
)
(8): BertLayer(
(attention): BertAttention(
(self): BertSelfAttention(
(query): Linear(in_features=768, out_features=768, bias=True)
(key): Linear(in_features=768, out_features=768, bias=True)
(value): Linear(in_features=768, out_features=768, bias=True)
(dropout): Dropout(p=0.1, inplace=False)
)
(output): BertSelfOutput(
(dense): Linear(in_features=768, out_features=768, bias=True)
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
(dropout): Dropout(p=0.1, inplace=False)
)
)
(intermediate): BertIntermediate(
(dense): Linear(in_features=768, out_features=3072, bias=True)
)
(output): BertOutput(
(dense): Linear(in_features=3072, out_features=768, bias=True)
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
(dropout): Dropout(p=0.1, inplace=False)
)
)
(9): BertLayer(
(attention): BertAttention(
(self): BertSelfAttention(
(query): Linear(in_features=768, out_features=768, bias=True)
(key): Linear(in_features=768, out_features=768, bias=True)
(value): Linear(in_features=768, out_features=768, bias=True)
(dropout): Dropout(p=0.1, inplace=False)
)
(output): BertSelfOutput(
(dense): Linear(in_features=768, out_features=768, bias=True)
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
(dropout): Dropout(p=0.1, inplace=False)
)
)
(intermediate): BertIntermediate(
(dense): Linear(in_features=768, out_features=3072, bias=True)
)
(output): BertOutput(
(dense): Linear(in_features=3072, out_features=768, bias=True)
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
(dropout): Dropout(p=0.1, inplace=False)
)
)
(10): BertLayer(
(attention): BertAttention(
(self): BertSelfAttention(
(query): Linear(in_features=768, out_features=768, bias=True)
(key): Linear(in_features=768, out_features=768, bias=True)
(value): Linear(in_features=768, out_features=768, bias=True)
(dropout): Dropout(p=0.1, inplace=False)
)
(output): BertSelfOutput(
(dense): Linear(in_features=768, out_features=768, bias=True)
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
(dropout): Dropout(p=0.1, inplace=False)
)
)
(intermediate): BertIntermediate(
(dense): Linear(in_features=768, out_features=3072, bias=True)
)
(output): BertOutput(
(dense): Linear(in_features=3072, out_features=768, bias=True)
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
(dropout): Dropout(p=0.1, inplace=False)
)
)
(11): BertLayer(
(attention): BertAttention(
(self): BertSelfAttention(
(query): Linear(in_features=768, out_features=768, bias=True)
(key): Linear(in_features=768, out_features=768, bias=True)
(value): Linear(in_features=768, out_features=768, bias=True)
(dropout): Dropout(p=0.1, inplace=False)
)
(output): BertSelfOutput(
(dense): Linear(in_features=768, out_features=768, bias=True)
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
(dropout): Dropout(p=0.1, inplace=False)
)
)
(intermediate): BertIntermediate(
(dense): Linear(in_features=768, out_features=3072, bias=True)
)
(output): BertOutput(
(dense): Linear(in_features=3072, out_features=768, bias=True)
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
(dropout): Dropout(p=0.1, inplace=False)
)
)
)
)
(pooler): BertPooler(
(dense): Linear(in_features=768, out_features=768, bias=True)
(activation): Tanh()
)
)
(dropout): Dropout(p=0.1, inplace=False)
(classifier): Linear(in_features=768, out_features=14, bias=True)
)
from transformers import AdamW
# AdamW is a class from the huggingface library; the 'W' stands for 'Weight Decay fix'.
optimizer = AdamW(bert_model.parameters(),
                  lr = 2e-5,  # args.learning_rate - default is 5e-5
                  eps = 1e-8  # args.adam_epsilon - default is 1e-8, guards against division by zero in the denominator
                  )
from transformers import get_linear_schedule_with_warmup

# The BERT authors recommend fine-tuning for 2 to 4 epochs.
epochs = 4
# Total number of training steps: [number of batches] x [number of epochs].
total_steps = len(train_dataloader) * epochs
# Create the learning rate scheduler.
scheduler = get_linear_schedule_with_warmup(optimizer,
                                            num_warmup_steps = 0, # Default value in run_glue.py
                                            num_training_steps = total_steps)
import numpy as np

def flat_accuracy(preds, labels):
    pred_flat = np.argmax(preds, axis=1).flatten()
    labels_flat = labels.flatten()
    return np.sum(pred_flat == labels_flat) / len(labels_flat)
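To make the metric concrete, here is a tiny made-up example: flat_accuracy takes raw logits of shape [batch, num_labels] and integer labels, and returns the fraction of argmax predictions that match.

# Made-up example: the argmax predictions are [1, 0, 2] against labels [1, 0, 1] -> accuracy 2/3.
example_logits = np.array([[0.1, 2.0, 0.3],
                           [1.5, 0.2, 0.1],
                           [0.0, 0.1, 0.9]])
example_labels = np.array([1, 0, 1])
print(flat_accuracy(example_logits, example_labels))  # 0.666...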
import time
import datetime

def format_time(elapsed):
    elapsed_rounded = int(round((elapsed)))
    # Return the elapsed time as a hh:mm:ss string.
    return str(datetime.timedelta(seconds=elapsed_rounded))
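For instance (a made-up value), format_time(3750.4) rounds to 3750 seconds and formats it as one hour, two minutes, thirty seconds:

print(format_time(3750.4))  # 1:02:30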
def training(train_dataloader, model):
    # t0 = time.time()
    total_train_loss = 0
    total_train_accuracy = 0
    model.train()
    for step, batch in enumerate(train_dataloader):
        # Report elapsed time every 40 batches.
        if step % 40 == 0 and not step == 0:
            elapsed = format_time(time.time() - t0)
            print(' Batch {:>5,} of {:>5,}. Elapsed: {:}.'.format(step, len(train_dataloader), elapsed))
        # `batch` contains 4 tensors:
        #   [0]: input ids
        #   [1]: attention masks
        #   [2]: token type ids
        #   [3]: labels
        b_input_ids = batch[0].to(device)
        b_input_mask = batch[1].to(device)
        b_labels = batch[3].to(device)
        # Clear any previously accumulated gradients.
        model.zero_grad()
        # Forward pass.
        # See https://huggingface.co/transformers/v2.2.0/model_doc/bert.html#transformers.BertForSequenceClassification
        loss, logits = model(b_input_ids,
                             token_type_ids=None,
                             attention_mask=b_input_mask,
                             labels=b_labels)
        total_train_loss += loss.item()
        # Backward pass to compute gradients.
        loss.backward()
        # Clip the gradient norm to 1.0 to prevent exploding gradients.
        torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
        # Update the model parameters.
        optimizer.step()
        # Update the learning rate.
        scheduler.step()
        logit = logits.detach().cpu().numpy()
        label_id = b_labels.to('cpu').numpy()
        # Accumulate the training accuracy for this batch.
        total_train_accuracy += flat_accuracy(logit, label_id)
    # Average training loss over all batches.
    avg_train_loss = total_train_loss / len(train_dataloader)
    # Average training accuracy.
    avg_train_accuracy = total_train_accuracy / len(train_dataloader)
    print(" Training accuracy: {0:.2f}".format(avg_train_accuracy))
    print(" Average training loss: {0:.2f}".format(avg_train_loss))
    return avg_train_accuracy, avg_train_loss
def train_evalution(test_dataloader, model):
    total_eval_accuracy = 0
    total_eval_loss = 0
    nb_eval_steps = 0
    model.eval()
    for batch in test_dataloader:
        # `batch` contains 4 tensors:
        #   [0]: input ids
        #   [1]: attention masks
        #   [2]: token type ids
        #   [3]: labels
        b_input_ids = batch[0].to(device)
        b_input_mask = batch[1].to(device)
        b_labels = batch[3].to(device)
        # During evaluation, do not update weights or build the computation graph.
        with torch.no_grad():
            # See https://huggingface.co/transformers/v2.2.0/model_doc/bert.html#transformers.BertForSequenceClassification
            (loss, logits) = model(b_input_ids,
                                   token_type_ids=None,
                                   attention_mask=b_input_mask,
                                   labels=b_labels)
        # Accumulate the validation loss.
        total_eval_loss += loss.item()
        logit = logits.detach().cpu().numpy()
        label_id = b_labels.to('cpu').numpy()
        # Accumulate the accuracy for this batch.
        total_eval_accuracy += flat_accuracy(logit, label_id)
    # Average accuracy over the evaluated dataloader.
    avg_val_accuracy = total_eval_accuracy / len(test_dataloader)
    return avg_val_accuracy, total_eval_loss, len(test_dataloader)
import os
import random
import numpy as np
from transformers import WEIGHTS_NAME, CONFIG_NAME
from torch.utils.tensorboard import SummaryWriter

output_dir = "./models/"
output_model_file = os.path.join(output_dir, WEIGHTS_NAME)
output_config_file = os.path.join(output_dir, CONFIG_NAME)
# Adapted from https://github.com/huggingface/transformers/blob/5bfcd0485ece086ebcbed2d008813037968a9e58/examples/run_glue.py#L128
writer = SummaryWriter("./log_models/")

# Set the random seeds.
seed_val = 42
random.seed(seed_val)
np.random.seed(seed_val)
torch.manual_seed(seed_val)
torch.cuda.manual_seed_all(seed_val)

# Track the total training time.
total_t0 = time.time()
best_val_accuracy = 0

for epoch_i in range(0, epochs):
    print('Epoch {:} / {:}'.format(epoch_i + 1, epochs))
    # ========================================
    #               Training
    # ========================================
    t0 = time.time()
    avg_train_accuracy, avg_train_loss = training(train_dataloader, bert_model)
    # Measure how long this epoch's training took.
    training_time = format_time(time.time() - t0)
    print(" Training time: {:}".format(training_time))
    # ========================================
    #               Validation
    # ========================================
    t0 = time.time()
    avg_val_accuracy, total_eval_loss, valid_dataloader_length = train_evalution(validation_dataloader, bert_model)
    print("")
    print(" Validation accuracy: {0:.2f}".format(avg_val_accuracy))
    if avg_val_accuracy > best_val_accuracy:
        best_val_accuracy = avg_val_accuracy
        torch.save(bert_model.state_dict(), output_model_file)
        bert_model.config.to_json_file(output_config_file)
        tokenizer.save_vocabulary(output_dir)
    # Average validation loss over all batches.
    avg_val_loss = total_eval_loss / valid_dataloader_length
    # Measure how long validation took.
    validation_time = format_time(time.time() - t0)
    print(" Average validation loss: {0:.2f}".format(avg_val_loss))
    print(" Validation time: {:}".format(validation_time))
    writer.add_scalars('Acc/Loss', {
        'Training Loss': avg_train_loss,
        'Valid Loss': avg_val_loss,
        'Valid Accur': avg_val_accuracy
    }, epoch_i + 1)

print("Total training took {:} (h:mm:ss)".format(format_time(time.time() - total_t0)))
writer.close()
Epoch 1 / 4
Batch 40 of 63. Elapsed: 0:00:13.
Training accuracy: 0.98
Average training loss: 0.20
Training time: 0:00:21
Validation accuracy: 0.89
Average validation loss: 0.45
Validation time: 0:00:05
Epoch 2 / 4
Batch 40 of 63. Elapsed: 0:00:13.
Training accuracy: 0.98
Average training loss: 0.19
Training time: 0:00:20
Validation accuracy: 0.89
Average validation loss: 0.45
Validation time: 0:00:04
Epoch 3 / 4
Batch 40 of 63. Elapsed: 0:00:12.
Training accuracy: 0.99
Average training loss: 0.18
Training time: 0:00:19
Validation accuracy: 0.89
Average validation loss: 0.46
Validation time: 0:00:04
Epoch 4 / 4
Batch 40 of 63. Elapsed: 0:00:12.
Training accuracy: 0.98
Average training loss: 0.20
Training time: 0:00:19
Validation accuracy: 0.89
Average validation loss: 0.46
Validation time: 0:00:04
Total training took 0:01:35 (h:mm:ss)
avg_val_accuracy,_,_ = train_evalution(test_dataloader,bert_model)
avg_val_accuracy
0.8988095238095238
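As an optional last step that is not in the original post, you can reload the best checkpoint saved to ./models/ and classify a new headline. This is only a sketch: it assumes the config.json, pytorch_model.bin, and vocab.txt written above are present in output_dir, that the installed transformers version supports calling the tokenizer directly, and the headline below is made up. Mapping the predicted index back to a category name also assumes the labels 1-14 in the data files follow the order of classes.txt.

# Optional inference sketch -- reuses output_dir, device, and label_types from above.
from transformers import BertForSequenceClassification, BertTokenizer

loaded_model = BertForSequenceClassification.from_pretrained(output_dir)
loaded_tokenizer = BertTokenizer.from_pretrained(output_dir)
loaded_model.to(device)
loaded_model.eval()

headline = "央行宣布下调存款准备金率"  # made-up headline
inputs = loaded_tokenizer(headline, return_tensors="pt", truncation=True, max_length=128)
inputs = {k: v.to(device) for k, v in inputs.items()}
with torch.no_grad():
    logits = loaded_model(**inputs)[0]
pred = int(logits.argmax(dim=-1))
# glue_convert_examples_to_features mapped file labels [1..14] to [0..13], so `pred`
# indexes label_types only if the file labels follow the classes.txt order.
print(pred, label_types[pred] if pred < len(label_types) else "unknown")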