import torch
# from pytorch_transformers import *
from pytorch_transformers import BertModel,BertTokenizer,AdamW,BertForTokenClassification
import torch.nn as nn
import pytorch_transformers
torch.__version__
import pandas as pd
from torch.utils.data import DataLoader, Dataset
import time
PyTorch-Transformers has a unified API
for 7 transformer architectures and 30 sets of pretrained weights.
Model | Tokenizer | Pretrained weights shortcut
# MODELS = [(BertModel, BertTokenizer, 'bert-base-uncased'),
# (OpenAIGPTModel, OpenAIGPTTokenizer, 'openai-gpt'),
# (GPT2Model, GPT2Tokenizer, 'gpt2'),
# (TransfoXLModel, TransfoXLTokenizer, 'transfo-xl-wt103'),
# (XLNetModel, XLNetTokenizer, 'xlnet-base-cased'),
# (XLMModel, XLMTokenizer, 'xlm-mlm-enfr-1024'),
# (RobertaModel, RobertaTokenizer, 'roberta-base')]
# Let's encode some text in a sequence of hidden-states using each model:
# for model_class, tokenizer_class, pretrained_weights in MODELS:
# # Load pretrained model/tokenizer
# tokenizer = tokenizer_class.from_pretrained(pretrained_weights)
# model = model_class.from_pretrained(pretrained_weights)
# # Encode text
# input_ids = torch.tensor([tokenizer.encode("Here is some text to encode ", add_special_tokens=True)])
# print("input_ids = ",input_ids)
# # Add special tokens takes care of adding [CLS], [SEP], ... tokens in the right way for each model.
# with torch.no_grad():
# last_hidden_states = model(input_ids)[0] # Models outputs are now tuples
# print("last_hidden_states = ",last_hidden_states)
# print(last_hidden_states.size())
# break  # only the BERT model is needed here, so stop after the first iteration
# pretrained_weights = 'bert-base-chinese'
# model = BertModel.from_pretrained(pretrained_weights)
# tokenizer = BertTokenizer.from_pretrained(pretrained_weights)
# print("bert_model = ",model)
# print("bert_tokenizer = ",tokenizer)
# BERT_MODEL_CLASSES = [BertModel, BertForPreTraining, BertForMaskedLM, BertForNextSentencePrediction,
# BertForSequenceClassification, BertForMultipleChoice, BertForTokenClassification,
# BertForQuestionAnswering]
# All the classes for an architecture can be instantiated from pretrained weights for that architecture.
# Note that the additional weights added for fine-tuning are only randomly initialized
# and need to be trained on the downstream task.
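For example (a minimal sketch, not part of the original pipeline), loading a BERT model with a classification head keeps the pretrained encoder weights but leaves the classifier layer randomly initialized; num_labels=17 is an assumption matching the 17 categories used further below.
# Sketch only: attach a classification head to the pretrained Chinese BERT.
from pytorch_transformers import BertForSequenceClassification
clf = BertForSequenceClassification.from_pretrained('bert-base-chinese', num_labels=17)
# The encoder comes from the checkpoint; clf.classifier is freshly initialized
# and has to be trained on the downstream task.
print(clf.classifier)  # Linear(in_features=768, out_features=17, bias=True)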
# pretrained_weights = 'bert-base-chinese'
# model = BertModel.from_pretrained(pretrained_weights,
# output_hidden_states = True,
# output_attentions = True
# )
# tokenizer = BertTokenizer.from_pretrained(pretrained_weights)
# input_ids = torch.tensor([tokenizer.encode("让我们看看在这个文本中的隐层和感知层")]) # 18 Chinese characters in this sentence
# print(input_ids) #tensor([[6375, 2769, 812, 4692, 4692, 1762, 6821, 702, 3152, 3315, 704, 4638,7391, 2231, 1469, 2697, 4761, 2231]])
# print(input_ids.size()) #torch.Size([1, 18])
# out = model(input_ids)
# print("len(out) = ",len(out)) # 4
# # With output_hidden_states and output_attentions enabled, the output is a 4-element tuple:
# #   (1) last_hidden_state: (batch_size, sequence_length, hidden_size) = (1, 18, 768)
# #   (2) pooler_output:     (batch_size, hidden_size) = (1, 768)
# #   (3) hidden_states:     13 tensors (embedding output + one per layer), each (1, 18, 768)
# #   (4) attentions:        12 tensors (one per layer), each (1, 12, 18, 18)
# all_hidden_states, all_attentions = out[-2:]
# last_hidden_states = out[0] # The last hidden-state is the first element of the output tuple
# print("len(out[0]) = ",len(out[0])," out[0].size() = ",out[0].size()) #1,torch.Size([1, 18, 768])
# print("len(out[1]) = ",len(out[1])," out[1].size() = ",out[1].size()) #1,torch.Size([1, 768])
# print("len(all_hidden_states) = ",len(all_hidden_states)) #13
# print("len(all_attentions) = ",len(all_attentions)) #12
# print("all_hidden_states[-1].size() = ",all_hidden_states[-1].size()) #【1,18,768】=【batch,词语数量 ,词向量维度】
# # print("all_attentions.size[-1].size() = ",all_attentions[0-1.size()) #【1,12,18,18】
# Models are compatible with TorchScript
# model = model_class.from_pretrained(pretrained_weights, torchscript=True)
# traced_model = torch.jit.trace(model, (input_ids,))
# print("traced_model = ",traced_model)
# Simple serialization for models and tokenizers
# model.save_pretrained('./modelsave/bert/save_model_1/') # save
# model = BertModel.from_pretrained('./modelsave/bert/save_model_1/') #reload
# tokenizer.save_pretrained('./modelsave/bert/save_token_1/') #save
# tokenizer = BertTokenizer.from_pretrained('./modelsave/bert/save_token_1/')
def read_data():
    # Load the data
    data_corpus = pd.read_excel(r'D:\pro\pytorch\goods_name_classfiction/keyword_all.xlsx')  # all keywords
    corpus_list = list(data_corpus['keyword_all'].values)  # convert to a list, length 22933
    data_goods = pd.read_excel(r'D:\pro\pytorch\goods_name_classfiction/分词后数据.xlsx')
    # print(data_goods)
    return corpus_list, data_goods
# Load the data
corpus_list, data_goods = read_data()
classes_list = list(set(data_goods['一级分类'].values))
print(classes_list)
label_dict = {}
for i in range(len(classes_list)):
    label_dict[classes_list[i]] = i
print(label_dict)
['日化用品', '粮油调味', '糖巧饼干', '香烟', '计生情趣', '方便速食', '汽水饮料', '牛奶乳品', '冷冻食品', '应季鲜食', '生鲜专区', '无酒不欢', '生活百货', '个人护理', '冲调保健', '休闲零食', '母婴用品']
{'日化用品': 0, '粮油调味': 1, '糖巧饼干': 2, '香烟': 3, '计生情趣': 4, '方便速食': 5, '汽水饮料': 6, '牛奶乳品': 7, '冷冻食品': 8, '应季鲜食': 9, '生鲜专区': 10, '无酒不欢': 11, '生活百货': 12, '个人护理': 13, '冲调保健': 14, '休闲零食': 15, '母婴用品': 16}
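One caveat: set() iteration order is not stable across Python runs, so the id assigned to each category above can change between sessions. A small sketch of a reproducible mapping (sorting the names first; label_dict_stable is an illustrative name, not used by the rest of the notebook):
# Sketch: sort the category names so every run produces the same label ids.
classes_sorted = sorted(set(data_goods['一级分类'].values))
label_dict_stable = {name: idx for idx, name in enumerate(classes_sorted)}
print(label_dict_stable)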
# Build the dataset
class GoodsName_Label_Dataset(Dataset):
    def __init__(self, data_goods, label_dict):
        self.data_goods = data_goods
        self.label_dict = label_dict

    def __len__(self):
        # Return the number of samples
        return self.data_goods.shape[0]

    def __getitem__(self, ind):
        sku_name = self.data_goods['更新后商品名称'][ind]
        label = self.label_dict[self.data_goods['一级分类'][ind]]
        label = torch.LongTensor([label])
        return sku_name, label
# Build the DataLoader (batching and random sampling)
def set_dataloader(data_goods, label_dict):
    g_l_dataset = GoodsName_Label_Dataset(data_goods, label_dict)
    DL = DataLoader(g_l_dataset,
                    batch_size=1,  # the model below tokenizes one raw string at a time, so batch_size stays 1
                    shuffle=True)
    return DL

dataloader = set_dataloader(data_goods, label_dict)
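Because the model defined next tokenizes one raw string per forward pass, batch_size is pinned to 1. A hedged sketch of how larger batches could be formed instead, by padding token ids inside a custom collate_fn (collate_batch and _tok are illustrative names, and the wrapper's forward would also need to accept input_ids directly for this to be usable):
# Sketch: pad token ids to a common length so the DataLoader can batch samples.
from pytorch_transformers import BertTokenizer as _BertTokenizer
_tok = _BertTokenizer.from_pretrained('bert-base-chinese')

def collate_batch(batch):
    names, labels = zip(*batch)
    encoded = [_tok.encode(name) for name in names]      # token ids per product name
    max_len = max(len(ids) for ids in encoded)
    pad_id = _tok.convert_tokens_to_ids('[PAD]')         # 0 for bert-base-chinese
    padded = [ids + [pad_id] * (max_len - len(ids)) for ids in encoded]
    return torch.tensor(padded), torch.cat(labels)       # (batch, max_len), (batch,)

# Usage sketch:
# dataloader = DataLoader(GoodsName_Label_Dataset(data_goods, label_dict),
#                         batch_size=16, shuffle=True, collate_fn=collate_batch)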
# Build the model
pretrained_weights = 'bert-base-chinese'
tokenizer = BertTokenizer.from_pretrained(pretrained_weights)
model = BertModel.from_pretrained(pretrained_weights)
class Bert_Fc_Model(nn.Module):
    def __init__(self):
        super(Bert_Fc_Model, self).__init__()
        self.model = model            # pretrained BERT encoder (loaded above)
        self.tokenizer = tokenizer    # matching tokenizer (loaded above)
        self.fc = nn.Linear(768, 17)  # 768 hidden features -> 17 top-level categories
        self.dropout = nn.Dropout(0.1)

    def forward(self, input_str):
        input_ids = torch.tensor([self.tokenizer.encode(input_str)])
        # print("input_ids.size() = ", input_ids.size())
        out = self.model(input_ids)
        out = out[0][:, -1, :]        # hidden state of the last token, shape (1, 768)
        out = self.dropout(out)
        out = self.fc(out)
        return out
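Note that out[0][:, -1, :] keeps only the hidden state of the last token of the product name (no [CLS]/[SEP] tokens are added, since encode() is called without add_special_tokens, which defaults to False in pytorch_transformers). A hedged alternative, shown purely as a sketch, is to average over every token's hidden state; mean_pool_logits is an illustrative helper, not part of the original model:
# Sketch: mean-pool all token states instead of taking only the last token.
def mean_pool_logits(wrapper, input_str):
    input_ids = torch.tensor([wrapper.tokenizer.encode(input_str)])
    token_states = wrapper.model(input_ids)[0]   # (1, seq_len, 768)
    pooled = token_states.mean(dim=1)            # (1, 768)
    return wrapper.fc(wrapper.dropout(pooled))   # (1, 17)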
bert_model = Bert_Fc_Model()
# Training
bert_model.train()
print(bert_model)
def train():
    bert_model = Bert_Fc_Model()
    criterion = nn.CrossEntropyLoss()
    optim = torch.optim.AdamW(bert_model.parameters())
    correct_number = 0
    time_start = time.time()  # start time
    for i, item in enumerate(dataloader):
        sku_name, label = item
        input_str = sku_name[0]
        label = label.squeeze(dim=0)
        # Forward pass
        out = bert_model(input_str)
        # Zero the gradients
        optim.zero_grad()
        loss = criterion(out, label)
        # Backward pass
        loss.backward()
        # Update the parameters
        optim.step()
        # Track accuracy over the last 200 samples
        predict = torch.argmax(out)
        if label.item() == predict.item():
            correct_number += 1
        if (i + 1) % 200 == 0:
            print("loss = ", loss, "i+1 = ", i + 1)
            print("acc = ", str(round(correct_number / 200.0 * 100, 2)), "%")
            # Reset the running count
            correct_number = 0
            print("Elapsed time:", time.time() - time_start)
Bert_Fc_Model(
(model): BertModel(
(embeddings): BertEmbeddings(
(word_embeddings): Embedding(21128, 768, padding_idx=0)
(position_embeddings): Embedding(512, 768)
(token_type_embeddings): Embedding(2, 768)
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
(dropout): Dropout(p=0.1, inplace=False)
)
(encoder): BertEncoder(
(layer): ModuleList(
(0): BertLayer(
(attention): BertAttention(
(self): BertSelfAttention(
(query): Linear(in_features=768, out_features=768, bias=True)
(key): Linear(in_features=768, out_features=768, bias=True)
(value): Linear(in_features=768, out_features=768, bias=True)
(dropout): Dropout(p=0.1, inplace=False)
)
(output): BertSelfOutput(
(dense): Linear(in_features=768, out_features=768, bias=True)
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
(dropout): Dropout(p=0.1, inplace=False)
)
)
(intermediate): BertIntermediate(
(dense): Linear(in_features=768, out_features=3072, bias=True)
)
(output): BertOutput(
(dense): Linear(in_features=3072, out_features=768, bias=True)
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
(dropout): Dropout(p=0.1, inplace=False)
)
)
        ... layers (1) through (11): eleven more BertLayer blocks identical in structure to layer (0), omitted for brevity ...
)
)
(pooler): BertPooler(
(dense): Linear(in_features=768, out_features=768, bias=True)
(activation): Tanh()
)
)
(fc): Linear(in_features=768, out_features=17, bias=True)
(dropout): Dropout(p=0.1, inplace=False)
)
if __name__ == "__main__":
train()
loss = tensor(0.8110, grad_fn=) i+1 = 200
acc = 14.0 %
Elapsed time: 205.16717767715454
loss = tensor(2.9961, grad_fn=) i+1 = 400
acc = 13.5 %
Elapsed time: 424.97985672950745
---------------------------------------------------------------------------
KeyboardInterrupt Traceback (most recent call last)
in
1 if __name__ == "__main__":
----> 2 train()
in train()
30 loss.backward()
31 #优化参数
---> 32 optim.step()
33
34 # 计算准确率
D:\anaconda\envs\python37-pytorch\lib\site-packages\torch\optim\adamw.py in step(self, closure)
104 denom = max_exp_avg_sq.sqrt().add_(group['eps'])
105 else:
--> 106 denom = exp_avg_sq.sqrt().add_(group['eps'])
107
108 bias_correction1 = 1 - beta1 ** state['step']
KeyboardInterrupt: