bert-base-chinese模型使用教程

向量编码和向量相似度展示
import torch
from transformers import BertTokenizer, BertModel
import numpy as np

# Local path to the bert-base-chinese checkpoint downloaded via ModelScope.
# NOTE(review): hard-coded Windows path — adjust for your own machine, or use
# the hub id "bert-base-chinese" to download from the Hugging Face hub.
model_name = "C:/Users/Administrator.DESKTOP-TPJL4TC/.cache/modelscope/hub/tiansz/bert-base-chinese"

# Example sentences (classical Chinese verses) used by the demos below.
sentences = ['春眠不觉晓', '大梦谁先觉', '浓睡不消残酒', '东临碣石以观沧海']

tokenizer = BertTokenizer.from_pretrained(model_name)
# print(type(tokenizer))  # transformers.models.bert.tokenization_bert.BertTokenizer

model = BertModel.from_pretrained(model_name)
# print(type(model))  # transformers.models.bert.modeling_bert.BertModel

def test_encode():
    """Encode one sentence with BERT and print the shapes of its outputs.

    Demonstrates three sentence-representation options:
      1. token-level hidden states  -> (1, seq_len, 768)
      2. mean-pooled token vectors  -> (1, 768)
      3. the model's pooler output  -> (1, 768)
    """
    # 5 characters + [CLS]/[SEP] special tokens -> 7 input ids, shape (1, 7).
    input_ids = tokenizer.encode('春眠不觉晓', return_tensors='pt')
    # Inference only — disable autograd bookkeeping.
    with torch.no_grad():
        output = model(input_ids)
    print(output.last_hidden_state.shape)  # (1, 7, 768): one vector per token
    # Mean-pool over the token dimension (dim=1) for a single sentence vector.
    v = torch.mean(output.last_hidden_state, dim=1)
    print(v.shape)  # (1, 768)
    # pooler_output: the [CLS] hidden state after BERT's Linear+Tanh pooler.
    # (Original line was truncated mid-expression — restored to print its shape.)
    print(output.pooler_output.shape)  # (1, 768)

你可能感兴趣的话题:人工智能