1.ann
2.代码
import os
import re
import pandas as pd
import numpy as np
import random
import math
from datetime import datetime
from matplotlib import pyplot as plt
# Dataset locations, given relative to the current working directory.
# train_dir is the directory scanned for entity statistics below.
train_dir = '../../siriyang/中医药命名实体识别/dataset/train'
test_dir = '../../siriyang/中医药命名实体识别/dataset/chusai_xuanshou'
prepare_dir = './prepare'
def get_entitie(dir):
    """Count how often each entity label occurs in the ``.ann`` files of a directory.

    Every annotation line is split on tabs; the label is the first
    space-separated token of the second field (e.g. ``DRUG_EFFICACY``).

    Args:
        dir: Path of the directory to scan (not recursive).

    Returns:
        dict mapping each entity label to its number of occurrences.
        Empty when the directory holds no ``.ann`` files.
    """
    entities = {}
    # Only read files that really end in ".ann". Deriving base names from
    # ".txt" files as well would try to open annotation files that do not
    # exist, and cutting the name at the first "." would open the wrong file
    # for names containing extra dots. Sorting keeps the result order stable.
    ann_files = sorted(name for name in os.listdir(dir) if name.endswith('.ann'))
    for file in ann_files:
        path = os.path.join(dir, file)
        with open(path, 'r', encoding='utf8') as f:
            for line in f:
                fields = line.rstrip('\r\n').split('\t')
                if len(fields) < 2:
                    # Blank or malformed line: there is no label field.
                    continue
                name = fields[1].split(' ')[0]
                if not name:
                    continue
                entities[name] = entities.get(name, 0) + 1
    return entities
# Gather the per-label counts for the training set and print the raw numbers.
count = get_entitie(train_dir)
print(count)
print(list(count.keys()))    # labels only
print(list(count.values()))  # occurrences only
3.结果
def autolabel(rects):
    """Write each bar's height above it, then set the x-axis tick labels.

    Args:
        rects: Iterable of bar patches, e.g. the return value of ``plt.bar``.

    NOTE: the tick positions and labels come from the module-level
    ``num_list`` and ``name_list``, so both must be defined before this
    function is called.
    """
    for bar in rects:
        bar_height = bar.get_height()
        center_x = bar.get_x() + bar.get_width() / 2
        # Centre the text horizontally and sit it on top of the bar.
        plt.text(center_x, bar_height, bar_height, ha='center', va='bottom')
    # rotation=80 tilts the tick labels.
    plt.xticks(range(len(num_list)), name_list, rotation=80)
# Bar chart of the number of occurrences of every entity label.
name_list = list(count.keys())
num_list = list(count.values())

plt.figure(figsize=(10, 5))  # canvas size
plt.title('Category statistics of entities', fontsize=13)
plt.xlabel('category', fontsize=13)
plt.ylabel('quantity', fontsize=13)
autolabel(
    plt.bar(
        range(len(num_list)),
        num_list,
        width=0.8,
        edgecolor='darkblue',  # outline colour of the bars
        lw=1,                  # outline width of the bars
    )
)
# Grab the figure handle before show() so it can still be saved afterwards.
fig = plt.gcf()
plt.show()
fig.savefig('./Category statistics of entities.png')
import numpy as np
import matplotlib.pyplot as plt
import matplotlib as mpl
def draw_pie(labels, quants):
    """Draw a pie chart, show it and save it as ``./pie.jpg``.

    Args:
        labels: One label per slice.
        quants: One numeric value per slice, same length as ``labels``.

    NOTE: ``plt.figure(1, ...)`` activates figure number 1; if a figure with
    that number is still open it is reused rather than created afresh.
    """
    plt.figure(1, figsize=(10, 10))
    # One offset per slice (all zero: no slice is pulled out). The list must
    # have exactly as many entries as there are slices, so size it from the
    # data instead of hard-coding a length.
    expl = [0] * len(quants)
    # Fewer colours than slices is fine: the colours are cycled.
    colors = ["blue", "red", "coral", "green", "yellow", "orange"]
    plt.pie(
        quants,
        explode=expl,
        colors=colors,
        labels=labels,
        autopct='%1.1f%%',  # format of the percentage text
        pctdistance=0.8,
        shadow=True,
    )
    plt.title('length of the entity', bbox={'facecolor': '0.8', 'pad': 5})
    # Grab the figure handle before show() so it can still be saved afterwards.
    fig = plt.gcf()
    plt.show()
    fig.savefig("./pie.jpg")
    plt.close()
# Pie chart driver.
# NOTE(review): `dic` is not defined in this part of the file; it must be
# built elsewhere before these lines run — confirm.
labels = list(dic.keys())
quants = list(dic.values())
draw_pie(labels, quants)
直方图博客链接分享
饼状图链接分享