对于标注完成的数据,我们通常希望了解各类别的分布是否均衡。为此,本文整理了针对 txt 和 json 两种标签格式分别进行类别统计并绘制柱状图的代码。
import matplotlib.pyplot as plt
import os
# Select the SimHei font so the Chinese title and class names can be rendered,
# and disable the Unicode minus glyph for the axes.
plt.rcParams['font.sans-serif'] = ['SimHei']
plt.rcParams['axes.unicode_minus'] = False

# Path of the text file listing one class name per line,
# e.g. "C:/Users/Admin/Desktop/classes.txt".
# The context manager closes the handle as soon as the names are read; the
# name ``cn_path`` stays bound (to the closed file) afterwards.
with open("") as cn_path:
    classes = [line.rstrip("\n") for line in cn_path]
print(classes)

# Per-class counter keyed by class name, in the order the names were read.
class_dict = {name: 0 for name in classes}
print("类别数", len(class_dict))
def main(base_path):
    """Count labelled objects per class in txt label files and plot the totals.

    Every ``.txt`` file in ``base_path`` other than ``classes.txt`` is read
    line by line. The first whitespace-separated field of each non-blank line
    is used as an integer index into the module-level ``classes`` list, and
    the matching entry of the module-level ``class_dict`` is incremented.
    The totals are then drawn as an annotated bar chart, written to
    ``./统计.png`` and displayed.

    Args:
        base_path: Directory containing the txt label files. A trailing path
            separator is accepted but not required.

    Raises:
        ValueError: If the first field of a non-blank line is not an integer.
        IndexError: If a class index has no entry in ``classes``.
    """
    for file in os.listdir(base_path):
        # classes.txt holds the class names rather than annotations, and
        # anything without a .txt suffix is not a label file.
        if file == "classes.txt" or not file.lower().endswith(".txt"):
            continue
        # os.path.join works whether or not base_path ends with a separator.
        with open(os.path.join(base_path, file), 'r') as f:
            for line in f:
                # Split first: indexing the raw string would only yield its
                # first character. Splitting on any whitespace also tolerates
                # leading spaces and tabs.
                fields = line.split()
                if not fields:
                    # Blank line (commonly the last one) - nothing to count.
                    continue
                class_dict[classes[int(fields[0])]] += 1

    _fig, ax = plt.subplots(figsize=(10, 8))
    plt.title('数量')
    plt.xticks(rotation=90)  # rotate the x tick labels by 90 degrees
    bars = plt.bar(class_dict.keys(), class_dict.values())

    # Bar annotation adapted from:
    # https://blog.csdn.net/MyName_Guan/article/details/110390312
    for b in bars:
        height = b.get_height()
        ax.annotate(
            f'{height}',
            # Point being annotated: the middle of the bar's top edge.
            xy=(b.get_x() + b.get_width() / 2, height),
            # With textcoords set, xytext is an offset from xy.
            xytext=(0, 3),
            textcoords="offset points",
            va='bottom', ha='center',  # vertical / horizontal alignment
        )

    # Other savefig keywords (e.g. facecolor, edgecolor) were left disabled
    # by the original author.
    plt.savefig(
        './统计.png',          # output file name
        dpi=100,               # output resolution
        bbox_inches='tight',   # keep the whole figure in the saved image
    )
    plt.show()
if __name__ == '__main__':
    # Directory holding the txt label files; include the trailing slash,
    # e.g. "C:/Users/Admin/Desktop/txt_label/"
    base_path = ""
    main(base_path)
import json
import os
import matplotlib.pyplot as plt
# Select the SimHei font so the Chinese title and class names can be rendered,
# and disable the Unicode minus glyph for the axes.
plt.rcParams['font.sans-serif'] = ['SimHei']
plt.rcParams['axes.unicode_minus'] = False

# Path of the text file listing one class name per line,
# e.g. "C:/Users/Admin/Desktop/classes.txt".
# The context manager closes the handle as soon as the names are read; the
# name ``read_classes`` stays bound (to the closed file) afterwards.
with open(r"") as read_classes:
    # Per-class counter keyed by class name, in the order the names were read.
    classes_dict = {line.rstrip('\n'): 0 for line in read_classes}
print('类别数:', len(classes_dict))
def main(base_path):
    """Count labelled shapes per class in json label files and plot the totals.

    Every ``.json`` file in ``base_path`` is loaded (in sorted name order)
    and, for each entry of its ``"shapes"`` list, the counter stored under the
    entry's ``"label"`` in the module-level ``classes_dict`` is incremented.
    A label that is not listed in the classes file is added to the counter
    instead of aborting the run, so it shows up in the printed totals and in
    the chart. The totals are printed, drawn as an annotated bar chart,
    written to ``./统计.png`` and displayed.

    Args:
        base_path: Directory containing the json label files. A trailing path
            separator is accepted but not required.

    Raises:
        KeyError: If a json file has no ``"shapes"`` key or a shape has no
            ``"label"`` key.
    """
    for name in sorted(os.listdir(base_path)):
        if os.path.splitext(name)[1] != ".json":
            continue
        # os.path.join works whether or not base_path ends with a separator;
        # the with-block closes the file as soon as it has been parsed.
        with open(os.path.join(base_path, name), encoding='UTF-8') as fp:
            data = json.load(fp)
        for shape in data["shapes"]:
            label = shape['label']
            classes_dict[label] = classes_dict.get(label, 0) + 1
    print(classes_dict)

    # Bar annotation adapted from:
    # https://blog.csdn.net/MyName_Guan/article/details/110390312
    _fig, ax = plt.subplots(figsize=(10, 8))
    plt.title('数量')
    plt.xticks(rotation=90)  # rotate the x tick labels by 90 degrees
    bars = plt.bar(classes_dict.keys(), classes_dict.values())
    for b in bars:
        height = b.get_height()
        ax.annotate(
            '{}'.format(height),
            # Point being annotated: the middle of the bar's top edge.
            xy=(b.get_x() + b.get_width() / 2, height),
            # With textcoords set, xytext is an offset from xy.
            xytext=(0, 3),
            textcoords="offset points",
            va='bottom', ha='center',  # vertical / horizontal alignment
        )

    # Other savefig keywords (e.g. facecolor, edgecolor) were left disabled
    # by the original author.
    plt.savefig(
        './统计.png',          # output file name
        dpi=100,               # output resolution
        bbox_inches='tight',   # keep the whole figure in the saved image
    )
    plt.show()
if __name__ == '__main__':
    # Directory holding the json label files; include the trailing slash,
    # e.g. "C:/Users/Admin/Desktop/json_label/"
    base_path = ""
    main(base_path)