1.给定一个文件夹,里面还有多个子文件夹
2.根据.xml文件,自动统计不同细胞的个数
1.将.xml文件转化为.csv文件
2.利用value_counts()计算
第1段:主要是将.xml文件转为.csv
import os
import glob
import pandas as pd
import xml.etree.ElementTree as ET
def xml_to_csv(path):
xml_list = []
for xml_file in glob.glob(path + '/*.xml'):
tree = ET.parse(xml_file)
root = tree.getroot()
for member in root.findall('object'):
value = (
member[0].text,
int(member[4][0].text),
int(member[4][1].text),
int(member[4][2].text),
int(member[4][3].text)
)
xml_list.append(value)
column_name = ['name', 'xmin', 'ymin', 'xmax', 'ymax']
xml_df = pd.DataFrame(xml_list, columns=column_name)
return xml_df
第2段:我在桌面上建立一个文件夹,命名为csv,该文件用来存.csv文件,其路径为:C:/Users/晓/Desktop/csv/
dirs = []
image_path = '图片的路径'
for info in os.listdir(image_path):
print(info)
dirs.append(info)
print(dirs)
i = 1
for dir in dirs:
#print (dir)
path = image_path + dir
#print(path)
xml_df = xml_to_csv(path)
save_path = 'C:/Users/晓/Desktop/csv/' + dir + '.csv'
#print(save_path)
xml_df.to_csv(save_path, index=None)
print('序号{}:'.format(i)+"Successfully converted xml to csv.")
i = i + 1
第3段:将各个.csv文件转为一个总.csv文件,主要是为了计算所有子文件夹细胞的总数
import os
import pandas as pd
lcvs=[]
for info in os.listdir('C:/Users/晓/Desktop/csv/'):
domain = os.path.abspath(r'C:/Users/晓/Desktop/csv/') #获取文件夹的路径
info = os.path.join(domain,info) #将路径与文件名结合起来就是每个文件的完整路径
lcvs.append(pd.read_csv(info,engine='python'))
nf=pd.concat(lcvs)
nf.to_csv('C:/Users/晓/Desktop/csv/all.csv')
第4段:计算每一类细胞的个数,会在桌面上生成一个result.txt文件
import os
import pandas as pd
texts = []
for info in os.listdir('C:/Users/晓/Desktop/csv/'):
domain = os.path.abspath(r'C:/Users/晓/Desktop/csv') #获取文件夹的路径
#print(info)
texts.append(info)
info = os.path.join(domain,info) #将路径与文件名结合起来就是每个文件的完整路径
data = pd.read_csv(info,engine='python')
n2=data.name.value_counts()
n3="totall" + " " + str(data.shape[0])
texts.append(n2)
texts.append(n3)
with open('C:/Users/晓/Desktop/result.txt', 'a', encoding='utf8') as f:
strtexts = str(texts)
strtexts =strtexts.replace('Name: name, dtype: int64,', '').replace(',','\n').replace('[','').replace(']','').replace('','')
print(strtexts)
f.write(strtexts + "\r\n")