将有父子级的dict转换成有父子级的excel
(当前处理方式只适用于层数较少时,层数多了就会很麻烦,且原始数据中还有部分需要与字典值进行对应,所以代码并不纯粹,后续有时间再进行精简)
- 我这边的数据字典结构数据多少不定,层级多少不定,不过最多只有三层
- 先将所有的父子级别的字典按照|分隔符号生成新的字典
{
父级|子级|孙级别:具体值
}
- 将上一步骤得到的新的字典通过pandas生成一个DataFrame对象
- 将空值赋值其他默认值
- split_r = pf.to_dict(orient='split')将DataFrame转回一个新的字典,得到三块列名,行数,数据(二维数组)
- 再进行循环将列名拆分开分成三级,然后生成新的DataFrame对象
- 将DataFrame转换成excel
之所以会分那么多步骤,是因为字典数据不定长,通过pandas的DataFrame对象,能帮助将不定长数据也按照对应填充
import pandas as pd
import numpy as np
def result_data_processing(result_data, result_dict):
"""
将数据多层数据处理成键值对应
:param result_data: 原始数据
:param result_dict: 原始数据字典
:return: 处理好的数据
"""
# 生产结果字典列表
excel_dict_list = []
# excel名称列表
listing_list = []
# 遍历第一层患者基本信息
for patient_key, patient_value in result_data.items():
excel_dict = {}
excel_dict["患者ID"] = patient_key
# 遍历第二层crf文档
for crf_key_id, crf_value in patient_value.items():
# 有部分数据主表中没有,但是子表中有
if crf_key_id not in result_dict.keys():
result_dict[crf_key_id] = crf_key_id
crf_key = result_dict[crf_key_id]
# 粒度是页面,会有下一层
if type(crf_value) == dict:
# 遍历第三层 页面
for page_key, page_value in crf_value.items():
# 如果粒度是截面
if type(page_value) == dict:
# 遍历第四层 截面
for section_key, section_value in page_value.items():
excel_dict[crf_key + "|" + page_key + "|" + section_key] = section_value
else:
excel_dict[crf_key + "|" + page_key] = page_value
else:
excel_dict[crf_key] = crf_value
excel_dict_list.append(excel_dict)
# 生成名称顺序列表
for excel_dict_key in excel_dict.keys():
if excel_dict_key not in listing_list:
listing_list.append(excel_dict_key)
return [excel_dict_list, listing_list]
# 先将所有的父子级别的字典按照|分隔符号生成新的字典
excel_dict_list = result_data_processing(yuan, {"1": "1111"})
# 将字典列表转换为DataFrame
pf = pd.DataFrame(list(excel_dict_list[0]))
# 指定列名顺序
pf = pf[excel_dict_list[1]]
# 将DataFrame中空的替换成固定值
pf.fillna('0.0%',inplace=True)
# DataFrame按照要求转dict
split_r = pf.to_dict(orient='split')
# 拿到所有字典的key-也是DataFrame的列名
columns_name = split_r["columns"]
# 拿到所有数据
all_data = split_r["data"]
# 创建新的结果字典
columns_one=[]
columns_two=[]
columns_three=[]
for columns in columns_name:
if columns == "患者ID":
columns_one.append(columns)
columns_two.append(columns)
columns_three.append(columns)
else:
columns_tree = columns.split("|")
if len(columns_tree) == 1:
columns_one.append(columns_tree[0])
elif len(columns_tree) == 2:
columns_one.append(columns_tree[0])
columns_two.append(columns_tree[1])
elif len(columns_tree) == 3:
columns_one.append(columns_tree[0])
columns_two.append(columns_tree[1])
columns_three.append(columns_tree[2])
# 将二维数组转换称numpy的二维数组
numpy_data = np.array(all_data)
if columns_three == ["患者ID"] and columns_two != ["患者ID"]:
pf = pd.DataFrame(numpy_data, index=split_r["index"],
columns=[columns_one, columns_two])
elif columns_two == ["患者ID"]:
pf = pd.DataFrame(numpy_data, index=split_r["index"],
columns=[columns_one])
else:
pf = pd.DataFrame(numpy_data, index=split_r["index"],
columns=[columns_one, columns_two, columns_three])
print(pf)
file_path = pd.ExcelWriter('test2.xlsx')
# 替换空单元格
pf.fillna('0.0%', inplace=True)
# 输出
pf.to_excel(file_path, encoding='utf-8', index=True)
# 保存表格
file_path.save()
我这边的数据结构
{
"26404112": {
"e216b428-e123-492a-af79-c9ffe72271a9": {
"产检情况": {
"孕产妇基本情况": "85.71%",
"孕周": "100.0%",
"不适主诉": "100.0%",
"妊娠合并症": "0.0%"
},
"实验室检查": {
"血脂": "100.0%",
"24小时尿蛋白定量": "100.0%",
"尿常规": "94.12%",
"肾功能": "85.71%",
"血糖及微量元素": "80.0%",
"凝血功能": "100.0%",
"肝功能": "100.0%",
"电解质": "87.5%",
"甲状腺功能": "85.71%",
"血常规": "100.0%"
},
"影像学检查": {
"产科B超检查": "75.0%",
"心电图检查": "100.0%"
},
"患者信息": {
"不良嗜好": "50.0%",
"基础信息": "100.0%",
"家族史": "0.0%",
"月经婚育史": "92.5%",
"既往史": "93.55%"
}
},
"8e078749-f263-43be-a371-d8ccf4656ac2": {
"产检和实验室检查": {
"产科检查": "53.33%",
"孕产妇基本情况": "0.0%",
"尿常规": "87.5%",
"妊娠合并症": "0.0%",
"孕周": "100.0%",
"不适主诉": "100.0%"
}
}
},
"111111": {
"e216b428-e123-492a-af79-c9ffe72271a9": {
"产检情况": {
"孕产妇基本情况": "85.71%",
"孕周": "100.0%",
"不适主诉": "100.0%",
"妊娠合并症": "0.0%"
},
"患者信息": {
"不良嗜好": "50.0%",
"基础信息": "100.0%",
"家族史": "0.0%",
"月经婚育史": "92.5%",
"既往史": "93.55%"
}
},
"e1190c2d-0032-477c-b051-6ffdffb208b6": {
"第一页": {
"疑似子痫前期:满足>1项检查发现": "100.0%",
"排除:明显的子痫前期、确诊的HELLP综合症、90天内服用过药物": "100.0%",
"疑似子痫前期:首次出现尿蛋白": "100.0%",
"疑似子痫前期:满足>1项症状": "100.0%",
"患者信息": "57.14%",
"排除:患者登记之后进一步排除部分病例": "100.0%",
"疑似子痫前期:血压首次升高": "85.19%",
"疑似子痫前期:血压进一步升高": "63.64%"
}
}
"201444cc-3788-43ef-bcd4-3fcdb3555e6b": {
"辅助检查": {
"肝功能": "100.0%",
"血常规": "100.0%",
"肾功能": "85.71%",
"电解质": "0.0%",
"凝血功能": "92.31%",
"心电图检查": "50.0%",
"血脂": "87.5%",
"尿常规": "85.71%"
},
"产检信息": {
"妊娠合并症": "0.0%",
"孕周": "0.0%",
"产科检查": "87.5%",
"不适主诉": "100.0%",
"孕产妇基本情况": "0.0%"
}
}
"8e078749-f263-43be-a371-d8ccf4656ac2": {
"产检和实验室检查": {
"产科检查": "53.33%",
"孕产妇基本情况": "0.0%",
"尿常规": "87.5%",
"妊娠合并症": "0.0%",
"孕周": "100.0%",
"不适主诉": "100.0%"
}
}
}
}