# 【pandas 小记】pandas 读写xml文件


import xml.etree.ElementTree as ET
import pandas as pd

def iter_records(records):
    """
    Lazily yield one dict per record element.

    Every child of a record contributes one entry: the child's
    ``var_name`` attribute is the key and the child's text is the value.

    :param records: iterable of record elements; each record is itself
        iterable over child elements exposing ``attrib`` and ``text``
    :return: generator producing one dict per record
    """
    for node in records:
        # If two children share a var_name, the later one wins.
        yield {child.attrib['var_name']: child.text for child in node}


def read_xml(xmlFileName):
    """
    Read an XML file and return its records as a DataFrame.

    Each child of the root element becomes one row; the columns come from
    the ``var_name`` attributes collected by ``iter_records``.

    :param xmlFileName: path of the XML file to read
    :return: pandas DataFrame with one row per record element
    """
    # Hand the path to ElementTree instead of a text-mode file object:
    # the parser then reads raw bytes and honours the encoding declared in
    # the XML prolog, rather than whatever the platform's default locale
    # encoding happens to be.
    root = ET.parse(xmlFileName).getroot()
    # Walk the records under the root and build the DataFrame.
    return pd.DataFrame(list(iter_records(root)))


def xml_encode(row):
    """
    Serialize one DataFrame row as an XML ``<record>`` element.

    Each field becomes ``<var var_name="FIELD">VALUE</var>``, which is the
    layout ``read_xml`` / ``iter_records`` read back (they key on the
    ``var_name`` attribute). Tag names can be changed as needed.

    :param row: pandas Series holding one row (index = field names)
    :return: multi-line string containing the record element
    """
    # Function-scope import so the block does not rely on a new
    # file-level import.
    from xml.sax.saxutils import escape, quoteattr

    # Opening tag of the record node.
    lines = ['  <record>']
    for field, value in row.items():
        # quoteattr() adds the surrounding quotes and escapes the name;
        # escape() protects '&', '<' and '>' inside the element text.
        lines.append(
            '    <var var_name={0}>{1}</var>'.format(
                quoteattr(str(field)), escape(str(value))
            )
        )
    # Closing tag of the record node.
    lines.append('  </record>')

    return '\n'.join(lines)


def write_xml(xmlFileName, data):
    """
    Write a DataFrame to an XML file.

    The file consists of an XML declaration, a ``<records>`` root element
    and one element per row produced by ``xml_encode``. The root tag name
    can be changed as needed.

    :param xmlFileName: path of the XML file to create or overwrite
    :param data: pandas DataFrame whose rows are written as records
    :return: None
    """
    # Write UTF-8 explicitly so the bytes on disk match the encoding
    # announced in the declaration, independent of the platform locale.
    with open(xmlFileName, 'w', encoding='utf-8') as xmlfile:
        # Header: XML declaration followed by the root opening tag.
        xmlfile.write('<?xml version="1.0" encoding="UTF-8"?>\n')
        xmlfile.write('<records>\n')

        # Body: one encoded record per DataFrame row.
        for _, row in data.iterrows():
            xmlfile.write(xml_encode(row))
            xmlfile.write('\n')

        # Footer: root closing tag.
        xmlfile.write('</records>\n')


# Example run: hard-coded input and output paths.
# NOTE(review): "xml_filenane" looks like a typo of "xml_filename"; left
# unchanged here because renaming would alter a module-level name.
xml_filenane = r'E:\data\realEstate_trans.xml'
new_filename = r'E:\data\realEstate_trans_output.xml'
xml_df = read_xml(xml_filenane)    # load the XML file into a DataFrame
print(xml_df)
# write_xml(new_filename, xml_df)  # write the DataFrame to a new XML file

# 你可能感兴趣的:(pandas)