python 解析 XML

xml文件:


<data>
    <KHXX_INFO>
        <KHXX>
            <XM>taoge</XM>
            <DHHM>13242008556</DHHM>
        </KHXX>
        <KHXX>
            <XM>taoge1</XM>
            <DHHM>13242008557</DHHM>
        </KHXX>
    </KHXX_INFO>
    <XMXX_INFO>
        <XMXX>
            <XMBH>DB111111</XMBH>
        </XMXX>
        <XMXX>
            <XMBH>DB222222</XMBH>
        </XMXX>
    </XMXX_INFO>
    <country_info name="Liechtenstein">
        <country>
            <rank>1</rank>
            <year>2008</year>
            <gdppc>141100</gdppc>
            <neighbor name="Austria" direction="E"/>
        </country>
        <country name="Singapore">
            <rank>4</rank>
            <year>2011</year>
            <gdppc>59900</gdppc>
            <neighbor name="Malaysia" direction="N"/>
        </country>
    </country_info>
</data>
import xml.etree.ElementTree as et

class ParseXML:
    """Look up customer records in an XML document.

    The document is expected to have a ``KHXX_INFO`` element directly under
    the root; each of its children is matched on the text of its ``XM``
    child element.
    """

    def __init__(self, file, cust_no, apply_no):
        """Store the XML source and the lookup keys.

        :param file: file name or file object accepted by
            ``xml.etree.ElementTree.parse``.
        :param cust_no: value compared against each record's ``XM`` text.
        :param apply_no: stored on the instance; not read by the methods
            of this class.
        """
        print('parse xml...')
        # Always bind the attribute so that a falsy ``file`` fails at parse
        # time with a meaningful error instead of an AttributeError on
        # ``self.file``.
        self.file = file
        self.cust_no = cust_no
        self.apply_no = apply_no

    def get_root(self):
        """Parse ``self.file`` and return the root element.

        The returned ``Element`` exposes ``tag`` (node name, str),
        ``attrib`` (node attributes, dict) and ``text`` (node value).

        :return: root ``Element`` of the parsed tree.
        """
        # parse() builds the whole ElementTree; getroot() yields its root.
        return et.parse(self.file).getroot()

    def get_child_khxx(self):
        """Return the first ``KHXX_INFO`` child whose ``XM`` equals ``cust_no``.

        :return: the matching child serialized by ``et.tostring`` (bytes),
            or ``''`` when ``KHXX_INFO`` is absent or no child matches.
        """
        container = self.get_root().find('KHXX_INFO')
        if container is None:
            return ''
        for khxx in container:
            name = khxx.find('XM')
            # Skip records without an XM element rather than crashing.
            if name is not None and name.text == self.cust_no:
                return et.tostring(khxx)
        # Only give up after every record has been inspected.
        return ''
'''
    def get_child_xmxx(self):
        """
        查找 xmxx_info 节点中包含 apply_no 的子节点
        :return:
        """
        root = self.get_root()
        for xmxx in root.find('XMXX_INFO'):
            xmbh = xmxx.find('XMBH').text
            if xmbh.find(self.apply_no) != -1:
                return et.tostring(xmxx)
            else:
                return ''
'''

# Demo: search example.xml for the customer record named 'taoge1'.
# The lookup result is not used here.
parsexml = ParseXML(file='example.xml', cust_no='taoge1', apply_no=1)
parsexml.get_child_khxx()
# parsexml.get_child_xmxx()

'''
# 3.3 findall 方法获取当前节点的子结点中指定的所有元素
# 不能查找孙结点
# find 方法只获取当前节点指定的第一个子结点
for child in root:
    # print(child)
    for khxx in child.findall('KHXX'):
        print(khxx.tag)

# 写 xml

# 修改、添加元素
for rank in root.iter('rank'):
    new_rank = int(rank.text) + 1
    rank.text = str(new_rank)  # 修改元素值
    rank.set('updated', 'yes')  # 设置属性值

tree.write('output.xml')  # write 把 xml 树对象写到文件中

# remove 删除元素
# 删除包含 rank > 3 元素的节点
for country_info in root.findall('country_info'):
    for country in country_info.findall('country'):
       rank = int(country.find('rank').text)
       if rank > 3:
            country_info.remove(country)

tree.write('output.xml')


# tostring()函数将 root 对象转化为字符串
xml_str = et.tostring(root)
print(xml_str)
# fromstring()函数使用字符串重新构造一个Element对象,并赋值给root变量
root = et.fromstring(xml_str)
print(root.tag)
'''

你可能感兴趣的:(python)