【Python】如何对xml文件进行新增、修改和删除等操作

文章目录

  • 1 按照示例文件内容新增xml文件
  • 2 查看并修改xml文件内容
  • 3 删除xml文件内容

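xml文件示例:

<?xml version="1.0"?>
<data>
    <disabled>false</disabled>
    <country name="Liechtenstein">
        <rank>1</rank>
        <year>2008</year>
        <gdppc>141100</gdppc>
        <neighbor name="Austria" direction="E"/>
        <neighbor name="Switzerland" direction="W"/>
    </country>
    <country name="Singapore">
        <rank>4</rank>
        <year>2011</year>
        <gdppc>59900</gdppc>
        <neighbor name="Malaysia" direction="N"/>
    </country>
    <country name="Panama">
        <rank>68</rank>
        <year>2011</year>
        <gdppc>13600</gdppc>
        <neighbor name="Costa Rica" direction="W"/>
        <neighbor name="Colombia" direction="E"/>
    </country>
    <builders>
    <hudson.tasks.Shell>
        <command>echo "Hello world!"</command>
    </hudson.tasks.Shell>
    </builders>
</data>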

1 按照示例文件内容新增xml文件

#!/usr/bin/env python
# -*- coding:utf-8 -*-
# author: Sudley
# ctime: 2020/02/16

import xml.etree.ElementTree as ET

def _add_country(root, name, rank, year, gdppc, neighbors):
    # Append one <country name=...> element with rank/year/gdppc/neighbor children to root.
    country = ET.SubElement(root, 'country', attrib={'name': name})
    # Element text must be a str; an int cannot be serialized by ElementTree.
    for tag, value in (('rank', rank), ('year', year), ('gdppc', gdppc)):
        ET.SubElement(country, tag).text = value
    for neighbor_name, direction in neighbors:
        ET.SubElement(country, 'neighbor',
                      attrib={'name': neighbor_name, 'direction': direction})
    return country


def create_Xml(xml_file):
    """Build the sample <data> document and write it to xml_file.

    The tree is assembled in memory (one <disabled> element, three
    <country> elements and one <builders> element), indented with
    prettyXml, printed with ET.dump and finally written to disk.

    Args:
        xml_file: Path of the XML file to create or overwrite.
    """
    # Create the root element directly instead of writing a stub file and
    # parsing it back: parsing a file without a root element raises ParseError.
    root = ET.Element('data')
    tree = ET.ElementTree(root)

    # <disabled>false</disabled>
    ET.SubElement(root, 'disabled').text = 'false'

    # The three <country> elements; attrib takes a plain dict of str -> str.
    # Attribute values must not carry their own quote characters, the
    # serializer adds the surrounding quotes itself.
    _add_country(root, 'Liechtenstein', '1', '2008', '141100',
                 [('Austria', 'E'), ('Switzerland', 'W')])
    _add_country(root, 'Singapore', '4', '2011', '59900',
                 [('Malaysia', 'N')])
    _add_country(root, 'Panama', '68', '2011', '13600',
                 [('Costa Rica', 'W'), ('Colombia', 'E')])

    # <builders><hudson.tasks.Shell><command>...</command></hudson.tasks.Shell></builders>
    builders = ET.SubElement(root, 'builders')
    shell = ET.SubElement(builders, 'hudson.tasks.Shell')
    ET.SubElement(shell, 'command').text = 'echo "Hello world!"'

    # Without indentation the whole document is serialized on a single line,
    # which is hard to read; add newlines and indentation before output.
    prettyXml(root, '    ', '\n')
    ET.dump(root)                 # print the indented XML to stdout

    tree.write(xml_file)          # persist the document to the local file

def prettyXml(element, indent, newline, level=0):
    """Indent an ElementTree element and its descendants in place.

    Args:
        element: The xml.etree.ElementTree.Element to format.
        indent: String used for one level of indentation.
        newline: String used as the line separator.
        level: Nesting depth of element (0 for the root).

    Returns:
        None. The text and tail attributes of the elements are modified.
    """
    children = list(element)
    # Use the child count explicitly: truth-testing an Element is deprecated.
    if children:
        if element.text is None or element.text.isspace():
            # No real text: just move the first child onto its own line.
            element.text = newline + indent * (level + 1)
        else:
            # Keep the existing text, placed on its own indented line.
            element.text = (newline + indent * (level + 1) + element.text.strip()
                            + newline + indent * (level + 1))
    # Leaf elements are left untouched so their text stays on the same line
    # as the opening and closing tags.

    last = len(children) - 1
    for position, subelement in enumerate(children):
        if position < last:
            # The next line starts a sibling, so keep the same indentation.
            subelement.tail = newline + indent * (level + 1)
        else:
            # The next line closes the parent, so indent one level less.
            subelement.tail = newline + indent * level
        prettyXml(subelement, indent, newline, level=level + 1)


# Path of the sample XML file; create_Xml builds the document and writes it there.
xml_file = '/tmp/template.xml'
create_Xml(xml_file)

2 查看并修改xml文件内容

查看所有的neighbor信息,并把attrib属性中'direction'为'E'的修改为'East'(以下示例假定已执行 tree = ET.parse('/tmp/template.xml') 和 root = tree.getroot())

>>> for neighbor in root.iter('neighbor'):
...   if neighbor.attrib['direction'] == 'E':
...     neighbor.attrib['direction'] = 'East'
...   print(neighbor.attrib)
...
{'direction': 'East', 'name': 'Austria'}
{'direction': 'W', 'name': 'Switzerland'}
{'direction': 'N', 'name': 'Malaysia'}
{'direction': 'W', 'name': 'Costa Rica'}
{'direction': 'East', 'name': 'Colombia'}

获取country的name属性和rank子标签的文本

>>> for country in root.findall('country'):
...   rank = country.find('rank').text
...   name = country.get('name')
...   print(name, rank)
...
Liechtenstein 1
Singapore 4
Panama 68

将rank的值加1,并在rank标签中新增attrib属性updated="yes"

>>> for rank in root.iter('rank'):
...   new_rank = int(rank.text) + 1
...   rank.text = str(new_rank)
...   rank.set('updated', 'yes')
...
>>> tree.write('/tmp/output.xml')

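修改后的xml文件如下:

<data>
    <disabled>false</disabled>
    <country name="Liechtenstein">
        <rank updated="yes">2</rank>
        <year>2008</year>
        <gdppc>141100</gdppc>
        <neighbor direction="East" name="Austria" />
        <neighbor direction="W" name="Switzerland" />
    </country>
    <country name="Singapore">
        <rank updated="yes">5</rank>
        <year>2011</year>
        <gdppc>59900</gdppc>
        <neighbor direction="N" name="Malaysia" />
    </country>
    <country name="Panama">
        <rank updated="yes">69</rank>
        <year>2011</year>
        <gdppc>13600</gdppc>
        <neighbor direction="W" name="Costa Rica" />
        <neighbor direction="East" name="Colombia" />
    </country>
    <builders>
        <hudson.tasks.Shell>
            <command>echo "Hello world!"</command>
        </hudson.tasks.Shell>
    </builders>
</data>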

3 删除xml文件内容

删除rank > 3的country,删除builders下的hudson.tasks.Shell标签,并配置text为deleted

>>> for country in root.findall('country'):
...   rank = int(country.find('rank').text)
...   if rank > 3:
...     root.remove(country)
...
>>> for builders in root.findall('builders'):
...   for shell in builders.findall('hudson.tasks.Shell'):
...     builders.remove(shell)
...   builders.text = 'deleted'
...
>>> tree.write('/tmp/output.xml')

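修改后的xml文件:

<data>
    <disabled>false</disabled>
    <country name="Liechtenstein">
        <rank updated="yes">2</rank>
        <year>2008</year>
        <gdppc>141100</gdppc>
        <neighbor direction="East" name="Austria" />
        <neighbor direction="W" name="Switzerland" />
    </country>
    <builders>deleted</builders>
</data>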

当一个标签下面有多个子标签时,如果一边遍历该标签一边使用remove删除,会发现一次删不全(遍历过程中子元素列表被修改,部分子标签会被跳过)
比如下面文件

<hudson.model.ListView>
  <name>test_all</name>
  <jobNames>
    <comparator class="hudson.util.CaseInsensitiveComparator" />
    <string>compile</string>
    <string>get_node_list</string>
    <string>job_data</string>
    <string>new_job</string>
    <string>pipeline0</string>
    <string>pipeline1</string>
    <string>template</string>
    <string>test_1</string>
    <string>test_2</string>
    <string>test_3</string>
  </jobNames>
  <jobFilters />
  <recurse>false</recurse>
</hudson.model.ListView>

想删除jobNames标签下的所有string标签,可先获取string的数量num然后执行num次删除操作,删除部分代码参考如下

root = tree.getroot()
for jobNames in root.findall('jobNames'):
    # Iterate over a snapshot of the children. Removing from jobNames while
    # looping over jobNames itself skips the element that follows each removed
    # one, which is why a single pass used to leave some <string> tags behind.
    # With the snapshot, one pass removes every match, so the loop does not
    # need to be repeated once per <string> element.
    for string in list(jobNames):
        if string.text:  # children without text (e.g. <comparator ... />) are kept
            jobNames.remove(string)

参考文献
https://docs.python.org/3/library/xml.etree.elementtree.html#xml.etree.ElementTree.SubElement
Python使用ElementTree处理XML的美化

你可能感兴趣的:(Python)