xml文件:
<data>
    <KHXX_INFO>
        <KHXX>
            <XM>taoge</XM>
            <DHHM>13242008556</DHHM>
        </KHXX>
        <KHXX>
            <XM>taoge1</XM>
            <DHHM>13242008557</DHHM>
        </KHXX>
    </KHXX_INFO>
    <XMXX_INFO>
        <XMXX>
            <XMBH>DB111111</XMBH>
        </XMXX>
        <XMXX>
            <XMBH>DB222222</XMBH>
        </XMXX>
    </XMXX_INFO>
    <country_info name="Liechtenstein">
        <country>
            <rank>1</rank>
            <year>2008</year>
            <gdppc>141100</gdppc>
            <neighbor name="Austria" direction="E"/>
        </country>
        <country name="Singapore">
            <rank>4</rank>
            <year>2011</year>
            <gdppc>59900</gdppc>
            <neighbor name="Malaysia" direction="N"/>
        </country>
    </country_info>
</data>
import xml.etree.ElementTree as et
class ParseXML:
    """Look up customer and project records in an XML document.

    The document is expected to have a root element containing a
    ``KHXX_INFO`` element (whose children each carry an ``XM`` child) and an
    ``XMXX_INFO`` element (whose children each carry an ``XMBH`` child).
    """

    def __init__(self, file, cust_no, apply_no):
        """Store the lookup parameters.

        :param file: file name or file object accepted by ``ElementTree.parse``.
        :param cust_no: value compared for equality with each ``XM`` text.
        :param apply_no: value searched for (as a substring, after ``str()``)
            in each ``XMBH`` text.
        """
        print('parse xml...')
        # Always define the attribute so that a missing file is reported with
        # a clear error in get_root() instead of an AttributeError.
        self.file = file
        self.cust_no = cust_no
        self.apply_no = apply_no

    def get_root(self):
        """Parse ``self.file`` and return the root ``Element`` of the tree.

        The file is re-parsed on every call.

        :return: the root element of the parsed document.
        :raises ValueError: if no file was supplied to the constructor.
        """
        if not self.file:
            raise ValueError('no XML file was supplied')
        # parse() returns the whole ElementTree; getroot() returns its root
        # Element. On any Element, .tag is the node name (str), .attrib the
        # attributes (dict) and .text the text content.
        return et.parse(self.file).getroot()

    def get_child_khxx(self):
        """Return the first ``KHXX_INFO`` child whose ``XM`` equals ``cust_no``.

        :return: the matching element serialized by ``ElementTree.tostring``
            (bytes), or the empty string ``''`` when there is no
            ``KHXX_INFO`` element or no child matches.
        """
        container = self.get_root().find('KHXX_INFO')
        if container is None:
            return ''
        # Scan every record; only give up after all of them were checked.
        for khxx in container:
            name = khxx.find('XM')
            if name is not None and name.text == self.cust_no:
                return et.tostring(khxx)
        return ''

    def get_child_xmxx(self):
        """Return the first ``XMXX_INFO`` child whose ``XMBH`` contains ``apply_no``.

        ``apply_no`` is converted with ``str()`` before the substring test so
        that numeric values can be passed.

        :return: the matching element serialized by ``ElementTree.tostring``
            (bytes), or the empty string ``''`` when ``apply_no`` is ``None``,
            there is no ``XMXX_INFO`` element, or no child matches.
        """
        if self.apply_no is None:
            return ''
        needle = str(self.apply_no)
        container = self.get_root().find('XMXX_INFO')
        if container is None:
            return ''
        for xmxx in container:
            number = xmxx.find('XMBH')
            # An empty <XMBH/> has text None; treat it as an empty string.
            if number is not None and needle in (number.text or ''):
                return et.tostring(xmxx)
        return ''
if __name__ == '__main__':
    # Demo run: look up the customer record named 'taoge1' in example.xml and
    # show the serialized element. Guarded so that importing this module does
    # not require example.xml to exist.
    parsexml = ParseXML('example.xml', 'taoge1', 1)
    print(parsexml.get_child_khxx())
    # parsexml.get_child_xmxx()
'''
# 3.3 findall 方法获取当前节点的子结点中指定的所有元素
# 不能查找孙结点
# find 方法只获取当前节点指定的第一个子结点
for child in root:
    # print(child)
    for khxx in child.findall('KHXX'):
        print(khxx.tag)

# 写 xml
# 修改、添加元素
for rank in root.iter('rank'):
    new_rank = int(rank.text) + 1
    rank.text = str(new_rank)  # 修改元素值
    rank.set('updated', 'yes')  # 设置属性值
tree.write('output.xml')  # write 把 xml 树对象写到文件中

# remove 删除元素
# 删除包含 rank > 3 元素的节点
for country_info in root.findall('country_info'):
    for country in country_info.findall('country'):
        rank = int(country.find('rank').text)
        if rank > 3:
            country_info.remove(country)
tree.write('output.xml')

# tostring()函数将 root 对象转化为字符串
xml_str = et.tostring(root)
print(xml_str)

# fromstring()函数使用字符串重新构造一个Element对象,并赋值给root变量
root = et.fromstring(xml_str)
print(root.tag)
'''