Python 利用 lxml 解析 XML 文件
运行下面的程序之前,请先确认你的运行环境已经安装了 Python 和 lxml。
如果尚未安装 lxml,请参考 http://blog.csdn.net/zhaokuo719/article/details/8209496 进行安装。
#!/usr/bin/python
#--*--coding:UTF-8--*--
from lxml import etree
import re
def operationXML(xml_file, lastModparentNode, lastModChildNode=None):
    """Print the text of selected children for every matching element.

    The document is parsed with lxml. Direct children of the root element
    whose tag equals ``lastModparentNode`` are looked up (the tag is
    qualified with the root's namespace when the root has one). For each
    match a list of text values is built, and the resulting list of lists
    is printed. Nothing is returned.

    Args:
        xml_file: Source handed to ``etree.ElementTree(file=...)``.
        lastModparentNode: Local tag name of the elements to inspect.
        lastModChildNode: Local tag names of the children whose text is
            collected. When omitted or empty, the text of every direct
            child of each match is collected instead.

    Any exception raised while parsing or walking the document is caught
    and printed rather than propagated, so the function never raises.
    """
    child_names = lastModChildNode or []
    try:
        doc = etree.ElementTree(file=xml_file)
        root = doc.getroot()
        # getNameSpace yields "{uri}" or None; an empty prefix leaves tag
        # names unqualified for documents without a namespace.
        prefix = getNameSpace(doc) or ""
        parents = root.findall(prefix + lastModparentNode)
        all_child_nodes = []
        if not parents:
            print("%s is emtpy" % (xml_file))
        else:
            for parent in parents:
                if child_names:
                    texts = []
                    for name in child_names:
                        child = parent.find(prefix + name)
                        # find() returns None for a missing child; record
                        # None instead of failing on ``.text`` and losing
                        # every result collected so far.  Compare with
                        # ``is not None``: element truthiness reflects
                        # whether it has children, not whether it exists.
                        texts.append(child.text if child is not None else None)
                else:
                    texts = [child.text for child in parent]
                all_child_nodes.append(texts)
        print(all_child_nodes)
    except Exception as e:
        # Best-effort script boundary: report the problem and carry on.
        print(e)
def getNameSpace(doc):
    """Return the namespace of the document's root element.

    The root tag is searched for a brace-delimited part, as in
    ``{uri}local``; that part is returned including the braces so it can
    be prepended directly to a local tag name.

    Args:
        doc: Object exposing ``getroot()`` whose result has a ``tag``
            attribute.

    Returns:
        The ``{...}`` portion of the root tag, or ``None`` when the tag has
        no such part or when inspecting the document fails (the error is
        printed, not raised).
    """
    ns = None
    try:
        # Search once and reuse the match instead of running the regex twice.
        match = re.search(r'(\{.+\})', doc.getroot().tag)
        if match is not None:
            ns = match.group(1)
    except Exception as e:
        # Best-effort: a document that cannot be inspected counts as having
        # no namespace.
        print(e)
    return ns
if __name__ == "__main__":
    # Demo run against a local sample file.
    #
    # NOTE(review): the sample XML's markup was lost from the original note;
    # only these text values remain, in this order:
    #     xiaozhao, m, 30, zhao, boy, 12
    # Presumably they sat inside <employee> elements that have a <name>
    # child -- TODO confirm against the real a.xml.
    # Original author's remark: Chinese text currently comes out garbled.
    sample_path = "D:/a.xml"
    operationXML(sample_path, "employee", ["name"])