OS:Windows 7
关键字:Python3.4,XML,ElementTree,minidom
本文介绍用Python解析生成以下XML:
<Persons> <Person> <Name>LDL</Name> <Description Language='English'><![CDATA[cdata text]]></Description> </Person> <Person> <Name>China</Name> <Description Language='Chinese'><![CDATA[cdata text]]></Description> </Person> </Persons>
1.创建一个xml文件名为src.xml,内容如上,放到c:\temp
2.使用ElementTree读取src.xml,并创建一个内容相同的xml名为target-tree.xml。
ElementTreeSample.py如下:
# -*- coding: utf-8 -*-
"""
Sample of xml.etree.ElementTree

Parses an XML file, copies its element tree node by node into a new tree,
writes the copy to a second file and prints how long each step took (ms).

@author: ldlchina
"""

import os
import sys
import logging
import traceback
import xml.etree.ElementTree as ET
import time

# Paths used when no command-line arguments are supplied.
DEFAULT_SRC = 'C:/temp/src.xml'
DEFAULT_TARGET = 'C:/temp/target-tree.xml'


def copy_node(src_node, target_node, *, keep_layout=False):
    """Recursively copy attributes, children and text from one element to another.

    Args:
        src_node: Element to read from.
        target_node: Already-created element that receives the copy.
        keep_layout: When False (the default), only leaf elements get their
            ``text`` copied; the ``text`` of elements that have children and
            every ``tail`` are dropped, so indentation and line breaks
            between tags disappear. When True, ``text`` and ``tail`` are
            always copied, which preserves whitespace and mixed content.
    """
    # Copy attributes.
    for key, value in src_node.items():
        target_node.set(key, value)

    children = list(src_node)
    if keep_layout or not children:
        target_node.text = src_node.text

    for child in children:
        target_child = ET.SubElement(target_node, child.tag)
        if keep_layout:
            # Text that follows the child's closing tag lives in its tail.
            target_child.tail = child.tail
        copy_node(child, target_child, keep_layout=keep_layout)


def read_write_xml(src, target):
    """Parse the file ``src``, copy its tree and write the copy to ``target``.

    Prints the time spent in copy_node in milliseconds and logs the target
    path once the file has been written.
    """
    root = ET.parse(src).getroot()
    target_root = ET.Element(root.tag)

    # perf_counter is monotonic, unlike the wall clock.
    start_time = time.perf_counter()
    copy_node(root, target_root)
    elapsed_ms = (time.perf_counter() - start_time) * 1000
    print('copy_node:' + str(elapsed_ms))

    ET.ElementTree(target_root).write(target)
    logging.info(target)


def main():
    """Configure logging, run the copy and report the total time.

    Source and target paths are taken from the first two command-line
    arguments when both are given; otherwise the DEFAULT_* paths are used.
    """
    try:
        current_file = os.path.realpath(__file__)

        # Configure logger: log file sits next to the script. splitext swaps
        # only the extension, never a '.py' elsewhere in the path.
        log_file = os.path.splitext(current_file)[0] + '.log'
        logging.basicConfig(filename=log_file, filemode='w',
                            level=logging.INFO)

        # Create console handler so log records are also echoed.
        ch = logging.StreamHandler()
        ch.setLevel(logging.INFO)
        logging.getLogger('').addHandler(ch)

        if len(sys.argv) >= 3:
            src, target = sys.argv[1], sys.argv[2]
        else:
            src, target = DEFAULT_SRC, DEFAULT_TARGET

        # Generate results.
        start_time = time.perf_counter()
        read_write_xml(src, target)
        elapsed_ms = (time.perf_counter() - start_time) * 1000
        print('read_write_xml:' + str(elapsed_ms))
    except Exception:
        # logging.exception appends the active traceback to the message.
        logging.exception('Failed to copy the XML file')

    input('Press any key to exit...')


if __name__ == '__main__':
    main()
3.使用minidom读取src.xml,并创建一个内容相同的xml名为target-dom.xml。
MinidomSample.py如下:
# -*- coding: utf-8 -*-
"""
Sample of xml.dom.minidom

Parses an XML file, copies its DOM node by node into a new document, writes
the copy to a second file and prints how long each step took (ms).

@author: ldlchina
"""

import os
import sys
import logging
import traceback
import xml.dom.minidom as MD
import time

# Paths used when no command-line arguments are supplied.
DEFAULT_SRC = 'C:/temp/src.xml'
DEFAULT_TARGET = 'C:/temp/target-dom.xml'


def get_text(n):
    """Return the concatenated data of the direct text and CDATA children of n."""
    return ''.join(
        node.data
        for node in n.childNodes
        if node.nodeType in (node.TEXT_NODE, node.CDATA_SECTION_NODE)
    )


def copy_node(target_doc, src_node, target_node):
    """Recursively copy attributes and child nodes of src_node into target_node.

    Text, CDATA and element children are copied in document order; other
    node types (comments, processing instructions, ...) are skipped.

    Args:
        target_doc: Document used as the factory for the new nodes.
        src_node: Document or element to read from.
        target_node: Node of target_doc that receives the copies.
    """
    # Attributes are only read from elements; the Document node passed in at
    # the top level is skipped.
    if not isinstance(src_node, MD.Document) and src_node.hasAttributes():
        for name, value in src_node.attributes.items():
            target_node.setAttribute(name, value)

    for node in src_node.childNodes:
        if node.nodeType == node.TEXT_NODE:
            target_node.appendChild(target_doc.createTextNode(node.nodeValue))
        elif node.nodeType == node.CDATA_SECTION_NODE:
            target_node.appendChild(
                target_doc.createCDATASection(node.nodeValue))
        elif node.nodeType == node.ELEMENT_NODE:
            target_child = target_doc.createElement(node.nodeName)
            target_node.appendChild(target_child)
            copy_node(target_doc, node, target_child)


def read_write_xml(src, target):
    """Parse the file ``src``, copy its DOM and write the root to ``target``.

    Prints the time spent in copy_node in milliseconds and logs the target
    path once the file has been written.
    """
    doc = MD.parse(src)
    target_doc = MD.Document()

    # perf_counter is monotonic, unlike the wall clock.
    start_time = time.perf_counter()
    copy_node(target_doc, doc, target_doc)
    elapsed_ms = (time.perf_counter() - start_time) * 1000
    print('copy_node: ' + str(elapsed_ms))

    # Write to file. The output carries no XML declaration, so readers will
    # assume UTF-8; encode explicitly instead of using the locale code page.
    with open(target, 'w', encoding='utf-8') as f:
        f.write(target_doc.documentElement.toxml())
    logging.info(target)


def main():
    """Configure logging, run the copy and report the total time.

    Source and target paths are taken from the first two command-line
    arguments when both are given; otherwise the DEFAULT_* paths are used.
    """
    try:
        current_file = os.path.realpath(__file__)

        # Configure logger: log file sits next to the script. splitext swaps
        # only the extension, never a '.py' elsewhere in the path.
        log_file = os.path.splitext(current_file)[0] + '.log'
        logging.basicConfig(filename=log_file, filemode='w',
                            level=logging.INFO)

        # Create console handler so log records are also echoed.
        ch = logging.StreamHandler()
        ch.setLevel(logging.INFO)
        logging.getLogger('').addHandler(ch)

        if len(sys.argv) >= 3:
            src, target = sys.argv[1], sys.argv[2]
        else:
            src, target = DEFAULT_SRC, DEFAULT_TARGET

        # Generate results.
        start_time = time.perf_counter()
        read_write_xml(src, target)
        elapsed_ms = (time.perf_counter() - start_time) * 1000
        print('read_write_xml: ' + str(elapsed_ms))
    except Exception:
        # logging.exception appends the active traceback to the message.
        logging.exception('Failed to copy the XML file')

    input('Press any key to exit...')


if __name__ == '__main__':
    main()
4.运行ElementTreeSample.py,得到XML如下:
<Persons><Person><Name>LDL</Name><Description Language="English">cdata text</Description></Person><Person><Name>China</Name><Description Language="Chinese">cdata text</Description></Person></Persons>
5.运行MinidomSample.py,得到XML如下:
<Persons> <Person> <Name>LDL</Name> <Description Language="English"><![CDATA[cdata text]]></Description> </Person> <Person> <Name>China</Name> <Description Language="Chinese"><![CDATA[cdata text]]></Description> </Person> </Persons>
ElementTree VS minidom:
1.ElementTree执行速度会比minidom快一些。
2.本例中ElementTree的输出没有保留XML的换行和缩进:ElementTree把这些空白保存在元素的text/tail属性中,而示例的copy_node没有复制带子节点元素的text和任何tail。minidom把空白作为文本节点逐个复制,因此可以保留。
3.ElementTree不保留CDATA标记:解析时CDATA的内容被当作普通文本读入,写出时不再带有<![CDATA[...]]>;minidom可以保留CDATA节点。