Python解析生成XML-ElementTree VS minidom

OS:Windows 7

关键字:Python3.4,XML,ElementTree,minidom

 

本文介绍用Python解析生成以下XML:

<Persons>

    <Person>

        <Name>LDL</Name>

        <Description Language='English'><![CDATA[cdata text]]></Description>

    </Person>

    <Person>

        <Name>China</Name>

        <Description Language='Chinese'><![CDATA[cdata text]]></Description>

    </Person>

</Persons>

1.创建一个xml文件名为src.xml,内容如上,放到c:\temp

2.使用ElementTree读取src.xml,并创建一个内容相同的xml名为target-tree.xml。

ElementTreeSample.py如下:

# -*- coding: utf-8 -*-

"""

Sample of xml.etree.ElementTree



@author: ldlchina

"""



import os

import sys

import logging

import traceback

import xml.etree.ElementTree as ET

import time



def copy_node(src_node, target_node):
    """Recursively copy attributes, child elements and leaf text.

    Args:
        src_node: Element to read from.
        target_node: Element that receives the copy; it is modified in place.

    Text is copied only for elements that have no children. The text of an
    element that has children, and every element's tail, is not copied.
    """
    # Attributes first
    for attr_name, attr_value in src_node.items():
        target_node.set(attr_name, attr_value)

    children = list(src_node)
    if not children:
        # Leaf element: carry over its text
        target_node.text = src_node.text
        return

    for source_child in children:
        # SubElement creates the new element and appends it in one step
        copied_child = ET.SubElement(target_node, source_child.tag)
        copy_node(source_child, copied_child)

    

def read_write_xml(src, target):
    """Parse the XML file ``src`` and write a copy of its tree to ``target``.

    The copy is built with ``copy_node``; the time spent copying is printed
    in milliseconds, and the target path is logged once the file is written.

    Args:
        src: Path of the XML file to read.
        target: Path of the XML file to create.
    """
    root = ET.parse(src).getroot()
    target_root = ET.Element(root.tag)

    # perf_counter() is the clock intended for measuring short durations;
    # time.time() can be too coarse and report 0.0 for a small document.
    start_time = time.perf_counter() * 1000
    copy_node(root, target_root)
    end_time = time.perf_counter() * 1000
    print('copy_node:' + str(end_time - start_time))

    ET.ElementTree(target_root).write(target)
    logging.info(target)



def main():
    """Run the ElementTree sample and report how long it took.

    Log records go to a ``.log`` file next to this script and to the console.
    The source and target paths are taken from the first two command-line
    arguments when both are given; otherwise the sample paths under
    ``C:/temp`` are used.
    """
    try:
        current_file = os.path.realpath(__file__)

        # Configure logger. Swap only the extension: str.replace() would
        # rewrite every '.py' in the path, and would return the script's own
        # path (then truncated by filemode='w') when there is no '.py' at all.
        log_file = os.path.splitext(current_file)[0] + '.log'
        logging.basicConfig(filename=log_file, filemode='w', level=logging.INFO)

        # Create console handler so log records are also shown on screen
        ch = logging.StreamHandler()
        ch.setLevel(logging.INFO)
        logging.getLogger('').addHandler(ch)

        if len(sys.argv) > 2:
            src, target = sys.argv[1], sys.argv[2]
        else:
            # Defaults used by the article
            src = 'C:/temp/src.xml'
            target = 'C:/temp/target-tree.xml'

        # Generate results
        start_time = time.perf_counter() * 1000
        read_write_xml(src, target)
        end_time = time.perf_counter() * 1000
        print('read_write_xml:' + str(end_time - start_time))
    except Exception:
        # logging.exception() appends the active traceback by itself; the
        # message only needs to say what failed.
        logging.exception('ElementTree sample failed')

    input('Press any key to exit...')


if __name__ == '__main__':
    main()

 3.使用minidom读取src.xml,并创建一个内容相同的xml名为target-dom.xml。

MinidomSample.py如下:

# -*- coding: utf-8 -*-

"""

Sample of xml.dom.minidom



@author: ldlchina

"""



import os

import sys

import logging

import traceback

import xml.dom.minidom as MD

import time



def get_text(n):
    """Return the concatenated text of the direct children of ``n``.

    Only TEXT and CDATA section children contribute; other child nodes
    (for example elements) are skipped and not descended into.
    """
    return "".join(
        child.data
        for child in n.childNodes
        if child.nodeType in (child.TEXT_NODE, child.CDATA_SECTION_NODE)
    )



def copy_node(target_doc, src_node, target_node):
    """Recursively copy ``src_node`` into ``target_node``.

    Args:
        target_doc: Document used as the factory for every new node.
        src_node: Document or element to read from.
        target_node: Node that receives the copied attributes and children.

    Text, CDATA section and element children are copied; child nodes of any
    other type are skipped.
    """
    # The Document itself is never asked for attributes
    is_document = isinstance(src_node, MD.Document)
    if not is_document and src_node.hasAttributes():
        for attr_name, attr_value in src_node.attributes.items():
            target_node.setAttribute(attr_name, attr_value)

    for child in src_node.childNodes:
        kind = child.nodeType
        if kind == child.TEXT_NODE:
            target_node.appendChild(target_doc.createTextNode(child.nodeValue))
        elif kind == child.CDATA_SECTION_NODE:
            target_node.appendChild(target_doc.createCDATASection(child.nodeValue))
        elif kind == child.ELEMENT_NODE:
            clone = target_doc.createElement(child.nodeName)
            target_node.appendChild(clone)
            copy_node(target_doc, child, clone)

    

def read_write_xml(src, target):
    """Parse the XML file ``src`` and write a copy of its DOM to ``target``.

    The copy is built with ``copy_node``; the time spent copying is printed
    in milliseconds, and the target path is logged once the file is written.

    Args:
        src: Path of the XML file to read.
        target: Path of the XML file to create.
    """
    doc = MD.parse(src)
    target_doc = MD.Document()

    # perf_counter() is the clock intended for measuring short durations;
    # time.time() can be too coarse and report 0.0 for a small document.
    start_time = time.perf_counter() * 1000
    copy_node(target_doc, doc, target_doc)
    end_time = time.perf_counter() * 1000
    print('copy_node: ' + str(end_time - start_time))

    # Write to file. The output carries no XML declaration, so readers will
    # treat it as UTF-8; encode it that way instead of using the locale
    # default. The with-block closes the file even if write() raises.
    with open(target, 'w', encoding='utf-8') as f:
        f.write(target_doc.documentElement.toxml())
    logging.info(target)



def main():
    """Run the minidom sample and report how long it took.

    Log records go to a ``.log`` file next to this script and to the console.
    The source and target paths are taken from the first two command-line
    arguments when both are given; otherwise the sample paths under
    ``C:/temp`` are used.
    """
    try:
        current_file = os.path.realpath(__file__)

        # Configure logger. Swap only the extension: str.replace() would
        # rewrite every '.py' in the path, and would return the script's own
        # path (then truncated by filemode='w') when there is no '.py' at all.
        log_file = os.path.splitext(current_file)[0] + '.log'
        logging.basicConfig(filename=log_file, filemode='w', level=logging.INFO)

        # Create console handler so log records are also shown on screen
        ch = logging.StreamHandler()
        ch.setLevel(logging.INFO)
        logging.getLogger('').addHandler(ch)

        if len(sys.argv) > 2:
            src, target = sys.argv[1], sys.argv[2]
        else:
            # Defaults used by the article
            src = 'C:/temp/src.xml'
            target = 'C:/temp/target-dom.xml'

        # Generate results
        start_time = time.perf_counter() * 1000
        read_write_xml(src, target)
        end_time = time.perf_counter() * 1000
        print('read_write_xml: ' + str(end_time - start_time))
    except Exception:
        # logging.exception() appends the active traceback by itself; the
        # message only needs to say what failed.
        logging.exception('minidom sample failed')

    input('Press any key to exit...')


if __name__ == '__main__':
    main()

4.运行ElementTreeSample.py,得到XML如下:

<Persons><Person><Name>LDL</Name><Description Language="English">cdata text</Description></Person><Person><Name>China</Name><Description Language="Chinese">cdata text</Description></Person></Persons>

5.运行MinidomSample.py,得到XML如下:

<Persons>

    <Person>

        <Name>LDL</Name>

        <Description Language="English"><![CDATA[cdata text]]></Description>

    </Person>

    <Person>

        <Name>China</Name>

        <Description Language="Chinese"><![CDATA[cdata text]]></Description>

    </Person>

</Persons>

 

ElementTree VS minidom:

1.ElementTree执行速度会比minidom快一些。

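2.ElementTree的输出丢失了XML的换行和缩进,minidom的输出保留了它们。(ElementTree解析时会把换行和缩进保存在元素的text和tail中,但本例的copy_node没有复制含子节点元素的text以及各元素的tail;minidom把它们当作文本节点一并复制。)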

3.ElementTree不支持CDATA(解析时CDATA会变成普通文本,输出时无法还原成CDATA段),minidom可以。

你可能感兴趣的:(element)