python xml

一、用 Python 写 XML 文件

要生成的xml文件格式如下:

<?xml version="1.0" ?>
<!--Simple xml document__chapter 8-->
<book>
	<title>
		sample xml thing
	</title>
	<author>
		<name>
			<first>
				ma
			</first>
			<last>
				xiaoju
			</last>
		</name>
		<affiliation>
			Springs Widgets, Inc.
		</affiliation>
	</author>
	<chapter number="1">
		<title>
			First
		</title>
		<para>
			I think widgets are greate.You should buy lots of them forom
			<company>
				Spirngy Widgts, Inc
			</company>
		</para>
	</chapter>
</book>

Code:

from xml.dom import minidom, Node

# Build the sample <book> document node by node with minidom, then print it.
doc = minidom.Document()

# A comment node appended to the document itself sits before the root element.
doc.appendChild(doc.createComment("Simple xml document__chapter 8"))

# The root element: <book>
book = doc.createElement('book')
doc.appendChild(book)

# <book>/<title>
title = doc.createElement('title')
title.appendChild(doc.createTextNode("sample xml thing"))
book.appendChild(title)

# <book>/<author> with its <name> (<first>, <last>) and <affiliation>
author = doc.createElement("author")
book.appendChild(author)
name = doc.createElement('name')
author.appendChild(name)
firstname = doc.createElement('first')
firstname.appendChild(doc.createTextNode("ma"))
name.appendChild(firstname)
lastname = doc.createElement('last')
lastname.appendChild(doc.createTextNode("xiaoju"))
name.appendChild(lastname)

affiliation = doc.createElement("affiliation")
affiliation.appendChild(doc.createTextNode("Springs Widgets, Inc."))
author.appendChild(affiliation)

# <book>/<chapter number="1"> with its own <title>.
# NOTE: `title` is rebound here; the book title node is already attached.
chapter = doc.createElement('chapter')
chapter.setAttribute('number', '1')
title = doc.createElement('title')
title.appendChild(doc.createTextNode("First"))
chapter.appendChild(title)
book.appendChild(chapter)

# <chapter>/<para>: mixed content, a text node followed by a <company> element.
para = doc.createElement('para')
# Adjacent literals are joined by the parser, so no backslash continuation
# inside the string is needed; the resulting text is unchanged.
para.appendChild(doc.createTextNode(
    "I think widgets are greate."
    "You should buy lots of them forom"))
company = doc.createElement('company')
company.appendChild(doc.createTextNode("Spirngy Widgts, Inc"))
para.appendChild(company)
chapter.appendChild(para)

# Single-argument call form: valid as a Python 2 print statement with a
# parenthesised expression and as the Python 3 print() function.
print(doc.toprettyxml())

二、用 Python 解析 XML 文件

xml文件内容(保存为 simple.xml,即下面代码中解析的文件):

<?xml version="1.0" ?>
<!--Simple xml document__chapter 8-->
<book>
	<title>
		sample xml thing
	</title>
	<author>
		<name>
			<first>
				ma
			</first>
			<last>
				xiaoju
			</last>
		</name>
		<affiliation>
			Springs Widgets, Inc.
		</affiliation>
	</author>
	<chapter number="1">
		<title>
			First
		</title>
		<para>
			I think widgets are greate.You should buy lots of them forom
			<company>
				Spirngy Widgts, Inc
			</company>
		</para>
	</chapter>
</book>

python代码:

from xml.dom import minidom, Node
import re, textwrap
########################################################################
class SampleScanner:
    """Walk a parsed ``<book>`` DOM document and print what it contains.

    All the work happens during construction: every top-level ``<book>``
    element of the document is handed to :meth:`handle_book`, which prints
    the title, author and chapter information to standard output.

    Every ``print`` call below passes one pre-formatted string, so the
    lines produce the same output on Python 2 and Python 3.
    """

    def __init__(self, doc):
        """Scan *doc* and print the details of each ``<book>`` element.

        Args:
            doc: a ``minidom.Document``; any other type trips the assertion.
        """
        assert isinstance(doc, minidom.Document)
        for child in doc.childNodes:
            if child.nodeType == Node.ELEMENT_NODE and \
               child.tagName == "book":
                self.handle_book(child)

    def handle_book(self, node):
        """Dispatch the element children of a ``<book>`` node.

        ``<title>`` is printed directly; ``<author>`` and ``<chapter>`` are
        passed on to their handlers. Non-element children are skipped.
        """
        for child in node.childNodes:
            if child.nodeType != Node.ELEMENT_NODE:
                continue
            if child.tagName == "title":
                print("Book titile is: %s" % self.gettext(child.childNodes))
            elif child.tagName == "author":
                self.handle_author(child)
            elif child.tagName == "chapter":
                self.handle_chapter(child)

    def handle_chapter(self, node):
        """Print a chapter's ``number`` attribute and title, then its paragraphs."""
        print("number: %s" % node.getAttribute("number"))
        # getElementsByTagName searches all descendants, not just direct
        # children, so any nested <title> would be included here as well.
        title_nodes = node.getElementsByTagName("title")
        print("title: %s" % self.gettext(title_nodes))

        for child in node.childNodes:
            if child.nodeType != Node.ELEMENT_NODE:
                continue
            if child.tagName == "para":
                self.handle_chapter_para(child)

    def handle_chapter_para(self, node):
        """Print the text of every ``<company>`` element inside a ``<para>``."""
        company = self.gettext(node.getElementsByTagName("company"))
        print("chapter:para:company %s" % company)

    def handle_author(self, node):
        """Print the author's name (via :meth:`handle_author_name`) and affiliation."""
        for child in node.childNodes:
            if child.nodeType != Node.ELEMENT_NODE:
                continue
            if child.tagName == "name":
                self.handle_author_name(child)
            elif child.tagName == "affiliation":
                print("affiliation: %s" % self.gettext(child.childNodes))

    def handle_author_name(self, node):
        """Print the ``<first>`` and ``<last>`` parts of a ``<name>`` node.

        A missing part is printed as an empty string.
        """
        first = ""
        last = ""
        for child in node.childNodes:
            if child.nodeType != Node.ELEMENT_NODE:
                continue
            if child.tagName == "first":
                first = self.gettext(child.childNodes)
            elif child.tagName == "last":
                last = self.gettext(child.childNodes)

        print("firstname:%s,lastname:%s" % (first, last))

    def gettext(self, nodelist):
        """Return the text found in *nodelist*, recursing into child nodes.

        Args:
            nodelist: an iterable of DOM nodes.

        Returns:
            The concatenated text with every run of whitespace collapsed to
            a single space. Leading/trailing whitespace is collapsed too,
            not stripped.
        """
        parts = []
        for node in nodelist:
            if node.nodeType in (Node.TEXT_NODE, Node.CDATA_SECTION_NODE):
                # Use the node's own data rather than wholeText: wholeText
                # already merges adjacent Text/CDATA siblings, so appending
                # it once per node repeats the text when several are adjacent.
                parts.append(node.data)
            elif node.hasChildNodes():
                parts.append(self.gettext(node.childNodes))

        return re.sub(r'\s+', " ", ''.join(parts))
                    
if __name__ == "__main__":
    # Parse the sample file and hand the resulting DOM straight to the
    # scanner, which prints everything it finds while being constructed.
    sample = SampleScanner(minidom.parse("simple.xml"))
    


你可能感兴趣的:(xml,python,import,Constructor)