python解析xml文件操作的例子

python解析xml文件操作实例,操作XML文件的常见技巧

xml文件内容:

<?xml version="1.0" ?> 

<!--Simple xml document__chapter 8-->

<book> 

<title> 

sample xml thing 

</title> 

<author> 

<name> 

<first> 

ma 

</first> 

<last> 

xiaoju 

</last> 

</name> 

<affiliation> 

Springs Widgets, Inc. 

</affiliation> 

</author> 

<chapter number="1"> 

<title> 

First 

</title> 

<para> 

I think widgets are great. You should buy lots of them from 

<company> 

Springy Widgets, Inc 

</company> 

</para> 

</chapter> 

</book> 

python代码

from xml.dom import minidom, Node 

import re, textwrap ## www.jbxue.com



class SampleScanner:
    """Walk a parsed ``<book>`` document and print its main fields.

    Constructing an instance performs the whole scan: every top-level
    ``<book>`` element of the document is visited and its title, author
    and chapter details are written to standard output.

    All ``print`` calls pass a single pre-formatted string so that the
    output is identical under Python 2 and Python 3.
    """

    # Collapses any run of whitespace to a single space in extracted text.
    _WHITESPACE = re.compile(r"\s+")

    def __init__(self, doc):
        """Scan *doc* and print what is found.

        Args:
            doc: a ``minidom.Document``; only its direct ``<book>``
                element children are processed.

        Raises:
            AssertionError: if *doc* is not a ``minidom.Document``
                (only while assertions are enabled).
        """
        assert isinstance(doc, minidom.Document)
        for child in doc.childNodes:
            if child.nodeType == Node.ELEMENT_NODE and child.tagName == "book":
                self.handle_book(child)

    def handle_book(self, node):
        """Print the title of a ``<book>`` and dispatch its author/chapters.

        Only direct element children of *node* are considered.
        """
        for child in node.childNodes:
            if child.nodeType != Node.ELEMENT_NODE:
                continue
            if child.tagName == "title":
                print("Book titile is: %s" % self.gettext(child.childNodes))
            elif child.tagName == "author":
                self.handle_author(child)
            elif child.tagName == "chapter":
                self.handle_chapter(child)

    def handle_chapter(self, node):
        """Print a chapter's ``number`` attribute, title and paragraphs.

        The title text is gathered from every ``<title>`` element found
        anywhere below *node* (``getElementsByTagName`` searches all
        descendants, not only direct children).
        """
        number = node.getAttribute("number")
        print("number: %s" % number)

        title_nodes = node.getElementsByTagName("title")
        print("title: %s" % self.gettext(title_nodes))

        for child in node.childNodes:
            if child.nodeType != Node.ELEMENT_NODE:
                continue
            if child.tagName == "para":
                self.handle_chapter_para(child)

    def handle_chapter_para(self, node):
        """Print the text of the ``<company>`` elements inside a ``<para>``."""
        company = self.gettext(node.getElementsByTagName("company"))
        print("chapter:para:company %s" % company)

    def handle_author(self, node):
        """Print an author's name parts and affiliation.

        Only direct element children of *node* are considered.
        """
        for child in node.childNodes:
            if child.nodeType != Node.ELEMENT_NODE:
                continue
            if child.tagName == "name":
                self.handle_author_name(child)
            elif child.tagName == "affiliation":
                print("affiliation: %s" % self.gettext(child.childNodes))

    def handle_author_name(self, node):
        """Print the ``<first>`` and ``<last>`` children of a ``<name>``.

        A missing part is printed as an empty string.
        """
        first = ""
        last = ""
        for child in node.childNodes:
            if child.nodeType != Node.ELEMENT_NODE:
                continue
            if child.tagName == "first":
                first = self.gettext(child.childNodes)
            elif child.tagName == "last":
                last = self.gettext(child.childNodes)

        print("firstname:%s,lastname:%s" % (first, last))

    def gettext(self, nodelist):
        """Return the text contained in *nodelist*, whitespace-collapsed.

        Text and CDATA nodes contribute their own data; any other node
        that has children is descended into recursively. Childless
        non-text nodes (such as comments) contribute nothing.

        Args:
            nodelist: an iterable of DOM nodes.

        Returns:
            The concatenated text with each whitespace run replaced by
            one space. Leading/trailing whitespace is collapsed to a
            single space, not stripped.
        """
        parts = []
        for node in nodelist:
            if node.nodeType in (Node.TEXT_NODE, Node.CDATA_SECTION_NODE):
                # Use the node's own data rather than wholeText: wholeText
                # already merges adjacent text/CDATA siblings, so appending
                # it once per sibling would repeat the same text.
                parts.append(node.data)
            elif node.hasChildNodes():
                parts.append(self.gettext(node.childNodes))

        return self._WHITESPACE.sub(" ", "".join(parts))



if __name__ == "__main__":
    # Parse simple.xml from the current working directory and scan it.
    # Constructing the scanner prints the book details as a side effect.
    sample = SampleScanner(minidom.parse("simple.xml"))

你可能感兴趣的:(python)