一、Python 写 XML 文件的操作
要生成的xml文件格式如下:
<?xml version="1.0" ?> <!--Simple xml document__chapter 8--> <book> <title> sample xml thing </title> <author> <name> <first> ma </first> <last> xiaoju </last> </name> <affiliation> Springs Widgets, Inc. </affiliation> </author> <chapter number="1"> <title> First </title> <para> I think widgets are greate.You should buy lots of them forom <company> Spirngy Widgts, Inc </company> </para> </chapter> </book>
# Build the sample <book> document with xml.dom.minidom and print it as
# pretty-printed XML on standard output.
from xml.dom import minidom, Node

doc = minidom.Document()
doc.appendChild(doc.createComment("Simple xml document__chapter 8"))

# Generate the root <book> element.
book = doc.createElement('book')
doc.appendChild(book)

# The book title.
title = doc.createElement('title')
title.appendChild(doc.createTextNode("sample xml thing"))
book.appendChild(title)

# The author section: <author><name><first/><last/></name><affiliation/></author>.
author = doc.createElement("author")
book.appendChild(author)

name = doc.createElement('name')
author.appendChild(name)

firstname = doc.createElement('first')
firstname.appendChild(doc.createTextNode("ma"))
name.appendChild(firstname)

lastname = doc.createElement('last')
name.appendChild(lastname)
lastname.appendChild(doc.createTextNode("xiaoju"))

affiliation = doc.createElement("affiliation")
affiliation.appendChild(doc.createTextNode("Springs Widgets, Inc."))
author.appendChild(affiliation)

# The chapter: a numbered <chapter> holding its own <title> and one <para>.
chapter = doc.createElement('chapter')
chapter.setAttribute('number', '1')
title = doc.createElement('title')
title.appendChild(doc.createTextNode("First"))
chapter.appendChild(title)
book.appendChild(chapter)

# The paragraph mixes a text node with a nested <company> element.
# The two literals are joined with no separator on purpose, so the text
# matches the sample output exactly.
para = doc.createElement('para')
para.appendChild(doc.createTextNode(
    "I think widgets are greate."
    "You should buy lots of them forom"))
company = doc.createElement('company')
company.appendChild(doc.createTextNode("Spirngy Widgts, Inc"))
para.appendChild(company)
chapter.appendChild(para)

# A single-argument print call behaves the same on Python 2 and Python 3.
print(doc.toprettyxml())
二、Python 读取 XML 文件的操作
待解析的 XML 文件(simple.xml)内容:
<?xml version="1.0" ?> <!--Simple xml document__chapter 8--> <book> <title> sample xml thing </title> <author> <name> <first> ma </first> <last> xiaoju </last> </name> <affiliation> Springs Widgets, Inc. </affiliation> </author> <chapter number="1"> <title> First </title> <para> I think widgets are greate.You should buy lots of them forom <company> Spirngy Widgts, Inc </company> </para> </chapter> </book>
from xml.dom import minidom, Node
import re
import textwrap

# Matches any run of whitespace (spaces, tabs, newlines).
_WHITESPACE_RE = re.compile(r"\s+")


class SampleScanner(object):
    """Walk a parsed ``<book>`` document and print what it contains.

    The scan runs from the constructor: each top-level ``<book>`` element is
    passed to :meth:`handle_book`, which prints the title, author and chapter
    details to standard output.
    """

    def __init__(self, doc):
        """Scan *doc* and print its book data.

        *doc* must be a ``minidom.Document``; anything else trips the
        assertion below.
        """
        assert isinstance(doc, minidom.Document)
        for child in doc.childNodes:
            # Skip comments and other non-element nodes at document level.
            if child.nodeType == Node.ELEMENT_NODE and child.tagName == "book":
                self.handle_book(child)

    def handle_book(self, node):
        """Print the title of a ``<book>`` and dispatch its author/chapters."""
        for child in node.childNodes:
            if child.nodeType != Node.ELEMENT_NODE:
                continue
            if child.tagName == "title":
                print("Book titile is: %s" % self.gettext(child.childNodes))
            elif child.tagName == "author":
                self.handle_author(child)
            elif child.tagName == "chapter":
                self.handle_chapter(child)

    def handle_chapter(self, node):
        """Print a ``<chapter>``'s number and title, then scan its paragraphs."""
        print("number: %s" % node.getAttribute("number"))
        # getElementsByTagName returns every descendant <title>, not only
        # direct children; gettext flattens whatever it finds.
        title_nodes = node.getElementsByTagName("title")
        print("title: %s" % self.gettext(title_nodes))
        for child in node.childNodes:
            if child.nodeType != Node.ELEMENT_NODE:
                continue
            if child.tagName == "para":
                self.handle_chapter_para(child)

    def handle_chapter_para(self, node):
        """Print the text of the ``<company>`` elements inside a ``<para>``."""
        company = self.gettext(node.getElementsByTagName("company"))
        print("chapter:para:company %s" % company)

    def handle_author(self, node):
        """Print the name and affiliation found under an ``<author>``."""
        for child in node.childNodes:
            if child.nodeType != Node.ELEMENT_NODE:
                continue
            if child.tagName == "name":
                self.handle_author_name(child)
            elif child.tagName == "affiliation":
                print("affiliation: %s" % self.gettext(child.childNodes))

    def handle_author_name(self, node):
        """Print the ``<first>`` and ``<last>`` parts of a ``<name>``.

        A missing part is printed as an empty string.
        """
        first = ""
        last = ""
        for child in node.childNodes:
            if child.nodeType != Node.ELEMENT_NODE:
                continue
            if child.tagName == "first":
                first = self.gettext(child.childNodes)
            elif child.tagName == "last":
                last = self.gettext(child.childNodes)
        print("firstname:%s,lastname:%s" % (first, last))

    def gettext(self, nodelist):
        """Return the text under *nodelist* with whitespace runs collapsed.

        Text and CDATA nodes contribute their own data; other nodes
        contribute the text of their descendants, gathered recursively.
        Every run of whitespace in the result becomes a single space;
        leading and trailing whitespace is collapsed but not stripped.
        """
        parts = []
        for node in nodelist:
            if node.nodeType in (Node.TEXT_NODE, Node.CDATA_SECTION_NODE):
                # Use the node's own data: ``wholeText`` already includes
                # adjacent text siblings, so it would repeat their content
                # once for every sibling visited.
                parts.append(node.data)
            elif node.hasChildNodes():
                parts.append(self.gettext(node.childNodes))
        return _WHITESPACE_RE.sub(" ", ''.join(parts))


if __name__ == "__main__":
    doc = minidom.parse("simple.xml")
    sample = SampleScanner(doc)