1.准备样例文件website.xml:


 
   

Welcome to My Home Page



   

Hi, there. My name is Mr. Yang, and this is my home page. Here are some of my interests:



   
 

 
   
     

Shouting Page of Mr. yang



     

...


   

   
     

Sleeping Page of Mr. yang



     

...


   

   
     

Eating Page of Mr. yang



     

...


   

 


2. 简单的实现,编写python脚本xml_1.py:

#! /usr/bin/env python2.6
# Written by Tony.yang

#
from xml.sax import parse
from xml.sax.handler import ContentHandler

class PageMaker(ContentHandler):
        """SAX handler that writes every <page> element to its own HTML file.

        A <page> element must carry ``name`` (the output file is
        ``<name>.html`` in the current directory) and ``title`` attributes.
        Everything nested inside a page -- tags, attributes and text -- is
        copied to that file; anything outside a page is ignored.
        """

        # True only while the parser is inside a <page> element.
        passthrough = False

        def startElement(self, name, attrs):
                """Open the output file for a <page>, or echo a nested start tag.

                Raises KeyError if a <page> lacks ``name`` or ``title``.
                """
                if name == 'page':
                        self.passthrough = True
                        self.out = open(attrs['name'] + '.html', 'w')
                        self.out.write('<html><head>\n')
                        self.out.write('<title>%s</title>\n' % attrs['title'])
                        self.out.write('</head><body>\n')
                elif self.passthrough:
                        self.out.write('<' + name)
                        for key, val in attrs.items():
                                # The leading space separates the attribute
                                # from the tag name and from its neighbours.
                                self.out.write(' %s="%s"' % (key, val))
                        self.out.write('>')

        def endElement(self, name):
                """Finish and close the file on </page>, or echo a nested end tag."""
                if name == 'page':
                        self.passthrough = False
                        self.out.write('\n</body></html>\n')
                        self.out.close()
                elif self.passthrough:
                        self.out.write('</%s>' % name)

        def characters(self, chars):
                """Copy character data into the current page.

                NOTE: the text is written exactly as the parser delivers it,
                with no HTML escaping applied.
                """
                if self.passthrough:
                        self.out.write(chars)

# Run the SAX parser over the sample file; PageMaker writes the HTML pages.
parse(source='website.xml', handler=PageMaker())
 

3. 优化后的脚本(添加了一些其他功能):

xml_2.py:

#! /usr/bin/env python2.6

from xml.sax.handler import ContentHandler
from xml.sax import parse
import os

class Dispatcher:
        """Mix-in that routes SAX element events to per-element methods.

        For an element ``foo`` the start event looks for ``startFoo(attrs)``
        and the end event for ``endFoo()``. When no such callable exists the
        event falls back to ``defaultStart(name, attrs)`` or
        ``defaultEnd(name)``; if that is missing too, the event is dropped.
        """

        def dispatch(self, prefix, name, attrs=None):
                """Call the handler for event ``prefix`` on element ``name``.

                ``prefix`` is 'start' or 'end'; ``attrs`` is forwarded only
                for 'start' events.
                """
                specific = prefix + name.capitalize()
                fallback = 'default' + prefix.capitalize()

                handler = getattr(self, specific, None)
                call_args = []
                if not callable(handler):
                        # No element-specific handler: use the default one,
                        # which also needs to be told the element name.
                        handler = getattr(self, fallback, None)
                        call_args.append(name)

                if prefix == 'start':
                        call_args.append(attrs)

                if callable(handler):
                        handler(*call_args)

        def startElement(self, name, attrs):
                """SAX callback: forward a start-tag event to dispatch()."""
                self.dispatch('start', name, attrs)

        def endElement(self, name):
                """SAX callback: forward an end-tag event to dispatch()."""
                self.dispatch('end', name)


class WebsiteConstructor(Dispatcher, ContentHandler):
        """Build a tree of HTML files from <directory> and <page> elements.

        Dispatcher is listed first among the bases, so its startElement and
        endElement route each element to the startXxx/endXxx methods below,
        or to defaultStart/defaultEnd for every other element.
        """

        # True only while the parser is inside a <page> element.
        passthrough = False

        def __init__(self, directory):
                """Create the root output directory ``directory`` if needed."""
                # Explicit base call instead of super(): ContentHandler is an
                # old-style class on the Python 2.6 this script targets.
                ContentHandler.__init__(self)
                # Stack of path components for the directory being written.
                self.directory = [directory]
                self.ensureDirectory()

        def ensureDirectory(self):
                """Create the directory named by the current stack if missing."""
                path = os.path.join(*self.directory)
                if not os.path.isdir(path):
                        os.makedirs(path)

        def characters(self, chars):
                """Copy character data into the current page.

                NOTE: the text is written exactly as the parser delivers it,
                with no HTML escaping applied.
                """
                if self.passthrough:
                        self.out.write(chars)

        def defaultStart(self, name, attrs):
                """Echo the start tag of any element nested inside a page."""
                if self.passthrough:
                        self.out.write('<' + name)
                        for key, val in attrs.items():
                                self.out.write(' %s="%s"' % (key, val))
                        self.out.write('>')

        def defaultEnd(self, name):
                """Echo the end tag of any element nested inside a page."""
                if self.passthrough:
                        self.out.write('</%s>' % name)

        def startDirectory(self, attrs):
                """Enter the sub-directory named by ``attrs['name']``."""
                self.directory.append(attrs['name'])
                self.ensureDirectory()

        def endDirectory(self):
                """Leave the current sub-directory."""
                self.directory.pop()

        def startPage(self, attrs):
                """Open ``<name>.html`` in the current directory and start the page.

                Raises KeyError if the <page> lacks ``name`` or ``title``.
                """
                filename = os.path.join(*self.directory + [attrs['name'] + '.html'])
                self.out = open(filename, 'w')
                self.writeHeader(attrs['title'])
                self.passthrough = True

        def endPage(self):
                """Write the page footer and close the output file."""
                self.passthrough = False
                self.writeFooter()
                self.out.close()

        def writeHeader(self, title):
                """Write the HTML preamble, with ``title`` as the page title."""
                self.out.write('<html>\n  <head>\n    <title>')
                self.out.write(title)
                self.out.write('</title>\n  </head>\n  <body>\n')

        def writeFooter(self):
                """Write the closing tags that match writeHeader()."""
                self.out.write('\n  </body>\n</html>\n')

# Run the SAX parser over the sample file; output goes under public_html/.
parse(source='website.xml', handler=WebsiteConstructor('public_html'))