python-object-twohtml>_1

1.准备样例文件website.xml:

<website>
  <page name="index" title="Home Page">
    <h1>Welcome to My Home Page</h1>

    <p>Hi, there. My name is Mr. Yang, and this is my home page. Here are some of my interests:</p>

    <ul>
      <li><a href="interests/shouting.html">Shouting</a></li>
      <li><a href="interests/sleeping.html">Sleeping</a></li>
      <li><a href="interests/eating.html">Eating</a></li>
    </ul>
  </page>

  <directory name="interests">
    <page name="shouting" title="Shouting">
      <h1>Shouting Page of Mr. yang</h1>

      <p>...</p>
    </page>
    <page name="sleeping" title="Sleeping">
      <h1>Sleeping Page of Mr. yang</h1>

      <p>...</p>
    </page>
    <page name="eating" title="Eating">
      <h1>Eating Page of Mr. yang</h1>

      <p>...</p>
    </page>
  </directory>
</website>
2. 简单的实现,编写python脚本xml_1.py:

#! /usr/bin/env python2.6
# Written by Tony.yang

#
from xml.sax import parse
from xml.sax.handler import ContentHandler

class PageMaker(ContentHandler):
        """SAX handler that writes every <page> element to its own HTML file.

        A ``<page name="N" title="T">`` element opens ``N.html`` (relative to
        the current working directory), writes a minimal HTML header using T
        as the title, copies everything nested inside the page through as
        markup, and closes the file at ``</page>``.  Events that arrive while
        no page is open are ignored.
        """

        # True while the parser is inside a <page> element.
        passthrough = False

        def startElement(self, name, attrs):
                """Open the output file for a <page>, or echo a nested start tag.

                name  -- element name reported by the parser
                attrs -- attribute mapping of the element; a <page> must carry
                         'name' and 'title' (a missing one raises KeyError)
                """
                # Function-scope import keeps the class usable on its own.
                from xml.sax.saxutils import escape, quoteattr

                if name == 'page':
                        self.passthrough = True
                        self.out = open(attrs['name'] + '.html', 'w')
                        self.out.write('<html><head>\n')
                        # The parser hands over the title entity-decoded, so
                        # it has to be escaped again before landing in HTML.
                        self.out.write('<title>%s</title>\n' % escape(attrs['title']))
                        self.out.write('</head><body>\n')
                elif self.passthrough:
                        self.out.write('<' + name)
                        for key, val in attrs.items():
                                # The leading space separates the attribute
                                # from the tag name and from its predecessor;
                                # quoteattr() escapes and quotes the value.
                                self.out.write(' %s=%s' % (key, quoteattr(val)))
                        self.out.write('>')

        def endElement(self, name):
                """Finish the current page file, or echo a nested end tag."""
                if name == 'page':
                        self.passthrough = False
                        self.out.write('\n</body></html>\n')
                        self.out.close()
                elif self.passthrough:
                        self.out.write('</%s>' % name)

        def characters(self, chars):
                """Copy character data that appears inside a page to the file."""
                from xml.sax.saxutils import escape

                if self.passthrough:
                        # Re-escape &, < and > which the parser has decoded.
                        self.out.write(escape(chars))

# Run the SAX parser over website.xml; the handler writes one HTML file
# per <page> element it encounters.
page_maker = PageMaker()
parse('website.xml', page_maker)
 

3. 优化后的脚本(添加了一些其他功能):

xml_2.py:

#! /usr/bin/env python2.6

from xml.sax.handler import ContentHandler
from xml.sax import parse
import os

class Dispatcher:
        """Mixin that routes SAX element events to per-element methods.

        An event is described by a prefix ('start' or 'end') and the element
        name.  The method called is ``<prefix><Name>`` where ``<Name>`` is
        ``name.capitalize()`` (for example ``startPage`` for <page>); when no
        such callable exists, ``default<Prefix>`` is tried instead and is
        given the element name as an extra first argument.
        """

        def dispatch(self, prefix, name, attrs=None):
                """Find and invoke the handler for one element event.

                prefix -- 'start' or 'end'
                name   -- element name
                attrs  -- attributes of the element; appended to the call
                          arguments only when prefix is 'start'

                Nothing happens when neither the specific nor the default
                handler is a callable attribute of this object.
                """
                handler = getattr(self, prefix + name.capitalize(), None)
                if callable(handler):
                        # A specific handler already knows which element it
                        # serves, so the name is not passed along.
                        call_args = ()
                else:
                        handler = getattr(self, 'default' + prefix.capitalize(), None)
                        call_args = (name,)

                if prefix == 'start':
                        call_args = call_args + (attrs,)

                if not callable(handler):
                        return
                handler(*call_args)

        def startElement(self, name, attrs):
                """SAX callback: forward an opening tag to dispatch()."""
                self.dispatch('start', name, attrs)

        def endElement(self, name):
                """SAX callback: forward a closing tag to dispatch()."""
                self.dispatch('end', name)


class WebsiteConstructor(Dispatcher, ContentHandler):
        """Build a tree of HTML files from <directory> and <page> elements.

        <directory name="D"> elements map to nested directories below the
        root directory given to the constructor; each <page name="N"
        title="T"> becomes ``N.html`` inside the directory that is current
        at that point.  Markup nested inside a page is copied through to the
        page's file; everything outside of a page is ignored.
        """

        # True while inside a <page>; gates all pass-through output.
        passthrough = False

        def __init__(self, directory):
                """Remember the root output directory and create it if missing.

                directory -- path of the root directory for generated files
                """
                # Run the SAX base-class initialiser as well.
                ContentHandler.__init__(self)
                # Stack of path components; <directory> elements push and pop.
                self.directory = [directory]
                self.ensureDirectory()

        def ensureDirectory(self):
                """Create the directory named by the current stack if needed."""
                path = os.path.join(*self.directory)
                if not os.path.isdir(path):
                        os.makedirs(path)

        def characters(self, chars):
                """Copy character data that appears inside a page to the file."""
                # Function-scope import keeps the class usable on its own.
                from xml.sax.saxutils import escape

                if self.passthrough:
                        # The parser delivers text entity-decoded; re-escape
                        # &, < and > so the generated HTML stays well-formed.
                        self.out.write(escape(chars))

        def defaultStart(self, name, attrs):
                """Echo the start tag of any element without a specific handler."""
                from xml.sax.saxutils import quoteattr

                if self.passthrough:
                        self.out.write('<' + name)
                        for key, val in attrs.items():
                                # quoteattr() escapes the value and adds the
                                # surrounding quotes.
                                self.out.write(' %s=%s' % (key, quoteattr(val)))
                        self.out.write('>')

        def defaultEnd(self, name):
                """Echo the end tag of any element without a specific handler."""
                if self.passthrough:
                        self.out.write('</%s>' % name)

        def startDirectory(self, attrs):
                """Enter a <directory>: push its name and create the directory."""
                self.directory.append(attrs['name'])
                self.ensureDirectory()

        def endDirectory(self):
                """Leave a <directory>: drop its name from the path stack."""
                self.directory.pop()

        def startPage(self, attrs):
                """Open ``<name>.html`` in the current directory and start the page."""
                filename = os.path.join(*self.directory + [attrs['name'] + '.html'])
                self.out = open(filename, 'w')
                self.writeHeader(attrs['title'])
                self.passthrough = True

        def endPage(self):
                """Write the page footer and close the page's file."""
                self.passthrough = False
                self.writeFooter()
                self.out.close()

        def writeHeader(self, title):
                """Write the HTML preamble with *title* as the page title."""
                from xml.sax.saxutils import escape

                self.out.write('<html>\n<head>\n    <title>')
                # The title comes from an attribute value, so it is already
                # entity-decoded and must be escaped again.
                self.out.write(escape(title))
                self.out.write('</title>\n</head>\n    <body>\n')

        def writeFooter(self):
                """Write the closing body and html tags."""
                self.out.write('\n</body>\n</html>\n')

# Generate the site below public_html/ from the description in website.xml.
site_builder = WebsiteConstructor('public_html')
parse('website.xml', site_builder)
 

 

 

 

 

 

你可能感兴趣的:(python,XML文件,html文件)