# xml.dom.minidom

import html
import xml.dom.minidom
# Sample slideshow XML that the handler functions in this file turn into HTML.
document = """\
            <slideshow>
            <title>Demo slideshow</title>
            <slide><title>Slide title</title>
            <point>This is a demo</point>
            <point>Of a program for processing slides</point>
            </slide>
            <slide><title>Another demo slide</title>
            <point>It is important</point>
            <point>To have more than</point>
            <point>one slide</point>
            </slide>
            </slideshow>
            """
dom = xml.dom.minidom.parseString(document)  # obtain an XML document object
def getText(nodelist):
    """Concatenate the character data of the text nodes in *nodelist*.

    Only nodes whose ``nodeType`` equals their ``TEXT_NODE`` constant
    contribute; every other node is skipped and its children are not
    visited.

    Returns the joined string, or ``''`` when no node qualifies.
    """
    return ''.join(
        child.data
        for child in nodelist
        if child.nodeType == child.TEXT_NODE
    )
def handleSlideshow(slideshow):
    """Print the whole slideshow as an HTML document.

    Output order: the opening ``<html>`` tag, the slideshow title, the
    table of contents, every slide, then the closing ``</html>`` tag.

    Args:
        slideshow: node queried with ``getElementsByTagName`` for its
            "title" and "slide" elements.

    Raises:
        IndexError: if no "title" element is found (the opening tag has
            already been printed at that point).
    """
    print("<html>")
    # The first "title" match is used as the title of the slideshow itself.
    first_title = slideshow.getElementsByTagName("title")[0]
    handleSlideshowTitle(first_title)
    slide_nodes = slideshow.getElementsByTagName("slide")
    handleToc(slide_nodes)
    handleSlides(slide_nodes)
    print("</html>")
def handleSlides(slides):
    """Render every slide in *slides*, in iteration order, via handleSlide."""
    for current in slides:
        handleSlide(current)
def handleSlide(slide):
    """Print a single slide: its heading followed by its list of points.

    Raises:
        IndexError: if *slide* contains no "title" element.
    """
    heading = slide.getElementsByTagName("title")[0]
    handleSlideTitle(heading)
    bullets = slide.getElementsByTagName("point")
    handlePoints(bullets)
def handleSlideshowTitle(title):
    """Print the slideshow title wrapped in an HTML ``<title>`` element.

    Args:
        title: element node whose direct text children hold the title.

    The text is HTML-escaped before printing: the XML parser returns
    decoded character data (``&amp;`` becomes ``&``), so emitting it
    verbatim would produce malformed HTML for titles containing
    ``&``, ``<`` or ``>``.
    """
    text = getText(title.childNodes)
    # quote=False: this is element content, so quotes need no escaping.
    print("<title>%s</title>" % html.escape(text, quote=False))
def handleSlideTitle(title):
    """Print a slide's title as an HTML ``<h2>`` heading.

    Args:
        title: element node whose direct text children hold the title.

    The text is HTML-escaped before printing so that ``&``, ``<`` and
    ``>`` in the parsed (already decoded) XML text cannot break the
    generated markup.
    """
    text = getText(title.childNodes)
    # quote=False: this is element content, so quotes need no escaping.
    print("<h2>%s</h2>" % html.escape(text, quote=False))
def handlePoints(points):
    """Print *points* as an HTML unordered list.

    The ``<ul>`` and ``</ul>`` tags are printed even when *points* is
    empty; each entry in between is rendered by handlePoint.
    """
    print("<ul>")
    for item in points:
        handlePoint(item)
    print("</ul>")
def handlePoint(point):
    """Print one point as an HTML ``<li>`` list item.

    Args:
        point: element node whose direct text children hold the point.

    The text is HTML-escaped before printing so that ``&``, ``<`` and
    ``>`` in the parsed (already decoded) XML text cannot break the
    generated markup.
    """
    text = getText(point.childNodes)
    # quote=False: this is element content, so quotes need no escaping.
    print("<li>%s</li>" % html.escape(text, quote=False))
def handleToc(slides):
    """Print a table of contents: one ``<p>`` line per slide title.

    Args:
        slides: iterable of slide element nodes.

    Raises:
        IndexError: if a slide contains no "title" element.

    Each title is HTML-escaped before printing so that ``&``, ``<`` and
    ``>`` in the parsed (already decoded) XML text cannot break the
    generated markup.
    """
    for slide in slides:
        title = slide.getElementsByTagName("title")[0]
        text = getText(title.childNodes)
        # quote=False: this is element content, so quotes need no escaping.
        print("<p>%s</p>" % html.escape(text, quote=False))
# Convert the parsed sample document; the resulting HTML is written with print().
handleSlideshow(dom)

 

# You may also be interested in: (xml.dom.minidom)