学习《Python基础教程》第三版,到第20章“自动添加标签”项目时,从官网下载了源代码运行,发现输出的 test_output.html 中每一行内容都重复了三次。下载第二版的代码比较后发现,第三版的代码和书中的代码都有一个小 bug:markup.py 里 parse 函数中的 for rule in self.rules:
这一句多向后缩进了4格,导致它进入了上一个循环(for filter in self.filters:);由于一共添加了三个过滤器,每个文本块就被规则处理了三次。修正后的代码如下:
import sys, re
from handlers import *
from util import *
from rules import *
class Parser:
    """
    A Parser reads a text file, applying rules and controlling a handler.

    Each block of the input is first passed through every registered filter
    and then offered to the registered rules, in the order they were added.
    """

    def __init__(self, handler):
        """Store *handler* and start with no rules and no filters."""
        self.handler = handler
        self.rules = []
        self.filters = []

    def addRule(self, rule):
        """Append *rule* to the rules that are tried for each block."""
        self.rules.append(rule)

    def addFilter(self, pattern, name):
        """
        Register a regular-expression substitution filter.

        The filter replaces every match of *pattern* in a block, using the
        replacement obtained from ``handler.sub(name)``.
        """
        # Named apply_filter rather than filter to avoid shadowing the builtin.
        def apply_filter(block, handler):
            return re.sub(pattern, handler.sub(name), block)
        self.filters.append(apply_filter)

    def parse(self, file):
        """
        Parse *file*, reporting to the handler.

        Calls ``handler.start('document')``, processes every block produced
        by ``blocks(file)``, then calls ``handler.end('document')``.
        """
        self.handler.start('document')
        for block in blocks(file):
            # Run the block through all filters first.
            for apply_filter in self.filters:
                block = apply_filter(block, self.handler)
            # This loop must NOT be nested inside the filter loop above;
            # otherwise the rules run once per filter and every block is
            # emitted repeatedly.
            for rule in self.rules:
                if rule.condition(block):
                    # A true return value from the action means the block is
                    # finished: no further rules are tried for it.
                    if rule.action(block, self.handler):
                        break
        self.handler.end('document')
class BasicTextParser(Parser):
    """
    A specific Parser that adds rules and filters in its constructor.
    """

    def __init__(self, handler):
        """Initialise the base Parser with *handler*, then register the rules and filters."""
        super().__init__(handler)
        # Registration order is significant: the base Parser tries rules in
        # the order in which they were added.
        self.addRule(ListRule())
        self.addRule(ListItemRule())
        self.addRule(TitleRule())
        self.addRule(HeadingRule())
        self.addRule(ParagraphRule())
        # Inline filters: text between asterisks, http:// URLs and e-mail
        # addresses.
        self.addFilter(r'\*(.+?)\*', 'emphasis')
        # NOTE(review): the URL pattern only accepts "http://" followed by
        # letters, dots and slashes (no digits, hyphens or "https").
        self.addFilter(r'(http://[\.a-zA-Z/]+)', 'url')
        # NOTE(review): the mail pattern likewise accepts only letters and
        # dots on both sides of the "@".
        self.addFilter(r'([\.a-zA-Z]+@[\.a-zA-Z]+[a-zA-Z]+)', 'mail')
# Usage: python markup.py < test_input.txt > test_output.html
if __name__ == '__main__':
    # Guarded so that importing this module does not start reading from
    # standard input; running it as a script behaves as before.
    handler = HTMLRenderer()
    parser = BasicTextParser(handler)
    # To read from a file instead of stdin, pass an open file object, e.g.:
    #     with open('test_input.txt', 'r') as file:
    #         parser.parse(file)
    parser.parse(sys.stdin)