用Python写一个文本转HTML的脚本

这个项目是在实验楼上看到的,自己读了一遍,感觉用处很大,代码也不难懂,特此分享。

给定一个.txt文件,将其转换成HTML网页。

我们先来编写代码:

#!/usr/bin/python
# encoding: utf-8
#util.py
def lines(file):
    """
    Yield every line of ``file`` in order, followed by one extra '\\n'.

    ``file`` may be any iterable of strings. The extra newline yielded at
    the end acts as a final blank line after the last real line.
    """
    for text_line in file:
        yield text_line
    yield '\n'

def blocks(file):
    """
    Yield the blocks of text found in ``file``, one string per block.

    Consecutive non-blank lines are collected; when a blank
    (whitespace-only) line is reached, the collected lines are joined,
    stripped of surrounding whitespace and yielded. Runs of blank lines
    produce nothing.
    """
    pending = []
    for line in lines(file):
        if not line.strip():
            # Blank line: flush whatever has been collected so far.
            if pending:
                yield ''.join(pending).strip()
                pending = []
            continue
        pending.append(line)
#!/usr/bin/python
# encoding: utf-8
#handlers.py
class Handler:
    """
    Base class for handlers.

    Method names are built at run time as ``prefix + name`` (for example
    ``start_`` + ``paragraph``) and looked up on the instance, so a
    subclass only defines the hooks it cares about.
    """

    def callback(self, prefix, name, *args):
        """
        Call the method named ``prefix + name`` with ``args``.

        Returns that method's result, or None when no callable attribute
        of that name exists.
        """
        hook = getattr(self, prefix + name, None)
        if not callable(hook):
            return None
        return hook(*args)

    def start(self, name):
        """Invoke the ``start_<name>`` hook, if present."""
        self.callback('start_', name)

    def end(self, name):
        """Invoke the ``end_<name>`` hook, if present."""
        self.callback('end_', name)

    def sub(self, name):
        """
        Return a function suitable as the replacement argument of re.sub.

        The returned function passes the match object to the
        ``sub_<name>`` hook; when the hook is missing or returns None the
        matched text is kept unchanged.
        """
        def substitution(match):
            replacement = self.callback('sub_', name, match)
            if replacement is not None:
                return replacement
            # No hook result: leave the matched text as it was.
            return match.group(0)
        return substitution

class HTMLRenderer(Handler):
    """
    HTML handler: wraps each block of text in the matching HTML markup.

    The ``start_*`` / ``end_*`` methods print the opening / closing tag of
    a block type, the ``sub_*`` methods return the inline markup that
    replaces a regex match, and ``feed`` prints the block text itself.

    NOTE(review): the tag strings below were reconstructed because the
    markup had been stripped from the listing (leaving unterminated string
    literals and '%s' formats with too many arguments); confirm them
    against the intended output.
    """

    def start_document(self):
        print('<html><head><title>ShiYanLou</title></head><body>')

    def end_document(self):
        print('</body></html>')

    def start_paragraph(self):
        print('<p>')

    def end_paragraph(self):
        print('</p>')

    def start_heading(self):
        print('<h2>')

    def end_heading(self):
        print('</h2>')

    def start_list(self):
        print('<ul>')

    def end_list(self):
        print('</ul>')

    def start_listitem(self):
        print('<li>')

    def end_listitem(self):
        print('</li>')

    def start_title(self):
        print('<h1>')

    def end_title(self):
        print('</h1>')

    def sub_emphasis(self, match):
        """Return the first captured group wrapped in <em> tags."""
        return '<em>%s</em>' % match.group(1)

    def sub_url(self, match):
        """Return a link whose target and text are both the captured URL."""
        return '<a href="%s">%s</a>' % (match.group(1), match.group(1))

    def sub_mail(self, match):
        """Return a mailto: link for the captured e-mail address."""
        return '<a href="mailto:%s">%s</a>' % (match.group(1), match.group(1))

    def feed(self, data):
        """Print the block text as-is."""
        print(data)
    #!/usr/bin/python
    # encoding: utf-8
    #rules.py
    class Rule:
        """
        Base class for all rules.

        Subclasses provide a ``type`` attribute naming the block type and
        a ``condition(block)`` method.
        """

        def action(self, block, handler):
            """
            Emit ``block`` through ``handler`` wrapped in this rule's type.

            Calls ``handler.start``, ``handler.feed`` and ``handler.end`` in
            that order and returns True, which the caller treats as "stop
            trying further rules for this block".
            """
            kind = self.type
            handler.start(kind)
            handler.feed(block)
            handler.end(kind)
            return True
    
    class HeadingRule(Rule):
        """
        Heading rule: a short single-line block that does not end in a colon.
        """
        type = 'heading'

        def condition(self, block):
            """
            Return True when ``block`` qualifies as a heading.

            A heading is non-empty, contains no newline, is at most 70
            characters long and does not end with ':'.
            """
            # An empty block is never a heading; the guard also avoids the
            # IndexError that indexing block[-1] would raise on ''.
            if not block:
                return False
            return '\n' not in block and len(block) <= 70 and not block.endswith(':')
    
    class TitleRule(HeadingRule):
        """
        Title rule: only the first block tested can be the title, and only
        if it also satisfies the heading condition.
        """
        type = 'title'
        first = True  # set to False on the instance by the first condition() call

        def condition(self, block):
            """
            Apply the heading test on the first call only.

            The first call consumes the "first block" chance whether or not
            the block qualifies; every later call returns False.
            """
            if self.first:
                self.first = False
                return HeadingRule.condition(self, block)
            return False
    
    class ListItemRule(Rule):
        """
        List item rule: a block that begins with a hyphen.
        """
        type = 'listitem'

        def condition(self, block):
            """Return True when ``block`` starts with '-'."""
            # startswith() returns False for an empty block, where block[0]
            # would raise IndexError.
            return block.startswith('-')

        def action(self, block, handler):
            """
            Emit the item text without its leading hyphen.

            Returns True so that no further rule is tried for this block.
            """
            handler.start(self.type)
            # Drop the '-' marker and any whitespace around the remaining text.
            handler.feed(block[1:].strip())
            handler.end(self.type)
            return True
    
    class ListRule(ListItemRule):
        """
        List rule: opens a list before the first item of a run of list
        items and closes it when the next non-item block is seen.
        """
        type = 'list'
        inside = False  # True while a list has been started and not yet ended

        def condition(self, block):
            """Always True: every block must be inspected to track list state."""
            return True

        def action(self, block, handler):
            """
            Start or end the list depending on whether ``block`` is an item.

            Always returns False so the remaining rules still handle the
            block itself. The list is only ended when a later non-item
            block arrives.
            """
            is_item = ListItemRule.condition(self, block)
            if is_item and not self.inside:
                handler.start(self.type)
                self.inside = True
            elif self.inside and not is_item:
                handler.end(self.type)
                self.inside = False
            return False
    
    class ParagraphRule(Rule):
        """
        Paragraph rule: accepts any block.
        """
        type = 'paragraph'

        def condition(self, block):
            """Return True for every block."""
            return True
    #!/usr/bin/python
    # encoding: utf-8
    #makeup.py
    import sys, re
    from handlers import *
    from util import *
    from rules import *
    
    class Parser:
        """
        Base parser: applies registered filters and rules to each block of
        the input and reports the result to a handler.
        """

        def __init__(self, handler):
            self.handler = handler
            self.rules = []
            self.filters = []

        def addRule(self, rule):
            """
            Register a rule; rules are tried in the order they were added.
            """
            self.rules.append(rule)

        def addFilter(self, pattern, name):
            """
            Register a regex filter.

            Each match of ``pattern`` in a block is replaced using the
            function returned by ``handler.sub(name)``.
            """
            def apply_filter(block, handler):
                return re.sub(pattern, handler.sub(name), block)
            self.filters.append(apply_filter)

        def parse(self, file):
            """
            Parse ``file`` block by block.

            Every block is passed through all filters, then offered to the
            rules in order; processing of a block stops at the first rule
            whose condition holds and whose action returns a true value.
            The whole run is bracketed by the handler's 'document'
            start/end calls.
            """
            self.handler.start('document')
            for block in blocks(file):
                for apply_filter in self.filters:
                    block = apply_filter(block, self.handler)
                for rule in self.rules:
                    if rule.condition(block) and rule.action(block, self.handler):
                        break
            self.handler.end('document')
    
    class BasicTextParser(Parser):
        """
        Plain-text parser: a Parser preloaded with the list, list item,
        title, heading and paragraph rules and the emphasis, URL and mail
        filters.
        """

        def __init__(self, handler):
            Parser.__init__(self, handler)

            # Order matters: rules are tried in registration order.
            rule_classes = (ListRule, ListItemRule, TitleRule, HeadingRule, ParagraphRule)
            for rule_class in rule_classes:
                self.addRule(rule_class())

            inline_filters = (
                (r'\*(.+?)\*', 'emphasis'),
                (r'(http://[\.a-zA-Z/]+)', 'url'),
                (r'([\.a-zA-Z]+@[\.a-zA-Z]+[a-zA-Z]+)', 'mail'),
            )
            for pattern, name in inline_filters:
                self.addFilter(pattern, name)
    
    # Run the program: parse the text read from standard input with an
    # HTML-rendering handler.
    handler = HTMLRenderer()
    parser = BasicTextParser(handler)
    parser.parse(sys.stdin)
    

    写完之后,让我们来测试一下。脚本从标准输入读取文本,并把生成的 HTML 打印到标准输出,例如:python makeup.py < test.txt > test.html



    用Python写一个文本转HTML的脚本_第1张图片












    看,大功告成!

    你可能感兴趣的:(技术)