python版小说分割转换器 | #python

  前段时间写了个简单的 TXT2HTML小说转换器HTA版,现在拿python再实现一遍,自动按章节分割成多个HTML文件,并建立目录,方便阅读。

效果图:
python版小说分割转换器 | #python_第1张图片

脚本代码:
# encoding: gbk
#
# 将txt小说分割转换成多个HTML文件
#
# @author : GreatGhoul
# @email  : [email protected]
# @blog   : http://greatghoul.iteye.com

import re
import os

# Path of the plain-text novel to convert; edit this before running.
source_path = 'f:\\佣兵天下.txt'

# The book title is the file name without its extension, and the output
# folder "<title>_html" is created next to the source file.  os.path.join
# is used so the separator is not lost when the novel lives in a
# sub-directory rather than directly in a drive root.
path_pieces = os.path.split(source_path)
novel_title = os.path.splitext(path_pieces[1])[0]
target_path = os.path.join(path_pieces[0], '%s_html' % novel_title)

# Pattern recognising a section-title line; adjust it to the heading style
# of the book.  A stricter alternative requiring both a volume and a
# chapter marker would be:
#     re.compile(r'第.+卷\s+.+\s+第.+章\s+.+')
section_re = re.compile(r'^\s*第.+卷\s+.*$')

# Page header template shared by every generated page; both %s slots
# receive the page title (<title> and <h2>).
section_head = '''
    <html>
        <head>
            <meta http-equiv="Content-Type" content="text/html; charset=GBK"/>
            <title>%s</title>
        </head>
        <body style="font-family:楷体,宋体;font-size:16px; margin:0;
            padding: 20px; background:#FAFAD2;color:#2B4B86;text-align:center;">
            <h2>%s</h2><a href="#bottom">去页尾</a><hr/>'''

def escape_xml(code):
    """Escape *code* so it can be embedded in HTML body text.

    '&', '<' and '>' are replaced by their entities, each tab becomes four
    non-breaking spaces and every other whitespace character (including a
    trailing newline) becomes a single non-breaking space.

    Args:
        code: the raw text line to escape.

    Returns:
        The escaped string.
    """
    # '&' has to be handled first; otherwise the ampersands introduced by
    # the '&lt;' / '&gt;' entities would themselves be escaped again.
    text = code.replace('&', '&amp;')
    text = text.replace('<', '&lt;')
    text = text.replace('>', '&gt;')
    # Tabs are expanded before the generic whitespace rule so that they
    # keep their four-space width.
    text = text.replace('\t', '&nbsp;&nbsp;&nbsp;&nbsp;')
    return re.sub(r'\s', '&nbsp;', text)

def _section_footer(index, has_next):
    """Return the HTML fragments that close section page number *index*.

    The footer holds the navigation links (previous / index / next), the
    "bottom" anchor targeted by the page header, and the closing tags.
    The "previous" link is omitted for page 0 and the "next" link is
    omitted when *has_next* is false.
    """
    parts = ['<hr/><p>']
    if index > 0:
        parts.append('<a href="%d.html">上一篇</a>&nbsp;|&nbsp;' % (index - 1))
    parts.append('<a href="index.html">目录</a>&nbsp;|&nbsp;')
    if has_next:
        parts.append('<a href="%d.html">下一篇</a>&nbsp;|&nbsp;' % (index + 1))
    parts.append('<a name="bottom" href="#">回页首</a></p>')
    parts.append('</body></html>')
    return parts


def _section_path(index):
    """Return the output file path of section page number *index*."""
    return os.path.join(target_path, '%d.html' % index)


def main():
    """Split the novel at ``source_path`` into HTML pages under ``target_path``.

    Page ``0.html`` receives everything before the first section title (the
    preface); each line matching ``section_re`` starts a new numbered page.
    Blank lines are skipped and every other line becomes a left-aligned
    paragraph.  Finally ``index.html`` is written with a link to every page.
    """
    # create the output folder
    if not os.path.exists(target_path):
        os.mkdir(target_path)

    sec_count = 0      # number of the page currently being written
    sec_cache = []     # paragraphs collected for the current page
    preface_title = '%s 前言' % novel_title
    idx_cache = ['<li><a href="%d.html">%s</a></li>'
                 % (sec_count, novel_title)]

    output = open(_section_path(sec_count), 'w')
    try:
        output.write(section_head % (preface_title, preface_title))

        with open(source_path, 'r') as source:
            for line in source:
                if line.strip() == '':
                    continue

                if not section_re.match(line):
                    sec_cache.append('<p style="text-align:left;">%s</p>'
                                     % escape_xml(line))
                    continue

                # A section title: normalise its whitespace, finish the
                # current page and start the next one.
                title = re.sub(r'\s+', ' ', line).strip()
                # Single-argument parenthesised form runs on Python 2 and 3.
                print('converting %s...' % title)

                output.writelines(sec_cache)
                output.writelines(_section_footer(sec_count, True))
                output.close()
                sec_cache = []
                sec_count += 1

                output = open(_section_path(sec_count), 'w')
                output.write(section_head % (title, title))
                idx_cache.append('<li><a href="%d.html">%s</a></li>'
                                 % (sec_count, title))

        # Flush the last page; it has no successor, so no "next" link.
        output.writelines(sec_cache)
        output.writelines(_section_footer(sec_count, False))
    finally:
        # close() is harmless on an already closed file, so this is safe
        # wherever in the loop an error occurred.
        output.close()

    # write the menu
    menu_head = '%s 目录' % novel_title
    with open(os.path.join(target_path, 'index.html'), 'w') as index_file:
        index_file.write(section_head % (menu_head, menu_head))
        index_file.write('<ul style="text-align:left">')
        index_file.writelines(idx_cache)
        index_file.write('</ul></body></html>')

    print('completed. %d chapter(s) in total.' % sec_count)

# Run the conversion only when this file is executed as a script.
if __name__ == '__main__':
    main()


将其中的
引用
source_path = 'f:\\佣兵天下.txt'
修改成TXT小说的路径,再根据情况,稍微修改下匹配章节标题的正则
引用
section_re = re.compile(r'^\s*第.+卷\s+.*$')
即可。脚本会在小说所在目录生成一个"文件名_html"的文件夹用于存放分割后的文件。

刚刚接触python,感觉写的代码很不精简,请大家帮忙改进下。

你可能感兴趣的:(html,python,cache,OS,脚本)