python : 书签 转换为 HTML

阅读更多
bm2htm.py mybatis.txt
# -*- coding: cp936 -*-
# 先将 bookmark 转码: unicode -> gbk
import os, sys
import re

# Command-line validation: the script takes exactly one argument, which
# must name an existing file with a ".txt" extension.  Every failure
# prints a message and exits with status 4.
def _exit_with(message):
    # print(...) with one parenthesised argument emits the same text as the
    # Python 2 print statement and is also valid Python 3 syntax.
    print(message)
    sys.exit(4)


if len(sys.argv) != 2:
    _exit_with('usage: bm2htm.py file1.txt ')
f1 = sys.argv[1]

if not os.path.exists(f1):
    _exit_with('ERROR: %s not found\n' % f1)

# fn is the input path without its extension; it is reused further down
# to build the output file name.
fn, ext = os.path.splitext(f1)
if ext != '.txt':
    _exit_with('ERROR: %s ext is not .txt\n' % f1)

# Open the bookmark text for reading and create the output file beside it,
# using the same base name with a ".htm" extension.
fp = open(f1, 'r')
f2 = fn + '.htm'
fp2 = open(f2, 'w')

# Fixed preamble written before any bookmark entry.
# NOTE(review): apart from the title text these literals contain only
# whitespace; presumably the HTML tags they once held were lost when this
# listing was extracted -- confirm against the original script.
fp2.writelines([
    '\n',
    '  \n',
    '   \n',
    '    \n',
    '    深入浅出 MyBatis技术原理与实战 \n',
    '  \n',
    '\n',
])

# Settings read by the conversion loop below.
path = 'mybatis'
pre = 'm'  # prefix
pat = re.compile("^[1-9A-Z]")  # line starts with a digit 1-9 or an upper-case letter
end = 272  # page max value

# Main conversion loop: read the bookmark file line by line and write one
# output fragment per input line to fp2.
for line in fp:
    # t holds the whitespace-separated tokens of the stripped line.
    # NOTE(review): the trailing "A1A1" is presumably the GBK byte pair of the
    # full-width space that the first replace() argument was meant to match;
    # as shown here both arguments look like a plain space -- confirm.
    t = line.strip().replace(' ',' ').split()  # A1A1
    # Blank input lines are passed through as an empty output line.
    if line.strip() =='':
        fp2.write('\n')
        continue
    # pat is matched against the unstripped line, so only lines whose very
    # first character is 1-9 or A-Z take this branch.
    if re.match(pat,line):        
        if len(t)>=3:
            # The last token is parsed as an integer (presumably a page
            # number, given the "page max value" setting -- confirm).
            d = int(t[-1])
            # NOTE(review): from here to the end of the script the listing is
            # damaged: string literals span several lines, several statements
            # are collapsed onto one line, and the format strings contain
            # fewer %s placeholders than the tuples supply.  Presumably HTML
            # markup inside the literals was removed during extraction; the
            # text below is kept unchanged and must be restored from the
            # original script before this file can run.
            aline = '  
  • %s %s %s
  • \n' % (t[0],t[1],path,pre,end,d,t[-1]) else: aline = '
  • %s %s
  • \n' % (t[0],t[1]) fp2.write(aline) elif len(t)==2: d = int(t[-1]) aline = '
  • %s %s
  • \n' % (t[0],path,pre,end,d,t[1]) fp2.write(aline) elif len(t)>=3: d = int(t[-1]) aline = '\n
      %s %s %s\n' % (t[0],t[1],path,pre,end,d,t[-1]) fp2.write(aline) else: aline = '
    \n
      '+line fp2.write(aline) # fp.close() fp2.write('
    \n') fp2.write('\n') fp2.write('\n') fp2.close()

    你可能感兴趣的:(python)