Python web crawler: batch downloading a novel
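The script below grabs a novel's table-of-contents page, picks each chapter link out of it with a regular expression, downloads the chapter pages one by one, strips the HTML tags from the body text, and appends everything to a single .txt file. The index URL and the output filename are hard-coded for one particular novel on 9dxs.com.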

import urllib.request
import re
import os
import time

mulu = 'https://www.9dxs.com/2/2308/index.html'   # table-of-contents page of the novel
response = urllib.request.urlopen(mulu)
html = response.read().decode('gbk')              # the site serves its pages in GBK



def get_zhang(lianjie, biaoti):
    zhang = 'https://www.9dxs.com/2/2308/' + lianjie
    response = urllib.request.urlopen(zhang)
    html = response.read().decode('gbk')
    # The chapter body sits between a pair of container tags; the exact markup depends
    # on the site, so the content <div> assumed here may need adjusting.
    pattern = re.compile(u'(<div id="content">)(.*?)(</div>)', re.S)
    zhang_html = pattern.findall(html)
    for zhengwen in zhang_html:
        text = re.sub('<.*?>', '', zhengwen[1])   # drop any HTML tags inside the body
        text = re.sub('&nbsp;', ' ', text)        # replace &nbsp; entities with spaces
        return text
def baocun(biaoti, zhengwen):
    fo = open('帝临鸿蒙.txt', 'a+')               # open the novel's output file for appending
    fo.write('\r\n' + biaoti + '\r\n' + zhengwen)
    fo.close()
# Each chapter is listed as a link in the table of contents; the <li><a href="...">...</a></li>
# markup assumed here is typical but should be checked against the real page source.
pattern = re.compile(u'<li><a href="(.*?)">(.*?)</a></li>')
mulu = pattern.findall(html)
for zhang in mulu:
    biaoti = zhang[1]                            # chapter title
    zhengwen = get_zhang(zhang[0], zhang[1])     # chapter body text
    baocun(biaoti, zhengwen)
    time.sleep(5)                                # pause between requests to avoid burdening the server
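
The script as written has no error handling, so a single timed-out request kills the whole run. If you want to harden it while staying with urllib, a small retry wrapper is enough; the fetch_gbk helper below is a sketch introduced here (it is not part of the original script) that could stand in for the two urlopen(...).read().decode('gbk') calls.

import time
import urllib.request

def fetch_gbk(url, retries=3, delay=5):
    """Fetch a URL and decode it as GBK, retrying on transient network errors."""
    for attempt in range(retries):
        try:
            with urllib.request.urlopen(url, timeout=30) as response:
                return response.read().decode('gbk', errors='ignore')
        except OSError:                  # urllib.error.URLError and socket timeouts are OSError subclasses
            if attempt == retries - 1:
                raise                    # out of retries: let the caller see the failure
            time.sleep(delay)            # back off briefly before trying again

With it in place, html = fetch_gbk(mulu) and html = fetch_gbk(zhang) would replace the corresponding lines above.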
