爬取斗破苍穹小说

爬取斗破苍穹小说(正则表达式)

import re
import sys

import requests  # third-party HTTP client used to download the pages

# Request headers sent with every page fetch. The user-agent value identifies
# the client as desktop Chrome 83 on 64-bit Windows 10.
_USER_AGENT = (
    'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
    'AppleWebKit/537.36 (KHTML, like Gecko)  '
    'Chrome/83.0.4103.116 Safari/537.36'
)
headers = {'user-agent': _USER_AGENT}

# NOTE(review): the HTML tags inside both regex literals were stripped when this
# snippet was extracted from its web page, leaving only "(.*?)" surrounded by
# blank lines. They are reconstructed here as <h1> (chapter title) and <p>
# (body paragraphs) -- confirm against the target page's real markup.
_CHAPTER_RE = re.compile(r'<h1>(.*?)</h1>')
# re.S lets "." match newlines, so a paragraph may span several source lines.
_CONTENT_RE = re.compile(r'<p>(.*?)</p>', re.S)


def Get_text(url):
    """Download one chapter page and print its title and body paragraphs.

    The page is fetched with the module-level ``headers``, decoded as UTF-8,
    and scanned with two regular expressions: the first title match is
    printed, followed by every paragraph match except the first one and the
    last two.

    Failures are best-effort: a network error, a non-success HTTP status, a
    body that is not valid UTF-8, or a page with no title match is reported
    on stderr and the page is skipped, so a crawl over many URLs keeps going.

    Args:
        url: Address of the chapter page to fetch.

    Returns:
        None. Output goes to stdout (text) and stderr (skip notices).
    """
    try:
        # The timeout keeps one stalled connection from hanging the crawl.
        res = requests.get(url, headers=headers, timeout=10)
        res.raise_for_status()
        # Decode once and reuse the text for both searches.
        html = res.content.decode('utf-8')
    except (requests.RequestException, UnicodeDecodeError) as err:
        print('skipped {}: {}'.format(url, err), file=sys.stderr)
        return

    chapters = _CHAPTER_RE.findall(html)
    if not chapters:
        print('skipped {}: no chapter title found'.format(url), file=sys.stderr)
        return
    print(chapters[0])

    contents = _CONTENT_RE.findall(html)
    # The first and the last two matches are dropped -- presumably page
    # furniture rather than story text; verify against the live page.
    for content in contents[1:-2]:
        print(content)


if __name__ == '__main__':  # program entry point
    # Build the URL of every page (numbers 2 through 1664) and scrape each.
    urls = ['https://www.doupobook.com/doupo/{}.html'.format(i)
            for i in range(2, 1665)]
    for url in urls:
        Get_text(url)

爬取斗破苍穹小说_第1张图片

你可能感兴趣的:(爬虫,python)