# Import the required libraries.
import re
import sys

import requests
# Request headers passed to requests.get; the user-agent is a desktop Chrome
# browser string.
headers = {
    'user-agent': (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/83.0.4103.116 Safari/537.36'
    ),
}
def Get_text(url, timeout=10):
    """Download one chapter page and print its title and body text.

    The page is fetched with the module-level ``headers``, decoded as UTF-8
    and scanned with two regular expressions. The first match of the title
    pattern is printed, followed by every match of the body pattern except
    the first one and the last two.

    This is best-effort: a page that cannot be fetched, decoded or parsed is
    reported on stderr and skipped, so a single bad page does not stop the
    caller's loop. KeyboardInterrupt is deliberately not caught, so the crawl
    can still be stopped by the user.

    Args:
        url: Address of the chapter page.
        timeout: Seconds to wait for the server before giving up on the page.

    Returns:
        None. The extracted text is written to stdout.
    """
    try:
        # Without a timeout requests may wait on a stalled server forever.
        res = requests.get(url, headers=headers, timeout=timeout)
        # Do not scrape an HTTP error page as if it were chapter text.
        res.raise_for_status()
        # Decode once and reuse the text for both scans.
        html = res.content.decode('utf-8')
    except (requests.RequestException, UnicodeDecodeError) as err:
        print('skipping {}: {}'.format(url, err), file=sys.stderr)
        return

    # NOTE(review): both patterns are reproduced as they appear in the file:
    # a lazy group terminated by a newline. They look like they lost their
    # surrounding HTML tags (e.g. a heading tag for the title and a paragraph
    # tag for the body) -- confirm against the site's markup and restore.
    chapters = re.findall(r'(.*?)\n', html)
    if not chapters:
        print('skipping {}: no chapter title found'.format(url),
              file=sys.stderr)
        return
    print(chapters[0])

    contents = re.findall(r'(.*?)\n', html, re.S)
    # Drop the first match and the last two; only the middle ones are printed.
    for content in contents[1:-2]:
        print(content)
if __name__ == '__main__':
    # Program entry point: visit pages 2 through 1664 in order, building each
    # page URL from its number and printing that page's text.
    for page_number in range(2, 1665):
        Get_text('https://www.doupobook.com/doupo/{}.html'.format(page_number))