# Python: single-threaded novel scraper
import os
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup
if __name__ == '__main__':
    # Download every chapter linked from the novel's index page and append
    # them, in page order, to one UTF-8 text file named after the novel.
    target = "https://www.bequgezw.com/8/8396/"
    save_path = 'Z:\\text'
    # Seconds to wait on each HTTP request; without a timeout a stalled
    # connection would block the whole (single-threaded) run forever.
    request_timeout = 10

    req = requests.get(url=target, timeout=request_timeout)
    # Fail fast on an HTTP error instead of parsing an error page.
    req.raise_for_status()
    req.encoding = 'utf-8'
    soup = BeautifulSoup(req.text, "html.parser")

    # The chapter index is the element with id="list"; its <dl> holds the
    # novel title in <dt> and one <dd> per chapter link.
    list_tag = soup.find(id="list")
    title_tag = None
    if list_tag is not None and list_tag.dl is not None:
        title_tag = list_tag.dl.dt
    story_title = title_tag.get_text(strip=True) if title_tag is not None else ''
    if not story_title:
        raise SystemExit(
            'chapter list or novel title not found at {}'.format(target))

    # Make sure the output directory exists (no error if it already does).
    os.makedirs(save_path, exist_ok=True)
    out_file = os.path.join(save_path, story_title + '.txt')

    # Explicit UTF-8 so the output does not depend on the platform's locale
    # codec; append mode keeps whatever an earlier run already wrote.
    with open(out_file, 'a', encoding='utf-8') as f:
        for dd_tag in list_tag.dl.find_all('dd'):
            link = dd_tag.a
            # Skip <dd> entries that carry no usable link.
            if link is None or not link.get('href'):
                continue
            chapter_name = link.get_text(strip=True)
            print(chapter_name)

            # Resolve the href against the index URL so root-relative,
            # page-relative and absolute links are all handled.
            chapter_url = urljoin(target, link.get('href'))
            try:
                chapter_req = requests.get(url=chapter_url,
                                           timeout=request_timeout)
                chapter_req.raise_for_status()
            except requests.RequestException as err:
                # One unreachable chapter should not abort the whole book.
                print('skipped {}: {}'.format(chapter_url, err))
                continue
            chapter_req.encoding = 'utf-8'
            chapter_soup = BeautifulSoup(chapter_req.text, "html.parser")

            content_tag = chapter_soup.find(id="content")
            if content_tag is None:
                print('skipped {}: no element with id="content"'.format(
                    chapter_url))
                continue
            # Each non-breaking space in the page text becomes a newline.
            content_text = content_tag.text.replace('\xa0', '\n')

            # Put the title on its own line and leave a blank line after the
            # body so consecutive chapters do not run together.
            f.write(chapter_name + '\n')
            f.write(content_text + '\n\n')