Web scraping practice: downloading a novel with BeautifulSoup
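The script below scrapes Romance of the Three Kingdoms from shicimingju.com in two steps: it first requests the book's table-of-contents page and collects every chapter link, then visits each chapter, pulls the text out of the content div with BeautifulSoup, and appends it to sanguo.txt.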

import requests
from bs4 import BeautifulSoup


# Output file for the assembled novel
fp = open('./sanguo.txt', 'w', encoding='utf-8')

# Browser-like User-Agent so the site serves the pages normally
headers = {
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.61 Safari/537.36'
}


# Fetch the table-of-contents page and collect the chapter links

url = 'http://shicimingju.com/book/sanguoyanyi.html'
response = requests.get(url=url, headers=headers)
response.encoding = response.apparent_encoding
chapter_text = response.text
chapter_soup = BeautifulSoup(chapter_text, 'lxml')
a_list = chapter_soup.select('.book-mulu > ul > li > a')  # select every matching <a> tag (one per chapter)

for a in a_list:
    title = a.string
    chapter_url = 'http://shicimingju.com' + a['href']

    # Fetch each chapter page and pull the text out of its content div
    response_txt = requests.get(url=chapter_url, headers=headers)
    response_txt.encoding = response_txt.apparent_encoding
    txt_text = response_txt.text
    txt_soup = BeautifulSoup(txt_text, 'lxml')
    div_tag = txt_soup.find('div', class_="chapter_content")
    content = div_tag.text
    fp.write(title + ':' + content + '\n')
    print(title, 'saved')
fp.close()
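
In practice the loop above will crash on div_tag.text whenever a chapter page lacks the expected div, and firing requests back to back may get the crawler throttled. The sketch below is one defensive variant of the same script; the fetch_soup helper is just an illustrative name, while the selectors, headers, and URLs are carried over unchanged from the code above, and the timeout and sleep values are arbitrary choices.

import time
import requests
from bs4 import BeautifulSoup

headers = {
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.61 Safari/537.36'
}

def fetch_soup(url):
    # Download a page and parse it, reusing the headers defined above
    resp = requests.get(url=url, headers=headers, timeout=10)
    resp.raise_for_status()                     # fail loudly on HTTP errors
    resp.encoding = resp.apparent_encoding      # avoid mojibake in the Chinese text
    return BeautifulSoup(resp.text, 'lxml')

with open('./sanguo.txt', 'w', encoding='utf-8') as fp:
    toc_soup = fetch_soup('http://shicimingju.com/book/sanguoyanyi.html')
    for a in toc_soup.select('.book-mulu > ul > li > a'):
        title = a.string
        chapter_soup = fetch_soup('http://shicimingju.com' + a['href'])
        div_tag = chapter_soup.find('div', class_="chapter_content")
        if div_tag is None:                     # skip chapters whose layout differs
            print(title, 'skipped: content div not found')
            continue
        fp.write(title + ':' + div_tag.text + '\n')
        print(title, 'saved')
        time.sleep(1)                           # be polite: pause between requests

Using the with statement means the file is closed even if a request raises, so the chapters written so far are not lost.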


