# Environment: Python 3.6 + requests + pyquery
# Target URL: http://www.136book.com/huaqiangu/
#!/usr/bin/env python
# -*- coding: utf-8 -*-
from urllib.parse import urljoin

import requests
from pyquery import PyQuery as pq
# Index page of the novel; every chapter link is scraped from this page.
url = 'http://www.136book.com/huaqiangu/'
# Browser-like User-Agent sent with every request.
headers = {
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_4) AppleWebKit/537.36'
                  ' (KHTML, like Gecko) Chrome/52.0.2743.116 Safari/537.36'
}
# Seconds to wait on the server before a request is abandoned, so a stalled
# connection cannot hang the script forever.
REQUEST_TIMEOUT = 10


def _fetch_html(page_url):
    """Download *page_url* and return its body decoded to text.

    Args:
        page_url: Absolute URL of the page to download.

    Returns:
        The response body as a ``str``.

    Raises:
        requests.RequestException: On connection errors, timeouts, or an
            HTTP error status.
    """
    response = requests.get(page_url, headers=headers, timeout=REQUEST_TIMEOUT)
    response.raise_for_status()
    # When the server declares no charset, requests falls back to ISO-8859-1
    # for text responses, which garbles Chinese pages. In that case use the
    # encoding detected from the body instead.
    if response.encoding is None or response.encoding.lower() == 'iso-8859-1':
        response.encoding = response.apparent_encoding
    return response.text


# Request the index page and parse its source.
r = _fetch_html(url)
doc = pq(r)
# Collect all chapter links. Per the original author's note, the
# ':nth-child(2)' qualifier is meant to select the second book_detail div so
# the "latest chapters" list is skipped and the output starts at chapter one
# -- NOTE(review): verify against the live page markup.
links = doc('div#book_detail:nth-child(2) li a').items()

# Open the output file once and append every chapter to it, rather than
# reopening the file for each chapter.
with open('花千骨.txt', 'a', encoding='utf8') as f:
    for link in links:
        href = link.attr('href')
        if not href:
            # Anchor without a target: nothing to download.
            continue
        # Resolve relative links against the index URL; absolute links are
        # returned unchanged.
        download_url = urljoin(url, href)
        try:
            # Request the chapter page.
            download_page = _fetch_html(download_url)
        except requests.RequestException as exc:
            # Report the failed chapter and keep going instead of aborting
            # the whole download.
            print('Skipping {}: {}'.format(download_url, exc))
            continue
        # Parse the chapter source and extract the chapter text.
        chapter_doc = pq(download_page)
        contents = chapter_doc('div#content').text()
        # Chapter title (the link text) followed by the chapter body.
        f.write(link.text() + "\n\n")
        f.write(contents + "\n\n")

print("写入文件完成!请查看")