原创文章,欢迎转载。转载请注明:转载自 祥的博客
原文链接:https://blog.csdn.net/humanking7/article/details/90176191
Python获取起点小说网的更新情况
每次想知道网络小说的更新情况,就得去贴吧逛逛,然后再去起点网瞧瞧,这样很浪费时间,尤其是我这种一逛贴吧就出不来的选手。所以我写了一个脚本,用来获取小说的更新情况:不需要点开网页,直接运行 bat 文件就 OK 了。
对于不同的小说,改一下 url 就行了。
那个,我也觉得看网络小说不好,而且也在看一些严肃文学作品,但是无奈啊,我就是一个脱离不了快餐文学的人。。。
文件名:getYuanZunInfo.py
# 获取元尊的数据
# url = 'https://book.qidian.com/info/1014920025'
import requests
import re
from bs4 import BeautifulSoup
import bs4
import os
import time
class getYuanZunInfo():
    """Fetch a novel's info page and report its latest-chapter details.

    The page at ``url`` is downloaded, then three values are extracted from
    it: the chapter count (digits of the element with id ``J-catalogCount``),
    the latest chapter title and the last update time (both taken from the
    ``<li class="update">`` element). The result is stored in
    ``li_dataInfo`` as ``[chapterID, chapterTitle, lastUpdateTime]`` and
    printed to stdout.

    Args:
        url: Address of the novel's info page.
        timeout: Timeout in seconds passed to ``requests.get``.
        filePath: File that ``writeText2File`` writes to.
        mode: ``open`` mode used by ``writeText2File`` (default: append).
        fileEncode: Text encoding used by ``writeText2File``.
    """

    def __init__(self,
                 url,
                 timeout=10,
                 filePath='yuanZun.txt',
                 mode='a',
                 fileEncode='utf-8'):
        self.url = url
        self.timeout = timeout
        self.filePath = filePath
        self.mode = mode
        self.fileEncode = fileEncode
        # Holds [chapterID, chapterTitle, lastUpdateTime] after a
        # successful parseYuanZun() call; empty until then.
        self.li_dataInfo = []

    def writeText2File(self, content):
        """Write ``content`` to ``self.filePath``.

        The file is opened with ``self.mode`` and ``self.fileEncode``; the
        context manager guarantees it is closed even if the write fails.
        """
        with open(self.filePath, self.mode, encoding=self.fileEncode) as f:
            f.write(content)

    def getHTMLText(self):
        """Download ``self.url`` and return ``(ok, text)``.

        Returns:
            ``(True, html)`` on success, or ``(False, error_message)`` when
            the request times out, the connection fails, or the server
            answers with an HTTP error status.
        """
        try:
            # Send a browser-like user agent instead of the requests default.
            headers = {'user-agent': 'Mozilla/5.0'}
            r = requests.get(self.url, headers=headers, timeout=self.timeout)
            r.raise_for_status()
            # Decode using the encoding detected from the body.
            r.encoding = r.apparent_encoding
            return True, r.text
        except (requests.exceptions.Timeout,
                requests.exceptions.ConnectionError,
                requests.exceptions.HTTPError) as e:
            return False, str(e)

    def parseYuanZun(self):
        """Fetch the page, extract the latest-chapter data and display it.

        On a download failure the error reported by ``getHTMLText`` is
        printed and nothing is parsed. On a parse failure (for example when
        an expected element is missing) a failure message is printed. In
        both cases ``li_dataInfo`` is left untouched.
        """
        flg, html = self.getHTMLText()
        if not flg:
            # `html` carries the error text here; do not try to parse it.
            print('获取html失败: %s' % html)
            return

        try:
            soup = BeautifulSoup(html, "html.parser")
            # Chapter count: keep only the digits of the catalog counter.
            chapterID = re.sub(r"\D", "", soup.find(id='J-catalogCount').text)
            # Element describing the most recent update.
            chapterUpdate = soup.find('li', attrs={"class": "update"})
            # Title of the latest chapter.
            chapterTitle = chapterUpdate.find(
                'a', attrs={"class": "blue"}).get("title")
            # Time of the latest update.
            lastUpdateTime = chapterUpdate.find(
                'em', attrs={"class": "time"}).text
        except Exception:
            # Best-effort: a missing element surfaces as AttributeError on
            # None. Unlike a bare except, this lets KeyboardInterrupt and
            # SystemExit propagate.
            print('解析html失败!')
            return

        # Replace (not extend) the stored data so that repeated calls always
        # show the freshest result; slice assignment keeps the same list
        # object for anyone holding a reference to it.
        self.li_dataInfo[:] = [chapterID, chapterTitle, lastUpdateTime]
        self.show_save_Info()

    def show_save_Info(self):
        """Print the stored chapter count, chapter title and update time.

        Reads the first three entries of ``li_dataInfo``; raises
        ``IndexError`` if they are not present.
        """
        print("==================")
        str_show = "更新到: %s 章\n%s\n\n最新更新时间: %s" % (
            self.li_dataInfo[0], self.li_dataInfo[1], self.li_dataInfo[2])
        print(str_show)
        print("==================")
        # Saving to the text file is disabled; to enable it, call
        # self.writeText2File(str_show) here.
if __name__ == "__main__":
    # Info page of the novel to check; change this URL to follow another book.
    book_url = 'https://book.qidian.com/info/1014920025'
    # Fetch, parse and print the latest-chapter information in one go.
    getYuanZunInfo(book_url).parseYuanZun()
文件名:getYuanZun.bat
:: Fetch the latest update information for the novel
python getYuanZunInfo.py
:: Wait for a key press so the output stays visible
pause