基于PyQt5和requests的小说在线阅读器(小说爬虫)

效果:

基于PyQt5和requests的小说在线阅读器(小说爬虫)_第1张图片
基于PyQt5和requests的小说在线阅读器(小说爬虫)_第2张图片
基于PyQt5和requests的小说在线阅读器(小说爬虫)_第3张图片

  • 绝对布局
  • 无章节预加载
  • 在不关闭的情况下再次搜索存在bug

代码:

Python部分:
import sys
from PyQt5.QtGui import QIcon
from PyQt5.QtWidgets import *
from PyQt5.QtCore import *

import random
import requests
from lxml import etree
from bs4 import BeautifulSoup
from urllib.parse import quote


def dataGet(url):
    """Download a page and return its source decoded as GBK.

    A User-Agent is chosen at random from a fixed pool for each call. The
    request is attempted up to four times, each with a 4-second timeout.

    Args:
        url: Address of the page to download.

    Returns:
        The response body as text (decoded as GBK), or None when every
        attempt raised a ``requests`` exception.
    """

    user_agent_list = [
        'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/45.0.2454.85 Safari/537.36 115Browser/6.0.3',
        'Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_8; en-us) AppleWebKit/534.50 (KHTML, like Gecko) Version/5.1 Safari/534.50',
        'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-us) AppleWebKit/534.50 (KHTML, like Gecko) Version/5.1 Safari/534.50',
        'Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.0; Trident/4.0)',
        'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.0)',
        'Mozilla/5.0 (Windows NT 6.1; rv:2.0.1) Gecko/20100101 Firefox/4.0.1',
        'Opera/9.80 (Windows NT 6.1; U; en) Presto/2.8.131 Version/11.11',
        'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_0) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.56 Safari/535.11',
        'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; Trident/4.0; SE 2.X MetaSr 1.0; SE 2.X MetaSr 1.0; .NET CLR 2.0.50727; SE 2.X MetaSr 1.0)',
        'Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Trident/5.0',
        'Mozilla/5.0 (Windows NT 6.1; rv:2.0.1) Gecko/20100101 Firefox/4.0.1',
    ]
    headers = {'User-Agent': random.choice(user_agent_list)}

    for _ in range(4):  # 4 attempts, 4 s timeout each
        try:
            # headers must be passed by keyword: the second positional
            # parameter of requests.get() is `params` (query-string data),
            # so a positional dict would never be sent as HTTP headers.
            response = requests.get(url, headers=headers, timeout=4)
        except requests.exceptions.RequestException:
            continue
        response.encoding = 'gbk'
        return response.text

    # Every attempt failed; callers must be prepared for None.
    return None


def novelSearch(data):
    """Extract novel entries from the HTML of the search-result page.

    Every ``<li>`` element is inspected for ``<span>`` children with the
    classes ``s1`` to ``s5`` and ``s7``. A row is kept only when it has a
    category (``s1``), a name and link (``s2``), a latest-chapter link
    (``s3``) and a status (``s7``); other ``<li>`` elements are skipped.

    Args:
        data: HTML source of the search-result page.

    Returns:
        A three-item list ``[novelList, novelInfoList, linkList]`` where
        ``novelList`` holds the names, ``linkList`` the links and
        ``novelInfoList`` one
        ``[category, name, link, latest, author, time, status]`` row per
        novel. Author and time are empty strings when the page omits them.
    """

    def first(nodes):
        # XPath returns a (possibly empty) list; fall back to '' rather
        # than raising IndexError on a missing field.
        return nodes[0] if nodes else ''

    soup = BeautifulSoup(data, features='lxml')
    novelList = []
    novelInfoList = []
    linkList = []
    for li in soup.find_all('li'):
        html = etree.HTML(str(li))
        class_ = html.xpath('//span[@class="s1"]/text()')
        name = html.xpath('//span[@class="s2"]/a/text()')
        link = html.xpath('//span[@class="s2"]/a/@href')
        new = html.xpath('//span[@class="s3"]/a/text()')
        author = html.xpath('//span[@class="s4"]/text()')
        time = html.xpath('//span[@class="s5"]/text()')
        now = html.xpath('//span[@class="s7"]/text()')
        # name and link are indexed below, so they must be part of the guard.
        if not (class_ and now and new and name and link):
            continue
        novelList.append(name[0])
        novelInfoList.append(
            [class_[0], name[0], link[0], new[0], first(author), first(time), now[0]]
        )
        linkList.append(link[0])
    return [novelList, novelInfoList, linkList]


def chapterGet(data, link):
    """Collect the chapters listed on a novel's table-of-contents page.

    Args:
        data: HTML source of the table-of-contents page.
        link: String prepended to every chapter href found on the page.

    Returns:
        A list of ``[chapter_title, link + href]`` pairs, built by pairing
        the text and the ``href`` of the ``<a>`` elements under ``dl/dd``
        in document order.
    """

    tree = etree.HTML(data)
    titles = tree.xpath('//dl/dd/a/text()')
    hrefs = tree.xpath('//dl/dd/a/@href')
    return [[title, link + href] for title, href in zip(titles, hrefs)]


def contentGet(data):
    """Extract the chapter title and body text from a chapter page.

    Args:
        data: HTML source of the chapter page; may be None or empty when
            the download failed.

    Returns:
        A two-item list ``[title, content]``. Either item is an empty
        string when the corresponding node is missing from the page.
    """

    if not data:
        return ['', '']

    # NOTE(review): the two replace() arguments were lost when this script
    # was published (the literals were rendered as line breaks). They are
    # reconstructed here as <br> tag variants, which merges the text of the
    # content <div> into one node -- confirm against the target site.
    string = data
    for tag in ('<br />', '<br/>', '<br>'):
        string = string.replace(tag, '')

    html = etree.HTML(string)
    if html is None:
        return ['', '']
    title = html.xpath('//div[@class="bookname"]/h1/text()')
    content = html.xpath('//div[@id="content"]/text()')
    # Join every direct text node so text after a leftover child element is
    # not dropped; with a single node this equals content[0].
    return [title[0] if title else '', ''.join(content)]


def Del_line(string):
    """Drop whitespace-only items from *string* and join the rest.

    For a list of lines this removes the blank lines. For a plain ``str``
    the iteration is per character, so every whitespace character is
    removed.
    """

    return "".join(x for x in string if x.split())


class SearchThread(QThread):
    """Worker thread that searches the site for a novel name."""

    _signal1 = pyqtSignal(list)  # list of novel names
    _signal2 = pyqtSignal(list)  # list of per-novel info rows
    _signal3 = pyqtSignal()      # emitted when nothing was found

    def __init__(self):
        super(SearchThread, self).__init__()
        self.novelName = ''

    def __del__(self):
        # `destroyed` is a signal and cannot be called; wait for run() to
        # return instead so the thread is not torn down while running.
        try:
            self.wait()
        except RuntimeError:
            # The underlying Qt object has already been deleted.
            pass

    def set_name(self, string):
        """Store the search keyword used by the next run()."""
        self.novelName = string

    def run(self):
        """Download the search page and emit the parsed results."""
        searchURL = 'https://www.52bqg.com/modules/article/search.php?searchkey=' + self.novelName
        url = quote(searchURL, safe=";/?:@&=+$,", encoding="gbk")
        data = dataGet(url)
        if not data:
            # Download failed: report it the same way as an empty result.
            self._signal3.emit()
            return
        lists = novelSearch(data)
        if lists[0]:
            self._signal1.emit(lists[0])
            self._signal2.emit(lists[1])
        else:
            self._signal3.emit()


class ChapterThread(QThread):
    """Worker thread that loads the chapter list of a novel."""

    _signal1 = pyqtSignal(list)  # list of [title, link] pairs

    def __init__(self):
        super(ChapterThread, self).__init__()
        self.link = ''

    def __del__(self):
        # See SearchThread.__del__: wait instead of calling a signal.
        try:
            self.wait()
        except RuntimeError:
            pass

    def set_link(self, string):
        """Store the table-of-contents URL used by the next run()."""
        self.link = string

    def run(self):
        """Download the table of contents and emit the chapter list."""
        data = dataGet(self.link)
        # An empty list is emitted when the download failed.
        chapters = chapterGet(data, self.link) if data else []
        self._signal1.emit(chapters)


class ReadThread(QThread):
    """Worker thread that loads the text of a single chapter."""

    _signal1 = pyqtSignal(list)  # [title, content]

    def __init__(self):
        super(ReadThread, self).__init__()
        self.link = ''

    def __del__(self):
        # See SearchThread.__del__: wait instead of calling a signal.
        try:
            self.wait()
        except RuntimeError:
            pass

    def set_link(self, string):
        """Store the chapter URL used by the next run()."""
        self.link = string

    def run(self):
        """Download the chapter page and emit its title and content."""
        data = dataGet(self.link)
        content = contentGet(data)
        self._signal1.emit(content)


class MainWin(QWidget):
    """Main window: search box, result list, chapter list and reader pane."""

    # Kept for backward compatibility; __init__ gives every instance its
    # own lists so state is never shared through these class attributes.
    novelList = []
    novelInfoList = []
    chapterList = []

    def __init__(self):
        super().__init__()

        # Per-instance state.
        self.novelList = []
        self.novelInfoList = []
        self.chapterList = []
        self.linList = []
        self.link = ''
        self.r = -1  # row of the chapter being read; -1 means none yet

        self.setFixedSize(1500, 900)
        self.setWindowTitle('小说在线阅读器')
        self.setWindowIcon(QIcon('./static/images/web.png'))

        # Search bar.
        self.label = QLabel('小说名称:', self)
        self.label.move(20, 30)
        self.input = QLineEdit(self)
        self.input.setPlaceholderText('输入小说名称')
        self.input.move(120, 20)
        self.btu = QPushButton('搜索', self)
        self.btu.move(340, 20)

        # Search results.
        self.label_1 = QLabel('搜索结果', self)
        self.label_1.setObjectName('search_header')
        self.label_1.move(20, 80)
        self.list = QListWidget(self)
        self.list.clicked.connect(self.choice)
        self.list.resize(372, 300)
        self.list.move(20, 121)

        # Chapter list.
        self.label_2 = QLabel('章节列表', self)
        self.label_2.setObjectName('search_header')
        self.label_2.move(20, 430)
        self.list_1 = QListWidget(self)
        self.list_1.clicked.connect(self.chapterChoice)
        self.list_1.resize(372, 400)
        self.list_1.move(20, 470)

        # Reader pane.
        self.label_3 = QLabel('章节内容', self)
        self.label_3.setObjectName('reader')
        self.label_3.move(430, 20)
        self.reader = QTextEdit(self)
        self.reader.setReadOnly(True)
        self.reader.resize(1052, 750)
        self.reader.move(430, 61)

        # Chapter navigation buttons.
        self.btu_1 = QPushButton('上一章', self)
        self.btu_1.setObjectName('chapter')
        self.btu_1.move(830, 820)
        self.btu_2 = QPushButton('下一章', self)
        self.btu_2.setObjectName('chapter')
        self.btu_2.move(970, 820)

        self.btu.clicked.connect(self.search)
        self.btu_1.clicked.connect(self.previousChapter)
        self.btu_2.clicked.connect(self.nextChapter)

        # Worker threads. NOTE(review): the attribute name `thread` hides
        # QObject.thread() on this instance; kept so existing references
        # to it keep working.
        self.thread = SearchThread()
        self.thread._signal1.connect(self.callback_1)
        self.thread._signal2.connect(self.callback_2)
        self.thread._signal3.connect(self.nothing)
        self.thread_1 = ChapterThread()
        self.thread_1._signal1.connect(self.callback_3)
        self.thread_2 = ReadThread()
        self.thread_2._signal1.connect(self.callback_4)

        # Apply the style sheet only when the file could be opened.
        qss_file = QFile('./static/qss/win.css')
        if qss_file.open(QFile.ReadOnly):
            qss = str(qss_file.readAll(), encoding='utf-8')
            qss_file.close()
            self.setStyleSheet(qss)

        self.show()

    def search(self):
        """Clear the previous results and start a search for the keyword."""
        self.list.clear()
        self.list_1.clear()
        self.reader.clear()
        # Drop state from the previous search so stale chapters cannot be
        # opened with the next/previous buttons.
        self.chapterList = []
        self.r = -1
        name = self.input.text()
        if name == '':
            QMessageBox.information(self, "提示", "请输入关键字", QMessageBox.Ok, QMessageBox.Ok)
            return
        self.thread.set_name(name)
        self.thread.start()

    def addNovel(self):
        """Show the names in ``novelList`` in the result list widget."""
        self.list.clear()
        self.list.addItems(self.novelList)
        QApplication.processEvents()

    def addChapter(self):
        """Show the titles in ``chapterList`` in the chapter list widget."""
        # Clear first so widget rows always match chapterList indices, even
        # when a second novel is chosen.
        self.list_1.clear()
        self.list_1.addItems([chapter[0] for chapter in self.chapterList])
        QApplication.processEvents()

    def callback_1(self, msg):
        """Receive the novel names from the search thread."""
        self.novelList = msg
        self.addNovel()

    def callback_2(self, msg):
        """Receive the info rows from the search thread.

        Both lists are rebuilt from scratch on every call so their indices
        stay aligned with the result list after repeated searches.
        """
        labels = ('小说分类:', '小说名称:', '小说链接:', '最新章节:',
                  '小说作者:', '最近更新时间:', '更新状态:')
        self.linList = [row[2] for row in msg]
        self.novelInfoList = [
            [label + value for label, value in zip(labels, row)]
            for row in msg
        ]

    def nothing(self):
        """Tell the user that the search returned no results."""
        QMessageBox.information(self, "提示", "未搜索到任何结果", QMessageBox.Ok, QMessageBox.Ok)

    def choice(self, index):
        """Show the details of the clicked novel and optionally open it."""
        r = index.row()
        # The info rows arrive in a separate signal; ignore clicks on rows
        # for which no details are available.
        if not (0 <= r < len(self.novelInfoList) and r < len(self.linList)):
            return
        string = '\n'.join(self.novelInfoList[r])
        box = QMessageBox.information(self, "详细信息", string, QMessageBox.No | QMessageBox.Yes,
                                      QMessageBox.Yes)
        if box == QMessageBox.Yes:
            self.link = self.linList[r]
            self.thread_1.set_link(self.link)
            self.thread_1.start()

    def callback_3(self, msg):
        """Receive the chapter list from the chapter thread."""
        self.chapterList = msg
        self.r = -1
        self.addChapter()

    def _openChapter(self, row):
        """Start loading chapter *row*; return False when out of range."""
        if not 0 <= row < len(self.chapterList):
            return False
        self.r = row
        self.thread_2.set_link(self.chapterList[row][1])
        self.thread_2.start()
        return True

    def chapterChoice(self, index):
        """Load the chapter that was clicked in the chapter list."""
        self._openChapter(index.row())

    def callback_4(self, msg):
        """Display the title and content received from the read thread."""
        title = msg[0]
        content = msg[1]
        string = title + '\n' + content
        self.reader.setText(string)
        QApplication.processEvents()

    def nextChapter(self):
        """Load the chapter after the current one, if there is one."""
        if self._openChapter(self.r + 1):
            self.list_1.setFocus()
            self.list_1.setCurrentRow(self.r)

    def previousChapter(self):
        """Load the chapter before the current one, if there is one."""
        if self._openChapter(self.r - 1):
            self.list_1.setFocus()
            self.list_1.setCurrentRow(self.r)


if __name__ == "__main__":
    app = QApplication(sys.argv)
    win = MainWin()
    sys.exit(app.exec_())
QSS部分:
/* Style sheet for the reader window (loaded from ./static/qss/win.css). */

/* Default label text size. */
QLabel {
    font-size: 20px;
}

/* Header above the search-result and chapter lists.
   NOTE(review): per the Qt Style Sheets reference, `width` generally has
   no effect on widgets; the horizontal padding is what sizes this header.
   Confirm before relying on `width` here or in the rules below. */
QLabel#search_header {
    font-size: 20px;
    border-top-left-radius: 10px;
    border-top-right-radius: 10px;
    border: 1px groove gray;
    width: 100px;
    padding: 10px;
    padding-left: 140px;
    padding-right: 140px;
}

/* Header above the reader pane. */
QLabel#reader {
    font-size: 20px;
    border-top-left-radius: 10px;
    border-top-right-radius: 10px;
    border: 1px groove gray;
    width: 100px;
    padding: 10px;
    padding-left: 480px;
    padding-right: 480px;
}

/* Search input. */
QLineEdit {
    font-family: "Microsoft YaHei UI";
    border-radius: 7px;
    min-height: 30px;
    max-height: 30px;
    padding: 2px 4px;
    width: 200px;
    border: 1px groove gray;
}

QLineEdit:hover {
    background-color: #F7F7F7;
}

/* Reader pane. */
QTextEdit {
    font-family: "华文楷体";
    font-size: 25px;
    border-bottom-left-radius: 10px;
    border-bottom-right-radius: 10px;
    padding: 2px 4px;
    border: 1px groove gray;
}

/* Buttons. The CSS `transition` property is not part of Qt Style Sheets,
   so it is not declared here. */
QPushButton {
    font-family: "宋体";
    min-height: 30px;
    max-height: 30px;
    font-size: 20px;
    background-color: #EEEEEE;
    border-radius: 5px;
    width: 41px;
    border: 1px groove gray;
    padding: 2px 4px;
}

QPushButton:hover {
    background-color: #DDDDDD;
}

QPushButton:pressed {
    background-color: #C4C4C4;
}

/* Search-result and chapter lists. */
QListWidget {
    border: 1px groove gray;
    border-bottom-left-radius: 10px;
    border-bottom-right-radius: 10px;
    padding: 2px 4px;
    font-size: 20px;
}

/* Previous/next chapter buttons (objectName "chapter"). */
QPushButton#chapter {
    font-family: "Microsoft YaHei UI";
    min-height: 30px;
    max-height: 30px;
    font-size: 20px;
    background-color: #EEEEEE;
    border-radius: 5px;
    width: 70px;
    border: 1px groove gray;
    padding: 2px 4px;
}

QPushButton#chapter:hover {
    background-color: #DDDDDD;
}

QPushButton#chapter:pressed {
    background-color: #C4C4C4;
}

你可能感兴趣的:(基于PyQt5和requests的小说在线阅读器(小说爬虫))