# 小说下载_异步协程 (Novel download: async coroutine version)

"""
88读书网——独本下载    例:https://www.88dus.com/xiaoshuo/97/97208/
async版本,使用aiohttp执行多个异步请求

"""

from bs4 import BeautifulSoup
import requests
import os,time
import sys
import asyncio
from aiohttp import ClientSession


class books:
    """Download one novel from the 88dushu site into a single text file.

    The book's index page is fetched synchronously with ``requests``; the
    chapter pages are then fetched concurrently with ``aiohttp`` and written,
    in catalog order, to ``<save_dir>/<bookname>_<author>.txt`` (UTF-8).

    Args:
        save_dir: Directory the text file is written to. Defaults to the
            location this script originally hard-coded.
    """

    # Output directory used when the caller does not supply one.
    DEFAULT_SAVE_DIR = 'D:/娱乐/E小说/'

    def __init__(self, save_dir=DEFAULT_SAVE_DIR):
        self.headers = {
            'User-Agent': "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/22.0.1207.1 Safari/537.1"}
        self.save_dir = save_dir

    def star(self):
        """Download the whole book and write it to disk.

        Reads two module-level names that must exist before this is called:
        ``url`` (the book's index page) and ``loop`` (the asyncio event loop
        used to run the chapter downloads).

        Raises:
            SystemExit: if the target file already exists.
            AttributeError: if the index page lacks the expected markup.
        """
        soup = self.request(url)

        # Book title and author both live in the "jieshao" > "rt" block.
        intro = soup.find('div', class_='jieshao').find('div', class_='rt')
        bookname = intro.find('h1').get_text()
        author = intro.find('div', class_='msg').find('em').get_text()
        # Drops the first three characters -- presumably a label prefix in
        # front of the author's name; confirm against the live page.
        author = author[3:]

        # Refuse to overwrite a book that has already been downloaded.
        path = os.path.join(self.save_dir, bookname + '_' + author + '.txt')
        if os.path.exists(path):
            print('\n《%s》_%s  已存在\n' % (bookname, author))
            sys.exit()
        print('\n《%s》_%s  下载开始\n' % (bookname, author))

        # Create the output directory up front so a missing directory fails
        # (or is fixed) before the download, not after it.
        if self.save_dir:
            os.makedirs(self.save_dir, exist_ok=True)

        # Table of contents: one <a> per chapter.
        anchors = soup.find('div', class_='mulu').find_all('a')
        chapter_urls = self._chapter_urls(url, anchors)

        # gather() returns results in the order the URLs were given, so the
        # chapters stay in catalog order. A failed request propagates here.
        pages = loop.run_until_complete(self._fetch_all(chapter_urls))

        # Parse every page first; a page without the expected markup is
        # skipped on its own instead of silently dropping all later chapters.
        chapters = []
        for chapter_url, html in zip(chapter_urls, pages):
            page = BeautifulSoup(html, "lxml")
            try:
                content = page.find('div', class_='yd_text2').get_text()  # chapter body
                title = page.find("div", class_="novel").find("h1").get_text()  # chapter title
            except AttributeError as e:
                print("AttributeError:%s" % e)
                print('skipped chapter: ' + chapter_url)
                continue
            chapters.append((title, content))

        # Open the file once; nothing is created when no chapter was parsed.
        if chapters:
            with open(path, 'a', encoding='utf-8') as out:
                for title, content in chapters:
                    out.write('\n' + title + '\n\n\n' + content + '\n\n\n')
                    print(bookname + '  ' + title + '--------下载完成!')

        print('《' + bookname + '》' + '    下载完成!**************')
        print(path)

    @staticmethod
    def _chapter_urls(base_url, anchors):
        """Return absolute chapter URLs for the anchors that carry an href."""
        # Local import: the file's import block does not provide urljoin.
        from urllib.parse import urljoin

        hrefs = (anchor.get('href') for anchor in anchors)
        return [urljoin(base_url, href) for href in hrefs if href]

    async def _fetch_all(self, urls):
        """Fetch every URL concurrently over one shared session, in order."""
        async with ClientSession() as session:
            return await asyncio.gather(
                *(self.async_response(page_url, session=session) for page_url in urls))

    # Parser
    def request(self, url, timeout=30):
        """Fetch ``url`` synchronously and return it parsed with html5lib.

        Args:
            url: Page to fetch.
            timeout: Seconds to wait for the server before giving up, so a
                stalled connection cannot hang the script forever.

        Returns:
            BeautifulSoup: the parsed page, decoded as GBK.
        """
        response = requests.get(url, headers=self.headers, timeout=timeout)
        response.encoding = 'gbk'
        return BeautifulSoup(response.text, 'html5lib')

    # Async fetch
    async def async_response(self, url, session=None):
        """Fetch ``url`` asynchronously and return its text decoded as GB18030.

        Args:
            url: Page to fetch.
            session: Optional aiohttp session to reuse. When omitted, a
                temporary session is opened for this single request.

        Returns:
            str: the response body; undecodable bytes are replaced rather
            than raising, so one bad byte cannot abort the whole download.
        """
        if session is None:
            async with ClientSession() as own_session:
                return await self.async_response(url, session=own_session)
        # Send the same headers as the synchronous request() does.
        async with session.get(url, headers=self.headers) as response:
            return await response.text(encoding="gb18030", errors="replace")

# Index page of the book to download (88dushu novel site); read by books.star().
url = 'https://www.x88dushu.com/xiaoshuo/18/18713/'

# books.star() runs its downloads on this module-level loop. Create it
# explicitly and register it as the current loop: calling get_event_loop()
# with no running loop is deprecated in recent Python versions.
loop = asyncio.new_event_loop()
asyncio.set_event_loop(loop)

# Give the instance its own name so the class name `books` is not rebound.
# NOTE(review): this still runs at import time, as before; consider an
# `if __name__ == '__main__':` guard if the module is ever imported.
downloader = books()
try:
    downloader.star()
finally:
    # Release the loop's resources even if the download exits or fails.
    loop.close()

# 你可能感兴趣的:(python爬虫,小说下载,异步,爬虫)  -- blog "related topics" footer: Python crawler, novel download, async, crawler