利用 Python 下载笔趣阁全部的小说

from random import choice
from urllib.parse import urljoin

import requests
from lxml import etree
# Pool of browser User-Agent strings. One of them is picked at random when the
# module is loaded (see ``data`` below). Every entry must be free of leading or
# trailing whitespace so that it is usable as an HTTP header value.
list_u = [
    'Mozilla/5.0 (compatible; MSIE 10.0; Windows Phone 8.0; Trident/6.0; IEMobile/10.0; ARM; Touch; NOKIA; Lumia 920)',
    'Mozilla/5.0 (compatible; MSIE 9.0; Windows Phone OS 7.5; Trident/5.0; IEMobile/9.0; SAMSUNG; SGH-i917)',
    'Mozilla/4.0 (compatible; MSIE 7.0; Windows Phone OS 7.0; Trident/3.1; IEMobile/7.0; LG; GW910)',
    'Mozilla/5.0 (Windows NT 6.2; WOW64; rv:21.0) Gecko/20100101 Firefox/21.0',
    'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:21.0) Gecko/20130331 Firefox/21.0',
    'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.8; rv:21.0) Gecko/20100101 Firefox/21.0',
    'Mozilla/5.0 (Android; Tablet; rv:14.0) Gecko/14.0 Firefox/14.0',
    'Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_8; en-us) AppleWebKit/534.50 (KHTML, like Gecko) Version/5.1 Safari/534.50',
    'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/14.0.835.163 Safari/535.1',
]

# Holds a single "User-Agent" entry chosen once per run. The generic name is
# kept because other code in this file refers to it as ``data``.
data = {"User-Agent": choice(list_u)}


class M:
    """Crawler that downloads every novel listed on xbiquge.la into text files.

    Each book is appended to ``<book name>.txt`` in the current working
    directory: one heading per chapter followed by that chapter's text.
    """

    # Site root; links found on the pages are resolved against it.
    BASE_URL = 'http://www.xbiquge.la'
    # Seconds to wait for the server before a request is abandoned.
    TIMEOUT = 10
    # Characters replaced in book names before they are used as file names.
    _UNSAFE_FILENAME_CHARS = '\\/:*?"<>|'

    def start_request(self):
        """Fetch the site-wide index and download every book linked from it.

        A book whose download fails with a network/HTTP error is reported and
        skipped so that the remaining books are still processed.

        Raises:
            requests.RequestException: If the index page itself cannot be
                fetched.
        """
        xml = self._fetch(self.BASE_URL + '/xiaoshuodaquan/')

        # Read the title and the link from the same <a> element so the two can
        # never get out of step, which two independent XPath lists could.
        for link in xml.xpath('//div[@class="novellist"]//li/a'):
            name = ''.join(link.itertext()).strip()
            name_url = link.get('href')
            if not name or not name_url:
                continue
            try:
                self.next_request(name, name_url)
            except requests.RequestException as error:
                print("下载失败，已跳过:{} ({})".format(name, error))

    def next_request(self, name, name_url):
        """Download all chapters of one book.

        Args:
            name: Book title; also used (sanitised) as the output file name.
            name_url: URL of the book's chapter list, absolute or relative to
                the site root.

        Raises:
            requests.RequestException: If the chapter list cannot be fetched.
                Failures of individual chapters are reported and skipped.
        """
        xml = self._fetch(urljoin(self.BASE_URL, name_url))
        path = self._book_path(name)

        for link in xml.xpath('//div[@id="list"]//dd/a'):
            title = ''.join(link.itertext()).strip()
            title_url = link.get('href')
            if not title_url:
                continue
            print("正在保存:{}".format(title))
            with open(path, "a", encoding="utf-8") as f:
                f.write("\n" + title + "\n\n")
            try:
                self.save(name, title_url)
            except requests.RequestException as error:
                print("下载失败，已跳过:{} ({})".format(title, error))

    def save(self, name, title_url):
        """Fetch one chapter and append its text to the book's file.

        Args:
            name: Book title the chapter belongs to.
            title_url: URL of the chapter page, absolute or relative to the
                site root.

        Raises:
            requests.RequestException: If the chapter page cannot be fetched.
        """
        xml = self._fetch(urljoin(self.BASE_URL, title_url))
        content = self._clean_content(xml.xpath('//div[@id="content"]/text()'))
        with open(self._book_path(name), "a", encoding="utf-8") as f:
            f.write(content)

    def _fetch(self, url):
        """Download *url* and return the parsed HTML tree."""
        # Strip defensively: a User-Agent value carrying stray leading
        # whitespace would be rejected as an invalid header value.
        headers = {key: value.strip() for key, value in data.items()}
        # The User-Agent has to travel as a header (not as a request body),
        # and a timeout keeps one stalled connection from hanging the crawl.
        response = requests.get(url, headers=headers, timeout=self.TIMEOUT)
        response.raise_for_status()
        # Decode explicitly as UTF-8 for every page instead of relying on the
        # encoding guessed from the response headers.
        return etree.HTML(response.content.decode("utf-8", errors="replace"))

    @classmethod
    def _book_path(cls, name):
        """Return the output file name for the book called *name*."""
        safe_name = ''.join(
            '_' if char in cls._UNSAFE_FILENAME_CHARS else char
            for char in name
        )
        return safe_name + ".txt"

    @staticmethod
    def _clean_content(parts):
        """Turn the raw text nodes of a chapter into newline-separated text.

        Non-breaking spaces used for indentation and surrounding whitespace
        are removed, empty fragments are dropped, and the result ends with a
        newline. An input without any text yields an empty string.
        """
        paragraphs = (part.replace('\xa0', ' ').strip() for part in parts)
        text = '\n'.join(paragraph for paragraph in paragraphs if paragraph)
        return text + '\n' if text else ''
# Script entry: create the downloader and start crawling from the site index.
m = M()
m.start_request()

你可能感兴趣的:(利用 Python 下载笔趣阁全部的小说)