# Python: crawl the novels on a ranking list and save their text

# -*- coding: utf-8 -*-
import os
import sys

import scrapy
sys.path.append("D:\\pycodes\\novel")


class XiaoshuoSpider(scrapy.Spider):
    """Crawl the ranking page and save every chapter of every listed book.

    The crawl has three stages, one callback each:

    1. ``parse`` reads the ranking page and requests every link found inside
       a ``div.topbooks`` block.
    2. ``parse_book`` reads a book's index page and requests every link
       found inside a ``<dd>`` element.
    3. ``parse_content`` appends the chapter text to
       ``D:/novels/<sub-directory>/<book title>.txt``.

    The 1-based position of the ``topbooks`` block a book was found in
    travels with the requests in ``Request.meta`` and selects the
    sub-directory. A module-level global cannot do this job: callbacks run
    long after ``parse`` has finished, so they would all see the same value.
    """

    name = 'xiaoshuo'
    start_urls = ['https://www.qu.la/paihangbang/']
    # Not read by any method of this class; kept because it is part of the
    # class's public surface.
    novel_list = []

    # Directory under which all book files are written.
    _OUTPUT_ROOT = "D:/novels/"
    # ``topbooks`` block position (1-based) -> sub-directory of _OUTPUT_ROOT.
    # NOTE(review): presumably one block per genre on the ranking page and the
    # names abbreviate those genres -- confirm against the live page.
    _CATEGORY_DIRS = {
        1: "xh",
        2: "wx",
        3: "ds",
        4: "ls",
        5: "kh",
        6: "wy",
        7: "ns",
    }
    # Used for every position that has no entry in _CATEGORY_DIRS.
    _DEFAULT_DIR = "wb"
    # Request.meta key that carries the block position between callbacks.
    _CATEGORY_KEY = "category_index"

    def parse(self, response):
        """Request every book linked from the ranking page.

        Args:
            response: Response for the ranking page in ``start_urls``.

        Yields:
            scrapy.Request: One request per link inside a ``div.topbooks``
            block, handled by ``parse_book`` and tagged with the 1-based
            position of that block.
        """
        blocks = response.xpath("//div[@ class='topbooks']")
        for index, sel in enumerate(blocks, start=1):
            for href in sel.xpath(".//a/@href").extract():
                # urljoin resolves both site-relative and page-relative
                # hrefs; plain string concatenation only handled the former.
                yield scrapy.Request(
                    response.urljoin(href),
                    callback=self.parse_book,
                    meta={self._CATEGORY_KEY: index},
                )

    def parse_book(self, response):
        """Request every chapter linked from a book's index page.

        Args:
            response: Response for a book index page, as requested by
                ``parse``.

        Yields:
            scrapy.Request: One request per ``<dd><a href>`` link, handled by
            ``parse_content`` and carrying the same block position as the
            incoming response.
        """
        # Fall back to 0 (-> default directory) when the response was not
        # produced by a request from ``parse``.
        index = response.meta.get(self._CATEGORY_KEY, 0)
        for href in response.xpath("//dd/a/@href").extract():
            yield scrapy.Request(
                response.urljoin(href),
                callback=self.parse_content,
                meta={self._CATEGORY_KEY: index},
            )

    def parse_content(self, response):
        """Append one chapter to its book's text file and return it as an item.

        The file is opened in append mode, so chapters are stored in the
        order their responses are processed.
        NOTE(review): Scrapy downloads concurrently, so that order is
        presumably not guaranteed to be the chapter order -- confirm.

        Args:
            response: Response for a chapter page, as requested by
                ``parse_book``.

        Returns:
            dict: ``name`` (book title), ``volume`` (chapter heading) and
            ``text`` (chapter body), all as ``str``.
        """
        # extract_first() gives None when the XPath matches nothing; str()
        # and format() below then produce the literal text "None".
        filename = response.xpath("//a[@href='./']/text()").extract_first()
        volumename = response.xpath("//h1/text()").extract_first()
        print(filename, volumename)
        body = response.xpath("//div[@id='content']/text()").extract()
        # Replace ideographic spaces (U+3000) with plain spaces.
        content = "".join(body).strip().replace("\u3000", " ")

        item = {
            'name': str(filename),
            'volume': str(volumename),
            'text': str(content),
        }

        index = response.meta.get(self._CATEGORY_KEY, 0)
        subdir = self._CATEGORY_DIRS.get(index, self._DEFAULT_DIR)
        directory = self._OUTPUT_ROOT + subdir + "/"
        # Opening in append mode does not create missing parent directories.
        os.makedirs(directory, exist_ok=True)
        target = directory + "{}.txt".format(filename)
        # ``with`` closes the handle even if the write fails.
        with open(target, "a", encoding='utf-8') as f:
            f.write(item['volume'] + "\n" + item['text'] + "\n\n")
        return item

# You may also be interested in: (Python: crawl the novels on a ranking list and save their text)