爬取图书

# -*- coding: utf-8 -*-
import scrapy


class BooksSpider(scrapy.Spider):
    """Spider that walks the books.toscrape.com listing pages.

    Starting from the site root, it yields one item per book found on each
    listing page and then follows the pager's "next" link until no such
    link is present.
    """

    name = 'books'
    # Must match the host used in start_urls and in the pagination links.
    # The previous value 'www.books.toscrape.com' does not cover
    # 'books.toscrape.com', so follow-up page requests were treated as
    # offsite and dropped, ending the crawl after the first page.
    allowed_domains = ['books.toscrape.com']
    start_urls = ['http://books.toscrape.com/']

    def parse(self, response):
        """Extract book items from a listing page and queue the next page.

        Args:
            response: The downloaded listing page.

        Yields:
            A dict with keys ``'name'`` and ``'price'`` for every
            ``article.product_pod`` element; each value is the list returned
            by ``.extract()`` for that selector. After the items, a
            ``scrapy.Request`` for the next listing page is yielded when the
            pager contains a "next" link.
        """
        for book in response.css("article.product_pod"):
            yield {
                # The title attribute of the link inside the <h3> heading.
                'name': book.xpath("./h3/a/@title").extract(),
                'price': book.css('p.price_color::text').extract(),
            }

        next_url = response.css('ul.pager li.next a::attr(href)').extract_first()
        if next_url:
            # The href is resolved against the current page URL before use.
            yield scrapy.Request(response.urljoin(next_url), callback=self.parse)


你可能感兴趣的:(python)