scrapy爬虫--小练习

scrapy startproject example

tree

├── example

│   ├── __init__.py

│   ├── __init__.pyc

│   ├── items.py

│   ├── middlewares.py

│   ├── pipelines.py

│   ├── settings.py

│   ├── settings.pyc

│   └── spiders

│      ├── book_spider.py

│      ├── book_spider.pyc

│      ├── __init__.py

│      └── __init__.pyc

cd example

cd spiders

vim book_spider.py


#-*- coding: utf-8 -*-

import scrapy

class BooksSpider(scrapy.Spider):
    """Spider that crawls the book listing of books.toscrape.com page by page.

    For every listing page it yields one dict per book (title and price
    text) and then follows the pager's "next" link, if there is one.
    """

    # Identifier of this spider; it is the name passed to `scrapy crawl`.
    name = "book"

    # Starting point(s) of the crawl; more than one URL may be listed.
    start_urls = ['http://books.toscrape.com/']

    def parse(self, response):
        """Extract the books on one listing page and queue the next page.

        Args:
            response: The downloaded listing page.

        Yields:
            A dict with keys ``'name'`` and ``'price'`` for each
            ``article.product_pod`` element, followed by a
            ``scrapy.Request`` for the next page when a next link exists.
        """
        for book in response.css('article.product_pod'):
            # The title is read from the link's `title` attribute rather
            # than from the link text.
            name = book.xpath('./h3/a/@title').extract_first()
            price = book.css('p.price_color::text').extract_first()
            yield {
                'name': name,
                'price': price,
            }

        # Pagination is handled once per page, after all books were yielded.
        next_url = response.css('ul.pager li.next a::attr(href)').extract_first()
        if next_url:
            # Resolve the extracted href against the current page URL
            # before requesting it.
            next_url = response.urljoin(next_url)
            yield scrapy.Request(next_url, callback=self.parse)

http://books.toscrape.com/ 这个网站可以用来练习爬虫。

scrapy crawl book -o book.csv


scrapy爬虫--小练习_第1张图片


scrapy爬虫--小练习_第2张图片

你可能感兴趣的:(scrapy爬虫--小练习)