用 Scrapy 爬取妹子图(meizitu.com)网站的图片,并保存到本地

# -*- coding: utf-8 -*-
# Spider entry point
import scrapy
from scrapypc.items import ScrapypcItem


class AppSpider(scrapy.Spider):
    """Collect image URLs from the numbered listing pages of meizitu.com.

    The crawl starts at ``more_1.html``.  That first response is handled by
    :meth:`parse`, which emits one item for the page and schedules the
    remaining listing pages (``more_2.html`` .. ``more_72.html``).  Those
    pages are handled by :meth:`parse_page`, which only emits an item, so the
    pagination requests are generated exactly once instead of once per page.

    Each item stores the list of image URLs found on a page in its
    ``image_name`` field.
    """

    name = 'app'
    allowed_domains = ['meizitu.com']
    start_urls = ['http://www.meizitu.com/a/more_1.html']

    # Listing pages follow this pattern; page 1 is the start URL above.
    page_url_template = 'http://www.meizitu.com/a/more_%d.html'
    # Highest listing page number to request.
    last_page = 72

    def parse(self, response):
        """Handle the first listing page and schedule all remaining pages.

        Args:
            response: the downloaded first listing page.

        Yields:
            One ``ScrapypcItem`` for this page, followed by one
            ``scrapy.Request`` per remaining listing page.
        """
        yield self._build_item(response)

        # Pagination: pages 2..last_page are requested once, from here only.
        for page_number in range(2, self.last_page + 1):
            yield scrapy.Request(
                self.page_url_template % page_number,
                callback=self.parse_page,
            )

    def parse_page(self, response):
        """Handle a follow-up listing page.

        Args:
            response: a downloaded listing page other than the first one.

        Yields:
            One ``ScrapypcItem`` holding the image URLs of this page.
        """
        yield self._build_item(response)

    def _build_item(self, response):
        """Return an item whose ``image_name`` lists the page's image URLs."""
        item = ScrapypcItem()
        # Resolve every src against the page URL: the image pipeline issues a
        # request per URL and cannot fetch relative or protocol-relative
        # links.  Already-absolute URLs are returned unchanged by urljoin.
        item['image_name'] = [
            response.urljoin(src)
            for src in response.xpath('//img//@src').extract()
        ]
        return item

# Settings module (settings.py)

import os

# Directory that contains this settings file.
project_dir = os.path.dirname(os.path.abspath(__file__))

# Downloaded images are stored in an "images" directory next to this file.
IMAGES_STORE = os.path.join(project_dir, 'images')

# Name of the Item field that holds the image URLs to download;
# the spider fills the field called "image_name".
IMAGES_URLS_FIELD = 'image_name'

# Only Scrapy's built-in image pipeline is enabled.  The project's own
# pipeline ('scrapypc.pipelines.ScrapypcPipeline', priority 300) is left
# disabled.
ITEM_PIPELINES = {'scrapy.pipelines.images.ImagesPipeline': 1}

你可能感兴趣的:(python笔记,爬虫)