Built-in pipeline methods

A Scrapy item pipeline exposes four built-in hooks: from_crawler (instance creation), open_spider, process_item, and close_spider. The class below implements all four, appending each item's URL to a file whose path is read from the project settings.

from scrapy.exceptions import DropItem

class DaboPipeline(object):
    def __init__(self, file_path):
        self.file_path = file_path
        self.f = None

    @classmethod
    def from_crawler(cls, crawler):
        """
        Called once by Scrapy to create the pipeline instance.
        Settings are read here, e.g. crawler.settings.getint(...) for integers.
        :param crawler:
        :return:
        """
        file_path = crawler.settings.get('FILE_PATH')
        return cls(file_path)

    def process_item(self, item, spider):
        """
        Called for every item the spider yields.
        :param item:
        :param spider:
        :return:
        """
        # Naive alternative: open('news.log', 'a+'), write, close on every
        # item. Opening the file once in open_spider avoids that overhead.
        if spider.name == "renjian":
            self.f.write(item['url'] + '\n')
            return item  # pass the item on to the next pipeline
        # Raising DropItem keeps any later pipeline from receiving the item.
        raise DropItem()

    def open_spider(self, spider):
        """
        Called when the spider starts.
        :param spider:
        :return:
        """
        self.f = open(self.file_path, 'a+')

    def close_spider(self, spider):
        """
        Called when the spider closes.
        :param spider:
        :return:
        """
        self.f.close()
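
The pipeline only runs if it is registered in settings.py, and from_crawler expects a FILE_PATH key there. A minimal sketch, assuming the project module is named dabo (the module paths and the commented second entry are placeholders for illustration):

# settings.py (sketch; module paths are placeholders)
FILE_PATH = 'news.log'

ITEM_PIPELINES = {
    # Lower numbers run first; a DropItem raised in DaboPipeline
    # stops the item before it reaches any later pipeline.
    'dabo.pipelines.DaboPipeline': 300,
    # 'dabo.pipelines.AnotherPipeline': 400,
}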

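For reference, a hypothetical spider matching the name and item field the pipeline checks (spider name "renjian", item key 'url'); the start URL and parsing logic are illustrative only:

import scrapy

class NewsItem(scrapy.Item):
    # Only 'url' is used by DaboPipeline; 'content' is optional here.
    url = scrapy.Field()
    content = scrapy.Field()

class RenjianSpider(scrapy.Spider):
    name = "renjian"  # matches the check in process_item
    start_urls = ['https://example.com/']  # placeholder URL

    def parse(self, response):
        for href in response.css('a::attr(href)').getall():
            item = NewsItem()
            item['url'] = response.urljoin(href)
            yield item  # handed to DaboPipeline.process_item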