Scrapy 异步存储到 MySQL

为什么要用异步存储?
因为 Scrapy 的下载(爬取)速度通常高于数据库的写入速度,如果在 pipeline 中同步写入 MySQL,写入操作会阻塞爬取;借助 Twisted 的 adbapi 连接池把写入交给异步执行,可以避免存储成为瓶颈。(个人理解,还在改善中。)

from twisted.enterprise import adbapi
import pymysql

class TwistedMysqlPipeline:
    """Scrapy item pipeline that writes items to MySQL through a Twisted
    ``adbapi`` connection pool, so inserts are issued asynchronously.

    The pipeline is built via :meth:`from_settings`, which reads the
    ``HOST``, ``USER``, ``PASSWORD`` and ``DB`` keys from the settings object.
    """

    def __init__(self, pool):
        """Store the connection pool used for all database interactions.

        Args:
            pool: Object exposing ``runInteraction`` (an
                ``adbapi.ConnectionPool`` when built by ``from_settings``).
        """
        self.pool = pool

    # @classmethod declares a class method (as opposed to the usual instance
    # method): ``cls`` is the class itself, ``self`` would be an instance.
    @classmethod
    def from_settings(cls, settings):
        """Build the pipeline from a settings mapping.

        Args:
            settings: Mapping providing ``HOST``, ``USER``, ``PASSWORD`` and
                ``DB``.

        Returns:
            A new pipeline instance wrapping a ``pymysql``-backed
            ``adbapi.ConnectionPool``.
        """
        parm = dict(
            host=settings['HOST'],
            user=settings['USER'],
            password=settings['PASSWORD'],
            db=settings['DB'],
            charset='utf8',
            cursorclass=pymysql.cursors.DictCursor,
        )
        pool = adbapi.ConnectionPool('pymysql', **parm)
        return cls(pool)

    def process_item(self, item, spider):
        """Schedule an asynchronous insert for ``item`` and pass it on.

        Args:
            item: The scraped item; must provide ``id``, ``title``, ``genre``.
            spider: The spider that produced the item (forwarded to the
                error handler).

        Returns:
            The same ``item``, so that any later pipeline still receives it.
        """
        # This step performs the asynchronous store.
        query = self.pool.runInteraction(self.do_insect, item)
        query.addErrback(self.handle_error, item, spider)
        # Returning the item is required; otherwise downstream pipelines
        # would receive None.
        return item

    # NOTE(review): the name looks like a typo of "do_insert"; kept as-is
    # because it is part of the class's existing interface.
    def do_insect(self, cursors, item):
        """Insert one item into ``tb_info`` using a parameterized statement.

        Args:
            cursors: Database cursor supplied by ``runInteraction``.
            item: Mapping with ``id``, ``title`` and ``genre`` keys.
        """
        sql = """
        insert into tb_info (id, title, genre)
        values (%s, %s, %s)
        """
        cursors.execute(sql, (item['id'], item['title'], item['genre']))

    def handle_error(self, failure, item, spider):
        """Errback for failed inserts: print the failure.

        Args:
            failure: The failure object passed by the errback chain.
            item: The item whose insert failed (unused here).
            spider: The spider that produced the item (unused here).
        """
        print(failure)
		


你可能感兴趣的:(Scrapy)