# Scrapy pipeline for storing scraped data in MongoDB

import traceback
import time
import motor.motor_asyncio

try:
    # Python 3.x
    from urllib.parse import quote_plus
except ImportError:
    # Python 2.x
    from urllib import quote_plus

class MongoPipeline(object):
    """Scrapy item pipeline that persists items into MongoDB.

    Uses the ``motor`` async driver.  NOTE(review): ``motor``'s
    ``insert_one``/``insert_many`` return coroutines; as written they are
    invoked synchronously and never awaited, so writes may not actually
    happen unless Scrapy's async-pipeline support drives them — confirm
    against the Scrapy version in use.
    """

    def __init__(self, client, db_name, collection, settings):
        # Keep a handle on the client so close_spider() can shut it down.
        self.client = client
        self.db = self.client[db_name]
        self.collection = self.db[collection]
        self.settings = settings

    @classmethod
    def from_settings(cls, settings):
        """Alternate constructor: build the pipeline from Scrapy settings.

        Reads MONGODB_HOST / MONGODB_PORT / MONGODB_USER / MONGODB_PASSWORD /
        MONGODB_DBNAME / MONGODB_COLLECTION.  Credentials are URL-escaped with
        quote_plus so special characters cannot corrupt the connection URI.
        """
        host = settings["MONGODB_HOST"]
        port = settings["MONGODB_PORT"]
        user = settings["MONGODB_USER"]
        passwd = settings["MONGODB_PASSWORD"]
        if user:
            # BUG FIX: the original built a dict with key 'passwd' but then
            # indexed dbparms['password'], raising KeyError whenever a
            # MongoDB user was configured.
            uri = "mongodb://%s:%s@%s:%s" % (
                quote_plus(user), quote_plus(passwd), host, port)
        else:
            uri = "mongodb://%s:%s" % (host, port)
        client = motor.motor_asyncio.AsyncIOMotorClient(uri)
        db_name = settings["MONGODB_DBNAME"]
        collection = settings['MONGODB_COLLECTION']

        return cls(client, db_name, collection, settings)

    def insert_one(self, sql):
        """Insert a single document; log (but swallow) any driver error."""
        try:
            self.collection.insert_one(sql)
        except Exception:
            traceback.print_exc()

    def insert_many(self, multi_sql: list):
        """Insert a batch of documents; log (but swallow) any driver error."""
        try:
            self.collection.insert_many(multi_sql)
        except Exception:
            traceback.print_exc()

    def process_item(self, item, spider):
        """Scrapy hook: store the item unless the pipeline is disabled."""
        if not self.settings['MONGODB_PIPLINES_ENABLED']:
            return
        mongodb_sql = item.get_mongodb_insert_sql(spider)
        for sql in mongodb_sql:
            self.insert_one(sql)

    def close_spider(self, spider):
        """Scrapy hook: release the MongoDB client on spider shutdown."""
        self.client.close()

# (Blog footer — related topic: Scrapy MongoDB data storage)