Implementing MongoDB synchronization in Python
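This script keeps a destination MongoDB instance in sync with a source replica set by tailing the source's oplog (the capped collection local.oplog.rs) and replaying every operation on the destination. For orientation, an oplog entry is a BSON document shaped roughly as follows (a sketch with made-up values, following the classic replica-set oplog format):

import bson

example_oplog_entry = {
    'ts': bson.timestamp.Timestamp(1480000000, 1),  # operation timestamp
    'op': 'i',              # 'i' insert, 'u' update, 'd' delete, 'c' command, 'n' no-op
    'ns': 'mydb.mycoll',    # namespace: <database>.<collection>
    'o': {'_id': 1, 'name': 'alice'},  # operation payload (here, the inserted document)
    # update entries additionally carry 'o2', the selector for the target document
}

The full script: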


import sys
import time
import datetime
import logging
import logging.handlers
import pymongo
import bson
import myconfig as config


class MongoSynchronizer(object):
    """ MongoDB synchronizer: tails the source oplog and replays it on the destination.
    """
    def __init__(self, src_hostportstr, dst_hostportstr, **kwargs):
        """ Constructor.
        """
        if not src_hostportstr:
            raise Exception('src hostportstr is empty')
        if not dst_hostportstr:
            raise Exception('dst hostportstr is empty')

        self._src_hostportstr = src_hostportstr
        self._dst_hostportstr = dst_hostportstr
        self._src_mc = None
        self._dst_mc = None
        self._start_optime = kwargs.get('start_optime') # if set, replay oplog starting from this optime
        self._last_optime = None # optime of the last oplog entry that has been replayed
        self._logger = logging.getLogger()

        self._src_mc = pymongo.MongoClient(src_hostportstr)

        self._dst_mc = pymongo.MongoClient(dst_hostportstr)

    def __del__(self):
        """ Destructor.
        """
        if self._src_mc:
            self._src_mc.close()
        if self._dst_mc:
            self._dst_mc.close()

    def _sync(self):
        """ Replay oplog beginning at the configured start optime.
        """
        if self._start_optime:
            self._logger.info('locating oplog entry, this may take a while')
            # start_optime is interpreted as Unix time in seconds
            oplog_start = bson.timestamp.Timestamp(int(self._start_optime), 0)
            doc = self._src_mc['local']['oplog.rs'].find_one({'ts': {'$gte': oplog_start}})
            if not doc:
                self._logger.error('specified oplog not found')
                return
            oplog_start = doc['ts']
            self._logger.info('actual start timestamp is %s' % oplog_start)
            self._last_optime = oplog_start
            self._sync_oplog(oplog_start)


    def _sync_oplog(self, oplog_start):
        """ Replay oplog.
        """
        try:
            host, port = self._src_mc.address
            self._logger.info('try to sync oplog from %s on %s:%d' % (oplog_start, host, port))
            cursor = self._src_mc['local']['oplog.rs'].find({'ts': {'$gte': oplog_start}}, cursor_type=pymongo.cursor.CursorType.TAILABLE, no_cursor_timeout=True)
            # Tailable cursors only work on capped collections; the oplog is capped,
            # so the cursor stays open and waits for new entries instead of closing.
            if cursor[0]['ts'] != oplog_start:
                self._logger.error('%s is stale, terminate' % oplog_start)
                return
        except IndexError as e:
            self._logger.error(e)
            self._logger.error('%s not found, terminate' % oplog_start)
            return
        except Exception as e:
            self._logger.error(e)
            raise e

        self._logger.info('replaying oplog')
        n_replayed = 0
        while True:
            try:
                if not cursor.alive:
                    self._logger.error('cursor is dead')
                    raise pymongo.errors.AutoReconnect

                # fetch the next oplog entry (raises StopIteration when none is available yet)
                oplog = next(cursor)

                # retry until this oplog entry has been replayed successfully
                recovered = False
                while True:
                    try:
                        if recovered:
                            self._logger.info('recovered at %s' % oplog['ts'])
                            recovered = False
                        self._replay_oplog(oplog)
                        n_replayed += 1
                        if n_replayed % 1000 == 0:
                            self._print_progress(oplog)
                        break
                    except pymongo.errors.DuplicateKeyError as e:
                        # TODO: resolve unique-index conflicts (delete the old
                        # document, insert the new one); skipped for now
                        break
                    except (pymongo.errors.AutoReconnect, pymongo.errors.WriteError) as e:
                        # note: WriteError is a subclass of OperationFailure,
                        # so it must be caught before the handler below
                        self._logger.error(e)
                        self._logger.error('interrupted at %s' % oplog['ts'])
                        self._dst_mc = pymongo.MongoClient(self._dst_hostportstr)
                        recovered = True
                    except pymongo.errors.OperationFailure as e:
                        # other operation failures: skip this oplog entry
                        break
            except StopIteration:
                # there is no oplog to replay now, wait a moment
                time.sleep(0.1)
            except pymongo.errors.AutoReconnect:
                self._src_mc.close()
                self._src_mc = pymongo.MongoClient(self._src_hostportstr)
                cursor = self._src_mc['local']['oplog.rs'].find({'ts': {'$gte': self._last_optime}}, cursor_type=pymongo.cursor.CursorType.TAILABLE, no_cursor_timeout=True)
                if cursor[0]['ts'] != self._last_optime:
                    self._logger.error('%s is stale, terminate' % self._last_optime)
                    return

    def _replay_oplog(self, oplog):
        self._replay_oplog_mongodb(oplog)

    def _print_progress(self, oplog):
        ts = oplog['ts']
        self._logger.info('sync to %s, %s' % (datetime.datetime.fromtimestamp(ts.time), ts))

    def _replay_oplog_mongodb(self, oplog):
        """ Replay a single oplog entry on the destination.
        """
        # parse the oplog entry
        ts = oplog['ts']
        op = oplog['op'] # 'n' or 'i' or 'u' or 'c' or 'd'
        ns = oplog['ns']
        dbname = ns.split('.', 1)[0]
        if op == 'i': # insert
            collname = ns.split('.', 1)[1]
            self._dst_mc[dbname][collname].insert_one(oplog['o'])
            #self._dst_mc[dbname][collname].replace_one({'_id': oplog['o']['_id']}, oplog['o'], upsert=True)
        elif op == 'u': # update
            collname = ns.split('.', 1)[1]
            # pymongo's deprecated update() is replaced here: the oplog 'o' field
            # may hold $-operators (e.g. $set) or a full replacement document
            if any(k.startswith('$') for k in oplog['o']):
                self._dst_mc[dbname][collname].update_one(oplog['o2'], oplog['o'])
            else:
                self._dst_mc[dbname][collname].replace_one(oplog['o2'], oplog['o'])
        elif op == 'd': # delete
            collname = ns.split('.', 1)[1]
            self._dst_mc[dbname][collname].delete_one(oplog['o'])
        elif op == 'c': # command
            self._dst_mc[dbname].command(oplog['o'])
        elif op == 'n': # no-op
            pass
        else:
            self._logger.error('unknown operation: %s' % oplog)
        self._last_optime = ts

    def run(self):
        """ Start data synchronization.
        """
        # databases are never dropped automatically;
        # clear the destination manually if necessary
        try:
            self._sync()
        except KeyboardInterrupt:
            self._logger.info('terminating')
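
# Not part of the original post: a possible helper for choosing start_time.
# Assumption: you want to begin syncing from the newest oplog entry on the
# source; this returns its Unix time in seconds.
def latest_optime(hostportstr):
    mc = pymongo.MongoClient(hostportstr)
    doc = mc['local']['oplog.rs'].find_one(sort=[('$natural', pymongo.DESCENDING)])
    return doc['ts'].time if doc else None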

def logger_init(filepath):
    """ Init logger for global.
    """
    logger = logging.getLogger()
    logger.setLevel(logging.INFO)
    formatter = logging.Formatter('%(asctime)s [%(levelname)s] %(message)s')
    if filepath:
        handler_log = logging.handlers.RotatingFileHandler(filepath, mode='a', maxBytes=1024*1024*100, backupCount=3)
        handler_log.setFormatter(formatter)
        handler_log.setLevel(logging.INFO)
        logger.addHandler(handler_log)
    else:
        handler_stdout = logging.StreamHandler(sys.stdout)
        handler_stdout.setFormatter(formatter)
        handler_stdout.setLevel(logging.INFO)
        logger.addHandler(handler_stdout)

def main():
    logger_init(config.cf.get('conf','log'))
    syncer = MongoSynchronizer(
            config.cf.get('db','mongo_f_uri'),
            config.cf.get('db','mongo_t_uri'),
            start_optime=config.cf.get('conf','start_time'))
    syncer.run()

if __name__ == '__main__':
    main()
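The myconfig module is not shown in the original post. Judging from the config.cf.get(...) calls in main(), cf is a ConfigParser-style object; a minimal sketch under that assumption (the file name sync.conf and all values below are hypothetical):

# myconfig.py -- minimal sketch, assuming main() expects a ConfigParser named cf
import configparser

cf = configparser.ConfigParser()
cf.read('sync.conf')

# sync.conf would then look something like:
#
# [conf]
# log = ./mongo_sync.log
# start_time = 1480000000
#
# [db]
# mongo_f_uri = mongodb://127.0.0.1:27017
# mongo_t_uri = mongodb://127.0.0.1:27018

With a config like this in place, run the script directly: it locates the first oplog entry at or after start_time on the source and begins replaying from there.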

Reposted from: https://my.oschina.net/yagami1983/blog/807199
