'''
Author: tang
Date: 2016/03/07
MongoDB incremental backup: copy oplog.rs entries to a database on another server
'''
import time
import json
import pymongo
import datetime
import os
import sys
import bson
def init_back_database(p_db_name):
    '''Initialize the backup db: create oplog.rs, its indexes, and a seed timestamp.'''
    backup_db = dest_conn.get_database(p_db_name)
    backup_db.create_collection('oplog.rs', autoIndexId=False)
    backup_db.get_collection('oplog.rs').create_index([("ts", pymongo.ASCENDING)], unique=True)
    backup_db.get_collection('oplog.rs').create_index([("ns", pymongo.ASCENDING)])
    # seed row so the first incremental run has a starting timestamp to compare against
    backup_db.get_collection('oplog.rs').insert({"ts": bson.timestamp.Timestamp(0, 1)})
def inc_oplog(p_db_name):
    """copy source server oplog to backup db"""
    backup_coll = dest_conn.get_database(p_db_name).get_collection('oplog.rs')
    #get last_timestamp
    row_count = backup_coll.count()
    if row_count == 0:
        init_back_database(p_db_name)
        # no previous backup: start from 24 hours ago
        last_timestamp = bson.timestamp.Timestamp(int(time.time()) - 24 * 3600, 1)
    else:
        cur_oplog_rs = backup_coll.find({}, {"ts": 1}).sort([("ts", -1)]).limit(1)
        for row in cur_oplog_rs:
            last_timestamp = row["ts"]
    #copy oplog entries newer than last_timestamp (at most 100000 per run)
    cur_oplog = source_conn.get_database('local').get_collection('oplog.rs').find(
        {"ts": {"$gt": last_timestamp}, "op": {"$in": ['i', 'd', 'u']}}).limit(100000)
    for row in cur_oplog:
        row_data = row
        # store 'o'/'o2' as strings so field names starting with '$' do not break the insert
        if 'o' in row_data:
            row_data['o'] = str(row_data['o'])
        if 'o2' in row_data:
            row_data['o2'] = str(row_data['o2'])
        backup_coll.insert(row_data)
    #end copy oplog
#end inc_oplog
def replay_oplog(p_db_name, p_last_ts):
    '''use the backed-up oplog rows, replay data against every collection'''
    #read the backed-up oplog newer than p_last_ts
    cur_oplog = source_conn.get_database(p_db_name).get_collection('oplog.rs').find({"ts": {"$gt": p_last_ts}})
    for row in cur_oplog:
        db_name, tbl_name = row["ns"].split('.', 1)
        # the optional 'b' field carries the multi-update/multi-delete flag
        if 'b' in row:
            multi_flg = row['b']
        else:
            multi_flg = False
        #insert
        if row['op'] == 'i':
            document_dist = eval(row['o'])  # reverse the str() conversion done during backup
            dest_conn.get_database(db_name).get_collection(tbl_name).insert(document_dist)
        #update
        if row['op'] == 'u':
            document_dist = eval(row['o'])
            if 'o2' in row:
                document2_dist = eval(row['o2'])
                dest_conn.get_database(db_name).get_collection(tbl_name).update(document2_dist, document_dist, multi=multi_flg)
            else:
                dest_conn.get_database(db_name).get_collection(tbl_name).update({}, document_dist, multi=multi_flg)
        #delete
        if row['op'] == 'd':
            document_dist = eval(row['o'])
            dest_conn.get_database(db_name).get_collection(tbl_name).remove(document_dist, multi=multi_flg)
#end def replay_oplog
if __name__ == '__main__':
    btype = sys.argv[1]            # b/back/bak = backup, r/rest/restore = restore
    source_host = sys.argv[2]
    desc_host = sys.argv[3]
    desc_dbname = sys.argv[4]
    last_ts = sys.argv[5]          # epoch seconds; only used for restore
    source_conn = pymongo.MongoClient(['mongodb://%s' % source_host])
    dest_conn = pymongo.MongoClient(['mongodb://%s' % desc_host])
    if btype in ['b', 'back', 'bak']:
        inc_oplog(desc_dbname)
    if btype in ['r', 'rest', 'restore']:
        # convert the epoch-seconds argument to a BSON timestamp for the oplog query
        replay_oplog(desc_dbname, bson.timestamp.Timestamp(int(last_ts), 0))
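For example, if the script above is saved as oplog_inc_backup.py (the file name, hosts and backup db name here are placeholders), a backup run and a later restore might be invoked like this; the last argument is an epoch-seconds timestamp and is only used for restore:

$ python oplog_inc_backup.py back 10.0.0.1:27017 10.0.0.2:27017 oplog_bak 0
$ python oplog_inc_backup.py restore 10.0.0.2:27017 10.0.0.3:27017 oplog_bak 1457308800

For a restore, the "source" host is the server holding the backed-up oplog and the "destination" host is the server the operations are replayed against.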
All of the backup methods outlined must make a full copy of the data, even if very little
of it has changed since the last backup. If you have data that is very large relative to the
amount that is being written, you may want to look into incremental backups.
Instead of making full copies of the data every day or week, you take one backup and
then use the oplog to back up all operations that have happened since the backup. This
technique is much more complex than the ones described above, so prefer them unless
incremental backups are absolutely necessary.
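The script at the top of this page implements exactly this idea: apart from bookkeeping, its backup step boils down to asking the source server's local.oplog.rs for everything newer than the last timestamp already copied, roughly:

    # sketch of the core incremental query (what inc_oplog() above runs);
    # last_timestamp is the newest ts already stored in the backup db
    new_ops = source_conn.get_database('local').get_collection('oplog.rs').find(
        {"ts": {"$gt": last_timestamp},      # only operations after the last backup
         "op": {"$in": ['i', 'u', 'd']}})    # inserts, updates and deletes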
This technique requires two machines, A and B, running mongod. A is your main
machine (probably a secondary) and B is your backup machine:
1. Make a note of the latest optime in A’s oplog:
> op = db.oplog.rs.find().sort({$natural: -1}).limit(1).next();
> start = op['ts']['t']/1000
Keep this somewhere safe—you’ll need it for a later step.
2. Take a backup of your data, using one of the techniques above to get a point-in-time backup. Restore this backup to the data directory on B.
3. Periodically add any operations that have happened on A to B’s copy of the data.
There is a special tool that comes with MongoDB distributions that makes this easy:
mongooplog (pronounced mon-goop-log), which copies data from the oplog of one
server and applies it to the data set on another. On B, run:
$ mongooplog --from A --seconds 1234567
--seconds should be passed the number of seconds between the start variable
calculated in step 1 and the current time, plus a bit extra (it is better to replay operations
a second time than to miss them); a rough sketch of this calculation follows the list.
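For reference, the start optime from step 1 and the --seconds value can also be obtained with pymongo; this is only a sketch, not part of the original steps, and the host name A and the 60-second overlap are placeholder assumptions:

    # capture the latest optime on A (step 1)
    import time
    import pymongo

    client = pymongo.MongoClient('mongodb://A:27017')
    op = client.local['oplog.rs'].find().sort('$natural', -1).limit(1).next()
    start = op['ts'].time                      # optime in epoch seconds; keep this safe

    # ... take the full backup, restore it to B, then later compute --seconds (step 3)
    seconds = int(time.time()) - start + 60    # add a bit so operations overlap rather than get missed
    print('mongooplog --from A --seconds %d' % seconds)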
This keeps your backup relatively up-to-date with your data. This technique is sort of
like keeping a secondary up-to-date manually, so you may just want to use a slave-delayed secondary instead.
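A delayed secondary is simply a replica-set member configured with priority 0 and a slaveDelay. A minimal pymongo sketch follows; it assumes MongoDB 3.0+ (where replSetGetConfig is available), and the member index, host and one-hour delay are illustrative assumptions only:

    # sketch: turn member 2 of the replica set into a hidden, one-hour-delayed secondary
    import pymongo

    client = pymongo.MongoClient('mongodb://A:27017')   # connect to the primary
    cfg = client.admin.command('replSetGetConfig')['config']
    cfg['members'][2]['priority'] = 0        # a delayed member must never become primary
    cfg['members'][2]['hidden'] = True       # keep application reads away from it
    cfg['members'][2]['slaveDelay'] = 3600   # stay one hour behind the primary
    cfg['version'] += 1
    client.admin.command('replSetReconfig', cfg)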