需求:有一个数据库,其中一些表每天写入百万条。现要求将某段时间的表数据保存到本地。
最开始使用 fetchall(),它会一次性把整个结果集读入内存,导致服务器直接宕机。如果使用 mysqldump 命令,会锁表,导致无法写入数据。后来发现 Python 的 MySQLdb 提供了 fetchmany() 方法,可以控制每次获取的行数。
以下代码可以根据 where 条件按天、分批读取数据库,而不给服务器造成很大压力。
# coding=utf-8
# created on 2016/1/23 by zhaohf
from __future__ import generators
import MySQLdb, traceback, sys, time, logging
from datetime import datetime, timedelta
# Connection settings; unpacked as keyword arguments into MySQLdb.connect()
# by the script entry point.
MYSQLDB = {'db': 'zhf', 'user': 'zhf', 'passwd': 'root',
'host': 'localhost', 'port': 3306}
# Python 2 only: site.py removes sys.setdefaultencoding at startup, so the
# module is reloaded to get it back, then the process-wide default encoding
# is switched to UTF-8 (affects implicit str <-> unicode conversions).
reload(sys)
sys.setdefaultencoding('utf-8')
# The bare string below reads "Back up MySQL tables". It follows other
# statements, so it is a no-op expression rather than the module docstring.
"""
备份MySQL表
"""
# Module logger: INFO and above are appended to the file 'log.backup'
# (a relative path, so it is created in the current working directory).
logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)
fh = logging.FileHandler('log.backup')
fh.setLevel(logging.INFO)
# Each record is written as: timestamp - logger name - level - message.
formatter = logging.Formatter("%(asctime)s - %(name)s - %(levelname)s - %(message)s")
fh.setFormatter(formatter)
logger.addHandler(fh)
def result_iter(cursor, size=100000):
    """Yield the rows of an already-executed cursor, one at a time.

    Rows are pulled from the cursor with ``fetchmany(size)`` so that at most
    one batch of converted rows is held by this generator at any moment.

    Args:
        cursor: a DB-API cursor on which ``execute()`` has already been
            called; only its ``fetchmany(size)`` method is used.
        size: maximum number of rows requested per ``fetchmany`` call.

    Yields:
        Each row exactly as returned by the cursor.

    After every batch is consumed, the number of rows actually contained in
    that batch is logged at INFO level through the module-level ``logger``.
    """
    while True:
        batch = cursor.fetchmany(size)
        if not batch:
            return
        for row in batch:
            yield row
        # Log the real batch length: the final batch is usually shorter than
        # ``size``, and reporting ``size`` would overstate the progress.
        fetched = len(batch)
        # Drop the batch before the next fetch so two batches never coexist.
        del batch
        logger.info("processed %d", fetched)
def timer(start, end):
    """Format the elapsed time between two timestamps as ``HH:MM:SS.ss``.

    Args:
        start: start time in seconds (e.g. a ``time.time()`` value).
        end: end time in seconds, on the same clock as ``start``.

    Returns:
        A string with zero-padded hours and minutes and seconds shown with
        two decimals, e.g. ``"01:01:01.50"``.
    """
    # Round to whole centiseconds *before* splitting into fields; rounding
    # only when formatting the seconds could print "60.00" (e.g. for 59.996s)
    # instead of carrying into the minutes.
    centis = int(round((end - start) * 100))
    hours, rem = divmod(centis, 360000)
    minutes, rem = divmod(rem, 6000)
    return "{:0>2}:{:0>2}:{:05.2f}".format(hours, minutes, rem / 100.0)
if __name__ == '__main__':
    # Script entry point: dump one table, one calendar day at a time, to
    # stdout as tab-separated rows.
    #
    #   python xx.py table_name start_date end_date
    #
    # start_date and end_date are inclusive days in YYYY-MM-DD form. Rows are
    # selected on their ``created_at`` column. Progress and errors go to the
    # module logger; diagnostics meant for the operator go to stderr so they
    # never get mixed into the dumped data on stdout. The exit status is
    # non-zero on a usage error or on failure.
    args = sys.argv
    if len(args) < 4:
        sys.stderr.write('Usage : python xx.py table_name start_date end_date\n')
        sys.exit(1)
    # NOTE: the table name is interpolated into the SQL as an identifier
    # (identifiers cannot be bound as query parameters), so this script must
    # only be run with trusted table names.
    table = args[1]
    db = None
    try:
        # Parse both dates up front: invalid input fails before any query is
        # issued, and the loop compares real dates rather than strings (string
        # comparison is wrong for non-zero-padded input such as 2016-1-5).
        day = datetime.strptime(args[2], '%Y-%m-%d')
        last_day = datetime.strptime(args[3], '%Y-%m-%d')
        logger.info('START BACKUP %s ...', table)
        start = time.time()
        db = MySQLdb.connect(**MYSQLDB)
        # NOTE(review): MySQLdb's default cursor class appears to buffer the
        # whole result set on the client; if memory is still a problem,
        # MySQLdb.cursors.SSCursor may be needed -- confirm in the driver docs.
        while day <= last_day:
            next_day = day + timedelta(1)
            # Half-open window [day 00:00:00, next day 00:00:00): every row of
            # the day is selected exactly once, including rows stamped exactly
            # at midnight or within the last second of the day.
            sql = ("select * from %s WHERE created_at >= '%s 00:00:00' "
                   "and created_at < '%s 00:00:00'") % (
                table, day.strftime('%Y-%m-%d'), next_day.strftime('%Y-%m-%d'))
            logger.info(sql)
            cursor = db.cursor()
            try:
                cursor.execute(sql)
                for row in result_iter(cursor):
                    sys.stdout.write('\t'.join([str(x) for x in row]) + '\n')
            finally:
                cursor.close()
            day = next_day
        logger.info('END BACKUP %s ...', table)
        logger.info('TIME USED %s.', timer(start, time.time()))
    except Exception:
        # Record the failure in the log file and show it to the operator on
        # stderr, then exit non-zero so callers can detect a failed backup.
        logger.exception('BACKUP FAILED %s', table)
        sys.stderr.write(traceback.format_exc())
        sys.exit(1)
    finally:
        if db is not None:
            db.close()