APScheduler:强大的任务调度工具,可以完成定时任务,周期任务等,它是跨平台的,用于取代Linux下的cron daemon或者Windows下的task scheduler。
内置三种调度系统:
# coding=UTF-8
import init_environ
from uuid import uuid4
from apscheduler.schedulers.background import BlockingScheduler
# 以下导包为项目的脚本或方法、函数,表示用于最终执行任务的具体方法
from apps.common.utils.utils_host_ip import get_host_ip
from apps.common.utils.utils_mysql import check_mysql_conn
from apps.common.utils.utils_log import log
from apps.web.api import cleanup_cron_expired
from apps.router.models import OrderSyncer
from apps.ncrm.models import Customer
from apps.mnt.models_monitor import MntCostSnap
from scripts.adgroup_top_rpt import prehalfmonth_adgroup_top
from apps.toolkit.schedule_monitor import monitor_record, init_schedule_status
from scripts.sync_app_comments import sync_comment
# 注意:以上导入执行的方法名称请保持唯一,如 main as func_A, main as func_B 可能会导致后面的统计重复
# The scheduled jobs are deployed on several servers. Each server's intranet IP
# is recorded here; a job is added to the local scheduler only when the IP
# assigned to it equals the IP of the current server.
run_server_list = [
"192.168.16.193", # intranet IP of server01
"192.168.16.194", # intranet IP of server02
]
# Job table. Each dict holds the keyword arguments handed to scheduler.add_job
# (func, trigger and the trigger's own fields) plus two bookkeeping keys that
# main() strips before calling add_job:
#   'run_server' - IP of the server that should run the job
#   'note'       - human-readable description stored with the job status
JOB_LIST = [
{'func': sync_comment, 'trigger': 'cron', 'hour': '0,8-23', 'minute': 0,
"run_server": run_server_list[0],
'note': '每天0点、8点到23点,每个小时同步历史评论'},
{'func': cleanup_cron_expired, 'trigger': 'cron', 'hour': 0, 'minute': 30,
"run_server": run_server_list[0],
'note': '每天0点30分,清除掉过期15天内的用户所有数据'},
# day_of_week (0-6 or mon,tue,wed,thu,fri,sat,sun) - day of the week, given either as 0-6 or as the English abbreviation
{'func': prehalfmonth_adgroup_top, 'trigger': 'cron', 'day': 'last', 'hour': 21, 'minute': 30,
"run_server": run_server_list[0],
'note': '每月的最后一天21:30,统计本月1号到15号宝贝top榜报表'},
{'func': MntCostSnap.save_rptsnap, 'trigger': 'cron', 'hour': 22, 'minute': 30,
"run_server": run_server_list[1],
'note': '每天22点30分,保存托管计划快照'},
{'func': check_mysql_conn(OrderSyncer.sync_order_from_top), 'trigger': 'interval', 'minutes': 3,
"run_server": run_server_list[1],
'note': '每3分钟,调用淘宝同步接口同步订单'},
{'func': Customer.refresh_latest_4inservice, 'trigger': 'cron', 'day_of_week': 5, 'hour': 21, 'minute': 00,
"run_server": run_server_list[1],
'note': '每周六21点,刷新当前服务中客户的服务结束时间、最新版本、最高版本(业务中也有刷新)'},
]
# 详细参数说明参考:http://apscheduler.readthedocs.org/en/latest/modules/schedulers/base.html#apscheduler.schedulers.base.BaseScheduler.add_job
def main():
    """Register this host's jobs with a blocking scheduler and run it.

    Every JOB_LIST entry whose 'run_server' equals the local IP is wrapped
    with monitor_record and added to the scheduler. The initial status of the
    registered jobs is then recorded via init_schedule_status and the
    scheduler is started; scheduler.start() blocks until the scheduler stops.
    An exception escaping from start() is logged instead of propagated.
    """
    scheduler = BlockingScheduler()
    host_ip = get_host_ip()  # IP address of this machine
    job_note_dict = {}
    for job_conf in JOB_LIST:
        # Work on a shallow copy: the module-level JOB_LIST must stay intact,
        # otherwise a second call to main() fails with KeyError on
        # 'run_server' and the job configuration is silently corrupted.
        job = dict(job_conf)
        # 'run_server' and 'note' are bookkeeping keys, not add_job arguments,
        # so both are removed before the dict is passed to add_job.
        run_server = job.pop('run_server')
        note = job.pop('note', '')
        if run_server != host_ip:
            continue  # this job belongs to another server
        func = job['func']
        if callable(func):
            # Remember the description under the function name
            job_note_dict[func.__name__] = note
        job['id'] = uuid4().hex  # id that monitor_record uses to look the job up
        job['func'] = monitor_record(func, 'scheduler', job['id'], scheduler)
        scheduler.add_job(**job)
    # On start-up, record each job's next run time and status
    init_schedule_status(scheduler, job_note_dict, 'scheduler', host_ip, run_server_list)
    try:
        scheduler.start()
    except Exception as e:
        log.exception("scheduler quit with exception, e=%s" % e)


if __name__ == '__main__':
    main()
# coding=UTF-8
"""
利用UDP协议获取本地ip
"""
import socket
def get_host_ip(probe_addr=('8.8.8.8', 80)):
    """Return the local IP address used to reach ``probe_addr``.

    A UDP socket is connected to ``probe_addr`` and the local address the
    operating system assigned to it is read back with getsockname().

    :param probe_addr: ``(host, port)`` tuple used only to select the
        outgoing interface; defaults to ``('8.8.8.8', 80)``.
    :return: the local IP address as a string.
    :raises socket.error: if the socket cannot be created or connected.
    """
    # Create the socket outside the try block: if creation itself fails there
    # is nothing to close, and the real error must not be masked by an
    # unbound-variable error in the cleanup code.
    sock = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
    try:
        sock.connect(probe_addr)
        return sock.getsockname()[0]
    finally:
        sock.close()
# coding=utf-8
import datetime
from functools import wraps
from mongoengine import Document
from mongoengine.fields import IntField, DateTimeField, StringField, BooleanField, ListField
from apps.common.utils.utils_log import log
class ScheduleStatus(Document):
    """Execution status record of one scheduled job."""

    # (value, label) pairs; other code picks status values by position in
    # STATUS_CHOICE, so the order of the entries must not change.
    WARN_WAY_CHOICE = (('app', '钉钉提醒'), ('sms', '短信提醒'), ('email', '邮箱提醒'))
    STATUS_CHOICE = (('pending', '正常等待中'), ('error', '运行出错'), ('stopped', '已停止'), ('doing', '执行中'))

    app = StringField(verbose_name='应用名')  # e.g. ztcjl, kcjl
    module = StringField(verbose_name='模块名')  # e.g. scheduler, scheduler_kwlib
    func = StringField(verbose_name='方法')
    note = StringField(verbose_name='备注')
    begin_time = DateTimeField(verbose_name='开始时间', default=None)
    end_time = DateTimeField(verbose_name='结束时间', default=None)
    run_count = IntField(verbose_name='运行次数', default=0)  # reset to 0 when the scheduler restarts
    next_run_time = DateTimeField(verbose_name='下次运行时间')
    status = StringField(verbose_name='状态', choices=STATUS_CHOICE)
    timeout = IntField(verbose_name='下次运行时间误差(s)', default=60)  # in seconds
    warned_time = DateTimeField(verbose_name='已提醒时间', default=None)  # only failing jobs are warned about
    warn_interval = IntField(verbose_name='提示时间间隔(m)', default=60 * 24)  # minutes between warnings for a failing job
    warn_msg = StringField(verbose_name='异常信息')  # error details recorded on failure
    warn_enabled = BooleanField(verbose_name='是否提醒', default=True)
    host_ip = StringField(verbose_name='服务器IP')
    # Several ways may be selected, e.g. ['app', 'sms'] (see WARN_WAY_CHOICE).
    # The default is a callable so that every document gets its own list
    # instead of sharing one mutable list object between all documents.
    warn_way = ListField(verbose_name='提醒方式', default=lambda: ['app'])

    meta = {'db_alias': 'monitor-db', 'collection': 'schedule_status', 'indexes': ['app', 'module', 'func']}
# Collection object behind ScheduleStatus, fetched once at import time and
# used by the functions below for their find_one/update calls.
sch_status_coll = ScheduleStatus._get_collection()
def monitor_record(func, module_name, job_id, schedule_obj, app_name='kcjl'):
    """Wrap ``func`` so that every run is recorded in the schedule status collection.

    :param func: the job function to run.
    :param module_name: module name stored with the status record.
    :param job_id: id of the scheduler job, used to look up its next run time.
    :param schedule_obj: scheduler object providing ``get_job(job_id)``.
    :param app_name: application name stored with the status record.
    :return: a wrapper that returns the result of ``func``, or ``''`` when
        ``func`` or the bookkeeping around it raised. The wrapper itself
        never raises an ``Exception``.
    """
    status_pending = ScheduleStatus.STATUS_CHOICE[0][0]
    status_error = ScheduleStatus.STATUS_CHOICE[1][0]
    status_doing = ScheduleStatus.STATUS_CHOICE[3][0]

    @wraps(func)
    def _record(*args, **kwargs):
        query_dict = {'app': app_name, 'module': module_name, 'func': func.__name__}
        try:
            # Mark the job as running before it starts
            started = {'begin_time': datetime.datetime.now(), 'end_time': None, 'status': status_doing}
            sch_status_coll.update(query_dict, {'$set': started}, upsert=False)

            # Run the job itself
            result = func(*args, **kwargs)

            # Record the outcome of the run
            finished = {'end_time': datetime.datetime.now(), 'status': status_pending, 'warn_msg': ''}
            job = schedule_obj.get_job(job_id)
            next_time = job.next_run_time if job is not None else None
            if next_time is not None:
                # Stored as a naive datetime truncated to whole seconds
                finished['next_run_time'] = next_time.replace(microsecond=0, tzinfo=None)
            # Only a dict result can report a failure; any other return value
            # (True, a list, a string, ...) counts as a normal run instead of
            # crashing on a missing .get() method.
            if isinstance(result, dict) and result.get('status') == 'failure':
                finished['status'] = status_error
                finished['warn_msg'] = result.get('msg', '')
            sch_status_coll.update(query_dict, {'$set': finished, '$inc': {'run_count': 1}}, upsert=False)
        except Exception as e:
            log.exception('monitor_record error, e=%s' % e)
            result = ''
            # Best effort: record the failure so the job is not left in the
            # "doing" state forever. A second failure here is only logged.
            try:
                failed = {'end_time': datetime.datetime.now(), 'status': status_error, 'warn_msg': '%s' % e}
                sch_status_coll.update(query_dict, {'$set': failed}, upsert=False)
            except Exception as mark_err:
                log.error('monitor_record failed to save error status, e=%s' % mark_err)
        return result

    return _record
def init_schedule_status(scheduler, job_note_dict, module_name, host_ip, ip_list, app_name='kcjl'):
    """Reset the stored status of every scheduler job; called when the scheduler starts.

    For each job of ``scheduler`` the status record identified by
    (app, module, func) is reset to "pending" with its next run time, or
    created if it does not exist yet. Records of this module whose function is
    not among the current jobs and whose host_ip is not in ``ip_list`` are
    marked as stopped. Errors are logged, never raised.

    :param scheduler: scheduler whose jobs have already been added.
    :param job_note_dict: mapping of function name to job description.
    :param module_name: module name stored with the status records.
    :param host_ip: IP of the current server.
    :param ip_list: IPs of all servers that run scheduled jobs.
    :param app_name: application name stored with the status records.
    """
    try:
        now = datetime.datetime.now(scheduler.timezone)
        job_name_list = []  # function names of the jobs registered on this host
        status_pending = ScheduleStatus.STATUS_CHOICE[0][0]
        status_stopped = ScheduleStatus.STATUS_CHOICE[2][0]
        for job in scheduler.get_jobs():
            next_run_time = job.trigger.get_next_fire_time(None, now)
            if next_run_time is not None:
                # Stored as a naive datetime truncated to whole seconds
                next_run_time = next_run_time.replace(microsecond=0, tzinfo=None)
            func_name = job.func.__name__
            job_name_list.append(func_name)
            filter_dict = {'app': app_name, 'module': module_name, 'func': func_name}
            # Fields that are reset on every start
            update_dict = {'begin_time': None,
                           'end_time': None,
                           'run_count': 0,
                           'next_run_time': next_run_time,
                           'status': status_pending,
                           'note': job_note_dict.get(func_name, ''),
                           'warn_msg': '',
                           'host_ip': host_ip}
            if sch_status_coll.find_one(filter_dict):
                sch_status_coll.update(filter_dict, {'$set': update_dict}, upsert=False)
            else:
                # New record: the reset fields plus the identity and the warning settings
                insert_dict = dict(update_dict)
                insert_dict.update(filter_dict)
                insert_dict.update({'timeout': 60,
                                    'warned_time': None,
                                    'warn_interval': 60 * 24,  # 24 hours, in minutes
                                    'warn_enabled': True,
                                    'warn_way': ['app']})
                ScheduleStatus.objects.create(**insert_dict)
        # Mark the jobs of this module that are no longer scheduled as stopped.
        # multi=True is required: without it only the first match is updated.
        query_stop_dict = {'app': app_name, 'module': module_name, 'func': {'$nin': job_name_list},
                           'host_ip': {'$nin': ip_list}}
        sch_status_coll.update(query_stop_dict, {'$set': {'status': status_stopped}}, upsert=False, multi=True)
    except Exception as e:
        log.exception('init_schedule_status, e=%s' % e)
    else:
        # Report success only when nothing above failed
        log.info('init_schedule_status success.')
# coding=UTF-8
"""定义log对象,以及改写log的部分函数,改变日志的输出颜色"""
import os
import logging.config
import settings
class MyLogger(logging.Logger):
"""重写Logger的某些函数,给这些等级的日志输出不同颜色"""
def error(self, msg, *args, **kwargs):
"""重写error函数"""
if self.isEnabledFor(40):
# self._log(40, msg, args, **kwargs)
self._log(40, "\033[31;1m%s\033[0m" % msg, args, **kwargs)
# Make MyLogger the class used for loggers created from here on, so that
# their error() output is colorized.
logging.setLoggerClass(MyLogger)
# Load the logging configuration from logger.conf in the project root.
logging.config.fileConfig(os.path.join(settings.PROJECT_ROOT, "logger.conf"))
# Module-level logger object; presumably the `log` that other modules import.
log = logging.getLogger("infile")
log.info("============== log initialized ==============")