APScheduler任务调度工具在Django项目中的使用

1. APScheduler

APScheduler:强大的任务调度工具,可以完成定时任务,周期任务等,它是跨平台的,用于取代Linux下的cron daemon或者Windows下的task scheduler。

内置三种调度系统:

  • Cron风格(具有可选的开始/结束时间)
  • 间隔性执行
  • 仅在某个时间执行一次

2.代码演练

# coding=UTF-8
import init_environ
from uuid import uuid4
from apscheduler.schedulers.background import BlockingScheduler
# 以下导包为项目的脚本或方法、函数,表示用于最终执行任务的具体方法
from apps.common.utils.utils_host_ip import get_host_ip
from apps.common.utils.utils_mysql import check_mysql_conn
from apps.common.utils.utils_log import log
from apps.web.api import cleanup_cron_expired
from apps.router.models import OrderSyncer
from apps.ncrm.models import Customer
from apps.mnt.models_monitor import MntCostSnap
from scripts.adgroup_top_rpt import prehalfmonth_adgroup_top
from apps.toolkit.schedule_monitor import monitor_record, init_schedule_status
from scripts.sync_app_comments import sync_comment

# 注意:以上导入执行的方法名称请保持唯一,如 main as func_A, main as func_B 可能会导致后面的统计重复

# When the scheduled jobs are deployed on several servers, record each server's
# intranet IP here; a job is added to the local scheduler only when its
# "run_server" IP equals the IP of the current server.
run_server_list = [
    "192.168.16.193",  # intranet IP of server01
    "192.168.16.194",  # intranet IP of server02
]

# Job table consumed by main(). For every entry:
#   - "run_server" is compared with the local IP to decide whether the job runs on this host;
#   - "note" is a human-readable description stored with the job's status record;
#   - both keys are removed in main(), and the remaining keys are passed to scheduler.add_job(**job).
JOB_LIST = [
    {'func': sync_comment, 'trigger': 'cron', 'hour': '0,8-23', 'minute': 0,
     "run_server": run_server_list[0],
     'note': '每天0点、8点到23点,每个小时同步历史评论'},

    {'func': cleanup_cron_expired, 'trigger': 'cron', 'hour': 0, 'minute': 30,
     "run_server": run_server_list[0],
     'note': '每天0点30分,清除掉过期15天内的用户所有数据'},

    # day_of_week (0-6 or mon,tue,wed,thu,fri,sat,sun) - the day of the week, given either as 0-6 or as the English abbreviation

    {'func': prehalfmonth_adgroup_top, 'trigger': 'cron', 'day': 'last', 'hour': 21, 'minute': 30,
     "run_server": run_server_list[0],
     'note': '每月的最后一天21:30,统计本月1号到15号宝贝top榜报表'},

    {'func': MntCostSnap.save_rptsnap, 'trigger': 'cron', 'hour': 22, 'minute': 30,
     "run_server": run_server_list[1],
     'note': '每天22点30分,保存托管计划快照'},

    {'func': check_mysql_conn(OrderSyncer.sync_order_from_top), 'trigger': 'interval', 'minutes': 3,
     "run_server": run_server_list[1],
     'note': '每3分钟,调用淘宝同步接口同步订单'},

    {'func': Customer.refresh_latest_4inservice, 'trigger': 'cron', 'day_of_week': 5, 'hour': 21, 'minute': 00,
     "run_server": run_server_list[1],
     'note': '每周六21点,刷新当前服务中客户的服务结束时间、最新版本、最高版本(业务中也有刷新)'},
]


# 详细参数说明参考:http://apscheduler.readthedocs.org/en/latest/modules/schedulers/base.html#apscheduler.schedulers.base.BaseScheduler.add_job

def main():
    """Register the jobs assigned to this host and run the blocking scheduler.

    Every entry of ``JOB_LIST`` whose ``run_server`` equals the local IP is
    wrapped with ``monitor_record`` and added to a ``BlockingScheduler``.
    The initial status of the registered jobs is then stored through
    ``init_schedule_status`` and the scheduler is started, which blocks until
    the scheduler stops. Exceptions raised by ``scheduler.start()`` are logged
    and not re-raised.
    """
    scheduler = BlockingScheduler()
    host_ip = get_host_ip()  # IP address of the local machine
    job_note_dict = dict()
    for job_conf in JOB_LIST:
        # Work on a shallow copy: the entries are edited below (keys removed,
        # 'func' replaced by its wrapper), and the module-level JOB_LIST must
        # stay intact so that main() can be called again in the same process.
        job = dict(job_conf)

        # 'run_server' and 'note' are bookkeeping keys; they must not reach
        # add_job(), which is called with the remaining keys as keyword arguments.
        run_server = job.pop('run_server')
        note = job.pop('note', '')

        if host_ip != run_server:
            # This job belongs to another server.
            continue
        if not callable(job['func']):
            continue

        job_note_dict[job['func'].__name__] = note
        job['id'] = uuid4().hex  # explicit id so monitor_record can look the job up later
        job['func'] = monitor_record(job['func'], 'scheduler', job['id'], scheduler)
        scheduler.add_job(**job)

    # On first start of this process, store the next run time and status of every job.
    init_schedule_status(scheduler, job_note_dict, 'scheduler', host_ip, run_server_list)

    try:
        scheduler.start()
    except Exception as e:
        log.exception("scheduler quit with exception, e=%s" % e)

# Start the scheduler only when this file is executed as a script, not when it is imported.
if __name__ == '__main__':
    main()


3.取本机服务器ip:utils_host_ip.py

# coding=UTF-8
"""
利用UDP协议获取本地ip
"""
import socket


def get_host_ip(probe_addr=('8.8.8.8', 80)):
    """Return the local IP address used to reach ``probe_addr``.

    A UDP socket is connected to ``probe_addr`` and the socket's own local
    address is read back with ``getsockname()``.

    :param probe_addr: ``(host, port)`` tuple the UDP socket is connected to.
        Defaults to ``('8.8.8.8', 80)``, the address that used to be hard-coded.
    :return: the local IP address as a string.
    :raises socket.error: if the socket cannot be created or connected.
    """
    # Create the socket before entering ``try``: if creation fails there is
    # nothing to close, and the real error is no longer masked by a NameError
    # on ``s`` inside ``finally``.
    s = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
    try:
        s.connect(probe_addr)
        return s.getsockname()[0]
    finally:
        s.close()

4.建表记录定时任务执行状态:schedule_monitor.py

# coding=utf-8
import datetime
from functools import wraps

from mongoengine import Document
from mongoengine.fields import IntField, DateTimeField, StringField, BooleanField, ListField

from apps.common.utils.utils_log import log


class ScheduleStatus(Document):
    """Execution status record of one scheduled job."""
    WARN_WAY_CHOICE = (('app', '钉钉提醒'), ('sms', '短信提醒'), ('email', '邮箱提醒'))
    # NOTE: monitor_record / init_schedule_status address these entries by index,
    # so the order (pending, error, stopped, doing) must not change.
    STATUS_CHOICE = (('pending', '正常等待中'), ('error', '运行出错'), ('stopped', '已停止'), ('doing', '执行中'))

    app = StringField(verbose_name='应用名')  # e.g. ztcjl, kcjl
    module = StringField(verbose_name='模块名')  # e.g. scheduler, scheduler_kwlib
    func = StringField(verbose_name='方法')
    note = StringField(verbose_name='备注')
    begin_time = DateTimeField(verbose_name='开始时间', default=None)
    end_time = DateTimeField(verbose_name='结束时间', default=None)
    run_count = IntField(verbose_name='运行次数', default=0)  # reset to 0 when the scheduler is restarted
    next_run_time = DateTimeField(verbose_name='下次运行时间')
    status = StringField(verbose_name='状态', choices=STATUS_CHOICE)
    timeout = IntField(verbose_name='下次运行时间误差(s)', default=60)  # in seconds
    warned_time = DateTimeField(verbose_name='已提醒时间', default=None)  # only failing jobs trigger a reminder
    warn_interval = IntField(verbose_name='提示时间间隔(m)', default=60 * 24)  # reminder interval (minutes) while the job is failing
    warn_msg = StringField(verbose_name='异常信息')  # error message recorded when the job fails
    warn_enabled = BooleanField(verbose_name='是否提醒', default=True)
    host_ip = StringField(verbose_name='服务器IP')

    # Several ways may be selected, e.g. ['app', 'sms'] (see WARN_WAY_CHOICE).
    # The default is a callable so that each document gets its own list
    # instead of sharing one mutable list object between documents.
    warn_way = ListField(verbose_name='提醒方式', default=lambda: ['app'])

    meta = {'db_alias': 'monitor-db', 'collection': 'schedule_status', 'indexes': ['app', 'module', 'func']}


# Raw collection handle behind ScheduleStatus; the functions below use it for
# direct find_one()/update() calls.
# NOTE(review): _get_collection() is a private mongoengine API and is called at
# import time — presumably the 'monitor-db' connection must already be registered; confirm.
sch_status_coll = ScheduleStatus._get_collection()


def monitor_record(func, module_name, job_id, schedule_obj, app_name='kcjl'):
    """Wrap ``func`` so that every run is recorded in the schedule status collection.

    The status record is looked up by ``app_name``, ``module_name`` and
    ``func.__name__``. Before the call it is switched to "doing"; afterwards
    the end time, next run time, final status and run counter are stored.

    :param func: the job callable to wrap.
    :param module_name: value of the ``module`` field of the status record.
    :param job_id: id under which the job is registered in ``schedule_obj``.
    :param schedule_obj: scheduler queried with ``get_job(job_id)`` for the next run time.
    :param app_name: value of the ``app`` field of the status record.
    :return: the wrapper. It returns whatever ``func`` returns, or ``''`` when
        ``func`` raised; exceptions of ``func`` are logged, recorded as status
        "error" and not re-raised.
    """

    @wraps(func)
    def _record(*args, **kwargs):
        status_choice = ScheduleStatus.STATUS_CHOICE
        query_dict = {'app': app_name, 'module': module_name, 'func': func.__name__}

        # Mark the job as running. This is bookkeeping only: a failure here
        # is logged but must not prevent the job itself from running.
        try:
            update_cond = {'$set': {'begin_time': datetime.datetime.now(),
                                    'end_time': None,
                                    'status': status_choice[3][0]}}
            sch_status_coll.update(query_dict, update_cond, upsert=False)
        except Exception as e:
            log.error('monitor_record error, e=%s' % e)

        # Run the job itself.
        status = status_choice[0][0]
        warn_msg = ''
        try:
            result = func(*args, **kwargs)
        except Exception as e:
            # Keep the previous contract (return '' and do not re-raise), but
            # record the failure so the job is not left in "doing" forever.
            result = ''
            status = status_choice[1][0]
            warn_msg = repr(e)
            log.error('monitor_record error, e=%s' % e)
        else:
            # A dict-like result with status == 'failure' reports a failed run;
            # results without .get() (bool, int, list, ...) count as success.
            if result and hasattr(result, 'get') and result.get('status') == 'failure':
                status = status_choice[1][0]
                warn_msg = result.get('msg', '')

        # Store the outcome of the run.
        try:
            job = schedule_obj.get_job(job_id)
            # The job may be gone or have no further run scheduled.
            next_time = job.next_run_time if job is not None else None
            next_run_time = None
            if next_time is not None:
                # Stored without tzinfo and microseconds.
                next_run_time = next_time.replace(microsecond=0, tzinfo=None)
            update_cond = {'$set': {'end_time': datetime.datetime.now(),
                                    'next_run_time': next_run_time,
                                    'status': status,
                                    'warn_msg': warn_msg},
                           '$inc': {'run_count': 1}
                           }
            sch_status_coll.update(query_dict, update_cond, upsert=False)
        except Exception as e:
            log.error('monitor_record error, e=%s' % e)
        return result

    return _record


def init_schedule_status(scheduler, job_note_dict, module_name, host_ip, ip_list, app_name='kcjl'):
    """Initialise the stored status of all jobs of ``scheduler``; call it when the scheduler starts.

    For every job returned by ``scheduler.get_jobs()`` the status record
    identified by ``app_name``, ``module_name`` and the job function's name is
    reset to "pending" (or created if it does not exist yet). Records of this
    module that match the stop query are then marked "stopped".

    :param scheduler: scheduler whose jobs are inspected; its ``timezone`` is
        used to compute "now".
    :param job_note_dict: mapping of function name to the note stored with the record.
    :param module_name: value of the ``module`` field of the status records.
    :param host_ip: IP stored in the ``host_ip`` field of the records.
    :param ip_list: IPs excluded from the "stopped" update (see NOTE below).
    :param app_name: value of the ``app`` field of the status records.
    :return: None. Exceptions are logged and not re-raised.
    """
    try:
        now = datetime.datetime.now(scheduler.timezone)
        job_name_list = list()  # function names of all jobs currently scheduled
        status_pending = ScheduleStatus.STATUS_CHOICE[0][0]
        status_stopped = ScheduleStatus.STATUS_CHOICE[2][0]
        for job in scheduler.get_jobs():
            next_run_time = job.trigger.get_next_fire_time(None, now)
            if next_run_time is not None:
                # Stored without tzinfo and microseconds. A trigger with no
                # further fire time yields None, which is stored as-is instead
                # of aborting the whole initialisation.
                next_run_time = next_run_time.replace(microsecond=0, tzinfo=None)
            func_name = job.func.__name__
            job_name_list.append(func_name)
            filter_dict = {'app': app_name, 'module': module_name, 'func': func_name}
            # Fields reset on every start.
            update_dict = {'begin_time': None,
                           'end_time': None,
                           'run_count': 0,
                           'next_run_time': next_run_time,
                           'status': status_pending,
                           'note': job_note_dict.get(func_name, ''),
                           'warn_msg': '',
                           'host_ip': host_ip}
            if sch_status_coll.find_one(filter_dict):
                sch_status_coll.update(filter_dict, {'$set': update_dict}, upsert=False)
            else:
                # A new record gets the identifying fields and the reminder
                # defaults in addition to the fields reset on every start.
                insert_dict = dict(update_dict)
                insert_dict.update(filter_dict)
                insert_dict.update({'timeout': 60,
                                    'warned_time': None,
                                    'warn_interval': 60 * 24,  # 24 hours by default, in minutes
                                    'warn_enabled': True,
                                    'warn_way': ['app']})
                ScheduleStatus.objects.create(**insert_dict)

        # Mark the jobs of this module that are no longer scheduled as stopped.
        # NOTE(review): the filter only matches records whose host_ip is NOT in
        # ip_list, so jobs removed from a server that is still listed are never
        # marked — confirm that this is intended.
        query_stop_dict = {'app': app_name, 'module': module_name, 'func': {'$nin': job_name_list},
                           'host_ip': {'$nin': ip_list}}
        # multi=True: without it the legacy update() changes only the first matching record.
        sch_status_coll.update(query_stop_dict, {'$set': {'status': status_stopped}}, upsert=False, multi=True)
    except Exception as e:
        log.error('init_schedule_status, e=%s' % e)
    else:
        # Report success only when nothing failed.
        log.info('init_schedule_status success.')

5.utils_log.py

# coding=UTF-8
"""定义log对象,以及改写log的部分函数,改变日志的输出颜色"""

import os
import logging.config
import settings


class MyLogger(logging.Logger):
    """Logger that wraps ERROR-level messages in ANSI escape codes for bold red output."""

    def error(self, msg, *args, **kwargs):
        """Log ``msg`` at ERROR level, surrounded by the bold-red ANSI sequence.

        :param msg: message or format string; any object is accepted and is
            converted with ``%s``.
        :param args: arguments merged into ``msg`` by the logging machinery.
        :param kwargs: passed through to ``Logger._log`` (e.g. ``exc_info``).
        """
        if self.isEnabledFor(logging.ERROR):
            # Format through a one-element tuple so that a tuple-valued msg is
            # rendered as a whole instead of being unpacked by the % operator
            # (which raised TypeError for tuples with more than one element).
            colored = "\033[31;1m%s\033[0m" % (msg,)
            self._log(logging.ERROR, colored, args, **kwargs)


# Register MyLogger as the logger class so loggers created afterwards use the coloured error().
logging.setLoggerClass(MyLogger)
# Load the logging configuration from logger.conf in the project root.
logging.config.fileConfig(os.path.join(settings.PROJECT_ROOT, "logger.conf"))
# Shared logger object imported by the other modules.
# NOTE(review): "infile" is presumably a logger section defined in logger.conf — confirm.
log = logging.getLogger("infile")
log.info("============== log initialized ==============")

你可能感兴趣的:(python高级用法,python,linux,数据库,mongodb,django)