springboot整合quartz

一、quartz核心概念

  1. Job 表示一个工作,要执行的具体内容。此接口中只有一个方法,如下:
void execute(JobExecutionContext context) 
  2. JobDetail 表示一个具体的可执行的调度程序,Job 是这个可执行的调度程序所要执行的内容,另外 JobDetail 还包含了这个任务调度的方案和策略。
  3. Trigger 代表一个调度参数的配置,即什么时候去调度。
  4. Scheduler 代表一个调度容器,一个调度容器中可以注册多个 JobDetail 和 Trigger。当 Trigger 与 JobDetail 组合,就可以被 Scheduler 容器调度了。

二、quartz工作原理

Quartz 是一个完全由 Java 编写的开源作业调度框架,为在 Java 应用程序中进行作业调度提供了简单却强大的机制。

一个 Quartz 集群中的每个节点都是一个独立的 Quartz 应用,它的管理独立于其他节点。也就是说,你必须对每个节点分别启动或停止。与许多应用服务器的集群不同,独立的 Quartz 节点并不与其他节点或管理节点通信。Quartz 应用是通过共享的数据库表来感知其他应用的,离开了数据库,节点之间将无法相互感知。


三、quartz建表语句

可在官网下载quartz版本对应的sql语句。官网地址:http://www.quartz-scheduler.org/

-- Remove any existing Quartz tables so the CREATE statements below start from a clean slate.
-- Tables that declare foreign keys (the *_TRIGGERS detail tables, then QRTZ_TRIGGERS)
-- are dropped before the tables they reference.
DROP TABLE IF EXISTS `QRTZ_LOCKS`;
DROP TABLE IF EXISTS `QRTZ_FIRED_TRIGGERS`;
DROP TABLE IF EXISTS `QRTZ_PAUSED_TRIGGER_GRPS`;
DROP TABLE IF EXISTS `QRTZ_SCHEDULER_STATE`;
DROP TABLE IF EXISTS `QRTZ_SIMPLE_TRIGGERS`;
DROP TABLE IF EXISTS `QRTZ_SIMPROP_TRIGGERS`;
DROP TABLE IF EXISTS `QRTZ_CRON_TRIGGERS`;
DROP TABLE IF EXISTS `QRTZ_BLOB_TRIGGERS`;
DROP TABLE IF EXISTS `QRTZ_TRIGGERS`;
DROP TABLE IF EXISTS `QRTZ_JOB_DETAILS`;
DROP TABLE IF EXISTS `QRTZ_CALENDARS`;

-- QRTZ_CALENDARS: one serialized calendar (BLOB) per (SCHED_NAME, CALENDAR_NAME).
-- The storage engine and character set are stated explicitly, matching every other
-- QRTZ_ table in this script, so the table does not depend on server defaults.
CREATE TABLE `QRTZ_CALENDARS` (
  `SCHED_NAME` VARCHAR(120) NOT NULL,
  `CALENDAR_NAME` VARCHAR(190) NOT NULL,
  `CALENDAR` BLOB NOT NULL,
  PRIMARY KEY (`SCHED_NAME`,`CALENDAR_NAME`)
) ENGINE=INNODB DEFAULT CHARSET=utf8mb4;

-- QRTZ_FIRED_TRIGGERS: one row per (SCHED_NAME, ENTRY_ID).
-- Presumably tracks trigger firings per scheduler instance; confirm against the Quartz docs.
-- FIRED_TIME / SCHED_TIME are BIGINT values (presumably epoch milliseconds; confirm).
CREATE TABLE `QRTZ_FIRED_TRIGGERS` (
  `SCHED_NAME`        VARCHAR(120) NOT NULL,
  `ENTRY_ID`          VARCHAR(95)  NOT NULL,
  `TRIGGER_NAME`      VARCHAR(190) NOT NULL,
  `TRIGGER_GROUP`     VARCHAR(190) NOT NULL,
  `INSTANCE_NAME`     VARCHAR(190) NOT NULL,
  `FIRED_TIME`        BIGINT       NOT NULL,
  `SCHED_TIME`        BIGINT       NOT NULL,
  `PRIORITY`          INTEGER      NOT NULL,
  `STATE`             VARCHAR(16)  NOT NULL,
  `JOB_NAME`          VARCHAR(190) NULL,
  `JOB_GROUP`         VARCHAR(190) NULL,
  `IS_NONCONCURRENT`  VARCHAR(1)   NULL,
  `REQUESTS_RECOVERY` VARCHAR(1)   NULL,
  PRIMARY KEY (`SCHED_NAME`, `ENTRY_ID`),
  -- Lookups by scheduler instance
  INDEX `IDX_QRTZ_FT_TRIG_INST_NAME` (`SCHED_NAME`, `INSTANCE_NAME`),
  INDEX `IDX_QRTZ_FT_INST_JOB_REQ_RCVRY` (`SCHED_NAME`, `INSTANCE_NAME`, `REQUESTS_RECOVERY`),
  -- Lookups by job key / job group
  INDEX `IDX_QRTZ_FT_J_G` (`SCHED_NAME`, `JOB_NAME`, `JOB_GROUP`),
  INDEX `IDX_QRTZ_FT_JG` (`SCHED_NAME`, `JOB_GROUP`),
  -- Lookups by trigger key / trigger group
  INDEX `IDX_QRTZ_FT_T_G` (`SCHED_NAME`, `TRIGGER_NAME`, `TRIGGER_GROUP`),
  INDEX `IDX_QRTZ_FT_TG` (`SCHED_NAME`, `TRIGGER_GROUP`)
) ENGINE = InnoDB DEFAULT CHARACTER SET = utf8mb4;

-- QRTZ_JOB_DETAILS: one row per job, keyed by (SCHED_NAME, JOB_NAME, JOB_GROUP).
-- QRTZ_TRIGGERS references this key through a foreign key.
-- The IS_* / REQUESTS_RECOVERY flags are single-character strings; JOB_DATA is an optional BLOB.
CREATE TABLE `QRTZ_JOB_DETAILS` (
  `SCHED_NAME`        VARCHAR(120) NOT NULL,
  `JOB_NAME`          VARCHAR(190) NOT NULL,
  `JOB_GROUP`         VARCHAR(190) NOT NULL,
  `DESCRIPTION`       VARCHAR(250) NULL,
  `JOB_CLASS_NAME`    VARCHAR(250) NOT NULL,
  `IS_DURABLE`        VARCHAR(1)   NOT NULL,
  `IS_NONCONCURRENT`  VARCHAR(1)   NOT NULL,
  `IS_UPDATE_DATA`    VARCHAR(1)   NOT NULL,
  `REQUESTS_RECOVERY` VARCHAR(1)   NOT NULL,
  `JOB_DATA`          BLOB,
  PRIMARY KEY (`SCHED_NAME`, `JOB_NAME`, `JOB_GROUP`),
  INDEX `IDX_QRTZ_J_REQ_RECOVERY` (`SCHED_NAME`, `REQUESTS_RECOVERY`),
  INDEX `IDX_QRTZ_J_GRP` (`SCHED_NAME`, `JOB_GROUP`)
) ENGINE = InnoDB DEFAULT CHARACTER SET = utf8mb4;

-- QRTZ_LOCKS: one row per named lock within a scheduler, keyed by (SCHED_NAME, LOCK_NAME).
CREATE TABLE `QRTZ_LOCKS` (
  `SCHED_NAME` VARCHAR(120) NOT NULL,
  `LOCK_NAME`  VARCHAR(40)  NOT NULL,
  PRIMARY KEY (`SCHED_NAME`, `LOCK_NAME`)
) ENGINE = InnoDB DEFAULT CHARACTER SET = utf8mb4;

-- QRTZ_PAUSED_TRIGGER_GRPS: one row per (SCHED_NAME, TRIGGER_GROUP);
-- judging by the table name, a row marks that trigger group as paused.
CREATE TABLE `QRTZ_PAUSED_TRIGGER_GRPS` (
  `SCHED_NAME`    VARCHAR(120) NOT NULL,
  `TRIGGER_GROUP` VARCHAR(190) NOT NULL,
  PRIMARY KEY (`SCHED_NAME`, `TRIGGER_GROUP`)
) ENGINE = InnoDB DEFAULT CHARACTER SET = utf8mb4;

-- QRTZ_SCHEDULER_STATE: one row per scheduler instance, keyed by (SCHED_NAME, INSTANCE_NAME).
-- LAST_CHECKIN_TIME / CHECKIN_INTERVAL are BIGINT values (presumably milliseconds; confirm).
CREATE TABLE `QRTZ_SCHEDULER_STATE` (
  `SCHED_NAME`        VARCHAR(120) NOT NULL,
  `INSTANCE_NAME`     VARCHAR(190) NOT NULL,
  `LAST_CHECKIN_TIME` BIGINT       NOT NULL,
  `CHECKIN_INTERVAL`  BIGINT       NOT NULL,
  PRIMARY KEY (`SCHED_NAME`, `INSTANCE_NAME`)
) ENGINE = InnoDB DEFAULT CHARACTER SET = utf8mb4;

-- QRTZ_TRIGGERS: one row per trigger, keyed by (SCHED_NAME, TRIGGER_NAME, TRIGGER_GROUP).
-- Each trigger must point at an existing job in QRTZ_JOB_DETAILS (foreign key below);
-- the BLOB / SIMPLE / CRON / SIMPROP trigger tables reference this table's primary key.
-- The *_TIME columns are BIGINT values (presumably epoch milliseconds; confirm).
CREATE TABLE `QRTZ_TRIGGERS` (
  `SCHED_NAME`     VARCHAR(120) NOT NULL,
  `TRIGGER_NAME`   VARCHAR(190) NOT NULL,
  `TRIGGER_GROUP`  VARCHAR(190) NOT NULL,
  `JOB_NAME`       VARCHAR(190) NOT NULL,
  `JOB_GROUP`      VARCHAR(190) NOT NULL,
  `DESCRIPTION`    VARCHAR(250) NULL,
  `NEXT_FIRE_TIME` BIGINT       NULL,
  `PREV_FIRE_TIME` BIGINT       NULL,
  `PRIORITY`       INTEGER      NULL,
  `TRIGGER_STATE`  VARCHAR(16)  NOT NULL,
  `TRIGGER_TYPE`   VARCHAR(8)   NOT NULL,
  `START_TIME`     BIGINT       NOT NULL,
  `END_TIME`       BIGINT       NULL,
  `CALENDAR_NAME`  VARCHAR(190) NULL,
  `MISFIRE_INSTR`  SMALLINT     NULL,
  `JOB_DATA`       BLOB,
  PRIMARY KEY (`SCHED_NAME`, `TRIGGER_NAME`, `TRIGGER_GROUP`),
  -- Lookups by job, calendar and trigger group
  INDEX `IDX_QRTZ_T_J` (`SCHED_NAME`, `JOB_NAME`, `JOB_GROUP`),
  INDEX `IDX_QRTZ_T_JG` (`SCHED_NAME`, `JOB_GROUP`),
  INDEX `IDX_QRTZ_T_C` (`SCHED_NAME`, `CALENDAR_NAME`),
  INDEX `IDX_QRTZ_T_G` (`SCHED_NAME`, `TRIGGER_GROUP`),
  -- Lookups by trigger state
  INDEX `IDX_QRTZ_T_STATE` (`SCHED_NAME`, `TRIGGER_STATE`),
  INDEX `IDX_QRTZ_T_N_STATE` (`SCHED_NAME`, `TRIGGER_NAME`, `TRIGGER_GROUP`, `TRIGGER_STATE`),
  INDEX `IDX_QRTZ_T_N_G_STATE` (`SCHED_NAME`, `TRIGGER_GROUP`, `TRIGGER_STATE`),
  -- Lookups by next fire time, optionally combined with state / misfire instruction
  INDEX `IDX_QRTZ_T_NEXT_FIRE_TIME` (`SCHED_NAME`, `NEXT_FIRE_TIME`),
  INDEX `IDX_QRTZ_T_NFT_ST` (`SCHED_NAME`, `TRIGGER_STATE`, `NEXT_FIRE_TIME`),
  INDEX `IDX_QRTZ_T_NFT_MISFIRE` (`SCHED_NAME`, `MISFIRE_INSTR`, `NEXT_FIRE_TIME`),
  INDEX `IDX_QRTZ_T_NFT_ST_MISFIRE` (`SCHED_NAME`, `MISFIRE_INSTR`, `NEXT_FIRE_TIME`, `TRIGGER_STATE`),
  INDEX `IDX_QRTZ_T_NFT_ST_MISFIRE_GRP` (`SCHED_NAME`, `MISFIRE_INSTR`, `NEXT_FIRE_TIME`, `TRIGGER_GROUP`, `TRIGGER_STATE`),
  CONSTRAINT `qrtz_triggers_ibfk_1`
    FOREIGN KEY (`SCHED_NAME`, `JOB_NAME`, `JOB_GROUP`)
    REFERENCES `QRTZ_JOB_DETAILS` (`SCHED_NAME`, `JOB_NAME`, `JOB_GROUP`)
) ENGINE = InnoDB DEFAULT CHARACTER SET = utf8mb4;

-- QRTZ_BLOB_TRIGGERS: optional BLOB payload for a trigger, keyed by the same
-- (SCHED_NAME, TRIGGER_NAME, TRIGGER_GROUP) as the QRTZ_TRIGGERS row it references.
-- No separate secondary index is declared on these columns: it would cover exactly
-- the primary-key columns in the same order, and the primary key already provides
-- the index the foreign key needs.
CREATE TABLE `QRTZ_BLOB_TRIGGERS` (
  `SCHED_NAME` VARCHAR(120) NOT NULL,
  `TRIGGER_NAME` VARCHAR(190) NOT NULL,
  `TRIGGER_GROUP` VARCHAR(190) NOT NULL,
  `BLOB_DATA` BLOB,
  PRIMARY KEY (`SCHED_NAME`,`TRIGGER_NAME`,`TRIGGER_GROUP`),
  CONSTRAINT `qrtz_blob_triggers_ibfk_1` FOREIGN KEY (`SCHED_NAME`, `TRIGGER_NAME`, `TRIGGER_GROUP`) REFERENCES `QRTZ_TRIGGERS` (`SCHED_NAME`, `TRIGGER_NAME`, `TRIGGER_GROUP`)
) ENGINE=INNODB DEFAULT CHARSET=utf8mb4;

-- QRTZ_SIMPLE_TRIGGERS: repeat settings (count, interval, times triggered) for a trigger,
-- keyed by the same (SCHED_NAME, TRIGGER_NAME, TRIGGER_GROUP) as its QRTZ_TRIGGERS row.
-- REPEAT_INTERVAL is a BIGINT (presumably milliseconds; confirm).
CREATE TABLE `QRTZ_SIMPLE_TRIGGERS` (
  `SCHED_NAME`      VARCHAR(120) NOT NULL,
  `TRIGGER_NAME`    VARCHAR(190) NOT NULL,
  `TRIGGER_GROUP`   VARCHAR(190) NOT NULL,
  `REPEAT_COUNT`    BIGINT       NOT NULL,
  `REPEAT_INTERVAL` BIGINT       NOT NULL,
  `TIMES_TRIGGERED` BIGINT       NOT NULL,
  PRIMARY KEY (`SCHED_NAME`, `TRIGGER_NAME`, `TRIGGER_GROUP`),
  CONSTRAINT `qrtz_simple_triggers_ibfk_1`
    FOREIGN KEY (`SCHED_NAME`, `TRIGGER_NAME`, `TRIGGER_GROUP`)
    REFERENCES `QRTZ_TRIGGERS` (`SCHED_NAME`, `TRIGGER_NAME`, `TRIGGER_GROUP`)
) ENGINE = InnoDB DEFAULT CHARACTER SET = utf8mb4;

-- QRTZ_CRON_TRIGGERS: cron expression and optional time-zone id for a trigger,
-- keyed by the same (SCHED_NAME, TRIGGER_NAME, TRIGGER_GROUP) as its QRTZ_TRIGGERS row.
CREATE TABLE `QRTZ_CRON_TRIGGERS` (
  `SCHED_NAME`      VARCHAR(120) NOT NULL,
  `TRIGGER_NAME`    VARCHAR(190) NOT NULL,
  `TRIGGER_GROUP`   VARCHAR(190) NOT NULL,
  `CRON_EXPRESSION` VARCHAR(120) NOT NULL,
  `TIME_ZONE_ID`    VARCHAR(80)  NULL,
  PRIMARY KEY (`SCHED_NAME`, `TRIGGER_NAME`, `TRIGGER_GROUP`),
  CONSTRAINT `qrtz_cron_triggers_ibfk_1`
    FOREIGN KEY (`SCHED_NAME`, `TRIGGER_NAME`, `TRIGGER_GROUP`)
    REFERENCES `QRTZ_TRIGGERS` (`SCHED_NAME`, `TRIGGER_NAME`, `TRIGGER_GROUP`)
) ENGINE = InnoDB DEFAULT CHARACTER SET = utf8mb4;

-- QRTZ_SIMPROP_TRIGGERS: generic typed property slots (string / int / long / decimal / boolean)
-- for a trigger, keyed by the same (SCHED_NAME, TRIGGER_NAME, TRIGGER_GROUP) as its
-- QRTZ_TRIGGERS row. All property columns are nullable.
CREATE TABLE `QRTZ_SIMPROP_TRIGGERS` (
  `SCHED_NAME`    VARCHAR(120)  NOT NULL,
  `TRIGGER_NAME`  VARCHAR(190)  NOT NULL,
  `TRIGGER_GROUP` VARCHAR(190)  NOT NULL,
  `STR_PROP_1`    VARCHAR(512)  NULL,
  `STR_PROP_2`    VARCHAR(512)  NULL,
  `STR_PROP_3`    VARCHAR(512)  NULL,
  `INT_PROP_1`    INTEGER       NULL,
  `INT_PROP_2`    INTEGER       NULL,
  `LONG_PROP_1`   BIGINT        NULL,
  `LONG_PROP_2`   BIGINT        NULL,
  `DEC_PROP_1`    DECIMAL(13,4) NULL,
  `DEC_PROP_2`    DECIMAL(13,4) NULL,
  `BOOL_PROP_1`   VARCHAR(1)    NULL,
  `BOOL_PROP_2`   VARCHAR(1)    NULL,
  PRIMARY KEY (`SCHED_NAME`, `TRIGGER_NAME`, `TRIGGER_GROUP`),
  CONSTRAINT `qrtz_simprop_triggers_ibfk_1`
    FOREIGN KEY (`SCHED_NAME`, `TRIGGER_NAME`, `TRIGGER_GROUP`)
    REFERENCES `QRTZ_TRIGGERS` (`SCHED_NAME`, `TRIGGER_NAME`, `TRIGGER_GROUP`)
) ENGINE = InnoDB DEFAULT CHARACTER SET = utf8mb4;

四、依赖

        
        
        <dependency>
            <groupId>org.springframework.boot</groupId>
            <artifactId>spring-boot-starter-quartz</artifactId>
        </dependency>
        

五、application.yml

spring:
  # Quartz settings for the clustered, JDBC-backed scheduler
  quartz:
    properties:
      org:
        quartz:
          scheduler:
            instanceName: clusteredScheduler
            # AUTO lets Quartz generate the instance id for each node
            instanceId: AUTO
          jobStore:
            # NOTE(review): on some Spring Boot 2.x releases an explicit JobStoreTX here
            # prevents the Spring-managed DataSource from being used ("DataSource name not set").
            # If that happens, remove this line or use
            # org.springframework.scheduling.quartz.LocalDataSourceJobStore. Confirm against
            # the Spring Boot version in use.
            class: org.quartz.impl.jdbcjobstore.JobStoreTX
            driverDelegateClass: org.quartz.impl.jdbcjobstore.StdJDBCDelegate
            # Must match the prefix of the tables created by the schema script
            tablePrefix: QRTZ_
            isClustered: true
            # Value passed to Quartz as the cluster check-in interval (presumably milliseconds; confirm)
            clusterCheckinInterval: 10000
            useProperties: false
          threadPool:
            class: org.quartz.simpl.SimpleThreadPool
            threadCount: 10
            threadPriority: 5
            threadsInheritContextClassLoaderOfInitializingThread: true
    # Persist jobs and triggers in the database instead of in memory
    job-store-type: jdbc
    # Schema initialization. The QRTZ_ tables are created manually with the SQL script,
    # so Spring Boot must not re-run its own schema script: with "always" every node
    # start re-initializes the schema, and the bundled scripts drop existing tables,
    # which would wipe the job store shared by the other cluster nodes.
    jdbc:
      initialize-schema: never

六、代码

定时任务需要实现 Job 接口,不过 Spring 已经提供了一个简单的抽象实现 QuartzJobBean(org.springframework.scheduling.quartz.QuartzJobBean),直接继承它即可,从而减少代码量。

package com.nieyue.news.webmagic.quartz;

import com.nieyue.news.webmagic.downloader.SeleniumDownloader;
import com.nieyue.news.webmagic.pipeline.ArticlePipeline;
import com.nieyue.news.webmagic.processor.ArticleProcessor;
import com.nieyue.news.webmagic.utils.YamlReader;
import lombok.SneakyThrows;
import org.quartz.JobDetail;
import org.quartz.JobExecutionContext;
import org.quartz.JobExecutionException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.scheduling.quartz.QuartzJobBean;
import org.springframework.stereotype.Component;
import org.springframework.web.context.request.RequestContextHolder;
import org.springframework.web.context.request.ServletRequestAttributes;
import us.codecraft.webmagic.Spider;
import us.codecraft.webmagic.scheduler.BloomFilterDuplicateRemover;
import us.codecraft.webmagic.scheduler.QueueScheduler;

import javax.servlet.http.HttpServletRequest;
import java.io.IOException;
import java.io.InputStream;
import java.net.ServerSocket;
import java.util.Date;
import java.util.Properties;

/**
 * Quartz job that crawls {@link #URL} with a WebMagic {@link Spider}.
 *
 * <p>Pages are fetched through a {@link SeleniumDownloader}, parsed by the injected
 * {@link ArticleProcessor} and handed to the injected {@link ArticlePipeline}.
 * The Chrome driver path is read from {@code selenium.properties} on the classpath
 * each time the job runs.
 */
@Component
public class FengArticleJob extends QuartzJobBean {

    private final Logger logger = LoggerFactory.getLogger(this.getClass());

    private static final String URL = "https://www.ifeng.com/";

    /** Classpath resource that holds the Selenium settings. */
    private static final String SELENIUM_CONFIG = "selenium.properties";

    /** Key in {@link #SELENIUM_CONFIG} whose value is passed to {@link SeleniumDownloader}. */
    private static final String CHROME_DRIVER_PATH_KEY = "chrome_driver_path";

    @Autowired
    private ArticleProcessor articleProcessor;

    @Autowired
    private ArticlePipeline articlePipeline;

    /**
     * Logs the job identity, loads the Selenium configuration and runs the spider
     * synchronously on the calling Quartz worker thread.
     *
     * @param context execution context supplied by Quartz; only its {@link JobDetail} is read
     * @throws JobExecutionException if {@code selenium.properties} is missing or cannot be read
     */
    // @SneakyThrows is kept from the original: the project-local SeleniumDownloader may
    // declare checked exceptions that are not visible from this file (TODO confirm).
    @SneakyThrows
    @Override
    protected void executeInternal(JobExecutionContext context) throws JobExecutionException {
        JobDetail jobDetail = context.getJobDetail();
        logger.info("进入定时任务自动爬取凤凰网文章, taskName: " + jobDetail.getKey().getName() +
                ", jobGroup: " + jobDetail.getKey().getGroup() + ", jobClass: " + jobDetail.getJobClass().getName());

        String chromeDriverPath = loadChromeDriverPath();

        Spider.create(articleProcessor)
                .addUrl(URL)
                // Custom pipeline that receives the extracted results
                .addPipeline(articlePipeline)
                .thread(5)
                // Sleep time for SeleniumDownloader: dynamically loaded pages may not have
                // finished loading, so give them time before the page is read.
                .setDownloader(new SeleniumDownloader(chromeDriverPath).setSleepTime(3000))
                // Queue scheduler with bloom-filter de-duplication constructed with
                // 10 * 1000 = 10,000.
                // NOTE(review): the previous comment said 100,000; confirm which is intended.
                .setScheduler(new QueueScheduler().setDuplicateRemover(new BloomFilterDuplicateRemover(10 * 1000)))
                .run();
    }

    /**
     * Reads the Chrome driver path from {@link #SELENIUM_CONFIG}.
     *
     * <p>The stream is closed via try-with-resources, and a missing resource is reported
     * explicitly instead of failing inside {@link Properties#load(InputStream)}.
     *
     * @return the configured value, or {@code null} when the key is absent
     * @throws JobExecutionException if the resource is missing or unreadable
     */
    private String loadChromeDriverPath() throws JobExecutionException {
        Properties sConfig = new Properties();
        ClassLoader loader = Thread.currentThread().getContextClassLoader();
        try (InputStream in = loader.getResourceAsStream(SELENIUM_CONFIG)) {
            if (in == null) {
                throw new JobExecutionException(SELENIUM_CONFIG + " not found on the classpath");
            }
            sConfig.load(in);
        } catch (IOException e) {
            throw new JobExecutionException("Failed to read " + SELENIUM_CONFIG, e);
        }
        return sConfig.getProperty(CHROME_DRIVER_PATH_KEY);
    }
}
package com.nieyue.news.webmagic.quartz;

import org.springframework.context.annotation.Bean;
import org.springframework.scheduling.quartz.CronTriggerFactoryBean;
import org.springframework.scheduling.quartz.JobDetailFactoryBean;

// NOTE(review): @Configuration is commented out, so presumably Spring does not process the
// @Bean methods below unless this class is registered some other way. Confirm this is intended.
//@Configuration
public class TaskConfig {

    /**
     * Builds the factory bean that describes the {@link FengArticleJob} job detail.
     *
     * @return factory configured with the job class, name, group, description,
     *         durability and requests-recovery flags
     */
    @Bean
    public JobDetailFactoryBean job1() {
        JobDetailFactoryBean factory = new JobDetailFactoryBean();
        // Identity and description of the job
        factory.setName("fengAiticleJob");
        factory.setGroup("fengAiticleJobGroup");
        factory.setDescription("定时任务自动爬取凤凰网文章");
        // Class that Quartz instantiates to run the job
        factory.setJobClass(FengArticleJob.class);
        // Mark the job as durable and as requesting recovery
        factory.setDurability(true);
        factory.setRequestsRecovery(true);
        return factory;
    }

    /**
     * Builds the cron trigger factory bean for the job produced by {@link #job1()}.
     *
     * @return factory configured with the trigger name, group, cron expression and job detail
     */
    @Bean
    public CronTriggerFactoryBean trigger1() {
        CronTriggerFactoryBean factory = new CronTriggerFactoryBean();
        // NOTE(review): the name carries the "Group" suffix and the group does not, so the two
        // values look swapped. They are left as-is because they form the trigger identity.
        factory.setName("fengAiticleTriggerGroup");
        factory.setGroup("fengAiticleTrigger");
        // Fires at 09:00:00, 12:00:00 and 15:00:00 every day
        // (the earlier comment claimed "every 5 seconds", which does not match this expression).
        factory.setCronExpression("0 0 9,12,15 * * ?");
        // Job detail this trigger fires
        factory.setJobDetail(job1().getObject());
        return factory;
    }
}

七、测试任务

可将项目复制后再启动一份或多份(修改端口),观察同一任务是否会在多个实例上同时执行;再关闭其中一个实例,观察其他实例是否会接管并继续执行任务。如果任务不会重复执行,且某个实例关闭后其他实例仍能继续执行,则证明 quartz 可用于分布式定时任务。

参考文档

https://www.w3cschool.cn/quartz_doc/
https://blog.csdn.net/yangshengwei230612/article/details/114402203

你可能感兴趣的:(springboot整合quartz)