一、quartz核心概念
- Job 表示一个工作,要执行的具体内容。此接口中只有一个方法,如下:
void execute(JobExecutionContext context)
- JobDetail 表示一个具体的可执行的调度程序,Job 是这个可执行调度程序所要执行的内容,另外 JobDetail 还包含了这个任务调度的方案和策略。
- Trigger 代表一个调度参数的配置,什么时候去调。
- Scheduler 代表一个调度容器,一个调度容器中可以注册多个 JobDetail 和 Trigger。当 Trigger 与 JobDetail 组合,就可以被 Scheduler 容器调度了。
二、quartz工作原理
Quartz 是一个完全由 Java 编写的开源作业调度框架,为在 Java 应用程序中进行作业调度提供了简单却强大的机制。
一个 Quartz 集群中的每个节点都是一个独立的 Quartz 应用,其管理独立于其他节点,也就是说你必须对每个节点分别启动或停止。不像许多应用服务器的集群,独立的 Quartz 节点并不与另一个节点或是管理节点通信。Quartz 应用是通过数据库表来感知到另一应用的,离开了数据库将无法感知。
三、quartz建表语句
可在官网下载quartz版本对应的sql语句。官网地址:http://www.quartz-scheduler.org/
-- Reset step: remove every Quartz table so the CREATE TABLE statements that follow start clean.
-- WARNING: running this deletes all rows stored in the QRTZ_ tables.
-- Order matters: tables that declare foreign keys (the *_TRIGGERS detail tables) are dropped
-- before QRTZ_TRIGGERS, and QRTZ_TRIGGERS is dropped before QRTZ_JOB_DETAILS, which it references.
DROP TABLE IF EXISTS QRTZ_LOCKS;
DROP TABLE IF EXISTS QRTZ_FIRED_TRIGGERS;
DROP TABLE IF EXISTS QRTZ_PAUSED_TRIGGER_GRPS;
DROP TABLE IF EXISTS QRTZ_SCHEDULER_STATE;
DROP TABLE IF EXISTS QRTZ_SIMPLE_TRIGGERS;
DROP TABLE IF EXISTS QRTZ_SIMPROP_TRIGGERS;
DROP TABLE IF EXISTS QRTZ_CRON_TRIGGERS;
DROP TABLE IF EXISTS QRTZ_BLOB_TRIGGERS;
DROP TABLE IF EXISTS QRTZ_TRIGGERS;
DROP TABLE IF EXISTS QRTZ_JOB_DETAILS;
DROP TABLE IF EXISTS QRTZ_CALENDARS;
-- QRTZ_CALENDARS: one row per (SCHED_NAME, CALENDAR_NAME).
-- The storage engine and character set are declared explicitly so this table
-- matches the other QRTZ_ tables instead of inheriting the server defaults.
CREATE TABLE `QRTZ_CALENDARS` (
    `SCHED_NAME`    VARCHAR(120) NOT NULL,
    `CALENDAR_NAME` VARCHAR(190) NOT NULL,
    `CALENDAR`      BLOB         NOT NULL,  -- NOTE(review): presumably the serialized calendar object - confirm
    PRIMARY KEY (`SCHED_NAME`, `CALENDAR_NAME`)
) ENGINE=INNODB DEFAULT CHARSET=utf8mb4;
-- QRTZ_FIRED_TRIGGERS: one row per (SCHED_NAME, ENTRY_ID).
-- The secondary indexes cover lookups by scheduler instance, by job and by trigger.
CREATE TABLE `QRTZ_FIRED_TRIGGERS` (
    `SCHED_NAME`        VARCHAR(120) NOT NULL,
    `ENTRY_ID`          VARCHAR(95)  NOT NULL,
    `TRIGGER_NAME`      VARCHAR(190) NOT NULL,
    `TRIGGER_GROUP`     VARCHAR(190) NOT NULL,
    `INSTANCE_NAME`     VARCHAR(190) NOT NULL,
    `FIRED_TIME`        BIGINT       NOT NULL,
    `SCHED_TIME`        BIGINT       NOT NULL,
    `PRIORITY`          INT          NOT NULL,
    `STATE`             VARCHAR(16)  NOT NULL,
    `JOB_NAME`          VARCHAR(190) DEFAULT NULL,
    `JOB_GROUP`         VARCHAR(190) DEFAULT NULL,
    `IS_NONCONCURRENT`  VARCHAR(1)   DEFAULT NULL,
    `REQUESTS_RECOVERY` VARCHAR(1)   DEFAULT NULL,
    PRIMARY KEY (`SCHED_NAME`, `ENTRY_ID`),
    INDEX `IDX_QRTZ_FT_TRIG_INST_NAME` (`SCHED_NAME`, `INSTANCE_NAME`),
    INDEX `IDX_QRTZ_FT_INST_JOB_REQ_RCVRY` (`SCHED_NAME`, `INSTANCE_NAME`, `REQUESTS_RECOVERY`),
    INDEX `IDX_QRTZ_FT_J_G` (`SCHED_NAME`, `JOB_NAME`, `JOB_GROUP`),
    INDEX `IDX_QRTZ_FT_JG` (`SCHED_NAME`, `JOB_GROUP`),
    INDEX `IDX_QRTZ_FT_T_G` (`SCHED_NAME`, `TRIGGER_NAME`, `TRIGGER_GROUP`),
    INDEX `IDX_QRTZ_FT_TG` (`SCHED_NAME`, `TRIGGER_GROUP`)
) ENGINE = InnoDB DEFAULT CHARSET = utf8mb4;
-- QRTZ_JOB_DETAILS: one row per (SCHED_NAME, JOB_NAME, JOB_GROUP).
-- QRTZ_TRIGGERS references this primary key through a foreign key.
CREATE TABLE `QRTZ_JOB_DETAILS` (
    `SCHED_NAME`        VARCHAR(120) NOT NULL,
    `JOB_NAME`          VARCHAR(190) NOT NULL,
    `JOB_GROUP`         VARCHAR(190) NOT NULL,
    `DESCRIPTION`       VARCHAR(250) DEFAULT NULL,
    `JOB_CLASS_NAME`    VARCHAR(250) NOT NULL,
    `IS_DURABLE`        VARCHAR(1)   NOT NULL,
    `IS_NONCONCURRENT`  VARCHAR(1)   NOT NULL,
    `IS_UPDATE_DATA`    VARCHAR(1)   NOT NULL,
    `REQUESTS_RECOVERY` VARCHAR(1)   NOT NULL,
    `JOB_DATA`          BLOB,
    PRIMARY KEY (`SCHED_NAME`, `JOB_NAME`, `JOB_GROUP`),
    INDEX `IDX_QRTZ_J_REQ_RECOVERY` (`SCHED_NAME`, `REQUESTS_RECOVERY`),
    INDEX `IDX_QRTZ_J_GRP` (`SCHED_NAME`, `JOB_GROUP`)
) ENGINE = InnoDB DEFAULT CHARSET = utf8mb4;
-- QRTZ_LOCKS: one row per (SCHED_NAME, LOCK_NAME); the table has no other columns.
CREATE TABLE `QRTZ_LOCKS` (
    `SCHED_NAME` VARCHAR(120) NOT NULL,
    `LOCK_NAME`  VARCHAR(40)  NOT NULL,
    PRIMARY KEY (`SCHED_NAME`, `LOCK_NAME`)
) ENGINE = InnoDB DEFAULT CHARSET = utf8mb4;
-- QRTZ_PAUSED_TRIGGER_GRPS: one row per (SCHED_NAME, TRIGGER_GROUP); the table has no other columns.
CREATE TABLE `QRTZ_PAUSED_TRIGGER_GRPS` (
    `SCHED_NAME`    VARCHAR(120) NOT NULL,
    `TRIGGER_GROUP` VARCHAR(190) NOT NULL,
    PRIMARY KEY (`SCHED_NAME`, `TRIGGER_GROUP`)
) ENGINE = InnoDB DEFAULT CHARSET = utf8mb4;
-- QRTZ_SCHEDULER_STATE: one row per (SCHED_NAME, INSTANCE_NAME) holding that
-- instance's last check-in time and check-in interval.
CREATE TABLE `QRTZ_SCHEDULER_STATE` (
    `SCHED_NAME`        VARCHAR(120) NOT NULL,
    `INSTANCE_NAME`     VARCHAR(190) NOT NULL,
    `LAST_CHECKIN_TIME` BIGINT       NOT NULL,
    `CHECKIN_INTERVAL`  BIGINT       NOT NULL,
    PRIMARY KEY (`SCHED_NAME`, `INSTANCE_NAME`)
) ENGINE = InnoDB DEFAULT CHARSET = utf8mb4;
-- QRTZ_TRIGGERS: one row per (SCHED_NAME, TRIGGER_NAME, TRIGGER_GROUP).
-- Each row must point at an existing QRTZ_JOB_DETAILS row (foreign key below);
-- the BLOB/SIMPLE/CRON/SIMPROP trigger tables reference this table's primary key.
CREATE TABLE `QRTZ_TRIGGERS` (
    `SCHED_NAME`     VARCHAR(120) NOT NULL,
    `TRIGGER_NAME`   VARCHAR(190) NOT NULL,
    `TRIGGER_GROUP`  VARCHAR(190) NOT NULL,
    `JOB_NAME`       VARCHAR(190) NOT NULL,
    `JOB_GROUP`      VARCHAR(190) NOT NULL,
    `DESCRIPTION`    VARCHAR(250) DEFAULT NULL,
    `NEXT_FIRE_TIME` BIGINT       DEFAULT NULL,
    `PREV_FIRE_TIME` BIGINT       DEFAULT NULL,
    `PRIORITY`       INT          DEFAULT NULL,
    `TRIGGER_STATE`  VARCHAR(16)  NOT NULL,
    `TRIGGER_TYPE`   VARCHAR(8)   NOT NULL,
    `START_TIME`     BIGINT       NOT NULL,
    `END_TIME`       BIGINT       DEFAULT NULL,
    `CALENDAR_NAME`  VARCHAR(190) DEFAULT NULL,
    `MISFIRE_INSTR`  SMALLINT     DEFAULT NULL,
    `JOB_DATA`       BLOB,
    PRIMARY KEY (`SCHED_NAME`, `TRIGGER_NAME`, `TRIGGER_GROUP`),
    -- Lookups by job, calendar and trigger group.
    INDEX `IDX_QRTZ_T_J` (`SCHED_NAME`, `JOB_NAME`, `JOB_GROUP`),
    INDEX `IDX_QRTZ_T_JG` (`SCHED_NAME`, `JOB_GROUP`),
    INDEX `IDX_QRTZ_T_C` (`SCHED_NAME`, `CALENDAR_NAME`),
    INDEX `IDX_QRTZ_T_G` (`SCHED_NAME`, `TRIGGER_GROUP`),
    -- Lookups that filter on trigger state.
    INDEX `IDX_QRTZ_T_STATE` (`SCHED_NAME`, `TRIGGER_STATE`),
    INDEX `IDX_QRTZ_T_N_STATE` (`SCHED_NAME`, `TRIGGER_NAME`, `TRIGGER_GROUP`, `TRIGGER_STATE`),
    INDEX `IDX_QRTZ_T_N_G_STATE` (`SCHED_NAME`, `TRIGGER_GROUP`, `TRIGGER_STATE`),
    -- Lookups that filter on next fire time and/or misfire instruction.
    INDEX `IDX_QRTZ_T_NEXT_FIRE_TIME` (`SCHED_NAME`, `NEXT_FIRE_TIME`),
    INDEX `IDX_QRTZ_T_NFT_ST` (`SCHED_NAME`, `TRIGGER_STATE`, `NEXT_FIRE_TIME`),
    INDEX `IDX_QRTZ_T_NFT_MISFIRE` (`SCHED_NAME`, `MISFIRE_INSTR`, `NEXT_FIRE_TIME`),
    INDEX `IDX_QRTZ_T_NFT_ST_MISFIRE` (`SCHED_NAME`, `MISFIRE_INSTR`, `NEXT_FIRE_TIME`, `TRIGGER_STATE`),
    INDEX `IDX_QRTZ_T_NFT_ST_MISFIRE_GRP` (`SCHED_NAME`, `MISFIRE_INSTR`, `NEXT_FIRE_TIME`, `TRIGGER_GROUP`, `TRIGGER_STATE`),
    CONSTRAINT `qrtz_triggers_ibfk_1`
        FOREIGN KEY (`SCHED_NAME`, `JOB_NAME`, `JOB_GROUP`)
        REFERENCES `QRTZ_JOB_DETAILS` (`SCHED_NAME`, `JOB_NAME`, `JOB_GROUP`)
) ENGINE = InnoDB DEFAULT CHARSET = utf8mb4;
-- QRTZ_BLOB_TRIGGERS: optional BLOB payload for a trigger, one row per
-- (SCHED_NAME, TRIGGER_NAME, TRIGGER_GROUP), tied to QRTZ_TRIGGERS by foreign key.
-- The former secondary index `SCHED_NAME` listed exactly the primary-key columns in the
-- same order, so it was removed; the primary key already serves those lookups and
-- backs the foreign key.
CREATE TABLE `QRTZ_BLOB_TRIGGERS` (
    `SCHED_NAME`    VARCHAR(120) NOT NULL,
    `TRIGGER_NAME`  VARCHAR(190) NOT NULL,
    `TRIGGER_GROUP` VARCHAR(190) NOT NULL,
    `BLOB_DATA`     BLOB,
    PRIMARY KEY (`SCHED_NAME`, `TRIGGER_NAME`, `TRIGGER_GROUP`),
    CONSTRAINT `qrtz_blob_triggers_ibfk_1`
        FOREIGN KEY (`SCHED_NAME`, `TRIGGER_NAME`, `TRIGGER_GROUP`)
        REFERENCES `QRTZ_TRIGGERS` (`SCHED_NAME`, `TRIGGER_NAME`, `TRIGGER_GROUP`)
) ENGINE=INNODB DEFAULT CHARSET=utf8mb4;
-- QRTZ_SIMPLE_TRIGGERS: repeat count / interval / times-triggered counters for a trigger,
-- one row per (SCHED_NAME, TRIGGER_NAME, TRIGGER_GROUP), tied to QRTZ_TRIGGERS by foreign key.
CREATE TABLE `QRTZ_SIMPLE_TRIGGERS` (
    `SCHED_NAME`      VARCHAR(120) NOT NULL,
    `TRIGGER_NAME`    VARCHAR(190) NOT NULL,
    `TRIGGER_GROUP`   VARCHAR(190) NOT NULL,
    `REPEAT_COUNT`    BIGINT       NOT NULL,
    `REPEAT_INTERVAL` BIGINT       NOT NULL,
    `TIMES_TRIGGERED` BIGINT       NOT NULL,
    PRIMARY KEY (`SCHED_NAME`, `TRIGGER_NAME`, `TRIGGER_GROUP`),
    CONSTRAINT `qrtz_simple_triggers_ibfk_1`
        FOREIGN KEY (`SCHED_NAME`, `TRIGGER_NAME`, `TRIGGER_GROUP`)
        REFERENCES `QRTZ_TRIGGERS` (`SCHED_NAME`, `TRIGGER_NAME`, `TRIGGER_GROUP`)
) ENGINE = InnoDB DEFAULT CHARSET = utf8mb4;
-- QRTZ_CRON_TRIGGERS: cron expression and optional time zone id for a trigger,
-- one row per (SCHED_NAME, TRIGGER_NAME, TRIGGER_GROUP), tied to QRTZ_TRIGGERS by foreign key.
CREATE TABLE `QRTZ_CRON_TRIGGERS` (
    `SCHED_NAME`      VARCHAR(120) NOT NULL,
    `TRIGGER_NAME`    VARCHAR(190) NOT NULL,
    `TRIGGER_GROUP`   VARCHAR(190) NOT NULL,
    `CRON_EXPRESSION` VARCHAR(120) NOT NULL,
    `TIME_ZONE_ID`    VARCHAR(80)  DEFAULT NULL,
    PRIMARY KEY (`SCHED_NAME`, `TRIGGER_NAME`, `TRIGGER_GROUP`),
    CONSTRAINT `qrtz_cron_triggers_ibfk_1`
        FOREIGN KEY (`SCHED_NAME`, `TRIGGER_NAME`, `TRIGGER_GROUP`)
        REFERENCES `QRTZ_TRIGGERS` (`SCHED_NAME`, `TRIGGER_NAME`, `TRIGGER_GROUP`)
) ENGINE = InnoDB DEFAULT CHARSET = utf8mb4;
-- QRTZ_SIMPROP_TRIGGERS: a fixed set of nullable string/int/long/decimal/boolean property
-- slots for a trigger, one row per (SCHED_NAME, TRIGGER_NAME, TRIGGER_GROUP),
-- tied to QRTZ_TRIGGERS by foreign key.
CREATE TABLE `QRTZ_SIMPROP_TRIGGERS` (
    `SCHED_NAME`    VARCHAR(120)  NOT NULL,
    `TRIGGER_NAME`  VARCHAR(190)  NOT NULL,
    `TRIGGER_GROUP` VARCHAR(190)  NOT NULL,
    `STR_PROP_1`    VARCHAR(512)  DEFAULT NULL,
    `STR_PROP_2`    VARCHAR(512)  DEFAULT NULL,
    `STR_PROP_3`    VARCHAR(512)  DEFAULT NULL,
    `INT_PROP_1`    INT           DEFAULT NULL,
    `INT_PROP_2`    INT           DEFAULT NULL,
    `LONG_PROP_1`   BIGINT        DEFAULT NULL,
    `LONG_PROP_2`   BIGINT        DEFAULT NULL,
    `DEC_PROP_1`    DECIMAL(13,4) DEFAULT NULL,
    `DEC_PROP_2`    DECIMAL(13,4) DEFAULT NULL,
    `BOOL_PROP_1`   VARCHAR(1)    DEFAULT NULL,
    `BOOL_PROP_2`   VARCHAR(1)    DEFAULT NULL,
    PRIMARY KEY (`SCHED_NAME`, `TRIGGER_NAME`, `TRIGGER_GROUP`),
    CONSTRAINT `qrtz_simprop_triggers_ibfk_1`
        FOREIGN KEY (`SCHED_NAME`, `TRIGGER_NAME`, `TRIGGER_GROUP`)
        REFERENCES `QRTZ_TRIGGERS` (`SCHED_NAME`, `TRIGGER_NAME`, `TRIGGER_GROUP`)
) ENGINE = InnoDB DEFAULT CHARSET = utf8mb4;
四、依赖
<dependency>
    <groupId>org.springframework.boot</groupId>
    <artifactId>spring-boot-starter-quartz</artifactId>
</dependency>
五、application.yml
spring:
  # Quartz settings for clustered (distributed) scheduled jobs
  quartz:
    # Raw Quartz properties, nested so they resolve to org.quartz.* keys
    properties:
      org:
        quartz:
          scheduler:
            instanceName: clusteredScheduler
            instanceId: AUTO
          jobStore:
            class: org.quartz.impl.jdbcjobstore.JobStoreTX
            driverDelegateClass: org.quartz.impl.jdbcjobstore.StdJDBCDelegate
            # Must match the prefix used by the QRTZ_ tables in the database
            tablePrefix: QRTZ_
            isClustered: true
            clusterCheckinInterval: 10000
            useProperties: false
          threadPool:
            class: org.quartz.simpl.SimpleThreadPool
            threadCount: 10
            threadPriority: 5
            threadsInheritContextClassLoaderOfInitializingThread: true
    # Persist jobs and triggers in the database
    job-store-type: jdbc
    jdbc:
      # The QRTZ_ tables are created manually with the SQL script, so do not
      # let Spring Boot re-run its schema script on startup: "always" executes
      # the bundled script on every start, and that script drops the existing
      # tables, erasing the job store shared by the cluster.
      initialize-schema: never
六、代码
定时任务需要实现job接口,但是quartz已经有了一个简单的抽象实现,简化了代码量.只需要直接继承即可(QuartzJobBean).
package com.nieyue.news.webmagic.quartz;
import com.nieyue.news.webmagic.downloader.SeleniumDownloader;
import com.nieyue.news.webmagic.pipeline.ArticlePipeline;
import com.nieyue.news.webmagic.processor.ArticleProcessor;
import com.nieyue.news.webmagic.utils.YamlReader;
import lombok.SneakyThrows;
import org.quartz.JobDetail;
import org.quartz.JobExecutionContext;
import org.quartz.JobExecutionException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.scheduling.quartz.QuartzJobBean;
import org.springframework.stereotype.Component;
import org.springframework.web.context.request.RequestContextHolder;
import org.springframework.web.context.request.ServletRequestAttributes;
import us.codecraft.webmagic.Spider;
import us.codecraft.webmagic.scheduler.BloomFilterDuplicateRemover;
import us.codecraft.webmagic.scheduler.QueueScheduler;
import javax.servlet.http.HttpServletRequest;
import java.io.IOException;
import java.io.InputStream;
import java.net.ServerSocket;
import java.util.Date;
import java.util.Properties;
/**
 * Quartz job that starts a WebMagic {@link Spider} on the ifeng.com home page.
 *
 * <p>The spider uses the injected {@link ArticleProcessor} and {@link ArticlePipeline}
 * and a {@link SeleniumDownloader} whose Chrome driver path is read from the
 * classpath resource {@code selenium.properties}.
 */
@Component
public class FengArticleJob extends QuartzJobBean {
    /** Classpath resource holding the Selenium settings. */
    private static final String SELENIUM_CONFIG = "selenium.properties";
    /** Key inside {@link #SELENIUM_CONFIG} that holds the Chrome driver path. */
    private static final String CHROME_DRIVER_PATH_KEY = "chrome_driver_path";
    /** Start URL of the crawl. */
    private static final String URL = "https://www.ifeng.com/";

    private Logger logger = LoggerFactory.getLogger(this.getClass());

    @Autowired
    private ArticleProcessor articleProcessor;
    @Autowired
    private ArticlePipeline articlePipeline;

    /**
     * Logs the job identity, loads the Selenium settings and runs the spider;
     * {@code Spider.run()} is invoked on the calling thread.
     *
     * @param context Quartz execution context; only its {@link JobDetail} is read
     * @throws JobExecutionException if {@code selenium.properties} is missing or unreadable
     */
    // NOTE(review): @SneakyThrows is kept because SeleniumDownloader is project code that is
    // not visible here and may declare checked exceptions; drop it once that is confirmed.
    @SneakyThrows
    @Override
    protected void executeInternal(JobExecutionContext context) throws JobExecutionException {
        JobDetail jobDetail = context.getJobDetail();
        logger.info("进入定时任务自动爬取凤凰网文章, taskName: " + jobDetail.getKey().getName() +
                ", jobGroup: " + jobDetail.getKey().getGroup() + ", jobClass: " + jobDetail.getJobClass().getName());

        Properties seleniumConfig = loadSeleniumConfig();
        String chromeDriverPath = seleniumConfig.getProperty(CHROME_DRIVER_PATH_KEY);

        Spider.create(articleProcessor)
                .addUrl(URL)
                // Custom pipeline (per the original author's note, it saves results to the database)
                .addPipeline(articlePipeline)
                .thread(5)
                // Sleep time for SeleniumDownloader: dynamically loaded pages may still be
                // fetching data, so wait before the page is handed on.
                .setDownloader(new SeleniumDownloader(chromeDriverPath).setSleepTime(3000))
                // Queue scheduling with Bloom-filter de-duplication sized by 10 * 1000 (= 10,000).
                // NOTE(review): the original comment said 100,000 - confirm the intended size.
                .setScheduler(new QueueScheduler().setDuplicateRemover(new BloomFilterDuplicateRemover(10 * 1000)))
                .run();
    }

    /**
     * Reads {@link #SELENIUM_CONFIG} from the context class loader and closes the stream afterwards.
     *
     * @return the loaded properties
     * @throws JobExecutionException if the resource does not exist or cannot be read
     */
    private Properties loadSeleniumConfig() throws JobExecutionException {
        ClassLoader loader = Thread.currentThread().getContextClassLoader();
        try (InputStream in = loader.getResourceAsStream(SELENIUM_CONFIG)) {
            if (in == null) {
                throw new JobExecutionException("Classpath resource not found: " + SELENIUM_CONFIG);
            }
            Properties props = new Properties();
            props.load(in);
            return props;
        } catch (IOException e) {
            throw new JobExecutionException("Failed to read " + SELENIUM_CONFIG, e);
        }
    }
}
package com.nieyue.news.webmagic.quartz;
import org.springframework.context.annotation.Bean;
import org.springframework.scheduling.quartz.CronTriggerFactoryBean;
import org.springframework.scheduling.quartz.JobDetailFactoryBean;
/**
 * Declares the Quartz job detail and cron trigger for {@link FengArticleJob}.
 *
 * NOTE(review): the {@code @Configuration} annotation below is commented out, so Spring will
 * not pick up these {@code @Bean} methods through this class unless it is registered some
 * other way - confirm whether that is intentional.
 */
//@Configuration
public class TaskConfig {

    /**
     * Job detail for {@link FengArticleJob}.
     *
     * @return factory bean configured as durable and requesting recovery
     */
    @Bean
    public JobDetailFactoryBean job1() {
        JobDetailFactoryBean detailFactory = new JobDetailFactoryBean();
        // Identity and description of the job
        detailFactory.setName("fengAiticleJob");
        detailFactory.setGroup("fengAiticleJobGroup");
        detailFactory.setDescription("定时任务自动爬取凤凰网文章");
        // Class that implements the job
        detailFactory.setJobClass(FengArticleJob.class);
        // Durability flag
        detailFactory.setDurability(true);
        // Request recovery flag
        detailFactory.setRequestsRecovery(true);
        return detailFactory;
    }

    /**
     * Cron trigger bound to the job detail produced by {@link #job1()}.
     *
     * @return factory bean for a trigger firing at 09:00, 12:00 and 15:00 every day
     */
    @Bean
    public CronTriggerFactoryBean trigger1() {
        CronTriggerFactoryBean triggerFactory = new CronTriggerFactoryBean();
        // Job detail this trigger fires
        triggerFactory.setJobDetail(job1().getObject());
        // Fires at second 0, minute 0 of hours 9, 12 and 15, every day
        triggerFactory.setCronExpression("0 0 9,12,15 * * ?");
        // NOTE(review): the literals look swapped (the group is "...Trigger", the name is
        // "...TriggerGroup"); left unchanged because they form the stored trigger key.
        triggerFactory.setName("fengAiticleTriggerGroup");
        triggerFactory.setGroup("fengAiticleTrigger");
        return triggerFactory;
    }
}
七、测试任务
可将项目复制后再启动一份或多份(修改端口),观察两点:一是任务是否会同时执行(集群模式下同一任务不应在多个实例上同时执行);二是关闭其中一个项目后,其他项目的任务是否继续执行。如果以上两点都满足,则证明quartz可适用于分布式定时任务。
参考文档
https://www.w3cschool.cn/quartz_doc/
https://blog.csdn.net/yangshengwei230612/article/details/114402203