2019独角兽企业重金招聘Python工程师标准>>>
Spring Batch_使用多线程运行一组JOB
主要思路:在spring batch中,一个job完成一个任务,处理一个数据集。有时这个数据集会很大,即使做了各种优化(数据库访问的优化,代码的优化等等),运行时间仍然很长。我的想法是:把这个数据集分成几块,配置几个相同的job来完成同一个任务,每个job只处理其中一个数据块,这样应该也能提高效率,节省时间。
那么我们就来实验一下,看看可操作性。
如何给一个大的数据集分块:可以利用limit。通过limit构造两个sql语句,再通过jobParameters动态传递给运行中的job,这样job的item reader就只会读取该sql语句查询出来的那部分数据,然后进行处理。
下面是我的spring batch的配置文件:
可以看到有两个job:addPeopleDescJob_1 和 addPeopleDescJob_2。两个job的reader不一样,区别在于sql参数不同,这个参数是通过job parameter动态传递进来的。
下面是AppMain4.java
package com.lyx.batch;
import javax.sql.DataSource;
import org.springframework.batch.core.ExitStatus;
import org.springframework.batch.core.Job;
import org.springframework.batch.core.JobExecution;
import org.springframework.batch.core.JobParametersBuilder;
import org.springframework.batch.core.JobParametersInvalidException;
import org.springframework.batch.core.launch.JobLauncher;
import org.springframework.batch.core.repository.JobExecutionAlreadyRunningException;
import org.springframework.batch.core.repository.JobInstanceAlreadyCompleteException;
import org.springframework.batch.core.repository.JobRestartException;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.context.ApplicationContext;
import org.springframework.context.support.ClassPathXmlApplicationContext;
import org.springframework.jdbc.core.JdbcTemplate;
import org.springframework.stereotype.Component;
/**
 * Runs the two jobs {@code addPeopleDescJob_1} and {@code addPeopleDescJob_2} in parallel,
 * one worker thread per job, each job reading one half of the matching {@code people} rows.
 *
 * <p>The matching rows are counted first, then split into two halves ordered by
 * {@code person_id}: the first job reads the lower half in ascending order, the second job
 * reads the upper half in descending order. The two halves together cover every counted row.
 * Each query is handed to its job as a job parameter ({@code sql1} / {@code sql2}).
 */
@Component
public class AppMain4 {

    /** Counts the rows that the two jobs have to share. */
    private static final String COUNT_SQL = "select count(*) from people where "
            + "first_name like '%JOHN%' or last_name like '%DOE%'";

    /** Common head of both reader queries; the two placeholders are bound outside this class. */
    private static final String SELECT_PREFIX = "select first_name ,last_name from people where "
            + "first_name like ? or last_name like ? ";

    /**
     * Assigned from {@link #setDataSource(DataSource)}.
     * NOTE(review): this only works if spring-batch4.xml creates an AppMain4 bean
     * (e.g. via component scanning) - confirm against the XML configuration.
     */
    private static JdbcTemplate jdbcTemplate;

    /**
     * Builds the shared {@link JdbcTemplate} from the injected data source.
     *
     * @param dataSource data source injected by Spring
     */
    @Autowired
    public void setDataSource(DataSource dataSource) {
        jdbcTemplate = new JdbcTemplate(dataSource);
    }

    /**
     * Loads the batch configuration, splits the matching rows into two halves and runs
     * both jobs concurrently, waiting for them before reporting the total run time.
     *
     * @param args unused
     */
    public static void main(String[] args)
            throws JobExecutionAlreadyRunningException, JobRestartException,
            JobInstanceAlreadyCompleteException, JobParametersInvalidException {
        long startTime = System.currentTimeMillis();

        final ClassPathXmlApplicationContext context = new ClassPathXmlApplicationContext(
                new String[] { "classpath:spring-batch4.xml" });
        try {
            if (jdbcTemplate == null) {
                throw new IllegalStateException(
                        "jdbcTemplate was not injected; is AppMain4 registered as a bean in spring-batch4.xml?");
            }
            final JobLauncher launcher = (JobLauncher) context.getBean("jobLauncher");

            Integer counted = jdbcTemplate.queryForObject(COUNT_SQL, Integer.class);
            int rowCount = (counted == null) ? 0 : counted.intValue();

            // Split so that firstHalf + secondHalf == rowCount for even AND odd counts.
            // (The former "(rowCount - 1) >>> 1" lost two rows on even counts and
            // produced Integer.MAX_VALUE when the count was zero.)
            int firstHalf = rowCount / 2;
            int secondHalf = rowCount - firstHalf;

            // Both halves are ordered by person_id so that they cannot overlap.
            final String sql1 = SELECT_PREFIX + "order by person_id asc limit 0," + firstHalf;
            final String sql2 = SELECT_PREFIX + "order by person_id desc limit 0," + secondHalf;

            Thread thread_1 = new Thread(
                    jobRunner(context, launcher, "addPeopleDescJob_1", "sql1", sql1, "job1"));
            Thread thread_2 = new Thread(
                    jobRunner(context, launcher, "addPeopleDescJob_2", "sql2", sql2, "job2"));
            thread_1.start();
            thread_2.start();

            // Wait for both workers; otherwise the total below only measures start-up time.
            awaitCompletion(thread_1);
            awaitCompletion(thread_2);

            long endTime = System.currentTimeMillis();
            System.out.println("程序运行时间: " + (endTime - startTime) + "ms");
        } finally {
            // Release the resources held by the context once the work is over.
            // If the waiting above was interrupted, the jobs may still be running here.
            context.close();
        }
    }

    /**
     * Creates the task that launches one job with a single string parameter, prints whether
     * it completed, and prints how long the launching thread was busy.
     *
     * @param context     context used to look up the job bean
     * @param launcher    launcher used to run the job
     * @param jobBeanName bean name of the job to run
     * @param paramName   name of the job parameter carrying the query
     * @param sql         query handed to the job
     * @param label       short name used in the console messages
     * @return a runnable performing the launch
     */
    private static Runnable jobRunner(final ApplicationContext context, final JobLauncher launcher,
            final String jobBeanName, final String paramName, final String sql, final String label) {
        return new Runnable() {
            @Override
            public void run() {
                long begin = System.currentTimeMillis();
                JobParametersBuilder parameters = new JobParametersBuilder();
                parameters.addString(paramName, sql);
                Job job = (Job) context.getBean(jobBeanName);
                try {
                    JobExecution execution = launcher.run(job, parameters.toJobParameters());
                    String exitCode = execution.getExitStatus().getExitCode();
                    if (ExitStatus.COMPLETED.getExitCode().equals(exitCode)) {
                        System.out.println(label + "任务正常完成");
                    } else {
                        System.out.println(label + "任务失败,exitCode=" + exitCode);
                    }
                } catch (JobExecutionAlreadyRunningException e) {
                    e.printStackTrace();
                } catch (JobRestartException e) {
                    e.printStackTrace();
                } catch (JobInstanceAlreadyCompleteException e) {
                    e.printStackTrace();
                } catch (JobParametersInvalidException e) {
                    e.printStackTrace();
                }
                long end = System.currentTimeMillis();
                System.out.println(Thread.currentThread().getName() + "运行时间: "
                        + (end - begin) + "ms");
            }
        };
    }

    /**
     * Blocks until the given worker has finished. If the waiting thread is interrupted,
     * the interrupt flag is restored and the wait is abandoned.
     *
     * @param worker thread to wait for
     */
    private static void awaitCompletion(Thread worker) {
        try {
            worker.join();
        } catch (InterruptedException e) {
            Thread.currentThread().interrupt();
        }
    }
}
PeoplePreparedStatementSetter.java
package com.lyx.batch;
import java.sql.PreparedStatement;
import java.sql.SQLException;
import org.springframework.jdbc.core.PreparedStatementSetter;
public class PeoplePreparedStatementSetter implements PreparedStatementSetter {
public void setValues(PreparedStatement ps) throws SQLException {
// TODO Auto-generated method stub
ps.setString(1, "%JOHN%");
ps.setString(2, "%DOE%");
// ps.setInt(3, 1);
// ps.setInt(4, 100);
}
}
运行结果:
job1任务正常完成
Thread-3运行时间: 4573ms
job2任务正常完成
Thread-4运行时间: 4627ms
看到每个线程的运行时间都在4秒多。
再看一下在一个线程中运行一组Job的情况:
AppMain3.java
package com.lyx.batch;
import javax.sql.DataSource;
import org.springframework.batch.core.ExitStatus;
import org.springframework.batch.core.Job;
import org.springframework.batch.core.JobExecution;
import org.springframework.batch.core.JobParametersBuilder;
import org.springframework.batch.core.JobParametersInvalidException;
import org.springframework.batch.core.launch.JobLauncher;
import org.springframework.batch.core.repository.JobExecutionAlreadyRunningException;
import org.springframework.batch.core.repository.JobInstanceAlreadyCompleteException;
import org.springframework.batch.core.repository.JobRestartException;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.context.ApplicationContext;
import org.springframework.context.support.ClassPathXmlApplicationContext;
import org.springframework.jdbc.core.JdbcTemplate;
import org.springframework.stereotype.Component;
/**
 * Runs the two jobs {@code addPeopleDescJob_1} and {@code addPeopleDescJob_2} one after the
 * other on the main thread, each job reading one half of the matching {@code people} rows.
 *
 * <p>The matching rows are counted first, then split into two halves ordered by
 * {@code person_id}: the first job reads the lower half in ascending order, the second job
 * reads the upper half in descending order. The two halves together cover every counted row.
 * Each query is handed to its job as a job parameter ({@code sql1} / {@code sql2}).
 */
@Component
public class AppMain3 {

    /** Counts the rows that the two jobs have to share. */
    private static final String COUNT_SQL = "select count(*) from people where "
            + "first_name like '%JOHN%' or last_name like '%DOE%'";

    /** Common head of both reader queries; the two placeholders are bound outside this class. */
    private static final String SELECT_PREFIX = "select first_name ,last_name from people where "
            + "first_name like ? or last_name like ? ";

    /**
     * Assigned from {@link #setDataSource(DataSource)}.
     * NOTE(review): this only works if spring-batch4.xml creates an AppMain3 bean
     * (e.g. via component scanning) - confirm against the XML configuration.
     */
    private static JdbcTemplate jdbcTemplate;

    /**
     * Builds the shared {@link JdbcTemplate} from the injected data source.
     *
     * @param dataSource data source injected by Spring
     */
    @Autowired
    public void setDataSource(DataSource dataSource) {
        jdbcTemplate = new JdbcTemplate(dataSource);
    }

    /**
     * Loads the batch configuration, splits the matching rows into two halves, runs both
     * jobs sequentially and prints the total run time.
     *
     * @param args unused
     * @throws JobExecutionAlreadyRunningException propagated from the job launcher
     * @throws JobRestartException                 propagated from the job launcher
     * @throws JobInstanceAlreadyCompleteException propagated from the job launcher
     * @throws JobParametersInvalidException       propagated from the job launcher
     */
    public static void main(String[] args)
            throws JobExecutionAlreadyRunningException, JobRestartException,
            JobInstanceAlreadyCompleteException, JobParametersInvalidException {
        long startTime = System.currentTimeMillis();

        ClassPathXmlApplicationContext context = new ClassPathXmlApplicationContext(
                new String[] { "classpath:spring-batch4.xml" });
        try {
            if (jdbcTemplate == null) {
                throw new IllegalStateException(
                        "jdbcTemplate was not injected; is AppMain3 registered as a bean in spring-batch4.xml?");
            }
            JobLauncher launcher = (JobLauncher) context.getBean("jobLauncher");

            Integer counted = jdbcTemplate.queryForObject(COUNT_SQL, Integer.class);
            int rowCount = (counted == null) ? 0 : counted.intValue();

            // Split so that firstHalf + secondHalf == rowCount for even AND odd counts.
            // (The former "(rowCount - 1) >>> 1" lost two rows on even counts and
            // produced Integer.MAX_VALUE when the count was zero.)
            int firstHalf = rowCount / 2;
            int secondHalf = rowCount - firstHalf;

            // Both halves are ordered by person_id so that they cannot overlap.
            String sql1 = SELECT_PREFIX + "order by person_id asc limit 0," + firstHalf;
            String sql2 = SELECT_PREFIX + "order by person_id desc limit 0," + secondHalf;

            launchAndReport(context, launcher, "addPeopleDescJob_1", "sql1", sql1, "job1");
            launchAndReport(context, launcher, "addPeopleDescJob_2", "sql2", sql2, "job2");

            long endTime = System.currentTimeMillis();
            System.out.println("程序运行时间: " + (endTime - startTime) + "ms");
        } finally {
            // Release the resources held by the context once the work is over.
            context.close();
        }
    }

    /**
     * Launches one job with a single string parameter and prints whether it completed.
     *
     * @param context     context used to look up the job bean
     * @param launcher    launcher used to run the job
     * @param jobBeanName bean name of the job to run
     * @param paramName   name of the job parameter carrying the query
     * @param sql         query handed to the job
     * @param label       short name used in the console messages
     */
    private static void launchAndReport(ApplicationContext context, JobLauncher launcher,
            String jobBeanName, String paramName, String sql, String label)
            throws JobExecutionAlreadyRunningException, JobRestartException,
            JobInstanceAlreadyCompleteException, JobParametersInvalidException {
        JobParametersBuilder parameters = new JobParametersBuilder();
        parameters.addString(paramName, sql);
        Job job = (Job) context.getBean(jobBeanName);
        JobExecution execution = launcher.run(job, parameters.toJobParameters());
        String exitCode = execution.getExitStatus().getExitCode();
        if (ExitStatus.COMPLETED.getExitCode().equals(exitCode)) {
            System.out.println(label + "任务正常完成");
        } else {
            System.out.println(label + "任务失败,exitCode=" + exitCode);
        }
    }
}
运行结果:
job1任务正常完成
job2任务正常完成
程序运行时间: 8706ms
结果你也看到了:用多线程运行一组job时,每个线程耗时约4.6秒,而在一个线程中顺序运行两个job共耗时约8.7秒,多线程的方式效率更高。但是,用多线程配置一组相同的job也可能带来一些我事先没有预计到的问题:虽然提高了效率,但可能给job的重试,重启以及job的管理带来麻烦。
==============END==============