Spring Batch_使用多线程运行一组相同任务的JOB
主要思路:在 Spring Batch 中,一个 job 完成一个任务、处理一个数据集。有时这个数据集很大,即使做了各种优化(数据库访问的优化、代码的优化等等),运行时间仍然很长。于是我想:如果把这个数据集分成几块,配置几个相同的 job 来完成同一个任务,每个 job 只处理其中一个数据块,这样不是也能提高效率、节省时间吗?
那么我们就来实验一下,看看可操作性。。。
如何给一个大的数据集分块:可以利用 limit。通过 limit 构造两条 SQL 语句,再通过 jobParameters 动态传递给运行中的 job,这样 job 的 item reader 就会读取对应 SQL 语句查询出来的数据,然后进行处理。
下面是我的 Spring Batch 配置文件:
<!--
  Spring Batch configuration with two structurally identical jobs
  (addPeopleDescJob_1 and addPeopleDescJob_2). The jobs differ only in their
  reader: each reader takes its SQL text from a different job parameter
  ('sql1' and 'sql2'); processor, writer and commit-interval are shared.
-->
<beans xmlns="http://www.springframework.org/schema/beans"
       xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
       xmlns:batch="http://www.springframework.org/schema/batch"
       xmlns:context="http://www.springframework.org/schema/context"
       xsi:schemaLocation="http://www.springframework.org/schema/beans http://www.springframework.org/schema/beans/spring-beans-4.0.xsd http://www.springframework.org/schema/batch http://www.springframework.org/schema/batch/spring-batch.xsd http://www.springframework.org/schema/context http://www.springframework.org/schema/context/spring-context.xsd">

    <!-- Component scanning for the com.lyx.batch package -->
    <context:component-scan base-package="com.lyx.batch" />

    <!-- Listener bean attached to every step through abstractStep below -->
    <bean id="exceptionHandler" class="com.lyx.batch.ExceptionListener" />

    <!-- Abstract parent step: registers the exceptionHandler listener for child steps -->
    <batch:step id="abstractStep" abstract="true">
        <batch:listeners>
            <batch:listener ref="exceptionHandler" />
        </batch:listeners>
    </batch:step>

    <!-- Abstract parent reader: supplies the shared dataSource to both cursor readers -->
    <bean id="abstractCursorReader" abstract="true"
          class="org.springframework.batch.item.database.JdbcCursorItemReader">
        <property name="dataSource" ref="dataSource" />
    </bean>

    <!-- Job 1: single chunk-oriented step reading through peopleAddDescReader_1 -->
    <batch:job id="addPeopleDescJob_1">
        <batch:step id="addDescStep_1" parent="abstractStep">
            <batch:tasklet>
                <batch:chunk reader="peopleAddDescReader_1" processor="addDescProcessor"
                             writer="addDescPeopleWriter" commit-interval="2" />
            </batch:tasklet>
        </batch:step>
    </batch:job>

    <!-- Step-scoped reader for job 1: its SQL text comes from the job parameter 'sql1' -->
    <bean id="peopleAddDescReader_1" parent="abstractCursorReader" scope="step">
        <property name="sql" value="#{jobParameters['sql1']}" />
        <property name="rowMapper" ref="peopleRowMapper" />
        <property name="preparedStatementSetter" ref="preparedStatementSetter" />
        <property name="fetchSize" value="20" />
    </bean>

    <!-- Job 2: same step layout as job 1, reading through peopleAddDescReader_2 -->
    <batch:job id="addPeopleDescJob_2">
        <batch:step id="addDescStep_2" parent="abstractStep">
            <batch:tasklet>
                <batch:chunk reader="peopleAddDescReader_2" processor="addDescProcessor"
                             writer="addDescPeopleWriter" commit-interval="2" />
            </batch:tasklet>
        </batch:step>
    </batch:job>

    <!-- Step-scoped reader for job 2: its SQL text comes from the job parameter 'sql2' -->
    <bean id="peopleAddDescReader_2" parent="abstractCursorReader" scope="step">
        <property name="sql" value="#{jobParameters['sql2']}" />
        <property name="rowMapper" ref="peopleRowMapper" />
        <property name="preparedStatementSetter" ref="preparedStatementSetter" />
        <property name="fetchSize" value="20" />
    </bean>

    <!-- Collaborators shared by both jobs -->
    <bean id="peopleRowMapper" class="com.lyx.batch.PeopleRowMapper" />
    <bean id="preparedStatementSetter" class="com.lyx.batch.PeoplePreparedStatementSetter" />
    <bean id="addDescProcessor" class="com.lyx.batch.AddPeopleDescProcessor" />
    <bean id="addDescPeopleWriter" class="com.lyx.batch.AddDescPeopleWriter">
        <property name="dataSource" ref="dataSource" />
    </bean>

    <!-- Tomcat JDBC pool data source configuration -->
    <!-- NOTE(review): the database credentials are hard-coded in this file -->
    <bean id="dataSource" class="org.apache.tomcat.jdbc.pool.DataSource" destroy-method="close">
        <property name="poolProperties">
            <bean class="org.apache.tomcat.jdbc.pool.PoolProperties">
                <property name="driverClassName" value="com.mysql.jdbc.Driver" />
                <property name="url" value="jdbc:mysql://localhost:3306/test" />
                <property name="username" value="root" />
                <property name="password" value="034039" />
            </bean>
        </property>
    </bean>

    <!-- Spring Batch jobRepository configuration (uses the same dataSource and transaction manager) -->
    <batch:job-repository id="jobRepository" data-source="dataSource"
                          transaction-manager="transactionManager"
                          isolation-level-for-create="REPEATABLE_READ"
                          table-prefix="BATCH_" max-varchar-length="1000" />

    <!-- Spring transaction manager -->
    <bean id="transactionManager"
          class="org.springframework.jdbc.datasource.DataSourceTransactionManager">
        <property name="dataSource" ref="dataSource" />
    </bean>

    <!-- Batch job launcher; no task executor is configured on it here -->
    <bean id="jobLauncher" class="org.springframework.batch.core.launch.support.SimpleJobLauncher">
        <property name="jobRepository" ref="jobRepository" />
    </bean>
</beans>
可以看到有两个job -addPeopleDescJob_1 和 addPeopleDescJob_2,每个job的reader 是不一样的,不一样的地方在 sql参数的不一样,是通过job parameter 动态传递进来的。。
下面是AppMain4.java
package com.lyx.batch; import javax.sql.DataSource; import org.springframework.batch.core.ExitStatus; import org.springframework.batch.core.Job; import org.springframework.batch.core.JobExecution; import org.springframework.batch.core.JobParametersBuilder; import org.springframework.batch.core.JobParametersInvalidException; import org.springframework.batch.core.launch.JobLauncher; import org.springframework.batch.core.repository.JobExecutionAlreadyRunningException; import org.springframework.batch.core.repository.JobInstanceAlreadyCompleteException; import org.springframework.batch.core.repository.JobRestartException; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.context.ApplicationContext; import org.springframework.context.support.ClassPathXmlApplicationContext; import org.springframework.jdbc.core.JdbcTemplate; import org.springframework.stereotype.Component; @Component public class AppMain4 { private static JdbcTemplate jdbcTemplate; @Autowired public void setDataSource(DataSource dataSource) { jdbcTemplate = new JdbcTemplate(dataSource); } public static void main(String[] args) throws JobExecutionAlreadyRunningException, JobRestartException, JobInstanceAlreadyCompleteException, JobParametersInvalidException { long startTime = System.currentTimeMillis(); // 获取开始时间 @SuppressWarnings("resource") final ApplicationContext context = new ClassPathXmlApplicationContext( new String[] { "classpath:spring-batch4.xml" }); final JobLauncher launcher = (JobLauncher) context .getBean("jobLauncher"); int rowCount = jdbcTemplate.queryForObject( "select count(*) from people where " + "first_name like '%JOHN%' or last_name like '%DOE%'", Integer.class); final String sql1; final String sql2; int mid = (rowCount - 1) >>> 1; if ((rowCount & 1) == 0) { // 偶数 sql1 = "select first_name ,last_name from people where " + "first_name like ? or last_name like ? 
limit 0," + mid; sql2 = "select first_name ,last_name from people where " + "first_name like ? or last_name like ? order by person_id desc limit 0," + mid; } else { // 奇数 sql1 = "select first_name ,last_name from people where " + "first_name like ? or last_name like ? limit 0," + mid; sql2 = "select first_name ,last_name from people where " + "first_name like ? or last_name like ? order by person_id desc limit 0," + (mid + 1); } Thread thread_1 = new Thread(new Runnable() { public void run() { long t1 = System.currentTimeMillis(); // 获取开始时间 // TODO Auto-generated method stub JobParametersBuilder job1 = new JobParametersBuilder(); job1.addString("sql1", sql1); Job task1 = (Job) context.getBean("addPeopleDescJob_1"); try { JobExecution result1 = launcher.run(task1, job1.toJobParameters()); ExitStatus es1 = result1.getExitStatus(); if (es1.getExitCode().equals( ExitStatus.COMPLETED.getExitCode())) { System.out.println("job1任务正常完成"); } else { System.out.println("job1任务失败,exitCode=" + es1.getExitCode()); } } catch (JobExecutionAlreadyRunningException e) { // TODO Auto-generated catch block e.printStackTrace(); } catch (JobRestartException e) { // TODO Auto-generated catch block e.printStackTrace(); } catch (JobInstanceAlreadyCompleteException e) { // TODO Auto-generated catch block e.printStackTrace(); } catch (JobParametersInvalidException e) { // TODO Auto-generated catch block e.printStackTrace(); } long t2 = System.currentTimeMillis(); // 获取结束时间 System.out.println(Thread.currentThread().getName() + "运行时间: " + (t2 - t1) + "ms"); } }); thread_1.start(); Thread thread_2 = new Thread(new Runnable() { public void run() { long t1 = System.currentTimeMillis(); // TODO Auto-generated method stub JobParametersBuilder job2 = new JobParametersBuilder(); // 设置JobParameter job2.addString("sql2", sql2); Job task2 = (Job) context.getBean("addPeopleDescJob_2"); try { JobExecution result2 = launcher.run(task2, job2.toJobParameters()); ExitStatus es2 = result2.getExitStatus(); if 
(es2.getExitCode().equals( ExitStatus.COMPLETED.getExitCode())) { System.out.println("job2任务正常完成"); } else { System.out.println("job2任务失败,exitCode=" + es2.getExitCode()); } } catch (JobExecutionAlreadyRunningException e) { // TODO Auto-generated catch block e.printStackTrace(); } catch (JobRestartException e) { // TODO Auto-generated catch block e.printStackTrace(); } catch (JobInstanceAlreadyCompleteException e) { // TODO Auto-generated catch block e.printStackTrace(); } catch (JobParametersInvalidException e) { // TODO Auto-generated catch block e.printStackTrace(); } long t2 = System.currentTimeMillis(); // 获取结束时间 System.out.println(Thread.currentThread().getName() + "运行时间: " + (t2 - t1) + "ms"); } }); thread_2.start(); long endTime = System.currentTimeMillis(); // 获取结束时间 System.out.println("程序运行时间: " + (endTime - startTime) + "ms"); } }
PeoplePreparedStatementSetter.java
package com.lyx.batch; import java.sql.PreparedStatement; import java.sql.SQLException; import org.springframework.jdbc.core.PreparedStatementSetter; public class PeoplePreparedStatementSetter implements PreparedStatementSetter { public void setValues(PreparedStatement ps) throws SQLException { // TODO Auto-generated method stub ps.setString(1, "%JOHN%"); ps.setString(2, "%DOE%"); // ps.setInt(3, 1); // ps.setInt(4, 100); } }
运行结果:
job1任务正常完成
Thread-3运行时间: 4573ms
job2任务正常完成
Thread-4运行时间: 4627ms
看到每个线程的运行时间都在4秒多。
再看一下在一个线程中运行一组Job的情况:
AppMain3.java
package com.lyx.batch; import javax.sql.DataSource; import org.springframework.batch.core.ExitStatus; import org.springframework.batch.core.Job; import org.springframework.batch.core.JobExecution; import org.springframework.batch.core.JobParametersBuilder; import org.springframework.batch.core.JobParametersInvalidException; import org.springframework.batch.core.launch.JobLauncher; import org.springframework.batch.core.repository.JobExecutionAlreadyRunningException; import org.springframework.batch.core.repository.JobInstanceAlreadyCompleteException; import org.springframework.batch.core.repository.JobRestartException; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.context.ApplicationContext; import org.springframework.context.support.ClassPathXmlApplicationContext; import org.springframework.jdbc.core.JdbcTemplate; import org.springframework.stereotype.Component; @Component public class AppMain3 { private static JdbcTemplate jdbcTemplate; @Autowired public void setDataSource(DataSource dataSource) { jdbcTemplate = new JdbcTemplate(dataSource); } public static void main(String[] args) throws JobExecutionAlreadyRunningException, JobRestartException, JobInstanceAlreadyCompleteException, JobParametersInvalidException { long startTime = System.currentTimeMillis(); // 获取开始时间 @SuppressWarnings("resource") ApplicationContext context = new ClassPathXmlApplicationContext( new String[] { "classpath:spring-batch4.xml" }); JobLauncher launcher = (JobLauncher) context.getBean("jobLauncher"); int rowCount = jdbcTemplate.queryForObject( "select count(*) from people where " + "first_name like '%JOHN%' or last_name like '%DOE%'", Integer.class); String sql1 = null; String sql2 = null; int mid = (rowCount - 1) >>> 1; if ((rowCount & 1) == 0) { // 偶数 sql1 = "select first_name ,last_name from people where " + "first_name like ? or last_name like ? limit 0," + mid; sql2 = "select first_name ,last_name from people where " + "first_name like ? 
or last_name like ? order by person_id desc limit 0," + mid; } else { // 奇数 sql1 = "select first_name ,last_name from people where " + "first_name like ? or last_name like ? limit 0," + mid; sql2 = "select first_name ,last_name from people where " + "first_name like ? or last_name like ? order by person_id desc limit 0," + (mid + 1); } JobParametersBuilder job1 = new JobParametersBuilder(); job1.addString("sql1", sql1); Job task1 = (Job) context.getBean("addPeopleDescJob_1"); JobExecution result1 = launcher.run(task1, job1.toJobParameters()); ExitStatus es1 = result1.getExitStatus(); if (es1.getExitCode().equals(ExitStatus.COMPLETED.getExitCode())) { System.out.println("job1任务正常完成"); } else { System.out.println("job1任务失败,exitCode=" + es1.getExitCode()); } JobParametersBuilder job2 = new JobParametersBuilder(); // 设置JobParameter job2.addString("sql2", sql2); Job task2 = (Job) context.getBean("addPeopleDescJob_2"); JobExecution result2 = launcher.run(task2, job2.toJobParameters()); ExitStatus es2 = result2.getExitStatus(); if (es2.getExitCode().equals(ExitStatus.COMPLETED.getExitCode())) { System.out.println("job2任务正常完成"); } else { System.out.println("job2任务失败,exitCode=" + es2.getExitCode()); } long endTime = System.currentTimeMillis(); // 获取结束时间 System.out.println("程序运行时间: " + (endTime - startTime) + "ms"); } }
运行结果:
job1任务正常完成
job2任务正常完成
程序运行时间: 8706ms
结果你也看到了,多线程运行一组 job 的效率确实更高。不过,用多线程并配置一组相同的 job 也带来了一些我事先没有预计到的问题:虽然提高了效率,但可能会给 job 的重试、重启以及 job 的管理带来麻烦。
==============END==============