Spring Batch能够支持简单的、复杂的和大数据量的批处理作业,是一个批处理应用框架,不是调度框架,但需要和调度框架(Quartz, Tivoli, Control-M, Cron等)合作来构建完成批处理任务。整体架构如下:
注意:定义 Job 基础设施:主要配置任务仓库(JobRepository)、任务启动器(JobLauncher)和任务执行中用到的事务管理器(TransactionManager)。如下:
<bean id="jobRepository" class="org.springframework.batch.core.repository.support.MapJobRepositoryFactoryBean" />
<bean id="jobLauncher" class="org.springframework.batch.core.launch.support.SimpleJobLauncher">
<property name="jobRepository" ref="jobRepository" />
</bean>
<bean id="transactionManager" class="org.springframework.batch.support.transaction.ResourcelessTransactionManager" />
commons-batch.xml (主要配置任务仓库、任务启动器和事务管理器等)
<beans xmlns="http://www.springframework.org/schema/beans" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:aop="http://www.springframework.org/schema/aop"
xmlns:batch="http://www.springframework.org/schema/batch" xmlns:context="http://www.springframework.org/schema/context"
xmlns:ehcache="http://ehcache-spring-annotations.googlecode.com/svn/schema/ehcache-spring"
xsi:schemaLocation="
http://www.springframework.org/schema/beans http://www.springframework.org/schema/beans/spring-beans-4.2.xsd
http://www.springframework.org/schema/aop http://www.springframework.org/schema/aop/spring-aop-4.2.xsd
http://www.springframework.org/schema/batch http://www.springframework.org/schema/batch/spring-batch-3.0.xsd
http://www.springframework.org/schema/context http://www.springframework.org/schema/context/spring-context-4.2.xsd
">
<aop:aspectj-autoproxy />
<context:component-scan base-package="com.job">
<context:exclude-filter type="aspectj" expression="(com.job..*)" />
</context:component-scan>
<bean class="org.springframework.beans.factory.config.PropertyPlaceholderConfigurer">
<property name="ignoreResourceNotFound" value="true" />
<property name="locations">
<list merge="true">
<value>classpath:databaseConfigs.properties</value>
<value>classpath:batch-testDatas.properties</value>
</list>
</property>
<property name="ignoreUnresolvablePlaceholders" value="true" />
</bean>
<bean id="inMemoryJobRepository" class="org.springframework.batch.core.repository.support.MapJobRepositoryFactoryBean">
<property name="transactionManager" ref="transactionManager" />
</bean>
<bean id="jobLauncher" class="org.springframework.batch.core.launch.support.SimpleJobLauncher">
<property name="jobRepository" ref="inMemoryJobRepository" />
</bean>
<bean id="abstractJobBatch" class="com.job.batch.services.base.AbstractJobBatch" abstract="true"/>
</beans>
jobBatch.xml ( job配置 )
<beans xmlns="http://www.springframework.org/schema/beans" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:batch="http://www.springframework.org/schema/batch"
xmlns:context="http://www.springframework.org/schema/context" xmlns:util="http://www.springframework.org/schema/util"
xsi:schemaLocation="
http://www.springframework.org/schema/beans http://www.springframework.org/schema/beans/spring-beans-4.2.xsd
http://www.springframework.org/schema/batch http://www.springframework.org/schema/batch/spring-batch-3.0.xsd
http://www.springframework.org/schema/util http://www.springframework.org/schema/util/spring-util-4.2.xsd
http://www.springframework.org/schema/context http://www.springframework.org/schema/context/spring-context-4.2.xsd">
<import resource="classpath:commons-batch.xml" />
<batch:job id="jobBatch" job-repository="inMemoryJobRepository">
<batch:step id="stepStart">
<batch:tasklet ref="jobTaskletStart" />
<batch:next on="FAILED" to="stepFailed" />
<batch:next on="*" to="importData" />
</batch:step>
<batch:step id="stepFailed">
<batch:tasklet ref="jobTaskletFailed" />
</batch:step>
<batch:step id="stepCompleted">
<batch:tasklet ref="jobTaskletCompleted" />
</batch:step>
<batch:step id="importData">
<batch:tasklet>
<batch:chunk reader="testReader" processor="testProcessor" writer="testWriter" commit-interval="${batch.commit.interval}" />
</batch:tasklet>
<batch:next on="FAILED" to="stepFailed" />
<batch:next on="*" to="anotherStep" />
</batch:step>
<batch:step id="anotherStep">
<batch:tasklet>
<batch:chunk reader="anotherFileItemReader" writer="anotherFileItemWriter" commit-interval="100" >
<batch:streams>
<batch:stream ref="aaaFlatFileItemWriter"/>
<batch:stream ref="bbbFlatFileItemWriter"/>
<batch:stream ref="cccFlatFileItemWriter"/>
</batch:streams>
</batch:chunk>
</batch:tasklet>
<batch:next on="FAILED" to="stepFailed" />
<batch:next on="*" to="..." />
</batch:step>
...
AbstractJobBatch.java ( 配置抽象类的公共方法 )
/**
 * Abstract base class that gathers the database-logging helpers shared by the batch tasklets.
 *
 * <p>This is an excerpt: method bodies and most parameters are elided ({@code ...}).
 * The class is named {@code AbstractJobBatch} so that it matches the bean definition
 * ({@code com.job.batch.services.base.AbstractJobBatch}) and the tasklets that extend it.
 */
public abstract class AbstractJobBatch {
    // Injected service; its concrete implementation is not shown in this excerpt
    @Inject
    private JobBatchService jobBatchService;

    // Inserts the initial log record into the database when a job starts.
    // Body elided; callers store the returned Integer as the batch id.
    public Integer initLogInDatabase(final String name, ...) throws jobServiceException {}

    // Adds a detailed log entry for the given batch to the database
    public void addLogDetail(final Integer batchId, ...) throws jobServiceException {}

    // Records the failure outcome for the given batch in the database log
    public void closeFailedLog(final Integer batchId, ...) throws jobServiceException {}

    // Records the successful outcome for the given batch in the database log
    public void closeCompletedLog(final Integer batchId, ...) throws jobServiceException {}
}
jobTaskletStart, (具体的开始,成功,失败的类均要实现 Tasklet 的 execute 方法)
/**
 * Tasklet bound to the first step of the job.
 *
 * <p>It creates the initial log record through {@code initLogInDatabase} and publishes the job
 * name in the job-level {@code ExecutionContext} under {@code BATCH_NAME}. The start, completed
 * and failed tasklets all implement {@code Tasklet#execute} in this way.
 */
@Named
public class JobTaskletStart extends AbstractJobBatch implements Tasklet {

    /**
     * Runs once for the step and reports that no repetition is needed.
     *
     * @param contribution step contribution; its exit status is set to {@code COMPLETED}
     * @param chunkContext gives access to the step context (job name, job parameters, execution context)
     * @return {@code RepeatStatus.FINISHED}
     * @throws Exception if creating the log record fails
     */
    @Override
    public RepeatStatus execute(StepContribution contribution, ChunkContext chunkContext) throws Exception {
        contribution.setExitStatus(ExitStatus.COMPLETED);
        String jobName = chunkContext.getStepContext().getJobName();
        // Parameterized instead of a raw Map; getJobParameters() exposes the parameters as name -> value
        Map<String, Object> batchParameters = chunkContext.getStepContext().getJobParameters();
        Integer batchId = initLogInDatabase(jobName,...);
        // ExecutionContext is backed by a map, so it can carry values such as
        // jobName, batchId, timestamp, fileName, ... across steps.
        // NOTE(review): batchId is not stored in the context here; later steps presumably need it - confirm
        ExecutionContext jobExecutionContext = chunkContext.getStepContext().getStepExecution().getJobExecution().getExecutionContext();
        jobExecutionContext.put(BATCH_NAME, jobName);
        return RepeatStatus.FINISHED;
    }
}
testReader
/**
 * Flat-file reader registered as {@code testReader}; produces {@code TestDto} items.
 */
@Named("testReader")
public class TestReader extends FlatFileItemReader<TestDto> {
    // Job-level execution context captured in beforeStep
    private ExecutionContext executionContext;

    /**
     * Step callback invoked before reading starts; keeps a reference to the job-level
     * execution context.
     *
     * @param stepExecution the execution of the step this reader belongs to
     */
    @BeforeStep
    public void beforeStep(StepExecution stepExecution){
        executionContext = stepExecution.getJobExecution().getExecutionContext();
        // Reader set-up goes here, e.g. configuring the tokenizer, lineMapper, ...
    }

    /**
     * Delegates to the parent reader and returns the item it produced.
     *
     * <p>Declared with {@code throws Exception} because {@code super.doRead()} is declared
     * to throw {@code Exception}; a narrower clause would leave that call unhandled.
     *
     * @return the item returned by the parent reader
     * @throws Exception propagated from the parent reader
     */
    @Override
    protected TestDto doRead() throws Exception {
        TestDto dto = super.doRead();
        return dto;
    }
}
testProcessor
/**
 * Item processor registered as {@code testProcessor}; turns a {@code TestDto} into a {@code Test}.
 */
@Named("testProcessor")
public class TestProcessor implements ItemProcessor<TestDto, Test> {

    /**
     * Builds the {@code Test} that corresponds to the given item.
     *
     * @param item the item to convert
     * @return a newly created {@code Test}
     * @throws Exception if the conversion fails
     */
    @Override
    public Test process(TestDto item) throws Exception {
        final Test converted = new Test();
        // The actual conversion steps go here (omitted in this excerpt)
        return converted;
    }
}
testWriter
@Named("testWriter")
public class TestWriter implements ItemWriter<Test> {
// 一般需要注入 service 来实现具体的操作
@Inject
private TestService testService;
@BeforeStep
public void beforeStep(StepExecution stepExecution) {
// 执行前的操作
ExecutionContext executionContext = stepExecution.getJobExecution().getExecutionContext();
}
@Override
public void write(List extends Test> items){
for (Test test : items) {
// 具体的插入表操作等...
}
}
@AfterStep
public void afterStep(StepExecution stepExecution){
// 执行后进行的操作,主要是更新数据库 log 的状态(成功或者失败)
}
}