原文作者: Steven Haines - 技术架构师
编写批处理程序来处理 GB 级别的数据量,无疑像面对海啸一样令人生畏,但我们可以用 Spring Batch 将其拆解为一个个小块(chunk)。Spring Batch 是 Spring 框架的一个模块,专门用于对各种类型的文件进行批量处理。本文先讲解一个简单的作业——从 CSV 文件中读取产品列表,然后导入 MySQL 数据库;接着一起研究 Spring Batch 的批处理能力(及性能),例如使用单个或多个处理器(processor),并辅以多个 tasklet(小任务);最后简要介绍 Spring Batch 为跳过记录(skipping)、重试记录(retrying)以及重启批处理作业(restarting)所提供的弹性工具。
如果你曾在Java企业系统中用批处理来处理过成千上万的数据交换,那你就知道工作负载是怎么回事。 批处理系统要处理庞大无比的数据量,处理单条记录失败的情况,还要管理中断,在重启动后不要再去处理那些已经执行过的部分。
对于没有相关经验的初学者,下面列出了一些需要批处理的场景;在这些场景中使用 Spring Batch,很可能会为你节省许多宝贵的时间:
package com.geekcap.javaworld.springbatchexample.simple.model;
/**
 * Simple mutable value object (POJO) representing a product.
 *
 * <p>Carries four properties: a numeric id, a name, a description and a
 * quantity. Every property is exposed through a getter/setter pair.
 */
public class Product {

    private int id;
    private int quantity;
    private String name;
    private String description;

    /**
     * Creates a product whose fields hold the Java defaults
     * ({@code 0} for the numeric fields, {@code null} for the strings).
     */
    public Product() {
        super();
    }

    /**
     * Creates a fully populated product.
     *
     * @param id          the product id
     * @param name        the product name
     * @param description the product description
     * @param quantity    the product quantity
     */
    public Product(int id, String name, String description, int quantity) {
        super();
        this.quantity = quantity;
        this.description = description;
        this.name = name;
        this.id = id;
    }

    /** @return the product id */
    public int getId() {
        return this.id;
    }

    /** @param id the product id to store */
    public void setId(int id) {
        this.id = id;
    }

    /** @return the product name, possibly {@code null} if never set */
    public String getName() {
        return this.name;
    }

    /** @param name the product name to store */
    public void setName(String name) {
        this.name = name;
    }

    /** @return the product description, possibly {@code null} if never set */
    public String getDescription() {
        return this.description;
    }

    /** @param description the product description to store */
    public void setDescription(String description) {
        this.description = description;
    }

    /** @return the product quantity */
    public int getQuantity() {
        return this.quantity;
    }

    /** @param quantity the product quantity to store */
    public void setQuantity(int quantity) {
        this.quantity = quantity;
    }
}
package com.geekcap.javaworld.springbatchexample.simple.reader;
import com.geekcap.javaworld.springbatchexample.simple.model.Product;
import org.springframework.batch.item.file.mapping.FieldSetMapper;
import org.springframework.batch.item.file.transform.FieldSet;
import org.springframework.validation.BindException;
/**
 * Builds a {@link Product} from the set of fields read from one line of the
 * CSV file.
 *
 * <p>Fields are looked up by name ({@code id}, {@code name},
 * {@code description}, {@code quantity}); NOTE(review): these names must match
 * the column names configured on the line tokenizer, which is not part of this
 * class.
 */
public class ProductFieldSetMapper implements FieldSetMapper<Product> {

    /**
     * Maps one tokenized line to a new {@link Product}.
     *
     * @param fieldSet the named fields of the current line
     * @return a freshly created product populated from {@code fieldSet}
     * @throws BindException declared by the {@link FieldSetMapper} contract
     */
    @Override
    public Product mapFieldSet(FieldSet fieldSet) throws BindException {
        Product product = new Product();
        product.setId(fieldSet.readInt("id"));
        product.setName(fieldSet.readString("name"));
        product.setDescription(fieldSet.readString("description"));
        product.setQuantity(fieldSet.readInt("quantity"));
        return product;
    }
}
package com.geekcap.javaworld.springbatchexample.simple.writer;
import com.geekcap.javaworld.springbatchexample.simple.model.Product;
import org.springframework.batch.item.ItemWriter;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.jdbc.core.JdbcTemplate;
import org.springframework.jdbc.core.RowMapper;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.util.List;
/**
* Writes products to a database
*/
public class ProductItemWriter implements ItemWriter
{
private static final String GET_PRODUCT = "select * from PRODUCT where id = ?";
private static final String INSERT_PRODUCT = "insert into PRODUCT (id,name,description,quantity) values (?,?,?,?)";
private static final String UPDATE_PRODUCT = "update PRODUCT set name = ?, description = ?,quantity = ? where id = ?";
@Autowired
private JdbcTemplate jdbcTemplate;
@Override
public void write(List extends Product> products) throws Exception
{
for( Product product : products )
{
List productList = jdbcTemplate.query(GET_PRODUCT, new Object[] {product.getId()}, new RowMapper() {
@Override
public Product mapRow( ResultSet resultSet, int rowNum ) throws SQLException {
Product p = new Product();
p.setId( resultSet.getInt( 1 ) );
p.setName( resultSet.getString( 2 ) );
p.setDescription( resultSet.getString( 3 ) );
p.setQuantity( resultSet.getInt( 4 ) );
return p;
}
});
if( productList.size() > 0 )
{
jdbcTemplate.update( UPDATE_PRODUCT, product.getName(), product.getDescription(), product.getQuantity(), product.getId() }
else
{
jdbcTemplate.update( INSERT_PRODUCT, product.getId(), product.getName(), product.getDescription(), product.getQuantity() }
}
}
}
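<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>
    <groupId>com.geekcap.javaworld</groupId>
    <artifactId>spring-batch-example</artifactId>
    <version>1.0-SNAPSHOT</version>
    <packaging>jar</packaging>
    <name>spring-batch-example</name>
    <url>http://maven.apache.org</url>
    <properties>
        <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
        <spring.version>3.2.1.RELEASE</spring.version>
        <spring.batch.version>2.2.1.RELEASE</spring.batch.version>
        <java.version>1.6</java.version>
    </properties>
    <dependencies>
        <dependency>
            <groupId>org.springframework</groupId>
            <artifactId>spring-context</artifactId>
            <version>${spring.version}</version>
        </dependency>
        <dependency>
            <groupId>org.springframework</groupId>
            <artifactId>spring-core</artifactId>
            <version>${spring.version}</version>
        </dependency>
        <dependency>
            <groupId>org.springframework</groupId>
            <artifactId>spring-beans</artifactId>
            <version>${spring.version}</version>
        </dependency>
        <dependency>
            <groupId>org.springframework</groupId>
            <artifactId>spring-jdbc</artifactId>
            <version>${spring.version}</version>
        </dependency>
        <dependency>
            <groupId>org.springframework.batch</groupId>
            <artifactId>spring-batch-core</artifactId>
            <version>${spring.batch.version}</version>
        </dependency>
        <dependency>
            <groupId>org.springframework.batch</groupId>
            <artifactId>spring-batch-infrastructure</artifactId>
            <version>${spring.batch.version}</version>
        </dependency>
        <dependency>
            <groupId>commons-dbcp</groupId>
            <artifactId>commons-dbcp</artifactId>
            <version>1.4</version>
        </dependency>
        <dependency>
            <groupId>mysql</groupId>
            <artifactId>mysql-connector-java</artifactId>
            <version>5.1.27</version>
        </dependency>
        <dependency>
            <groupId>junit</groupId>
            <artifactId>junit</artifactId>
            <version>4.11</version>
            <scope>test</scope>
        </dependency>
    </dependencies>
    <build>
        <plugins>
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-compiler-plugin</artifactId>
                <configuration>
                    <source>${java.version}</source>
                    <target>${java.version}</target>
                </configuration>
            </plugin>
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-jar-plugin</artifactId>
                <configuration>
                    <archive>
                        <manifest>
                            <addClasspath>true</addClasspath>
                            <classpathPrefix>lib/</classpathPrefix>
                        </manifest>
                    </archive>
                </configuration>
            </plugin>
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-dependency-plugin</artifactId>
                <executions>
                    <execution>
                        <id>copy</id>
                        <phase>install</phase>
                        <goals>
                            <goal>copy-dependencies</goal>
                        </goals>
                        <configuration>
                            <outputDirectory>${project.build.directory}/lib</outputDirectory>
                        </configuration>
                    </execution>
                </executions>
            </plugin>
        </plugins>
        <finalName>spring-batch-example</finalName>
    </build>
</project>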
-- Product table: one row per product, uniquely identified by ID.
CREATE TABLE PRODUCT (
    ID          INT          NOT NULL PRIMARY KEY,
    NAME        VARCHAR(128) NOT NULL,
    DESCRIPTION VARCHAR(128),
    QUANTITY    INT
);
id,name,description,quantity
1,Product One,This is product 1, 10
2,Product Two,This is product 2, 20
3,Product Three,This is product 3, 30
4,Product Four,This is product 4, 20
5,Product Five,This is product 5, 10
6,Product Six,This is product 6, 50
7,Product Seven,This is product 7, 80
8,Product Eight,This is product 8, 90
Nov 12, 2013 4:09:17 PM org.springframework.context.support.AbstractApplicationContext prepareRefresh
INFO: Refreshing org.springframework.context.support.ClassPathXmlApplicationContext@6b4da8f4: startup date [Tue Nov 12 16:09:17 EST 2013]; Nov 12, 2013 4:09:17 PM org.springframework.beans.factory.xml.XmlBeanDefinitionReader loadBeanDefinitions
INFO: Loading XML bean definitions from class path resource [jobs/file-import-job.xml]
Nov 12, 2013 4:09:18 PM org.springframework.beans.factory.xml.XmlBeanDefinitionReader loadBeanDefinitions
INFO: Loading XML bean definitions from class path resource [applicationContext.xml]
Nov 12, 2013 4:09:19 PM org.springframework.beans.factory.support.DefaultListableBeanFactory registerBeanDefinition
INFO: Overriding bean definition for bean 'simpleFileImportJob': replacing [Generic bean: class [org.springframework.batch.core.configuration.Nov 12, 2013 4:09:19 PM org.springframework.beans.factory.support.DefaultListableBeanFactory registerBeanDefinition
INFO: Overriding bean definition for bean 'productReader': replacing [Generic bean: class [org.springframework.batch.item.file.FlatFileItemReader]; Nov 12, 2013 4:09:19 PM org.springframework.beans.factory.support.DefaultListableBeanFactory preInstantiateSingletons
INFO: Pre-instantiating singletons in org.springframework.beans.factory.support.DefaultListableBeanFactory@6aba4211: defining beans [org.Nov 12, 2013 4:09:19 PM org.springframework.batch.core.launch.support.SimpleJobLauncher afterPropertiesSet
INFO: No TaskExecutor has been set, defaulting to synchronous executor.
Nov 12, 2013 4:09:22 PM org.springframework.batch.core.launch.support.SimpleJobLauncher$1 run
INFO: Job: [FlowJob: [name=simpleFileImportJob]] launched with the following parameters: [{inputFile=sample.csv}]
Nov 12, 2013 4:09:22 PM org.springframework.batch.core.job.SimpleStepHandler handleStep
INFO: Executing step: [importFileStep]
Nov 12, 2013 4:09:22 PM org.springframework.batch.core.launch.support.SimpleJobLauncher$1 run
INFO: Job: [FlowJob: [name=simpleFileImportJob]] completed with the following parameters: [{inputFile=sample.csv}] and the following status: Nov 12, 2013 4:09:22 PM org.springframework.context.support.AbstractApplicationContext doClose
INFO: Closing org.springframework.context.support.ClassPathXmlApplicationContext@6b4da8f4: startup date [Tue Nov 12 16:09:17 EST 2013]; Nov 12, 2013 4:09:22 PM org.springframework.beans.factory.support.DefaultSingletonBeanRegistry destroySingletons
INFO: Destroying singletons in org.springframework.beans.factory.support.DefaultListableBeanFactory@6aba4211: defining
package com.geekcap.javaworld.springbatchexample.simple.processor;
import com.geekcap.javaworld.springbatchexample.simple.model.Product;
import org.springframework.batch.item.ItemProcessor;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.jdbc.core.JdbcTemplate;
import org.springframework.jdbc.core.RowMapper;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.util.List;
/**
 * Processor that looks up an incoming product in the database and, when a row
 * with the same id already exists, adds the stored quantity to the incoming
 * quantity.
 */
public class ProductItemProcessor implements ItemProcessor<Product, Product> {

    private static final String GET_PRODUCT = "select * from PRODUCT where id = ?";

    @Autowired
    private JdbcTemplate jdbcTemplate;

    /**
     * Adjusts the quantity of {@code product} by the quantity already stored
     * for the same id, if any.
     *
     * @param product the product read from the input
     * @return the same {@code product} instance, with its quantity increased
     *         by the stored quantity when a matching row exists
     * @throws Exception if the database lookup fails
     */
    @Override
    public Product process(Product product) throws Exception {
        // Retrieve the product from the database
        List<Product> productList = jdbcTemplate.query(
                GET_PRODUCT,
                new Object[] {product.getId()},
                new RowMapper<Product>() {
                    @Override
                    public Product mapRow(ResultSet resultSet, int rowNum) throws SQLException {
                        // Columns are read by position, so this relies on the
                        // table's column order: id, name, description, quantity.
                        Product p = new Product();
                        p.setId(resultSet.getInt(1));
                        p.setName(resultSet.getString(2));
                        p.setDescription(resultSet.getString(3));
                        p.setQuantity(resultSet.getInt(4));
                        return p;
                    }
                });

        if (!productList.isEmpty()) {
            // Add the new quantity to the existing quantity
            Product existingProduct = productList.get(0);
            product.setQuantity(existingProduct.getQuantity() + product.getQuantity());
        }

        // Return the (possibly) updated product
        return product;
    }
}
package com.geekcap.javaworld.springbatchexample.simple.tasklet;
import org.apache.commons.io.FileUtils;
import org.springframework.batch.core.StepContribution;
import org.springframework.batch.core.scope.context.ChunkContext;
import org.springframework.batch.core.step.tasklet.Tasklet;
import org.springframework.batch.repeat.RepeatStatus;
import java.io.File;
/**
 * Tasklet that archives the input file by copying it into an
 * {@code archive} directory.
 *
 * <p>Uses Apache Commons IO ({@code FileUtils}) for the directory creation and
 * the copy.
 */
public class ArchiveProductImportFileTasklet implements Tasklet {

    /** Path of the file to archive; supplied through {@link #setInputFile(String)}. */
    private String inputFile;

    /**
     * Ensures the {@code archive} directory exists, copies the input file into
     * it, and reports the step as finished.
     *
     * @param stepContribution not used by this tasklet
     * @param chunkContext     not used by this tasklet
     * @return {@link RepeatStatus#FINISHED} once the copy has completed
     * @throws Exception if the directory cannot be created or the copy fails
     */
    @Override
    public RepeatStatus execute(StepContribution stepContribution, ChunkContext chunkContext) throws Exception {
        // The target directory is created first, then the source file is copied in.
        final File targetDirectory = new File("archive");
        FileUtils.forceMkdir(targetDirectory);

        final File sourceFile = new File(this.inputFile);
        FileUtils.copyFileToDirectory(sourceFile, targetDirectory);

        // Nothing left to repeat.
        return RepeatStatus.FINISHED;
    }

    /** @return the path of the file to archive */
    public String getInputFile() {
        return this.inputFile;
    }

    /** @param inputFile the path of the file to archive */
    public void setInputFile(String inputFile) {
        this.inputFile = inputFile;
    }
}