以下操作需要预先在HBase中创建目标表
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.hbase.mapreduce.TableMapper;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
import java.io.IOException;
import java.util.HashMap;
import java.util.Map;
/**
 * Map-only MapReduce job that reads every row of the HBase table
 * {@code ns1:orders} and writes it to {@code orders:history_orders88}
 * under a new row key of the form
 * {@code reverse(user_id) + "_" + date + "_" + orderId}.
 *
 * <p>The target table must already exist in HBase before the job is run.
 */
public class F_SaleOrdersMapReducer extends Configured implements Tool {

    private static final String ORDERS_TABLE_NAME = "ns1:orders";
    private static final String HISTORY_ORDERS_TABLE_NAME = "orders:history_orders88";

    /**
     * Converts one row of the orders table into a {@link Put} for the history table.
     *
     * <p>The output key is the new row key, the output value is the {@link Put} itself.
     */
    static class ReadOrderMapper extends TableMapper<ImmutableBytesWritable, Put> {

        private static final String ORDER_COLUMN_NAME_USER_ID = "user_id";
        private static final String ORDER_COLUMN_NAME_ORDER_ID = "order_id";
        private static final String ORDER_COLUMN_NAME_DATE = "date";
        private static final String HISTORY_ROW_KEY_SEPARATOR = "_";
        private static final byte[] HISTORY_COLUMN_FAMILY = Bytes.toBytes("order");

        /** Counter that records source rows skipped because the row key could not be built. */
        private static final String COUNTER_GROUP = "SaleOrders";
        private static final String COUNTER_SKIPPED_ROWS = "ROWS_MISSING_USER_ID_OR_DATE";

        /** Reused for every record to avoid allocating a new writable per row. */
        private final ImmutableBytesWritable mapOutput = new ImmutableBytesWritable();

        /**
         * Emits one {@code (newRowKey, Put)} pair per source row.
         *
         * <p>Rows lacking a {@code user_id} or {@code date} cell are not written; they are
         * counted instead, because the history row key cannot be derived for them.
         *
         * @param key     row key of the source row (the order id)
         * @param value   all cells of the source row
         * @param context MapReduce context used for output and counters
         */
        @Override
        protected void map(ImmutableBytesWritable key, Result value, Context context)
                throws IOException, InterruptedException {
            Put put = resultToPut(key, value);
            if (put == null) {
                context.getCounter(COUNTER_GROUP, COUNTER_SKIPPED_ROWS).increment(1);
                return;
            }
            mapOutput.set(put.getRow());
            context.write(mapOutput, put);
        }

        /**
         * Builds the history-table {@link Put} for one source row.
         *
         * @param key    source row key, interpreted as the order id
         * @param result cells of the source row
         * @return the {@link Put}, or {@code null} when {@code user_id} or {@code date}
         *         is absent from the row
         */
        private Put resultToPut(ImmutableBytesWritable key, Result result) {
            // Honour offset/length: the writable may wrap a slice of a larger array.
            String orderId = Bytes.toString(key.get(), key.getOffset(), key.getLength());

            // qualifier -> value, e.g. date, user_id, order_amt.
            // NOTE(review): values are round-tripped through String, which assumes they are
            // UTF-8 text in the source table - confirm no binary-encoded columns exist.
            Map<String, String> orderMap = new HashMap<>();
            for (Cell cell : result.rawCells()) {
                String field = Bytes.toString(CellUtil.cloneQualifier(cell));
                String fieldValue = Bytes.toString(CellUtil.cloneValue(cell));
                orderMap.put(field, fieldValue);
            }

            String userId = orderMap.get(ORDER_COLUMN_NAME_USER_ID);
            String orderDate = orderMap.get(ORDER_COLUMN_NAME_DATE);
            if (userId == null || orderDate == null) {
                // Without both parts the key would contain the literal text "null".
                return null;
            }

            // Row key: reverse(userId) + "_" + date + "_" + orderId.
            // Only the user id is reversed; the remaining parts are appended afterwards.
            String rowKey = new StringBuilder(userId).reverse()
                    .append(HISTORY_ROW_KEY_SEPARATOR)
                    .append(orderDate)
                    .append(HISTORY_ROW_KEY_SEPARATOR)
                    .append(orderId)
                    .toString();

            Put put = new Put(Bytes.toBytes(rowKey));
            for (Map.Entry<String, String> entry : orderMap.entrySet()) {
                put.addColumn(
                        HISTORY_COLUMN_FAMILY,
                        Bytes.toBytes(entry.getKey()),
                        Bytes.toBytes(entry.getValue()));
            }
            // The order id was the source row key, so store it as a regular column too.
            put.addColumn(
                    HISTORY_COLUMN_FAMILY,
                    Bytes.toBytes(ORDER_COLUMN_NAME_ORDER_ID),
                    Bytes.toBytes(orderId));
            return put;
        }
    }

    /**
     * Configures and runs the job: input table -> mapper -> output table.
     *
     * @param args command-line arguments (unused)
     * @return 0 when the job succeeds, 1 otherwise
     */
    @Override
    public int run(String[] args) throws Exception {
        Configuration conf = this.getConf();

        Job job = Job.getInstance(conf, F_SaleOrdersMapReducer.class.getName());
        job.setJarByClass(F_SaleOrdersMapReducer.class);

        Scan scan = new Scan();
        // 1 is the default in Scan, which will be bad for MapReduce jobs
        scan.setCaching(500);
        // don't set to true for MR jobs
        scan.setCacheBlocks(false);

        // Mapper and input table.
        TableMapReduceUtil.initTableMapperJob(
                ORDERS_TABLE_NAME,            // input HBase table name
                scan,                         // Scan instance to control CF and attribute selection
                ReadOrderMapper.class,        // mapper
                ImmutableBytesWritable.class, // mapper output key: row key
                Put.class,                    // mapper output value: row content
                job);

        // Output table; no reducer class is supplied and the job runs with zero reducers.
        TableMapReduceUtil.initTableReducerJob(
                HISTORY_ORDERS_TABLE_NAME,    // output table
                null,                         // reducer class
                job);
        job.setNumReduceTasks(0);

        boolean isSuccess = job.waitForCompletion(true);
        return isSuccess ? 0 : 1;
    }

    /**
     * Entry point. Exits with the job status, or with 1 when the job throws.
     *
     * @param args command-line arguments forwarded to {@link ToolRunner}
     */
    public static void main(String[] args) {
        Configuration conf = HBaseConfiguration.create();
        int status;
        try {
            status = ToolRunner.run(conf, new F_SaleOrdersMapReducer(), args);
        } catch (Exception e) {
            e.printStackTrace();
            // Make the failure visible to the calling shell / scheduler.
            status = 1;
        }
        System.exit(status);
    }
}
以下操作需要预先在HBase中创建目标表,
并在运行时通过第一个命令行参数指定用于临时存放HFile的HDFS目录(程序会在该路径后追加当前时间戳)
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.HFileOutputFormat2;
import org.apache.hadoop.hbase.mapreduce.LoadIncrementalHFiles;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.hbase.mapreduce.TableMapper;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
import java.io.IOException;
import java.util.HashMap;
import java.util.Map;
/**
 * MapReduce job that reads every row of the HBase table {@code ns1:orders},
 * writes the converted rows as HFiles to an HDFS directory, and then bulk-loads
 * those HFiles into {@code orders:history_orders89}.
 *
 * <p>The new row key has the form
 * {@code reverse(user_id) + "_" + date + "_" + orderId}.
 *
 * <p>The target table must already exist in HBase. The first command-line
 * argument is the HDFS path prefix for the HFile output; the current time in
 * milliseconds is appended to it directly (no path separator is inserted).
 */
public class G_SaleOrdersMapReducer extends Configured implements Tool {

    private static final String ORDERS_TABLE_NAME = "ns1:orders";
    private static final String HISTORY_ORDERS_TABLE_NAME = "orders:history_orders89";

    /**
     * Converts one row of the orders table into a {@link Put} for the history table.
     *
     * <p>The output key is the new row key, the output value is the {@link Put} itself.
     */
    static class ReadOrderMapper extends TableMapper<ImmutableBytesWritable, Put> {

        private static final String ORDER_COLUMN_NAME_USER_ID = "user_id";
        private static final String ORDER_COLUMN_NAME_ORDER_ID = "order_id";
        private static final String ORDER_COLUMN_NAME_DATE = "date";
        private static final String HISTORY_ROW_KEY_SEPARATOR = "_";
        private static final byte[] HISTORY_COLUMN_FAMILY = Bytes.toBytes("order");

        /** Counter that records source rows skipped because the row key could not be built. */
        private static final String COUNTER_GROUP = "SaleOrders";
        private static final String COUNTER_SKIPPED_ROWS = "ROWS_MISSING_USER_ID_OR_DATE";

        /** Reused for every record to avoid allocating a new writable per row. */
        private final ImmutableBytesWritable mapOutput = new ImmutableBytesWritable();

        /**
         * Emits one {@code (newRowKey, Put)} pair per source row.
         *
         * <p>Rows lacking a {@code user_id} or {@code date} cell are not written; they are
         * counted instead, because the history row key cannot be derived for them.
         *
         * @param key     row key of the source row (the order id)
         * @param value   all cells of the source row
         * @param context MapReduce context used for output and counters
         */
        @Override
        protected void map(ImmutableBytesWritable key, Result value, Context context)
                throws IOException, InterruptedException {
            Put put = resultToPut(key, value);
            if (put == null) {
                context.getCounter(COUNTER_GROUP, COUNTER_SKIPPED_ROWS).increment(1);
                return;
            }
            mapOutput.set(put.getRow());
            context.write(mapOutput, put);
        }

        /**
         * Builds the history-table {@link Put} for one source row.
         *
         * @param key    source row key, interpreted as the order id
         * @param result cells of the source row
         * @return the {@link Put}, or {@code null} when {@code user_id} or {@code date}
         *         is absent from the row
         */
        private Put resultToPut(ImmutableBytesWritable key, Result result) {
            // Honour offset/length: the writable may wrap a slice of a larger array.
            String orderId = Bytes.toString(key.get(), key.getOffset(), key.getLength());

            // qualifier -> value, e.g. date, user_id, order_amt.
            // NOTE(review): values are round-tripped through String, which assumes they are
            // UTF-8 text in the source table - confirm no binary-encoded columns exist.
            Map<String, String> orderMap = new HashMap<>();
            for (Cell cell : result.rawCells()) {
                String field = Bytes.toString(CellUtil.cloneQualifier(cell));
                String fieldValue = Bytes.toString(CellUtil.cloneValue(cell));
                orderMap.put(field, fieldValue);
            }

            String userId = orderMap.get(ORDER_COLUMN_NAME_USER_ID);
            String orderDate = orderMap.get(ORDER_COLUMN_NAME_DATE);
            if (userId == null || orderDate == null) {
                // Without both parts the key would contain the literal text "null".
                return null;
            }

            // Row key: reverse(userId) + "_" + date + "_" + orderId.
            // Only the user id is reversed; the remaining parts are appended afterwards.
            String rowKey = new StringBuilder(userId).reverse()
                    .append(HISTORY_ROW_KEY_SEPARATOR)
                    .append(orderDate)
                    .append(HISTORY_ROW_KEY_SEPARATOR)
                    .append(orderId)
                    .toString();

            Put put = new Put(Bytes.toBytes(rowKey));
            for (Map.Entry<String, String> entry : orderMap.entrySet()) {
                put.addColumn(
                        HISTORY_COLUMN_FAMILY,
                        Bytes.toBytes(entry.getKey()),
                        Bytes.toBytes(entry.getValue()));
            }
            // The order id was the source row key, so store it as a regular column too.
            put.addColumn(
                    HISTORY_COLUMN_FAMILY,
                    Bytes.toBytes(ORDER_COLUMN_NAME_ORDER_ID),
                    Bytes.toBytes(orderId));
            return put;
        }
    }

    /**
     * Configures and runs the job, then bulk-loads the generated HFiles on success.
     *
     * @param args {@code args[0]} is the HDFS path prefix for the HFile output directory
     * @return 0 when the job succeeds, 1 when the job fails, 2 when {@code args[0]} is missing
     */
    @Override
    public int run(String[] args) throws Exception {
        if (args == null || args.length < 1) {
            System.err.println(
                    "Usage: " + G_SaleOrdersMapReducer.class.getSimpleName()
                            + " <hdfs-output-dir-prefix>");
            return 2;
        }

        Configuration conf = this.getConf();

        Job job = Job.getInstance(conf, G_SaleOrdersMapReducer.class.getName());
        job.setJarByClass(G_SaleOrdersMapReducer.class);

        Scan scan = new Scan();
        // 1 is the default in Scan, which will be bad for MapReduce jobs
        scan.setCaching(500);
        // don't set to true for MR jobs
        scan.setCacheBlocks(false);

        // Mapper and input table.
        TableMapReduceUtil.initTableMapperJob(
                ORDERS_TABLE_NAME,            // input HBase table name
                scan,                         // Scan instance to control CF and attribute selection
                ReadOrderMapper.class,        // mapper
                ImmutableBytesWritable.class, // mapper output key: row key
                Put.class,                    // mapper output value: row content
                job);

        // For very large data sets, inserting through Put is discouraged; the data is
        // written as HFiles (HBase's storage format) and bulk-loaded instead.
        // TableMapReduceUtil.initTableReducerJob(...) and setNumReduceTasks(0) are
        // deliberately not called here: per the HBase API documentation,
        // configureIncrementalLoad sets the reducer, partitioner, output classes and
        // number of reduce tasks itself, so those earlier settings had no effect.
        job.setOutputFormatClass(HFileOutputFormat2.class);

        // Directory that receives the HFiles; the timestamp keeps each run's output distinct.
        Path outputPath = new Path(args[0] + System.currentTimeMillis());

        // try-with-resources so the table connection is released even if the job throws.
        try (HTable table = new HTable(conf, HISTORY_ORDERS_TABLE_NAME)) {
            HFileOutputFormat2.configureIncrementalLoad(job, table, table.getRegionLocator());
            FileOutputFormat.setOutputPath(job, outputPath);

            boolean isSuccess = job.waitForCompletion(true);

            // Only load the HFiles into the table when the MapReduce job succeeded.
            if (isSuccess) {
                LoadIncrementalHFiles load = new LoadIncrementalHFiles(conf);
                load.doBulkLoad(outputPath, table);
            }
            return isSuccess ? 0 : 1;
        }
    }

    /**
     * Entry point. Exits with the job status, or with 1 when the job throws.
     *
     * @param args command-line arguments forwarded to {@link ToolRunner}
     */
    public static void main(String[] args) {
        Configuration conf = HBaseConfiguration.create();
        int status;
        try {
            status = ToolRunner.run(conf, new G_SaleOrdersMapReducer(), args);
        } catch (Exception e) {
            e.printStackTrace();
            // Make the failure visible to the calling shell / scheduler.
            status = 1;
        }
        System.exit(status);
    }
}