源 HBase 表数据:
2.4.1 :030 > scan 'mktest:mk1'
ROW COLUMN+CELL
95001 column=user:age, timestamp=1554205885644, value=20
95001 column=user:dept, timestamp=1554205885644, value=CS
95001 column=user:name, timestamp=1554205885644, value=\xE6\x9D\x8E\xE5\x8B\x87
95001 column=user:sex, timestamp=1554205885644, value=\xE7\x94\xB7
95002 column=user:age, timestamp=1554205885644, value=19
95002 column=user:dept, timestamp=1554205885644, value=IS
95002 column=user:name, timestamp=1554205885644, value=\xE5\x88\x98\xE6\x99\xA8
95002 column=user:sex, timestamp=1554205885644, value=\xE5\xA5\xB3
95003 column=user:age, timestamp=1554205885644, value=22
95003 column=user:dept, timestamp=1554205885644, value=MA
95003 column=user:name, timestamp=1554205885644, value=\xE7\x8E\x8B\xE6\x95\x8F
95003 column=user:sex, timestamp=1554205885644, value=\xE5\xA5\xB3
.....
package com.mycat.hdemo.hbase2hbase;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableMapper;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.Text;
import java.io.IOException;
/**
 * Mapper that reads rows from the source HBase table and emits one record per cell.
 *
 * <p>Output key: the row key decoded with {@link Bytes#toString}. Output value:
 * {@code "<qualifier>,<value>"}, i.e. the cell's qualifier and value joined by the
 * first comma. The column family of the cell is not emitted.
 */
public class HBase2HBaseMapper extends TableMapper<Text, Text> {
    /** Reused output key (row key) to avoid allocating one Text per record. */
    private final Text mk = new Text();
    /** Reused output value ("qualifier,value"). */
    private final Text mv = new Text();

    /**
     * Emits one (rowKey, "qualifier,value") pair for every cell of the row.
     *
     * @param key     row key of the current row
     * @param value   all cells read for the current row
     * @param context MapReduce context used to emit the pairs
     * @throws IOException          if writing to the context fails
     * @throws InterruptedException if the task is interrupted while writing
     */
    @Override
    protected void map(ImmutableBytesWritable key, Result value, Context context)
            throws IOException, InterruptedException {
        Cell[] cells = value.rawCells();
        if (cells == null) {
            // An empty Result has no backing cell array; nothing to emit.
            return;
        }
        // key.get() exposes the whole backing array, so decode only the
        // [offset, offset + length) window that actually holds the row key.
        mk.set(Bytes.toString(key.get(), key.getOffset(), key.getLength()));
        for (Cell c : cells) {
            String name = Bytes.toString(
                    c.getQualifierArray(), c.getQualifierOffset(), c.getQualifierLength());
            String val = Bytes.toString(
                    c.getValueArray(), c.getValueOffset(), c.getValueLength());
            mv.set(name + "," + val);
            context.write(mk, mv);
        }
    }
}
package com.mycat.hdemo.hbase2hbase;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.mapreduce.TableReducer;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import java.io.IOException;
/**
 * Reducer that rebuilds one {@link Put} per row key from the
 * {@code "<qualifier>,<value>"} records it receives and writes it to the output table.
 *
 * <p>All columns are written to the {@code user} column family.
 */
public class HBase2HBaseReducer extends TableReducer<Text, Text, NullWritable> {
    /** Target column family; encoded once instead of once per cell. */
    private static final byte[] FAMILY = Bytes.toBytes("user");

    /**
     * Collects every "qualifier,value" record of a row into a single Put and emits it.
     *
     * @param key     row key as text
     * @param values  records of the form {@code "<qualifier>,<value>"}
     * @param context MapReduce context used to emit the Put
     * @throws IOException          if a record has no separator or the write fails
     * @throws InterruptedException if the task is interrupted while writing
     */
    @Override
    protected void reduce(Text key, Iterable<Text> values, Context context)
            throws IOException, InterruptedException {
        Put p = new Put(Bytes.toBytes(key.toString()));
        for (Text value : values) {
            // Limit 2: split only on the first comma so that values containing
            // commas stay intact and an empty value ("name,") still yields two
            // parts instead of having the trailing empty string dropped.
            String[] sps = value.toString().split(",", 2);
            if (sps.length < 2) {
                throw new IOException(
                        "Malformed record for row '" + key + "': expected 'qualifier,value' but got '"
                                + value + "'");
            }
            p.addColumn(FAMILY, Bytes.toBytes(sps[0]), Bytes.toBytes(sps[1]));
        }
        context.write(NullWritable.get(), p);
    }
}
package com.mycat.hdemo.hbase2hbase;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import java.io.IOException;
/**
 * Driver for the MapReduce job that copies table {@code mktest:mk1} into
 * table {@code mktest:mk3} using {@link HBase2HBaseMapper} and {@link HBase2HBaseReducer}.
 */
public class HBase2HBaseDriver {
    /**
     * Configures and runs the copy job, blocking until it finishes.
     *
     * @param args unused
     * @throws IOException            if job setup fails or the job does not complete successfully
     * @throws ClassNotFoundException if a job class cannot be resolved
     * @throws InterruptedException   if the wait for completion is interrupted
     */
    public static void main(String[] args)
            throws IOException, ClassNotFoundException, InterruptedException {
        Configuration conf = new Configuration();
        conf.set("fs.defaultFS", "hdfs://mkmg/");
        conf.set("hbase.zookeeper.quorum", "mycat01:2181,mycat02:2181,mycat03:2181");

        Job job = Job.getInstance(conf);
        job.setJarByClass(HBase2HBaseDriver.class);
        job.setOutputKeyClass(NullWritable.class);
        job.setOutputValueClass(Put.class);

        Scan scan = new Scan();
        // Full-table MapReduce scans: fetch rows in larger batches and do not
        // fill the region server block cache with data that is read only once.
        scan.setCaching(500);
        scan.setCacheBlocks(false);

        TableMapReduceUtil.initTableMapperJob(
                "mktest:mk1", scan, HBase2HBaseMapper.class, Text.class, Text.class, job, false);
        TableMapReduceUtil.initTableReducerJob(
                "mktest:mk3", HBase2HBaseReducer.class, job, null, null, null, null, false);

        // NOTE(review): the reducer writes to an HBase table, so this HDFS path
        // is presumably unused; kept to preserve the original job configuration.
        FileOutputFormat.setOutputPath(job, new Path("/user/po"));

        // Surface job failure to the caller instead of returning normally.
        if (!job.waitForCompletion(true)) {
            throw new IOException("MapReduce job failed: mktest:mk1 -> mktest:mk3");
        }
    }
}
2.4.1 :029 > scan 'mktest:mk3'
ROW COLUMN+CELL
95001 column=user:age, timestamp=1554208964508, value=20
95001 column=user:dept, timestamp=1554208964508, value=CS
95001 column=user:name, timestamp=1554208964508, value=\xE6\x9D\x8E\xE5\x8B\x87
95001 column=user:sex, timestamp=1554208964508, value=\xE7\x94\xB7
95002 column=user:age, timestamp=1554208964508, value=19
95002 column=user:dept, timestamp=1554208964508, value=IS
95002 column=user:name, timestamp=1554208964508, value=\xE5\x88\x98\xE6\x99\xA8
95002 column=user:sex, timestamp=1554208964508, value=\xE5\xA5\xB3
95003 column=user:age, timestamp=1554208964508, value=22
95003 column=user:dept, timestamp=1554208964508, value=MA
95003 column=user:name, timestamp=1554208964508, value=\xE7\x8E\x8B\xE6\x95\x8F
95003 column=user:sex, timestamp=1554208964508, value=\xE5\xA5\xB3
95004 column=user:age, timestamp=1554208964508, value=19
95004 column=user:dept, timestamp=1554208964508, value=IS
.....