Apache HBase:使用 MapReduce 将数据从一张 HBase 表导出(复制)到另一张 HBase 表

源HBASE表数据:

2.4.1 :030 > scan 'mktest:mk1'
ROW                                     COLUMN+CELL                                                                                                      
 95001                                  column=user:age, timestamp=1554205885644, value=20                                                               
 95001                                  column=user:dept, timestamp=1554205885644, value=CS                                                              
 95001                                  column=user:name, timestamp=1554205885644, value=\xE6\x9D\x8E\xE5\x8B\x87                                        
 95001                                  column=user:sex, timestamp=1554205885644, value=\xE7\x94\xB7                                                     
 95002                                  column=user:age, timestamp=1554205885644, value=19                                                               
 95002                                  column=user:dept, timestamp=1554205885644, value=IS                                                              
 95002                                  column=user:name, timestamp=1554205885644, value=\xE5\x88\x98\xE6\x99\xA8                                        
 95002                                  column=user:sex, timestamp=1554205885644, value=\xE5\xA5\xB3                                                     
 95003                                  column=user:age, timestamp=1554205885644, value=22                                                               
 95003                                  column=user:dept, timestamp=1554205885644, value=MA                                                              
 95003                                  column=user:name, timestamp=1554205885644, value=\xE7\x8E\x8B\xE6\x95\x8F                                        
 95003                                  column=user:sex, timestamp=1554205885644, value=\xE5\xA5\xB3  
 .....

1.MapReduce 程序设计

1)Map端
package com.mycat.hdemo.hbase2hbase;

import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableMapper;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.Text;

import java.io.IOException;

/**
 * Mapper for the table-to-table copy: for every row produced by the scan it
 * emits one (rowkey, "qualifier,value") text pair per cell, so the reducer
 * can reassemble the row as a {@code Put} against the target table.
 */
public class HBase2HBaseMapper extends TableMapper<Text, Text> {
    // Writable instances are reused across map() calls to avoid
    // allocating two objects per emitted record.
    private final Text outKey = new Text();
    private final Text outVal = new Text();

    @Override
    protected void map(ImmutableBytesWritable key, Result value, Context context)
            throws IOException, InterruptedException {
        // The row key is shared by every cell of this Result.
        outKey.set(Bytes.toString(key.get()));
        for (Cell cell : value.rawCells()) {
            // Decode qualifier and value from their backing-array slices.
            String qualifier = Bytes.toString(
                    cell.getQualifierArray(), cell.getQualifierOffset(), cell.getQualifierLength());
            String cellValue = Bytes.toString(
                    cell.getValueArray(), cell.getValueOffset(), cell.getValueLength());
            outVal.set(qualifier + "," + cellValue);
            context.write(outKey, outVal);
        }
    }
}
2)Reduce端
package com.mycat.hdemo.hbase2hbase;

import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.mapreduce.TableReducer;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;

import java.io.IOException;

/**
 * Reducer for the table-to-table copy: rebuilds one {@code Put} per row key
 * from the mapper's "qualifier,value" records and writes it to the target
 * table (the key written to the table is the reduce key; the MR output key
 * is ignored, hence {@code NullWritable}).
 */
public class HBase2HBaseReducer extends TableReducer<Text, Text, NullWritable> {
    // All copied cells live in the 'user' column family; build the bytes once.
    private static final byte[] FAMILY = Bytes.toBytes("user");

    @Override
    protected void reduce(Text key, Iterable<Text> values, Context context)
            throws IOException, InterruptedException {
        Put put = new Put(Bytes.toBytes(key.toString()));
        for (Text value : values) {
            // Split on the FIRST comma only. A plain split(",") would
            // (a) truncate cell values that themselves contain commas and
            // (b) throw ArrayIndexOutOfBoundsException for empty values,
            // because String#split drops trailing empty strings
            // ("age,".split(",") has length 1).
            String[] parts = value.toString().split(",", 2);
            if (parts.length < 2) {
                continue; // malformed record (no separator) — skip, don't crash
            }
            put.addColumn(FAMILY, Bytes.toBytes(parts[0]), Bytes.toBytes(parts[1]));
        }
        context.write(NullWritable.get(), put);
    }
}
3)Driver类
package com.mycat.hdemo.hbase2hbase;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

import java.io.IOException;

/**
 * Driver: configures and submits the copy job that scans 'mktest:mk1'
 * through {@link HBase2HBaseMapper} and writes the rebuilt rows to
 * 'mktest:mk3' through {@link HBase2HBaseReducer}.
 */
public class HBase2HBaseDriver {
    public static void main(String[] args)
            throws IOException, ClassNotFoundException, InterruptedException {
        // HBaseConfiguration.create() layers hbase-default.xml / hbase-site.xml
        // on top of the Hadoop defaults; a bare `new Configuration()` would
        // miss every HBase setting except the two overridden below.
        Configuration conf = HBaseConfiguration.create();
        conf.set("fs.defaultFS", "hdfs://mkmg/");
        conf.set("hbase.zookeeper.quorum", "mycat01:2181,mycat02:2181,mycat03:2181");

        Job job = Job.getInstance(conf);
        job.setJarByClass(HBase2HBaseDriver.class);

        job.setOutputKeyClass(NullWritable.class);
        job.setOutputValueClass(Put.class);

        // Full-table scan of the source; restrict with scan.setCaching /
        // addFamily here if only part of the table should be copied.
        Scan scan = new Scan();
        TableMapReduceUtil.initTableMapperJob(
                "mktest:mk1", scan, HBase2HBaseMapper.class, Text.class, Text.class, job, false);

        // initTableReducerJob installs TableOutputFormat, which writes
        // directly to the target table — no HDFS output path is involved
        // (the previous FileOutputFormat.setOutputPath call was ignored).
        TableMapReduceUtil.initTableReducerJob(
                "mktest:mk3", HBase2HBaseReducer.class, job, null, null, null, null, false);

        // Propagate job success/failure to the caller's shell.
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}

2.结果输出

2.4.1 :029 > scan 'mktest:mk3'
ROW                                     COLUMN+CELL                                                                                                      
 95001                                  column=user:age, timestamp=1554208964508, value=20                                                               
 95001                                  column=user:dept, timestamp=1554208964508, value=CS                                                              
 95001                                  column=user:name, timestamp=1554208964508, value=\xE6\x9D\x8E\xE5\x8B\x87                                        
 95001                                  column=user:sex, timestamp=1554208964508, value=\xE7\x94\xB7                                                     
 95002                                  column=user:age, timestamp=1554208964508, value=19                                                               
 95002                                  column=user:dept, timestamp=1554208964508, value=IS                                                              
 95002                                  column=user:name, timestamp=1554208964508, value=\xE5\x88\x98\xE6\x99\xA8                                        
 95002                                  column=user:sex, timestamp=1554208964508, value=\xE5\xA5\xB3                                                     
 95003                                  column=user:age, timestamp=1554208964508, value=22                                                               
 95003                                  column=user:dept, timestamp=1554208964508, value=MA                                                              
 95003                                  column=user:name, timestamp=1554208964508, value=\xE7\x8E\x8B\xE6\x95\x8F                                        
 95003                                  column=user:sex, timestamp=1554208964508, value=\xE5\xA5\xB3                                                     
 95004                                  column=user:age, timestamp=1554208964508, value=19                                                               
 95004                                  column=user:dept, timestamp=1554208964508, value=IS    
 .....

你可能感兴趣的:(HBASE)