使用MapReduce把HBase表中的数据迁移到另一张HBase表中

目标:将HBase中student表中的数据,通过MR迁入student_mr表中。

1. 添加Maven依赖

 

        
        <dependency>
            <groupId>org.apache.hbase</groupId>
            <artifactId>hbase-server</artifactId>
            <version>1.3.1</version>
        </dependency>

        <dependency>
            <groupId>org.apache.hbase</groupId>
            <artifactId>hbase-client</artifactId>
            <version>1.3.1</version>
        </dependency>

2. 查看HBase的MapReduce任务的执行

[hadoop@hadoop112 hbase-1.3.1]$ bin/hbase mapredcp
SLF4J: Class path contains multiple SLF4J bindings.
SLF4J: Found binding in [jar:file:/opt/module/hbase-1.3.1/lib/slf4j-log4j12-1.7.5.jar!/org/slf4j/impl/StaticLoggerBinder.class]
SLF4J: Found binding in [jar:file:/opt/module/hadoop-2.7.2/share/hadoop/common/lib/slf4j-log4j12-1.7.10.jar!/org/slf4j/impl/StaticLoggerBinder.class]
SLF4J: See http://www.slf4j.org/codes.html#multiple_bindings for an explanation.
SLF4J: Actual binding is of type [org.slf4j.impl.Log4jLoggerFactory]
/opt/module/hbase-1.3.1/lib/zookeeper-3.4.6.jar:/opt/module/hbase-1.3.1/lib/guava-12.0.1.jar:/opt/module/hbase-1.3.1/lib/metrics-core-2.2.0.jar:/opt/module/hbase-1.3.1/lib/protobuf-java-2.5.0.jar:/opt/module/hbase-1.3.1/lib/hbase-common-1.3.1.jar:/opt/module/hbase-1.3.1/lib/hbase-protocol-1.3.1.jar:/opt/module/hbase-1.3.1/lib/htrace-core-3.1.0-incubating.jar:/opt/module/hbase-1.3.1/lib/hbase-client-1.3.1.jar:/opt/module/hbase-1.3.1/lib/hbase-hadoop-compat-1.3.1.jar:/opt/module/hbase-1.3.1/lib/netty-all-4.0.23.Final.jar:/opt/module/hbase-1.3.1/lib/hbase-server-1.3.1.jar:/opt/module/hbase-1.3.1/lib/hbase-prefix-tree-1.3.1.jar

3. 环境变量的导入

(1)执行环境变量的导入(临时生效,在命令行执行下述操作)

[hadoop@hadoop112 hbase-1.3.1]$  export HBASE_HOME=/opt/module/hbase-1.3.1
[hadoop@hadoop112 hbase-1.3.1]$  export HADOOP_HOME=/opt/module/hadoop-2.7.2
[hadoop@hadoop112 hbase-1.3.1]$  export HADOOP_CLASSPATH=`${HBASE_HOME}/bin/hbase mapredcp`

(2)永久生效:在/etc/profile配置

export HBASE_HOME=/opt/module/hbase-1.3.1
export HADOOP_HOME=/opt/module/hadoop-2.7.2

并在hadoop-env.sh中配置:(注意:在for循环之后配)

export HADOOP_CLASSPATH=$HADOOP_CLASSPATH:/opt/module/hbase-1.3.1/lib/*

4. 构建ReadStudentMapper类,用于读取student表中的数据

package com.fczheng.mr1;

import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableMapper;
import org.apache.hadoop.io.NullWritable;

import java.io.IOException;


/**
 * 从HBase中读取数据并包装成我们需要的形式
 * TableMapper是Mapper的子类,专门用于读取HBase表中的数据
 * key:表中的rowkey
 * Result:rowkey对应一行的结果集
 * @author fczheng
 * @create 2019-08-16 16:27
 */
/**
 * Reads rows from the source HBase table and repackages each row as a {@link Put}.
 *
 * <p>{@code TableMapper} is a Mapper subclass specialized for reading HBase tables:
 * the input key is the rowkey ({@link ImmutableBytesWritable}) and the input value
 * is the {@link Result} holding that row's cells. The output types are declared via
 * the generic parameters so the framework can type-check the job wiring.
 *
 * @author fczheng
 * @create 2019-08-16 16:27
 */
public class ReadStudentMapper extends TableMapper<ImmutableBytesWritable, Put> {

    /**
     * Converts one source row into a Put keyed on the same rowkey.
     *
     * @param key     rowkey of the current row
     * @param value   all cells of the current row
     * @param context MapReduce context used to emit (rowkey, Put) pairs
     */
    @Override
    protected void map(ImmutableBytesWritable key, Result value, Context context)
            throws IOException, InterruptedException {

        // Target Put reuses the source rowkey so the row migrates unchanged.
        Put put = new Put(key.get());

        // Copy every cell of the row verbatim into the Put.
        for (Cell cell : value.rawCells()) {
            put.add(cell);
        }

        context.write(key, put);
    }
}

 5.  构建ReadStudentReducer类,用于将读取到的student表中的数据写入到student_mr表中

package com.fczheng.mr1;

import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableReducer;
import org.apache.hadoop.io.NullWritable;

import java.io.IOException;

/**
 * 将自定义的数据包装成HBase希望的Mutation型
 * TableRducer专门用于输出数据到HBase表中,继承了Reducer
 * @author fczheng
 * @create 2019-08-16 16:27
 */
/**
 * Writes the Puts produced by {@code ReadStudentMapper} into the target HBase table.
 *
 * <p>{@code TableReducer} is a Reducer subclass specialized for writing to HBase:
 * its output value must be a Mutation (here {@link Put}); the output key is ignored
 * by the HBase output format, so {@link NullWritable} is used.
 *
 * <p>Note: the raw {@code Iterable values} in the original did not compile with the
 * {@code for (Put value : values)} loop — the type parameters below are required.
 *
 * @author fczheng
 * @create 2019-08-16 16:27
 */
public class ReadStudentReducer extends TableReducer<ImmutableBytesWritable, Put, NullWritable> {

    /**
     * Forwards every Put received for a rowkey to the output table.
     *
     * @param key     source rowkey (grouping key; not written to the output)
     * @param values  Puts emitted by the mapper for this rowkey
     * @param context MapReduce context backed by the target table
     */
    @Override
    protected void reduce(ImmutableBytesWritable key, Iterable<Put> values, Context context)
            throws IOException, InterruptedException {

        for (Put value : values) {
            // Key is discarded by the HBase sink; the Put carries the rowkey itself.
            context.write(NullWritable.get(), value);
        }
    }
}

6. 构建ReadStudentDriver类(implements Tool),用于组装并运行Job任务

package com.fczheng.mr1;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

/**
 * @author fczheng
 * @create 2019-08-16 16:28
 */
/**
 * Driver that assembles and runs the MapReduce job copying rows from one
 * HBase table into another.
 *
 * <p>Implements {@link Tool} so {@link ToolRunner} can inject the Hadoop/HBase
 * {@link Configuration} and parse generic command-line options.
 *
 * @author fczheng
 * @create 2019-08-16 16:28
 */
public class ReadStudentDriver implements Tool {

    private Configuration conf;

    /**
     * Configures and submits the copy job.
     *
     * @param args args[0] = source table, args[1] = target table;
     *             optionally args[2] = startrow and args[3] = stoprow to
     *             restrict the scan to a row range
     * @return 0 on success, 1 on job failure, 2 on bad usage
     * @throws Exception if job setup or submission fails
     */
    @Override
    public int run(String[] args) throws Exception {

        if (args.length < 2) {
            System.err.println(
                    "Usage: ReadStudentDriver <sourceTable> <targetTable> [startRow stopRow]");
            return 2;
        }

        Job job = Job.getInstance(conf, "StudentMR");
        job.setJarByClass(ReadStudentDriver.class);

        // Full-table scan by default; narrow to [startrow, stoprow) when both are given.
        Scan scan = new Scan();
        if (args.length >= 4) {
            scan.setStartRow(args[2].getBytes());
            scan.setStopRow(args[3].getBytes());
        }

        TableMapReduceUtil.initTableMapperJob(
                args[0],                      // source table name
                scan,                         // scan (range)
                ReadStudentMapper.class,      // mapper
                ImmutableBytesWritable.class, // mapper output key
                Put.class,                    // mapper output value
                job
        );

        TableMapReduceUtil.initTableReducerJob(
                args[1],                      // target table name
                ReadStudentReducer.class,     // reducer
                job
        );

        return job.waitForCompletion(true) ? 0 : 1;
    }

    @Override
    public void setConf(Configuration conf) {
        this.conf = conf;
    }

    @Override
    public Configuration getConf() {
        return conf;
    }

    public static void main(String[] args) throws Exception {
        // ToolRunner injects the configuration (via setConf) and strips generic options.
        Configuration configuration = HBaseConfiguration.create();
        int status = ToolRunner.run(configuration, new ReadStudentDriver(), args);
        System.exit(status);
    }
}

7. 打包运行任务

yarn  jar hbase-plugin-1.0-SNAPSHOT.jar com.fczheng.mr1.ReadStudentDriver  student student_mr

提示:运行任务前,如果待数据导入的表不存在,则需要提前创建。

你可能感兴趣的:(HBase)