MapReduce读取Hbase中的数据

Hbase提供了TableMapReduceUtil工具类,可以直接使用

需要提供的参数包括:表名、Scan对象、Mapper的class对象、Mapper输出的key和value类型的class对象,以及job对象

// Configure the job so that its map phase reads rows from an HBase table.
TableMapReduceUtil.initTableMapperJob(
		Names.TABLE.getValue(),// name of the HBase table to read
        scan,// Scan instance that selects which data is read
        AnalysisBeanMapper.class,// mapper class to run over the scanned rows
        AnalysisKey.class,// key class emitted by the mapper (map output key)
        Text.class,// value class emitted by the mapper (map output value)
        job// job being configured
);

使用时需要添加Maven依赖


<dependency>
	<groupId>org.apache.hbase</groupId>
	<artifactId>hbase-mapreduce</artifactId>
	<version>2.4.10</version>
</dependency>

下面是完整的代码

package com.ct.analysis.tool;

import com.ct.analysis.io.MySQLRedisBeanOutputFormat;
import com.ct.analysis.kv.AnalysisKey;
import com.ct.analysis.kv.AnalysisValue;
import com.ct.analysis.mapper.AnalysisBeanMapper;
import com.ct.analysis.reducer.AnalysisBeanReducer;
import com.ct.common.constant.Names;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.JobStatus;
import org.apache.hadoop.util.Tool;

/**
 * MapReduce driver that reads one column family of an HBase table through
 * {@link AnalysisBeanMapper}, aggregates with {@link AnalysisBeanReducer} and
 * writes the result through {@link MySQLRedisBeanOutputFormat}.
 *
 * <p>The class honors the {@link Tool} contract: the configuration handed to
 * {@link #setConf(Configuration)} is stored and used to build the job, so that
 * settings supplied by the launcher are not silently dropped.
 */
public class AnalysisBeanTool implements Tool{

    /** Configuration injected via {@link #setConf(Configuration)}; {@code null} until it is set. */
    private Configuration conf;

    /**
     * Builds, submits and waits for the analysis job.
     *
     * @param args command-line arguments; not used by this implementation
     * @return the numeric value of {@code JobStatus.State.SUCCEEDED} when the job
     *         completes successfully, otherwise that of {@code JobStatus.State.FAILED}
     * @throws Exception if the job cannot be configured, submitted or monitored
     */
    @Override
    public int run(String[] args) throws Exception {

        // 1. Create the job from the injected configuration when there is one;
        //    otherwise fall back to a default configuration as before.
        Configuration injected = getConf();
        Job job = (injected == null) ? Job.getInstance() : Job.getInstance(injected);

        // 2. Locate the job jar through this driver class.
        job.setJarByClass(AnalysisBeanTool.class);

        // 3. Restrict the scan to a single column family; without this call the
        //    scan is not limited to any family.
        Scan scan = new Scan();
        scan.addFamily(Bytes.toBytes(Names.CF_CALLER.getValue()));

        // 4. Wire the mapper (HBase table input) and the reducer.
        TableMapReduceUtil.initTableMapperJob(
                Names.TABLE.getValue(),   // table to read
                scan,                     // scan describing what to read
                AnalysisBeanMapper.class, // mapper class
                AnalysisKey.class,        // mapper output key class
                Text.class,               // mapper output value class
                job                       // job being configured
        );
        job.setReducerClass(AnalysisBeanReducer.class);

        // 5. Declare the final output key/value types.
        job.setOutputKeyClass(AnalysisKey.class);
        job.setOutputValueClass(AnalysisValue.class);

        // 6. Route the reducer output through the custom output format.
        job.setOutputFormatClass(MySQLRedisBeanOutputFormat.class);

        // NOTE(review): the values returned below are JobStatus state codes, not
        // the usual 0-on-success exit code; kept unchanged for existing callers.
        // Confirm before passing the result to System.exit.
        boolean result = job.waitForCompletion(true);
        return result ? JobStatus.State.SUCCEEDED.getValue() : JobStatus.State.FAILED.getValue();

    }

    /**
     * Stores the configuration to be used when the job is created.
     *
     * @param conf configuration supplied by the launcher; may be {@code null}
     */
    @Override
    public void setConf(Configuration conf) {
        this.conf = conf;
    }

    /**
     * Returns the configuration previously passed to {@link #setConf(Configuration)}.
     *
     * @return the stored configuration, or {@code null} if none has been set
     */
    @Override
    public Configuration getConf() {
        return conf;
    }
}

你可能感兴趣的:(大数据学习,hbase,mapreduce,hadoop,maven)