Reading HBase Data to Local Disk with MapReduce

This post shows how to read HBase data with MapReduce and write it to local disk as a single partition file (the job uses the default single reducer). If you want one job to produce several partition files, see my previous post:
https://blog.csdn.net/weixin_43345864/article/details/84889874
The code follows. Mind the Hadoop/HBase version compatibility; the pom.xml is:
    <build>
        <plugins>
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-compiler-plugin</artifactId>
                <configuration>
                    <source>1.6</source>
                    <target>1.6</target>
                </configuration>
            </plugin>
        </plugins>
    </build>
    <dependencies>
        <!-- Kafka client -->
        <dependency>
            <groupId>org.apache.kafka</groupId>
            <artifactId>kafka-clients</artifactId>
            <version>0.11.0.1</version>
        </dependency>
        <!-- Kafka Streams -->
        <dependency>
            <groupId>org.apache.kafka</groupId>
            <artifactId>kafka-streams</artifactId>
            <version>0.11.0.1</version>
        </dependency>
        <!-- HBase server -->
        <dependency>
            <groupId>org.apache.hbase</groupId>
            <artifactId>hbase-server</artifactId>
            <version>1.3.1</version>
        </dependency>
        <!-- HBase client -->
        <dependency>
            <groupId>org.apache.hbase</groupId>
            <artifactId>hbase-client</artifactId>
            <version>1.3.1</version>
        </dependency>
        <dependency>
            <groupId>junit</groupId>
            <artifactId>junit</artifactId>
            <version>3.8.1</version>
            <scope>test</scope>
        </dependency>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-client</artifactId>
            <version>2.6.1</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-common</artifactId>
            <version>2.6.1</version>
        </dependency>
    </dependencies>
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
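import org.apache.hadoop.fs.FileSystem; // added: used by the optional output-directory cleanup below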
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.*;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.hbase.mapreduce.TableMapper;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import java.io.IOException;

public class PhoneHbaseMysql{
    public static void main(String[] args) throws Exception {
        // Create an HBase-aware Configuration and point it at the ZooKeeper quorum
        Configuration conf = HBaseConfiguration.create();
        conf.set("hbase.zookeeper.quorum", "hadoop01,hadoop02,hadoop03");
        // note: the property key is case-sensitive; "clientPort", not "clientport"
        conf.set("hbase.zookeeper.property.clientPort", "2181");
        Job job = Job.getInstance(conf);
        job.setJarByClass(PhoneHbaseMysql.class);
        initHBaseInputConfig(job);
        // The mapper class and the map output key/value classes are set by
        // TableMapReduceUtil.initTableMapperJob inside initHBaseInputConfig below
        job.setReducerClass(phoneHbaseReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);
        FileOutputFormat.setOutputPath(job,new Path("C:\\Users\\hp\\Desktop\\c"));
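        // Added sketch, not in the original post: Hadoop refuses to start the job
        // if the output directory already exists, so delete a stale one first.
        Path outDir = new Path("C:\\Users\\hp\\Desktop\\c");
        FileSystem fs = outDir.getFileSystem(job.getConfiguration());
        if (fs.exists(outDir)) {
            fs.delete(outDir, true); // recursive delete of the previous run's output
        }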
        boolean result = job.waitForCompletion(true);
        System.out.println(result);

    }
        // Configure the map side: use an HBase table as the input source
        private static void initHBaseInputConfig(Job job) {
            Connection connection = null;
            Admin admin = null;
            try {
                String tableName = "phone";
                connection = ConnectionFactory.createConnection(job.getConfiguration());
                admin = connection.getAdmin();
                if (!admin.tableExists(TableName.valueOf(tableName))) {
                    throw new RuntimeException("Target table '" + tableName + "' does not exist.");
                }
                Scan scan = new Scan();
                // The full-table Scan can be tuned before it is handed to the mapper:
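                // Added sketch, not in the original post; both options exist on the HBase 1.x Scan API.
                scan.setCaching(500);       // rows fetched per RPC to the region server
                scan.setCacheBlocks(false); // a full scan should not evict the block cache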
                // Initialize the mapper: bind the table, the Scan, the mapper class,
                // and the map output key/value types to the job
                TableMapReduceUtil.initTableMapperJob(
                        tableName,
                        scan,
                        phoneHbaseMapper.class,
                        Text.class,
                        IntWritable.class,
                        job,
                        true); // addDependencyJars: ship the HBase jars with the job
            } catch (IOException e) {
                e.printStackTrace();
            }finally {
                try {
                    if(admin != null){
                        admin.close();
                    }
                    if(connection != null && !connection.isClosed()){
                        connection.close();
                    }
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
        }
}
// rowkey = callee + "_" + start_time
// e.g. 13094566759_20180219 18:38:38
// row columns: callee_phone, call_long, call_phone, start_time
class phoneHbaseMapper extends TableMapper<Text, IntWritable> {
    @Override
    protected void map(ImmutableBytesWritable key, Result value, Context context) throws IOException, InterruptedException {
        String rowkey = Bytes.toString(key.get());
        String[] split = rowkey.split("_"); // split[0] = callee, split[1] = start_time
        int callee_long = 0;
        String callee_phone = null;
        // Prefixes of start_time (yyyyMMdd HH:mm:ss) used as per-year/month/day keys
        String year = split[1].substring(0, 4);  // e.g. 2018
        String month = split[1].substring(0, 6); // e.g. 201802
        String day = split[1].substring(0, 8);   // e.g. 20180219
        Cell[] cells = value.rawCells();
        for( Cell cell:cells ){
             String callee1 = Bytes.toString(CellUtil.cloneQualifier(cell)); // column qualifier
             String values = Bytes.toString(CellUtil.cloneValue(cell));      // cell value
             if(callee1.equals("call_long")){
                 callee_long= Integer.parseInt(values);
             }
             if(callee1.equals("callee_phone")){
                 callee_phone=values;
             }
        }
        // Emit the call duration once per time granularity (year, month, day)
        context.write(new Text(callee_phone + "-" + year), new IntWritable(callee_long));
        context.write(new Text(callee_phone + "-" + month), new IntWritable(callee_long));
        context.write(new Text(callee_phone + "-" + day), new IntWritable(callee_long));
    }
}
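// For a row like the sample above, the mapper emits three pairs carrying the same
// duration, e.g. (13094566759-2018, 120), (13094566759-201802, 120),
// (13094566759-20180219, 120); the reducer below sums the durations and counts
// the calls that fall under each key.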
class phoneHbaseReducer extends Reducer<Text, IntWritable, Text, Text> {
    @Override
    protected void reduce(Text key, Iterable<IntWritable> values, Context context)
            throws IOException, InterruptedException {
        int count = 0; // number of calls under this key
        int sum = 0;   // total call duration under this key
        for (IntWritable value:values) {
               sum += value.get();
               count++;
        }
        context.write(key,new Text(sum+"--->"+count));
    }
}
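
For a quick test, a snippet like the one below can seed the phone table with one sample row. This is an added sketch, not part of the original post: the column family name "info" is an assumption (the mapper only inspects qualifiers, never the family), and it reuses the imports from the listing above (the org.apache.hadoop.hbase.client.* wildcard covers Table and Put).

class PhoneTableSeeder {
    public static void main(String[] args) throws Exception {
        Configuration conf = HBaseConfiguration.create();
        conf.set("hbase.zookeeper.quorum", "hadoop01,hadoop02,hadoop03");
        conf.set("hbase.zookeeper.property.clientPort", "2181");
        Connection connection = ConnectionFactory.createConnection(conf);
        Table table = connection.getTable(TableName.valueOf("phone"));
        try {
            // rowkey = callee + "_" + start_time, the format the mapper expects
            Put put = new Put(Bytes.toBytes("13094566759_20180219 18:38:38"));
            // "info" is an assumed column family; the qualifiers are the ones the mapper reads
            put.addColumn(Bytes.toBytes("info"), Bytes.toBytes("call_long"), Bytes.toBytes("120"));
            put.addColumn(Bytes.toBytes("info"), Bytes.toBytes("callee_phone"), Bytes.toBytes("13094566759"));
            table.put(put);
        } finally {
            table.close();
            connection.close();
        }
    }
}

With just that one row, the job's single output file (part-r-00000) would hold three lines of the form "13094566759-2018 <tab> 120--->1", one per time granularity.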
