Storing MapReduce Results in Multiple MySQL Tables (via a Custom OutputFormat)

When analyzing data stored in HBase with a MapReduce program, the analysis results often need to be written to a relational database such as MySQL.

The task here is to compute, for each person, the total call duration and call count per year, per month, and per day, all in a single MapReduce job. Writing the results to three separate MySQL tables turned out to be the tricky part: previously, when writing to a single table, I used Hadoop's DBOutputFormat, but that class offers no way to target multiple tables. So I implemented a custom OutputFormat that routes each record to the appropriate MySQL table, and the approach worked in testing.
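The MySQL side is not shown in the original post; below is a minimal sketch that creates the three target tables call_data_y, call_data_m, and call_data_d. The column names and types are assumptions, chosen to match the four parameters bound by the PreparedStatement later on (phone and date as VARCHAR, total call duration as BIGINT, call count as INT):

package com.HbaseMR;

import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.Statement;

// Helper that creates the three result tables. The schema here is an
// assumption; only the table names and the four-column layout are taken
// from the RecordWriter shown below.
public class CreateResultTables {
    public static void main(String[] args) throws Exception {
        Class.forName("com.mysql.jdbc.Driver");
        try (Connection conn = DriverManager.getConnection(
                "jdbc:mysql://localhost:3306/hbase2db", "root", "root");
             Statement stmt = conn.createStatement()) {
            // one table per granularity: year / month / day
            for (String table : new String[]{"call_data_y", "call_data_m", "call_data_d"}) {
                stmt.executeUpdate("create table if not exists " + table + " ("
                        + "call_phone varchar(20), "
                        + "call_date  varchar(10), "
                        + "call_time  bigint, "
                        + "counts     int)");
            }
        }
    }
}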

1. Extend RecordWriter

package com.HbaseMR;

import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.RecordWriter;
import org.apache.hadoop.mapreduce.TaskAttemptContext;

import java.io.IOException;
import java.sql.*;

// The generic parameters are the reducer's output key and value types
public class MSRecordWrite extends RecordWriter<CallDate, Text> {

    private static Connection conn;
    static {
        // open a single shared JDBC connection when the class is loaded
        String driver = "com.mysql.jdbc.Driver";
        String url = "jdbc:mysql://localhost:3306/hbase2db";
        String user = "root";
        String password = "root";
        try {
            Class.forName(driver);
            conn = DriverManager.getConnection(url, user, password);
        } catch (Exception e) {
            e.printStackTrace();
        }

    }
    @Override
    public void write(CallDate key, Text value) throws IOException, InterruptedException {

        System.out.println("start to write " + value.toString());
        try {
            System.out.println("entering the insert logic");

            PreparedStatement pstmt = null;

            final String callPhone = key.getCallPhone();
            final String callDate = key.getCallDate();
            // the reducer's output value is "totalCallTime \t callCount"
            final String[] split = value.toString().split("\t");
            long callTime = Long.parseLong(split[0]);
            int counts = Integer.parseInt(split[1]);

            // length 4 -> yearly aggregate
            if (callDate.length() == 4){
                pstmt = conn.prepareStatement("insert into call_data_y values(?,?,?,?)");
            } else if (callDate.length() == 7){ // length 7 -> monthly aggregate
                pstmt = conn.prepareStatement("insert into call_data_m values(?,?,?,?)");
            } else { // otherwise -> daily aggregate
                pstmt = conn.prepareStatement("insert into call_data_d values(?,?,?,?)");
            }
            // bind the parameters
            pstmt.setString(1, callPhone);
            pstmt.setString(2, callDate);
            pstmt.setLong(3, callTime);
            pstmt.setInt(4, counts);
            pstmt.executeUpdate();
            pstmt.close();

            System.out.println("insert succeeded: " + value.toString());

        } catch (Exception ex) {
            // do not close the shared connection here, or later writes would fail
            ex.printStackTrace();
        }
    }

    @Override
    public void close(TaskAttemptContext context) throws IOException, InterruptedException {
        // release the shared connection once the task has finished writing
        try {
            if (conn != null) {
                conn.close();
            }
        } catch (SQLException e) {
            e.printStackTrace();
        }
    }

}

2. Extend the OutputFormat class

package com.HbaseMR;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.*;

import java.io.IOException;


public class MSOutPutFormat extends OutputFormat<CallDate, Text> {

    @Override
    public RecordWriter<CallDate, Text> getRecordWriter(TaskAttemptContext context)
            throws IOException, InterruptedException {
        return new MSRecordWrite();
    }

    @Override
    public void checkOutputSpecs(JobContext context) throws IOException, InterruptedException {
        // nothing to check: the output goes to MySQL, not to an HDFS path
    }

    @Override
    public OutputCommitter getOutputCommitter(TaskAttemptContext context) throws IOException, InterruptedException {
        // reuse NullOutputFormat's committer, since there is no HDFS output to commit
        return (new org.apache.hadoop.mapreduce.lib.output.NullOutputFormat<NullWritable, IntWritable>())
                .getOutputCommitter(context);
    }

}

3. The custom key class

package com.HbaseMR;

import org.apache.hadoop.io.WritableComparable;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;

public class CallDate implements WritableComparable<CallDate>{
    // phone number
    private String callPhone;
    // call date: "yyyy", "yyyy-MM", or "yyyy-MM-dd"
    private String callDate;

    public CallDate(){}
    public CallDate(String call_phone, String call_data){
        this.callDate = call_data;
        this.callPhone = call_phone;
    }

    @Override
    public int compareTo(CallDate o) {
        // order by phone number first, then by date
        int byPhone = callPhone.compareTo(o.getCallPhone());
        return byPhone == 0 ? callDate.compareTo(o.getCallDate()) : byPhone;
    }

    @Override
    public void write(DataOutput out) throws IOException {
        out.writeUTF(callPhone);
        out.writeUTF(callDate);
    }

    @Override
    public void readFields(DataInput in) throws IOException {
        this.callPhone= in.readUTF();
        this.callDate = in.readUTF();
    }


    public String getCallDate() {
        return callDate;
    }

    public String getCallPhone() {
        return callPhone;
    }

    public void setCallDate(String callDate) {
        this.callDate = callDate;
    }

    public void setCallPhone(String callName) {
        this.callPhone = callName;
    }

    @Override
    public String toString() {
        return this.callPhone+"\t"+this.callDate;
    }
}


4. The Mapper

package com.HbaseMR;


import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellScanner;
import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableMapper;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

import java.io.IOException;
import java.util.List;

// The generic parameters are the mapper's output key and value types
public class HbaseMapper extends TableMapper<CallDate,Text> {
    @Override
    protected void map(ImmutableBytesWritable key, Result value, Context context) throws IOException, InterruptedException {
        // each call processes one RowKey; one row records a single phone call
        // columns: calling number, caller name, called number, callee name, call date, call duration
        String calling_phone = null;
        String call_data = null;
        String call_time = null;
        String call_phone = null;
        // get all cells of the row
        final Cell[] cells = value.rawCells();

      
        for (Cell cell : cells){
            // column qualifier of the cell
            String c = new String(CellUtil.cloneQualifier(cell));
            // value of the cell
            String v = new String(CellUtil.cloneValue(cell));
            // pick out the fields we need
            switch (c){
                case "called_phone":
                    call_phone = v;
                    break;
                case "dialing_phone":
                    calling_phone = v;
                    break;
                case "call_date":
                    call_data = v;
                    break;
                case "call_time":
                    call_time = v;
                    break;
            }
        }
        // the date format is 2018-06-26 15:36:03
        // wrap phone number + date into the custom bean; the bean and the call
        // duration become the reducer's input
        // year-month, e.g. "2018-06"
        final String sub = call_data.substring(0, 7);
        context.write(new CallDate(calling_phone, sub), new Text(call_time));
        // year only, e.g. "2018"
        context.write(new CallDate(calling_phone, call_data.substring(0, 4)), new Text(call_time));
        // year-month-day, e.g. "2018-06-26"
        context.write(new CallDate(calling_phone, call_data.substring(0, 10)), new Text(call_time));
    }
    
}


5. The Partitioner: yearly records go to one partition, monthly records to another, and daily records to a third. Since the driver sets three reduce tasks, each reducer then handles exactly one granularity, and therefore one MySQL table.

package com.HbaseMR;


import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Partitioner;

public class HbasePartitioner extends Partitioner<CallDate,Text>{
    @Override
    public int getPartition(CallDate callDate, Text text, int numPartitions) {
        // route by the length of the date string: 10 = day, 7 = month, 4 = year
        if (callDate.getCallDate().length() == 10){
            return 0;
        } else if (callDate.getCallDate().length() == 7){
            return 1;
        } else {
            return 2;
        }
    }
}

6. The GroupingComparator: records with the same phone number and the same date are treated as one key, so they are aggregated together

package com.HbaseMR;

import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.io.WritableComparator;

public class HbaseGroupingCompar extends WritableComparator{

    public HbaseGroupingCompar(){
        super(CallDate.class, true);
    }

    @Override
    public int compare(WritableComparable a, WritableComparable b) {
        CallDate a1 = (CallDate) a;
        CallDate b1 = (CallDate) b;
        // same phone number and same date -> same group
        if (a1.getCallPhone().equals(b1.getCallPhone()) && a1.getCallDate().equals(b1.getCallDate())){
            return 0;
        } else if (!a1.getCallPhone().equals(b1.getCallPhone())){
            return a1.getCallPhone().compareTo(b1.getCallPhone());
        } else {
            return a1.getCallDate().compareTo(b1.getCallDate());
        }
    }
}

7. The Reducer: sum up the call duration and call count for each phone number + date key

package com.HbaseMR;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

import java.io.IOException;

public class HbaseReducer  extends Reducer<CallDate,Text,CallDate,Text>{
    @Override
    protected void reduce(CallDate key, Iterable<Text> values, Context context) throws IOException, InterruptedException {
        long callTime = 0;
        int i = 0;
        // i is the call count, i.e. the number of values for this key
        for (Text v : values){
            final long aLong = Long.parseLong(v.toString());
            callTime += aLong;
            i++;
        }

        context.write(key,new Text(callTime+"\t"+i));
    }
}

8. The Driver

package com.HbaseMR;


import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Scan;



import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.OutputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

public class HbaseDriver extends Configured implements Tool {
    public static void main(String[] args) throws Exception {
        Configuration conf = HBaseConfiguration.create();
        // ZooKeeper quorum; the HBase client needs it to reach the cluster
        conf.set("hbase.zookeeper.quorum","192.168.136.150:2181,192.168.136.151:2181,192.168.136.152:2181");
        int status = ToolRunner.run(conf, new HbaseDriver(), args);
        if (status == 1){
            System.out.println("job succeeded");
        } else {
            System.out.println("job failed");
        }

    }

    @Override
    public int run(String[] args) throws Exception {
        final Configuration conf = this.getConf();
        final Job job = Job.getInstance(conf);
        job.setJarByClass(HbaseDriver.class);
        job.setGroupingComparatorClass(HbaseGroupingCompar.class);
        Scan scan = new Scan();
        scan.setCacheBlocks(false);
        scan.setCaching(500);

        
        TableMapReduceUtil.initTableMapperJob("kafka:call_logs1",scan,
                HbaseMapper.class,CallDate.class,Text.class,job);
        job.setReducerClass(HbaseReducer.class);
        job.setOutputKeyClass(CallDate.class);
        job.setPartitionerClass(HbasePartitioner.class);
        job.setOutputValueClass(Text.class);
        job.setNumReduceTasks(3);
        // use the custom OutputFormat so the reducer output goes straight to MySQL
        job.setOutputFormatClass(MSOutPutFormat.class);

        final boolean status = job.waitForCompletion(true);
        return status ? 1 : 0;
    }
}

As a final note, here is how to pre-split an HBase table into regions (custom region splits) at creation time. The snippet uses an existing admin handle and a Names constants class from the surrounding project:

public static void createTable1(String tableName, String... familyColumn){
        final HTableDescriptor tableDescriptor = new HTableDescriptor(TableName.valueOf(tableName));
        // seven split keys pre-split the table into regions
        // '|' sorts after digits, letters and '_' in ASCII, so it acts as an
        // upper bound for any rowkey with the given prefix:
        // rowkeys starting with "00" fall into one region,
        // rowkeys starting with "01" fall into the next region,
        // .....
        // rowkeys starting with "06" fall into the last bounded region
        byte[][] splitKey = {
                Bytes.toBytes("00|"),
                Bytes.toBytes("01|"),
                Bytes.toBytes("02|"),
                Bytes.toBytes("03|"),
                Bytes.toBytes("04|"),
                Bytes.toBytes("05|"),
                Bytes.toBytes("06|"),
        };
        for (String column : familyColumn){
            final HColumnDescriptor columnDescriptor = new HColumnDescriptor(column);
            if (tableName.equals(Names.TABLENAME_RLS_)){
                columnDescriptor.setMinVersions(100);
                columnDescriptor.setMaxVersions(100);
            }

            tableDescriptor.addFamily(columnDescriptor);
        }

        try {
            // pass the split keys in when the table is created
            admin.createTable(tableDescriptor, splitKey);
            System.out.println("table created successfully");
        } catch (IOException e) {
            System.out.println("failed to create table");
            e.printStackTrace();
        }
    }
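
To see why these split keys group rowkeys by prefix, note that '|' (0x7C) sorts after digits, letters, and '_' (0x5F) in ASCII, so any rowkey beginning with "00" compares lower than "00|". The tiny check below uses hypothetical rowkeys (the real rowkey layout is not shown in this post) just to illustrate the comparison:

package com.HbaseMR;

// Hypothetical rowkeys of the form "<prefix>_<phone>_<date>"; only the
// two-digit prefix matters for the region boundaries defined above.
public class SplitKeyDemo {
    public static void main(String[] args) {
        String rowkeyA = "00_13800000000_20180626";
        String rowkeyB = "01_13900000000_20180626";
        System.out.println(rowkeyA.compareTo("00|") < 0); // true: lands in the region ending at "00|"
        System.out.println(rowkeyB.compareTo("00|") > 0); // true: past the "00|" boundary
        System.out.println(rowkeyB.compareTo("01|") < 0); // true: lands in the region ending at "01|"
    }
}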
