使用MapReduce在HBase与MySQL之间传输数据

使用MapReduce处理HBase中的数据,并将结果存储到MySQL中

1、首先定义一个类,类中的字段要和MySQL数据库中表的字段一一对应

package com.hbase2mysql;

import org.apache.hadoop.io.Writable;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.mapreduce.lib.db.DBWritable;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;
/**
 * A student record whose fields mirror the columns of the MySQL table it is
 * read from and written to.
 *
 * <p>The class implements {@link WritableComparable} so that it can be used as
 * a map output key. If it is never used as a map output key, implementing
 * plain {@code Writable} is enough and {@code compareTo} can be dropped.
 *
 * <p>Notes on ordering: no sorting happens inside the reduce method itself.
 * Sorting takes place when the reduce side merges, partition by partition, the
 * outputs of the different map tasks; nothing is sorted after that. The input
 * of each reduce task is therefore sorted, but only locally: with several
 * reduce tasks every task's output is ordered on its own, not globally. To get
 * a total order, partition by key range. For keys 1-100, for example, put 1-10
 * into one partition, 11-20 into the next, ... and 91-100 into the last one;
 * each partition is then sorted and the partitions themselves are in order.
 */
public class StuHbase  implements WritableComparable<StuHbase>, DBWritable {
    // Fields matching the columns of the MySQL table.
    private  String name;
    private  int age;
    private  String sex;
    private  int grade;


    /** No-argument constructor; an empty instance is filled through {@code readFields}. */
    public StuHbase(){}

    /**
     * Creates a fully populated record.
     *
     * @param name  student name
     * @param age   student age
     * @param sex   student sex
     * @param grade student grade (score)
     */
    public StuHbase(String name,int age,String sex,int grade){
        this.name = name;
        this.age = age;
        this.sex = sex;
        this.grade = grade;
    }

    /**
     * Serializes the record. The field order must match
     * {@link #readFields(DataInput)}.
     *
     * <p>{@code DataOutput.writeUTF} rejects {@code null}, so a missing name or
     * sex (e.g. a NULL database column) is written as an empty string.
     */
    @Override
    public void write(DataOutput dataOutput) throws IOException {
        dataOutput.writeUTF(name == null ? "" : name);
        dataOutput.writeInt(age);
        dataOutput.writeUTF(sex == null ? "" : sex);
        dataOutput.writeInt(grade);
    }

    /** Deserializes the record in the same field order used by {@link #write(DataOutput)}. */
    @Override
    public void readFields(DataInput dataInput) throws IOException {
        this.name = dataInput.readUTF();
        this.age = dataInput.readInt();
        this.sex = dataInput.readUTF();
        this.grade = dataInput.readInt();
    }

    /**
     * Binds the fields to the statement parameters, JDBC style, in the order
     * name, age, sex, grade.
     */
    @Override
    public void write(PreparedStatement preparedStatement) throws SQLException {
        int index = 1;
        preparedStatement.setString(index++,name);
        preparedStatement.setInt(index++,age);
        preparedStatement.setString(index++,sex);
        preparedStatement.setInt(index,grade);
    }

    /**
     * Populates the fields from the current row of the result set, JDBC style,
     * reading the columns in the order name, age, sex, grade.
     */
    @Override
    public void readFields(ResultSet resultSet) throws SQLException {
        int index = 1;
        name = resultSet.getString(index++);
        age = resultSet.getInt(index++);
        sex = resultSet.getString(index++);
        grade = resultSet.getInt(index);
    }

    public String getName() {
        return name;
    }

    public int getAge() {
        return age;
    }

    public String getSex() {
        return sex;
    }

    public int getGrade() {
        return grade;
    }

    public void setName(String name) {
        this.name = name;
    }

    public void setAge(int age) {
        this.age = age;
    }

    public void setSex(String sex) {
        this.sex = sex;
    }

    public void setGrade(int grade) {
        this.grade = grade;
    }

    /** Returns the fields as one tab-separated line: name, age, sex, grade. */
    @Override
    public String toString() {
        return name+"\t"+age+"\t"+sex+"\t"+grade;
    }

    /**
     * Orders records by grade first, then by name, age and sex.
     *
     * <p>The tie-breakers matter when this class is a map output key: keys
     * that compare as equal end up in the same reduce group, so comparing by
     * grade alone would merge different students that share a grade.
     * {@code Integer.compare} is used instead of subtraction, which can
     * overflow for operands of opposite sign.
     */
    @Override
    public int compareTo(StuHbase o) {
        int result = Integer.compare(this.grade, o.grade);
        if (result == 0) {
            result = compareNullable(this.name, o.name);
        }
        if (result == 0) {
            result = Integer.compare(this.age, o.age);
        }
        if (result == 0) {
            result = compareNullable(this.sex, o.sex);
        }
        return result;
    }

    /** Two records are equal when all four fields are equal (consistent with {@code compareTo}). */
    @Override
    public boolean equals(Object other) {
        if (this == other) {
            return true;
        }
        if (!(other instanceof StuHbase)) {
            return false;
        }
        StuHbase that = (StuHbase) other;
        return age == that.age
                && grade == that.grade
                && java.util.Objects.equals(name, that.name)
                && java.util.Objects.equals(sex, that.sex);
    }

    /**
     * Hash derived from the field values only, so it is the same in every JVM.
     * A hash-based partitioner needs this to route equal keys to the same
     * reduce task; the identity hash inherited from {@code Object} is not stable.
     */
    @Override
    public int hashCode() {
        return java.util.Objects.hash(name, age, sex, grade);
    }

    /** Compares two strings, sorting {@code null} before any non-null value. */
    private static int compareNullable(String left, String right) {
        if (left == null) {
            return right == null ? 0 : -1;
        }
        if (right == null) {
            return 1;
        }
        return left.compareTo(right);
    }
}

2、自定义Map端,从hbase中读取数据

package com.hbase2mysql;

import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableMapper;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

import java.io.IOException;

/**
 * Mapper of the HBase-to-MySQL job: turns every HBase row into a
 * ({@code name_age_sex}, grade) pair so that the reducer can add up the total
 * grade of each person.
 */
public class HBaseMap  extends TableMapper<Text,IntWritable>{
    /**
     * Reads the {@code name}, {@code age}, {@code sex} and {@code grade}
     * columns of one row and emits the grade keyed by {@code name_age_sex}.
     *
     * <p>A row that lacks the {@code name} or {@code sex} column is counted and
     * skipped; emitting it would produce the literal key {@code null_0_null}.
     *
     * @param key row key; designed in HBase as subjectId_studentId
     * @param value the cells of the row
     * @param context job context used to emit the output pair
     * @throws IOException if writing the output fails
     * @throws InterruptedException if the task is interrupted while writing
     */
    @Override
    protected void map(ImmutableBytesWritable key, Result value, Context context) throws IOException, InterruptedException {
        Cell[] cells = value.rawCells();
        if (cells == null) {
            // Nothing to read from an empty result.
            return;
        }

        String name = null;
        int age = 0;
        String sex = null;
        int grade = 0;
        for (Cell cell : cells) {
            // Decode with an explicit charset instead of the platform default,
            // so the result does not depend on the node's locale settings.
            String column = new String(CellUtil.cloneQualifier(cell), java.nio.charset.StandardCharsets.UTF_8);
            String cellValue = new String(CellUtil.cloneValue(cell), java.nio.charset.StandardCharsets.UTF_8);

            // Assign the value to the field that matches the column name.
            switch (column) {
                case "name":
                    name = cellValue;
                    break;
                case "age":
                    age = Integer.parseInt(cellValue);
                    break;
                case "sex":
                    sex = cellValue;
                    break;
                case "grade":
                    grade = Integer.parseInt(cellValue);
                    break;
                default:
                    // Columns this job does not use are ignored.
                    break;
            }
        }

        if (name == null || sex == null) {
            context.getCounter("HBaseMap", "ROWS_MISSING_NAME_OR_SEX").increment(1);
            return;
        }

        context.write(new Text(name + "_" + age + "_" + sex), new IntWritable(grade));
    }
}

3、自定义Reduce端:Reduce端输出的key是自定义的StuHbase类,value的类型声明为Text(实际写出的是null)

package com.hbase2mysql;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

import java.io.IOException;

/**
 * Reducer of the HBase-to-MySQL job: adds up all grades received for one
 * {@code name_age_sex} key and emits the total as a {@link StuHbase} record.
 */
public class HbaseReducer extends Reducer<Text,IntWritable,StuHbase,Text>{
    /**
     * Sums the grades of one person and writes the resulting record.
     *
     * @param key composite key in the form {@code name_age_sex}
     * @param values all grades emitted for that key
     * @param context job context used to emit the record
     * @throws IOException if the key does not contain two separators or the write fails
     * @throws InterruptedException if the task is interrupted while writing
     */
    @Override
    protected void reduce(Text key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException {
        int sumGrade = 0; // total grade
        for (IntWritable v : values) {
            sumGrade += v.get();
        }

        // Take the key apart from the right-hand side: the last two separators
        // delimit age and sex, everything before them is the name. A name that
        // itself contains '_' is therefore still parsed correctly.
        final String composite = key.toString();
        final int sexSeparator = composite.lastIndexOf('_');
        final int ageSeparator = sexSeparator < 0 ? -1 : composite.lastIndexOf('_', sexSeparator - 1);
        if (ageSeparator < 0) {
            throw new IOException("Malformed key, expected name_age_sex: " + composite);
        }

        final String name = composite.substring(0, ageSeparator);
        final int age = Integer.parseInt(composite.substring(ageSeparator + 1, sexSeparator));
        final String sex = composite.substring(sexSeparator + 1);

        // The values to store travel in the DBWritable key; the value is left null.
        context.write(new StuHbase(name, age, sex, sumGrade), null);
    }
}

4、Driver端,任务主程序

package com.hbase2mysql;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.db.DBConfiguration;
import org.apache.hadoop.mapreduce.lib.db.DBOutputFormat;

import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;


public class StuHbaseDriver extends Configured implements Tool {
    public static void main(String[] args) throws Exception {
        final Configuration conf = HBaseConfiguration.create();
        //想要对hbase进行操作,需要连接zookeeper,不管是读还是写,都是先从zookeeper中获取元数据信息
        conf.set("hbase.zookeeper.quorum","192.168.136.150:2181,192.168.136.151:2181,192.168.136.152:2181");
        
        ToolRunner.run(conf,new StuHbaseDriver(),args);
    }

    @Override
    public int run(String[] strings) throws Exception {
        Configuration conf = this.getConf();

        //设定要写入的mysql的url和用户名和密码        
        DBConfiguration.configureDB(conf,"com.mysql.jdbc.Driver","jdbc:mysql://localhost:3306/hbase2db","root","root");
        Job job = Job.getInstance(conf);
        job.setJarByClass(StuHbaseDriver.class);
        
        Scan scan = new Scan();
        scan.setCacheBlocks(false);
        scan.setCaching(500);

        //设置map,表名,scan,Map类.class,输出的key,输出的value,job
        TableMapReduceUtil.initTableMapperJob("stu1",scan,HBaseMap.class,Text.class,IntWritable.class,job);

        //设置reduce的类
        job.setReducerClass(HbaseReducer.class);
        //设置输出格式是DataBase
        job.setOutputFormatClass(DBOutputFormat.class);
        //设置输出时的k,v类型
        job.setOutputKeyClass(StuHbase.class);
        job.setOutputValueClass(Text.class);

        //设置job 输出到mysql时 的 表名,和对应的列
        DBOutputFormat.setOutput(job,"stu","name","age","sex","grade");

        boolean b = =job.waitForCompletion(true);

        return b?0:-1;
    }
}

将mysql中的数据作为MapReduce的输入,并存到hbase中

因为这里用到了上面自定义的StuHbase类,并且把StuHbase当做Map端输出的key,所以StuHbase必须实现WritableComparable接口,以便对key进行排序

//这里把Map端、reduce端、Driver端以静态内部类写到了一块儿
package com.hbase2mysql;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.hbase.mapreduce.TableReducer;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;

import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.db.DBConfiguration;
import org.apache.hadoop.mapreduce.lib.db.DBInputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

import java.io.IOException;

/**
 * MySQL-to-HBase job: reads the rows of the MySQL table {@code stu} and stores
 * each of them as a row of the HBase table {@code stu1}. Mapper, reducer and
 * driver are kept together as static nested classes.
 */
public class Mysql2Hbase{

    /** Passes every record read from MySQL on unchanged, using the record itself as the key. */
    public static class Mysql2HbaseMapper extends Mapper<LongWritable,StuHbase,StuHbase,NullWritable>{
        /**
         * @param key record index supplied by the database input format (not used)
         * @param value one row of the MySQL table
         * @param context job context used to emit the record
         */
        @Override
        protected void map(LongWritable key, StuHbase value, Context context) throws IOException, InterruptedException {
            context.write(value,NullWritable.get());
        }
    }



    /** Turns every record into an HBase {@link Put} on column family {@code info}. */
    public static class Mysql2HbaseReducer extends TableReducer<StuHbase,NullWritable,ImmutableBytesWritable>{
        private static final byte[] FAMILY = Bytes.toBytes("info");

        /**
         * Writes one HBase row per received record.
         *
         * <p>The row key is {@code name_age_sex}, the same identity the
         * HBase-to-MySQL job groups by. A fixed row key would make every
         * record overwrite the same HBase row.
         *
         * @param key the record; Hadoop re-populates this object as the values
         *            are iterated, so the {@code Put} is built inside the loop
         * @param values one placeholder per record in this reduce group
         * @param context job context; expects (ImmutableBytesWritable, Mutation) pairs,
         *                {@code Put} being a {@code Mutation}
         */
        @Override
        protected void reduce(StuHbase key, Iterable<NullWritable> values, Context context) throws IOException, InterruptedException {
            for (NullWritable ignored : values) {
                final byte[] rowKey = Bytes.toBytes(key.getName() + "_" + key.getAge() + "_" + key.getSex());
                final Put put = new Put(rowKey);

                put.addColumn(FAMILY,Bytes.toBytes("name"),Bytes.toBytes(key.getName()));
                // Numbers are stored as strings so that they are shown
                // readably in HBase instead of as raw bytes.
                put.addColumn(FAMILY,Bytes.toBytes("age"),Bytes.toBytes(key.getAge()+""));
                put.addColumn(FAMILY,Bytes.toBytes("sex"),Bytes.toBytes(key.getSex()));
                put.addColumn(FAMILY,Bytes.toBytes("grade"),Bytes.toBytes(key.getGrade()+""));

                context.write(new ImmutableBytesWritable(rowKey),put);
            }
        }
    }


    /** Configures and launches the MySQL-to-HBase job. */
    public static class Mysql2HbaseDriver extends Configured implements Tool{
        /**
         * Entry point. Runs the job through {@link ToolRunner} and uses the job
         * status as the process exit code.
         */
        public static void main(String[] args) throws Exception {
            Configuration conf = HBaseConfiguration.create();
            // ZooKeeper quorum through which HBase is reached.
            conf.set("hbase.zookeeper.quorum","192.168.136.150:2181,192.168.136.151:2181,192.168.136.152:2181");
            System.exit(ToolRunner.run(conf,new Mysql2HbaseDriver(),args));
        }

        /**
         * Configures and runs the job.
         *
         * @param strings command-line arguments left over by {@code ToolRunner} (not used)
         * @return 0 if the job succeeded, -1 otherwise
         * @throws Exception if the job cannot be configured or submitted
         */
        @Override
        public int run(String[] strings) throws Exception {
            Configuration conf = this.getConf();
            // JDBC driver, URL, user name and password of the source MySQL database.
            DBConfiguration.configureDB(conf,"com.mysql.jdbc.Driver","jdbc:mysql://localhost:3306/hbase2db","root","root");

            final Job job = Job.getInstance(conf);

            job.setJarByClass(Mysql2HbaseDriver.class);

            job.setMapperClass(Mysql2HbaseMapper.class);

            job.setMapOutputKeyClass(StuHbase.class);
            job.setMapOutputValueClass(NullWritable.class);
            // The data is stored in the HBase table stu1.
            TableMapReduceUtil.initTableReducerJob("stu1",Mysql2HbaseReducer.class,job);

            // Read the input from a database.
            job.setInputFormatClass(DBInputFormat.class);
            // job, DBWritable class, table name, query condition, order-by column, columns to read.
            DBInputFormat.setInput(job,StuHbase.class,"stu",null,"grade","name","age","sex","grade");

            boolean succeeded = job.waitForCompletion(true);

            return succeeded ? 0 : -1;
        }
    }
}

你可能感兴趣的:(Hadoop,MySQL,HBase)