1. First, define a class whose fields correspond to the columns of the table in your MySQL database.
package com.hbase2mysql;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.mapreduce.lib.db.DBWritable;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;
//If StuHbase is not used as the Map output key, implementing Writable is enough; the WritableComparable interface (and its extra compareTo method) is not needed.
/*
*Note that no sorting happens inside the reduce method itself.
*Sorting on the reduce side happens while the outputs of the different maps are merged per partition;
*after that merge nothing is sorted again. Because the input to the reduce method has already been sorted,
*each reduce task's output is ordered, but only locally: with several reduce tasks the result is ordered
*within each task's output, not globally.
*To get a globally ordered result, improve the partitioning. For example, if the keys run from 1 to 100,
*send keys 1-10 to one partition, 11-20 to the next, ..., and 91-100 to the last. Each partition is then
*ordered internally and the partitions themselves are ordered, so the overall result is globally ordered
*(a sketch of such a range Partitioner follows this class).
*/
public class StuHbase implements WritableComparable<StuHbase>, DBWritable {
//Fields corresponding to the columns of the MySQL table
private String name;
private int age;
private String sex;
private int grade;
public StuHbase(){}
public StuHbase(String name,int age,String sex,int grade){
this.name = name;
this.age = age;
this.sex = sex;
this.grade = grade;
}
//The fields must be written and read in the same order
@Override
public void write(DataOutput dataOutput) throws IOException {
dataOutput.writeUTF(name);
dataOutput.writeInt(age);
dataOutput.writeUTF(sex);
dataOutput.writeInt(grade);
}
@Override
public void readFields(DataInput dataInput) throws IOException {
this.name = dataInput.readUTF();
this.age = dataInput.readInt();
this.sex = dataInput.readUTF();
this.grade = dataInput.readInt();
}
@Override
public void write(PreparedStatement preparedStatement) throws SQLException {
//Assign the values through the PreparedStatement, just like plain JDBC
int index = 1;
preparedStatement.setString(index++,name);
preparedStatement.setInt(index++,age);
preparedStatement.setString(index++,sex);
preparedStatement.setInt(index,grade);
}
@Override
public void readFields(ResultSet resultSet) throws SQLException { //Read the fields from the ResultSet, just like a JDBC query
int index = 1;
name = resultSet.getString(index++);
age = resultSet.getInt(index++);
sex = resultSet.getString(index++);
grade = resultSet.getInt(index);
}
public String getName() {
return name;
}
public int getAge() {
return age;
}
public String getSex() {
return sex;
}
public int getGrade() {
return grade;
}
public void setName(String name) {
this.name = name;
}
public void setAge(int age) {
this.age = age;
}
public void setSex(String sex) {
this.sex = sex;
}
public void setGrade(int grade) {
this.grade = grade;
}
@Override
public String toString() {
return name+"\t"+age+"\t"+sex+"\t"+grade;
}
@Override
public int compareTo(StuHbase o) {
return Integer.compare(this.grade, o.grade); //sort ascending by grade; avoids the overflow risk of subtraction
}
}
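To make the partitioning idea in the comment above concrete, here is a minimal sketch of such a range Partitioner. It assumes the map output key is an IntWritable in the range 1-100 and that the job runs 10 reduce tasks; the class name and bucket boundaries are illustrative only, not part of the code above.

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapreduce.Partitioner;

//A range partitioner: keys 1-10 go to partition 0, 11-20 to partition 1, ..., 91-100 to partition 9
public class RangePartitioner extends Partitioner<IntWritable, NullWritable> {
    @Override
    public int getPartition(IntWritable key, NullWritable value, int numPartitions) {
        int bucket = (key.get() - 1) / 10;
        //clamp to the number of reduce tasks actually configured
        return Math.min(Math.max(bucket, 0), numPartitions - 1);
    }
}

In the driver this would be enabled with job.setPartitionerClass(RangePartitioner.class) and job.setNumReduceTasks(10).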
2. Custom Mapper: read the data from HBase
package com.hbase2mysql;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableMapper;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import java.io.IOException;
public class HBaseMap extends TableMapper<Text,IntWritable>{
/**
*This MapReduce job simply computes each student's total score
* @param key the rowKey, designed in HBase as subjectId_studentId (a sketch of the assumed row layout follows this class)
* @param value the set of cells of the row
* @param context the task context
* @throws IOException
* @throws InterruptedException
*/
@Override
protected void map(ImmutableBytesWritable key, Result value, Context context) throws IOException, InterruptedException {
Cell[] cells = value.rawCells();
String name=null;
int age = 0;
String sex = null;
int grade = 0;
for(Cell cell : cells){
//column qualifier (the column name)
String column = new String(CellUtil.cloneQualifier(cell));
//cell value, i.e. the value stored in this column
String v = new String(CellUtil.cloneValue(cell));
switch(column){ //assign to the matching field based on the column name
case "name":
name = v;
break;
case "age":
age = Integer.parseInt(v);
break;
case "sex":
sex = v;
break;
case "grade":
grade = Integer.parseInt(v);
break;
}
}
// emit name_age_sex as the key and this subject's score as the value
context.write(new Text(name+"_"+age+"_"+sex),new IntWritable(grade));
}
}
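For reference, a minimal sketch of how a row in the assumed stu1 layout could be written with the standard HBase client. The column family name info, the rowkey 01_001 and the sample values are assumptions for illustration (the mapper above does not depend on a particular family name); adjust them to your actual table.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.util.Bytes;

public class PutSampleRow {
    public static void main(String[] args) throws Exception {
        Configuration conf = HBaseConfiguration.create();
        conf.set("hbase.zookeeper.quorum", "192.168.136.150:2181"); //your ZooKeeper quorum
        try (Connection conn = ConnectionFactory.createConnection(conf);
             Table table = conn.getTable(TableName.valueOf("stu1"))) {
            //rowkey: subjectId_studentId, as described in the javadoc above
            Put put = new Put(Bytes.toBytes("01_001"));
            put.addColumn(Bytes.toBytes("info"), Bytes.toBytes("name"), Bytes.toBytes("Tom"));
            put.addColumn(Bytes.toBytes("info"), Bytes.toBytes("age"), Bytes.toBytes("18"));
            put.addColumn(Bytes.toBytes("info"), Bytes.toBytes("sex"), Bytes.toBytes("male"));
            put.addColumn(Bytes.toBytes("info"), Bytes.toBytes("grade"), Bytes.toBytes("90"));
            table.put(put);
        }
    }
}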
3. Custom Reducer: the output key is the custom class and the output value is Text
package com.hbase2mysql;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
import java.io.IOException;
public class HbaseReducer extends Reducer<Text,IntWritable,StuHbase,Text>{
@Override
protected void reduce(Text key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException {
int sumGrade = 0;//total score
for(IntWritable v : values){
sumGrade = v.get() +sumGrade;
}
final String[] split = key.toString().split("_"); //split the incoming name_age_sex key
//assign the parts
final String name = split[0];
int age = Integer.parseInt(split[1]);
String sex = split[2];
//wrap the values to be written to MySQL in the custom class that implements DBWritable; the output value is set to null
context.write(new StuHbase(name,age,sex,sumGrade),null);
}
}
4. Driver: the main program of the job
package com.hbase2mysql;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.db.DBConfiguration;
import org.apache.hadoop.mapreduce.lib.db.DBOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
public class StuHbaseDriver extends Configured implements Tool {
public static void main(String[] args) throws Exception {
final Configuration conf = HBaseConfiguration.create();
//To operate on HBase, reads and writes alike, we must connect to ZooKeeper, because the meta information is obtained from ZooKeeper first
conf.set("hbase.zookeeper.quorum","192.168.136.150:2181,192.168.136.151:2181,192.168.136.152:2181");
ToolRunner.run(conf,new StuHbaseDriver(),args);
}
@Override
public int run(String[] strings) throws Exception {
Configuration conf = this.getConf();
//set the JDBC driver, the MySQL URL, the user name and the password for the output database
DBConfiguration.configureDB(conf,"com.mysql.jdbc.Driver","jdbc:mysql://localhost:3306/hbase2db","root","root");
Job job = Job.getInstance(conf);
job.setJarByClass(StuHbaseDriver.class);
Scan scan = new Scan();
scan.setCacheBlocks(false);
scan.setCaching(500);
//configure the map side: table name, scan, mapper class, map output key class, map output value class, job
TableMapReduceUtil.initTableMapperJob("stu1",scan,HBaseMap.class,Text.class,IntWritable.class,job);
//set the reducer class
job.setReducerClass(HbaseReducer.class);
//set the output format to write to a database
job.setOutputFormatClass(DBOutputFormat.class);
//set the output key and value types
job.setOutputKeyClass(StuHbase.class);
job.setOutputValueClass(Text.class);
//set the MySQL table name and the corresponding columns for the job output (a sketch of the assumed stu table follows this class)
DBOutputFormat.setOutput(job,"stu","name","age","sex","grade");
boolean b = job.waitForCompletion(true);
return b?0:-1;
}
}
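DBOutputFormat.setOutput above expects a stu table with the columns name, age, sex and grade, in the same order as StuHbase.write(PreparedStatement). Below is a minimal sketch of creating such a table through plain JDBC; the column types, the database hbase2db and the credentials are assumptions taken from the driver, so adjust them to your environment.

import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.Statement;

public class CreateStuTable {
    public static void main(String[] args) throws Exception {
        //same URL, user and password as in DBConfiguration.configureDB above
        try (Connection conn = DriverManager.getConnection(
                "jdbc:mysql://localhost:3306/hbase2db", "root", "root");
             Statement stmt = conn.createStatement()) {
            //column order must match StuHbase.write(PreparedStatement): name, age, sex, grade
            stmt.executeUpdate("CREATE TABLE IF NOT EXISTS stu ("
                    + "name VARCHAR(50), "
                    + "age INT, "
                    + "sex VARCHAR(10), "
                    + "grade INT)");
        }
    }
}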
Because the custom StuHbase class defined above is reused here and serves as the Map output key, the class must implement the WritableComparable interface so that the keys can be sorted.
//Here the Mapper, the Reducer and the Driver are written together as static nested classes
package com.hbase2mysql;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.hbase.mapreduce.TableReducer;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.db.DBConfiguration;
import org.apache.hadoop.mapreduce.lib.db.DBInputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
import java.io.IOException;
public class Mysql2Hbase{
public static class Mysql2HbaseMapper extends Mapper<LongWritable,StuHbase,StuHbase,NullWritable>{
@Override
protected void map(LongWritable key, StuHbase value, Context context) throws IOException, InterruptedException {
System.err.println("******************");
System.out.println(value);
context.write(value,NullWritable.get());
}
}
public static class Mysql2HbaseReducer extends TableReducer<StuHbase,NullWritable,ImmutableBytesWritable>{
@Override
protected void reduce(StuHbase key, Iterable<NullWritable> values, Context context) throws IOException, InterruptedException {
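//NOTE: the rowkey below is hardcoded to 03_001, so every reduce group writes to (and overwrites) the same HBase row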
final Put put = new Put(Bytes.toBytes("03_001"));
for(NullWritable v : values){
put.addColumn(Bytes.toBytes("info"),Bytes.toBytes("name"),Bytes.toBytes(key.getName()));
//appending "" converts the number to a string first, so the value is not shown as unreadable bytes in HBase
put.addColumn(Bytes.toBytes("info"),Bytes.toBytes("age"),Bytes.toBytes(key.getAge()+""));
put.addColumn(Bytes.toBytes("info"),Bytes.toBytes("sex"),Bytes.toBytes(key.getSex()));
put.addColumn(Bytes.toBytes("info"),Bytes.toBytes("grade"),Bytes.toBytes(key.getGrade()+""));
}
//context.write expects (ImmutableBytesWritable, Mutation); Put is a subclass of Mutation
context.write(new ImmutableBytesWritable(Bytes.toBytes("03_001")),put);
}
}
public static class Mysql2HbaseDriver extends Configured implements Tool{
public static void main(String[] args) throws Exception {
Configuration conf = HBaseConfiguration.create();
//set the ZooKeeper quorum address so the job can operate on HBase
conf.set("hbase.zookeeper.quorum","192.168.136.150:2181,192.168.136.151:2181,192.168.136.152:2181");
ToolRunner.run(conf,new Mysql2HbaseDriver(),args);
}
@Override
public int run(String[] strings) throws Exception {
Configuration conf = this.getConf();
//configure the MySQL URL, user name and password
DBConfiguration.configureDB(conf,"com.mysql.jdbc.Driver","jdbc:mysql://localhost:3306/hbase2db","root","root");
final Job job = Job.getInstance(conf);
job.setJarByClass(Mysql2HbaseDriver.class);
job.setMapperClass(Mysql2HbaseMapper.class);
job.setMapOutputKeyClass(StuHbase.class);
job.setMapOutputValueClass(NullWritable.class);
//write the results to the stu1 table in HBase
TableMapReduceUtil.initTableReducerJob("stu1",Mysql2HbaseReducer.class,job);
//set the input format to read from a database
job.setInputFormatClass(DBInputFormat.class);
// job, the class implementing DBWritable, the table name, the query conditions, the field to order by, and the fields to read
DBInputFormat.setInput(job,StuHbase.class,"stu",null,"grade","name","age","sex","grade");
boolean b = job.waitForCompletion(true);
return b?0:-1;
}
}
}
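To check the result of the MySQL-to-HBase job, here is a minimal sketch that reads back the row the reducer writes. The rowkey 03_001 and the column family info come from the reducer above; the ZooKeeper address is the same assumption as in the driver.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.client.Get;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.util.Bytes;

public class CheckStu1Row {
    public static void main(String[] args) throws Exception {
        Configuration conf = HBaseConfiguration.create();
        conf.set("hbase.zookeeper.quorum", "192.168.136.150:2181"); //your ZooKeeper quorum
        try (Connection conn = ConnectionFactory.createConnection(conf);
             Table table = conn.getTable(TableName.valueOf("stu1"))) {
            Result result = table.get(new Get(Bytes.toBytes("03_001")));
            //age and grade were stored as strings by the reducer, so Bytes.toString is enough here
            String name  = Bytes.toString(result.getValue(Bytes.toBytes("info"), Bytes.toBytes("name")));
            String age   = Bytes.toString(result.getValue(Bytes.toBytes("info"), Bytes.toBytes("age")));
            String sex   = Bytes.toString(result.getValue(Bytes.toBytes("info"), Bytes.toBytes("sex")));
            String grade = Bytes.toString(result.getValue(Bytes.toBytes("info"), Bytes.toBytes("grade")));
            System.out.println(name + "\t" + age + "\t" + sex + "\t" + grade);
        }
    }
}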