目标:将HBase中student表中的数据,通过MR迁入student_mr表中。
<dependency>
    <groupId>org.apache.hbase</groupId>
    <artifactId>hbase-server</artifactId>
    <version>1.3.1</version>
</dependency>
<dependency>
    <groupId>org.apache.hbase</groupId>
    <artifactId>hbase-client</artifactId>
    <version>1.3.1</version>
</dependency>
[hadoop@hadoop112 hbase-1.3.1]$ bin/hbase mapredcp
SLF4J: Class path contains multiple SLF4J bindings.
SLF4J: Found binding in [jar:file:/opt/module/hbase-1.3.1/lib/slf4j-log4j12-1.7.5.jar!/org/slf4j/impl/StaticLoggerBinder.class]
SLF4J: Found binding in [jar:file:/opt/module/hadoop-2.7.2/share/hadoop/common/lib/slf4j-log4j12-1.7.10.jar!/org/slf4j/impl/StaticLoggerBinder.class]
SLF4J: See http://www.slf4j.org/codes.html#multiple_bindings for an explanation.
SLF4J: Actual binding is of type [org.slf4j.impl.Log4jLoggerFactory]
/opt/module/hbase-1.3.1/lib/zookeeper-3.4.6.jar:/opt/module/hbase-1.3.1/lib/guava-12.0.1.jar:/opt/module/hbase-1.3.1/lib/metrics-core-2.2.0.jar:/opt/module/hbase-1.3.1/lib/protobuf-java-2.5.0.jar:/opt/module/hbase-1.3.1/lib/hbase-common-1.3.1.jar:/opt/module/hbase-1.3.1/lib/hbase-protocol-1.3.1.jar:/opt/module/hbase-1.3.1/lib/htrace-core-3.1.0-incubating.jar:/opt/module/hbase-1.3.1/lib/hbase-client-1.3.1.jar:/opt/module/hbase-1.3.1/lib/hbase-hadoop-compat-1.3.1.jar:/opt/module/hbase-1.3.1/lib/netty-all-4.0.23.Final.jar:/opt/module/hbase-1.3.1/lib/hbase-server-1.3.1.jar:/opt/module/hbase-1.3.1/lib/hbase-prefix-tree-1.3.1.jar
(1)执行环境变量的导入(临时生效,在命令行执行下述操作)
[hadoop@hadoop112 hbase-1.3.1]$ export HBASE_HOME=/opt/module/hbase-1.3.1
[hadoop@hadoop112 hbase-1.3.1]$ export HADOOP_HOME=/opt/module/hadoop-2.7.2
[hadoop@hadoop112 hbase-1.3.1]$ export HADOOP_CLASSPATH=`${HBASE_HOME}/bin/hbase mapredcp`
(2)永久生效:在/etc/profile配置
[hadoop@hadoop112 hbase-1.3.1]$ export HBASE_HOME=/opt/module/hbase-1.3.1
[hadoop@hadoop112 hbase-1.3.1]$ export HADOOP_HOME=/opt/module/hadoop-2.7.2
并在hadoop-env.sh中配置:(注意:在for循环之后配)
export HADOOP_CLASSPATH=$HADOOP_CLASSPATH:/opt/module/hbase-1.3.1/lib/*
package com.fczheng.mr1;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableMapper;
import org.apache.hadoop.io.NullWritable;
import java.io.IOException;
/**
 * Reads rows from the source HBase table and re-wraps each row as a {@link Put}.
 *
 * <p>TableMapper is a Mapper subclass specialised for reading from an HBase table:
 * the input key is the rowkey ({@link ImmutableBytesWritable}) and the input value
 * is the whole row ({@link Result}). The output types declared here must match
 * what the reducer consumes: rowkey -> Put.
 *
 * @author fczheng
 * @create 2019-08-16 16:27
 */
public class ReadStudentMapper extends TableMapper<ImmutableBytesWritable, Put> {

    /**
     * Copies every cell of the input row into a single Put keyed by the same rowkey.
     *
     * @param key     rowkey of the current row
     * @param value   all cells of the row returned by the scan
     * @param context MR context used to emit (rowkey, Put) pairs
     */
    @Override
    protected void map(ImmutableBytesWritable key, Result value, Context context)
            throws IOException, InterruptedException {
        // One Put per row, addressed by the original rowkey.
        Put put = new Put(key.get());
        for (Cell cell : value.rawCells()) {
            // add(Cell) keeps the original family/qualifier/timestamp/value intact.
            put.add(cell);
        }
        context.write(key, put);
    }
}
package com.fczheng.mr1;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableReducer;
import org.apache.hadoop.io.NullWritable;
import java.io.IOException;
/**
 * Writes the mapper's {@link Put} objects into the target HBase table.
 *
 * <p>TableReducer (a Reducer subclass specialised for writing to HBase) requires
 * the output value to be a Mutation; the output key is ignored by the table
 * output format, so {@link NullWritable} is used.
 *
 * @author fczheng
 * @create 2019-08-16 16:27
 */
public class ReadStudentReducer extends TableReducer<ImmutableBytesWritable, Put, NullWritable> {

    /**
     * Forwards every Put received for a rowkey straight to the output table.
     *
     * @param key     rowkey (unused for output addressing — the Put carries it)
     * @param values  Puts emitted by the mapper for this rowkey
     * @param context MR context writing into the target table
     */
    @Override
    protected void reduce(ImmutableBytesWritable key, Iterable<Put> values, Context context)
            throws IOException, InterruptedException {
        for (Put value : values) {
            context.write(NullWritable.get(), value);
        }
    }
}
package com.fczheng.mr1;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
/**
 * Driver that copies one HBase table into another via MapReduce
 * ({@link ReadStudentMapper} -> {@link ReadStudentReducer}).
 *
 * <p>The target table must already exist before the job is submitted.
 *
 * @author fczheng
 * @create 2019-08-16 16:28
 */
public class ReadStudentDriver implements Tool {

    private Configuration conf;

    /**
     * Builds and submits the table-to-table copy job.
     *
     * @param args args[0]: source table, args[1]: target table,
     *             optional args[2]/args[3]: scan startrow/stoprow
     * @return 0 on success, 1 on failure
     * @throws Exception if job construction or submission fails
     */
    @Override
    public int run(String[] args) throws Exception {
        if (args.length < 2) {
            System.err.println(
                    "Usage: ReadStudentDriver <sourceTable> <targetTable> [startRow stopRow]");
            return 1;
        }
        Job job = Job.getInstance(conf, "StudentMR");
        job.setJarByClass(ReadStudentDriver.class);

        // Restrict the scan range only when both startrow and stoprow are supplied;
        // otherwise scan the whole source table (backward-compatible default).
        Scan scan = (args.length >= 4)
                ? new Scan(Bytes.toBytes(args[2]), Bytes.toBytes(args[3]))
                : new Scan();

        TableMapReduceUtil.initTableMapperJob(
                args[0],                      // source table
                scan,                         // scan range
                ReadStudentMapper.class,      // mapper
                ImmutableBytesWritable.class, // mapper output key
                Put.class,                    // mapper output value
                job
        );
        TableMapReduceUtil.initTableReducerJob(
                args[1],                      // target table
                ReadStudentReducer.class,     // reducer
                job
        );
        return job.waitForCompletion(true) ? 0 : 1;
    }

    @Override
    public void setConf(Configuration conf) {
        this.conf = conf;
    }

    @Override
    public Configuration getConf() {
        return conf;
    }

    /**
     * Entry point. HBaseConfiguration.create() loads hbase-site.xml so the job
     * can locate ZooKeeper and the cluster.
     */
    public static void main(String[] args) throws Exception {
        int status = ToolRunner.run(HBaseConfiguration.create(), new ReadStudentDriver(), args);
        System.exit(status);
    }
}
yarn jar hbase-plugin-1.0-SNAPSHOT.jar com.fczheng.mr1.ReadStudentDriver student student_mr
提示:运行任务前,如果待数据导入的表不存在,则需要提前创建。