1. Create the table 'wc' in HBase
create 'wc','cf1'
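Before the job can run, the input text files have to exist in the HDFS directory the job reads (hdfs://master.cn:8020/user/input/, see step 2). A minimal sketch, where words.txt is a hypothetical local sample file:
hdfs dfs -mkdir -p /user/input
hdfs dfs -put words.txt /user/input/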
2. The WordCount job
package zzw.cn;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import java.io.IOException;
public class WordCountJob
{
    public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException
    {
        String inputPath = "hdfs://master.cn:8020/user/input/";

        // Use the HBase-aware configuration so the job can reach ZooKeeper/HBase
        Configuration conf = HBaseConfiguration.create();
        conf.set("hbase.zookeeper.quorum", "master.cn,slave1.cn,slave2.cn");

        Job job = Job.getInstance(conf);
        job.setJobName("word count");
        job.setJarByClass(WordCountJob.class);

        // Map side: plain text in, (word, 1) pairs out
        job.setMapperClass(WordCountMapper.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(IntWritable.class);
        FileInputFormat.addInputPath(job, new Path(inputPath));

        // Reduce side: write the summed counts into the HBase table 'wc'
        TableMapReduceUtil.initTableReducerJob(
                "wc",                   // output table
                WordCountReduce.class,  // reducer class
                job);

        boolean flag = job.waitForCompletion(true);
        if (flag)
        {
            System.out.println("job success");
        }
    }
}
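TableMapReduceUtil.initTableReducerJob takes care of the reducer-side wiring. Leaving aside its classpath handling (by default it also ships the HBase jars with the job), the call above is roughly equivalent to this hand-written sketch (TableOutputFormat is org.apache.hadoop.hbase.mapreduce.TableOutputFormat):
job.setReducerClass(WordCountReduce.class);
job.setOutputFormatClass(TableOutputFormat.class);
job.getConfiguration().set(TableOutputFormat.OUTPUT_TABLE, "wc");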
3. The Mapper class
package zzw.cn;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.util.StringUtils;
import java.io.IOException;
public class WordCountMapper extends Mapper<LongWritable, Text, Text, IntWritable>
{
    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException
    {
        // Split each input line on spaces and emit (word, 1) for every token
        String[] strs = StringUtils.split(value.toString(), ' ');
        for (String s : strs)
        {
            context.write(new Text(s), new IntWritable(1));
        }
    }
}
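The map logic can be sanity-checked locally without a cluster. A minimal sketch using MRUnit; this assumes adding the org.apache.mrunit:mrunit test dependency (hadoop2 classifier), which is not in the pom of step 5:
package zzw.cn;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mrunit.mapreduce.MapDriver;
import java.io.IOException;
public class WordCountMapperTest
{
    public static void main(String[] args) throws IOException
    {
        // One input line in, the expected (word, 1) pairs out, in emit order
        MapDriver.newMapDriver(new WordCountMapper())
                .withInput(new LongWritable(0), new Text("hello hadoop hello"))
                .withOutput(new Text("hello"), new IntWritable(1))
                .withOutput(new Text("hadoop"), new IntWritable(1))
                .withOutput(new Text("hello"), new IntWritable(1))
                .runTest();
    }
}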
4. The Reducer class
package zzw.cn;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableReducer;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import java.io.IOException;
public class WordCountReduce extends TableReducer<Text, IntWritable, ImmutableBytesWritable>
{
    @Override
    protected void reduce(Text key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException
    {
        // Sum all the 1s emitted by the mappers for this word
        int i = 0;
        for (IntWritable value : values)
        {
            i += value.get();
        }
        // Row key = the word; store the total under cf1:count
        Put put = new Put(Bytes.toBytes(key.toString()));
        put.addColumn(Bytes.toBytes("cf1"), Bytes.toBytes("count"), Bytes.toBytes(i + ""));
        context.write(null, put); // TableOutputFormat only uses the Put; the key may be null
    }
}
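Note the value encoding: the count is written as a string (Bytes.toBytes(i + "")), which is why the scan in step 7 shows readable numbers. If other code needs to read the count numerically, the usual alternative is the native int encoding; a sketch of the changed line:
put.addColumn(Bytes.toBytes("cf1"), Bytes.toBytes("count"), Bytes.toBytes(i)); // 4-byte big-endian int
Readers would then decode with Bytes.toInt(...) instead of Bytes.toString(...), and the shell scan would print the raw bytes rather than a readable number.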
5. pom.xml
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>
    <groupId>zzw.cn</groupId>
    <artifactId>hbase.mapreduce</artifactId>
    <version>1.0-SNAPSHOT</version>
    <dependencies>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-common</artifactId>
            <version>2.6.0-cdh5.15.0</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-client</artifactId>
            <version>2.6.0-cdh5.15.0</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-hdfs</artifactId>
            <version>2.6.0-cdh5.15.0</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hbase</groupId>
            <artifactId>hbase-common</artifactId>
            <version>1.2.0-cdh5.15.0</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hbase</groupId>
            <artifactId>hbase-client</artifactId>
            <version>1.2.0-cdh5.15.0</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hbase</groupId>
            <artifactId>hbase-server</artifactId>
            <version>1.2.0-cdh5.15.0</version>
        </dependency>
    </dependencies>
    <repositories>
        <repository>
            <id>cloudera</id>
            <url>https://repository.cloudera.com/artifactory/cloudera-repos</url>
        </repository>
    </repositories>
</project>
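With this pom, build the job jar with Maven; the path /opt/datas/HBaseMrDemo.jar in step 6 assumes the jar from target/ was copied there:
mvn clean package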
6. Package into a jar and run it on the Hadoop cluster
hadoop jar /opt/datas/HBaseMrDemo.jar
(If the jar's manifest does not declare a Main-Class, append the main class to the command: hadoop jar /opt/datas/HBaseMrDemo.jar zzw.cn.WordCountJob)
7. Scan the 'wc' table in HBase for the results:
hbase(main):022:0> scan 'wc'
ROW COLUMN+CELL
hadoop column=cf1:count, timestamp=1543742364591, value=2
hbase column=cf1:count, timestamp=1543742364591, value=1
hello column=cf1:count, timestamp=1543742364591, value=2
name column=cf1:count, timestamp=1543742364591, value=3
world column=cf1:count, timestamp=1543742364591, value=1
zookeeper column=cf1:count, timestamp=1543742364591, value=1
6 row(s) in 0.0250 seconds
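The same lookup can be done from Java with the HBase client API. A minimal sketch, reusing the configuration from step 2; the class name WcReader and the probed word "hadoop" are just for illustration:
package zzw.cn;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.client.Get;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.util.Bytes;
import java.io.IOException;
public class WcReader
{
    public static void main(String[] args) throws IOException
    {
        Configuration conf = HBaseConfiguration.create();
        conf.set("hbase.zookeeper.quorum", "master.cn,slave1.cn,slave2.cn");
        try (Connection conn = ConnectionFactory.createConnection(conf);
             Table table = conn.getTable(TableName.valueOf("wc")))
        {
            // Row key is the word; the count lives in cf1:count as a string
            Result r = table.get(new Get(Bytes.toBytes("hadoop")));
            byte[] v = r.getValue(Bytes.toBytes("cf1"), Bytes.toBytes("count"));
            System.out.println("hadoop -> " + Bytes.toString(v));
        }
    }
}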