Example: solving word count with HBase + MapReduce


Mapper class

package cn.bigdate01.hadoop.mr.wordcount;

import java.io.IOException;

import org.apache.commons.lang.StringUtils;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

public class WordCountMapper extends Mapper<LongWritable, Text, Text, LongWritable> {

	@Override
	protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
		// Get the content of one line
		String line = value.toString();
		// Split the line into an array of words
		String[] words = StringUtils.split(line, " ");
		// Emit (word, 1) for each word
		for (String word : words) {
			context.write(new Text(word), new LongWritable(1));
		}
	}
}

Reducer class (unlike the earlier version without HBase, it writes the output directly into an HBase table)

package cn.bigdate01.hadoop.mr.wordcount;

import java.io.IOException;

import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableReducer;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;

public class WCTableReducer extends TableReducer<Text, LongWritable, ImmutableBytesWritable> {

	public static final byte[] CF = "cf".getBytes();       // column family
	public static final byte[] COUNT = "count".getBytes(); // column qualifier

	@Override
	public void reduce(Text key, Iterable<LongWritable> values, Context context) throws IOException, InterruptedException {

		int i = 0;

		for (LongWritable val : values) {
			i += val.get();
		}

		// Row key is the word; the count goes into cf:count
		Put put = new Put(Bytes.toBytes(key.toString()));
		put.addColumn(CF, COUNT, Bytes.toBytes(i));

		context.write(null, put);
	}

}
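
The reducer writes Put objects into an existing table, so the target table wc with column family cf must be created before the job runs (for example with create 'wc','cf' in the HBase shell). The following is a minimal Java sketch of the same step using the HBase client API, assuming an HBase 1.x client where HTableDescriptor is still current; the CreateWcTable class is not part of the original post, and the ZooKeeper quorum is copied from the Runner class below.

package cn.bigdate01.hadoop.mr.wordcount;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Admin;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;

// Hypothetical helper class, not part of the original post
public class CreateWcTable {

	public static void main(String[] args) throws Exception {
		Configuration conf = HBaseConfiguration.create();
		conf.set("hbase.zookeeper.quorum",
				"192.168.235.132:2181,192.168.235.147:2181,192.168.235.148:2181");

		try (Connection conn = ConnectionFactory.createConnection(conf);
		     Admin admin = conn.getAdmin()) {
			TableName table = TableName.valueOf("wc");
			if (!admin.tableExists(table)) {
				// One column family "cf", matching WCTableReducer.CF
				HTableDescriptor desc = new HTableDescriptor(table);
				desc.addFamily(new HColumnDescriptor("cf"));
				admin.createTable(desc);
			}
		}
	}
}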

Runner class (configures the environment and submits the MR job)

package cn.bigdate01.hadoop.mr.wordcount;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;

/**
 * Describes a job (which mapper class to use, which reducer class, where the input files are,
 * where the output goes, ...) and then submits the job to the Hadoop cluster.
 * @author surface
 *
 */
//cn.bigdate01.hadoop.mr.wordcount.WordCountRunner

public class WordCountRunner {

	public static void main(String[] args) throws Exception {
		Configuration conf = new Configuration();
		conf.set("hbase.zookeeper.quorum","192.168.235.132:2181,192.168.235.147:2181,192.168.235.148:2181");
		// Configure the Hadoop cluster
		conf.set("fs.defaultFS","hdfs://192.168.235.139:8020");
		conf.set("yarn.resourcemanager.hostname","bigdate06");
		Job wcjob = Job.getInstance(conf);

		// Set the jar that contains the job's classes
		wcjob.setJarByClass(WordCountRunner.class);

		// Which mapper class the job uses
		wcjob.setMapperClass(WordCountMapper.class);

		// Key/value types emitted by the mapper
		wcjob.setMapOutputKeyClass(Text.class);
		wcjob.setMapOutputValueClass(LongWritable.class);

		// Path of the source data to process
		FileInputFormat.setInputPaths(wcjob, "hdfs://192.168.235.139:9000/wc/srcdate/wc.txt");

		// HBase reducer
		String targetTable = "wc";
		TableMapReduceUtil.initTableReducerJob(
				targetTable,           // the table the reducer writes to
				WCTableReducer.class,  // the reducer class to use
				wcjob
				);

		// Submit the job (true prints progress to the console)
		boolean flag = wcjob.waitForCompletion(true);
		if (flag) {
			System.out.println("success!");
		}
	}

}
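
After the job finishes successfully, each word appears as a row key in the wc table, with its count stored in cf:count. Because the reducer encoded the count with Bytes.toBytes on an int, it should be decoded with Bytes.toInt when read back. The following is a minimal verification sketch; the ScanWcTable class is hypothetical and not part of the original post, and the connection settings mirror the Runner class above.

package cn.bigdate01.hadoop.mr.wordcount;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.ResultScanner;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.util.Bytes;

// Hypothetical helper class for checking the output table
public class ScanWcTable {

	public static void main(String[] args) throws Exception {
		Configuration conf = HBaseConfiguration.create();
		conf.set("hbase.zookeeper.quorum",
				"192.168.235.132:2181,192.168.235.147:2181,192.168.235.148:2181");

		try (Connection conn = ConnectionFactory.createConnection(conf);
		     Table table = conn.getTable(TableName.valueOf("wc"));
		     ResultScanner scanner = table.getScanner(new Scan())) {
			for (Result r : scanner) {
				String word = Bytes.toString(r.getRow());
				// The reducer wrote the count as a 4-byte int
				int count = Bytes.toInt(r.getValue(WCTableReducer.CF, WCTableReducer.COUNT));
				System.out.println(word + "\t" + count);
			}
		}
	}
}

The same check can be done with scan 'wc' in the HBase shell, although the int-encoded counts will show up there as raw escaped bytes rather than numbers.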
