编写mapreduce程序从HBase的一张表中求某一列的平均数

 表中的数据

（图：data_t 表中的示例数据）

求HBase数据库中data_t表中的attention列的均值

package com.hbase.demo;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.hbase.mapreduce.TableMapper;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.DoubleWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

import java.io.IOException;
import java.util.List;

public class GetAttentionMean {

	// Source table and the single column this job reads.
	// Bytes.toBytes is charset-safe, unlike String.getBytes() which uses the
	// platform default charset.
	private static final String TABLE_NAME = "data_t";
	private static final byte[] FAMILY = Bytes.toBytes("Info");
	private static final byte[] QUALIFIER = Bytes.toBytes("attention");

	/**
	 * Entry point. Configures and runs a MapReduce job that scans the
	 * {@code Info:attention} column of the HBase table {@code data_t},
	 * computes the mean of all values, and writes the single result pair to
	 * {@code /attention/mean} on HDFS.
	 *
	 * @param args unused
	 * @throws Exception if job setup fails or the job does not complete successfully
	 */
	public static void main(String[] args) throws Exception {
		Configuration conf = HBaseConfiguration.create();

		FileSystem fs = FileSystem.get(conf);

		Job job = Job.getInstance(conf);
		job.setJarByClass(GetAttentionMean.class);

		// Restrict the scan to the one column we need so whole rows are not
		// shipped to the mappers.
		Scan scan = new Scan();
		scan.addColumn(FAMILY, QUALIFIER);

		TableMapReduceUtil.initTableMapperJob(
				Bytes.toBytes(TABLE_NAME), // source table
				scan,                      // scan limited to Info:attention
				MyMapper.class,            // mapper class
				Text.class,                // mapper output key type
				DoubleWritable.class,      // mapper output value type
				job                        // job to configure
		);

		job.setReducerClass(MyReducer.class);
		job.setOutputKeyClass(Text.class);
		job.setOutputValueClass(DoubleWritable.class);

		Path outputPath = new Path("/attention/mean");

		// MapReduce refuses to start if the output directory already exists,
		// so remove any leftover from a previous run.
		if (fs.exists(outputPath)) {
			fs.delete(outputPath, true);
		}

		FileOutputFormat.setOutputPath(job, outputPath);

		boolean isSuccess = job.waitForCompletion(true);

		if (!isSuccess) {
			throw new IOException("任务运行错误!");
		}

		// The failure case already threw above, so success is the only path here.
		System.exit(0);
	}

	/**
	 * Emits one (constant key, attention value) pair per row that actually
	 * contains {@code Info:attention}. Using a single constant key funnels
	 * every value to one reduce group so a global mean can be computed.
	 */
	public static class MyMapper extends TableMapper<Text, DoubleWritable> {

		// Reused across map() calls to avoid per-record allocations.
		private final Text outKey = new Text("attention_mean");
		private final DoubleWritable outValue = new DoubleWritable();

		@Override
		protected void map(ImmutableBytesWritable key, Result value, Context context)
				throws IOException, InterruptedException {

			// A row may lack the column even though the scan requested it.
			if (!value.containsColumn(FAMILY, QUALIFIER)) {
				return;
			}

			List<Cell> cells = value.getColumnCells(FAMILY, QUALIFIER);
			// Cells are ordered newest-first; use the most recent version.
			Cell cell = cells.get(0);
			String text = Bytes.toString(CellUtil.cloneValue(cell));

			double attention;
			try {
				attention = Double.parseDouble(text);
			} catch (NumberFormatException e) {
				// Skip rows whose stored value is not a valid number rather
				// than failing the entire job on one bad cell.
				return;
			}

			outValue.set(attention);
			context.write(outKey, outValue);
		}

	}

	/**
	 * Averages all attention values gathered under the single mapper key and
	 * writes one (key, mean) pair.
	 */
	public static class MyReducer extends Reducer<Text, DoubleWritable, Text, DoubleWritable> {

		// Reused across reduce() calls to avoid per-group allocations.
		private final DoubleWritable outValue = new DoubleWritable();

		@Override
		protected void reduce(Text key, Iterable<DoubleWritable> values, Context context)
				throws IOException, InterruptedException {

			long count = 0;
			double sum = 0;
			for (DoubleWritable value : values) {
				count++;
				sum += value.get();
			}

			// Guard against an empty group (would otherwise emit NaN).
			if (count > 0) {
				outValue.set(sum / count);
				context.write(key, outValue);
			}
		}
	}
}

结果:

 

pom.xml

<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
  <modelVersion>4.0.0</modelVersion>

  <groupId>com.hbase</groupId>
  <artifactId>demo</artifactId>
  <version>0.0.1-SNAPSHOT</version>
  <packaging>jar</packaging>

  <name>demo</name>
  <url>http://maven.apache.org</url>

  <properties>
    <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
  </properties>

  <build>
    <plugins>
      <plugin>
        <groupId>org.apache.maven.plugins</groupId>
        <artifactId>maven-shade-plugin</artifactId>
        <version>1.4</version>
        <executions>
          <execution>
            <phase>package</phase>
            <goals>
              <goal>shade</goal>
            </goals>
          </execution>
        </executions>
      </plugin>
    </plugins>
  </build>

  <dependencies>
    <dependency>
      <groupId>org.apache.hbase</groupId>
      <artifactId>hbase-client</artifactId>
      <version>1.2.6.1</version>
    </dependency>
    <dependency>
      <groupId>jdk.tools</groupId>
      <artifactId>jdk.tools</artifactId>
      <version>1.8</version>
      <scope>system</scope>
      <systemPath>${JAVA_HOME}/lib/tools.jar</systemPath>
    </dependency>
    <dependency>
      <groupId>org.apache.hbase</groupId>
      <artifactId>hbase-server</artifactId>
      <version>1.2.6.1</version>
    </dependency>
  </dependencies>
</project>
 

你可能感兴趣的:(大数据)