Below is a simple example of a MapReduce program that computes per-subject and total-score rankings from a grades table stored in HBase and writes the results to HDFS:

```
import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.hbase.mapreduce.TableMapper;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class RankCalculator {

// Mapper: reads each row of the HBase "grades" table and emits (subject, score).
public static class RankMapper extends TableMapper<Text, IntWritable> {

private final Text subject = new Text();
private final IntWritable score = new IntWritable();

public void map(ImmutableBytesWritable row, Result value, Context context)
    throws IOException, InterruptedException {
  String subjectStr = Bytes.toString(value.getValue(Bytes.toBytes("grades"),
      Bytes.toBytes("subject")));
  subject.set(subjectStr);
  int scoreInt = Bytes.toInt(value.getValue(Bytes.toBytes("grades"),
      Bytes.toBytes("score")));
  score.set(scoreInt);
  context.write(subject, score);
}

}

// Reducer: sums the scores received for each subject key.
public static class RankReducer extends Reducer<Text, IntWritable, Text, IntWritable> {

private IntWritable result = new IntWritable();

public void reduce(Text key, Iterable<IntWritable> values,
    Context context) throws IOException, InterruptedException {
  int sum = 0;
  for (IntWritable val : values) {
    sum += val.get();
  }
  result.set(sum);
  context.write(key, result);
}

}

public static void main(String[] args) throws Exception {
  Configuration conf = HBaseConfiguration.create();
  Job job = Job.getInstance(conf, "rank calculator");
  job.setJarByClass(RankCalculator.class);
  Scan scan = new Scan();
  scan.setCaching(500);
  scan.setCacheBlocks(false);
  TableMapReduceUtil.initTableMapperJob("grades", scan, RankMapper.class,
      Text.class, IntWritable.class, job);
  job.setReducerClass(RankReducer.class);
  // Write the reduced results to the HDFS output path passed as the first argument.
  FileOutputFormat.setOutputPath(job, new Path(args[0]));
  System.exit(job.waitForCompletion(true) ? 0 : 1);
}
}
```
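Once compiled and packaged, the job can be submitted with something like `hadoop jar rank-calculator.jar RankCalculator /user/hadoop/rank-output` (the jar name and output path here are placeholders); the per-subject totals end up in the `part-r-*` files under that HDFS directory.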
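One assumption worth calling out: `Bytes.toInt` in the mapper only succeeds if each score cell was written as a raw 4-byte integer (`Bytes.toBytes(int)`), not as a string. A minimal sketch of loading one row in that layout, assuming a `grades` column family; the row key `s001-math` and the score value are illustrative:

```
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.util.Bytes;

public class LoadGrade {
  public static void main(String[] args) throws Exception {
    Configuration conf = HBaseConfiguration.create();
    try (Connection conn = ConnectionFactory.createConnection(conf);
         Table table = conn.getTable(TableName.valueOf("grades"))) {
      // One row per (student, subject); the row key scheme is illustrative.
      Put put = new Put(Bytes.toBytes("s001-math"));
      put.addColumn(Bytes.toBytes("grades"), Bytes.toBytes("subject"),
          Bytes.toBytes("math"));
      // Store the score as a raw 4-byte int so Bytes.toInt can read it back.
      put.addColumn(Bytes.toBytes("grades"), Bytes.toBytes("score"),
          Bytes.toBytes(95));
      table.put(put);
    }
  }
}
```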
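Also note that, as written, the reducer produces the total of all scores per subject rather than an actual ranking. To rank students within a subject, the mapper would have to carry the student identity, and the reducer would sort the collected scores. Below is one possible sketch, assuming the mapper were changed to emit `(subject, "student:score")` Text pairs; `SubjectRankReducer` and the value layout are hypothetical, not from the original post:

```
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

// Hypothetical ranking reducer: receives every "student:score" string for one
// subject, sorts by score descending, and writes "subject  student  rank  score".
public class SubjectRankReducer extends Reducer<Text, Text, Text, Text> {

  @Override
  public void reduce(Text subject, Iterable<Text> values, Context context)
      throws IOException, InterruptedException {
    // Materialize the values first: Hadoop reuses the Text object across iterations.
    List<String[]> entries = new ArrayList<>();
    for (Text val : values) {
      entries.add(val.toString().split(":", 2)); // [student, score]
    }
    // Sort by score, highest first, then assign ranks 1, 2, 3, ...
    entries.sort((a, b) ->
        Integer.compare(Integer.parseInt(b[1]), Integer.parseInt(a[1])));
    int rank = 1;
    for (String[] e : entries) {
      context.write(subject, new Text(e[0] + "\t" + rank++ + "\t" + e[1]));
    }
  }
}
```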
