Hadoop --- Basic MapReduce Programming

I. Experiment Objectives and Requirements
1. Become familiar with Combiner-based parallel programming in MapReduce
2. Develop the ability to design parallel programs independently
II. Experiment Principle and Content
Content:
Suppose a grade has two classes, whose data are stored in class1.csv and class2.csv. Compute the average math score for the whole grade. In the data, the first column is the student ID and the second column is the math score. Requirements: a Combiner class must be used, and the final output must be a single line containing only the average value.
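For illustration only, the input files might look like the following (hypothetical student IDs and scores, not the actual lab data); the job should then emit a single line containing only the grade-wide average.

class1.csv (hypothetical sample)
2023001,80
2023002,90
2023003,100

class2.csv (hypothetical sample)
2023004,70
2023005,80

Expected output (one line): 84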
III. Hardware and Software Environment
A Hadoop cluster
IV. Experiment Process (Steps, Records, Data, Analysis)
Note: include all code and screenshots of the final results, with neat and clear formatting.
The code is as follows:
MyDriver.java

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;


public class MyDriver {

   public static void main(String[] args) throws Exception {
      Configuration conf = new Configuration();
      Job job = Job.getInstance(conf, "JobName");
      job.setJarByClass(MyDriver.class);

      // Mapper, Combiner and Reducer classes
      job.setMapperClass(MyMapper.class);
      job.setCombinerClass(MyCombiner.class);
      job.setReducerClass(MyReducer.class);
      // A single reduce task guarantees one output file with one line
      job.setNumReduceTasks(1);

      // Map output types: key is the constant 1, value is a (count, avg) pair
      job.setMapOutputKeyClass(IntWritable.class);
      job.setMapOutputValueClass(MyWritable.class);

      // Final output types: a single line containing only the average
      job.setOutputKeyClass(NullWritable.class);
      job.setOutputValueClass(IntWritable.class);

      // Input and output are DIRECTORIES (not files)
      FileInputFormat.setInputPaths(job, new Path("hdfs://0.0.0.0:9000/input"));
      FileOutputFormat.setOutputPath(job, new Path("hdfs://0.0.0.0:9000/output/avg_out"));

      if (!job.waitForCompletion(true))
         return;
   }

}
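The job can then be packaged and launched in the usual way. The commands below are only a sketch: they assume the compiled classes are packed into a jar named avg.jar (a hypothetical name) and that the HDFS paths match those hard-coded in the driver.

hdfs dfs -mkdir -p /input
hdfs dfs -put class1.csv class2.csv /input
hadoop jar avg.jar MyDriver
hdfs dfs -cat /output/avg_out/part-r-00000

Because a single reducer is used, the result appears in one part-r-00000 file; the output directory must not already exist when the job is submitted.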

MyMapper.java

import java.io.IOException;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;


public class MyMapper extends Mapper<LongWritable, Text, IntWritable, MyWritable> {

   private MyWritable my = new MyWritable();

   public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
      // Each input line is "studentId,mathScore"; emit a (count = 1, avg = score) pair
      my.setCount(1);
      my.setAvg(Integer.parseInt(value.toString().split(",")[1].trim()));

      // All records share the same key so they are aggregated together
      context.write(new IntWritable(1), my);
   }

}
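If the CSV files ever contain a header row or blank lines (an assumption; the lab data may well be clean), Integer.parseInt would throw an exception. A defensive variant of map() could look like this sketch:

   @Override
   public void map(LongWritable key, Text value, Context context)
         throws IOException, InterruptedException {
      String line = value.toString().trim();
      if (line.isEmpty()) {
         return;                                    // skip blank lines
      }
      String[] fields = line.split(",");
      if (fields.length < 2 || !fields[1].trim().matches("-?\\d+")) {
         return;                                    // skip a header row or malformed record
      }
      my.setCount(1);
      my.setAvg(Integer.parseInt(fields[1].trim()));
      context.write(new IntWritable(1), my);
   }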

MyCombiner.java

import java.io.IOException;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.mapreduce.Reducer;


public class MyCombiner extends Reducer<IntWritable, MyWritable, IntWritable, MyWritable> {

   private MyWritable result = new MyWritable();

   public void reduce(IntWritable key, Iterable<MyWritable> values, Context context)
         throws IOException, InterruptedException {
      int count = 0;
      int sum = 0;
      // Merge the partial (count, avg) pairs produced by this map task;
      // count * avg recovers the partial sum of scores
      for (MyWritable val : values) {
         sum = sum + val.getCount() * val.getAvg();
         count += val.getCount();
      }
      result.setCount(count);
      result.setAvg(sum / count);   // integer division truncates the partial average
      context.write(key, result);

   }

}
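To see why this weighted merge is correct, take hypothetical numbers: if the combiner for class1 sees the scores 80, 90 and 100, it emits (count = 3, avg = 90); if the combiner for class2 sees 70 and 80, it emits (count = 2, avg = 75). The reducer then computes (3 * 90 + 2 * 75) / (3 + 2) = 420 / 5 = 84, which equals the average over all five scores. This is only exact as long as the partial averages are not truncated; with integer division the combiner may round a partial average down, which is why the results section also records a run with avg declared as float.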

MyReducer.java

import java.io.IOException;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapreduce.Reducer;


public class MyReducer extends Reducer<IntWritable, MyWritable, NullWritable, IntWritable> {

   public void reduce(IntWritable key, Iterable<MyWritable> values, Context context)
         throws IOException, InterruptedException {
      int count = 0;
      int sum = 0;
      // Merge the (count, avg) pairs coming from the combiners of all map tasks
      for (MyWritable val : values) {
         sum = sum + val.getCount() * val.getAvg();
         count += val.getCount();
      }
      // Emit a single line containing only the grade-wide average
      context.write(NullWritable.get(), new IntWritable(sum / count));
   }
}
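The reducer above uses integer division, so the printed average is truncated. The results section also shows a run with avg declared as float; a minimal sketch of that variant (MyFloatReducer is a hypothetical name, MyWritable's avg field is assumed to be changed to float, and the combiner would change the same way) could look like this, with the driver's output value class switched to FloatWritable:

import java.io.IOException;

import org.apache.hadoop.io.FloatWritable;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapreduce.Reducer;

public class MyFloatReducer extends Reducer<IntWritable, MyWritable, NullWritable, FloatWritable> {

   public void reduce(IntWritable key, Iterable<MyWritable> values, Context context)
         throws IOException, InterruptedException {
      int count = 0;
      float sum = 0;
      for (MyWritable val : values) {
         sum += val.getCount() * val.getAvg();   // assumes getAvg() now returns float
         count += val.getCount();
      }
      // Floating-point division keeps the fractional part of the average
      context.write(NullWritable.get(), new FloatWritable(sum / count));
   }
}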

MyWritable.java

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;

import org.apache.hadoop.io.Writable;

public class MyWritable implements Writable {

   private int count = 0;
   private int avg = 0;

   @Override
   public String toString() {
      return count + "," + avg;
   }

   public int getCount() {
      return count;
   }

   public void setCount(int count) {
      this.count = count;
   }

   public int getAvg() {
      return avg;
   }

   public void setAvg(int avg) {
      this.avg = avg;
   }

   @Override
   public void readFields(DataInput in) throws IOException {
      // Deserialize the fields in the same order as write() serializes them
      count = in.readInt();
      avg = in.readInt();
   }

   @Override
   public void write(DataOutput out) throws IOException {
      out.writeInt(count);
      out.writeInt(avg);
   }
}
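One point worth noting is that readFields() must read the fields in exactly the order write() writes them. A quick stand-alone round-trip check (a hypothetical helper, not part of the submitted job) could be:

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.IOException;

public class MyWritableRoundTrip {
   public static void main(String[] args) throws IOException {
      MyWritable original = new MyWritable();
      original.setCount(3);
      original.setAvg(90);

      // Serialize with write(), then deserialize into a fresh object with readFields()
      ByteArrayOutputStream bytes = new ByteArrayOutputStream();
      original.write(new DataOutputStream(bytes));

      MyWritable copy = new MyWritable();
      copy.readFields(new DataInputStream(new ByteArrayInputStream(bytes.toByteArray())));

      System.out.println(copy);   // expected output: 3,90
   }
}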

Run results
When avg is declared as int:
[Screenshot: job output with the average as an integer]
When avg is declared as float:
[Screenshot: job output with the average as a float]
