Hadoop MapReduce: Writing WordCount by Hand

  1. Since my machine is a Mac and I have switched the shell to zsh, the Maven environment variables are configured in .zshrc rather than in .bash_profile (a minimal sketch follows this list).
  2. The code for the three phases is below.
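A minimal sketch of the .zshrc entries mentioned in item 1; the install path is an assumption and should point at wherever Maven is actually unpacked:

# assumed Maven install location -- adjust to your own path
export MAVEN_HOME=/usr/local/apache-maven-3.6.3
export PATH=$MAVEN_HOME/bin:$PATH

After editing the file, run source ~/.zshrc and check the setup with mvn -v.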

Map phase

package hadoop.mapReduce.wordCount;
import java.io.IOException;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
public class Wordcount_Map extends Mapper<LongWritable, Text, Text, IntWritable> {
  private final Text k = new Text();
  private final IntWritable v = new IntWritable(1);
  // map() is called once per input line; the line's content arrives in `Text value`
  @Override
  protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
      String line = value.toString();   // convert the line that was read in to a String
      String[] words = line.split(" "); // split the line into words
      for (String word : words) {
          k.set(word);
          context.write(k, v);          // write (word, 1) to the buffer
      }
  }
}
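For an input line like "hello world hello", this mapper emits (hello, 1), (world, 1), (hello, 1); the framework then groups the pairs by key before they reach the reducer.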

Reduce phase

package hadoop.mapReduce.wordCount;
import java.io.IOException;
import java.util.Iterator;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
public class Wordcount_Reduce extends Reducer<Text, IntWritable, Text, IntWritable> {
  private final IntWritable value = new IntWritable(0);
  // reduce() is called once per key; `values` holds every count emitted for that word
  @Override
  protected void reduce(Text key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException {
      int count = 0;
      Iterator<IntWritable> it = values.iterator();
      while (it.hasNext()) {
          count += it.next().get();   // sum the 1s emitted for this word
      }
      value.set(count);
      context.write(key, value);      // write (word, total count)
  }
}
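Continuing the same example, the reducer receives hello -> [1, 1] and world -> [1], and writes out (hello, 2) and (world, 1).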

Driver phase

package hadoop.mapReduce.wordCount;
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
public class WordCountDriver {
  public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
      Configuration conf = new Configuration();
      // 1. get the Job object
      Job job = Job.getInstance(conf);
      // 2. set where the jar is found (via the driver class)
      job.setJarByClass(WordCountDriver.class);
      // 3. wire up the Mapper and the Reducer
      job.setMapperClass(Wordcount_Map.class);
      job.setReducerClass(Wordcount_Reduce.class);
      // 4. set the key/value types of the map output
      job.setMapOutputKeyClass(Text.class);
      job.setMapOutputValueClass(IntWritable.class);
      // 5. set the key/value types of the final output
      job.setOutputKeyClass(Text.class);
      job.setOutputValueClass(IntWritable.class);
      // 6. set the input and output paths from the command line
      FileInputFormat.setInputPaths(job, new Path(args[0]));
      FileOutputFormat.setOutputPath(job, new Path(args[1]));
      // 7. submit the job and wait for it to finish
      boolean result = job.waitForCompletion(true);
      System.exit(result ? 0 : 1);
  }
}
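A sketch of how to package and submit the job; the jar name and the HDFS paths below are assumptions, and the driver class must be given with its full package name:

# assumed jar name and input/output paths -- adjust to your project
mvn clean package
hadoop jar target/wordcount-1.0.jar hadoop.mapReduce.wordCount.WordCountDriver /input /output

Note that the output directory must not exist yet; FileOutputFormat refuses to overwrite an existing path.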
