Hadoop's Hello World
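The program below counts how many times each word appears in a set of text files with a single MapReduce job: the mapper emits a (word, 1) pair for every token it sees, and the reducer sums those pairs per word.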

// First steps with Hadoop: this is the first example, WordCount

import java.io.IOException;
import java.util.StringTokenizer;

import org.apache.hadoop.conf.*;
import org.apache.hadoop.fs.*;
import org.apache.hadoop.io.*;
import org.apache.hadoop.mapreduce.*;
import org.apache.hadoop.mapreduce.lib.input.*;
import org.apache.hadoop.mapreduce.lib.output.*;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

/*
 * author : guigu
 * time : 2014-10-17
 */
public class NewWordCount extends Configured implements Tool {

    // Mapper: splits each input line into tokens and emits a (word, 1) pair per token.
    public static class Map extends Mapper<LongWritable, Text, Text, IntWritable> {

        private final static IntWritable one = new IntWritable(1);
        private Text word = new Text();

        @Override
        public void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            String line = value.toString();
            StringTokenizer tokenizer = new StringTokenizer(line);
            while (tokenizer.hasMoreTokens()) {
                word.set(tokenizer.nextToken());
                context.write(word, one);
            }
        }
    }

    // Reducer: sums all the 1s emitted for a word to get its total count.
    public static class Reduce extends Reducer<Text, IntWritable, Text, IntWritable> {

        @Override
        public void reduce(Text key, Iterable<IntWritable> values, Context context)
                throws IOException, InterruptedException {
            int sum = 0;
            for (IntWritable val : values) {
                sum += val.get();
            }
            context.write(key, new IntWritable(sum));
        }
    }

    public static void main(String[] args) throws Exception {
        int ret = ToolRunner.run(new NewWordCount(), args);
        System.exit(ret);
    }

    @Override
    public int run(String[] args) throws Exception {
        Job job = new Job(getConf());
        job.setJarByClass(NewWordCount.class);
        job.setJobName("newwordcount");

        // Output types of the job: (word, count).
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);

        job.setMapperClass(Map.class);
        job.setReducerClass(Reduce.class);

        job.setInputFormatClass(TextInputFormat.class);
        job.setOutputFormatClass(TextOutputFormat.class);

        // args[0] is the input path, args[1] the (not yet existing) output path.
        FileInputFormat.setInputPaths(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        boolean success = job.waitForCompletion(true);
        return success ? 0 : 1;
    }
}
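To run this end to end, compile against the Hadoop classpath, package the classes into a jar, and submit it with the hadoop launcher. A minimal sketch, assuming the Hadoop binaries are on the PATH; the jar name and the HDFS paths are placeholders of my own choosing, not part of the example itself:

javac -classpath `hadoop classpath` NewWordCount.java
jar cf newwordcount.jar NewWordCount*.class
hadoop jar newwordcount.jar NewWordCount /user/guigu/input /user/guigu/output

Note that the output directory must not exist before the job runs: FileOutputFormat fails the job rather than overwrite an existing path. Afterwards, the part-r-* files under the output directory contain one tab-separated word and count per line.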

