MapReduce Examples

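Three classic beginner MapReduce programs follow: data deduplication, sorting, and computing averages. They share the same driver boilerplate; only the Mapper and Reducer bodies change.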
Data deduplication:
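The trick is to let the framework do the work: the mapper emits every input line as a key (with an empty value), the shuffle phase groups identical keys together, and the reducer writes each distinct key out exactly once.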

import java.io.IOException;

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class Dedup {
    // Emit each input line as the key; duplicates collapse during the shuffle.
    public static class Map extends Mapper<Object, Text, Text, Text> {
        @Override
        public void map(Object key, Text value, Context context)
                throws IOException, InterruptedException {
            context.write(value, new Text(""));
        }
    }

    // Each distinct line arrives exactly once; write it out with an empty value.
    public static class Reduce extends Reducer<Text, Text, Text, Text> {
        @Override
        public void reduce(Text key, Iterable<Text> values, Context context)
                throws IOException, InterruptedException {
            context.write(key, new Text(""));
        }
    }

    public static void main(String[] args) throws Exception {
        if (args.length != 2) {
            System.err.println("Usage: Dedup <input path> <output path>");
            System.exit(-1);
        }
        Job job = new Job();
        job.setJobName("Dedup");
        job.setJarByClass(Dedup.class);
        job.setMapperClass(Map.class);
        // The inner Reduce class, not the framework's Reducer base class.
        job.setCombinerClass(Reduce.class);
        job.setReducerClass(Reduce.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);
        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}
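A hypothetical invocation, assuming the class is packaged into dedup.jar and the input sits under /user/hadoop/dedup_in (both names are placeholders):

hadoop jar dedup.jar Dedup /user/hadoop/dedup_in /user/hadoop/dedup_out

Because the reducer ignores its values entirely, the same Reduce class can also serve as the combiner (as the driver does above), discarding map-side duplicates before they cross the network.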

Sorting:
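Sorting also rides on the framework: MapReduce sorts keys during the shuffle, so the mapper only has to parse each number into an IntWritable key. The reducer then receives the keys in ascending order and prefixes each one with a running rank.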

import java.io.IOException;

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class Sort {
    // Parse each line as an integer and emit it as the key; the framework
    // sorts keys during the shuffle, so no explicit sorting code is needed.
    public static class Map extends Mapper<Object, Text, IntWritable, IntWritable> {
        private IntWritable data = new IntWritable();

        @Override
        public void map(Object key, Text value, Context context)
                throws IOException, InterruptedException {
            data.set(Integer.parseInt(value.toString().trim()));
            context.write(data, new IntWritable(1));
        }
    }

    // Keys arrive in ascending order; assign each occurrence a running rank.
    public static class Reduce extends Reducer<IntWritable, IntWritable, IntWritable, IntWritable> {
        private IntWritable linenum = new IntWritable(1);

        @Override
        public void reduce(IntWritable key, Iterable<IntWritable> values, Context context)
                throws IOException, InterruptedException {
            // One output line per occurrence, so duplicate numbers keep separate ranks.
            for (IntWritable value : values) {
                context.write(linenum, key);
                linenum.set(linenum.get() + 1);
            }
        }
    }

    public static void main(String[] args) throws Exception {
        if (args.length != 2) {
            System.err.println("Usage: Sort <input path> <output path>");
            System.exit(-1);
        }
        Job job = new Job();
        job.setJobName("Sort");
        job.setJarByClass(Sort.class);

        job.setMapperClass(Map.class);
        job.setReducerClass(Reduce.class);

        job.setOutputKeyClass(IntWritable.class);
        job.setOutputValueClass(IntWritable.class);

        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}
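Note that the output order is only globally ascending when the job runs with a single reducer; with several reducers, each partition is sorted independently. One line in the driver (a standard Job method) forces a total order at the cost of parallelism:

job.setNumReduceTasks(1);

For large inputs, Hadoop's TotalOrderPartitioner is the scalable alternative, but it is beyond the scope of this example.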

Computing averages:
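Each input line holds a name and a score separated by whitespace. The mapper emits (name, score) pairs, so the shuffle delivers all scores for one name to a single reduce call, which sums them and divides by the count.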

import java.io.IOException;
import java.util.StringTokenizer;

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class Score {
    // TextInputFormat delivers one "name score" line per call; emit (name, score).
    public static class Map extends Mapper<Object, Text, Text, IntWritable> {
        @Override
        public void map(Object key, Text value, Context context)
                throws IOException, InterruptedException {
            StringTokenizer tokenizer = new StringTokenizer(value.toString());
            String strName = tokenizer.nextToken();
            int score = Integer.parseInt(tokenizer.nextToken());
            context.write(new Text(strName), new IntWritable(score));
        }
    }

    // All scores for one name arrive together; sum them and divide by the count.
    public static class Reduce extends Reducer<Text, IntWritable, Text, IntWritable> {
        @Override
        public void reduce(Text key, Iterable<IntWritable> values, Context context)
                throws IOException, InterruptedException {
            int sum = 0;
            int count = 0;
            for (IntWritable value : values) {
                sum += value.get();
                count++;
            }
            int average = sum / count; // integer division truncates
            context.write(key, new IntWritable(average));
        }
    }

    public static void main(String[] args) throws Exception {
        if (args.length != 2) {
            System.err.println("Usage: Score <input path> <output path>");
            System.exit(-1);
        }
        Job job = new Job();
        job.setJobName("Score");
        job.setJarByClass(Score.class);

        job.setMapperClass(Map.class);
        job.setReducerClass(Reduce.class);

        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);

        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}
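The reducer uses integer division, so fractional parts are truncated. If exact averages matter, here is a minimal sketch of an alternative reducer (hypothetical, not part of the original job) that emits a DoubleWritable instead:

    public static class AvgReduce extends Reducer<Text, IntWritable, Text, DoubleWritable> {
        @Override
        public void reduce(Text key, Iterable<IntWritable> values, Context context)
                throws IOException, InterruptedException {
            int sum = 0;
            int count = 0;
            for (IntWritable value : values) {
                sum += value.get();
                count++;
            }
            // Cast before dividing so the fractional part survives.
            context.write(key, new DoubleWritable((double) sum / count));
        }
    }

Wiring it in would also require job.setOutputValueClass(DoubleWritable.class) and an import of org.apache.hadoop.io.DoubleWritable. Note that this reducer cannot be reused as a combiner: an average of partial averages is not, in general, the overall average.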

 
