026_默认的MapReduce Driver(最小驱动问题)

1、 最小配置的MapReduce Driver

读取输入文件中的内容,输出到指定目录的输出文件中,此时文件中的内容为:

Key---输入文件每行内容的起始位置。

Value---输入文件每行的原始内容。

输出文件中的内容就是:key+\t+value.

 1 package org.dragon.hadoop.mapreduce.app.minDriver;
 2 
 3 import java.io.IOException;
 4 
 5 import org.apache.hadoop.conf.Configuration;
 6 import org.apache.hadoop.fs.Path;
 7 import org.apache.hadoop.mapreduce.Job;
 8 import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
 9 import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
10 
11 /**
12  *
13  * @author ZhuXY  
14  * @time   2016-3-13 下午9:24:49
15  *
16  */
17 
18 /**
19  * function:最小配置的MapReduce Driver
20  * 
21  * 读取输入文件中的内容,输出到指定目录的输出文件中,
22  *     此时文件中的内容为: Key---输入文件每行内容的起始位置。
23  *                 Value---输入文件每行的原始内容。
24  *     输出文件中的内容就是:key+\t+value.
25  * 
26  * @author ZhuXY
27  * 
28  */
29 public class MinimalDriverMapReduce {
30     
31     /*
32      * Mapper Class
33      */
34     
35     /*
36      * Reducer Class
37      */
38     
39     /*
40      * Driver Code
41      */
42     
43     public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
44         args=new String[]{
45             "hdfs://hadoop-master.dragon.org:9000/wc/mininput/",
46             "hdfs://hadoop-master.dragon.org:9000/wc/minoutput"
47         };
48         
49         // get conf
50         Configuration conf=new Configuration();
51         
52         // create job
53         Job job=new Job(conf, MinimalDriverMapReduce.class.getSimpleName());
54         
55         // set job
56         job.setJarByClass(MinimalDriverMapReduce.class);
57         //    1) set input
58         FileInputFormat.addInputPath(job, new Path(args[0]));
59         
60         //    2) set map
61     
62         //    3) set reduce
63         
64         //    4) set output
65         FileOutputFormat.setOutputPath(job, new Path(args[1]));
66         
67         // submit job
68         boolean isSuccess=job.waitForCompletion(true);
69         
70         // return status
71         System.exit(isSuccess?0:1);
72     }
73 }

2、查看默认的配置

  主要在这个类中:

3、Map与reduce的默认输入输出类型。

4、把最小配置所依赖的默认设置全部显式写出来

导包:

 1 package org.dragon.hadoop.mapreduce.app.minDriver;
 2 
 3 import java.io.IOException;
 4 
 5 import org.apache.hadoop.conf.Configuration;
 6 import org.apache.hadoop.fs.Path;
 7 import org.apache.hadoop.io.LongWritable;
 8 import org.apache.hadoop.io.Text;
 9 import org.apache.hadoop.mapreduce.Job;
10 import org.apache.hadoop.mapreduce.Mapper;
11 import org.apache.hadoop.mapreduce.Reducer;
12 import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
13 import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
14 import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
15 import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
16 import org.apache.hadoop.mapreduce.lib.partition.HashPartitioner;
View import Code

 

真正的代码:

/**
 * @author ZhuXY
 * @time   2016-3-13 下午9:45:02
 *
 */
 5 
 6 /**
 7  * MapReduce Minimal Driver默认配置
 8  * 
 9  * @author ZhuXY
10  * 
11  */
12 public class TotalDefaultMinimalDriverMP {
13     /*
14      * Mapper Class
15      */
16 
17     /*
18      * Reducer Class
19      */
20 
21     /*
22      * Driver Code
23      */
24 
25     public static void main(String[] args) throws IOException,
26             ClassNotFoundException, InterruptedException {
27         args = new String[] {
28                 "hdfs://hadoop-master.dragon.org:9000/wc/mininput/",
29                 "hdfs://hadoop-master.dragon.org:9000/wc/minoutput" };
30 
31         // step 1:get conf
32         Configuration conf = new Configuration();
33 
34         // step 2:create job
35         Job job = new Job(conf, MinimalDriverMapReduce.class.getSimpleName());
36 
37         // step 3:set job
38         // 1) set run jar class
39         job.setJarByClass(MinimalDriverMapReduce.class);
40 
41         // 2) set input format
42         job.setInputFormatClass(TextInputFormat.class);                //可省
43 
44         // 3) set input path
45         FileInputFormat.addInputPath(job, new Path(args[0]));
46 
47         // 4) set mapper class
48         job.setMapperClass(Mapper.class);                //可省
49 
50         // 5)set map input key/value class
51         job.setMapOutputKeyClass(LongWritable.class);                //可省
52         job.setMapOutputValueClass(Text.class);                //可省
53 
54         // 6) set partitioner class
55         job.setPartitionerClass(HashPartitioner.class);                //可省
56 
57         // 7) set reducer number
58         job.setNumReduceTasks(1);//default 1                //可省
59         // 8)set sort comparator class
60         job.setSortComparatorClass(LongWritable.Comparator.class);                //可省
61 
62         // 9) set group comparator class
63         job.setGroupingComparatorClass(LongWritable.Comparator.class);                //可省
64 
65         // 10) set combiner class
66         //job.setCombinerClass(null);默认是null,但是此处不能写                //可省
67 
68         // 11) set reducer class
69         job.setReducerClass(Reducer.class);                //可省
70 
71         // 12) set output format
72         job.setOutputFormatClass(TextOutputFormat.class);                //可省
73 
74         // 13) job output key/value class
75         job.setOutputKeyClass(LongWritable.class);                //可省
76         job.setOutputValueClass(Text.class);                //可省
77 
78         // 14) job output path
79         FileOutputFormat.setOutputPath(job, new Path(args[1]));
80 
81         // step 4: submit job
82         boolean isSuccess = job.waitForCompletion(true);
83 
84         // step 5: return status
85         System.exit(isSuccess ? 0 : 1);
86     }
87 }

你可能感兴趣的:(026_默认的MapReduce Driver(最小驱动问题))