Hadoop on Mac with IntelliJ IDEA - 8 单表关联NullPointerException

在简化陆嘉恒《Hadoop实战(第2版)》5.4节“单表关联”的代码时遇到了空指针异常(NullPointerException),经分析是逻辑问题,在此做个记录。

环境:Mac OS X 10.9.5, IntelliJ IDEA 13.1.5, Hadoop 1.2.1

简化后的代码如下,运行时在reduce阶段遇到了NullPointerException。

 1 public class STjoinEx {

       // MapReduce job that joins a single two-column table with itself: the
       // mapper emits every record twice (once keyed by each column) and the
       // reducer pairs up the two sides that meet on the same key.
       // NOTE(review): presumably each input record is a (child, parent) pair,
       // since the reducer's header is grandChild/grandParent — confirm against
       // the input data.

       // Configuration key used as a flag for whether the output header still
       // has to be written (1 = not written yet).
 2     private static final String TIMES = "TIMES";

 3 

       // Entry point. Expects exactly two remaining arguments, <input> and
       // <output>; otherwise prints the usage line and exits with status 2.
       // Exits with 0 when the job completes successfully and 1 otherwise.
 4     public static void main(String[] args) throws Exception {

 5         Configuration configuration = new Configuration();

 6         configuration.setInt(TIMES, 1);

 7         String[] remainingArgs = new GenericOptionsParser(configuration, args).getRemainingArgs();

 8         if (remainingArgs.length != 2) {

 9             System.err.println("STjoinEx <input> <output>");

10             System.exit(2);

11         }

12 

13         Job job = new Job(configuration, STjoinEx.class.getSimpleName());

14         job.setJarByClass(STjoinEx.class);

15         job.setMapperClass(Map.class);

16         job.setReducerClass(Reduce.class);

17         job.setInputFormatClass(KeyValueTextInputFormat.class);

18         job.setOutputFormatClass(TextOutputFormat.class);

19         job.setOutputKeyClass(Text.class);

20         job.setOutputValueClass(Text.class);

21 

22         FileInputFormat.setInputPaths(job, new Path(remainingArgs[0]));

23         FileOutputFormat.setOutputPath(job, new Path(remainingArgs[1]));

24 

25         System.exit(job.waitForCompletion(true) ? 0 : 1);

26 

27     }

28 

       // Emits each input pair in both orientations. The value is prefixed
       // with "1 " when the pair is kept as-is and with "2 " when key and
       // value are swapped, so the reducer can tell the two sides apart.
29     public static class Map extends Mapper<Text, Text, Text, Text> {

           // Reused output holders; overwritten on every map() call.
30         final static Text LEFT_TABLE = new Text();

31         final static Text RIGHT_TABLE = new Text();

32 

33         @Override

34         protected void map(Text key, Text value, Context context) throws IOException, InterruptedException {

35             // left table

36             LEFT_TABLE.set("1 " + value);

37             context.write(key, LEFT_TABLE);

38             // right table

39             RIGHT_TABLE.set("2 " + key);

40             context.write(value, RIGHT_TABLE);

41         }

42     }

43 

       // For one join key, splits the tagged values into two lists and writes
       // the cross product of the two lists.
44     public static class Reduce extends Reducer<Text, Text, Text, Text> {

           // NOTE(review): INDENT is not referenced anywhere in the code shown
           // here; the tag width is hard-coded as substring(2) below.
45         private static final int INDENT = 2;

           // Reused output holders; overwritten before every write.
46         private static final Text GRAND_PARENT = new Text();

47         private static final Text GRAND_CHILD = new Text();

48 

49         @Override

50         protected void reduce(Text key, Iterable<Text> values, Context context) throws IOException, InterruptedException {

51             // output header

               // Written only while TIMES is still 1; the flag is then bumped on
               // the Configuration object returned by the context.
               // NOTE(review): presumably this yields one header per reduce task
               // rather than per job — confirm when running with several reducers.
52             int times = context.getConfiguration().getInt(TIMES, 1);

53             if (times == 1) {

54                 context.write(new Text("grandChild"), new Text("grandParent"));

55                 context.getConfiguration().setInt(TIMES, ++times);

56             }

57 

58             // prepare matrix

               // Fixed capacity of 10 per side: an 11th value with the same tag
               // for one key would throw ArrayIndexOutOfBoundsException.
59             int headChar = 0;

60             String[] grandChild = new String[10];

61             String[] grandParent = new String[10];

62             int grandChildNum = 0;

63             int grandParentNum = 0;

64 

               // The first character is the tag written by the mapper;
               // substring(2) drops the tag and the space that follows it.
               // Tag '1' goes to grandParent, anything else to grandChild.
65             for (Text value : values) {

66                 headChar = value.charAt(0);

67                 if (headChar == '1') {

68                     grandParent[grandParentNum] = value.toString().substring(2);

69                     grandParentNum++;

70                 } else {

71                     grandChild[grandChildNum] = value.toString().substring(2);

72                     grandChildNum++;

73                 }

74             }

75 

76             // multiply

               // NOTE(review): both operands of the condition test grandChildNum;
               // the second one was presumably meant to be grandParentNum. This is
               // the condition the surrounding article identifies as the mistake.
77             if (grandChildNum != 0 && grandChildNum != 0) {

                   // Cross product: every collected grandChild entry is written
                   // with every collected grandParent entry.
78                 for (int i = 0; i < grandChildNum; i++) {

79                     GRAND_CHILD.set(grandChild[i]);

80                     for (int j = 0; j < grandParentNum; j++) {

81                         GRAND_PARENT.set(grandParent[j]);

82                         context.write(GRAND_CHILD, GRAND_PARENT);

83                     }

84                 }

85             }

86         }

87     }

88 }

执行输出为

 1 14/10/07 11:12:51 INFO mapred.JobClient:  map 0% reduce 0%

 2 14/10/07 11:12:54 INFO mapred.JobClient:  map 100% reduce 0%

 3 14/10/07 11:13:01 INFO mapred.JobClient:  map 100% reduce 33%

 4 14/10/07 11:13:04 INFO mapred.JobClient: Task Id : attempt_201410021756_0048_r_000000_0, Status : FAILED

 5 java.lang.NullPointerException

 6     at org.apache.hadoop.io.Text.encode(Text.java:388)

 7     at org.apache.hadoop.io.Text.set(Text.java:178)

 8     at main.ch5.STjoinEx$Reduce.reduce(STjoinEx.java:96)

 9     at main.ch5.STjoinEx$Reduce.reduce(STjoinEx.java:61)

10     at org.apache.hadoop.mapreduce.Reducer.run(Reducer.java:177)

11     at org.apache.hadoop.mapred.ReduceTask.runNewReducer(ReduceTask.java:649)

12     at org.apache.hadoop.mapred.ReduceTask.run(ReduceTask.java:418)

13     at org.apache.hadoop.mapred.Child$4.run(Child.java:255)

14     at java.security.AccessController.doPrivileged(Native Method)

15     at javax.security.auth.Subject.doAs(Subject.java:396)

16     at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1190)

17     at org.apache.hadoop.mapred.Child.main(Child.java:249)

从输出的堆栈信息可以发现,异常是在源码第96行(reduce方法中调用Text.set处)抛出的,即问题出在“multiply”这一段。检查后发现判断条件 if (grandChildNum != 0 && grandChildNum != 0) 中的两个条件重复了,将其中一个改成grandParentNum即可。

修改后的执行结果

 1 grandChild    grandParent

 2 Jone    Alice

 3 Jone    Jesse

 4 Tom    Alice

 5 Tom    Jesse

 6 Tom    Mary

 7 Tom    Ben

 8 Jone    Mary

 9 Jone    Ben

10 Philip    Alice

11 Philip    Jesse

12 Mark    Alice

13 Mark    Jesse

你可能感兴趣的:(Hadoop on Mac with IntelliJ IDEA - 8 单表关联NullPointerException)