新建一个hadoop工程,如图
建一个运行 wordcount 的类,先不管它是什么意思,代码如下
/**
 * Project: hadoop
 *
 * File Created at 2012-5-21
 * $Id$
 */
package seee.you.app;

import java.io.IOException;
import java.util.StringTokenizer;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

/**
 * MapReduce job that counts how often each whitespace-separated token occurs
 * in the input files.
 *
 * <p>Expects exactly two command-line arguments: the input path and the
 * output path.
 */
public class WordCount {

    /**
     * Splits every input line into tokens and emits {@code (token, 1)} for
     * each of them.
     */
    public static class TokenizerMapper
            extends Mapper<LongWritable, Text, Text, IntWritable> {

        /** Constant count emitted for every token. */
        private static final IntWritable ONE = new IntWritable(1);

        /** Reused output key; avoids allocating a new Text per token. */
        private final Text word = new Text();

        /**
         * Tokenizes one input record and writes one pair per token.
         *
         * @param key     key of the input record (not used)
         * @param value   text of the input record
         * @param context sink for the emitted {@code (token, 1)} pairs
         * @throws IOException          if writing to the context fails
         * @throws InterruptedException if the task is interrupted while writing
         */
        @Override
        public void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            StringTokenizer tokens = new StringTokenizer(value.toString());
            while (tokens.hasMoreTokens()) {
                word.set(tokens.nextToken());
                context.write(word, ONE);
            }
        }
    }

    /**
     * Adds up all counts received for a token and emits {@code (token, sum)}.
     */
    public static class IntSumReducer
            extends Reducer<Text, IntWritable, Text, IntWritable> {

        /** Reused output value; avoids allocating a new IntWritable per key. */
        private final IntWritable result = new IntWritable();

        /**
         * Sums the counts of one token.
         *
         * @param key     the token
         * @param values  the counts collected for the token
         * @param context sink for the emitted {@code (token, sum)} pair
         * @throws IOException          if writing to the context fails
         * @throws InterruptedException if the task is interrupted while writing
         */
        @Override
        public void reduce(Text key, Iterable<IntWritable> values, Context context)
                throws IOException, InterruptedException {
            int sum = 0;
            for (IntWritable count : values) {
                sum += count.get();
            }
            result.set(sum);
            context.write(key, result);
        }
    }

    /**
     * Configures and submits the job, then exits with 0 on success, 1 on job
     * failure, or 2 when the arguments are wrong.
     *
     * @param args {@code args[0]} is the input path, {@code args[1]} the output path
     * @throws Exception if the job cannot be set up or submitted
     */
    public static void main(String[] args) throws Exception {
        // Validate the arguments first so a wrong invocation fails fast.
        if (args.length != 2) {
            System.err.println("Usage: wordcount <in> <out>");
            System.exit(2);
        }

        Configuration conf = new Configuration();
        Job job = new Job(conf, "word count");
        job.setJarByClass(WordCount.class);

        job.setMapperClass(TokenizerMapper.class);
        // Integer addition is associative and commutative, so the reducer can
        // also pre-aggregate map output as a combiner without changing results.
        job.setCombinerClass(IntSumReducer.class);
        job.setReducerClass(IntSumReducer.class);

        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(IntWritable.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);

        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}
这时候在该类上右键,选择 Run As → Run on Hadoop
这时候不幸的是,报错了,错误信息如下:
12/05/23 19:38:51 WARN util.NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable 12/05/23 19:38:51 ERROR security.UserGroupInformation: PriviledgedActionException as:yongkang.qiyk cause:java.io.IOException: Failed to set permissions of path: \tmp\hadoop-yongkang\mapred\staging\yongkang.qiyk-1840800210\.staging to 0700 Exception in thread "main" java.io.IOException: Failed to set permissions of path: \tmp\hadoop-yongkang\mapred\staging\yongkang.qiyk-1840800210\.staging to 0700 at org.apache.hadoop.fs.FileUtil.checkReturnValue(FileUtil.java:682) at org.apache.hadoop.fs.FileUtil.setPermission(FileUtil.java:655) at org.apache.hadoop.fs.RawLocalFileSystem.setPermission(RawLocalFileSystem.java:509) at org.apache.hadoop.fs.RawLocalFileSystem.mkdirs(RawLocalFileSystem.java:344) at org.apache.hadoop.fs.FilterFileSystem.mkdirs(FilterFileSystem.java:189) at org.apache.hadoop.mapreduce.JobSubmissionFiles.getStagingDir(JobSubmissionFiles.java:116) at org.apache.hadoop.mapred.JobClient$2.run(JobClient.java:856) at org.apache.hadoop.mapred.JobClient$2.run(JobClient.java:850) at java.security.AccessController.doPrivileged(Native Method) at javax.security.auth.Subject.doAs(Subject.java:396) at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1093) at org.apache.hadoop.mapred.JobClient.submitJobInternal(JobClient.java:850) at org.apache.hadoop.mapreduce.Job.submit(Job.java:500) at org.apache.hadoop.mapreduce.Job.waitForCompletion(Job.java:530) at seee.you.app.WordCount.main(WordCount.java:80)
错误信息很明显了,是 at org.apache.hadoop.fs.FileUtil.checkReturnValue(FileUtil.java:682) 这一行的方法抛出了异常
网上查到这是由于 0.20.203.0 以后的版本会检查本地文件权限是否设置成功引起的(从上面日志中的路径可以看出是在 Windows 上运行,设置权限失败),只有去掉这个检查才行
修改hadoop源代码,去除权限认证,修改FileUtil.java的checkReturnValue方法,如下:
private static void checkReturnValue(boolean rv, File p, FsPermission permission ) throws IOException { // if (!rv) { // throw new IOException("Failed to set permissions of path: " + p + // " to " + // String.format("%04o", permission.toShort())); // } }
修改之后,需要重新编译打包。打包成功之后,将 hadoop-core-1.0.2.jar 拷贝到 hadoop 根目录下,并在 eclipse 中重新导入即可(我用的这个 1.0.2 是从网上下载的已经修改好的版本,比较省事)
这时重新运行下实例,运行实例需要配置下arguments参数,我的配置如下:
run 一下,结果如下,作业已经可以正常提交并运行完成,不再报权限错误(注意日志中 Total input paths to process : 0,说明这次运行时输入目录下没有文件,所以各项输出计数都是 0)
12/05/28 21:16:29 WARN util.NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable 12/05/28 21:16:29 WARN mapred.JobClient: Use GenericOptionsParser for parsing the arguments. Applications should implement Tool for the same. ****hdfs://10.16.110.7:9000/user/yongkang/test-in 12/05/28 21:16:29 INFO input.FileInputFormat: Total input paths to process : 0 12/05/28 21:16:30 INFO mapred.JobClient: Running job: job_local_0001 12/05/28 21:16:30 INFO mapred.Task: Using ResourceCalculatorPlugin : null 12/05/28 21:16:30 INFO mapred.LocalJobRunner: 12/05/28 21:16:30 INFO mapred.Merger: Merging 0 sorted segments 12/05/28 21:16:30 INFO mapred.Merger: Down to the last merge-pass, with 0 segments left of total size: 0 bytes 12/05/28 21:16:30 INFO mapred.LocalJobRunner: 12/05/28 21:16:30 INFO mapred.Task: Task:attempt_local_0001_r_000000_0 is done. And is in the process of commiting 12/05/28 21:16:30 INFO mapred.LocalJobRunner: 12/05/28 21:16:30 INFO mapred.Task: Task attempt_local_0001_r_000000_0 is allowed to commit now 12/05/28 21:16:30 INFO output.FileOutputCommitter: Saved output of task 'attempt_local_0001_r_000000_0' to /user/yongkang/test-out6 12/05/28 21:16:31 INFO mapred.JobClient: map 0% reduce 0% 12/05/28 21:16:33 INFO mapred.LocalJobRunner: reduce > reduce 12/05/28 21:16:33 INFO mapred.Task: Task 'attempt_local_0001_r_000000_0' done. 
12/05/28 21:16:34 INFO mapred.JobClient: map 0% reduce 100% 12/05/28 21:16:34 INFO mapred.JobClient: Job complete: job_local_0001 12/05/28 21:16:34 INFO mapred.JobClient: Counters: 10 12/05/28 21:16:34 INFO mapred.JobClient: File Output Format Counters 12/05/28 21:16:34 INFO mapred.JobClient: Bytes Written=0 12/05/28 21:16:34 INFO mapred.JobClient: FileSystemCounters 12/05/28 21:16:34 INFO mapred.JobClient: FILE_BYTES_READ=8604 12/05/28 21:16:34 INFO mapred.JobClient: FILE_BYTES_WRITTEN=51882 12/05/28 21:16:34 INFO mapred.JobClient: Map-Reduce Framework 12/05/28 21:16:34 INFO mapred.JobClient: Reduce input groups=0 12/05/28 21:16:34 INFO mapred.JobClient: Combine output records=0 12/05/28 21:16:34 INFO mapred.JobClient: Reduce shuffle bytes=0 12/05/28 21:16:34 INFO mapred.JobClient: Reduce output records=0 12/05/28 21:16:34 INFO mapred.JobClient: Spilled Records=0 12/05/28 21:16:34 INFO mapred.JobClient: Total committed heap usage (bytes)=5177344 12/05/28 21:16:34 INFO mapred.JobClient: Reduce input records=0