After configuring the Hadoop plugin in Eclipse, I wrote the following WordCount program:
package hadoop01;

import java.io.IOException;
import java.util.StringTokenizer;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class WordCountMapReduce {

    // step 1: Mapper
    public static class WordCountMapper extends
            Mapper<LongWritable, Text, Text, IntWritable> {

        private Text mapOutputKey = new Text();
        private IntWritable mapOutputValue = new IntWritable(1);

        @Override
        public void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            // line value
            String lineValue = value.toString();
            // split the line into words
            // String[] strs = lineValue.split(" ");
            StringTokenizer stringTokenizer = new StringTokenizer(lineValue);
            while (stringTokenizer.hasMoreTokens()) {
                // set map output key
                mapOutputKey.set(stringTokenizer.nextToken());
                // output <word, 1>
                context.write(mapOutputKey, mapOutputValue);
            }
            /*
             * Alternative iteration over String.split():
             * for (String str : strs) {
             *     mapOutputKey.set(str);
             *     context.write(mapOutputKey, mapOutputValue);
             * }
             */
        }
    }

    // step 2: Reducer
    public static class WordCountReducer extends
            Reducer<Text, IntWritable, Text, IntWritable> {

        private IntWritable outputValue = new IntWritable();

        @Override
        protected void reduce(Text key, Iterable<IntWritable> values,
                Context context) throws IOException, InterruptedException {
            // temp sum
            int sum = 0;
            // iterate over all counts for this word
            for (IntWritable value : values) {
                sum += value.get();
            }
            // set output
            outputValue.set(sum);
            context.write(key, outputValue);
        }
    }

    // step 3: Driver
    public int run(String[] args) throws Exception {
        Configuration configuration = new Configuration();
        Job job = Job.getInstance(configuration, this.getClass()
                .getSimpleName());
        job.setJarByClass(WordCountMapReduce.class);
        // set job
        // input
        Path inpath = new Path(args[0]);
        FileInputFormat.addInputPath(job, inpath);
        // output
        Path outPath = new Path(args[1]);
        FileOutputFormat.setOutputPath(job, outPath);
        // Mapper
        job.setMapperClass(WordCountMapper.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(IntWritable.class);
        // Reducer
        job.setReducerClass(WordCountReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        // submit job -> YARN
        boolean isSuccess = job.waitForCompletion(true);
        return isSuccess ? 0 : 1;
    }

    public static void main(String[] args) throws Exception {
        // System.setProperty("hadoop.home.dir", "F:/BigData/hadoop2.7.3/hadoop-2.7.3");
        args = new String[] {
                "hdfs://slave01:8020/df/data/wordCount.txt",
                "hdfs://slave01:8020/df/data/output06" };
        // run job
        int status = new WordCountMapReduce().run(args);
        System.exit(status);
    }
}
Running it produced the following error:
org.apache.hadoop.security.AccessControlException: Permission denied: user=haibozhang, access=WRITE, inode="/df/data/output06/_temporary/0":root:supergroup:drwxr-xr-x
at org.apache.hadoop.hdfs.server.namenode.FSPermissionChecker.check(FSPermissionChecker.java:319)
at org.apache.hadoop.hdfs.server.namenode.FSPermissionChecker.check(FSPermissionChecker.java:292)
at org.apache.hadoop.hdfs.server.namenode.FSPermissionChecker.checkPermission(FSPermissionChecker.java:213)
at org.apache.hadoop.hdfs.server.namenode.FSPermissionChecker.checkPermission(FSPermissionChecker.java:190)
at org.apache.hadoop.hdfs.server.namenode.FSDirectory.checkPermission(FSDirectory.java:1728)
at org.apache.hadoop.hdfs.server.namenode.FSDirectory.checkPermission(FSDirectory.java:1712)
at org.apache.hadoop.hdfs.server.namenode.FSDirectory.checkAncestorAccess(FSDirectory.java:1695)
at org.apache.hadoop.hdfs.server.namenode.FSDirMkdirOp.mkdirs(FSDirMkdirOp.java:71)
at org.apache.hadoop.hdfs.server.namenode.FSNamesystem.mkdirs(FSNamesystem.java:3896)
at org.apache.hadoop.hdfs.server.namenode.NameNodeRpcServer.mkdirs(NameNodeRpcServer.java:984)
at org.apache.hadoop.hdfs.protocolPB.ClientNamenodeProtocolServerSideTranslatorPB.mkdirs(ClientNamenodeProtocolServerSideTranslatorPB.java:622)
at org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos$ClientNamenodeProtocol$2.callBlockingMethod(ClientNamenodeProtocolProtos.java)
at org.apache.hadoop.ipc.ProtobufRpcEngine$Server$ProtoBufRpcInvoker.call(ProtobufRpcEngine.java:616)
at org.apache.hadoop.ipc.RPC$Server.call(RPC.java:982)
at org.apache.hadoop.ipc.Server$Handler$1.run(Server.java:2049)
at org.apache.hadoop.ipc.Server$Handler$1.run(Server.java:2045)
at java.security.AccessController.doPrivileged(Native Method)
at javax.security.auth.Subject.doAs(Subject.java:422)
at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1698)
at org.apache.hadoop.ipc.Server$Handler.run(Server.java:2043)
at sun.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method)
at sun.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:62)
at sun.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45)
at java.lang.reflect.Constructor.newInstance(Constructor.java:423)
at org.apache.hadoop.ipc.RemoteException.instantiateException(RemoteException.java:106)
at org.apache.hadoop.ipc.RemoteException.unwrapRemoteException(RemoteException.java:73)
at org.apache.hadoop.hdfs.DFSClient.primitiveMkdir(DFSClient.java:3002)
at org.apache.hadoop.hdfs.DFSClient.mkdirs(DFSClient.java:2970)
at org.apache.hadoop.hdfs.DistributedFileSystem$21.doCall(DistributedFileSystem.java:1047)
at org.apache.hadoop.hdfs.DistributedFileSystem$21.doCall(DistributedFileSystem.java:1043)
at org.apache.hadoop.fs.FileSystemLinkResolver.resolve(FileSystemLinkResolver.java:81)
at org.apache.hadoop.hdfs.DistributedFileSystem.mkdirsInternal(DistributedFileSystem.java:1061)
at org.apache.hadoop.hdfs.DistributedFileSystem.mkdirs(DistributedFileSystem.java:1036)
at org.apache.hadoop.fs.FileSystem.mkdirs(FileSystem.java:1881)
at org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter.setupJob(FileOutputCommitter.java:313)
at org.apache.hadoop.mapred.LocalJobRunner$Job.run(LocalJobRunner.java:511)
17/10/15 14:28:38 INFO mapreduce.Job: Job job_local210975402_0001 running in uber mode : false
17/10/15 14:28:38 INFO mapreduce.Job: map 0% reduce 0%
17/10/15 14:28:38 INFO mapreduce.Job: Job job_local210975402_0001 failed with state FAILED due to: NA
17/10/15 14:28:38 INFO mapreduce.Job: Counters: 0
The error is clear: the specified user (haibozhang, the local account Eclipse submits the job as) has no write permission on HDFS, because the target directory is owned by root:supergroup with mode drwxr-xr-x. For more detail on why this happens, see http://blog.csdn.net/lunhuishizhe/article/details/50489849
There are four main ways to solve this:
1. Method 1:
Add HADOOP_USER_NAME=XXX (the Linux user name used on the Hadoop cluster) to the system environment variables, or pass it as a Java JVM variable (a sketch is shown after this list). ==> verified to work
2. Method 2:
Edit the Hadoop configuration file etc/hadoop/hdfs-site.xml: find the dfs.permissions property and change its value to false; if the property does not exist, add it directly, as follows:
<property>
    <name>dfs.permissions</name>
    <value>false</value>
</property>
After the change, restart HDFS for it to take effect. ==> verified to work
3. Method 3:
Change the current Windows account name so that it matches the Linux user name used by Hadoop. ==> verified to work
4. Method 4:
Use HDFS commands to change the permissions of the target directory, e.g. bin/hdfs dfs -chmod 777 /df or hadoop fs -chmod 777 /df, where /df is the path the job writes to.
==> verified to work
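For method 1, here is a minimal sketch of the JVM-variable route. With simple authentication, the Hadoop 2.x client resolves the submitting user from the HADOOP_USER_NAME environment variable and also honors a Java system property of the same name, so you can either add -DHADOOP_USER_NAME=root to the VM arguments of the Eclipse run configuration or set the property at the top of main() before the job is created. The user name "root" below is only an example; use the Linux account that owns the target HDFS directory.

public static void main(String[] args) throws Exception {
    // Submit as the owner of the HDFS output directory instead of the local
    // Windows account. "root" is an example; adjust to your cluster user.
    System.setProperty("HADOOP_USER_NAME", "root");
    args = new String[] {
            "hdfs://slave01:8020/df/data/wordCount.txt",
            "hdfs://slave01:8020/df/data/output06" };
    int status = new WordCountMapReduce().run(args);
    System.exit(status);
}

Setting HADOOP_USER_NAME=root as a Windows environment variable (and restarting Eclipse so it is picked up) achieves the same effect without touching the code.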
After applying any one of these changes, re-run the program.
Development environment: JDK 7, Hadoop 2.7.3, CentOS 6.7, Eclipse Mars.1 Release (4.5.1).