Windows运行Hadoop MapReduce

1. 导包(Maven 依赖)


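        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-client</artifactId>
            <version>2.7.3</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-yarn-common</artifactId>
            <version>2.7.3</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-yarn-client</artifactId>
            <version>2.7.3</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-yarn-server-resourcemanager</artifactId>
            <version>2.7.3</version>
        </dependency>
        <dependency>
            <groupId>org.anarres.lzo</groupId>
            <artifactId>lzo-hadoop</artifactId>
            <version>1.0.0</version>
            <scope>compile</scope>
        </dependency>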

2. Mapper、Reducer、App 三个文件如下(注意:WCApp 中还引用了自定义分区类 MyPartitioner,本文没有给出它的代码,需要自行提供,或者去掉对应的 setPartitionerClass 调用)

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
import java.io.IOException;
public class WCReducer extends Reducer{
    protected void reduce(Text key, Iterable values, Context context) throws IOException, InterruptedException {
        int count = 0 ;
        for(IntWritable iw : values){
            count = count + iw.get() ;
        }
        String tno = Thread.currentThread().getName();
        System.out.println(tno + " : WCReducer :" + key.toString() + "=" + count);
        context.write(key,new IntWritable(count));
    }
}
 
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import java.io.IOException;

public class WCMapper extends Mapper{
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        Text keyOut = new Text();
        IntWritable valueOut = new IntWritable();
        String[] arr = value.toString().split(" ");
        for(String s : arr){
            keyOut.set(s);
            valueOut.set(1);
            context.write(keyOut,valueOut);
        }
    }
}

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat;
public class WCApp {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        conf.set("fs.defaultFS", "file:///");
        Job job = Job.getInstance(conf);
        //设置job的各种属性
        job.setJobName("WCApp");                        //作业名称
        job.setJarByClass(WCApp.class);                 //搜索类
        job.setInputFormatClass(TextInputFormat.class); //设置输入格式
        //设置输出格式类
        //job.setOutputFormatClass(SequenceFileOutputFormat.class);
        //添加输入路径
        *FileInputFormat.addInputPath(job,new Path(args[0]));*
        //设置输出路径
        FileOutputFormat.setOutputPath(job,new Path(args[1]));
        //设置最大切片数
        //FileInputFormat.setMaxInputSplitSize(job,13);
        //最小切片数
        //FileInputFormat.setMinInputSplitSize(job,1L);
        //设置分区类
        job.setPartitionerClass(MyPartitioner.class);   //设置自定义分区
        //设置合成类
        job.setCombinerClass(WCReducer.class);          //设置combiner类
        job.setMapperClass(WCMapper.class);             //mapper类
        job.setReducerClass(WCReducer.class);           //reducer类
        job.setNumReduceTasks(3);                       //reduce个数
        job.setMapOutputKeyClass(Text.class);           //
        job.setMapOutputValueClass(IntWritable.class);  //
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);     //
        job.waitForCompletion(true);
    }
}

3.安装hadoop,解压hadoop压缩包,并配置环境变量。

4.运行报错
(null) entry in command string: null chmod 0700
网上查到两种解决办法。第一种:
在程序中指定hadoop.home.dir

// NOTE(review): Hadoop appears to resolve winutils.exe as <hadoop.home.dir>\bin\winutils.exe,
// so this property would normally point at the Hadoop install root rather than its bin
// directory, and would need to be set before any Hadoop class is loaded. TODO confirm.
System.setProperty("hadoop.home.dir","c:/Users/Administrator/Desktop/hadoop-2.7.7/bin" );

毫无卵用
第二种,参考 : https://ask.hellobi.com/blog/jack/5063
第二种 第一步:
下载winutils.exe,libwinutils.lib 拷贝到%HADOOP_HOME%\bin目录
毫无卵用
第二步:
下载hadoop.dll,并拷贝到c:\windows\system32目录中
报的错终于不一样了,如下:
file permissions : java.io.IOException: (null) entry in command string: null ls -F

再次搜索,得知输入路径需要写成具体路径。我找到 Run -> Edit Configurations,配置如下图:
Windows运行Hadoop MapReduce_第1张图片
这里一定要注意:第一个参数是输入路径,要指定到具体文件;第二个参数是输出路径,输出目录(如 out 文件夹)在运行前必须不存在,如果已存在就要先删掉。

你可能感兴趣的:(hadoop)