Roll Your Own WordCount

  1. Create a Maven project
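
    A standard Maven layout is all that is needed here. As a sketch, the project can be bootstrapped from the command line with the quickstart archetype, reusing the coordinates that appear in the pom below:

    mvn archetype:generate -DgroupId=com.chen -DartifactId=hadoop-hdfs-test \
        -DarchetypeArtifactId=maven-archetype-quickstart -DinteractiveMode=false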

  2. Edit the pom file

    
    
    <?xml version="1.0" encoding="UTF-8"?>
    <project xmlns="http://maven.apache.org/POM/4.0.0"
             xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
             xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
        <modelVersion>4.0.0</modelVersion>

        <groupId>com.chen</groupId>
        <artifactId>hadoop-hdfs-test</artifactId>
        <version>1.0-SNAPSHOT</version>

        <dependencies>
            <dependency>
                <groupId>junit</groupId>
                <artifactId>junit</artifactId>
                <version>RELEASE</version>
            </dependency>

            <dependency>
                <groupId>org.apache.logging.log4j</groupId>
                <artifactId>log4j-core</artifactId>
                <version>2.11.2</version>
            </dependency>

            <dependency>
                <groupId>org.apache.hadoop</groupId>
                <artifactId>hadoop-common</artifactId>
                <version>2.7.2</version>
            </dependency>

            <dependency>
                <groupId>org.apache.hadoop</groupId>
                <artifactId>hadoop-client</artifactId>
                <version>2.7.2</version>
            </dependency>

            <dependency>
                <groupId>org.apache.hadoop</groupId>
                <artifactId>hadoop-hdfs</artifactId>
                <version>2.7.2</version>
            </dependency>
        </dependencies>

        <build>
            <plugins>
                <plugin>
                    <artifactId>maven-compiler-plugin</artifactId>
                    <version>2.3.2</version>
                    <configuration>
                        <source>1.8</source>
                        <target>1.8</target>
                    </configuration>
                </plugin>
                <plugin>
                    <artifactId>maven-assembly-plugin</artifactId>
                    <configuration>
                        <descriptorRefs>
                            <descriptorRef>jar-with-dependencies</descriptorRef>
                        </descriptorRefs>
                        <archive>
                            <manifest>
                                <mainClass>com.chen.mapreduce.WordcountDriver</mainClass>
                            </manifest>
                        </archive>
                    </configuration>
                    <executions>
                        <execution>
                            <id>make-assembly</id>
                            <phase>package</phase>
                            <goals>
                                <goal>single</goal>
                            </goals>
                        </execution>
                    </executions>
                </plugin>
            </plugins>
        </build>
    </project>
    
  3. Create the Mapper

    package com.chen.mapreduce;
    
    import org.apache.hadoop.io.IntWritable;
    import org.apache.hadoop.io.LongWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Mapper;
    
    import java.io.IOException;
    
    public class WordcountMapper extends Mapper<LongWritable, Text, Text, IntWritable> {

        // Reused output objects: k holds the current word, v is the constant count 1
        Text k = new Text();
        IntWritable v = new IntWritable(1);
    
        @Override
        protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
            // Split the line into words on single spaces
            String line = value.toString();

            String[] words = line.split(" ");
    
            for (String word: words){
                k.set(word);
                context.write(k,v);
            }
    
        }
    }
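
    For an input line such as hello world hello, this map() emits (hello, 1), (world, 1), (hello, 1); the framework then sorts and groups these pairs by key before the reducer runs.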
    
    
  4. Create the Reducer

    package com.chen.mapreduce;
    
    import org.apache.hadoop.io.IntWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Reducer;
    
    import java.io.IOException;
    
    public class WordcountReducer extends Reducer<Text, IntWritable, Text, IntWritable> {

        // Reused across reduce() calls; sum is reset at the start of each call
        int sum = 0;
        IntWritable v = new IntWritable();
    
        @Override
        protected void reduce(Text key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException {
            // Add up all the 1s emitted for this word
            sum = 0;
            for (IntWritable count: values){
                sum += count.get();
            }
    
            v.set(sum);
            context.write(key,v);
        }
    }
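
    Continuing the example, reduce() receives hello -> [1, 1] and writes hello 2 to the output.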
    
    
  5. Create the Driver

    package com.chen.mapreduce;
    
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.io.IntWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Job;
    import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
    import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
    
    public class WordcountDriver {
    
        public static void main(String[] args) throws Exception {
            Configuration configuration = new Configuration();
            Job job = Job.getInstance(configuration);
    
            job.setJarByClass(WordcountDriver.class);
    
            job.setMapperClass(WordcountMapper.class);
            job.setReducerClass(WordcountReducer.class);
    
            job.setMapOutputKeyClass(Text.class);
            job.setMapOutputValueClass(IntWritable.class);
    
            job.setOutputKeyClass(Text.class);
            job.setOutputValueClass(IntWritable.class);
    
            FileInputFormat.setInputPaths(job,new Path(args[0]));
            FileOutputFormat.setOutputPath(job,new Path(args[1]));
    
            boolean result = job.waitForCompletion(true);
    
            System.exit(result ? 0 : 1);
        }
    }
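
    waitForCompletion(true) submits the job and streams its progress to the console; its boolean result feeds the process exit code, so shell scripts can detect failure.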
    
    
  6. Package the jar
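    With the assembly plugin configured above, a plain package build produces the runnable fat jar:

    mvn clean package
    # target/hadoop-hdfs-test-1.0-SNAPSHOT-jar-with-dependencies.jar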

  7. Upload the jar
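    A minimal sketch using scp; the host name below is hypothetical, but the target directory matches the path used in the run command in step 8:

    scp target/hadoop-hdfs-test-1.0-SNAPSHOT-jar-with-dependencies.jar \
        user@hadoop-master:/data/app/hadoop-2.7.2/my-job/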

  8. Run the job
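
    Before submitting, make sure the input exists in HDFS and the output directory does not (MapReduce refuses to overwrite an existing output directory). A minimal sketch, assuming a local file words.txt as input:

    hadoop fs -mkdir -p /data/hadoop/input
    hadoop fs -put words.txt /data/hadoop/input

    Then run the fat jar; the main class comes from the manifest, so only the input and output paths are needed: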

    hadoop jar /data/app/hadoop-2.7.2/my-job/hadoop-hdfs-test-1.0-SNAPSHOT-jar-with-dependencies.jar  /data/hadoop/input /data/hadoop/output
    
    
  9. View the results
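    Each reducer writes a part-r-NNNNN file under the output directory; each line is a word, a tab, and its count:

    hadoop fs -ls /data/hadoop/output
    hadoop fs -cat /data/hadoop/output/part-r-00000
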
  10. View the job logs
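    With YARN log aggregation enabled, container logs can be pulled by application id; the id below is a placeholder, the real one appears in the job's console output and in the ResourceManager UI:

    yarn logs -applicationId application_1234567890123_0001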
