Hadoop经典案例(一)WordCount的简易实现

1:添加依赖包

   
    <dependencies>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-common</artifactId>
            <version>2.6.5</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-hdfs</artifactId>
            <version>2.6.5</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-client</artifactId>
            <version>2.6.5</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-mapreduce-client-core</artifactId>
            <version>2.6.5</version>
        </dependency>
    </dependencies>

2:添加日志包 log4j.properties

log4j.rootLogger=info,stdout,logFile
#\u63A7\u5236\u53F0\u8F93\u51FA
log4j.appender.stdout=org.apache.log4j.ConsoleAppender
log4j.appender.stdout.Target=System.out
log4j.appender.stdout.layout=org.apache.log4j.PatternLayout
log4j.appender.stdout.layout.ConversionPattern=[%d{ABSOLUTE}] %5p %c{1}:%L - %m%n
#\u7CFB\u7EDF\u65E5\u5FD7\u8F93\u51FA
log4j.appender.logFile=org.apache.log4j.DailyRollingFileAppender
log4j.appender.logFile.File=logs/mad_ccg.log
log4j.appender.logFile.DatePattern='.'yyyy-MM-dd
log4j.appender.logFile.layout=org.apache.log4j.PatternLayout
log4j.appender.logFile.layout.ConversionPattern=[%d{ABSOLUTE}] %5p %c{1}:%L - %m%n
#\u8BBE\u7F6E\u672C\u5DE5\u7A0B\u7C7B\u7EA7\u522B
log4j.logger.com.ctc.email=DEBUG

3: 创建Mapper

/**
 * Map phase of WordCount: reads one line of text per call and emits
 * a (word, 1) pair for every token in the line.
 *
 * Input key:   byte offset of the line in the file (LongWritable)
 * Input value: the line itself (Text)
 * Output:      (Text word, IntWritable 1)
 */
public class WordCountMapper extends Mapper<LongWritable, Text, Text, IntWritable> {

    /** Reused output objects to avoid allocating per token. */
    private final Text word = new Text();
    private static final IntWritable ONE = new IntWritable(1);

    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        // Bug fix: the original split("  ") used TWO spaces, so words separated
        // by a single space were never split apart. Split on any whitespace run.
        String[] tokens = value.toString().split("\\s+");
        for (String token : tokens) {
            // split("\\s+") can yield a leading empty string; skip it.
            if (!token.isEmpty()) {
                word.set(token);
                context.write(word, ONE);
            }
        }
    }
}

4:创建Reduce

/**
 * Reduce phase of WordCount: receives one word together with all the
 * 1-counts the mappers emitted for it, and writes (word, totalCount).
 *
 * Input:  (Text word, Iterable<IntWritable> counts)
 * Output: (Text word, IntWritable totalCount)
 */
public class WordCountReduce extends Reducer<Text, IntWritable, Text, IntWritable> {

    @Override
    protected void reduce(Text letter, Iterable<IntWritable> showCounts, Context context) throws IOException, InterruptedException {
        // Bug fix: the original used a raw Iterator, so iterator.next() returned
        // Object and .get() did not compile. Typed generics + enhanced-for fix it.
        int totalShowCount = 0;
        for (IntWritable count : showCounts) {
            totalShowCount += count.get();
        }
        context.write(letter, new IntWritable(totalShowCount));
    }
}

5:创建驱动类

/**
 * Driver for the WordCount job: configures the MapReduce pipeline,
 * clears any previous output directory, and runs the job to completion.
 */
public class WordCountDriver2 {
    public static void main(String[] args) throws Exception {
        // Default configuration is used when nothing is set explicitly.
        Configuration conf = new Configuration();
        // Bug fix: the original called Job.getInstance() with no arguments,
        // so the Configuration built above was silently ignored.
        Job mrJob = Job.getInstance(conf, "word count");
        // Tell Hadoop which jar to ship to the cluster.
        mrJob.setJarByClass(WordCountDriver2.class);
        mrJob.setMapperClass(WordCountMapper.class);
        mrJob.setReducerClass(WordCountReduce.class);
        // Map output types may be omitted when they match the reduce output types.
        mrJob.setOutputKeyClass(Text.class);
        mrJob.setOutputValueClass(IntWritable.class);
        // TextInputFormat/TextOutputFormat are the defaults and need not be set.
        // Typo fix: the original said "FileInpuFormat", which does not compile.
        FileInputFormat.setInputPaths(mrJob, new Path("F:\\wordcount\\input"));
        Path outPath = new Path("F:\\wordcount\\output");
        FileOutputFormat.setOutputPath(mrJob, outPath);
        // MapReduce refuses to run if the output directory already exists,
        // so remove it first. delete(Path) is deprecated; use the recursive form.
        FileSystem fs = FileSystem.get(conf);
        if (fs.exists(outPath)) {
            fs.delete(outPath, true);
        }
        boolean ifsuccess = mrJob.waitForCompletion(true);
        System.out.println(ifsuccess);
        // Propagate the job result as the process exit code.
        System.exit(ifsuccess ? 0 : 1);
    }
}

你可能感兴趣的:(大数据)