MapReduce Interview Questions Roundup

Write a MapReduce program to find which table is accessed most frequently during a peak hour (e.g. 9-10 AM).

package club.drguo.xx.mapreduce.tablecount;

import java.io.IOException;
import java.util.TreeMap;

import org.apache.commons.lang.StringUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;



/**
 * Use Hadoop to analyze a massive log file where each line records four fields:
 * TableName, Time, User, TimeSpan (the cost of the access).
 *
 * Task: write a MapReduce program that finds which table is accessed most
 * frequently during the peak hour (e.g. 9-10 AM), which user accessed that
 * table the most during that hour, and that user's total TimeSpan on it.
 *
 * Sample input (in the 9 o'clock window t001 appears three times and t003
 * once, so this job should output "t001 3"):
 *
 * t003 6:00 u002 180
 * t003 7:00 u002 180
 * t003 7:08 u002 180
 * t003 7:25 u002 180
 * t002 8:00 u002 180
 * t001 8:00 u001 240
 * t001 9:00 u002 300
 * t001 9:11 u001 240
 * t003 9:26 u001 180
 * t001 9:39 u001 300
 *
 * Step 1 (this job): find the most-accessed table between 9 and 10.
 * A sketch of the follow-up step appears after the listing.
 *
 * @author drguo
 */
//club.drguo.xx.mapreduce.tablecount.TableCount
public class TableCount {
    public static class TableCountMapper extends Mapper<LongWritable, Text, Text, LongWritable> {
        private Text k = new Text();
        private LongWritable one = new LongWritable(1);

        @Override
        protected void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            // Each line: TableName Time User TimeSpan, e.g. "t001 9:11 u001 240"
            String line = value.toString();
            String[] fields = StringUtils.split(line, " ");
            String tabName = fields[0];
            int hour = Integer.parseInt(fields[1].split(":")[0]);
            // Only count accesses that fall in the 9-10 peak window
            if (hour == 9) {
                k.set(tabName);
                context.write(k, one);
            }
        }
    }
    public static class TableCountReducer extends Reducer<Text, LongWritable, Text, LongWritable> {
        // Collect per-table totals so cleanup() can pick the maximum once all
        // reduce() calls have run. Note this yields the global maximum only
        // when the job runs with a single reducer (the default).
        private TreeMap<Text, Long> map = new TreeMap<Text, Long>();

        @Override
        protected void reduce(Text key, Iterable<LongWritable> values, Context context)
                throws IOException, InterruptedException {
            // Copy the key: the framework reuses the same Text instance across
            // reduce() calls, so storing `key` directly would corrupt the map.
            Text tabName = new Text(key.toString());
            long count = 0;
            for (LongWritable value : values) {
                count += value.get();
            }
            map.put(tabName, count);
        }

        @Override
        protected void cleanup(Reducer<Text, LongWritable, Text, LongWritable>.Context context)
                throws IOException, InterruptedException {
            Text tableName = null;
            long maxCount = 0L;
            for (Text key : map.keySet()) {
                if (map.get(key) > maxCount) { // was a while loop; it iterated at most once
                    maxCount = map.get(key);
                    tableName = key;
                }
            }
            if (tableName != null) { // guard against empty input
                context.write(tableName, new LongWritable(maxCount));
            }
        }
    }
    public static void main(String[] args) throws Exception {
        Configuration configuration = new Configuration();
        Job job = Job.getInstance(configuration,"tablejob");
        job.setJarByClass(TableCount.class);
        
        job.setMapperClass(TableCountMapper.class);
        job.setReducerClass(TableCountReducer.class);
        
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(LongWritable.class);
        
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(LongWritable.class);
        
        // Input logs and output directory on HDFS; the output path must not
        // already exist when the job is submitted.
        FileInputFormat.setInputPaths(job, "hdfs://localhost:9000/log");
        FileOutputFormat.setOutputPath(job, new Path("hdfs://localhost:9000/tablecount"));
        
        System.exit(job.waitForCompletion(true)?0:1);
    }
}
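
To run the job, package it into a jar and submit it with the hadoop launcher. A minimal example, assuming the jar is named tablecount.jar (an assumption; the main class is the one from the listing):

hadoop jar tablecount.jar club.drguo.xx.mapreduce.tablecount.TableCount

The listing above only answers the first part of the question. For the remaining parts (the user who accessed the winning table most often between 9 and 10, and that user's total TimeSpan), a common approach is a second job that filters on the winning table and aggregates per user. Below is a minimal sketch under stated assumptions: the winning table name from job 1 ("t001" for the sample data) is passed in via the Configuration, and the job runs with the default single reducer so the per-reducer maximum is also the global one. The class name TopUser and the property top.table.name are made up for illustration.

package club.drguo.xx.mapreduce.tablecount;

import java.io.IOException;

import org.apache.commons.lang.StringUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

// Hypothetical second job: given the winning table from job 1, find the user
// who accessed it most often between 9 and 10 and that user's total TimeSpan.
public class TopUser {
    public static class TopUserMapper extends Mapper<LongWritable, Text, Text, LongWritable> {
        private String topTable;
        private Text user = new Text();

        @Override
        protected void setup(Context context) {
            // The table name found by the first job, e.g. "t001"
            topTable = context.getConfiguration().get("top.table.name");
        }

        @Override
        protected void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            String[] fields = StringUtils.split(value.toString(), " ");
            int hour = Integer.parseInt(fields[1].split(":")[0]);
            if (hour == 9 && fields[0].equals(topTable)) {
                user.set(fields[2]);
                // The value is this access's TimeSpan; the reducer both counts
                // occurrences and sums these values per user.
                context.write(user, new LongWritable(Long.parseLong(fields[3])));
            }
        }
    }

    public static class TopUserReducer extends Reducer<Text, LongWritable, Text, Text> {
        private String topUser = null;
        private long topCount = 0, topTimeSpan = 0;

        @Override
        protected void reduce(Text key, Iterable<LongWritable> values, Context context) {
            long count = 0, timeSpan = 0;
            for (LongWritable v : values) {
                count++;
                timeSpan += v.get();
            }
            if (count > topCount) { // keep only the most frequent user
                topCount = count;
                topTimeSpan = timeSpan;
                topUser = key.toString();
            }
        }

        @Override
        protected void cleanup(Context context) throws IOException, InterruptedException {
            if (topUser != null) {
                context.write(new Text(topUser),
                        new Text("accesses=" + topCount + ", totalTimeSpan=" + topTimeSpan));
            }
        }
    }

    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        conf.set("top.table.name", "t001"); // result of the first job
        Job job = Job.getInstance(conf, "topuserjob");
        job.setJarByClass(TopUser.class);
        job.setMapperClass(TopUserMapper.class);
        job.setReducerClass(TopUserReducer.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(LongWritable.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);
        FileInputFormat.setInputPaths(job, "hdfs://localhost:9000/log");
        FileOutputFormat.setOutputPath(job, new Path("hdfs://localhost:9000/topuser"));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}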



