Write a MapReduce program that finds which table is accessed most frequently during the peak hour (e.g. 9-10 AM)
package club.drguo.xx.mapreduce.tablecount;

import java.io.IOException;
import java.util.TreeMap;

import org.apache.commons.lang.StringUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

/**
 * Analyze massive log files with Hadoop. Each log line records:
 * TableName, Time, User, TimeSpan (time cost)
 * Task: write a MapReduce program that finds which table is accessed most
 * frequently during the peak hour (e.g. 9-10 AM), which user accessed that
 * table the most during that hour, and that user's total TimeSpan on the table.
 *
 * Sample input:
 * t003 6:00 u002 180
 * t003 7:00 u002 180
 * t003 7:08 u002 180
 * t003 7:25 u002 180
 * t002 8:00 u002 180
 * t001 8:00 u001 240
 * t001 9:00 u002 300
 * t001 9:11 u001 240
 * t003 9:26 u001 180
 * t001 9:39 u001 300
 *
 * Step 1: find the most-accessed table between 9 and 10 o'clock.
 *
 * @author drguo
 */
// club.drguo.xx.mapreduce.tablecount.TableCount
public class TableCount {

    public static class TableCountMapper extends Mapper<LongWritable, Text, Text, LongWritable> {
        private Text k = new Text();

        @Override
        protected void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            String line = value.toString();
            String[] strings = StringUtils.split(line, " ");
            String tabName = strings[0];
            String time = strings[1];
            String[] times = time.split(":");
            int hour = Integer.parseInt(times[0]);
            k.set(tabName);
            // Only count accesses that fall in the 9-10 o'clock window.
            if (hour == 9) {
                context.write(k, new LongWritable(1));
                System.out.println("-----------------------------------------------" + k);
            }
        }
    }

    public static class TableCountReducer extends Reducer<Text, LongWritable, Text, LongWritable> {
        private TreeMap<Text, Long> map = new TreeMap<Text, Long>();

        @Override
        protected void reduce(Text key, Iterable<LongWritable> values, Context context)
                throws IOException, InterruptedException {
            // Copy the key; do not write Text tabName = key directly, because the framework reuses the same Text object.
            Text tabName = new Text(key.toString());
            long count = 0;
            for (LongWritable value : values) {
                count += value.get();
            }
            System.out.println(tabName + "--------------------------" + count);
            map.put(tabName, count);
        }

        @Override
        protected void cleanup(Reducer<Text, LongWritable, Text, LongWritable>.Context context)
                throws IOException, InterruptedException {
            // After all keys are reduced, pick the table with the highest count.
            // This top-1 logic relies on a single reducer (the default), so every table's count lands in one TreeMap.
            Text tableName = null;
            long maxCount = 0L;
            for (Text key : map.keySet()) {
                System.out.println("key=" + key + "-----------------value=" + map.get(key));
                if (map.get(key) > maxCount) {
                    maxCount = map.get(key);
                    tableName = key;
                }
            }
            context.write(tableName, new LongWritable(maxCount));
        }
    }

    public static void main(String[] args) throws Exception {
        Configuration configuration = new Configuration();
        Job job = Job.getInstance(configuration, "tablejob");
        job.setJarByClass(TableCount.class);
        job.setMapperClass(TableCountMapper.class);
        job.setReducerClass(TableCountReducer.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(LongWritable.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(LongWritable.class);
        FileInputFormat.setInputPaths(job, "hdfs://localhost:9000/log");
        FileOutputFormat.setOutputPath(job, new Path("hdfs://localhost:9000/tablecount"));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}
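The job above only covers step 1: finding the table with the most accesses in the 9-10 window. The problem statement also asks for the user who accessed that table most during the peak hour and that user's total TimeSpan. Below is a minimal sketch of a possible second job, not part of the original program: it assumes the winning table name is handed in through a hypothetical configuration key "table.name" (e.g. -D table.name=t001), and it reuses the same single-reducer, pick-the-max-in-cleanup pattern as the first job.

// Step 2 (sketch, with assumptions noted above): for the given table, find the user
// who accessed it most often between 9 and 10 o'clock and that user's total TimeSpan.
package club.drguo.xx.mapreduce.tablecount;

import java.io.IOException;
import java.util.HashMap;
import java.util.Map;

import org.apache.commons.lang.StringUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class TableUserCount {

    public static class TableUserMapper extends Mapper<LongWritable, Text, Text, LongWritable> {
        private String tableName;
        private Text user = new Text();

        @Override
        protected void setup(Context context) {
            // Hypothetical parameter: the table picked by the first job.
            tableName = context.getConfiguration().get("table.name", "t001");
        }

        @Override
        protected void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            // Line format: TableName Time User TimeSpan
            String[] fields = StringUtils.split(value.toString(), " ");
            int hour = Integer.parseInt(fields[1].split(":")[0]);
            if (hour == 9 && tableName.equals(fields[0])) {
                user.set(fields[2]);
                // Emit the TimeSpan; the number of values per user gives the access count.
                context.write(user, new LongWritable(Long.parseLong(fields[3])));
            }
        }
    }

    public static class TableUserReducer extends Reducer<Text, LongWritable, Text, Text> {
        // user -> {access count, total TimeSpan}
        private Map<String, long[]> stats = new HashMap<String, long[]>();

        @Override
        protected void reduce(Text key, Iterable<LongWritable> values, Context context) {
            long count = 0, totalSpan = 0;
            for (LongWritable v : values) {
                count++;
                totalSpan += v.get();
            }
            stats.put(key.toString(), new long[]{count, totalSpan});
        }

        @Override
        protected void cleanup(Context context) throws IOException, InterruptedException {
            // Same single-reducer, pick-the-max pattern as the first job.
            String topUser = null;
            long[] best = {0, 0};
            for (Map.Entry<String, long[]> e : stats.entrySet()) {
                if (e.getValue()[0] > best[0]) {
                    topUser = e.getKey();
                    best = e.getValue();
                }
            }
            if (topUser != null) {
                context.write(new Text(topUser), new Text("count=" + best[0] + ", totalTimeSpan=" + best[1]));
            }
        }
    }

    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf, "tableuserjob");
        job.setJarByClass(TableUserCount.class);
        job.setMapperClass(TableUserMapper.class);
        job.setReducerClass(TableUserReducer.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(LongWritable.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);
        FileInputFormat.setInputPaths(job, "hdfs://localhost:9000/log");
        FileOutputFormat.setOutputPath(job, new Path("hdfs://localhost:9000/tableusercount"));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}

With the sample data above, the first job reports t001 (3 accesses in the 9-10 window), and this sketch would then report u001 with 2 accesses and a total TimeSpan of 540 on t001.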