// 分组TopN (Grouped Top-N)

package com.zhiyou.bd23.topn;

import java.io.File;
import java.io.IOException;
import java.util.TreeMap;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

import sun.launcher.resources.launcher;

//分组topN,求出每一种类型的音乐的播放量top3的音乐名称和播放次数
public class GroupTopN {
	//map的输出:key(音乐类型),value(音乐名称+播放次数)
	public static class GroupTopNMap extends Mapper{
		private Text outputKey = new Text();
		private Text outputValue = new Text();
		private String[] infos;
		@Override
		protected void map(LongWritable key, Text value, Mapper.Context context)
				throws IOException, InterruptedException {
			if (key.get()>0) {
				infos = value.toString().trim().split(";");
				if(infos!=null && infos.length==3){
					outputKey.set(infos[2]);
					outputValue.set(infos[0]+";"+infos[1]);
					context.write(outputKey, outputValue);
				}
			}
		}
	}
	//reduce上对每一组key求一个topN
	public static class GroupTopNReduce extends Reducer{
		private Text outputKey = new Text();
		private Text outputValue = new Text(); 
		//定义treemap用来求每一组类型的音乐的top3
		private TreeMap top3 = new TreeMap();
		private String[] infos;
		@Override
		protected void reduce(Text key, Iterable values, Reducer.Context context)
				throws IOException, InterruptedException {
			//清空top3
			top3.clear();
			for(Text value:values){
				infos = value.toString().split(";");
				//top3中有播放次数相同的
				if(top3.containsKey(Integer.valueOf(infos[1]))){
					//把歌曲的名称添加到播放次数相同的歌曲名称中去
					top3.put(Integer.valueOf(infos[1]), top3.get(Integer.valueOf(infos[1]))+","+infos[0]);
				}else{
					if(top3.size()==3){
						//加进来 再删一条
						top3.put(Integer.valueOf(infos[1]), infos[0]);
						top3.remove(top3.firstKey());
					}else{
						//直接加进来
						top3.put(Integer.valueOf(infos[1]), infos[0]);
					}
				}
			}
			//把top3中的数据输出
			for(int i:top3.descendingKeySet()){
				outputKey.set(key);
				outputValue.set(top3.get(i)+";"+i);
				context.write(outputKey, outputValue);
			}
		}
	}
	public static void main(String[] args) throws Exception {
		Configuration conf = new Configuration();
		Job job = Job.getInstance(conf);
		job.setJarByClass(GroupTopN.class);
		job.setJobName("分组求topn");
		job.setMapperClass(GroupTopNMap.class);
		job.setReducerClass(GroupTopNReduce.class);
		job.setOutputKeyClass(Text.class);
		job.setOutputValueClass(Text.class);
		Path input = new Path("/musictype.txt");
		Path outputDir = new Path("/grouptopnoutput");
		outputDir.getFileSystem(conf).delete(outputDir, true);
		FileInputFormat.addInputPath(job, input);
		FileOutputFormat.setOutputPath(job, outputDir);
		job.setNumReduceTasks(2);
		System.exit(job.waitForCompletion(true)?0:1);
	}
}

// Related reading (你可能感兴趣的): 分组TopN (Grouped Top-N)