1、需求分析
从订单数据中找出 payment(支付金额)最大的前 N 条记录。输入文件每行的字段依次为:
orderid,userid,payment,productid
[root@x00 hd]# cat seventeen_a.txt
1,9819,100,121
2,8918,2000,111
3,2813,1234,22
4,9100,10,1101
5,3210,490,111
6,1298,28,1211
7,1010,281,90
8,1818,9000,20
[root@x00 hd]# cat seventeen_b.txt
100,3333,10,100
101,9321,1000,293
102,3881,701,20
103,6791,910,30
104,8888,11,39
2.Mapper程序:
package cn.edu.bjut.topn;
import java.io.IOException;
import java.util.Arrays;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
public class TopNMapper extends Mapper<LongWritable, Text, IntWritable, IntWritable> {
int len;
int[] top;
@Override
protected void cleanup(Context context)
throws IOException, InterruptedException {
for(int i=1; inew IntWritable(top[i]), new IntWritable(top[i]));
}
}
@Override
protected void map(LongWritable key, Text value, Context context)
throws IOException, InterruptedException {
String line = value.toString().trim();
String[] arr = line.split(",");
if(4 == arr.length) {
int payment = Integer.parseInt(arr[2]);
add(payment);
}
}
private void add(int payment) {
top[0] = payment;
Arrays.sort(top);
}
@Override
protected void setup(Context context)
throws IOException, InterruptedException {
len = context.getConfiguration().getInt("N", 10);
top = new int[len+1];
}
}
3.Reducer程序:
package cn.edu.bjut.topn;
import java.io.IOException;
import java.util.Arrays;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
/**
 * Reducer for the Top-N job: merges the candidate payments emitted by the
 * mappers, keeps the N largest overall, and writes them in {@link #cleanup}
 * as {@code rank -> payment} pairs in descending order (rank 1 is the largest).
 *
 * <p>N is read from the job configuration key {@code "N"} (default 10).
 * The ranking is per reducer instance, so the result is a global Top-N only
 * when the job runs with a single reduce task.
 */
public class TopNReducer extends Reducer<IntWritable, IntWritable, Text, Text> {
    /** Number of largest values to retain; never negative after {@link #setup}. */
    int len;
    /**
     * Buffer of {@code len + 1} slots kept in ascending order. Slot 0 is the
     * scratch slot that receives each incoming value before re-sorting.
     */
    int[] top;
    /** How many real payments currently occupy the buffer (at most {@code len}). */
    private int filled;

    /**
     * Writes the retained payments from largest to smallest, keyed by their
     * 1-based rank. Unfilled slots are skipped, so fewer than N rows are
     * written when fewer than N values were received.
     */
    @Override
    protected void cleanup(Context context)
            throws IOException, InterruptedException {
        for (int i = len; i > len - filled; i--) {
            context.write(new Text(String.valueOf(len - i + 1)), new Text(String.valueOf(top[i])));
        }
    }

    /** Offers every value of the group to the buffer; the key itself is not used. */
    @Override
    protected void reduce(IntWritable key, Iterable<IntWritable> values, Context context)
            throws IOException, InterruptedException {
        for (IntWritable value : values) {
            add(value.get());
        }
    }

    /** Offers a payment to the buffer, evicting the current smallest candidate. */
    private void add(int i) {
        top[0] = i;
        Arrays.sort(top);
        if (filled < len) {
            filled++;
        }
    }

    /** Reads N from the configuration and allocates an empty buffer. */
    @Override
    protected void setup(Context context)
            throws IOException, InterruptedException {
        // Clamp so a negative N yields "emit nothing" rather than an allocation error.
        len = Math.max(0, context.getConfiguration().getInt("N", 10));
        top = new int[len + 1];
        // Fill with the minimum int so that unfilled slots sort below every
        // real payment, including negative ones.
        Arrays.fill(top, Integer.MIN_VALUE);
        filled = 0;
    }
}
4.主程序:
package cn.edu.bjut.topn;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
/**
 * Driver for the Top-N job.
 *
 * <p>Usage: {@code MainJob <input path> <output path> [N]}. The optional third
 * argument sets the configuration key {@code "N"} read by the mapper and
 * reducer; when omitted, whatever the configuration already holds is used.
 * An existing output directory is deleted before the job is submitted.
 */
public class MainJob {
    /**
     * Configures and runs the job, then exits with status 0 on success,
     * 1 on job failure, or 2 when the arguments are missing.
     *
     * @param args input path, output path, and optionally N
     * @throws Exception if job setup or submission fails
     */
    public static void main(String[] args) throws Exception {
        if (args.length < 2) {
            System.err.println("Usage: MainJob <input path> <output path> [N]");
            System.exit(2);
        }

        Configuration conf = new Configuration();
        if (args.length >= 3) {
            // Must be set before the Job is created: the Job works on its own
            // copy of the configuration.
            conf.setInt("N", Integer.parseInt(args[2].trim()));
        }

        // NOTE(review): on Hadoop 2+ this constructor is deprecated in favour of
        // Job.getInstance(conf, "topn"); kept for compatibility with older releases.
        Job job = new Job(conf, "topn");
        job.setJarByClass(MainJob.class);

        job.setMapperClass(TopNMapper.class);
        job.setMapOutputKeyClass(IntWritable.class);
        job.setMapOutputValueClass(IntWritable.class);

        job.setReducerClass(TopNReducer.class);
        // The reducer ranks only what it receives, so a global Top-N needs
        // exactly one reduce task.
        job.setNumReduceTasks(1);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);

        FileInputFormat.addInputPath(job, new Path(args[0]));

        Path outPath = new Path(args[1]);
        // Resolve the filesystem from the path itself so a fully qualified
        // output URI on a non-default filesystem is handled correctly.
        FileSystem fs = outPath.getFileSystem(conf);
        if (fs.exists(outPath)) {
            fs.delete(outPath, true);
        }
        FileOutputFormat.setOutputPath(job, outPath);

        // Propagate job failure to the caller through the exit status.
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}