Implementation approach:
1: Build an order bean (OrderBean) and use that bean as the map output key.
2: Partition with a custom Partitioner on the item ID, so that records with the same ID land in the same partition, and thus reach the same reducer.
3: Map-side sorting then orders the beans via OrderBean.compareTo: first by item ID, then by amount, largest first.
4: After steps 1-3 the OrderBeans arrive at the reducer already sorted. However, by default the framework groups consecutive keys using the key class's own comparator, and since every bean carries a different amount, no two beans compare as equal and each would form its own group. We therefore implement a custom grouping comparator that compares only the itemId, so that all beans sharing an itemId enter a single reduce call.
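For concreteness, here is a hypothetical input sample (the mapper below only assumes comma-separated lines with the grouping ID in field 0 and the amount in field 2; the middle field is unused):

order001,pdt01,222.8
order001,pdt05,25.8
order002,pdt03,522.8
order002,pdt04,122.4

The expected output would be one line per ID carrying its maximum amount: order001 with 222.8 and order002 with 522.8.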
The code is as follows:
OrderBean:
package groupingcompare;
import org.apache.hadoop.io.WritableComparable;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
public class OrderBean implements WritableComparable<OrderBean> {

    private String itemId;
    private Double amount;

    public OrderBean() {
    }

    public void set(String itemId, Double amount) {
        this.itemId = itemId;
        this.amount = amount;
    }

    @Override
    public int compareTo(OrderBean o) {
        int cmp = this.itemId.compareTo(o.getItemId());
        if (cmp == 0) {
            // Negating the comparison sorts amounts in descending order, so the
            // largest amount comes first within each itemId group; without the
            // minus sign you would end up with the smallest amount instead.
            cmp = -this.amount.compareTo(o.getAmount());
        }
        return cmp;
    }

    @Override
    public void write(DataOutput dataOutput) throws IOException {
        dataOutput.writeUTF(itemId);
        dataOutput.writeDouble(amount);
    }

    @Override
    public void readFields(DataInput dataInput) throws IOException {
        this.itemId = dataInput.readUTF();
        this.amount = dataInput.readDouble();
    }

    public String getItemId() {
        return itemId;
    }

    public void setItemId(String itemId) {
        this.itemId = itemId;
    }

    public Double getAmount() {
        return amount;
    }

    public void setAmount(Double amount) {
        this.amount = amount;
    }

    @Override
    public String toString() {
        return "OrderBean{" +
                "itemId='" + itemId + '\'' +
                ", amount=" + amount +
                '}';
    }
}
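As a quick sanity check on the sort order, here is a minimal sketch (illustrative only, not part of the job; the class name OrderBeanDemo is hypothetical):

package groupingcompare;

// Illustrative only: verifies the ordering defined by OrderBean.compareTo.
public class OrderBeanDemo {
    public static void main(String[] args) {
        OrderBean a = new OrderBean();
        a.set("order001", 100.0);
        OrderBean b = new OrderBean();
        b.set("order001", 200.0);
        // Same itemId, so the comparison falls through to the amount:
        // the larger amount sorts first, hence a compares *after* b.
        System.out.println(a.compareTo(b)); // prints 1
        OrderBean c = new OrderBean();
        c.set("order002", 50.0);
        // Different itemIds compare lexicographically; amount is ignored.
        System.out.println(a.compareTo(c)); // negative: "order001" < "order002"
    }
}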
ItemIdPartitioner: modeled on the implementation of HashPartitioner; it partitions by the number of reduce tasks passed in.
package groupingcompare;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapreduce.Partitioner;
public class ItemIdPartitioner extends Partitioner<OrderBean, NullWritable> {
    @Override
    public int getPartition(OrderBean orderBean, NullWritable nullWritable, int numReduceTasks) {
        // numReduceTasks is the reduce task count configured on the job.
        // ANDing the hash with Integer.MAX_VALUE clears the sign bit, so the
        // modulo result is never negative (the same trick HashPartitioner uses).
        return (orderBean.getItemId().hashCode() & Integer.MAX_VALUE) % numReduceTasks;
    }
}
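A quick illustration of how keys land in partitions with two reducers (hypothetical itemId values; the class name PartitionDemo is made up for this sketch):

package groupingcompare;

import org.apache.hadoop.io.NullWritable;

// Illustrative only: shows which of the two partitions a few itemIds map to.
public class PartitionDemo {
    public static void main(String[] args) {
        ItemIdPartitioner partitioner = new ItemIdPartitioner();
        OrderBean bean = new OrderBean();
        for (String itemId : new String[]{"order001", "order002", "order003"}) {
            bean.set(itemId, 0.0);
            // With numReduceTasks = 2 this is effectively hashCode % 2, so every
            // record with the same itemId always lands in the same partition.
            System.out.println(itemId + " -> partition "
                    + partitioner.getPartition(bean, NullWritable.get(), 2));
        }
    }
}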
ItemidGroupingComparator:
package groupingcompare;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.io.WritableComparator;
/**
 * Uses the reduce-side GroupingComparator to treat a group of beans
 * sharing the same itemId as a single key.
 */
public class ItemidGroupingComparator extends WritableComparator {
    // Pass in the class of the bean used as the key; the 'true' flag tells
    // the parent to create instances of it via reflection for comparison.
    public ItemidGroupingComparator() {
        super(OrderBean.class, true);
    }

    @Override
    public int compare(WritableComparable a, WritableComparable b) {
        OrderBean aBean = (OrderBean) a;
        OrderBean bBean = (OrderBean) b;
        // Only the itemId matters for grouping; the amount is ignored, so
        // beans with the same itemId are fed into the same reduce() call.
        return aBean.getItemId().compareTo(bBean.getItemId());
    }
}
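To see the two comparators disagree on purpose, consider this sketch (illustrative only; the class name GroupingDemo is hypothetical). The sort order separates beans with different amounts, while the grouping comparator treats them as the same key:

package groupingcompare;

// Illustrative only: the sort order distinguishes the two beans,
// but the grouping comparator considers them one key.
public class GroupingDemo {
    public static void main(String[] args) {
        OrderBean a = new OrderBean();
        a.set("order001", 100.0);
        OrderBean b = new OrderBean();
        b.set("order001", 200.0);
        System.out.println(a.compareTo(b));                               // 1: distinct in sort order
        System.out.println(new ItemidGroupingComparator().compare(a, b)); // 0: same group in reduce
    }
}

Because the keys reach the reducer sorted by (itemId, amount descending) and are grouped by itemId alone, the first key of each group, the one reduce() sees, is the bean with that itemId's maximum amount.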
Driver class:
package groupingcompare;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import java.io.IOException;
public class Amount {

    static class AmountMapper extends Mapper<LongWritable, Text, OrderBean, NullWritable> {
        OrderBean bean = new OrderBean();

        @Override
        protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
            String line = value.toString();
            String[] fields = line.split(",");
            // field 0 is the itemId, field 2 is the amount
            bean.set(fields[0], Double.valueOf(fields[2]));
            context.write(bean, NullWritable.get());
        }
    }

    static class AmountReducer extends Reducer<OrderBean, NullWritable, OrderBean, NullWritable> {
        @Override
        protected void reduce(OrderBean key, Iterable<NullWritable> values, Context context) throws IOException, InterruptedException {
            // Keys in each group arrive sorted by amount in descending order, so
            // the first key of the group, seen here, carries the maximum amount.
            context.write(key, NullWritable.get());
        }
    }
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // Local debugging mode
        conf.set("mapreduce.framework.name", "local");
        conf.set("fs.defaultFS", "file:///");
        // Local submission mode with HDFS online
        // conf.set("mapreduce.framework.name", "local");
        // conf.set("fs.defaultFS", "hdfs://master:9000");
        // Pass conf to the job so the settings above take effect
        Job job = Job.getInstance(conf);
        // For cluster runs
        // job.setJarByClass(Amount.class);
        // Specify the mapper and reducer classes to run
        job.setMapperClass(AmountMapper.class);
        job.setReducerClass(AmountReducer.class);
        // Map output key/value types
        job.setMapOutputKeyClass(OrderBean.class);
        job.setMapOutputValueClass(NullWritable.class);
        // Final output key/value types
        job.setOutputKeyClass(OrderBean.class);
        job.setOutputValueClass(NullWritable.class);
        job.setPartitionerClass(ItemIdPartitioner.class);
        job.setGroupingComparatorClass(ItemidGroupingComparator.class);
        job.setNumReduceTasks(2);
        // Input file/directory for the job
        // FileInputFormat.setInputPaths(job, new Path("/wordcount/input"));
        FileInputFormat.setInputPaths(job, new Path("/home/willian/Desktop/project/java/hadoop/mrlocal/amount.txt"));
        FileOutputFormat.setOutputPath(job, new Path("/home/willian/Desktop/project/java/hadoop/mrlocal/out"));
        boolean res = job.waitForCompletion(true);
        System.exit(res ? 0 : 1);
    }
}
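With setNumReduceTasks(2), the job writes two files under the output directory, part-r-00000 and part-r-00001; each contains one line per itemId routed to that partition, holding that itemId's maximum amount. Note that the output directory must not already exist before the run, or FileOutputFormat will fail the job with a FileAlreadyExistsException.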