hadoop实现同一订单中找出最大金额

实现思路:
1:构造订单Bean,并以该Bean作为key。
2:首先根据订单ID进行Partitioner分区,相同订单在map端被分到同一区
3:其次,分区之后的数据在map端进行排序:先按订单ID排序,订单ID相同时再按金额降序排序
4:经过1,2,3步,到达reducer端的数据已经是排好序的OrderBean。但是reduce端默认使用key的排序比较器(即compareTo)来判断相邻的key是否属于同一组,而同一订单中金额不同的bean比较结果不为0,会被当成不同的key分别处理。所以这里重新实现GroupingComparator,只根据bean的itemId来判断,让相同itemId的bean进入同一次reduce调用,此时组内的第一个key就是该订单的最大金额
代码如下:
OrderBean:

package groupingcompare;

import org.apache.hadoop.io.WritableComparable;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;


/**
 * Composite MapReduce key holding an order (item) id and one amount of that order.
 *
 * <p>Keys sort by {@code itemId} ascending and, within the same id, by
 * {@code amount} descending, so the largest amount of each order comes first.
 *
 * <p>The class is mutable on purpose: a single instance is refilled through
 * {@link #set(String, Double)} or {@link #readFields(DataInput)}.
 */
public class OrderBean implements WritableComparable<OrderBean> {

    private String itemId;
    private Double amount;

    /** No-arg constructor; the fields stay {@code null} until set or deserialized. */
    public OrderBean() {
    }

    /**
     * Replaces both fields in one call.
     *
     * @param itemId the order id
     * @param amount the amount belonging to that order
     */
    public void set(String itemId, Double amount) {
        this.itemId = itemId;
        this.amount = amount;
    }

    /**
     * Orders by {@code itemId} ascending, then by {@code amount} descending.
     *
     * @param o the bean to compare against
     * @return a negative, zero or positive value following the {@link Comparable} contract
     */
    @Override
    public int compareTo(OrderBean o) {
        int cmp = this.itemId.compareTo(o.getItemId());
        if (cmp == 0) {
            // Operands are swapped to get DESCENDING amounts (largest first).
            // Comparing this.amount to o.getAmount() instead would put the smallest amount first.
            cmp = o.getAmount().compareTo(this.amount);
        }
        return cmp;
    }

    /**
     * Serializes the bean as a UTF string followed by a double.
     *
     * @param dataOutput the sink to write to
     * @throws IOException if writing fails
     */
    @Override
    public void write(DataOutput dataOutput) throws IOException {
        dataOutput.writeUTF(itemId);
        dataOutput.writeDouble(amount);
    }

    /**
     * Restores the bean, reading the fields in the same order {@link #write(DataOutput)} wrote them.
     *
     * @param dataInput the source to read from
     * @throws IOException if reading fails
     */
    @Override
    public void readFields(DataInput dataInput) throws IOException {
        this.itemId = dataInput.readUTF();
        this.amount = dataInput.readDouble();
    }

    /** @return the order id */
    public String getItemId() {
        return itemId;
    }

    /** @param itemId the order id */
    public void setItemId(String itemId) {
        this.itemId = itemId;
    }

    /** @return the amount */
    public Double getAmount() {
        return amount;
    }

    /** @param amount the amount */
    public void setAmount(Double amount) {
        this.amount = amount;
    }

    @Override
    public String toString() {
        return "OrderBean{" +
                "itemId='" + itemId + '\'' +
                ", amount=" + amount +
                '}';
    }
}

ItemIdPartitioner:参考HashPartitioner的实现,根据传入的reducerTask进行分区

package groupingcompare;


import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapreduce.Partitioner;

/**
 * Partitions map output by order id so that all beans sharing an
 * {@code itemId} get the same partition number.
 */
public class ItemIdPartitioner extends Partitioner<OrderBean,NullWritable>{

    /**
     * Derives the partition from the hash of the bean's item id.
     *
     * @param orderBean the map output key
     * @param nullWritable the (unused) map output value
     * @param numReducerTask the number of partitions to spread keys over
     * @return a partition index in {@code [0, numReducerTask)}
     */
    @Override
    public int getPartition(OrderBean orderBean, NullWritable nullWritable, int numReducerTask) {
        // Masking with Integer.MAX_VALUE clears the sign bit, so a negative
        // hashCode cannot yield a negative partition index.
        final int nonNegativeHash = orderBean.getItemId().hashCode() & Integer.MAX_VALUE;
        return nonNegativeHash % numReducerTask;
    }
}

ItemidGroupingComparator:

package groupingcompare;


import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.io.WritableComparator;

/**
 * Reduce-side grouping comparator that treats all beans with the same
 * {@code itemId} as one key, regardless of their amount.
 */
public class ItemidGroupingComparator extends WritableComparator {

    /**
     * Registers {@link OrderBean} as the key class; the {@code true} flag is
     * passed so the parent creates key instances for the object-based compare below.
     */
    public ItemidGroupingComparator() {
        super(OrderBean.class, true);
    }

    /**
     * Compares two keys by item id only.
     *
     * @param a the first key, expected to be an {@link OrderBean}
     * @param b the second key, expected to be an {@link OrderBean}
     * @return the result of comparing the two item ids as strings
     */
    @Override
    public int compare(WritableComparable a, WritableComparable b) {
        final OrderBean left = (OrderBean) a;
        final OrderBean right = (OrderBean) b;
        final String leftId = left.getItemId();
        return leftId.compareTo(right.getItemId());
    }
}

运行类:

package groupingcompare;


import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import java.io.IOException;

/**
 * Driver for a MapReduce job that emits, for every order id, the record with
 * the largest amount.
 *
 * <p>The job relies on three cooperating pieces: {@link OrderBean} sorts by id
 * and then by amount descending, {@link ItemIdPartitioner} partitions by id, and
 * {@link ItemidGroupingComparator} groups reduce input by id only.
 */
public class Amount {

    /**
     * Parses one comma-separated input line into an {@link OrderBean} key.
     * The first field is used as the order id and the third as the amount.
     */
    static class AmountMapper extends Mapper<LongWritable, Text, OrderBean, NullWritable> {
        // One bean refilled per record; this assumes context.write serializes the key immediately.
        private final OrderBean bean = new OrderBean();

        /**
         * @param key the input key (unused)
         * @param value one line of input text
         * @param context the sink for the (bean, null) output pair
         * @throws IOException if writing the output fails
         * @throws InterruptedException if the task is interrupted
         */
        @Override
        protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
            String line = value.toString();
            String[] fields = line.split(",");
            // Requires at least three fields; fields[2] must parse as a double.
            bean.set(fields[0], Double.valueOf(fields[2]));
            context.write(bean, NullWritable.get());
        }
    }

    /**
     * Writes a single key per reduce group. With grouping by item id and
     * amounts sorted descending, that key carries the order's largest amount.
     */
    static class AmountReducer extends Reducer<OrderBean, NullWritable, OrderBean, NullWritable> {

        /**
         * @param key the first key of the group
         * @param values the group's values (not iterated)
         * @param context the sink for the output pair
         * @throws IOException if writing the output fails
         * @throws InterruptedException if the task is interrupted
         */
        @Override
        protected void reduce(OrderBean key, Iterable<NullWritable> values, Context context) throws IOException, InterruptedException {
            context.write(key, NullWritable.get());
        }
    }

    /**
     * Configures and runs the job, exiting with status 0 on success and 1 on failure.
     *
     * @param args command-line arguments (unused)
     * @throws Exception if job setup or execution fails
     */
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();

        // Local debugging mode: local job runner on the local file system.
        conf.set("mapreduce.framework.name","local");
        conf.set("fs.defaultFS","file:///");

        // Local job runner reading from a live HDFS instead:
//        conf.set("mapreduce.framework.name","local");
//        conf.set("fs.defaultFS","hdfs://master:9000");

        // Pass conf explicitly; Job.getInstance() without arguments would
        // build its own Configuration and drop the settings made above.
        Job job = Job.getInstance(conf);

        // Lets the framework locate the jar containing this job when run on a cluster.
        job.setJarByClass(Amount.class);

        // Mapper and reducer implementations.
        job.setMapperClass(AmountMapper.class);
        job.setReducerClass(AmountReducer.class);

        // Key/value types emitted by the mapper.
        job.setMapOutputKeyClass(OrderBean.class);
        job.setMapOutputValueClass(NullWritable.class);

        // Key/value types of the final output.
        job.setOutputKeyClass(OrderBean.class);
        job.setOutputValueClass(NullWritable.class);

        // Partition by order id and group reduce input by order id.
        job.setPartitionerClass(ItemIdPartitioner.class);
        job.setGroupingComparatorClass(ItemidGroupingComparator.class);

        job.setNumReduceTasks(2);

        // Input file and output directory of the job.
//        FileInputFormat.setInputPaths(job,new Path("/wordcount/input"));
        FileInputFormat.setInputPaths(job, new Path("/home/willian/Desktop/project/java/hadoop/mrlocal/amount.txt"));
        FileOutputFormat.setOutputPath(job, new Path("/home/willian/Desktop/project/java/hadoop/mrlocal/out"));

        boolean res = job.waitForCompletion(true);

        System.exit(res ? 0 : 1);
    }
}

你可能感兴趣的:(hadoop)