大数据学习-Hadoop04-MapReduce-04-辅助排序&二次排序

实验3:辅助排序二次排序

输入数据

0000001	Pdt_01	222.8
0000002	Pdt_05	722.4
0000001	Pdt_05	25.8
0000003	Pdt_01	222.8
0000003	Pdt_01	33.8
0000002	Pdt_03	522.8
0000002	Pdt_04	122.4

输出数据

E:\output\order2\part-r-00000

3	222.8

E:\output\order2\part-r-00001

1	222.8

E:\output\order2\part-r-00002

2	722.4

分析

(1)利用“订单id和成交金额”作为key,可以将map阶段读取到的所有订单数据按照id分区,按照金额排序,发送到reduce。
(2)在reduce端利用GroupingComparator将订单id相同的kv聚合成组;由于组内key已按金额降序排列,取第一个即是该订单的最大金额。

Order.class

public class Order implements WritableComparable<Order> {
	/* 订单编号 */
	private int orderId;
	/* 价格 */
	private double price;

	public Order() {
	}

	public Order(int orderId, double price) {
		this.orderId = orderId;
		this.price = price;
	}

	@Override
	public String toString() {
		return orderId + "\t" + price;
	}

	@Override
	public void readFields(DataInput in) throws IOException {
		this.orderId = in.readInt();
		this.price = in.readDouble();
	}

	@Override
	public void write(DataOutput out) throws IOException {
		out.writeInt(orderId);
		out.writeDouble(price);
	}

	// 二次排序
	@Override
	public int compareTo(Order o) {
		// 比较 id , price
		int result;
		if (orderId > o.getOrderId()) {
			result = 1;
		} else if (orderId > o.getOrderId()) {
			result = -1;
		} else {
			// 价格倒序排序
			result = price > o.getPrice() ? -1 : 1;
		}
		return result;
	}

	// get and set ...
}

OrderSortMapper.class

public class OrderSortMapper extends Mapper<LongWritable, Text, Order, NullWritable> {
	Order k = new Order();

	@Override
	protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
		// 0000001 Pdt_01 222.8
		// 1 获取一行
		String line = value.toString();
		// 2 切割
		String[] fields = line.split("\\t");
		// 3 封装对象
		int orderId = Integer.parseInt(fields[0]);
		double price = Double.parseDouble(fields[2]);
		k.setOrderId(orderId);
		k.setPrice(price);
		// 4 写出
		// 0000001 222.8
		context.write(k, NullWritable.get());

	}
}

OrderSortReducer.class

public class OrderSortReducer extends Reducer&lt;Order, NullWritable, Order, NullWritable&gt; {

	// One call per orderId group (grouping done by the configured
	// GroupingComparator). Per the sort described above, keys within a
	// group arrive highest-price first, so the incoming `key` is already
	// the max-price record of the group — emit it and ignore the rest.
	@Override
	protected void reduce(Order key, Iterable&lt;NullWritable&gt; values, Context context)
			throws IOException, InterruptedException {

		context.write(key, NullWritable.get());
	}
}

OrderPartitioner.class

public class OrderPartitioner extends Partitioner<Order, NullWritable>{

    @Override
    public int getPartition(Order key, NullWritable value, int numPartitions) {
        // 根据 order.key 进行分区
        return (key.getOrderId() & Integer.MAX_VALUE) % numPartitions;
    }
}

OrderGroupingComparator.class

public class OrderGroupingComparator extends WritableComparator {

	protected OrderGroupingComparator() {
		// true => have the framework instantiate Order objects
		// so compare() can cast its arguments.
		super(Order.class, true);
	}

	/**
	 * Groups reduce input solely by orderId: two keys compare equal here
	 * whenever their order ids match, regardless of price, so all records
	 * of one order are delivered to a single reduce() call.
	 */
	@Override
	public int compare(WritableComparable a, WritableComparable b) {
		Order left = (Order) a;
		Order right = (Order) b;
		return Integer.compare(left.getOrderId(), right.getOrderId());
	}
}

OrderSortDriver.class

// Group reduce input by orderId only (secondary-sort grouping comparator).
job.setGroupingComparatorClass(OrderGroupingComparator.class);
// Route all records of one order to the same reduce task.
job.setPartitionerClass(OrderPartitioner.class);
// Three reducers -> three output files (part-r-00000..00002), matching the sample output.
job.setNumReduceTasks(3);

你可能感兴趣的:(#,大数据学习-Hadoop)