MapReduce Secondary Sort

Custom key

The shuffle phase sorts map output by key only, so we pack both fields (name and money) into one WritableComparable; its compareTo() orders records by name first, then by money.

package test;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;

import org.apache.hadoop.io.WritableComparable;

public class ConsumeWritable implements WritableComparable<ConsumeWritable> {
    private  String name;
    private float money;

    // no-arg constructor required: Hadoop instantiates keys via reflection
    public ConsumeWritable() {}


    public ConsumeWritable(String name, float money) {
        super();
        this.name = name;
        this.money = money;
    }
    // set() pattern borrowed from Hadoop's own Writable source code
    public void set(String name, float money) {
        this.name = name;
        this.money = money;
    }

    public String getName() {
        return name;
    }


    public void setName(String name) {
        this.name = name;
    }


    public float getMoney() {
        return money;
    }


    public void setMoney(float money) {
        this.money = money;
    }

    // serialization: write the fields to the output stream
    public void write(DataOutput out) throws IOException {
        out.writeUTF(name);
        out.writeFloat(money);

    }
    // deserialization: read the fields back in the same order they were written
    public void readFields(DataInput in) throws IOException {
        name=in.readUTF();
        money=in.readFloat();
    }

    public int compareTo(ConsumeWritable o) {
        // first comparison: by name
        int compareTo = this.getName().compareTo(o.getName());
        if (compareTo != 0) {
            return compareTo;
        }
        // second comparison: by money; note that primitive types have no
        // compareTo() method, so box the float into its wrapper class
        return Float.valueOf(this.getMoney()).compareTo(Float.valueOf(o.getMoney()));
    }

    // keys are compared as objects, so equals() and hashCode() must be overridden consistently
    @Override
    public int hashCode() {
        final int prime = 31;
        int result = 1;
        result = prime * result + Float.floatToIntBits(money);
        result = prime * result + ((name == null) ? 0 : name.hashCode());
        return result;
    }


    @Override
    public boolean equals(Object obj) {
        if (this == obj)
            return true;
        if (obj == null)
            return false;
        if (getClass() != obj.getClass())
            return false;
        ConsumeWritable other = (ConsumeWritable) obj;
        if (Float.floatToIntBits(money) != Float.floatToIntBits(other.money))
            return false;
        if (name == null) {
            if (other.name != null)
                return false;
        } else if (!name.equals(other.name))
            return false;
        return true;
    }



    @Override
    public String toString() {
        return name + "," + money;
    }



}
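To make the ordering concrete, here is a small illustrative demo (not part of the original post; the class name ConsumeWritableDemo is hypothetical):

package test;

// Hypothetical demo: checks the sort order the composite key defines.
public class ConsumeWritableDemo {
    public static void main(String[] args) {
        ConsumeWritable a = new ConsumeWritable("lisi", 10f);
        ConsumeWritable b = new ConsumeWritable("lisi", 200f);
        ConsumeWritable c = new ConsumeWritable("wangwu", 10f);
        System.out.println(a.compareTo(b) < 0); // true: same name, 10.0 sorts before 200.0
        System.out.println(a.compareTo(c) < 0); // true: "lisi" sorts before "wangwu"
    }
}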

MapReduce program

package test;


import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.FloatWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
/**
 * Key idea: the shuffle phase sorts map output by key, so putting both
 * fields into a composite key makes the framework do the secondary sort.
 * @author Administrator
 *
 */
public class SecondSortMapReduce extends Configured implements Tool{

    // map phase: parse each input line into a composite key and a float value
    public static class  SecondSortMapper extends Mapper<LongWritable, Text, ConsumeWritable, FloatWritable>{
        private ConsumeWritable mapOutPutKey = new ConsumeWritable();
        private FloatWritable mapOutPutValue= new FloatWritable();
        @Override
        protected void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            // convert the line that was read into a String
            String line = value.toString();
            // split on the tab character
            String[] split = line.split("\t");

            mapOutPutKey.set(split[0], Float.parseFloat(split[1]));
            mapOutPutValue.set(Float.parseFloat(split[1]));

            context.write(mapOutPutKey, mapOutPutValue);

        }


    }
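    /*
     * Illustration with hypothetical data: given a tab-separated input file
     *
     *   lisi        35.0
     *   zhangsan    12.5
     *   lisi        9.0
     *
     * the mapper emits one (ConsumeWritable, FloatWritable) pair per line, and
     * the shuffle hands the keys to the reduce side already sorted:
     * (lisi, 9.0), (lisi, 35.0), (zhangsan, 12.5).
     */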


    public static class SecondSortReducer extends Reducer<ConsumeWritable, FloatWritable, Text, FloatWritable> {
        private Text outputKey = new Text();
        private FloatWritable outputValue = new FloatWritable();
        @Override
        protected void reduce(ConsumeWritable key,
                Iterable<FloatWritable> values, Context context)
                throws IOException, InterruptedException {
            outputKey.set(key.getName());
            // the values arrive sorted by money because money is part of the key
            for (FloatWritable floatWritable : values) {
                outputValue.set(floatWritable.get());
                context.write(outputKey, outputValue);
            }
        }

    }
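    /*
     * Continuing the hypothetical input above, the job's output would read:
     *
     *   lisi        9.0
     *   lisi        35.0
     *   zhangsan    12.5
     *
     * Note: with the grouping comparator commented out in run() below, reduce()
     * fires once per distinct (name, money) key; the output lines are the same,
     * just produced by separate reduce() calls.
     */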




    public int run(String[] args) throws Exception {
        // 1. take the Configuration that ToolRunner handed to this Tool
        Configuration conf = this.getConf();
        // 2. build the MapReduce Job object
        Job job = Job.getInstance(conf, this.getClass().getSimpleName());

        job.setJarByClass(getClass());

        // 3. input path(s) -> map -> reduce -> output path
        // 3.1 set the directory/file to read from
        Path inPath = new Path(args[0]);
        FileInputFormat.setInputPaths(job, inPath);

        // 3.2 configure the map output
        job.setMapperClass(SecondSortMapper.class);
        job.setMapOutputKeyClass(ConsumeWritable.class);
        job.setMapOutputValueClass(FloatWritable.class);
        // custom partitioner (a sketch follows the driver class below)
        //job.setPartitionerClass(NamePartitioner.class);
        // custom grouping comparator (a sketch follows the driver class below)
        //job.setGroupingComparatorClass(NameGroup.class);
        // 3.3 configure the reduce output
        job.setReducerClass(SecondSortReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(FloatWritable.class);

        // 3.4 set the output path, deleting it first if it already exists
        Path outPath = new Path(args[1]);
        FileSystem fs = outPath.getFileSystem(conf);

        if (fs.exists(outPath)) {
            fs.delete(outPath, true);
        }
        FileOutputFormat.setOutputPath(job, outPath);

        // submit the job;
        // waitForCompletion(true) reports detailed progress, which job.submit() cannot
        boolean isSuccessed = job.waitForCompletion(true);
        // job.submit(); // not recommended: no progress reporting

        return isSuccessed ? 0 : 1;
    }

    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // hardcoded HDFS paths for testing; remove these to pass paths on the command line
        args = new String[] {
                "hdfs://hive01:8020/input/ceshi.txt",
                "hdfs://hive01:8020/outputtest"
        };
        int status = ToolRunner.run(conf, new SecondSortMapReduce(), args);

        System.exit(status);
    }

}
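The driver references NamePartitioner and NameGroup but the post never shows them. Here is a minimal sketch of what they might look like, assuming the standard secondary-sort setup: partition and group by name only, so every money value for a given name reaches a single reduce() call, already in ascending order.

package test;

import org.apache.hadoop.io.FloatWritable;
import org.apache.hadoop.mapreduce.Partitioner;

// Route all records with the same name to the same reduce task,
// regardless of the money component of the key.
public class NamePartitioner extends Partitioner<ConsumeWritable, FloatWritable> {
    @Override
    public int getPartition(ConsumeWritable key, FloatWritable value, int numPartitions) {
        return (key.getName().hashCode() & Integer.MAX_VALUE) % numPartitions;
    }
}

package test;

import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.io.WritableComparator;

// Compare keys by name only, so the reduce side treats all
// (name, money) keys with the same name as one group.
public class NameGroup extends WritableComparator {
    public NameGroup() {
        super(ConsumeWritable.class, true); // true: instantiate keys for comparing
    }

    @Override
    public int compare(WritableComparable a, WritableComparable b) {
        ConsumeWritable ka = (ConsumeWritable) a;
        ConsumeWritable kb = (ConsumeWritable) b;
        return ka.getName().compareTo(kb.getName());
    }
}

With both registered (uncomment the two setter lines in run()), each reduce() call receives one name with its values sorted ascending by money.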
