MapReduce实现二次排序续(十)

文章目录

  • 1. 前言
  • 2. 换一种文件格式
  • 3. 代码做部分修正
  • 4. 效果截图
  • 5. 小结

1. 前言

上一篇文章实现的二次排序中,key 和 value 都是数字;本文接着实现 key 为字母或单词、value 为数字的二次排序。

2. 换一种文件格式

先随手敲一份测试数据:每行两列,以制表符(Tab)分隔,第一列为字母或单词,第二列为整数。
MapReduce实现二次排序续(十)_第1张图片
上传文件

hadoop fs -put secondsort2 /secondsort2

3. 代码做部分修正

MyKey类

package com.mapreduce.secondarysort2;

import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.WritableComparable;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;

/**
 * Composite key for the secondary sort: a text field ({@code first}) followed by
 * an integer field ({@code second}).
 *
 * <p>Ordering is by {@code first}, then by {@code second}. {@link #equals(Object)}
 * is consistent with {@link #compareTo(MyKey)}. {@link #hashCode()} depends on
 * {@code first} only, so that hash-based partitioning (Hadoop's default
 * {@code HashPartitioner} partitions on {@code key.hashCode()}) keeps every record
 * sharing a first field on the same reducer.
 */
public class MyKey implements WritableComparable<MyKey> {

    // Owned by this key and never replaced; set() and readFields() overwrite its contents.
    private final Text first = new Text();
    private int second = 0;

    /**
     * Overwrites both fields of this key.
     *
     * @param first  text whose contents are copied into this key; the caller may
     *               freely reuse or mutate it afterwards
     * @param second integer component
     */
    public void set(Text first, int second) {
        // Copy the bytes instead of storing the reference, so later changes to the
        // caller's Text cannot silently change this key.
        this.first.set(first);
        this.second = second;
    }

    /** @return the text component (the key's own instance, not a copy) */
    public Text getFirst() {
        return first;
    }

    /** @return the integer component */
    public int getSecond() {
        return second;
    }

    /**
     * Orders keys by {@code first}, breaking ties by {@code second}.
     *
     * @param o the key to compare against
     * @return a negative, zero, or positive value as this key sorts before, equal
     *         to, or after {@code o}
     */
    @Override
    public int compareTo(MyKey o) {
        int byFirst = first.compareTo(o.first);
        if (byFirst != 0) {
            return byFirst;
        }
        // Integer.compare avoids the overflow of "second - o.second" when the two
        // values have opposite signs and large magnitudes.
        return Integer.compare(second, o.second);
    }

    /** Serializes {@code first} followed by {@code second}. */
    @Override
    public void write(DataOutput out) throws IOException {
        first.write(out);
        out.writeInt(second);
    }

    /** Reads the fields back in the order {@link #write(DataOutput)} emitted them. */
    @Override
    public void readFields(DataInput in) throws IOException {
        first.readFields(in);
        second = in.readInt();
    }

    /** @return the key rendered as {@code <first, second>} */
    @Override
    public String toString() {
        return "<" + first.toString() + ", " + second + ">";
    }

    /**
     * Hashes on {@code first} only. Equal keys still produce equal hashes, and keys
     * that differ only in {@code second} deliberately share a hash.
     */
    @Override
    public int hashCode() {
        return first.hashCode();
    }

    /**
     * @param right object to compare with
     * @return {@code true} when {@code right} is a {@code MyKey} whose two fields
     *         both equal this key's fields
     */
    @Override
    public boolean equals(Object right) {
        if (this == right) {
            return true;
        }
        if (!(right instanceof MyKey)) {
            return false;
        }
        MyKey other = (MyKey) right;
        return second == other.second && first.equals(other.first);
    }

}

MyMapper类

package com.mapreduce.secondarysort2;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

import java.io.IOException;

/**
 * Turns each input line of the form {@code word<TAB>number} into a
 * ({@link MyKey}, {@link IntWritable}) pair: the key carries both the word and the
 * number, and the value repeats the number.
 *
 * <p>Lines that have no tab-separated second column, or whose second column is not
 * an integer, are skipped and counted under the {@code MyMapper/MALFORMED_LINES}
 * counter instead of failing the task.
 */
public class MyMapper extends Mapper<LongWritable, Text, MyKey, IntWritable> {

    private static final String COUNTER_GROUP = "MyMapper";
    private static final String MALFORMED_LINES = "MALFORMED_LINES";

    // Reused across map() calls, like the original key/value holders.
    private final MyKey key = new MyKey();
    private final IntWritable value = new IntWritable();
    private final Text word = new Text();

    /**
     * @param inkey   input key supplied by the input format (not used here)
     * @param invalue one line of input text
     * @param context sink for the emitted pair and for the malformed-line counter
     * @throws IOException          if writing the output pair fails
     * @throws InterruptedException if the task is interrupted while writing
     */
    @Override
    public void map(LongWritable inkey, Text invalue, Context context)
        throws IOException, InterruptedException{
        String[] strs = invalue.toString().split("\t");
        if (strs.length < 2) {
            // Blank line or a line without a tab: there is no numeric column to read.
            context.getCounter(COUNTER_GROUP, MALFORMED_LINES).increment(1);
            return;
        }

        // Parse the number once and reuse it for the log line, the key, and the value.
        final int number;
        try {
            number = Integer.parseInt(strs[1].trim());
        } catch (NumberFormatException e) {
            context.getCounter(COUNTER_GROUP, MALFORMED_LINES).increment(1);
            return;
        }

        System.out.println(strs[0]+ "\t" + number);
        word.set(strs[0]);
        key.set(word, number);
        value.set(number);
        System.out.println("MyMappr : ");
        System.out.println("key = "+ key + ", value = "+ value);
        context.write(key, value);
    }
}

GroupingComparator类

package com.mapreduce.secondarysort2;

import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.io.WritableComparator;

/**
 * Grouping comparator that looks only at the first field of {@link MyKey}, so keys
 * that differ only in their second field compare as equal.
 */
public class GroupingComparator extends WritableComparator {

    /**
     * Registers {@link MyKey} as the compared key class. The {@code true} flag is
     * passed straight to the {@code WritableComparator} constructor.
     */
    public GroupingComparator() {
        super(MyKey.class, true);
    }

    /**
     * Compares two keys by their first field alone.
     *
     * @param a left key; must be a {@link MyKey}
     * @param b right key; must be a {@link MyKey}
     * @return the result of comparing the two first fields; per the original
     *         author's note, a result of 0 puts both keys in the same group and the
     *         reducer is invoked once per group
     */
    @Override
    public int compare(WritableComparable a, WritableComparable b) {
        final MyKey left = (MyKey) a;
        final MyKey right = (MyKey) b;
        return left.getFirst().compareTo(right.getFirst());
    }
}

MyReducer类

package com.mapreduce.secondarysort2;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

import java.io.IOException;

/**
 * Writes one group per call: first a separator line, then one
 * ({@code first field}, {@code value}) pair for every value in the group. The same
 * information is echoed to standard output for inspection.
 */
public class MyReducer extends Reducer<MyKey, IntWritable, Text, IntWritable> {

    // Separator record written before each group.
    private final Text groupSeparator = new Text("********************");
    // Reusable holder for the output key.
    private final Text outputKey = new Text();

    /**
     * @param key     composite key for the group
     * @param values  values belonging to the group (per the original author's
     *                note, they arrive already sorted)
     * @param context sink for the output records
     * @throws IOException          if writing an output record fails
     * @throws InterruptedException if the task is interrupted while writing
     */
    @Override
    public void reduce(MyKey key, Iterable<IntWritable> values, Context context)
        throws IOException, InterruptedException {
        printGroupHeader(key);

        // Group separator: the key is written with a null value.
        context.write(groupSeparator, null);

        // Copy the first field into the output key before iterating.
        outputKey.set(key.getFirst());
        for (IntWritable current : values) {
            System.out.print(current + " ");
            context.write(outputKey, current);
        }

        System.out.println();
    }

    /** Prints the banner and the key that introduce a group on standard output. */
    private void printGroupHeader(MyKey key) {
        System.out.println("Reducer : ");
        System.out.print("MyKey = " + key.toString() + "values = ");
    }
}

SecondarySortApp2类

package com.mapreduce.secondarysort2;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;

import java.net.URI;

/**
 * Driver for the secondary-sort job: removes any previous output directory,
 * configures the mapper, reducer, and grouping comparator, then runs the job and
 * exits with 0 on success or 1 on failure.
 */
public class SecondarySortApp2 {

    private static final String INPUT_PATH = "hdfs://master002:9000/secondsort2";
    private static final String  OUTPUT_PATH = "hdfs://master002:9000/outputsecondsort2";

    /**
     * Entry point.
     *
     * @param args command-line arguments (not used)
     * @throws Exception if file-system access, job setup, or job execution fails
     */
    public static void main(String[] args) throws Exception {
        System.setProperty("HADOOP_USER_NAME", "hadoop");
        final Configuration conf = new Configuration();

        removeExistingOutput(conf);
        final Job job = buildJob(conf);

        // Submit the job and wait for it to finish.
        final boolean succeeded = job.waitForCompletion(true);
        System.exit(succeeded ? 0 : 1);
    }

    /** Deletes the output directory recursively if it already exists. */
    private static void removeExistingOutput(Configuration conf) throws Exception {
        final FileSystem fs = FileSystem.get(URI.create(INPUT_PATH), conf);
        final Path output = new Path(OUTPUT_PATH);
        if (!fs.exists(output)) {
            return;
        }
        fs.delete(output, true);
    }

    /** Creates the job and wires up its classes, formats, and paths. */
    private static Job buildJob(Configuration conf) throws Exception {
        final Job job = Job.getInstance(conf, "SecondarySort2App");
        // Class used to locate the jar that contains the job.
        job.setJarByClass(SecondarySortApp2.class);

        // Map side.
        job.setMapperClass(MyMapper.class);
        job.setMapOutputKeyClass(MyKey.class);
        job.setMapOutputValueClass(IntWritable.class);

        // Reduce side.
        job.setReducerClass(MyReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);

        // Grouping of keys on the reduce side.
        job.setGroupingComparatorClass(GroupingComparator.class);

        // Input format and location.
        job.setInputFormatClass(TextInputFormat.class);
        FileInputFormat.addInputPath(job, new Path(INPUT_PATH));

        // Output format and location.
        job.setOutputFormatClass(TextOutputFormat.class);
        FileOutputFormat.setOutputPath(job, new Path(OUTPUT_PATH));

        return job;
    }
}

4. 效果截图

MapReduce实现二次排序续(十)_第2张图片

5. 小结

MapReduce 基础编程到这里就基本结束了,接下来将介绍:

  1. Yarn模型的介绍(MapReduce2.0)
  2. 数据仓库Hive
  3. 离线处理辅助系统Sqoop

你可能感兴趣的:(大数据,MapReduce)