Custom grouping
A grouping comparator tells the shuffle which map output keys belong to the same reduce group. Here we group on the name field only, so a single reduce() call receives every money value recorded for one person, while the composite key still keeps those values sorted.
NameGroup
package test;
import org.apache.hadoop.io.RawComparator;
import org.apache.hadoop.io.WritableComparator;
public class NameGroup implements RawComparator<ConsumeWritable>{
public int compare(ConsumeWritable o1, ConsumeWritable o2) {
return o1.getName().compareTo(o2.getName());
}
/**
* Byte-level comparison of two serialized composite keys,
* e.g. key1 = (zhangsan, 135.00) serialized into b1, key2 = (yuti, 11032) into b2.
* b1: the bytes of the first serialized ConsumeWritable
* s1: the offset within b1 at which this key starts
* l1: the length of this key in bytes
* The float money field occupies the last 4 bytes of each key, so
* compareBytes(b1, s1, l1 - 4, ...) compares only the serialized name.
*/
public int compare(byte[] b1, int s1, int l1, byte[] b2, int s2, int l2) {
return WritableComparator.compareBytes(b1, s1, l1 - 4, b2, s2, l2 - 4);
}
}
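If you would rather not track byte offsets by hand, the same grouping can be expressed at the object level by extending WritableComparator, which deserializes the keys before comparing them. The class below is only an illustrative sketch (the name NameGroupComparator is made up here); the byte-level version above avoids deserialization and is therefore faster.

package test;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.io.WritableComparator;
public class NameGroupComparator extends WritableComparator {
    public NameGroupComparator() {
        // true -> let WritableComparator create ConsumeWritable instances for deserialization
        super(ConsumeWritable.class, true);
    }
    @Override
    public int compare(WritableComparable a, WritableComparable b) {
        // Group solely by name; the money field is ignored here
        return ((ConsumeWritable) a).getName().compareTo(((ConsumeWritable) b).getName());
    }
}

Either class would be registered the same way, via job.setGroupingComparatorClass(...).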
ConsumeWritable
package test;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import org.apache.hadoop.io.WritableComparable;
public class ConsumeWritable implements WritableComparable<ConsumeWritable>{
private String name;
private float money;
public ConsumeWritable() {}
public ConsumeWritable(String name, float money) {
super();
this.name = name;
this.money = money;
}
//Convenience setter, modeled on the set() methods of Hadoop's built-in Writable types
public void set(String name,float money){
this.name=name;
this.money=money;
}
public String getName() {
return name;
}
public void setName(String name) {
this.name = name;
}
public float getMoney() {
return money;
}
public void setMoney(float money) {
this.money = money;
}
//Serialization
public void write(DataOutput out) throws IOException {
out.writeUTF(name);
out.writeFloat(money);
}
//Deserialization: read the fields back in the same order they were written
public void readFields(DataInput in) throws IOException {
name=in.readUTF();
money=in.readFloat();
}
public int compareTo(ConsumeWritable o) {
//First, compare by name
int compareTo = this.getName().compareTo(o.getName());
if (compareTo !=0) {
return compareTo;
}
//Second, compare by money. Note: primitives have no compareTo method, so box the float into its wrapper class first
return Float.valueOf(this.getMoney()).compareTo(Float.valueOf(o.getMoney()));
}
//Since key objects are compared with each other, equals() and hashCode() are overridden as well
@Override
public int hashCode() {
final int prime = 31;
int result = 1;
result = prime * result + Float.floatToIntBits(money);
result = prime * result + ((name == null) ? 0 : name.hashCode());
return result;
}
@Override
public boolean equals(Object obj) {
if (this == obj)
return true;
if (obj == null)
return false;
if (getClass() != obj.getClass())
return false;
ConsumeWritable other = (ConsumeWritable) obj;
if (Float.floatToIntBits(money) != Float.floatToIntBits(other.money))
return false;
if (name == null) {
if (other.name != null)
return false;
} else if (!name.equals(other.name))
return false;
return true;
}
@Override
public String toString() {
return name + "," + money;
}
}
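Because write() and readFields() must stay perfectly in sync, it can be worth verifying the round trip in a plain JVM before submitting a job. The following is only a sketch (the class name ConsumeWritableRoundTrip is hypothetical and assumes it lives in the same test package as ConsumeWritable):

package test;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;
public class ConsumeWritableRoundTrip {
    public static void main(String[] args) throws Exception {
        ConsumeWritable original = new ConsumeWritable("zhangsan", 135.00f);
        // Serialize via write()
        ByteArrayOutputStream buffer = new ByteArrayOutputStream();
        original.write(new DataOutputStream(buffer));
        // Deserialize via readFields()
        ConsumeWritable copy = new ConsumeWritable();
        copy.readFields(new DataInputStream(new ByteArrayInputStream(buffer.toByteArray())));
        // equals() checks both name and money, so this should print true
        System.out.println(original.equals(copy));
    }
}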
Driver program:
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.FloatWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Partitioner;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
/**
* Core idea: the shuffle phase sorts map output by key, so packing name and money
* into the composite key ConsumeWritable yields a secondary sort on money within each name.
* @author Administrator
*
*/
public class SecondSortMapReduce extends Configured implements Tool{
//Map phase
public static class SecondSortMapper extends Mapper<LongWritable, Text, ConsumeWritable, FloatWritable>{
private ConsumeWritable mapOutPutKey = new ConsumeWritable();
private FloatWritable mapOutPutValue= new FloatWritable();
@Override
protected void map(LongWritable key, Text value, Context context)
throws IOException, InterruptedException {
//Convert the input line to a String
String line = value.toString();
//Split on the tab character
String[] split = line.split("\t");
mapOutPutKey.set(split[0], Float.valueOf(split[1]));
mapOutPutValue.set(Float.parseFloat(split[1]));
System.err.print("key: "+mapOutPutKey.toString());
System.err.print("->value: "+mapOutPutValue+"\n");
context.write(mapOutPutKey, mapOutPutValue);
}
}
//Partitioner; its type parameters are the map output key/value types
public static class MyPartitoner extends Partitioner<ConsumeWritable, FloatWritable>{
@Override
public int getPartition(ConsumeWritable key, FloatWritable value,
int numPartitions) {
//Modeled on the HashPartitioner source
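//For reference, Hadoop's built-in HashPartitioner.getPartition is essentially:
//  return (key.hashCode() & Integer.MAX_VALUE) % numReduceTasks;
//Hashing key.getName() instead of the whole key keeps every record for the
//same name in the same partition, and therefore in the same reducer.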
return (key.getName().hashCode() & Integer.MAX_VALUE) % numPartitions;
}
}
public static class SecondSortReducer extends Reducer<ConsumeWritable, FloatWritable, Text, FloatWritable>{
private Text OutPutKey =new Text();
private FloatWritable OutPutValue = new FloatWritable();
@Override
protected void reduce(ConsumeWritable key, Iterable<FloatWritable> values, Context context) throws IOException, InterruptedException {
System.out.print("key:"+key.toString()+"["+"value:");
OutPutKey.set(key.getName());
for (FloatWritable floatWritable : values) {
System.out.print(floatWritable+",");
OutPutValue.set(floatWritable.get());
context.write(OutPutKey, OutPutValue);
}
System.out.println("]"+"\n");
}
}
public int run(String[] args) throws Exception {
// 1. Get the Configuration (ToolRunner has already loaded the configuration files into it)
Configuration conf = this.getConf();
// 2. Build the MapReduce Job object
Job job = Job.getInstance(conf, this.getClass().getSimpleName());
job.setJarByClass(getClass());
// 3. input path -> map -> reduce -> output path
// 3.1 Set the input path
Path inPath = new Path(args[0]);
FileInputFormat.setInputPaths(job, inPath);
// 3.2 Set the mapper and its output key/value types
job.setMapperClass(SecondSortMapper.class);
job.setMapOutputKeyClass(ConsumeWritable.class);
job.setMapOutputValueClass(FloatWritable.class);
//Custom partitioner
job.setPartitionerClass(MyPartitoner.class);
//Custom grouping comparator
job.setGroupingComparatorClass(NameGroup.class);
// 3.3 Set the reducer and the job output key/value types
job.setReducerClass(SecondSortReducer.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(FloatWritable.class);
// 3.4 Set the output path (deleting it first if it already exists)
Path outPath = new Path(args[1]);
FileSystem fs = outPath.getFileSystem(conf);
if (fs.exists(outPath)) {
fs.delete(outPath, true);
}
FileOutputFormat.setOutputPath(job, outPath);
// Submit the job
/**
* waitForCompletion(true) prints detailed progress information; job.submit() does not.
*/
boolean isSuccess = job.waitForCompletion(true);
// job.submit(); // not recommended here: no progress output
return isSuccess ? 0 : 1;
}
public static void main(String[] args) throws Exception {
Configuration conf = new Configuration();
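// NOTE: these hard-coded HDFS paths override any command-line arguments;
// adjust or remove them when running against your own cluster.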
args = new String[] {
"hdfs://hive01:8020/input/ceshi.txt",
"hdfs://hive01:8020/outputtest1"
};
int status = ToolRunner.run(conf, new SecondSortMapReduce(), args);
System.exit(status);
}
}
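To see the pieces working together, assume a tab-separated input file (the contents below are purely hypothetical) at the hdfs://hive01:8020/input/ceshi.txt path used in main():

zhangsan    135.00
lisi    50.50
zhangsan    12.00
lisi    8.00

During the shuffle the composite keys sort by name and then by money (ConsumeWritable.compareTo), NameGroup then merges all keys with the same name into one reduce group, and the reducer writes one name/money pair per record. With a single reducer the output would therefore come out in ascending money order within each name:

lisi    8.0
lisi    50.5
zhangsan    12.0
zhangsan    135.0

The job can be packaged into a jar and launched with the standard hadoop jar command; the paths in main() would normally be replaced by the command-line arguments.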