数据
10.0.0.1,192.168.0.1,10000
10.0.0.1,192.168.0.1,10001
10.0.0.1,192.168.0.1,10002
10.0.0.1,192.168.0.2,10000
10.0.0.1,192.168.0.2,10001
10.0.0.1,192.168.0.3,09999
10.0.0.1,192.168.0.3,10000
10.0.0.2,192.168.0.1,10001
10.0.0.2,192.168.0.1,10002
10.0.0.2,192.168.0.2,10000
10.0.0.2,192.168.0.2,10003
10.0.0.3,192.168.0.1,10000
10.0.0.3,192.168.0.1,10001
10.0.0.3,192.168.0.1,10001
10.0.0.3,192.168.0.1,10002
10.0.0.3,192.168.0.1,10004
10.0.0.3,192.168.0.3,10005
10.0.0.3,192.168.0.3,10006
10.0.0.3,192.168.0.3,10007
10.0.0.3,192.168.0.3,10008
打乱顺序
10.0.0.3,192.168.0.3,10005
10.0.0.1,192.168.0.1,10000
10.0.0.3,192.168.0.1,10001
10.0.0.2,192.168.0.2,10000
10.0.0.3,192.168.0.1,10004
10.0.0.3,192.168.0.1,10002
10.0.0.2,192.168.0.1,10001
10.0.0.1,192.168.0.1,10002
10.0.0.3,192.168.0.1,10000
10.0.0.3,192.168.0.3,10006
10.0.0.1,192.168.0.3,09999
10.0.0.3,192.168.0.3,10008
10.0.0.1,192.168.0.3,10000
10.0.0.1,192.168.0.1,10001
10.0.0.3,192.168.0.3,10007
10.0.0.1,192.168.0.2,10000
10.0.0.2,192.168.0.1,10002
10.0.0.1,192.168.0.2,10001
10.0.0.3,192.168.0.1,10001
10.0.0.2,192.168.0.2,10003
自定义结构
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.io.WritableComparator;
public class TriTuple implements WritableComparable
{
private Text serverIP;
private Text clientIP;
private Text time;
public TriTuple()
{
serverIP = new Text();
clientIP = new Text();
time = new Text();
}
public Text getServerIP()
{
return serverIP;
}
public void setServerIP(Text serverIP)
{
this.serverIP = serverIP;
}
public Text getClientIP()
{
return clientIP;
}
public void setClientIP(Text clientIP)
{
this.clientIP = clientIP;
}
public Text getTime()
{
return time;
}
public void setTime(Text time)
{
this.time = time;
}
public void readFields(DataInput in) throws IOException
{
serverIP.readFields(in);
clientIP.readFields(in);
time.readFields(in);
}
public void write(DataOutput out) throws IOException
{
serverIP.write(out);
clientIP.write(out);
time.write(out);
}
public int compareTo(TriTuple tt)
{
int cmp;
if(0 != (cmp = serverIP.compareTo(tt.serverIP)))
{
return cmp;
}
else
{
if(0 != (cmp = clientIP.compareTo(tt.clientIP)))
{
return cmp;
}
else
{
return time.compareTo(tt.time);
}
}
}
public int hashCode()
{
return serverIP.hashCode() * 31;
}
public boolean equals(Object o)
{
if(o instanceof TriTuple)
{
TriTuple tt = (TriTuple) o;
return serverIP.equals(tt.serverIP) && clientIP.equals(tt.clientIP)
&& time.equals(tt.time);
}
return false;
}
public String toString()
{
return serverIP + "," + clientIP + "," + time;
}
public static class FirstComparator extends WritableComparator
{
protected FirstComparator()
{
super(TriTuple.class, true);
}
public int compare(WritableComparable w1, WritableComparable w2)
{
int cmp1 = ((TriTuple) w1).getServerIP().compareTo(((TriTuple) w2).getServerIP());
int cmp2 = ((TriTuple) w1).getClientIP().compareTo(((TriTuple) w2).getClientIP());
if(0 == cmp1 && 0 == cmp2)
{
return 0;
}
else
{
if(0 != cmp1)
{
return cmp1;
}
else
{
return cmp2;
}
}
}
}
}
MapReduce + Driver
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
public class MyJob
{
public static class MapClass extends Mapper
{
TriTuple outKey = new TriTuple();
public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException
{
String inValue = value.toString();
String[] field = inValue.split(",");
if(3 == field.length)
{
outKey.setClientIP(new Text(field[0]));
outKey.setServerIP(new Text(field[1]));
outKey.setTime(new Text(field[2]));
context.write(outKey, NullWritable.get());
}
}
}
public static class Reduce extends Reducer
{
Text outKey = new Text();
int counter = 0;
public void reduce(TriTuple key, Iterable values, Context context) throws IOException, InterruptedException
{
counter = 0;
for(NullWritable value : values)
{
++counter;
outKey.set(key.toString() + ", " + counter);
context.write(outKey, NullWritable.get());
}
}
}
public static void main(String[] args) throws Exception
{
Configuration conf = new Configuration();
Job job = new Job(conf, "MyJob");
job.setNumReduceTasks(10);
job.setJarByClass(MyJob.class);
job.setMapperClass(MapClass.class);
job.setReducerClass(Reduce.class);
job.setGroupingComparatorClass(TriTuple.FirstComparator.class);
//job.setPartitionerClass(PhNumPartitioner.class);
job.setMapOutputKeyClass(TriTuple.class);
job.setMapOutputValueClass(NullWritable.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(NullWritable.class);
job.setInputFormatClass(TextInputFormat.class);
job.setOutputFormatClass(TextOutputFormat.class);
FileInputFormat.addInputPath(job, new Path(args[0]));
FileOutputFormat.setOutputPath(job, new Path(args[1]));
System.exit(job.waitForCompletion(true) ? 0 : 1);
}
}
结果
192.168.0.1,10.0.0.1,10000, 1
192.168.0.1,10.0.0.1,10001, 2
192.168.0.1,10.0.0.1,10002, 3
192.168.0.1,10.0.0.2,10001, 1
192.168.0.1,10.0.0.2,10002, 2
192.168.0.1,10.0.0.3,10000, 1
192.168.0.1,10.0.0.3,10001, 2
192.168.0.1,10.0.0.3,10001, 3
192.168.0.1,10.0.0.3,10002, 4
192.168.0.1,10.0.0.3,10004, 5
192.168.0.2,10.0.0.1,10000, 1
192.168.0.2,10.0.0.1,10001, 2
192.168.0.2,10.0.0.2,10000, 1
192.168.0.2,10.0.0.2,10003, 2
192.168.0.3,10.0.0.1,09999, 1
192.168.0.3,10.0.0.1,10000, 2
192.168.0.3,10.0.0.3,10005, 1
192.168.0.3,10.0.0.3,10006, 2
192.168.0.3,10.0.0.3,10007, 3
192.168.0.3,10.0.0.3,10008, 4