hadoop@sh-hadoop:/home/hadoop/blb$ more sourText.txt
hadoop|234|2346|sdfasdgadfgdfg
spark|534|65745|fhsdfghdfgh
hive|65|6585|shsfghfgh
hbase|98|456|jhgjdfghj
tachyon|345|567|sfhrtyhert
kafka|455|567|dghrtyh
storm|86|345|dgsdfg
redis|45|56|ergerg
sqoop|45|765|fghd
flume|34|67|sdfgrty
oozie|23|45|adfgdfg
pig|54|456|dfg
zookeeper|23|543|dfgd
solr|75|54|ertgergt

hadoop@sh-hadoop:/home/hadoop/blb$ hdfs dfs -text /user/hadoop/libin/input/sourText.txt | wc -l
14
hadoop@sh-hadoop:/home/hadoop/blb$ hdfs dfs -text /user/hadoop/libin/Domain800_level2/merge1/out1/* | wc -l
14
hadoop@sh-hadoop:/home/hadoop/blb$ hdfs dfs -text /user/hadoop/libin/Domain800_level2/merge1/out1/* | more
spark|534|65745|fhsdfghdfgh
kafka|455|567|dghrtyh
tachyon|345|567|sfhrtyhert
hadoop|234|2346|sdfasdgadfgdfg
hbase|98|456|jhgjdfghj
storm|86|345|dgsdfg
solr|75|54|ertgergt
hive|65|6585|shsfghfgh
pig|54|456|dfg
redis|45|56|ergerg
sqoop|45|765|fghd
flume|34|67|sdfgrty
oozie|23|45|adfgdfg
zookeeper|23|543|dfgd
hadoop@sh-hadoop:/home/hadoop/blb$
2.1 Sort by the second column in descending order (-t "|" sets the field delimiter, -n compares numerically, -r reverses the order, and -k2 sorts on the second field):
sort -t "|" -nrk2 sourText.txt
hadoop@sh-hadoop:/home/hadoop/blb$ sort -t "|" -nrk2 sourText.txt
spark|534|65745|fhsdfghdfgh
kafka|455|567|dghrtyh
tachyon|345|567|sfhrtyhert
hadoop|234|2346|sdfasdgadfgdfg
hbase|98|456|jhgjdfghj
storm|86|345|dgsdfg
solr|75|54|ertgergt
hive|65|6585|shsfghfgh
pig|54|456|dfg
sqoop|45|765|fghd
redis|45|56|ergerg
flume|34|67|sdfgrty
zookeeper|23|543|dfgd
oozie|23|45|adfgdfg
2.2 Sort by the third column in descending order:

sort -t "|" -nrk3 sourText.txt

hadoop@sh-hadoop:/home/hadoop/blb$ sort -t "|" -nrk3 sourText.txt
spark|534|65745|fhsdfghdfgh
hive|65|6585|shsfghfgh
hadoop|234|2346|sdfasdgadfgdfg
sqoop|45|765|fghd
tachyon|345|567|sfhrtyhert
kafka|455|567|dghrtyh
zookeeper|23|543|dfgd
pig|54|456|dfg
hbase|98|456|jhgjdfghj
storm|86|345|dgsdfg
flume|34|67|sdfgrty
redis|45|56|ergerg
solr|75|54|ertgergt
oozie|23|45|adfgdfg
To keep the sorted result, redirect the output into a new file:
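A minimal sketch of that step, assuming an output file named sortedText.txt (the file name is illustrative):

sort -t "|" -nrk2 sourText.txt > sortedText.txt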
Appendix:

MapReduce implementation code:
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;

public class Domain_merge {

    public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
        Configuration conf = new Configuration();
        String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
        if (otherArgs.length != 2) {
            System.err.println("Usage: Domain800_level2 <input> <output>");
            System.exit(2);
        }

        Job job4 = Job.getInstance(conf, Domain_merge.class.getSimpleName());
        job4.setJarByClass(Domain_merge.class);
        job4.setMapOutputKeyClass(Toptaobao500.class);
        job4.setMapOutputValueClass(Text.class);
        job4.setOutputKeyClass(Text.class);
        job4.setOutputValueClass(NullWritable.class);
        //job4.setPartitionerClass(MyPartitioner.class);
        job4.setMapperClass(MyMapper2.class);
        // A single reducer guarantees one globally sorted output file.
        job4.setNumReduceTasks(1);
        job4.setReducerClass(MyReducer2.class);
        job4.setInputFormatClass(TextInputFormat.class);
        job4.setOutputFormatClass(TextOutputFormat.class);
        FileInputFormat.addInputPath(job4, new Path(otherArgs[0]));
        FileOutputFormat.setOutputPath(job4, new Path(otherArgs[1]));
        job4.waitForCompletion(true);
    }

    /**
     * Sorting job mapper: wraps each line in a custom key whose compareTo()
     * orders by the second column (uv) in descending order, so the shuffle
     * phase performs the sorting.
     */
    public static class MyMapper2 extends Mapper<LongWritable, Text, Toptaobao500, Text> {
        Toptaobao500 mw = new Toptaobao500();

        @Override
        protected void map(LongWritable key, Text value,
                Mapper<LongWritable, Text, Toptaobao500, Text>.Context context)
                throws IOException, InterruptedException {
            String[] spl = value.toString().split("\\|");
            String trait = spl[0].trim();
            String uv = spl[1].trim();
            String pv = spl[2].trim();
            String fenlei = spl[3].trim();
            mw.setKind(trait + "|" + uv + "|" + pv + "|" + fenlei);
            mw.setCount(Long.parseLong(uv));
            context.write(mw, new Text(value));
        }
    }

    /**
     * Reducer: keys arrive already sorted, so it simply writes every
     * original line back out in that order.
     */
    public static class MyReducer2 extends Reducer<Toptaobao500, Text, Text, NullWritable> {
        @Override
        protected void reduce(Toptaobao500 k4, Iterable<Text> v4s,
                Reducer<Toptaobao500, Text, Text, NullWritable>.Context context)
                throws IOException, InterruptedException {
            for (Text v4 : v4s) {
                context.write(v4, NullWritable.get());
            }
        }
    }

    /**
     * Composite key: carries the whole line (kind) plus the numeric sort
     * field (count); compareTo() orders by count in descending order.
     */
    public static class Toptaobao500 implements WritableComparable<Toptaobao500> {
        String kind;
        Long count;

        public Toptaobao500() {
        }

        public Toptaobao500(String kind, Long count) {
            this.kind = kind;
            this.count = count;
        }

        public void setKind(String kind) {
            this.kind = kind;
        }

        public void setCount(Long count) {
            this.count = count;
        }

        public String getKind() {
            return this.kind;
        }

        public Long getCount() {
            return this.count;
        }

        @Override
        public void write(DataOutput out) throws IOException {
            out.writeUTF(kind);
            out.writeLong(count);
        }

        @Override
        public void readFields(DataInput in) throws IOException {
            this.kind = in.readUTF();
            this.count = in.readLong();
        }

        @Override
        public int compareTo(Toptaobao500 o) {
            // Reverse the natural order so larger counts come first (descending).
            return Long.compare(o.count, this.count);
        }

        @Override
        public boolean equals(Object obj) {
            return super.equals(obj);
        }

        @Override
        public int hashCode() {
            return super.hashCode();
        }

        @Override
        public String toString() {
            return this.kind;
        }
    }
}
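For reference, a typical way to launch the job is shown below; the jar name domain_merge.jar is hypothetical, the HDFS paths mirror the ones used earlier, and the output directory must not exist before the job runs:

hadoop jar domain_merge.jar Domain_merge /user/hadoop/libin/input/sourText.txt /user/hadoop/libin/Domain800_level2/merge1/out1

Because the job uses setNumReduceTasks(1), all records pass through a single reducer and the shuffle's key ordering produces one globally sorted output file, matching the result of the local sort command above.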