scala> sc.textFile("/README.txt").flatMap(_.split(" ")).map((_,1)).reduceByKey(_+_).map(x =>(x._2,x._1)).sortByKey(false).map(x=>(x._2,x._1)).collect
res0: Array[(String, Int)] = Array(("",18), (the,8), (and,6), (of,5), (The,4), (this,3), (encryption,3), (for,3), (cryptographic,3), (Software,2), (which,2), (at:,2), (software,2), (re-export,2), (includes,2), (import,,2), (software.,2), (possession,,2), (our,2), (please,2), (distribution,2), (on,2), (using,2), (or,2), (use,,2), (information,2), (to,2), (software,,2), (more,2), (Export,2), (Hadoop,1), (Commodity,1), (For,1), (country,1), (under,1), (it,1), (Jetty,1), (Technology,1), (<http://www.wassenaar.org/>,1), (have,1), (http://wiki.apache.org/hadoop/,1), (BIS,1), (classified,1), (This,1), (following,1), (security,1), (See,1), (Number,1), (export,1), (reside,1), ((BIS),,1), (any,1), (makes,1), (algorithms.,1), (latest,1), (your,1), (SSL,1), (Administration,1), (provides,1), (Unrest...
scala>
[root@master IMF2016]#hadoop fs -put helloSpark.txt /
16/01/24 04:37:35 WARN util.NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
[root@master IMF2016]#
[root@master IMF2016]#hadoop fs -put helloSpark.txt /
[root@master IMF2016]#cat helloSpark.txt
2 3
4 1
3 2
4 3
8 7
2 1
运行结果(二次排序后的输出):
2 1
2 3
3 2
4 1
4 3
8 7
源代码:
package com.dt.spark.SparkApps.cores;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.Function;
import org.apache.spark.api.java.function.PairFunction;
import org.apache.spark.api.java.function.VoidFunction;
import scala.Tuple2;
/**
 * Spark driver that sorts the lines of a text file by two integer columns.
 *
 * <p>Each input line is expected to hold two space-separated integers. The line is
 * keyed by a {@link SecondarySortKey} built from those two integers, the pairs are
 * sorted by key, and the original lines are printed in the resulting order.
 */
public class SecondaySortApp {

    /**
     * Runs the secondary-sort job in local mode and prints the sorted lines to stdout.
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String[] args) {
        SparkConf conf = new SparkConf().setAppName("SecondaySortApp").setMaster("local");
        // Under the hood this is actually Scala's SparkContext.
        JavaSparkContext sc = new JavaSparkContext(conf);
        try {
            JavaRDD<String> lines = sc.textFile("G://IMFBigDataSpark2016//tesdata//helloSpark.txt");

            // Pair every line with a composite key made of its two integer columns.
            JavaPairRDD<SecondarySortKey, String> pairs = lines.mapToPair(
                    new PairFunction<String, SecondarySortKey, String>() {
                        private static final long serialVersionUID = 1L;

                        @Override
                        public Tuple2<SecondarySortKey, String> call(String line) throws Exception {
                            String[] splited = line.split(" ");
                            SecondarySortKey key = new SecondarySortKey(
                                    Integer.valueOf(splited[0]), Integer.valueOf(splited[1]));
                            return new Tuple2<SecondarySortKey, String>(key, line);
                        }
                    });

            // Ordering is delegated to SecondarySortKey's comparison methods.
            JavaPairRDD<SecondarySortKey, String> sorted = pairs.sortByKey();

            // Drop the key again; only the original line text is of interest.
            JavaRDD<String> secondarySorted = sorted.map(
                    new Function<Tuple2<SecondarySortKey, String>, String>() {
                        private static final long serialVersionUID = 1L;

                        @Override
                        public String call(Tuple2<SecondarySortKey, String> sortedContent) throws Exception {
                            return sortedContent._2;
                        }
                    });

            secondarySorted.foreach(new VoidFunction<String>() {
                private static final long serialVersionUID = 1L;

                @Override
                public void call(String sorted) throws Exception {
                    System.out.println(sorted);
                }
            });
        } finally {
            // Always release the Spark context, even when the job fails.
            sc.stop();
        }
    }
}
自定义排序 KEY(SecondarySortKey)的定义:
package com.dt.spark.SparkApps.cores;
import java.io.Serializable;
import scala.math.Ordered;
/**
 * Composite sort key made of two integers, ordered by {@code first} and then by
 * {@code second}.
 *
 * <p>Implements Scala's {@code Ordered} so it can be used as a sortable key, and
 * {@code Serializable} so instances can be serialized. All relational operators
 * delegate to {@link #compare(SecondarySortKey)} so they always agree with each other.
 */
public class SecondarySortKey implements Ordered<SecondarySortKey>, Serializable {
    private int first;
    private int second;

    /**
     * Creates a key from its two components.
     *
     * @param first  primary sort component
     * @param second secondary sort component, used when the primary components are equal
     */
    public SecondarySortKey(int first, int second) {
        this.first = first;
        this.second = second;
    }

    /** @return the primary sort component */
    public int getFirst() {
        return first;
    }

    /** @param first the new primary sort component */
    public void setFirst(int first) {
        this.first = first;
    }

    /** @return the secondary sort component */
    public int getSeconde() {
        return second;
    }

    /** @param seconde the new secondary sort component */
    public void setSeconde(int seconde) {
        this.second = seconde;
    }

    /** @return {@code true} if this key sorts strictly after {@code other} */
    @Override
    public boolean $greater(SecondarySortKey other) {
        return compare(other) > 0;
    }

    /** @return {@code true} if this key sorts after {@code other} or both components are equal */
    @Override
    public boolean $greater$eq(SecondarySortKey other) {
        return compare(other) >= 0;
    }

    /** @return {@code true} if this key sorts strictly before {@code other} */
    @Override
    public boolean $less(SecondarySortKey other) {
        return compare(other) < 0;
    }

    /** @return {@code true} if this key sorts before {@code other} or both components are equal */
    @Override
    public boolean $less$eq(SecondarySortKey other) {
        return compare(other) <= 0;
    }

    /**
     * Compares by {@code first}, falling back to {@code second} on a tie.
     *
     * <p>Uses {@link Integer#compare(int, int)} rather than subtraction: subtracting
     * two ints can overflow (for example {@code Integer.MIN_VALUE - 1}) and yield a
     * result with the wrong sign.
     *
     * @param other the key to compare against
     * @return a negative value, zero, or a positive value if this key is less than,
     *         equal to, or greater than {@code other}
     */
    @Override
    public int compare(SecondarySortKey other) {
        int byFirst = Integer.compare(this.first, other.getFirst());
        if (byFirst != 0) {
            return byFirst;
        }
        return Integer.compare(this.second, other.getSeconde());
    }

    /**
     * Same ordering as {@link #compare(SecondarySortKey)}.
     *
     * @param other the key to compare against
     * @return a negative value, zero, or a positive value if this key is less than,
     *         equal to, or greater than {@code other}
     */
    @Override
    public int compareTo(SecondarySortKey other) {
        return compare(other);
    }

    /** Hash combining both components with the multiplier 31. */
    @Override
    public int hashCode() {
        final int prime = 31;
        int result = 1;
        result = prime * result + first;
        result = prime * result + second;
        return result;
    }

    /** Two keys are equal when they are of the same class and both components match. */
    @Override
    public boolean equals(Object obj) {
        if (this == obj) {
            return true;
        }
        if (obj == null || getClass() != obj.getClass()) {
            return false;
        }
        SecondarySortKey other = (SecondarySortKey) obj;
        return first == other.first && second == other.second;
    }
}