* Input:
*
* name, time, value
* x,2,9
* y,2,5
* x,1,3
* y,1,7
* y,3,1
* x,3,6
* z,1,4
* z,2,8
* z,3,7
* z,4,0
*
* Output: generate a time series that looks like this:
* x => [(1,3), (2,9), (3,6)]
* y => [(1,7), (2,5), (3,1)]
* z => [(1,4), (2,8), (3,7), (4,0)]
- 3.2 SecondarySort类总结构
/**
 * Outline of the SecondarySort driver class.
 *
 * The body of main is a step-by-step plan (steps 2-10) given as comments;
 * the code for each step is not part of this skeleton.
 */
public class SecondarySort{
public static void main(String[] args) throws Exception {
// Step 2: read the input arguments and validate them
// Step 3: create a JavaSparkContext object (ctx)
// Step 4: use ctx to create a JavaRDD
// Step 5: build key-value pairs from the JavaRDD, where the key is {name} and the value is the {time, value} pair
// Step 6: verify step 5 by printing the result
// Step 7: group the JavaRDD elements by the key {name}
// Step 8: verify step 7
// Step 9: sort the reducer values to obtain the final output
// Step 10: verify step 9
// NOTE(review): ctx is not declared in this outline; it is the context created in step 3.
ctx.close();
System.exit(0);
}
}
- 3.2.1 步骤2:读取输入参数
if (args.length < 2) {
System.err.println("Usage: SecondarySortUsingGroupByKey <input> <output>");
- 3.2.2 步骤3:连接到sparkMaster
// STEP-3: Connect to the Spark master by creating a JavaSparkContext object
final JavaSparkContext ctx = SparkUtil.createJavaSparkContext("SecondarySorting");
/**
* OUTPUT:
* (z,[(1,4),(2,8),(3,7),(4,0)])
* ……
*/
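List<Tuple2<String, Iterable<Tuple2<Integer, Integer>>>> output3 = sorted.collect();
for (Tuple2<String, Iterable<Tuple2<Integer, Integer>>> t : output3) {
Iterable<Tuple2<Integer, Integer>> list = t._2;
System.out.println(t._1);
for (Tuple2<Integer, Integer> t2 : list) {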
System.out.println(t2._1 + "," + t2._2);
}
System.out.println("=====");
}
sorted.saveAsTextFile(outputPath);
System.exit(0);
}
4.使用scala完成需求
def main(args: Array[String]): Unit = {
//
if (args.length != 3) {
println("Usage <number-of-partitions> <input-path> <output-path>")
sys.exit(1)
}
// val partitions = args(0).toInt
// val inputPath = args(1)
// val outputPath = args(2)
val partitions = 3
val inputPath = "C:\\Users\\Administrator\\Desktop\\Book Code\\input.txt"
val outputPath = " C:\\Users\\Administrator\\Desktop\\Book Code\\output.txt"
val config = new SparkConf
config.setAppName("SecondarySort")
val sc = new SparkContext(config)
val input = sc.textFile(inputPath)
//------------------------------------------------
// each input line/record has the following format:
// <name><,><time><,><value>
转:http://stackoverflow.com/questions/18145774/eclipse-an-error-occurred-while-filtering-resources
maven报错:
maven An error occurred while filtering resources
Maven -> Update Project
在SVN服务控制台打开资源库时提示“SVN无法读取current”(摘自网络)。SVN无法读取current的修复方法(错误信息:Can't read file: End of file found):
文件:repository/db/txn_current、repository/db/current
其中current记录当前最新版本号,txn_current记录版本库中版本