数据集 Data01.txt 如下(每行格式:姓名,课程,成绩):
Aaron,OperatingSystem,100
Aaron,Python,50
Aaron,ComputerNetwork,30
Aaron,Software,94
Abbott,DataBase,18
Abbott,Python,82
Abbott,ComputerNetwork,76
Abel,Algorithm,30
Abel,DataStructure,38
Abel,OperatingSystem,38
Abel,ComputerNetwork,92
Abraham,DataStructure,12
Abraham,ComputerNetwork,78
Abraham,Software,98
Adair,DataBase,20
Adair,Python,98
Adair,Software,88
Adam,Algorithm,18
Adam,ComputerNetwork,70
Adam,Software,80
Adolph,DataStructure,82
Adolph,CLanguage,100
Adolph,ComputerNetwork,70
Adolph,Software,18
Adonis,DataBase,86
Adonis,Algorithm,34
Adonis,DataStructure,52
Adonis,CLanguage,30
Adonis,Python,86
Alan,Algorithm,48
...........................................
// Load the raw grade records; each line is expected to look like "name,course,score".
val dataFile = sc.textFile("file:///root/Data01.txt")

// Parse every line once into a (name, course, score) tuple and cache the result, because all
// queries below reuse it. Lines that do not split into exactly three fields (e.g. blank lines)
// are skipped instead of failing the job with an ArrayIndexOutOfBoundsException.
val records = dataFile.map(_.split(",")).collect {
  case Array(name, course, score) => (name.trim, course.trim, score.trim.toInt)
}.cache()

// 1. Number of distinct students.
records.map { case (name, _, _) => name }.distinct().count()

// 2. Number of distinct courses.
records.map { case (_, course, _) => course }.distinct().count()

// 3. Tom's average score. The name is compared for equality: a substring test on the whole
//    line would also pick up students such as "Tommy" or "Tomas" and skew the average.
//    The result is NaN when there is no student named Tom (0.0 / 0).
val tomScores = records.filter { case (name, _, _) => name == "Tom" }.map { case (_, _, score) => score }
tomScores.sum() / tomScores.count()

// 4. Number of courses taken by each student.
//    collect() brings the (small) aggregated result to the driver before printing;
//    foreach(println) directly on an RDD prints on the executors, not in the driver console.
records.map { case (name, _, _) => (name, 1) }.reduceByKey(_ + _).collect().foreach(println)

// 5. Number of students enrolled in the DataBase course (exact course-name match).
records.filter { case (_, course, _) => course == "DataBase" }.count()

// 6. Average score of each course: accumulate (sum of scores, number of scores) per course,
//    then divide. Printed on the driver for the same reason as query 4.
val courseTotals = records.map { case (_, course, score) => (course, (score, 1)) }.reduceByKey((a, b) => (a._1 + b._1, a._2 + b._2))
courseTotals.mapValues { case (total, n) => total.toDouble / n }.collect().foreach(println)