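/*
 * Recorded results for this analysis (labels translated from the original notes):
 *   Number of airports in the flight graph: 301
 *   Number of routes in the flight graph: 4088
 *   Longest flight route: Edge(12173,12478,4983)
 *   Busiest airport: (ATL,305)
 *   Most important airport: (10397,(ATL,11.06024770803224))
 */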
import org.apache.spark.graphx._
import org.apache.spark.rdd.RDD
import org.apache.spark.sql.SparkSession
/**
 * Builds a GraphX graph of airports (vertices) and routes (edges) from a flight CSV
 * and prints three results:
 *
 *  1. the route with the largest edge attribute (the longest route),
 *  2. the airport with the highest in-degree + out-degree (the busiest airport),
 *  3. the airport with the highest PageRank (the most important airport).
 */
object Need01 {

  /** Input file used when no path is passed as the first command-line argument. */
  private val DefaultInputPath: String =
    "E:\\五道口大数据\\北美项目\\20200304\\USA Flight Datset - Spark Tutorial - Edureka.csv"

  /** Attribute given to vertices that are referenced by an edge but absent from the vertex RDD. */
  private val DefaultAirport: String = "nowhere"

  /**
   * Entry point.
   *
   * @param args optional; `args(0)` overrides the input CSV path, otherwise
   *             [[DefaultInputPath]] is read.
   */
  def main(args: Array[String]): Unit = {
    val session = SparkSession.builder()
      .appName(this.getClass.getSimpleName)
      .master("local[1]")
      .getOrCreate()

    try {
      val inputPath = args.headOption.getOrElse(DefaultInputPath)
      // The graph feeds three separate jobs below; cache it so the CSV is not re-parsed each time.
      val graph = buildGraph(session, inputPath).cache()

      // Longest route: the edge with the largest attribute. `top` keeps only the best
      // candidate per partition instead of sorting the whole edge set.
      graph.edges
        .top(1)(Ordering.by[Edge[Int], Int](_.attr))
        .foreach(println)

      // Busiest airport: attach in- and out-degree to every vertex and rank by their sum.
      // Vertex value is (name, inDegree, outDegree, inDegree + outDegree).
      val traffic = graph
        .outerJoinVertices(graph.inDegrees) { case (_, name, in) => (name, in.getOrElse(0)) }
        .outerJoinVertices(graph.outDegrees) { case (_, (name, in), out) =>
          val outCount = out.getOrElse(0)
          (name, in, outCount, in + outCount)
        }
        .vertices
      traffic
        .top(1)(Ordering.by[(VertexId, (String, Int, Int, Int)), Int](_._2._4))
        .foreach(println)

      // Most important airport: highest PageRank. Join against the graph's own vertices,
      // which hold one entry per airport, rather than the raw per-flight vertex RDD.
      graph.pageRank(0.05).vertices
        .join(graph.vertices)
        .top(1)(Ordering.by[(VertexId, (Double, String)), Double](_._2._1))
        .foreach(println)
    } finally {
      // Always release the Spark resources, even when a job fails.
      session.stop()
    }
  }

  /**
   * Parses the CSV at `inputPath` into a graph.
   *
   * Lines starting with "月" are skipped (presumably the header row). For every
   * remaining line, fields 5/6 and 7/8 are read as (vertex id, vertex name) for the two
   * endpoints, and field 16 as the integer edge attribute between them. Identical
   * (source, destination, attribute) triples are collapsed into a single edge.
   *
   * @param session   active Spark session used to read the file
   * @param inputPath location of the flight CSV
   * @return graph whose vertex attribute is the airport name and whose edge attribute is
   *         the value of field 16 (treated as the route length by the callers)
   */
  private def buildGraph(session: SparkSession, inputPath: String): Graph[String, Int] = {
    val flights: RDD[Array[String]] = session.sparkContext
      .textFile(inputPath)
      .filter(!_.startsWith("月"))
      .map(_.split(",", -1)) // limit -1 keeps trailing empty fields so indices stay stable

    val airports: RDD[(VertexId, String)] = flights.flatMap { row =>
      Seq((row(5).toLong, row(6)), (row(7).toLong, row(8)))
    }

    val routes: RDD[Edge[Int]] = flights
      .map(row => (row(5).toLong, row(7).toLong, row(16).toInt))
      .distinct()
      .map { case (src, dst, length) => Edge(src, dst, length) }

    Graph(airports, routes, DefaultAirport)
  }
}