统计航班飞行

统计航班飞行网图中机场的数量:301
统计航班飞行网图中航线的数量:4088
计算最长的飞行航线:Edge(12173,12478,4983)
找出最繁忙的机场:(ATL,305)
找出最重要机场:(10397,(ATL,11.06024770803224))

import org.apache.spark.graphx._
import org.apache.spark.rdd.RDD
import org.apache.spark.sql.SparkSession

/**
 * Flight-network statistics computed with Spark GraphX.
 *
 * Every CSV row contributes two vertices (columns 5/6 and 7/8: numeric id and
 * label of each end of the flight) and one edge whose attribute is the integer
 * in column 16 (presumably the route distance — confirm against the data set).
 *
 * The program prints, in order:
 *   1. the number of airports (vertices) and routes (edges),
 *   2. the edge with the largest attribute (the "longest" route),
 *   3. the airport with the highest in-degree + out-degree,
 *   4. the airport with the highest PageRank.
 */
object Need01 {

  /** Input file used when no path is supplied as the first program argument. */
  private val DefaultFlightsCsv =
    "E:\\五道口大数据\\北美项目\\20200304\\USA Flight Datset - Spark Tutorial - Edureka.csv"

  /** Attribute given to vertices that appear in an edge but not in the vertex RDD. */
  private val UnknownAirport = "nowhere"

  /** Convergence tolerance passed to `Graph.pageRank`. */
  private val PageRankTolerance = 0.05

  /**
   * Entry point.
   *
   * @param args optional; `args(0)` overrides the default CSV location
   */
  def main(args: Array[String]): Unit = {
    val session = SparkSession.builder()
      .appName(this.getClass.getSimpleName)
      .master("local[1]")
      .getOrCreate()
    try {
      report(session, args.headOption.getOrElse(DefaultFlightsCsv))
    } finally {
      // Release the local Spark context even when a job fails.
      session.stop()
    }
  }

  /** Builds the flight graph from `flightsCsv` and prints every statistic. */
  private def report(session: SparkSession, flightsCsv: String): Unit = {
    val graph = buildGraph(session, flightsCsv).cache() // reused by several actions below

    println(s"airports: ${graph.numVertices}")
    println(s"routes: ${graph.numEdges}")

    // Longest route: the edge with the largest attribute.
    graph.edges
      .sortBy(_.attr, ascending = false)
      .take(1)
      .foreach(println)

    // Busiest airport: rows are (id, (label, inDegree, outDegree, inDegree + outDegree)).
    // Vertices missing from inDegrees/outDegrees have no such edges, hence the 0 default.
    graph
      .outerJoinVertices(graph.inDegrees) { case (_, label, in) => (label, in.getOrElse(0)) }
      .outerJoinVertices(graph.outDegrees) { case (_, (label, in), out) =>
        val outgoing = out.getOrElse(0)
        (label, in, outgoing, in + outgoing)
      }
      .vertices
      .sortBy(_._2._4, ascending = false)
      .take(1)
      .foreach(println)

    // Most important airport: rows are (id, (rank, label)).
    // Join against graph.vertices (one row per airport) rather than the raw,
    // heavily duplicated per-flight vertex RDD, so the join does not blow up.
    graph.pageRank(PageRankTolerance).vertices
      .join(graph.vertices)
      .sortBy(_._2._1, ascending = false)
      .take(1)
      .foreach(println)
  }

  /**
   * Parses the CSV at `flightsCsv` into a graph of airports and routes.
   *
   * Lines starting with "月" are dropped (presumably the header row — confirm).
   * Routes are de-duplicated on (source id, destination id, attribute).
   */
  private def buildGraph(session: SparkSession, flightsCsv: String): Graph[String, Int] = {
    val rows: RDD[Array[String]] = session.sparkContext
      .textFile(flightsCsv)
      .filter(!_.startsWith("月"))
      .map(_.split(",", -1)) // limit -1 keeps trailing empty columns

    val airports: RDD[(VertexId, String)] =
      rows.flatMap(r => Seq((r(5).toLong, r(6)), (r(7).toLong, r(8))))

    val routes: RDD[Edge[Int]] = rows
      .map(r => (r(5).toLong, r(7).toLong, r(16).toInt))
      .distinct()
      .map { case (src, dst, attr) => Edge(src, dst, attr) }

    Graph(airports, routes, UnknownAirport)
  }

}

你可能感兴趣的:(统计航班飞行)