// Smart-traffic vehicle management: time periods and checkpoints at which vehicles appear (原标题: 智慧交通车辆管理实现车辆出现的时间段和卡口)

package com.hpe.traffic.skynet


import java.sql.{Connection, DriverManager, PreparedStatement}

import org.apache.spark.broadcast.Broadcast
import org.apache.spark.rdd.RDD
import org.apache.spark.sql.{DataFrame, Row}
import org.apache.spark.sql.hive.HiveContext
import org.apache.spark.{SparkConf, SparkContext}

import scala.collection.mutable.ListBuffer
import scala.util.Random

/**
  * Spark batch job that samples vehicles per hour from the Hive table
  * `traffic.monitor_flow_action` and writes the results to MySQL.
  *
  * Pipeline:
  *  1. Read every row of `traffic.monitor_flow_action`. According to the sample
  *     header kept from the original notes the columns are
  *     `date, monitor_id, camera_id, car, action_time, speed, road_id, area_id`
  *     (indices 0..7).
  *  2. Build distinct `(date hour, plate)` pairs and group them by hour.
  *  3. For every hour draw `floor(count * proportion)` distinct plates at random
  *     and insert them into `random_extract_car`.
  *  4. Insert every source row that belongs to a sampled plate into
  *     `random_extract_car_detail_info`.
  *  5. Insert one `(plate, date, monitor_id)` row per such record into `car_track`.
  *
  * The job is an explicit `main` method rather than `extends App`: with `App`
  * every value becomes a lazily initialised field of the object, and the Spark
  * documentation warns that subclasses of `scala.App` may not work correctly.
  * With `main`, the broadcast handles are plain locals captured by the closures.
  *
  * Optional program arguments (both default when absent):
  *  - `args(0)`: sampling proportion, default `0.1`
  *  - `args(1)`: task id written to every output row, default `2`
  *
  * Author: 马豪伟NB
  * Created: 2019/7/23 16:39
  */
object DemoCarManager01 {

  // JDBC connection settings, unchanged from the original job.
  // NOTE(review): credentials are hard-coded; consider moving them to configuration.
  private val JdbcDriver = "com.mysql.jdbc.Driver"
  private val JdbcUrl = "jdbc:mysql://localhost:3306/1701b"
  private val JdbcUser = "root"
  private val JdbcPassword = "123456"

  // Defaults used when the corresponding program argument is not supplied.
  private val DefaultProportion = 0.1
  private val DefaultTaskId = 2

  /**
    * Writes all records of one partition to MySQL as a single JDBC batch.
    *
    * Nothing is opened for an empty partition. The statement and the connection
    * are always closed, even when binding or executing the batch fails.
    *
    * @param sql     parameterised insert statement
    * @param records the records of the partition
    * @param bind    sets the statement parameters for one record
    */
  private def writePartition[T](sql: String, records: Iterator[T])
                               (bind: (PreparedStatement, T) => Unit): Unit = {
    if (records.hasNext) {
      Class.forName(JdbcDriver)
      val conn: Connection = DriverManager.getConnection(JdbcUrl, JdbcUser, JdbcPassword)
      try {
        val sta: PreparedStatement = conn.prepareStatement(sql)
        try {
          records.foreach { record =>
            bind(sta, record)
            sta.addBatch()
          }
          sta.executeBatch()
        } finally {
          sta.close()
        }
      } finally {
        conn.close()
      }
    }
  }

  /**
    * Entry point of the job.
    *
    * @param args optional `[proportion, taskId]`; see the object documentation
    * @throws NumberFormatException if a supplied argument is not a valid number
    */
  def main(args: Array[String]): Unit = {
    // Build the Spark context from an inline configuration (local mode, two threads).
    val sc: SparkContext =
      new SparkContext(new SparkConf().setAppName("demo01").setMaster("local[2]"))
    try {
      sc.setLogLevel("WARN")
      // Ship the MySQL driver jar with the job.
      sc.addJar("lib/mysql-connector-java-5.1.39.jar")

      // Sampling proportion, broadcast to the tasks.
      val proportion: Double = args.lift(0).map(_.toDouble).getOrElse(DefaultProportion)
      val carProportion: Broadcast[Double] = sc.broadcast(proportion)

      // Task id stored with every output row, broadcast to the tasks.
      val taskId: Int = args.lift(1).map(_.toInt).getOrElse(DefaultTaskId)
      val carTaskid: Broadcast[Int] = sc.broadcast(taskId)

      // Load the whole monitor_flow_action table through Hive.
      val hiveContext = new HiveContext(sc)
      val monitorFlowAction: DataFrame =
        hiveContext.sql("select * from traffic.monitor_flow_action")

      // Keep only 8-column rows whose plate (3) and action_time (4) are present;
      // both fields are dereferenced below, so a null would fail the job.
      // Cached because two separate lineages start from this RDD.
      val carFilterRDD: RDD[Row] = monitorFlowAction.rdd
        .filter(row => row.size == 8 && !row.isNullAt(3) && !row.isNullAt(4))
        .cache()

      // (plate, original row)
      val carInfoRDD: RDD[(String, Row)] = carFilterRDD.map(row => (row.getString(3), row))

      // Distinct ("yyyy-MM-dd HH", plate) pairs: the timestamp is cut after the hour.
      val carMapRDD: RDD[(String, String)] = carFilterRDD.map { row =>
        val dateAndClock = row.getString(4).split(" ")
        (s"${dateAndClock(0)} ${dateAndClock(1).split(":")(0)}", row.getString(3))
      }.distinct()

      // (hour, all distinct plates seen in that hour)
      val carGroupRDD: RDD[(String, Iterable[String])] = carMapRDD.groupByKey()

      // Per hour, draw floor(count * proportion) distinct plates -> (plate, hour).
      // Shuffling and taking a prefix yields a uniform sample without replacement
      // and cannot loop forever when the proportion is larger than 1.
      // The sample is random, so it is persisted with disk spill: an evicted
      // partition would otherwise be recomputed with a different sample between
      // the MySQL write and the later detail lookup.
      val flatRDD: RDD[(String, String)] = carGroupRDD.flatMap { case (hour, cars) =>
        val plates = cars.toVector
        val sampleSize = (plates.size * carProportion.value).toInt
        Random.shuffle(plates).take(sampleSize).map(plate => (plate, hour))
      }.persist(org.apache.spark.storage.StorageLevel.MEMORY_AND_DISK)

      // Store the sample: (task id, plate, date, hour) -> random_extract_car.
      flatRDD.foreachPartition { records =>
        writePartition("insert into random_extract_car values(?,?,?,?)", records) { (sta, record) =>
          val dateAndHour = record._2.split(" ")
          sta.setObject(1, carTaskid.value.toString)
          sta.setObject(2, record._1.getBytes("utf-8"))
          sta.setObject(3, dateAndHour(0))
          sta.setObject(4, dateAndHour(1))
        }
      }

      // The same plate can be drawn in several hours; deduplicate before the lookup
      // so that its detail rows are selected only once.
      val flatMapRDD: RDD[String] = flatRDD.map(_._1).distinct()

      // Broadcast the sampled plates as a Set for constant-time membership tests.
      val flatRDDBroadcast: Broadcast[Set[String]] = sc.broadcast(flatMapRDD.collect().toSet)

      // All source rows belonging to a sampled plate; cached because it feeds two writes.
      val carMapJoinRDD: RDD[Row] = carInfoRDD
        .filter { case (plate, _) => flatRDDBroadcast.value.contains(plate) }
        .map(_._2)
        .cache()

      // Store the detail rows: task id followed by source columns 0..6
      // (the eighth source column is not written).
      carMapJoinRDD.foreachPartition { rows =>
        writePartition("insert into random_extract_car_detail_info values(?,?,?,?,?,?,?,?)", rows) { (sta, row) =>
          sta.setObject(1, carTaskid.value.toString)
          sta.setObject(2, row.getString(0))
          sta.setObject(3, row.getString(1))
          sta.setObject(4, row.getString(2))
          sta.setObject(5, row.getString(3).getBytes("utf-8"))
          sta.setObject(6, row.getString(4))
          sta.setObject(7, row.getString(5))
          sta.setObject(8, row.getString(6))
        }
      }

      // ("plate date", monitor_id) for every sampled detail row.
      val carAndMonitorRDD: RDD[(String, String)] =
        carMapJoinRDD.map(row => (row.getString(3) + " " + row.getString(0), row.getString(1)))

      // Store one car_track row per record: (task id, plate, date, monitor_id).
      // NOTE(review): the original also built a groupByKey() of these pairs but never
      // used it; the write has always consumed the ungrouped pairs. If car_track is
      // meant to hold one row per plate and day, group and join the monitor ids here.
      carAndMonitorRDD.foreachPartition { records =>
        writePartition("insert into car_track values(?,?,?,?)", records) { (sta, record) =>
          val plateAndDate = record._1.split(" ")
          sta.setObject(1, carTaskid.value.toString)
          // Encode explicitly as UTF-8, consistent with the other two writers
          // (the platform default charset was used here before).
          sta.setObject(2, plateAndDate(0).getBytes("utf-8"))
          sta.setObject(3, plateAndDate(1))
          sta.setObject(4, record._2)
        }
      }
    } finally {
      // Release the Spark context even when a stage fails.
      sc.stop()
    }
  }
}

// You may also be interested in: (feature code)