package com.hpe.traffic.skynet
import java.sql.{Connection, DriverManager, PreparedStatement}
import org.apache.spark.broadcast.Broadcast
import org.apache.spark.rdd.RDD
import org.apache.spark.sql.{DataFrame, Row}
import org.apache.spark.sql.hive.HiveContext
import org.apache.spark.{SparkConf, SparkContext}
import scala.collection.mutable.ListBuffer
import scala.util.Random
/**
 * Random car-sampling job over the Hive table `traffic.monitor_flow_action`.
 *
 * Pipeline:
 *  1. Load every monitor record and drop rows that cannot be processed.
 *  2. For each (date, hour) bucket, collect the distinct plates seen in that hour and
 *     draw a uniform random sample (without replacement) of a fixed proportion.
 *  3. Write the sampled (plate, date, hour) tuples to MySQL table `random_extract_car`.
 *  4. Write the full monitor records of every sampled plate to
 *     `random_extract_car_detail_info`.
 *  5. Write (plate, date, monitor id) of each of those records to `car_track`.
 *
 * The entry point is an explicit `main` rather than `scala.App`: with `App` the job body
 * runs through `DelayedInit`, so values referenced from Spark closures would be
 * uninitialized object fields on remote executors.
 *
 * Author: 马豪伟NB
 * Created: 2019/7/23 16:39
 */
object DemoCarManager01 {

  // NOTE(review): connection settings and credentials are hard-coded in source;
  // they should be moved to external configuration.
  private val JdbcDriver = "com.mysql.jdbc.Driver"
  private val JdbcUrl = "jdbc:mysql://localhost:3306/1701b"
  private val JdbcUser = "root"
  private val JdbcPassword = "123456"

  /** Fraction of the distinct cars of each hour that is sampled. */
  private val DefaultProportion = 0.1

  /** Task id written as the first column of every inserted row. */
  private val DefaultTaskId = 2

  // Column layout of traffic.monitor_flow_action:
  // | date | monitor_id | camera_id | car | action_time | speed | road_id | area_id |
  // e.g. |2017-04-25| 0001| 09203|京W47147|2017-04-25 20:58:17| 138| 49| 04|
  private val ExpectedColumns = 8
  private val DateCol = 0
  private val MonitorCol = 1
  private val CarCol = 3
  private val ActionTimeCol = 4

  /**
   * Creates the Spark context, runs the job and always stops the context afterwards.
   *
   * @param args command-line arguments; currently unused, the sampling proportion and
   *             the task id use their built-in defaults
   */
  def main(args: Array[String]): Unit = {
    val sc: SparkContext =
      new SparkContext(new SparkConf().setAppName("demo01").setMaster("local[2]"))
    try {
      sc.setLogLevel("WARN")
      // Ship the MySQL driver jar with the job.
      sc.addJar("lib/mysql-connector-java-5.1.39.jar")
      run(sc)
    } finally {
      // Stop the context even when the job fails.
      sc.stop()
    }
  }

  /** Executes the sampling pipeline on the given context. */
  private def run(sc: SparkContext): Unit = {
    // Plain serializable locals are captured by the closures below; scalars this small
    // do not need a broadcast variable.
    val proportion: Double = DefaultProportion
    val taskId: String = DefaultTaskId.toString

    val hiveContext = new HiveContext(sc)
    val monitorFlowAction: DataFrame =
      hiveContext.sql("select * from traffic.monitor_flow_action")

    // Drop dirty rows once and cache: this RDD feeds both the sampling and the detail lookup.
    val cleanRows: RDD[Row] = monitorFlowAction.rdd.filter(isUsable).cache()

    // ((date, hour), plates seen in that hour), each plate at most once per bucket.
    // e.g. ((2017-04-25,19), CompactBuffer(京F25322, 京F09300, ...))
    val platesPerHour: RDD[((String, String), Iterable[String])] = cleanRows
      .flatMap { row =>
        dateAndHour(row.getString(ActionTimeCol)).map(bucket => (bucket, row.getString(CarCol))).toList
      }
      .distinct()
      .groupByKey()

    // (plate, (date, hour)) for every sampled car, e.g. (深S91241,(2017-04-25,19)).
    // Cached because the sample is random: both consumers below must see the same draw.
    // NOTE(review): if cached partitions are evicted and recomputed, a different sample
    // would be drawn for them.
    val sampledCars: RDD[(String, (String, String))] = platesPerHour.flatMap {
      case (bucket, plates) =>
        samplePlates(plates.toVector, proportion).map(plate => (plate, bucket))
    }.cache()

    sampledCars.foreachPartition { rows =>
      insertBatch("insert into random_extract_car values(?,?,?,?)", rows) {
        case (stmt, (plate, (date, hour))) =>
          stmt.setObject(1, taskId)
          stmt.setObject(2, plateBytes(plate))
          stmt.setObject(3, date)
          stmt.setObject(4, hour)
      }
    }

    // The same car can be sampled in several hours; de-duplicate the plates before
    // broadcasting them, and use a Set so the per-row membership test is constant time.
    val sampledPlates: Broadcast[Set[String]] =
      sc.broadcast(sampledCars.keys.distinct().collect().toSet)

    // All monitor records that belong to a sampled plate,
    // e.g. [2017-04-25,0000,00362,京K44118,2017-04-25 20:34:43,203,19,04]
    val sampledDetails: RDD[Row] =
      cleanRows.filter(row => sampledPlates.value.contains(row.getString(CarCol))).cache()

    // Columns written: task id, date, monitor_id, camera_id, car, action_time, speed,
    // road_id (area_id, the last source column, is not written).
    sampledDetails.foreachPartition { rows =>
      insertBatch("insert into random_extract_car_detail_info values(?,?,?,?,?,?,?,?)", rows) {
        (stmt, row) =>
          stmt.setObject(1, taskId)
          stmt.setObject(2, row.getString(0))
          stmt.setObject(3, row.getString(1))
          stmt.setObject(4, row.getString(2))
          stmt.setObject(5, plateBytes(row.getString(3)))
          stmt.setObject(6, row.getString(4))
          stmt.setObject(7, row.getString(5))
          stmt.setObject(8, row.getString(6))
      }
    }

    // One car_track row per record: (task id, plate, date, monitor id).
    // NOTE(review): rows are written per passage, not aggregated per car and day —
    // confirm this matches the intended car_track schema.
    sampledDetails.foreachPartition { rows =>
      insertBatch("insert into car_track values(?,?,?,?)", rows) { (stmt, row) =>
        stmt.setObject(1, taskId)
        stmt.setObject(2, plateBytes(row.getString(CarCol)))
        stmt.setObject(3, row.getString(DateCol))
        stmt.setObject(4, row.getString(MonitorCol))
      }
    }
  }

  /**
   * A row is usable when it has the expected number of columns, a non-null plate and an
   * action time from which a (date, hour) bucket can be derived. Rows failing this check
   * are silently dropped from the whole job.
   */
  private def isUsable(row: Row): Boolean =
    row.size == ExpectedColumns &&
      !row.isNullAt(CarCol) &&
      !row.isNullAt(ActionTimeCol) &&
      dateAndHour(row.getString(ActionTimeCol)).isDefined

  /**
   * Splits an action time such as "2017-04-25 20:58:17" into ("2017-04-25", "20").
   *
   * @param actionTime text of the form "date time", the time starting with "hour:"
   * @return the (date, hour) pair, or None when there is no time part after the space
   */
  private def dateAndHour(actionTime: String): Option[(String, String)] =
    actionTime.split(" ") match {
      case Array(date, time, _*) => Some((date, time.takeWhile(_ != ':')))
      case _ => None
    }

  /**
   * Draws a uniform random sample without replacement.
   *
   * @param plates     distinct plates of one hour bucket
   * @param proportion fraction of `plates` to keep; the sample size is
   *                   `(plates.size * proportion).toInt`, capped at `plates.size`
   * @return the sampled plates (empty when the computed size is zero or negative)
   */
  private def samplePlates(plates: Vector[String], proportion: Double): Vector[String] = {
    val sampleSize = (plates.size * proportion).toInt
    Random.shuffle(plates).take(sampleSize)
  }

  /** Encodes a plate as UTF-8 bytes, the form in which plates are bound to statements. */
  private def plateBytes(plate: String): Array[Byte] = plate.getBytes("utf-8")

  /**
   * Inserts every element of `rows` through one batched prepared statement.
   *
   * No connection is opened for an empty iterator. The statement and the connection are
   * closed even when binding or executing fails.
   *
   * @param sql  parameterized insert statement
   * @param rows elements of one partition
   * @param bind sets the statement parameters for one element
   */
  private def insertBatch[T](sql: String, rows: Iterator[T])(bind: (PreparedStatement, T) => Unit): Unit = {
    if (rows.hasNext) {
      Class.forName(JdbcDriver)
      val conn: Connection = DriverManager.getConnection(JdbcUrl, JdbcUser, JdbcPassword)
      try {
        val stmt: PreparedStatement = conn.prepareStatement(sql)
        try {
          rows.foreach { row =>
            bind(stmt, row)
            stmt.addBatch()
          }
          stmt.executeBatch()
        } finally {
          stmt.close()
        }
      } finally {
        conn.close()
      }
    }
  }
}