Simulating road-monitoring vehicle data, running SQL over it, and reading it with a Spark Streaming consumer

Mock data:

import java.io.PrintWriter
import java.text.SimpleDateFormat
import java.util.{Date, Properties}
import org.apache.kafka.clients.producer.{KafkaProducer, ProducerRecord}
import org.apache.kafka.common.serialization.StringSerializer
import scala.util.Random

object MockData {

  /**
   * Build a random numeric string of the given length.
   * @param index number of digits
   * @param random
   * @return
   */
  def randomNum(index:Int,random:Random): String ={
    var str=""
    for(i <- 0 until index) {
      str+=random.nextInt(10)
    }
    str
  }

  /**
   * Zero-pad a random number.
   * @param index width to pad to
   * @param num   upper bound (exclusive) of the random number
   * @param random
   */
  def filrZero(index:Int,num:Int,random: Random): String ={
    val randomNum = random.nextInt(num)
    var str=randomNum.toString
    if (randomNum<10){
      str=("%0"+index+"d").format(randomNum)
    }
    str
  }

  /**
   * Zero-pad a random number (variant used for the 4-digit monitor id).
   * @param index width to pad to
   * @param num   upper bound (exclusive) of the random number
   * @param random
   */
  def filrZero2(index:Int,num:Int,random: Random): String ={
    val randomNum = random.nextInt(num)
    var str=randomNum.toString
    if (randomNum<20){
      str=("%0"+index+"d").format(randomNum)
    }
    str
  }

  /**
   * Send a record to Kafka.
   * @param content
   */
  def sendKafkaData(content:String): Unit ={
    val props = new Properties()
    props.put("bootstrap.servers", "star.com:9092")
    val producer = new KafkaProducer[String,String](props, new StringSerializer(), new StringSerializer())
    producer.send(new ProducerRecord[String,String]("topic_car",content))
    producer.close()
  }
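
  // Note: this demo creates and closes a new KafkaProducer for every record,
  // which keeps the example simple but is slow; a production version would
  // create the producer once and reuse it for all sends.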

  /**
   * Initialize the output file.
   */
  def initFileData(path:String): PrintWriter ={
    new PrintWriter(path)
  }
  /**
   * Append a record to the local file.
   * @param content the record
   */
  def sendFileData(pw:PrintWriter,content:String): Unit ={
    pw.write(content+"\n")

  }

  def fileDataClose(pw:PrintWriter): Unit ={
    pw.close()
  }

  def mock() : Unit ={

    val pw = initFileData("G:/data/MockData.txt")

    val random = new Random()
    for (i <- 1 to 200){
        // current date
        val day = new SimpleDateFormat("yyyy-MM-dd").format(new Date())
        //println(day)
        val locations=Array("鲁", "豫", "湘", "广", "深", "沪", "晋", "粤", "赣", "京")

        // a random uppercase letter (ASCII 65-90)
        val a=(65+random.nextInt(26)).toChar
        // 5 random digits
        val b=randomNum(5,random)
        // license plate number
        val car=locations(random.nextInt(locations.length))+a+b
        // a random number of capture records for this plate
        for (_ <- 1 until random.nextInt(100)){
          // capture time 1 (current timestamp)
          //val day2 = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss").format(new Date())
          // capture time 2 (random time of day)
          val actionTime=day+" "+filrZero(2,24,random)+":"+filrZero(2,60,random)+":"+filrZero(2,60,random)
          // speed, 1-200
          val speed=random.nextInt(200)+1
          // road id, 00-19, zero-padded
          val roadId= filrZero(2,20,random)
          // district id, 00-19, zero-padded
          val areaId= filrZero(2,20,random)
          // monitor (checkpoint) id, 4 digits: first two are 0, last two 00-19
          val monitorId = filrZero2(4, 20, random)
          // camera id, 5 digits
          val cameraId="0"+randomNum(4,random)

          val content=day+"\t"+monitorId+"\t"+cameraId+"\t"+car+"\t"+actionTime+"\t"+speed+"\t"+roadId+"\t"+areaId
          sendKafkaData(content)
          sendFileData(pw, content)
        }
    }
    fileDataClose(pw)
    Thread.sleep(50)
  }
  def main(args: Array[String]): Unit = {
    mock()
  }
}
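
Each generated line is tab-separated with the fields day, monitor_id, camera_id, car, action_time, speed, road_id and area_id, matching the schema used in Test1 below. A single record might look like this (hypothetical values):

2024-01-01	0007	01234	京A12345	2024-01-01 13:05:42	87	09	03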

Running SQL over the data:

import org.apache.spark.sql.SparkSession

object Test1 {

  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder().master("local").appName("test1").getOrCreate()
    spark.sparkContext.setLogLevel("WARN")

    val file=args(0)
    val df=spark.read
      .option("sep","\t")
      .schema("day string,monitor_id string,camera_id string,car string,action_time string,speed int,road_id string,area_id string")
      .csv(file)

    df.createTempView("mockdata")

    //9. Read the data and use SQL to compute the maximum captured speed grouped by district and road, ordered by district, road, and max speed descending.
    val df1=spark.sql("select area_id,road_id,max(speed) max_speed from mockdata group by area_id,road_id order by area_id,road_id,max_speed desc")
    df1.show()
    //10. Use SQL to compute each camera's average speed for the day, sorted descending.
    val df2 = spark.sql("select day,camera_id,round(avg(speed),2) avgspeed from mockdata group by day,camera_id order by avgspeed desc")
    df2.show()
    //11. Use SQL to count the distinct vehicles passing each road in each district for the day, sorted ascending by the count.
    val df3 = spark.sql("select day,road_id,area_id,count(distinct car) countcar from mockdata group by day,road_id,area_id order by countcar")
    df3.show()
    //12. Use SQL to find the 5 fastest records for each road in each district, ordered by speed descending.
    val df4=spark.sql("select t.* from ( select *,row_number() over(partition by area_id,road_id order by speed desc) rank from mockdata) t where t.rank<=5")
    df4.show()

    //13. Register a UDF mapping the last field (area_id): 01-04 to 海淀区 (Haidian), 05-07 to 和平区 (Heping), 08-09 to 门头沟区 (Mentougou), 10-11 to 西城区 (Xicheng), everything else to 房山区 (Fangshan).
    spark.udf.register("get_area", (area_id: Int) => {
      var area = ""
      if (area_id >= 1 && area_id <= 4) {
        area = "海淀区"
      }else if(area_id >= 5 && area_id <= 7){
        area = "和平区"
      }else if(area_id >= 8 && area_id <= 9){
        area = "门头沟区"
      }else if(area_id >= 10 && area_id <= 11){
        area = "西城区"
      }else{
        area = "房山区"
      }
      area
    })
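    // Note: area_id is declared as a string in the schema above; Spark SQL
    // should implicitly cast it to Int when the UDF is applied.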
    //14. Use the UDF defined above to compute each district's average speed.
    val df5 = spark.sql("select get_area(area_id) area_name,round(avg(speed),2) avg_speed from mockdata group by area_name")
    df5.show()
    spark.close()
  }
}
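
For comparison, task 12 can also be expressed with the DataFrame API instead of SQL. A minimal sketch, assuming the same df created in Test1 above:

import org.apache.spark.sql.expressions.Window
import org.apache.spark.sql.functions.{col, row_number}

// 5 fastest records per district and road, using a window function
val byAreaRoad = Window.partitionBy("area_id", "road_id").orderBy(col("speed").desc)
val top5 = df.withColumn("rank", row_number().over(byAreaRoad))
  .filter(col("rank") <= 5)
top5.show()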

Spark Streaming consumer reading the monitoring data:

import org.apache.spark._
import org.apache.spark.streaming._
import org.apache.kafka.common.serialization.StringDeserializer
import org.apache.spark.streaming.kafka010._
import org.apache.spark.streaming.kafka010.LocationStrategies.PreferConsistent
import org.apache.spark.streaming.kafka010.ConsumerStrategies.Subscribe

object Test2 {

  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setMaster("local[2]").setAppName("test2")
    val sc = new SparkContext(conf)
    sc.setLogLevel("WARN")
    val ssc = new StreamingContext(sc, Seconds(5))

    //15. Create a Spark Streaming consumer to read the monitoring data
    val kafkaParams = Map[String, Object](
      "bootstrap.servers" -> "star.com:9092",
      "key.deserializer" -> classOf[StringDeserializer],
      "value.deserializer" -> classOf[StringDeserializer],
      "group.id" -> "use_a_separate_group_id_for_each_stream",
      "auto.offset.reset" -> "latest",
      "enable.auto.commit" -> (false: java.lang.Boolean) //false 偏移量不自动提交
    )

    // subscribe to the topic
    val topics = Array("topic_car")
    val stream = KafkaUtils.createDirectStream[String, String](
      ssc,
      PreferConsistent,
      Subscribe[String, String](topics, kafkaParams)
    )
    //16. Count the distinct license plates captured within each 5-second batch and print the total
    val cars = stream.map(line => line.value().split("\t")(3))
    val result1 = cars.transform(rdd => rdd.distinct()).count()
    result1.print()

    //17. Find license plates that appear 2 or more times within a 5-second batch (demonstrate the >= 2 case)
    val result2 = cars.map((_,1)).reduceByKey(_+_).filter(_._2 >= 2)
    result2.print()


    // commit offsets manually
    stream.foreachRDD { rdd =>
      val offsetRanges = rdd.asInstanceOf[HasOffsetRanges].offsetRanges
      stream.asInstanceOf[CanCommitOffsets].commitAsync(offsetRanges)
    }

    ssc.start()            // start the streaming computation
    ssc.awaitTermination() // block until the computation terminates
  }
}
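
Note: with "auto.offset.reset" set to "latest" and a brand-new consumer group, Test2 only sees records produced after it has started, so start Test2 before running MockData. To replay records already in the topic instead, the parameter can be changed (a one-line variation of the kafkaParams above):

      "auto.offset.reset" -> "earliest",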

Commands to run on Linux:

Test1
bin/spark-submit --class <fully qualified class name> xxxxx.jar file:/opt/spark-2.4.5/xxx.txt   (data file)

Test2
bin/spark-submit --class <fully qualified class name> xxxxx.jar
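
For example, assuming the application jar is packaged as traffic-monitor.jar, the two objects live in the default package, and the data file is named data.txt (all three names are hypothetical, substitute your own):

bin/spark-submit --class Test1 traffic-monitor.jar file:/opt/spark-2.4.5/data.txt
bin/spark-submit --class Test2 traffic-monitor.jar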
