使用Flume采集流式数据发送到Kafka,再由Flink消费Kafka数据,实现车牌号统计

3.配置Flume监听5566端口,采集数据,channel使用memory channel
4.运行Flume和Kafka,将Flume采集的数据发送到Kafka
5.使用IDEA创建Maven工程,编写Flink程序,接收并消费Kafka中的数据
6.统计出5s内车牌出现的总次数并打印到控制台
7.统计出5s内所有车牌尾号是偶数的车牌号码并打印到控制台
8.统计出10s内同一车牌出现次数达到2次及以上的车牌号码并打印到控制台

/**
 * Flink streaming job that consumes licence-plate records from the Kafka topic
 * `cartest` (fed by Flume) and prints three statistics to the console:
 *
 *  1. per-plate occurrence counts in 5-second windows;
 *  2. plates whose last character is an even digit, counted in 5-second windows
 *     (plates ending in an odd digit go to a side output and are printed separately);
 *  3. plates seen at least twice within a 10-second window.
 *
 * NOTE(review): each Kafka record is assumed to hold one or more plate numbers
 * separated by a single space -- confirm against the Flume source format.
 */
object CarTest {

  // Imported locally so the object is self-contained; harmless if the file
  // already imports the same names at the top.
  import java.util.Properties

  import org.apache.flink.api.common.serialization.SimpleStringSchema
  import org.apache.flink.streaming.api.functions.ProcessFunction
  import org.apache.flink.streaming.api.scala._
  import org.apache.flink.streaming.api.windowing.time.Time
  import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer011
  import org.apache.flink.util.Collector

  /** Side-output tag shared by [[NumberIf]] (producer) and `main` (consumer). */
  private val OddPlatesTag: OutputTag[String] = new OutputTag[String]("odd")

  /**
   * Builds the Kafka -> Flink pipeline and blocks in `env.execute` until the job ends.
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = {
    val env = StreamExecutionEnvironment.getExecutionEnvironment
    // Run every operator with parallelism 1.
    env.setParallelism(1)

    // Kafka / ZooKeeper endpoints and the consumer group.
    val pro: Properties = new Properties()
    pro.setProperty("bootstrap.servers", "192.168.26.134:9092")
    pro.setProperty("zookeeper.connect", "192.168.26.134:2181")
    pro.setProperty("group.id", "cartest")

    val consumer: FlinkKafkaConsumer011[String] =
      new FlinkKafkaConsumer011[String]("cartest", new SimpleStringSchema(), pro)
    val ds: DataStream[String] = env.addSource(consumer)

    // One element per plate; blank tokens (empty lines, repeated spaces) are dropped
    // so they are neither counted as a key nor passed to the odd/even check.
    val plates: DataStream[String] =
      ds.flatMap(_.split(" ")).map(_.trim).filter(_.nonEmpty)
    val platesWithOne: DataStream[(String, Int)] = plates.map((_, 1))

    // Task 6: occurrences of each plate within 5-second windows.
    platesWithOne
      .keyBy(_._1)
      .timeWindow(Time.seconds(5))
      .sum(1)
      .print()

    // Task 7: even-tailed plates within 5-second windows.
    // Keep a handle on the process operator's own output: the side output is only
    // reachable from that stream, not from streams derived from it further down.
    val classified: DataStream[String] = plates.process(new NumberIf)
    classified
      .map((_, 1))
      .keyBy(_._1)
      .timeWindow(Time.seconds(5))
      .sum(1)
      .print()
    // Plates ending in an odd digit, emitted by NumberIf through the side output.
    classified.getSideOutput(OddPlatesTag).print("odd:")

    // Task 8: plates that appear at least twice within a 10-second window.
    platesWithOne
      .keyBy(_._1)
      .timeWindow(Time.seconds(10))
      .sum(1)
      .filter(_._2 >= 2)
      .print("10s exceed 2:")

    env.execute("car from kafka")
  }

  /**
   * Splits plates by the parity of their last character.
   *
   * Plates ending in an even digit are emitted on the main output prefixed with
   * `even:`; plates ending in an odd digit are emitted on the `odd` side output
   * prefixed with `odd:`. Empty values and plates whose last character is not a
   * digit are dropped instead of throwing, so one malformed record cannot fail
   * the job.
   */
  class NumberIf extends ProcessFunction[String, String] {

    /** Tag of the ODD side output; the misleading name is kept for compatibility. */
    lazy val evenOutPut: OutputTag[String] = OddPlatesTag

    override def processElement(
        value: String,
        context: ProcessFunction[String, String]#Context,
        collector: Collector[String]): Unit =
      value.lastOption.filter(_.isDigit) match {
        case Some(last) if last.asDigit % 2 != 0 => context.output(evenOutPut, "odd:" + value)
        case Some(_)                             => collector.collect("even:" + value)
        case None                                => () // no trailing digit: nothing to classify
      }
  }
}

你可能感兴趣的:(测试,大数据,flume,flink)