订单流和对账流 connect 之后,两条输入流各自有一个 watermark(共两个),下游算子的事件时间取两者中较小的那个
定时器是区分 key 的:每个 key 注册并触发自己的定时器
定时器的 timestamp 是流(算子)视角的:是否触发取决于算子当前的 watermark,而不是某个 key 自己的时间
package flinkProject
import java.text.SimpleDateFormat
import flinkSourse.SensorReading
import org.apache.flink.api.common.state.{ValueState, ValueStateDescriptor}
import org.apache.flink.configuration.Configuration
import org.apache.flink.streaming.api.TimeCharacteristic
import org.apache.flink.streaming.api.functions.co.CoProcessFunction
import org.apache.flink.streaming.api.functions.timestamps.BoundedOutOfOrdernessTimestampExtractor
import org.apache.flink.streaming.api.scala._
import org.apache.flink.streaming.api.windowing.time.Time
import org.apache.flink.util.Collector
/**
 * Receipt record parsed from one line of the receipt socket stream.
 *
 * @param txid       first space-separated field of the input line; used as the stream key
 * @param payChannel second space-separated field of the input line
 * @param timestamp  epoch milliseconds parsed from the third field; used as the event-time timestamp
 */
case class ReceiptEvent(txid:String,payChannel:String,timestamp:Long)
/**
 * Order record parsed from one line of the order socket stream.
 *
 * @param txid       first space-separated field of the input line; used as the stream key
 * @param payChannel second space-separated field of the input line
 * @param timestamp  epoch milliseconds parsed from the third field; used as the event-time timestamp
 */
case class OrderEvent(txid:String,payChannel:String,timestamp:Long)
/**
 * Variant 1: reconcile receipts against orders with `connect` plus a keyed `CoProcessFunction`.
 *
 * Reads space-separated lines of the form `<txid> <payChannel> <dd/MM/yyyy:HH:mm:ss>` from two
 * local sockets (port 1111 for receipts, port 2222 for orders), keys both streams by `txid`,
 * and prints the matched pairs together with the two side outputs that carry unmatched events.
 */
object TxConnectedMatch {

  /**
   * Pattern of the third input field, e.g. `17/05/2015:10:26:45`.
   *
   * "MM" is month-of-year. The lowercase "mm" means minute-of-hour; using it in the month
   * position silently drops the month, so every date would be parsed as January.
   */
  private val EventTimePattern = "dd/MM/yyyy:HH:mm:ss"

  /**
   * Converts the textual event time into epoch milliseconds (JVM default time zone).
   *
   * A new formatter is created per call because `SimpleDateFormat` is not thread-safe.
   *
   * @param raw text in [[EventTimePattern]] format
   * @return epoch milliseconds
   * @throws java.text.ParseException if `raw` does not match the pattern
   */
  private def parseEventTime(raw: String): Long =
    new SimpleDateFormat(EventTimePattern).parse(raw).getTime

  /**
   * Builds and runs the job.
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = {
    val executionEnvironment: StreamExecutionEnvironment = StreamExecutionEnvironment.getExecutionEnvironment
    executionEnvironment.setParallelism(1)
    // Event time; watermarks are generated periodically (every 200 ms by default).
    executionEnvironment.setStreamTimeCharacteristic(TimeCharacteristic.EventTime)

    // Receipt stream: zero allowed out-of-orderness, timestamp taken from the event itself.
    val stream1: DataStream[String] = executionEnvironment.socketTextStream("127.0.0.1", 1111)
    val receiptDataStream: DataStream[ReceiptEvent] = stream1.map(data => {
      val tmpList = data.split(" ")
      ReceiptEvent(tmpList(0), tmpList(1), parseEventTime(tmpList(2)))
    }).assignTimestampsAndWatermarks(new BoundedOutOfOrdernessTimestampExtractor[ReceiptEvent](Time.seconds(0)) {
      override def extractTimestamp(t: ReceiptEvent) = t.timestamp
    })

    // Order stream: same line format and watermark strategy as the receipt stream.
    val stream2: DataStream[String] = executionEnvironment.socketTextStream("127.0.0.1", 2222)
    val orderStram: DataStream[OrderEvent] = stream2.map(data => {
      val tmpList = data.split(" ")
      OrderEvent(tmpList(0), tmpList(1), parseEventTime(tmpList(2)))
    }).assignTimestampsAndWatermarks(new BoundedOutOfOrdernessTimestampExtractor[OrderEvent](Time.seconds(0)) {
      override def extractTimestamp(t: OrderEvent) = t.timestamp
    })

    // Key both inputs by txid so that state and timers are scoped per transaction id.
    val result: DataStream[(ReceiptEvent, OrderEvent)] = receiptDataStream.connect(orderStram)
      .keyBy((receipt => receipt.txid), (order => order.txid))
      .process(new ConnectedCoProcessFunction())

    result.print("result")
    // The tag ids must equal the ones used inside ConnectedCoProcessFunction.
    result.getSideOutput(new OutputTag[OrderEvent]("order_output_tag")).print("order_output_tag")
    result.getSideOutput(new OutputTag[ReceiptEvent]("receipt_output_tag")).print("receipt_output_tag ")

    executionEnvironment.execute("connected Stream")
  }
}
/**
 * Matches receipts and orders that share the same key.
 *
 * The first event to arrive for a key is buffered in keyed state and an event-time timer is
 * registered 3 seconds after its timestamp. If the counterpart arrives first, the pair is
 * emitted on the main output. If the timer fires first, the buffered event is emitted on a
 * side output (`receipt_output_tag` or `order_output_tag`) and the state is cleared.
 */
class ConnectedCoProcessFunction extends CoProcessFunction[ReceiptEvent, OrderEvent, (ReceiptEvent, OrderEvent)] {

  /** How long (event time, milliseconds) a buffered event waits for its counterpart. */
  private val matchTimeoutMs: Long = 3000L

  // Built once per function instance instead of on every timer invocation.
  @transient private lazy val unmatchedReceiptTag = new OutputTag[ReceiptEvent]("receipt_output_tag")
  @transient private lazy val unmatchedOrderTag = new OutputTag[OrderEvent]("order_output_tag")

  /** Receipt waiting for its order (per key); null when nothing is buffered. */
  var receiptValueState: ValueState[ReceiptEvent] = _
  /** Order waiting for its receipt (per key); null when nothing is buffered. */
  var orderValueState: ValueState[OrderEvent] = _

  /** Obtains the two keyed state handles from the runtime context. */
  override def open(parameters: Configuration): Unit = {
    receiptValueState = getRuntimeContext.getState[ReceiptEvent](new ValueStateDescriptor[ReceiptEvent]("receipt", classOf[ReceiptEvent]))
    orderValueState = getRuntimeContext.getState[OrderEvent](new ValueStateDescriptor[OrderEvent]("order", classOf[OrderEvent]))
  }

  /**
   * Handles a receipt: emits a pair if an order is already buffered, otherwise buffers the
   * receipt and starts its timeout.
   *
   * @param in1       the incoming receipt
   * @param context   gives access to the timer service
   * @param collector main output for matched pairs
   */
  override def processElement1(in1: ReceiptEvent, context: CoProcessFunction[ReceiptEvent, OrderEvent, (ReceiptEvent, OrderEvent)]#Context, collector: Collector[(ReceiptEvent, OrderEvent)]): Unit = {
    Option(orderValueState.value()) match {
      case Some(order) =>
        // The order arrived first: emit the pair and release the buffered order.
        collector.collect((in1, order))
        orderValueState.clear()
        // Drop the order's timeout timer. A stale timer would otherwise fire later and could
        // prematurely report (and clear) a newer event buffered under the same key.
        context.timerService().deleteEventTimeTimer(order.timestamp + matchTimeoutMs)
      case None =>
        receiptValueState.update(in1)
        context.timerService().registerEventTimeTimer(in1.timestamp + matchTimeoutMs)
    }
  }

  /**
   * Handles an order: emits a pair if a receipt is already buffered, otherwise buffers the
   * order and starts its timeout.
   *
   * @param in2       the incoming order
   * @param context   gives access to the timer service
   * @param collector main output for matched pairs
   */
  override def processElement2(in2: OrderEvent, context: CoProcessFunction[ReceiptEvent, OrderEvent, (ReceiptEvent, OrderEvent)]#Context, collector: Collector[(ReceiptEvent, OrderEvent)]): Unit = {
    Option(receiptValueState.value()) match {
      case Some(receipt) =>
        // The receipt arrived first: emit the pair (explicit tuple, no auto-tupling).
        collector.collect((receipt, in2))
        receiptValueState.clear()
        // Same reasoning as in processElement1: remove the now-obsolete timeout timer.
        context.timerService().deleteEventTimeTimer(receipt.timestamp + matchTimeoutMs)
      case None =>
        orderValueState.update(in2)
        context.timerService().registerEventTimeTimer(in2.timestamp + matchTimeoutMs)
    }
  }

  /**
   * Timeout callback: whatever is still buffered for the current key is sent to the
   * corresponding side output, then both states are cleared.
   *
   * @param timestamp the time the firing timer was registered for
   * @param ctx       used to write to the side outputs
   * @param out       main output (not used here)
   */
  override def onTimer(timestamp: Long, ctx: CoProcessFunction[ReceiptEvent, OrderEvent, (ReceiptEvent, OrderEvent)]#OnTimerContext, out: Collector[(ReceiptEvent, OrderEvent)]): Unit = {
    Option(receiptValueState.value()).foreach(receipt => ctx.output(unmatchedReceiptTag, receipt))
    Option(orderValueState.value()).foreach(order => ctx.output(unmatchedOrderTag, order))
    receiptValueState.clear()
    orderValueState.clear()
  }
}
正常情况下,只要两个流中出现相同的 txid 就会输出匹配结果;先到的一方最多等待 3s(事件时间),超过 3s 后它的状态会被清空,之后才到的另一方就匹配不上了
定时器触发所依据的时间(watermark)是不区分 key 的,是算子整体视角的;但定时器本身是按 key 注册的,触发时清空的也只是该 key 的状态
只有一条流里有某个 txid 时,定时器在事件时间上延迟 3s 触发;触发依据是算子的 watermark(两条输入流 watermark 中较小的那个),示例如下
流1输入:4 404 17/05/2015:10:26:45 不会有输出
流2输入:6 505 17/05/2015:10:26:55 不会有输出
流1输入:5 404 17/05/2015:10:26:47 不会有输出,
流1输入:7 404 17/05/2015:10:26:49
输出:receipt_output_tag > ReceiptEvent(4,404,1421461605000)
流1输入:9 404 17/05/2015:10:26:59 流1的 watermark 推进到 10:26:59
输出:
receipt_output_tag > ReceiptEvent(5,404,1421461607000)
receipt_output_tag > ReceiptEvent(7,404,1421461609000)
流2输入:8 505 17/05/2015:10:26:56 不会有输出
流2输入:1 505 17/05/2015:10:27:01
输出:
order_output_tag> OrderEvent(6,505,1421461615000)
order_output_tag> OrderEvent(8,505,1421461616000)
package flinkProject
import java.text.SimpleDateFormat
import flinkSourse.SensorReading
import org.apache.flink.api.common.state.{ValueState, ValueStateDescriptor}
import org.apache.flink.configuration.Configuration
import org.apache.flink.streaming.api.TimeCharacteristic
import org.apache.flink.streaming.api.functions.co.{CoProcessFunction, ProcessJoinFunction}
import org.apache.flink.streaming.api.functions.timestamps.BoundedOutOfOrdernessTimestampExtractor
import org.apache.flink.streaming.api.scala._
import org.apache.flink.streaming.api.windowing.time.Time
import org.apache.flink.util.Collector
/**
 * Receipt record parsed from one line of the receipt socket stream.
 *
 * @param txid       first space-separated field of the input line; used as the join key
 * @param payChannel second space-separated field of the input line
 * @param timestamp  epoch milliseconds parsed from the third field; used as the event-time timestamp
 */
case class ReceiptEvent(txid:String,payChannel:String,timestamp:Long)
/**
 * Order record parsed from one line of the order socket stream.
 *
 * @param txid       first space-separated field of the input line; used as the join key
 * @param payChannel second space-separated field of the input line
 * @param timestamp  epoch milliseconds parsed from the third field; used as the event-time timestamp
 */
case class OrderEvent(txid:String,payChannel:String,timestamp:Long)
/**
 * Variant 2: reconcile receipts against orders with a keyed interval join.
 *
 * Reads space-separated lines of the form `<txid> <payChannel> <dd/MM/yyyy:HH:mm:ss>` from two
 * local sockets (port 1111 for receipts, port 2222 for orders), keys both streams by `txid`,
 * and prints every (receipt, order) pair whose timestamps are at most 3 seconds apart.
 */
object TxConnectedMatch {

  /**
   * Pattern of the third input field, e.g. `17/05/2015:10:26:45`.
   *
   * "MM" is month-of-year. The lowercase "mm" means minute-of-hour; using it in the month
   * position silently drops the month, so every date would be parsed as January.
   */
  private val EventTimePattern = "dd/MM/yyyy:HH:mm:ss"

  /**
   * Converts the textual event time into epoch milliseconds (JVM default time zone).
   *
   * A new formatter is created per call because `SimpleDateFormat` is not thread-safe.
   *
   * @param raw text in [[EventTimePattern]] format
   * @return epoch milliseconds
   * @throws java.text.ParseException if `raw` does not match the pattern
   */
  private def parseEventTime(raw: String): Long =
    new SimpleDateFormat(EventTimePattern).parse(raw).getTime

  /**
   * Builds and runs the job.
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = {
    val executionEnvironment: StreamExecutionEnvironment = StreamExecutionEnvironment.getExecutionEnvironment
    executionEnvironment.setParallelism(1)
    // Event time; watermarks are generated periodically (every 200 ms by default).
    executionEnvironment.setStreamTimeCharacteristic(TimeCharacteristic.EventTime)

    // Receipt stream: zero allowed out-of-orderness, timestamp taken from the event itself.
    val stream1: DataStream[String] = executionEnvironment.socketTextStream("127.0.0.1", 1111)
    val receiptDataStream: DataStream[ReceiptEvent] = stream1.map(data => {
      val tmpList = data.split(" ")
      ReceiptEvent(tmpList(0), tmpList(1), parseEventTime(tmpList(2)))
    }).assignTimestampsAndWatermarks(new BoundedOutOfOrdernessTimestampExtractor[ReceiptEvent](Time.seconds(0)) {
      override def extractTimestamp(t: ReceiptEvent) = t.timestamp
    })

    // Order stream: same line format and watermark strategy as the receipt stream.
    val stream2: DataStream[String] = executionEnvironment.socketTextStream("127.0.0.1", 2222)
    val orderStram: DataStream[OrderEvent] = stream2.map(data => {
      val tmpList = data.split(" ")
      OrderEvent(tmpList(0), tmpList(1), parseEventTime(tmpList(2)))
    }).assignTimestampsAndWatermarks(new BoundedOutOfOrdernessTimestampExtractor[OrderEvent](Time.seconds(0)) {
      override def extractTimestamp(t: OrderEvent) = t.timestamp
    })

    // Approach 2: interval join on the two keyed streams.
    val keyedReceiptDataStream: KeyedStream[ReceiptEvent, String] = receiptDataStream.keyBy(_.txid)
    val keyedOrderStram: KeyedStream[OrderEvent, String] = orderStram.keyBy(_.txid)

    // The bounds are relative to the receipt timestamp, so the lower bound must be negative
    // to accept orders up to 3 s BEFORE the receipt as well as up to 3 s after it.
    // between(3 s, 3 s) would only match orders exactly 3 s after the receipt.
    val result: DataStream[(ReceiptEvent, OrderEvent)] = keyedReceiptDataStream
      .intervalJoin(keyedOrderStram)
      .between(Time.seconds(-3), Time.seconds(3))
      .process(new TxIntervalProcessJoinFunctin())

    // Without a sink the joined pairs would be computed and then discarded.
    result.print("result")

    executionEnvironment.execute("connected Stream")
  }
}
/**
 * Join function for the interval join: forwards every joined (receipt, order) pair
 * unchanged to the output.
 */
class TxIntervalProcessJoinFunctin
  extends ProcessJoinFunction[ReceiptEvent, OrderEvent, (ReceiptEvent, OrderEvent)] {

  /**
   * Called once for each receipt/order pair produced by the join.
   *
   * @param in1       the receipt side of the pair
   * @param in2       the order side of the pair
   * @param context   join context (not used)
   * @param collector receives the pair as a tuple
   */
  override def processElement(
      in1: ReceiptEvent,
      in2: OrderEvent,
      context: ProcessJoinFunction[ReceiptEvent, OrderEvent, (ReceiptEvent, OrderEvent)]#Context,
      collector: Collector[(ReceiptEvent, OrderEvent)]
  ): Unit = {
    val matchedPair: (ReceiptEvent, OrderEvent) = in1 -> in2
    collector.collect(matchedPair)
  }
}