Flink如何实现3个实时流同时join,leftjoin,rightjoin

还有几分钟就登机了,目前在哈尔滨飞往北京的候机厅。由于晚上回去很晚,第二天又忙,没时间更新文章,所以挤时间整理了一下。

Flink如何实现3个实时流同时join?整体思路就是:

为3个实时流设置相同的时间类型和相同的时间窗口,这样到达同一个窗口时,3个实时流就会同时触发。

由于Flink不支持3个实时流同时join,你需要先把其中2个实时流join,再把join得到的结果跟第三个实时流join。

import java.util	
import SessionIdKeyedProcessFunction.MyTimeTimestampsAndWatermarks	
import org.apache.flink.streaming.api.TimeCharacteristic	
import org.apache.flink.streaming.api.functions.{AssignerWithPeriodicWatermarks, AssignerWithPunctuatedWatermarks}	
import org.apache.flink.streaming.api.scala._	
import org.apache.flink.streaming.api.scala.StreamExecutionEnvironment	
import org.apache.flink.streaming.api.watermark.Watermark	
import org.apache.flink.streaming.api.windowing.assigners.TumblingProcessingTimeWindows	
import org.apache.flink.streaming.api.windowing.time.Time	
import org.apache.flink.util.Collector	
/**
 * Demo job that joins three socket text streams on the word they carry.
 *
 * Flink's window join is binary, so the job joins the first two streams and
 * then joins that result with the third stream, using the same 6-second
 * tumbling processing-time window for both joins.
 */
object FlinkWindow {

  /**
   * Periodic watermark assigner for `(word, count)` records.
   *
   * Each record is stamped with the wall-clock time at which it is seen, and the
   * reported watermark trails the largest timestamp handed out so far by
   * `maxOutOfOrderness` milliseconds.
   */
  class MyTimeTimestampsAndWatermarks extends AssignerWithPeriodicWatermarks[(String, Int)] with Serializable {
    /** How far the watermark lags behind the largest timestamp, in milliseconds (3.5 seconds). */
    val maxOutOfOrderness = 3500L

    /** Largest timestamp handed out so far (default-initialised to 0). */
    var currentMaxTimestamp: Long = _

    /** Generates the record timestamp from the current wall clock; the element itself is not inspected. */
    override def extractTimestamp(element: (String, Int), previousElementTimestamp: Long): Long = {
      val timestamp = System.currentTimeMillis()
      currentMaxTimestamp = math.max(timestamp, currentMaxTimestamp)
      timestamp
    }

    /** Returns the watermark: the current highest timestamp minus the out-of-orderness bound. */
    override def getCurrentWatermark(): Watermark =
      new Watermark(currentMaxTimestamp - maxOutOfOrderness)
  }

  /**
   * Builds a `(word, 1)` stream from the text socket at `localhost:port`.
   *
   * Lines are split on runs of non-word characters. Empty tokens (produced by
   * blank lines or by lines that start with a separator) are dropped so that
   * they do not end up as join keys.
   */
  private def wordStream(env: StreamExecutionEnvironment, port: Int): DataStream[(String, Int)] =
    env.socketTextStream("localhost", port)
      .flatMap(_.split("\\W+").filter(_.nonEmpty))
      .map(word => (word, 1))
      .assignTimestampsAndWatermarks(new MyTimeTimestampsAndWatermarks())

  /**
   * Entry point: wires up the three sources and the two chained joins, then runs the job.
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = {
    val env = StreamExecutionEnvironment.getExecutionEnvironment
    env.setStreamTimeCharacteristic(TimeCharacteristic.ProcessingTime)

    // Both joins must use the same window size so that they fire on the same boundaries.
    val windowSize = Time.seconds(6)

    val inputMap = wordStream(env, 9001)
    inputMap.print()
    val inputMap1 = wordStream(env, 9002)
    inputMap1.print()
    val inputMap2 = wordStream(env, 9003)
    inputMap2.print()

    // First join: stream 1 with stream 2, keyed by word.
    val aa = inputMap.join(inputMap1)
      .where(_._1).equalTo(_._1)
      .window(TumblingProcessingTimeWindows.of(windowSize))
      .apply { (t1: (String, Int), t2: (String, Int), out: Collector[(String, Int, Int)]) =>
        out.collect((t1._1, t1._2, t2._2))
      }
    aa.print()

    // Second join: the result of the first join with stream 3, keyed by word.
    // NOTE(review): with processing-time windows the first join only emits when its
    // window fires, so its output presumably lands in the *following* 6s window of
    // this join rather than the same one — confirm, and consider event-time windows
    // if same-window semantics are required.
    val cc = aa.join(inputMap2)
      .where(_._1).equalTo(_._1)
      .window(TumblingProcessingTimeWindows.of(windowSize))
      .apply { (t1: (String, Int, Int), t2: (String, Int), out: Collector[(String, Int, Int, Int)]) =>
        out.collect((t1._1, t1._2, t1._3, t2._2))
      }
    cc.print()

    env.execute()
  }
}

至于left join和right join:Flink官网没有明确给出实现方案,join算子本身也无法实现,需要用coGroup来实现。大家可以参考下面的left join示例,稍作修改即可得到right join。

import util.source.StreamDataSource1;	
import util.source.StreamDataSource;	
import org.apache.flink.api.common.functions.CoGroupFunction;	
import org.apache.flink.api.java.functions.KeySelector;	
import org.apache.flink.api.java.tuple.Tuple3;	
import org.apache.flink.api.java.tuple.Tuple5;	
import org.apache.flink.streaming.api.TimeCharacteristic;	
import org.apache.flink.streaming.api.datastream.DataStream;	
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;	
import org.apache.flink.streaming.api.functions.timestamps.BoundedOutOfOrdernessTimestampExtractor;	
import org.apache.flink.streaming.api.windowing.assigners.TumblingEventTimeWindows;	
import org.apache.flink.streaming.api.windowing.time.Time;	
import org.apache.flink.util.Collector;	
public class FlinkTumblingWindowsLeftJoinDemo {	
    public static void main(String[] args) throws Exception {	
        int windowSize = 10;	
        long delay = 5100L;	
        final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();	
        env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);	
        env.setParallelism(1);	
        // 设置数据源	
        DataStream> leftSource = env.addSource(new StreamDataSource()).name("Demo Source");	
        DataStream> rightSource = env.addSource(new StreamDataSource1()).name("Demo Source");	
        // 设置水位线	
        DataStream> leftStream = leftSource.assignTimestampsAndWatermarks(	
            new BoundedOutOfOrdernessTimestampExtractor>(Time.milliseconds(delay)) {	
                @Override	
                public long extractTimestamp(Tuple3 element) {	
                    return element.f2;	
                }	
            }	
        );	
        DataStream> rigjhtStream = rightSource.assignTimestampsAndWatermarks(	
            new BoundedOutOfOrdernessTimestampExtractor>(Time.milliseconds(delay)) {	
                @Override	
                public long extractTimestamp(Tuple3 element) {	
                    return element.f2;	
                }	
            }	
        );	
        // join 操作	
        leftStream.coGroup(rigjhtStream)	
            .where(new LeftSelectKey()).equalTo(new RightSelectKey())	
            .window(TumblingEventTimeWindows.of(Time.seconds(windowSize)))	
            .apply(new LeftJoin())	
            .print();	
        env.execute("TimeWindowDemo");	
    }	
    public static class LeftJoin implements CoGroupFunction, Tuple3, Tuple5> {	
        @Override	
        public void coGroup(Iterable> leftElements, Iterable> rightElements, Collector> out) {	
            for (Tuple3 leftElem : leftElements) {	
                boolean hadElements = false;	
                for (Tuple3 rightElem : rightElements) {	
                    out.collect(new Tuple5<>(leftElem.f0, leftElem.f1, rightElem.f1, leftElem.f2, rightElem.f2));	
                    hadElements = true;	
                }	
                if (!hadElements) {	
                    out.collect(new Tuple5<>(leftElem.f0, leftElem.f1, "null", leftElem.f2, -1L));	
                }	
            }	
        }	
    }	
    public static class LeftSelectKey implements KeySelector, String> {	
        @Override	
        public String getKey(Tuple3 w) {	
            return w.f0;	
        }	
    }	
    public static class RightSelectKey implements KeySelector, String> {	
        @Override	
        public String getKey(Tuple3 w) {	
            return w.f0;	
        }	
    }

想看更多大厂技术干货分享?请关注下方公号,回复“spark”“flink”“机器学习”“前端”即可获取海量学习资料。

Flink如何实现3个实时流同时join,leftjoin,rightjoin_第1张图片

你可能感兴趣的:(Flink如何实现3个实时流同时join,leftjoin,rightjoin)