/**
 * Bean carried through the stream and exposed to the Table API.
 *
 * <p>Instances are built from one comma-separated input line: {@code xing} is the
 * first character of the first field, {@code name} is the second field and
 * {@code ts} is the third field parsed as a {@code long}.
 *
 * <p>The Lombok-style annotations are expected to supply the accessors and both
 * constructors used elsewhere in this file ({@code getTs()}, the three-argument
 * constructor). Field order matters for the all-args constructor.
 */
@Data
@NoArgsConstructor
@AllArgsConstructor
public static class Nan {
    /** First character of the first input field (presumably the surname — confirm). */
    private String xing;

    /** Second input field. */
    private String name;

    /** Event timestamp; converted with {@code to_timestamp_ltz(ts,3)}, i.e. treated as epoch milliseconds. */
    private Long ts;
}
// Ask the JVM to prefer the IPv4 stack for the socket sources below.
// NOTE(review): set before any environment/socket setup because the JDK is documented
// to read its networking properties once, early — confirm this ordering is what is wanted.
System.setProperty("java.net.preferIPv4Stack", "true");

// Single-parallelism streaming environment plus the Table API bridge on top of it.
StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
env.setParallelism(1);
StreamTableEnvironment tenv = StreamTableEnvironment.create(env);

// Two text sockets on host "hdp01"; each received line becomes one String record.
DataStreamSource<String> sourceNan = env.socketTextStream("hdp01", 1111);
DataStreamSource<String> sourceNv = env.socketTextStream("hdp01", 2222);

// Parse "xing,name,ts" lines into Nan beans, drop lines that cannot be parsed,
// and attach event-time timestamps/watermarks taken from Nan.ts.
SingleOutputStreamOperator<Nan> beanNan = sourceNan
        .map(new MapFunction<String, Nan>() {
            @Override
            public Nan map(String s) throws Exception {
                try {
                    String[] split = s.split(",");
                    return new Nan(split[0].substring(0, 1), split[1], Long.parseLong(split[2]));
                } catch (Exception ignored) {
                    // Deliberate best-effort: a malformed line (too few fields, empty first
                    // field, non-numeric ts) is mapped to null and removed by the filter below.
                    return null;
                }
            }
        })
        .filter(Objects::nonNull)
        .assignTimestampsAndWatermarks(
                // Explicit <Nan> witness: without it the chained call infers Object.
                WatermarkStrategy.<Nan>forMonotonousTimestamps()
                        .withTimestampAssigner(new SerializableTimestampAssigner<Nan>() {
                            @Override
                            public long extractTimestamp(Nan nan, long l) {
                                // Event time is the bean's own ts field.
                                return nan.getTs();
                            }
                        }))
        .returns(TypeInformation.of(Nan.class));
流转表时有一点需要注意:watermark 必须在表的 Schema 中重新声明,否则会丢失。常用的方式如下。
创建 watermark 分为两步:
第一步:必须依据一个时间戳字段来创建 watermark,该字段的类型必须是 TIMESTAMP(3) 或 TIMESTAMP_LTZ(3)(本文示例使用 TIMESTAMP_LTZ(3))。
第二步:根据时间戳字段生成watermark
时间戳字段有两种获取方式
1、根据一个bigint字段进行转换
2、在流转表,且流上设置了watermark的情况下,根据内置属性rowtime创建,这个rowtime是流转表时暴露出来的事件时间
watermark也有两种获取方式
1、根据时间戳字段重新创建watermark
2、在流转表,且流上设置了watermark的情况下,沿用流上的watermark
下面是两种场景,只要记住第一种就行了,其实第二种没什么用。
tenv.createTemporaryView("nan", beanNan, Schema.newBuilder()
.column("xing", DataTypes.STRING())
.column("name", DataTypes.STRING())
.column("ts", DataTypes.BIGINT())
.columnByExpression("rt", "to_timestamp_ltz(ts,3)") // 根据一个bigint字段进行转换
.watermark("rt", "rt - interval '1' second ") // 重新创建watermark
.build());
tenv.createTemporaryView("nan1", beanNan, Schema.newBuilder()
.column("xing", DataTypes.STRING())
.column("name", DataTypes.STRING())
.column("ts", DataTypes.BIGINT())
.columnByMetadata("rt", DataTypes.TIMESTAMP_LTZ(3),"rowtime") // 根据内置属性rowtime创建
.watermark("rt", "source_watermark()") // 沿用流的watermark “source_watermark 等于 rt - interval '1' second”
.build());
// Query view "nan": every column plus the current watermark of rt, printed to stdout.
// NOTE(review): with an unbounded socket source, print() presumably keeps running until
// the job ends, so statements after it may never execute — confirm.
String watermarkQuery = "select *,current_watermark(rt) from nan";
TableResult tableResult = tenv.executeSql(watermarkQuery);
tableResult.print();
// DDL for table "person": same column layout as the views above (rt computed from ts,
// watermark one second behind rt), read as JSON from a Kafka topic.
// The fragments are joined with an empty separator, so the resulting statement is
// exactly the concatenation of the literals below.
String source = String.join("",
        "CREATE TABLE person ( ",
        " xing STRING, ",
        " name STRING, ",
        " ts BIGINT, ",
        " rt as to_timestamp_ltz(ts,3), ",
        " watermark for rt as rt - interval '1' second ",
        ") WITH ( ",
        " 'connector' = 'kafka', ",
        " 'topic' = 'flink_topic', ",
        " 'properties.bootstrap.servers' = '172.16.10.139:9092', ",
        " 'properties.group.id' = 'testGroup', ",
        " 'scan.startup.mode' = 'latest-offset', ",
        " 'format' = 'json' ",
        ")");
// Register the table in the table environment.
tenv.executeSql(source);
表转流时,表上的 watermark 会自动传递到流上,不需要额外声明。
// Convert the table back to a stream of Row records; typed so the downstream
// function can consume Row without raw-type casts.
DataStream<Row> dataStream = tenv.toDataStream(table);
dataStream.process(new ProcessFunction() {
@Override
public void processElement(Row value, ProcessFunction.Context ctx, Collector