连接kafka
import com.tc.flink.conf.KafkaConfig;
import org.apache.flink.api.common.typeinfo.Types;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.table.api.Table;
import org.apache.flink.table.api.TableEnvironment;
import org.apache.flink.table.api.java.StreamTableEnvironment;
import org.apache.flink.table.descriptors.Json;
import org.apache.flink.table.descriptors.Kafka;
import org.apache.flink.table.descriptors.Schema;
import org.apache.flink.types.Row;
.....
// Local stream environment plus the (pre-Flink-1.9) Table API entry point.
final StreamExecutionEnvironment env = StreamExecutionEnvironment.createLocalEnvironment();
StreamTableEnvironment tableEnvironment = TableEnvironment.getTableEnvironment(env);
// Kafka 0.11 source: consume topic "topic-test" starting from the latest offsets.
Kafka kafkaConnect=new Kafka().version("0.11").topic("topic-test").startFromLatest().property("bootstrap.servers", KafkaConfig.KAFKA_BROKER_LIST).property("group.id", "trafficwisdom-streaming");
// The four JSON payload fields are all plain strings. NOTE: the proctime attribute is
// only introduced later in this article (the printed schema below shows no proctime),
// so it must not appear in this first version of the schema.
Schema tableSchema=new Schema()
        .field("interruptCode", Types.STRING).field("interruptMsg", Types.STRING).field("requestId", Types.STRING).field("transferType", Types.STRING);
// failOnMissingField(true): fail on messages lacking a declared field;
// deriveSchema(): reuse the table schema as the JSON format schema.
tableEnvironment.connect(kafkaConnect).withFormat(new Json().failOnMissingField(true).deriveSchema()).withSchema(tableSchema).inAppendMode().registerTableSource("search_log_error");
说明:因为我的kafka消息是json,所以我选取new Json
必须定义Schema,interruptCode、interruptMsg、requestId、transferType且类型全为String类型
kafka消息格式为
{"interruptCode":"B01","interruptMsg":"离线计算-无中转城市","requestId":"565f3d5ef47705fd92be40c70d09c135","transferType":"TF"}
{"interruptCode":"D02","interruptMsg":"回头路规则过滤了所有方案","requestId":"36d1496be24ad63d1d1d6e6bbfaf1e4d","transferType":"TB"}
{"interruptCode":"B01","interruptMsg":"离线计算-无中转城市","requestId":"565f3d5ef47705fd92be40c70d09c135","transferType":"FT"}
{"interruptCode":"D01","interruptMsg":"组装方案无结果","requestId":"2c0971622719a1dbf0734e871b761ee8","transferType":"BT"}
{"interruptCode":"B01","interruptMsg":"离线计算-无中转城市","requestId":"c4c13b340ea4d7eb5f63c0e4add8f07f","transferType":"BT"}
{"interruptCode":"D01","interruptMsg":"组装方案无结果","requestId":"23b029808b3942d80e174f58156247f2","transferType":"TB"}
{"interruptCode":"B01","interruptMsg":"离线计算-无中转城市","requestId":"dc3f0fcad202fb9e1319a25710b5644b","transferType":"FF"}
{"interruptCode":"D01","interruptMsg":"组装方案无结果","requestId":"3f1c4fdd99ec9ec0ac4f1a13a4fb4346","transferType":"TB"}
{"interruptCode":"D01","interruptMsg":"组装方案无结果","requestId":"490eebfeb61cff2c14ae8e36e3314d32","transferType":"TB"}
{"interruptCode":"E03","interruptMsg":"供应商响应异常-第二程所有余票请求无结果","requestId":"ddf69e8249857dcae485cea3a4cdf1bf","transferType":"BT"}
{"interruptCode":"C01","interruptMsg":"两程无余票","requestId":"8292ae585d1ca1df23008b131f172087","transferType":"BT"}
{"interruptCode":"C02","interruptMsg":"第一程无余票","requestId":"4ea56235b025e9d3a0eae62ae0d3093e","transferType":"FF"}
{"interruptCode":"D01","interruptMsg":"组装方案无结果","requestId":"00485e6984ee90c33a60878129ae6a39","transferType":"TT"}
{"interruptCode":"B01","interruptMsg":"离线计算-无中转城市","requestId":"eebe79196da3227845460c0bd8125e92","transferType":"FT"}
{"interruptCode":"C02","interruptMsg":"第一程无余票","requestId":"c600026a13731f297176ea714e04747c","transferType":"BT"}
{"interruptCode":"B01","interruptMsg":"离线计算-无中转城市","requestId":"7fb3d6625885de1464d9e650a170c5ed","transferType":"TT"}
{"interruptCode":"D01","interruptMsg":"组装方案无结果","requestId":"eebe79196da3227845460c0bd8125e92","transferType":"TB"}
{"interruptCode":"C02","interruptMsg":"第一程无余票","requestId":"09519ed35de88a3bc177febe81ba82fa","transferType":"BT"}
{"interruptCode":"E03","interruptMsg":"供应商响应异常-第二程所有余票请求无结果","requestId":"b66b20e1b52ed90fae861c86a647b524","transferType":"TB"}
{"interruptCode":"D01","interruptMsg":"组装方案无结果","requestId":"6f5022b233488c0ec5b657f702d4a490","transferType":"BT"}
{"interruptCode":"C02","interruptMsg":"第一程无余票","requestId":"da140baa4fe6499d97e862241b07a8e0","transferType":"TB"}
{"interruptCode":"B01","interruptMsg":"离线计算-无中转城市","requestId":"95dd55f6fd34c2d3f3a62d74960f8aa6","transferType":"TB"}
{"interruptCode":"C02","interruptMsg":"第一程无余票","requestId":"da140baa4fe6499d97e862241b07a8e0","transferType":"BT"}
{"interruptCode":"D01","interruptMsg":"组装方案无结果","requestId":"ba6c5bc19e3727cf88b3949fa0cfcf3f","transferType":"TB"}
{"interruptCode":"B01","interruptMsg":"离线计算-无中转城市","requestId":"d187f28174fb3bcfe5bc7a25a760d251","transferType":"BT"}
{"interruptCode":"C01","interruptMsg":"两程无余票","requestId":"27e6892c41cc6c7eee2bb5cddac3c304","transferType":"BT"}
执行sql
// Pass-through query: read every row from the registered Kafka-backed table.
String sql="select * from search_log_error";
Table result = tableEnvironment.sqlQuery(sql);
// Print the derived field names/types of the query result.
result.printSchema();
// A plain SELECT yields an append-only stream, so toAppendStream is valid here.
tableEnvironment.toAppendStream(result, Row.class).print();
env.execute();
返回结果
root
|-- interruptCode: String
|-- interruptMsg: String
|-- requestId: String
|-- transferType: String
1> D01,组装方案无结果,490eebfeb61cff2c14ae8e36e3314d32,TB
1> D01,组装方案无结果,490eebfeb61cff2c14ae8e36e3314d32,TB
现在修改sql做groupby操作。
String sql= "select transferType,errorType,count(*) as error_num from (select transferType,substring(interruptCode,1,1) as errorType from search_log_error) A group by transferType,errorType";
报错
Exception in thread "main" org.apache.flink.table.api.TableException: Table is not an append-only table. Use the toRetractStream() in order to handle add and retract messages.
at org.apache.flink.table.api.StreamTableEnvironment.translate(StreamTableEnvironment.scala:920)
at org.apache.flink.table.api.StreamTableEnvironment.translate(StreamTableEnvironment.scala:896)
at org.apache.flink.table.api.java.StreamTableEnvironment.toAppendStream(StreamTableEnvironment.scala:308)
at org.apache.flink.table.api.java.StreamTableEnvironment.toAppendStream(StreamTableEnvironment.scala:262)
at com.tc.flink.demo.SearchErrorStats.main(SearchErrorStats.java:33)
将toAppendStream修改为toRetractStream模式,以支持update(撤回/更新)操作,具体说明见下方代码及说明。
// Same aggregation as before; its result is an updating table.
String sql= "select transferType,errorType,count(*) as error_num from (select transferType,substring(interruptCode,1,1) as errorType from search_log_error) A group by transferType,errorType";
Table result = tableEnvironment.sqlQuery(sql);
result.printSchema();
// toAppendStream fails for updating results (see the exception above); toRetractStream
// emits (flag, row) pairs where flag=true is an add message and flag=false retracts
// a previously emitted row.
// tableEnvironment.toAppendStream(result, Row.class).print();
tableEnvironment.toRetractStream(result, Row.class).print();
env.execute();
返回结果
root
|-- transferType: String
|-- errorType: String
|-- error_num: Long
4> (true,TB,D,1)
2> (true,BT,E,1)
4> (true,BT,C,1)
2> (true,FF,C,1)
3> (false,FT,B,2)
3> (true,FT,B,3)
说明true和false区别,看StreamTableEnvironment.toRetractStream源码
A true [[JBool]] flag indicates an add message, a false flag indicates a retract message.
在Retract模式中,比如当前聚合结果为(FT,B,2),如果再到来一条(FT,B)的记录,
它会先发出撤回消息删除(FT,B,2),然后新增(FT,B,3)这条记录,这种先删后增的操作是低效的。
Flink支持三种流处理时间
Processing time 执行相应操作机器的系统时间
Event time 由它的生产者发送消息的时间(比如服务器产生消息的时间字段)
Ingestion time 指消息进入flink的时间
结构如下图:
修改schema,增加了proctime(processing-time)时间,如果想用event-time可以用rowtime()
// Re-register the source, this time declaring a processing-time attribute "proctime"
// so that time-windowed aggregations can be expressed in SQL.
tableEnvironment.connect(
new Kafka().version("0.11").topic("topic-test").startFromLatest().property("bootstrap.servers", KafkaConfig.KAFKA_BROKER_LIST).property("group.id", "trafficwisdom-streaming"))
.withFormat(new Json().failOnMissingField(true).deriveSchema())
.withSchema(new Schema()
// proctime(): marks this field as a processing-time attribute (use rowtime() for event time).
.field("proctime", Types.SQL_TIMESTAMP).proctime()
.field("interruptCode", Types.STRING).field("interruptMsg", Types.STRING).field("requestId", Types.STRING).field("transferType", Types.STRING))
.inAppendMode()
.registerTableSource("search_log_error");
修改sql
String sql = "select transferType,errorType,count(*) as num from (select transferType,substring(interruptCode,1,1) as errorType,proctime from search_log_error) A group by HOP(proctime,INTERVAL '20' SECOND ,INTERVAL '2' MINUTE), transferType,errorType";
对过去2分钟数据,每20秒统计一次
返回结果
root
|-- transferType: String
|-- errorType: String
|-- num: Long
1> (true,TT,D,1)
4> (true,BT,D,1)
3> (true,FT,B,1)
4> (true,BT,D,3)
3> (true,FT,B,1)
1> (true,TT,D,1)
3> (true,FT,B,1)
4> (true,BT,D,3)
1> (true,TT,D,1)
1> (true,TT,D,1)
4> (true,BT,D,3)
3> (true,FT,B,1)
1> (true,TT,D,1)
3> (true,FT,B,1)
4> (true,BT,D,3)