"name": "Tom"
-->
DDL : name string
"title": {
"title_name": "表情包",
"title_number":3
}
-->
DDL : title row<title_name string, title_number int>
"user_info": [{
"address": "北京市",
"city": "beijing"
}, {
"address": "上海市",
"city": "shanghai"
}]
-->
DDL : user_info array<row<address string, city string>>
"time_info": {
"timestamp": 1657332118000
}
-->
DDL : time_info map<string, bigint>
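Putting the four fragments together, the complete JSON message consumed from Kafka looks like this (assembled from the snippets above):

{
    "name": "Tom",
    "title": {
        "title_name": "表情包",
        "title_number": 3
    },
    "user_info": [{
        "address": "北京市",
        "city": "beijing"
    }, {
        "address": "上海市",
        "city": "shanghai"
    }],
    "time_info": {
        "timestamp": 1657332118000
    }
}

The source table then declares one column per top-level field: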
CREATE TABLE kafka_source (
name string,
title row<title_name string, title_number int>,
user_info array<row<address string, city string>>,
time_info map<string, bigint>
) WITH (
'connector' = 'kafka', -- Kafka connector
'topic' = 'xxx', -- Kafka topic
'properties.bootstrap.servers' = 'ip1:9092,ip2:9092,ip3:9092', -- Kafka broker addresses and ports
'properties.group.id' = 'xxx', -- consumer group
'scan.startup.mode' = 'latest-offset', -- where to start reading; valid values are latest-offset | earliest-offset | timestamp. With timestamp you must also set 'scan.startup.timestamp-millis' = '1656910800000' to mark the timestamp to start consuming from
'format' = 'json', -- the source format is JSON
'json.fail-on-missing-field' = 'false', -- whether to fail if a field is missing
'json.ignore-parse-errors' = 'true' -- skip fields and rows with parse errors instead of failing; on error the field is set to null
);
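As the comment on 'scan.startup.mode' notes, starting from a specific point in time requires the timestamp mode. A minimal sketch of that variant of the WITH clause, reusing the epoch-millisecond value from the comment above:

'scan.startup.mode' = 'timestamp', -- start from an explicit timestamp instead of an offset
'scan.startup.timestamp-millis' = '1656910800000' -- epoch milliseconds to start consuming from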
Notes:
Reserved keywords must be quoted with backticks, e.g. `timestamp`.

select
name
, title
, title.title_name as title_name
, title.title_number as title_number
, user_info
, user_info[1].address as address
, time_info
, time_info['timestamp'] as `timestamp`
from kafka_source;
+----+------+---------------+------------+--------------+------------------------------+---------+---------------------------+---------------+
| op | name | title         | title_name | title_number | user_info                    | address | time_info                 | timestamp     |
+----+------+---------------+------------+--------------+------------------------------+---------+---------------------------+---------------+
| +I | Tom1 | +I[表情包, 3] | 表情包     | 3            | [+I[北京市, beijing], +I[上… | 北京市  | {timestamp=1657332118000} | 1657332118000 |
+----+------+---------------+------------+--------------+------------------------------+---------+---------------------------+---------------+
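Note that Flink SQL array subscripts are 1-based, so user_info[1].address returns the address of the first array element (北京市), which is what the address column above shows. Map values are read with a bracketed string key, e.g. time_info['timestamp'], and row fields with dot notation, e.g. title.title_name.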