贴一个曾经开发过的小需求,主要是按照产品与时间的细粒度对业务指标进行实时统计
几个月前的了,我使用的是阿里云的Stream Studio可以直接写Flink-SQL进行统计,近期我们将本地线上环境升级到了Flink-1.11.1社区版,Flink-SQL的语法基本与Blink一致,并且Flink-1.11.1社区增加了对Hive的交互支持。
一个小小的不满,Flink-1.11.1社区版并不能像阿里云Blink-Stream-Studio平台一样支持DDL的形式创建维度表,在1.11社区版中还是得用以前LookUpFunction的方式来做维度交互,不支持DDL创建维度表,这个我相信在未来的新版本中也会增加!毕竟要用SQL咱们就全都用SQL多好~
以下是我曾经基于阿里云Blink-Stream-Studio平台做的一个简单需求,我负责我司近20个业务渠道的实时计算需求与研发,很多人都会用Flink,但大家的需求场景各不相同,这里列举一个易懂的小项目:实时统计某个业务渠道的实时交易情况,退款情况,SKU实售情况,今日SKU-GMV。
--SQL
--********************************************************************--
--Author: guangyu
--CreateTime: 2020-06-16 20:01:25
--Comment: 请输入业务注释信息
--********************************************************************--
-- 数据源
-- CREATE TABLE src_youzan_data (
-- `data` VARCHAR
-- ) WITH (
-- type = 'datahub',
-- endPoint = 'http://dh-cn-beijing.aliyun-inc.com',
-- roleArn='acs:ram::199526033333379:role/aliyunstreamdefaultrole',
-- project = 'DC_TEST',
-- topic = 'src_youzan_data'
-- );
CREATE TABLE src_youzan_data (
`data` VARCHAR
) WITH (
type = 'datahub',
endPoint = 'http://dh-cn-beijing.aliyun-inc.com',
roleArn='acs:ram::199526033333379:role/aliyunstreamdefaultrole',
project = 'DC_xxxx',
topic = 'src_youzan_data'
);
-- 商品纬度表
CREATE TABLE dim_product (
id INT,
product_id VARCHAR,
product_code VARCHAR,
second_code VARCHAR,
`name` VARCHAR,
brand_name VARCHAR,
storage_location VARCHAR,
middle_code VARCHAR,
origin VARCHAR,
buy_specification VARCHAR,
big_category_name VARCHAR,
contents VARCHAR,
small_category_name VARCHAR,
brand_code VARCHAR,
sale_specification VARCHAR,
product_status VARCHAR,
wholesale_specification VARCHAR,
putaway_time VARCHAR,
last_manager VARCHAR,
ip VARCHAR,
adjust_price DECIMAL,
contract_price DECIMAL,
except_tax_price DECIMAL,
expiration_date INT,
lowest_price VARCHAR,
rate DOUBLE,
wholesale_price DECIMAL,
middle_name VARCHAR,
`month` VARCHAR,
supplier_name VARCHAR,
subdivide VARCHAR,
distribution_type VARCHAR,
supplier VARCHAR,
business_practice VARCHAR,
product_status_code VARCHAR,
unit VARCHAR,
`year` VARCHAR,
big_code VARCHAR,
distribution_specification VARCHAR,
updated VARCHAR,
small_code VARCHAR,
product_type VARCHAR,
simple_name VARCHAR,
specification VARCHAR,
unique index (second_code),
PERIOD FOR SYSTEM_TIME
) WITH (
type= 'rds',
url = 'jdbc:mysql://xxxx:36628/dcxxxx',
userName = 'guangyu',
password = 'qwer#123',
tableName = 'product'
);
-- 解析订单
CREATE VIEW src_youzan_order as
select
-- 主订单信息:
-- 有赞订单类型
JSON_VALUE(`data`,'$.type') as `type`,
-- 重发的次数
CAST(JSON_VALUE(JSON_VALUE(`data`,'$.msg'),'$.sendCount') as int) as `sendCount`,
-- 重发的次数
CAST(JSON_VALUE(JSON_VALUE(`data`,'$.msg'),'$.test') as boolean) as `test`,
-- 最终支付价格 payment=orders.payment的总和
CAST(JSON_VALUE(JSON_VALUE(JSON_VALUE(JSON_VALUE(`data`,'$.msg'),'$.full_order_info'),'$.pay_info'),'$.payment') as DOUBLE) as payment,
-- 主订单id
JSON_VALUE(JSON_VALUE(JSON_VALUE(JSON_VALUE(`data`,'$.msg'),'$.full_order_info'),'$.order_info'),'$.tid') as tid,
-- 支付时间
REPLACE(JSON_VALUE(JSON_VALUE(JSON_VALUE(JSON_VALUE(`data`,'$.msg'),'$.full_order_info'),'$.order_info'),'$.pay_time'),'+',' ') as pay_time,
-- 订单创建时间
REPLACE(JSON_VALUE(JSON_VALUE(JSON_VALUE(JSON_VALUE(`data`,'$.msg'),'$.full_order_info'),'$.order_info'),'$.created'),'+',' ') as created,
-- 订单修改时间
REPLACE(JSON_VALUE(JSON_VALUE(JSON_VALUE(JSON_VALUE(`data`,'$.msg'),'$.full_order_info'),'$.order_info'),'$.update_time'),'+',' ') as update_time,
-- 订单状态
JSON_VALUE(JSON_VALUE(JSON_VALUE(JSON_VALUE(`data`,'$.msg'),'$.full_order_info'),'$.order_info'),'$.status') as `status`,
-- 支付类型
CAST(JSON_VALUE(JSON_VALUE(JSON_VALUE(JSON_VALUE(`data`,'$.msg'),'$.full_order_info'),'$.order_info'),'$.pay_type') as int) as pay_type,
-- 子订单信息:
-- 商品编码
JSON_VALUE(JSON_VALUE(`data`,'$.divide_order'),'$.outer_item_id') as outer_item_id,
-- 单商品现价,减去了商品的优惠金额
CAST(JSON_VALUE(JSON_VALUE(`data`,'$.divide_order'),'$.discount_price') as DOUBLE) as discount_price,
-- 商品数量
CAST(JSON_VALUE(JSON_VALUE(`data`,'$.divide_order'),'$.num') as int) as num,
-- 交易明细号
JSON_VALUE(JSON_VALUE(`data`,'$.divide_order'),'$.oid') as oid,
-- 商品名称
JSON_VALUE(JSON_VALUE(`data`,'$.divide_order'),'$.title') as title,
-- 是否赠品
JSON_VALUE(JSON_VALUE(`data`,'$.divide_order'),'$.is_present') as is_present,
-- 单商品原价
CAST(JSON_VALUE(JSON_VALUE(`data`,'$.divide_order'),'$.price') as DOUBLE) as price,
-- 分销价格
CAST(JSON_VALUE(JSON_VALUE(`data`,'$.divide_order'),'$.fenxiao_price') as DOUBLE) as fenxiao_price,
-- 商品优惠后总价
CAST(JSON_VALUE(JSON_VALUE(`data`,'$.divide_order'),'$.total_fee') as DOUBLE) as total_fee,
-- 商品别名
JSON_VALUE(JSON_VALUE(`data`,'$.divide_order'),'$.alias') as alias,
-- 商品最终均摊价
CAST(JSON_VALUE(JSON_VALUE(`data`,'$.divide_order'),'$.payment') as DOUBLE) as order_payment,
-- 商家编码
JSON_VALUE(JSON_VALUE(`data`,'$.divide_order'),'$.outer_sku_id') as outer_sku_id,
-- 商品详情链接
JSON_VALUE(JSON_VALUE(`data`,'$.divide_order'),'$.goods_url') as goods_url,
-- 商品id
CAST(JSON_VALUE(JSON_VALUE(`data`,'$.divide_order'),'$.item_id') as int) as item_id,
-- 海淘口岸编码
JSON_VALUE(JSON_VALUE(`data`,'$.divide_order'),'$.customs_code') as customs_code,
-- 商品积分价(非积分商品则为0)
CAST(JSON_VALUE(JSON_VALUE(`data`,'$.divide_order'),'$.points_price') as DOUBLE) as points_price,
-- 订单类型 0:普通类型商品; 1:拍卖商品; 5:餐饮商品; 10:分销商品; 20:会员卡商品; 21:礼品卡商品; 23:有赞会议商品; 24:周期购; 30:收银台商品; 31:知识付费商品; 35:酒店商品; 40:普通服务类商品; 182:普通虚拟商品; 183:电子卡券商品; 201:外部会员卡商品; 202:外部直接收款商品; 203:外部普通商品; 205:mock不存在商品; 206:小程序二维码
CAST(JSON_VALUE(JSON_VALUE(`data`,'$.divide_order'),'$.item_type') as int) as item_type,
-- 是否海淘订单, 1是海淘
JSON_VALUE(JSON_VALUE(`data`,'$.divide_order'),'$.is_cross_border') as is_cross_border,
-- 退款类数据
-- 退款金额
CAST(JSON_VALUE(JSON_VALUE(`data`,'$.msg'),'$.refunded_fee') as DOUBLE ) as `refunded_fee`,
-- 退款订单
JSON_VALUE(JSON_VALUE(`data`,'$.msg'),'$.refund_id') as `refund_id`
from src_youzan_data;
-- where REPLACE(JSON_VALUE(JSON_VALUE(JSON_VALUE(JSON_VALUE(`data`,'$.msg'),'$.full_order_info'),'$.order_info'),'$.pay_time'),'+',' ') = CAST( CURRENT_DATE AS VARCHAR );
-- 已支付订单去重
-- create view order_distinct as
-- SELECT * from (
-- SELECT
-- *,
-- row_number() over ( PARTITION BY tid ORDER BY update_time DESC ) AS rn
-- FROM
-- src_youzan_order
-- WHERE
-- `type` = 'trade_TradeBuyerPay' -- 类型为用户已支付
-- and SUBSTRING( pay_time, 1, 10 ) = CAST( CURRENT_DATE AS VARCHAR )
-- )distinct_order
-- where distinct_order.rn = 1;
-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
-- 今日支付,今日订单数
create view gmv_print as
SELECT
SUBSTRING( pay_time, 1, 10 ) as `date`,
sum( gmv_tb.payment ) AS gmv,
CAST(count( gmv_tb.tid ) as int) AS order_count
FROM
(
SELECT
*,
row_number() over ( PARTITION BY tid ORDER BY update_time DESC ) AS rn
FROM
src_youzan_order
WHERE
`type` = 'trade_TradeBuyerPay' -- 类型为用户已支付
and SUBSTRING( pay_time, 1, 10 ) = CAST( CURRENT_DATE AS VARCHAR )
-- pay_time like '2020-06-18%'
) gmv_tb where gmv_tb.rn=1
group by SUBSTRING( pay_time, 1, 10 )
having SUBSTRING( pay_time, 1, 10 ) = CAST( CURRENT_DATE AS VARCHAR );
-------------------------------------
-- 今日退款金额,退款订单
create view refund_print as
SELECT
SUBSTRING( update_time, 1, 10 ) as `date`,
sum( refund.refunded_fee ) AS refund_fee_total,
CAST(count( refund.refund_id ) as int) AS refund_order_total
FROM
(
SELECT
*,
row_number() over ( PARTITION BY refund_id ORDER BY update_time DESC ) AS rn
FROM
src_youzan_order
WHERE
`type` = 'trade_refund_RefundSuccess' -- 类型为用户退款成功
and SUBSTRING( update_time, 1, 10 ) = CAST( CURRENT_DATE AS VARCHAR )
-- pay_time like '2020-06-18%'
) refund where refund.rn=1
group by SUBSTRING( update_time, 1, 10 )
having SUBSTRING( update_time, 1, 10 ) = CAST( CURRENT_DATE AS VARCHAR ) ;
-------------------------------------
-- 实时金额表
CREATE TABLE dc_xxx_channel_real (
`date` VARCHAR,
channel_id INT,
sum_price DOUBLE,
sum_cnt DOUBLE,
refund_cnt BIGINT,
refund_price DOUBLE,
-- updated_time DATE,
PRIMARY KEY (`date`,channel_id)
) WITH (
type= 'rds',
url = 'jdbc:mysql://xxxx:36628/dcxxxx',
userName = 'dcsxxx',
password = 'xxx',
tableName = 'dc_zzzzz_channel_real'
);
-- 写入数据库
insert into dc_xxx_channel_real
select
gp.`date`,
13 as channel_id,
gp.gmv as sum_price,
case when CAST(gp.order_count as DOUBLE) is null then cast(0 as double) else CAST(gp.order_count as DOUBLE) end as sum_cnt,
case when CAST(rp.refund_order_total as BIGINT) is null then cast(0 as bigint) else CAST(rp.refund_order_total as BIGINT) end as refund_cnt,
case when rp.refund_fee_total is null then cast(0 as double) else rp.refund_fee_total end as refund_price
from gmv_print gp
left join refund_print rp
on gp.`date` = rp.`date`;
-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
-- 实时商品结果表
CREATE TABLE dc_xxx_product_real (
`date` VARCHAR,
channel_id INT,
`group` VARCHAR,
product_id VARCHAR,
`ip` VARCHAR,
big_category_name VARCHAR,
middle_name VARCHAR,
product_type VARCHAR,
series VARCHAR,
subdivide VARCHAR,
product_name VARCHAR,
sum_price DOUBLE,
sum_cnt BIGINT,
cnt_order BIGINT, -- 订单数
-- id INT,
PRIMARY KEY (`date`,channel_id,`group`,product_id)
) WITH (
type= 'rds',
url = 'jdbc:mysql://xxxx:36628/dcxxx',
userName = 'dcxxx',
password = 'xxxxxxx',
tableName = 'dc_xxxproduct_real'
);
-- 统计已支付商品分类,个数,写入数据库
insert into dc_xxxproduct_real
select
tmp.`date`,
13 as channel_id,
'无' as `group`,
tmp.outer_item_id as product_id,
case when `ip` is null then '无' else `ip` end as `ip`,
case when big_category_name is null then '无' else big_category_name end as big_category_name,
case when middle_name is null then '无' else middle_name end as middle_name,
case when product_type is null then '无' else product_type end as product_type,
case when simple_name is null then '无' else simple_name end as series,
CASE WHEN subdivide is null then '无' else subdivide end as subdivide,
case when dim_product.`name` is null then '无' else dim_product.`name` end as `product_name`,
tmp.sum_price as sum_price,
case when CAST(tmp.sum_cnt as BIGINT) is null then CAST(0 as BIGINT) else CAST(tmp.sum_cnt as BIGINT) end as sum_cnt,
case when CAST(tmp.cnt_order as BIGINT) is null then cast(0 as bigint) else CAST(tmp.cnt_order as BIGINT) end as cnt_order
from(
select
odt.outer_item_id, -- 商品ID
sum(odt.num) as sum_cnt, -- 商品累计个数
sum(odt.order_payment) as sum_price, -- 商品合计金额
count(odt.tid) as cnt_order, -- 主订单数,
SUBSTRING( pay_time, 1, 10 ) as `date`
from (
SELECT
*,
row_number() over ( PARTITION BY oid ORDER BY update_time DESC ) AS rn
FROM
src_youzan_order
WHERE
`type` = 'trade_TradeBuyerPay' -- 类型为用户已支付
and SUBSTRING( pay_time, 1, 10 ) = CAST( CURRENT_DATE AS VARCHAR )
) odt where odt.rn=1
group by odt.outer_item_id,SUBSTRING( pay_time, 1, 10 )
having SUBSTRING( pay_time, 1, 10 ) = CAST( CURRENT_DATE AS VARCHAR ) ) tmp
left join dim_product FOR SYSTEM_TIME AS OF PROCTIME ()
on
tmp.outer_item_id = dim_product.second_code
where tmp.outer_item_id is not null and tmp.outer_item_id <> '';
-- create table print_item(
-- outer_item_id varchar,
-- sum_cnt int,
-- sum_price double,
-- cnt_order BIGINT
-- )with(
-- type='print'
-- );
-- insert into print_item
-- select odt.outer_item_id, -- 商品ID
-- sum(odt.num) as sum_cnt, -- 商品累计个数
-- sum(odt.order_payment) as sum_price, -- 商品合计金额
-- count(odt.tid) as cnt_order -- 主订单数
-- from (
-- SELECT
-- *,
-- row_number() over ( PARTITION BY oid ORDER BY update_time DESC ) AS rn
-- FROM
-- src_youzan_order
-- WHERE
-- `type` = 'trade_TradeBuyerPay' -- 类型为用户已支付
-- and SUBSTRING( pay_time, 1, 10 ) = CAST( CURRENT_DATE AS VARCHAR )
-- ) odt where odt.rn=1
-- group by odt.outer_item_id;
-- insert into dc_xxx_product_real
-- select
-- CAST( CURRENT_DATE AS VARCHAR ) as `date`,
-- 13 as channel_id,
-- '无' as `group`,
-- youzan_product.outer_item_id as product_id,
-- case when `ip` is null then '无' else `ip` end as `ip`,
-- case when big_category_name is null then '无' else big_category_name end as big_category_name,
-- case when middle_name is null then '无' else middle_name end as middle_name,
-- case when product_type is null then '无' else product_type end as product_type,
-- case when simple_name is null then '无' else simple_name end as series,
-- CASE WHEN subdivide is null then '无' else subdivide end as subdivide,
-- case when dim_product.`name` is null then '无' else dim_product.`name` end as `product_name`,
-- youzan_product.sum_price as sum_price,
-- CAST(youzan_product.sum_cnt as BIGINT) as sum_cnt,
-- CAST(youzan_product.cnt_order as BIGINT) as cnt_order
-- from(
-- select order_distinct.outer_item_id, -- 商品ID
-- sum(order_distinct.num) as sum_cnt, -- 商品累计个数
-- sum(order_distinct.order_payment) as sum_price, -- 商品合计金额
-- count(tid) as cnt_order -- 主订单数
-- from order_distinct group by order_distinct.outer_item_id
-- ) youzan_product
-- left join
-- dim_product FOR SYSTEM_TIME AS OF PROCTIME ()
-- on youzan_product.outer_item_id = dim_product.second_code
-- where youzan_product.outer_item_id is NOT NULL;
-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
-- create table test_pro (
-- outer_sku_id VARCHAR,
-- sum_cnt BIGINT,
-- sum_price DOUBLE,
-- cnt_order BIGINT
-- )WITH(
-- type = 'print'
-- );
-- -- 测试
-- insert into test_pro
-- select order_distinct.outer_sku_id, -- 商品ID
-- CAST(sum(order_distinct.num) as BIGINT) as sum_cnt, -- 商品累计个数
-- sum(order_distinct.order_payment) as sum_price, -- 商品合计金额
-- CAST(count(tid) as BIGINT) as cnt_order -- 主订单数
-- from order_distinct group by order_distinct.outer_sku_id