统计周期 | 统计粒度 | 指标 |
---|---|---|
最近1、7、30日 | 品牌 | 订单数 |
最近1、7、30日 | 品牌 | 订单人数 |
最近1、7、30日 | 品牌 | 退单数 |
最近1、7、30日 | 品牌 | 退单人数 |
统计周期 | 统计粒度 | 指标 |
---|---|---|
最近1、7、30日 | 品类 | 订单数 |
最近1、7、30日 | 品类 | 订单人数 |
最近1、7、30日 | 品类 | 退单数 |
最近1、7、30日 | 品类 | 退单人数 |
2.构建指标体系,对于需求进行指标分析,分析出每个需求对应什么类型指标
4.设计DWS层汇总表,根据刚刚梳理的指标体系表格,梳理出DWS层需要创建哪些表格。
DWS层表名的命名规范为:dws_数据域_统计粒度_业务过程_统计周期(1d/nd/td)
5.创建dws_trade_tm_order_1d表格的DDL语句
-- DWS summary table: trade domain, brand (tm) granularity, order process, 1-day period.
-- External ORC table, Snappy-compressed, partitioned by the statistics date `dt`.
create external table dws_trade_tm_order_1d
(
    tm_id              string         comment '品牌id',
    tm_name            string         comment '品牌名称',
    order_count        bigint         comment '最近1日下单次数',
    order_user_count   bigint         comment '最近1日下单人数',
    order_num          bigint         comment '最近1日下单件数',
    order_total_amount decimal(16,2)  comment '最近1日下单金额'
) comment '交易域品牌粒度订单最近1日汇总事实表'
-- Hive's keyword is PARTITIONED BY; "partition by" is only valid inside window clauses.
partitioned by (dt string)
stored as orc
location '/warehouse/gmall/dws/dws_trade_tm_order_1d'
tblproperties ('orc.compress' = 'snappy');
-- Load the 2020-06-14 partition of dws_trade_tm_order_1d.
-- Aggregates that day's order-detail rows per brand; brand attributes are looked up
-- from the same-day partition of dim_sku_full via sku_id.
-- The LEFT JOIN keeps detail rows whose sku has no dimension match; they are grouped
-- under a NULL tm_id / tm_name.
-- NOTE(review): count(1) counts order-detail rows, not distinct orders. If the detail
-- table carries an order id, count(distinct order_id) may be the intended
-- "order count" — confirm against the metric definition.
insert overwrite table dws_trade_tm_order_1d partition (dt = '2020-06-14')
select
    sku.tm_id,
    sku.tm_name,
    count(1)                   as order_count,
    count(distinct od.user_id) as order_user_count,
    sum(od.sku_num)            as order_num,
    sum(od.split_total_amount) as order_total_amount
from
(
    select
        sku_id,
        user_id,
        sku_num,
        split_total_amount
    from dwd_trade_order_detail_inc
    where dt = '2020-06-14'
) od
left join
(
    select
        id,
        tm_id,
        tm_name
    from dim_sku_full
    where dt = '2020-06-14'
) sku
    on od.sku_id = sku.id
group by
    sku.tm_id,
    sku.tm_name;
-- DWS summary table: trade domain, brand (tm) granularity, order process, n-day period.
-- Holds the 7-day and 30-day metrics side by side in one row.
-- External ORC table, Snappy-compressed, partitioned by the statistics date `dt`.
create external table dws_trade_tm_order_nd
(
    tm_id                  string         comment '品牌id',
    tm_name                string         comment '品牌名称',
    order_count_7d         bigint         comment '最近7日下单次数',
    order_user_count_7d    bigint         comment '最近7日下单人数',
    order_num_7d           bigint         comment '最近7日下单件数',
    order_total_amount_7d  decimal(16,2)  comment '最近7日下单金额',
    order_count_30d        bigint         comment '最近30日下单次数',
    order_user_count_30d   bigint         comment '最近30日下单人数',
    order_num_30d          bigint         comment '最近30日下单件数',
    order_total_amount_30d decimal(16,2)  comment '最近30日下单金额'
) comment '交易域品牌粒度订单最近7日和30日汇总事实表'
-- Hive's keyword is PARTITIONED BY; "partition by" is only valid inside window clauses.
partitioned by (dt string)
stored as orc
location '/warehouse/gmall/dws/dws_trade_tm_order_nd'
tblproperties ('orc.compress' = 'snappy');
-- Load the 2020-06-14 partition of dws_trade_tm_order_nd from the daily brand summary.
-- The WHERE clause restricts the scan to the 30 daily partitions ending on 2020-06-14;
-- the 7-day columns then use sum(if(...)) to keep only the last 7 of those days.
-- The upper bound on dt keeps a re-run for a past date from absorbing later partitions.
-- NOTE(review): order_user_count is a per-day distinct count, so summing it across days
-- counts a user once per active day rather than once per window. An exact 7d/30d user
-- count has to be derived from user-level data — confirm the intended definition.
insert overwrite table dws_trade_tm_order_nd partition (dt = '2020-06-14')
select
    tm_id,
    tm_name,
    -- last 7 days
    sum(if(dt >= date_sub('2020-06-14', 6), order_count, 0)),
    sum(if(dt >= date_sub('2020-06-14', 6), order_user_count, 0)),
    sum(if(dt >= date_sub('2020-06-14', 6), order_num, 0)),
    sum(if(dt >= date_sub('2020-06-14', 6), order_total_amount, 0)),
    -- last 30 days (every row that passed the WHERE filter)
    sum(order_count),
    sum(order_user_count),
    sum(order_num),
    sum(order_total_amount)
from dws_trade_tm_order_1d
where dt >= date_sub('2020-06-14', 29)
  and dt <= '2020-06-14'
group by
    tm_id,
    tm_name;
-- DWS summary table: trade domain, user + brand (tm) granularity, order process,
-- 1-day period. External ORC table, Snappy-compressed, partitioned by `dt`.
create external table dws_trade_user_tm_order_1d
(
    user_id            string         comment '用户id',
    tm_id              string         comment '品牌id',
    tm_name            string         comment '品牌名称',
    order_count        bigint         comment '最近1日下单次数',
    order_num          bigint         comment '最近1日下单件数',
    order_total_amount decimal(16,2)  comment '最近1日下单金额'
) comment '交易域用户品牌粒度订单最近1日汇总事实表'
-- Hive's keyword is PARTITIONED BY; "partition by" is only valid inside window clauses.
partitioned by (dt string)
stored as orc
-- The directory must be this table's own: sharing dws_trade_tm_order_1d's path would
-- make the two tables overwrite each other's partition files.
location '/warehouse/gmall/dws/dws_trade_user_tm_order_1d'
tblproperties ('orc.compress' = 'snappy');
-- Load the 2020-06-14 partition of dws_trade_user_tm_order_1d.
-- Aggregates that day's order-detail rows per (user, brand); brand attributes are
-- looked up from the same-day partition of dim_sku_full via sku_id.
-- The LEFT JOIN keeps detail rows whose sku has no dimension match; they are grouped
-- under a NULL tm_id / tm_name.
-- NOTE(review): count(1) counts order-detail rows, not distinct orders — confirm that
-- this is the intended "order count".
insert overwrite table dws_trade_user_tm_order_1d partition (dt = '2020-06-14')
select
    od.user_id,
    sku.tm_id,
    sku.tm_name,
    count(1)                   as order_count,
    sum(od.sku_num)            as order_num,
    sum(od.split_total_amount) as order_total_amount
from
(
    select
        sku_id,
        user_id,
        sku_num,
        split_total_amount
    from dwd_trade_order_detail_inc
    where dt = '2020-06-14'
) od
left join
(
    select
        id,
        tm_id,
        tm_name
    from dim_sku_full
    where dt = '2020-06-14'
) sku
    on od.sku_id = sku.id
group by
    od.user_id,
    sku.tm_id,
    sku.tm_name;
-- DWS summary table: trade domain, user + brand (tm) granularity, order process,
-- n-day period. Holds the 7-day and 30-day metrics side by side in one row.
-- External ORC table, Snappy-compressed, partitioned by the statistics date `dt`.
create external table dws_trade_user_tm_order_nd
(
    user_id                string         comment '用户id',
    tm_id                  string         comment '品牌id',
    tm_name                string         comment '品牌名称',
    order_count_7d         bigint         comment '最近7日下单次数',
    order_num_7d           bigint         comment '最近7日下单件数',
    order_total_amount_7d  decimal(16,2)  comment '最近7日下单金额',
    order_count_30d        bigint         comment '最近30日下单次数',
    order_num_30d          bigint         comment '最近30日下单件数',
    order_total_amount_30d decimal(16,2)  comment '最近30日下单金额'
) comment '交易域用户品牌粒度订单最近7日和30日汇总事实表'
-- Hive's keyword is PARTITIONED BY; "partition by" is only valid inside window clauses.
partitioned by (dt string)
stored as orc
-- The directory must be this table's own: sharing dws_trade_tm_order_nd's path would
-- make the two tables overwrite each other's partition files.
location '/warehouse/gmall/dws/dws_trade_user_tm_order_nd'
tblproperties ('orc.compress' = 'snappy');
-- Load the 2020-06-14 partition of dws_trade_user_tm_order_nd.
-- Source is the daily user+brand summary (dws_trade_user_tm_order_1d); the brand-only
-- daily table has no user_id column and cannot feed this grain.
-- The WHERE clause restricts the scan to the 30 daily partitions ending on 2020-06-14;
-- the 7-day columns then use sum(if(...)) to keep only the last 7 of those days.
-- The upper bound on dt keeps a re-run for a past date from absorbing later partitions.
insert overwrite table dws_trade_user_tm_order_nd partition (dt = '2020-06-14')
select
    user_id,
    tm_id,
    tm_name,
    -- last 7 days
    sum(if(dt >= date_sub('2020-06-14', 6), order_count, 0)),
    sum(if(dt >= date_sub('2020-06-14', 6), order_num, 0)),
    sum(if(dt >= date_sub('2020-06-14', 6), order_total_amount, 0)),
    -- last 30 days (every row that passed the WHERE filter)
    sum(order_count),
    sum(order_num),
    sum(order_total_amount)
from dws_trade_user_tm_order_1d
where dt >= date_sub('2020-06-14', 29)
  and dt <= '2020-06-14'
group by
    user_id,
    tm_id,
    tm_name;
-- DWS summary table: trade domain, user + sku granularity, order process, 1-day period.
-- Carries the sku's category (levels 1-3) and brand attributes alongside the metrics.
-- Dropped and recreated so the script can be re-run.
DROP TABLE IF EXISTS dws_trade_user_sku_order_1d;
CREATE EXTERNAL TABLE dws_trade_user_sku_order_1d
(
    `user_id`                   STRING COMMENT '用户id',
    `sku_id`                    STRING COMMENT 'sku_id',
    `sku_name`                  STRING COMMENT 'sku名称',
    `category1_id`              STRING COMMENT '一级分类id',
    `category1_name`            STRING COMMENT '一级分类名称',
    -- Level-2 and level-3 comments previously repeated the level-1 wording.
    `category2_id`              STRING COMMENT '二级分类id',
    `category2_name`            STRING COMMENT '二级分类名称',
    `category3_id`              STRING COMMENT '三级分类id',
    `category3_name`            STRING COMMENT '三级分类名称',
    `tm_id`                     STRING COMMENT '品牌id',
    `tm_name`                   STRING COMMENT '品牌名称',
    `order_count_1d`            BIGINT COMMENT '最近1日下单次数',
    `order_num_1d`              BIGINT COMMENT '最近1日下单件数',
    `order_original_amount_1d`  DECIMAL(16, 2) COMMENT '最近1日下单原始金额',
    `activity_reduce_amount_1d` DECIMAL(16, 2) COMMENT '最近1日活动优惠金额',
    `coupon_reduce_amount_1d`   DECIMAL(16, 2) COMMENT '最近1日优惠券优惠金额',
    `order_total_amount_1d`     DECIMAL(16, 2) COMMENT '最近1日下单最终金额'
) COMMENT '交易域用户商品粒度订单最近1日汇总事实表'
    PARTITIONED BY (`dt` STRING)
    STORED AS ORC
    LOCATION '/warehouse/gmall/dws/dws_trade_user_sku_order_1d'
    TBLPROPERTIES ('orc.compress' = 'snappy');
-- DWS summary table: trade domain, user + sku granularity, order process, n-day period.
-- Holds the 7-day and 30-day metrics side by side, plus the sku's category (levels 1-3)
-- and brand attributes. Dropped and recreated so the script can be re-run.
DROP TABLE IF EXISTS dws_trade_user_sku_order_nd;
CREATE EXTERNAL TABLE dws_trade_user_sku_order_nd
(
    `user_id`                    STRING COMMENT '用户id',
    `sku_id`                     STRING COMMENT 'sku_id',
    `sku_name`                   STRING COMMENT 'sku名称',
    `category1_id`               STRING COMMENT '一级分类id',
    `category1_name`             STRING COMMENT '一级分类名称',
    -- Level-2 and level-3 comments previously repeated the level-1 wording.
    `category2_id`               STRING COMMENT '二级分类id',
    `category2_name`             STRING COMMENT '二级分类名称',
    `category3_id`               STRING COMMENT '三级分类id',
    `category3_name`             STRING COMMENT '三级分类名称',
    `tm_id`                      STRING COMMENT '品牌id',
    `tm_name`                    STRING COMMENT '品牌名称',
    -- A count column: BIGINT, matching order_count_30d and the 1d table (was STRING).
    `order_count_7d`             BIGINT COMMENT '最近7日下单次数',
    `order_num_7d`               BIGINT COMMENT '最近7日下单件数',
    `order_original_amount_7d`   DECIMAL(16, 2) COMMENT '最近7日下单原始金额',
    `activity_reduce_amount_7d`  DECIMAL(16, 2) COMMENT '最近7日活动优惠金额',
    `coupon_reduce_amount_7d`    DECIMAL(16, 2) COMMENT '最近7日优惠券优惠金额',
    `order_total_amount_7d`      DECIMAL(16, 2) COMMENT '最近7日下单最终金额',
    `order_count_30d`            BIGINT COMMENT '最近30日下单次数',
    `order_num_30d`              BIGINT COMMENT '最近30日下单件数',
    `order_original_amount_30d`  DECIMAL(16, 2) COMMENT '最近30日下单原始金额',
    `activity_reduce_amount_30d` DECIMAL(16, 2) COMMENT '最近30日活动优惠金额',
    `coupon_reduce_amount_30d`   DECIMAL(16, 2) COMMENT '最近30日优惠券优惠金额',
    `order_total_amount_30d`     DECIMAL(16, 2) COMMENT '最近30日下单最终金额'
) COMMENT '交易域用户商品粒度订单最近n日汇总事实表'
    PARTITIONED BY (`dt` STRING)
    STORED AS ORC
    LOCATION '/warehouse/gmall/dws/dws_trade_user_sku_order_nd'
    TBLPROPERTIES ('orc.compress' = 'snappy');
-- DWS summary table: trade domain, user granularity, order process, to-date (td) period.
-- Each dt partition holds one cumulative row per user: first/last order date plus
-- running totals. Dropped and recreated so the script can be re-run.
drop table if exists dws_trade_user_order_td;
create external table dws_trade_user_order_td
(
    user_id                   string         comment '用户id',
    order_date_first          string         comment '首次下单日期',
    order_date_last           string         comment '末次下单日期',
    order_count_td            bigint         comment '下单次数',
    order_num_td              bigint         comment '购买商品件数',
    original_amount_td        decimal(16, 2) comment '原始金额',
    activity_reduce_amount_td decimal(16, 2) comment '活动优惠金额',
    coupon_reduce_amount_td   decimal(16, 2) comment '优惠券优惠金额',
    total_amount_td           decimal(16, 2) comment '最终金额'
)
comment '交易域用户粒度订单历史至今汇总事实表'
partitioned by (dt string)
stored as orc
location '/warehouse/gmall/dws/dws_trade_user_order_td'
tblproperties ('orc.compress' = 'snappy');
-- First-day (initial) load of dws_trade_user_order_td for dt = 2020-06-14.
-- Collapses the per-user daily summary partitions into one cumulative row per user:
-- the earliest and latest dt become the first/last order dates, the metrics are summed.
-- The dt filter pins the result to "history up to and including 2020-06-14", so a
-- re-run after later daily partitions exist still reproduces the same partition.
insert overwrite table dws_trade_user_order_td partition (dt = '2020-06-14')
select
    user_id,
    min(dt)                        as order_date_first,
    max(dt)                        as order_date_last,
    sum(order_count_1d)            as order_count_td,
    sum(order_num_1d)              as order_num_td,
    sum(order_original_amount_1d)  as original_amount_td,
    sum(activity_reduce_amount_1d) as activity_reduce_amount_td,
    sum(coupon_reduce_amount_1d)   as coupon_reduce_amount_td,
    sum(order_total_amount_1d)     as total_amount_td
from dws_trade_user_order_1d
where dt <= '2020-06-14'
group by user_id;
-- Daily incremental load of dws_trade_user_order_td for dt = 2020-06-15.
-- Full-outer-joins the previous day's cumulative rows (hist) with the current day's
-- 1-day summary (cur) on user_id, giving one output row per user present on either side:
--   * user only in hist -> dates and totals carried forward unchanged
--   * user only in cur  -> first and last order date both set to 2020-06-15
--   * user in both      -> first date kept, last date set to 2020-06-15, totals added
insert overwrite table dws_trade_user_order_td partition (dt = '2020-06-15')
select
    coalesce(hist.user_id, cur.user_id),
    case
        when hist.user_id is null and cur.user_id is not null then '2020-06-15'
        else hist.order_date_first
    end,
    case
        when cur.user_id is not null then '2020-06-15'
        else hist.order_date_last
    end,
    coalesce(hist.order_count_td, 0)            + coalesce(cur.order_count_1d, 0),
    coalesce(hist.order_num_td, 0)              + coalesce(cur.order_num_1d, 0),
    coalesce(hist.original_amount_td, 0)        + coalesce(cur.order_original_amount_1d, 0),
    coalesce(hist.activity_reduce_amount_td, 0) + coalesce(cur.activity_reduce_amount_1d, 0),
    coalesce(hist.coupon_reduce_amount_td, 0)   + coalesce(cur.coupon_reduce_amount_1d, 0),
    coalesce(hist.total_amount_td, 0)           + coalesce(cur.order_total_amount_1d, 0)
from
(
    -- cumulative state as of the day before the load date
    select
        user_id,
        order_date_first,
        order_date_last,
        order_count_td,
        order_num_td,
        original_amount_td,
        activity_reduce_amount_td,
        coupon_reduce_amount_td,
        total_amount_td
    from dws_trade_user_order_td
    where dt = date_sub('2020-06-15', 1)
) hist
full outer join
(
    -- the load date's own activity
    select
        user_id,
        order_count_1d,
        order_num_1d,
        order_original_amount_1d,
        activity_reduce_amount_1d,
        coupon_reduce_amount_1d,
        order_total_amount_1d
    from dws_trade_user_order_1d
    where dt = '2020-06-15'
) cur
    on hist.user_id = cur.user_id;
-- UNION ALL formulation of the cumulative per-user order summary for 2020-06-15.
-- Stacks the previous day's cumulative row and the current day's 1-day row for each
-- user, then collapses them to one row per user: min/max pick the first/last order
-- dates and the metric columns are summed.
-- The second branch is selected as-is, with no GROUP BY: its columns are not
-- aggregated, and grouping happens once in the outer query. Its columns are aliased
-- to the first branch's names so both branches expose the same schema.
select
    user_id,
    min(order_date_first),
    max(order_date_last),
    sum(order_count_td),
    sum(order_num_td),
    sum(original_amount_td),
    sum(activity_reduce_amount_td),
    sum(coupon_reduce_amount_td),
    sum(total_amount_td)
from
(
    select
        user_id,
        order_date_first,
        order_date_last,
        order_count_td,
        order_num_td,
        original_amount_td,
        activity_reduce_amount_td,
        coupon_reduce_amount_td,
        total_amount_td
    from dws_trade_user_order_td
    where dt = date_add('2020-06-15', -1)
    union all
    select
        user_id,
        '2020-06-15'              as order_date_first,
        '2020-06-15'              as order_date_last,
        order_count_1d            as order_count_td,
        order_num_1d              as order_num_td,
        order_original_amount_1d  as original_amount_td,
        activity_reduce_amount_1d as activity_reduce_amount_td,
        coupon_reduce_amount_1d   as coupon_reduce_amount_td,
        order_total_amount_1d     as total_amount_td
    from dws_trade_user_order_1d
    where dt = '2020-06-15'
) t1
group by
    user_id;
hive 中sql语法:
开窗和分组