### --- DIM、DWD => 数据仓库分层、数据仓库理论
~~~ # 需求:计算当天
~~~ 全国所有订单信息
~~~ 全国、一级商品分类订单信息
~~~ 全国、二级商品分类订单信息
~~~ 大区所有订单信息
~~~ 大区、一级商品分类订单信息
~~~ 大区、二级商品分类订单信息
~~~ 城市所有订单信息
~~~ 城市、一级商品分类订单信息
~~~ 城市、二级商品分类订单信息
### --- 需要的信息:订单表、订单商品表、商品信息维表、商品分类维表、商家地域维表
~~~ 订单表 => 订单id、订单状态
~~~ 订单商品表 => 订单id、商品id、商家id、单价、数量
~~~ 商品信息维表 => 商品id、三级分类id
~~~ 商品分类维表 => 一级名称、一级分类id、二级名称、二级分类id、三级名称、三级分类id
~~~ 商家地域维表 => 商家id、区域名称、区域id、城市名称、城市id
~~~ 订单表、订单商品表、商品信息维表 => 订单id商品id、商家id、三级分类id、单价、数量(订单明细表)
~~~ 订单明细表、商品分类维表、商家地域维表
~~~ => 订单id、商品id、商家id、三级分类名称、三级分类名称、三级分类名称、单价、数量、区域、城市
~~~ => 订单明细宽表
### --- DWS层建表
~~~ # dws_trade_orders(订单明细)由以下表轻微聚合而成:
dwd.dwd_trade_orders (拉链表、分区表)
ods.ods_trade_order_product (分区表)
dim.dim_trade_product_info(维表、拉链表)
~~~ # dws_trade_orders_w(订单明细宽表)由以下表组成:
ads.dws_trade_orders (分区表)
dim.dim_trade_product_cat(分区表)
dim.dim_trade_shops_org(分区表)
### --- DWS层建表:订单明细表
~~~ # 订单明细表(轻度汇总事实表)。每笔订单的明细
DROP TABLE IF EXISTS dws.dws_trade_orders;
create table if not exists dws.dws_trade_orders(
orderid string, -- 订单id
cat_3rd_id string, -- 商品三级分类id
shopid string, -- 店铺id
paymethod tinyint, -- 支付方式
productsnum bigint, -- 商品数量
paymoney double, -- 订单商品明细金额
paytime string -- 订单时间
)
partitioned by (dt string)
STORED AS PARQUET;
~~~ # 订单明细表宽表
DROP TABLE IF EXISTS dws.dws_trade_orders_w;
create table if not exists dws.dws_trade_orders_w(
orderid string, -- 订单id
cat_3rd_id string, -- 商品三级分类id
thirdname string, -- 商品三级分类名称
secondname string, -- 商品二级分类名称
firstname string, -- 商品一级分类名称
shopid string, -- 店铺id
shopname string, -- 店铺名
regionname string, -- 店铺所在大区
cityname string, -- 店铺所在城市
paymethod tinyint, -- 支付方式
productsnum bigint, -- 商品数量
paymoney double, -- 订单明细金额
paytime string -- 订单时间
)
partitioned by (dt string)
STORED AS PARQUET;
### --- DWS层加载数据
~~~ dws_trade_orders/dws_trade_orders_w 中一笔订单可能出现多条记录!
[root@hadoop02 ~]# vim /data/yanqidw/script/trade/dws_load_trade_orders.sh
#!/bin/bash
source /etc/profile
if [ -n "$1" ]
then
do_date=$1
else
do_date=`date -d "-1 day" +%F`
fi
sql="
insert overwrite table dws.dws_trade_orders
partition(dt='$do_date')
select t1.orderid as orderid,
t3.categoryid as cat_3rd_id,
t3.shopid as shopid,
t1.paymethod as paymethod,
t2.productnum as productsnum,
t2.productnum*t2.productprice as pay_money,
t1.paytime as paytime
from (select orderid, paymethod, paytime
from dwd.dwd_trade_orders
where dt='$do_date') T1
left join
(select orderid, productid, productnum, productprice
from ods.ods_trade_order_product
where dt='$do_date') T2
on t1.orderid = t2.orderid
left join
(select productid, shopid, categoryid
from dim.dim_trade_product_info
where start_dt <= '$do_date'
and end_dt >= '$do_date' ) T3
on t2.productid=t3.productid;
insert overwrite table dws.dws_trade_orders_w
partition(dt='$do_date')
select t1.orderid,
t1.cat_3rd_id,
t2.thirdname,
t2.secondname,
t2.firstname,
t1.shopid,
t3.shopname,
t3.regionname,
t3.cityname,
t1.paymethod,
t1.productsnum,
t1.paymoney,
t1.paytime
from (select orderid,
cat_3rd_id,
shopid,
paymethod,
productsnum,
paymoney,
paytime
from dws.dws_trade_orders
where dt='$do_date') T1
join
(select thirdid, thirdname, secondid, secondname,
firstid, firstname
from dim.dim_trade_product_cat
where dt='$do_date') T2
on T1.cat_3rd_id = T2.thirdid
join
(select shopid, shopname, regionname, cityname
from dim.dim_trade_shops_org
where dt='$do_date') T3
on T1.shopid = T3.shopid
"
hive -e "$sql"
### --- 验证测试数据是否准备完成。
~~~ 保证测试的日期有数据。
dwd.dwd_trade_orders (拉链表、分区表)
ods.ods_trade_order_product (分区表)
dim.dim_trade_product_info(维表、拉链表)
dim.dim_trade_product_cat(分区表)
dim.dim_trade_shops_org(分区表)
~~~ # 构造测试数据(拉链分区表):
set hive.exec.dynamic.partition.mode=nonstrict;
set hive.exec.dynamic.partition=true;
insert overwrite table dwd.dwd_trade_orders
partition(dt='2020-07-12')
select
orderid,
orderno,
userid,
status,
productmoney,
totalmoney,
paymethod,
ispay,
areaid,
tradesrc,
tradetype,
isrefund,
dataflag,
'2020-07-12',
paytime,
modifiedtime,
start_date,
end_date
from dwd.dwd_trade_orders
where end_date='9999-12-31';
### --- DWS层表数据加载
~~~ 加载数据到DWS订单明细表
[root@hadoop02 ~]# sh /data/yanqidw/script/trade/dws_load_trade_orders.sh 2020-07-12
### --- 查看DWS订单明细表
~~~ # 订单明细表
hive (default)> show partitions dws.dws_trade_orders;
partition
dt=2020-07-12
~~~ # 订单明细宽表
hive (default)> show partitions dws.dws_trade_orders_w;
partition
dt=2020-07-12