CC00055.pbpositions——|Hadoop&PB级数仓.V13|——|PB数仓.v13|核心交易分析|DWS层建表|加载数据|

一、DWS层建表及数据加载说明
### --- DIM、DWD => 数据仓库分层、数据仓库理论

~~~     # 需求:计算当天
~~~     全国所有订单信息
~~~     全国、一级商品分类订单信息
~~~     全国、二级商品分类订单信息
~~~     大区所有订单信息
~~~     大区、一级商品分类订单信息
~~~     大区、二级商品分类订单信息
~~~     城市所有订单信息
~~~     城市、一级商品分类订单信息
~~~     城市、二级商品分类订单信息
### --- 需要的信息:订单表、订单商品表、商品信息维表、商品分类维表、商家地域维表

~~~     订单表 => 订单id、订单状态
~~~     订单商品表 => 订单id、商品id、商家id、单价、数量
~~~     商品信息维表 => 商品id、三级分类id
~~~     商品分类维表 => 一级名称、一级分类id、二级名称、二级分类id、三级名称、三级分类id
~~~     商家地域维表 => 商家id、区域名称、区域id、城市名称、城市id
~~~     订单表、订单商品表、商品信息维表 => 订单id、商品id、商家id、三级分类id、单价、数量(订单明细表)
~~~     订单明细表、商品分类维表、商家地域维表 
~~~     => 订单id、商品id、商家id、一级分类名称、二级分类名称、三级分类名称、单价、数量、区域、城市 
~~~     => 订单明细宽表
二、DWS层建表及加载数据
### --- DWS层建表

~~~     # dws_trade_orders(订单明细)由以下表轻微聚合而成:
dwd.dwd_trade_orders (拉链表、分区表)
ods.ods_trade_order_product (分区表)
dim.dim_trade_product_info(维表、拉链表)
~~~     # dws_trade_orders_w(订单明细宽表)由以下表组成:
dws.dws_trade_orders (分区表)
dim.dim_trade_product_cat(分区表)
dim.dim_trade_shops_org(分区表)
### --- DWS层建表:订单明细表
~~~     # 订单明细表(轻度汇总事实表)。每笔订单的明细

-- Order-detail table (lightly aggregated fact table).
-- Holds the line-item detail of each order, so one order may span several rows.
-- The table is dropped and recreated, so any previously loaded partitions are lost.
DROP TABLE IF EXISTS dws.dws_trade_orders;

CREATE TABLE IF NOT EXISTS dws.dws_trade_orders (
    orderid     STRING,   -- order id
    cat_3rd_id  STRING,   -- third-level product category id
    shopid      STRING,   -- shop id
    paymethod   TINYINT,  -- payment method
    productsnum BIGINT,   -- product quantity
    paymoney    DOUBLE,   -- amount of this order line item
    paytime     STRING    -- order time
)
PARTITIONED BY (dt STRING)
STORED AS PARQUET;
~~~     # 订单明细表宽表

-- Wide order-detail table: the order-detail columns plus the category names
-- and the shop / region / city names.
-- The table is dropped and recreated, so any previously loaded partitions are lost.
DROP TABLE IF EXISTS dws.dws_trade_orders_w;

CREATE TABLE IF NOT EXISTS dws.dws_trade_orders_w (
    orderid     STRING,   -- order id
    cat_3rd_id  STRING,   -- third-level product category id
    thirdname   STRING,   -- third-level product category name
    secondname  STRING,   -- second-level product category name
    firstname   STRING,   -- first-level product category name
    shopid      STRING,   -- shop id
    shopname    STRING,   -- shop name
    regionname  STRING,   -- region the shop belongs to
    cityname    STRING,   -- city the shop is located in
    paymethod   TINYINT,  -- payment method
    productsnum BIGINT,   -- product quantity
    paymoney    DOUBLE,   -- amount of this order line item
    paytime     STRING    -- order time
)
PARTITIONED BY (dt STRING)
STORED AS PARQUET;
### --- DWS层加载数据
~~~     dws_trade_orders/dws_trade_orders_w 中一笔订单可能出现多条记录!

[root@hadoop02 ~]# vim /data/yanqidw/script/trade/dws_load_trade_orders.sh
#!/bin/bash
#
# Load one day of data into the DWS order-detail tables.
#
#   Step 1: dws.dws_trade_orders
#             <- dwd.dwd_trade_orders        (orders of the day)
#             <- ods.ods_trade_order_product (order line items of the day)
#             <- dim.dim_trade_product_info  (product rows whose start_dt/end_dt
#                                             range covers the day)
#   Step 2: dws.dws_trade_orders_w
#             <- dws.dws_trade_orders        (result of step 1)
#             <- dim.dim_trade_product_cat   (category names)
#             <- dim.dim_trade_shops_org     (shop / region / city names)
#
# Usage: dws_load_trade_orders.sh [YYYY-MM-DD]
#   With no argument the previous calendar day is loaded.
#
# Exit status: 1 if the date argument is malformed, otherwise the exit
# status of the hive command.

source /etc/profile

if [ -n "$1" ]
then
    do_date=$1
else
    do_date=$(date -d "-1 day" +%F)
fi

# do_date is spliced directly into the SQL text below, so accept nothing but a
# plain YYYY-MM-DD value. A POSIX case pattern is used (rather than [[ =~ ]])
# so the check also works when the script is started through sh.
case "$do_date" in
    [0-9][0-9][0-9][0-9]-[0-9][0-9]-[0-9][0-9]) ;;
    *)
        echo "invalid date '$do_date': expected YYYY-MM-DD" >&2
        exit 1
        ;;
esac

# Notes on the SQL (kept outside the string so the text passed to hive -e
# contains no comments):
#   * Step 1 uses LEFT JOINs, so an order with no matching line item or
#     product still produces a row, with NULLs in the unmatched columns.
#   * Step 2 uses inner joins, so rows whose cat_3rd_id or shopid has no match
#     in the dimension tables are not written to the wide table.
#   * INSERT OVERWRITE replaces the whole dt partition, so re-running the
#     script for the same day rewrites that day's data.
sql="
insert overwrite table dws.dws_trade_orders
partition(dt='$do_date')
select t1.orderid as orderid,
       t3.categoryid as cat_3rd_id,
       t3.shopid as shopid,
       t1.paymethod as paymethod,
       t2.productnum as productsnum,
       t2.productnum*t2.productprice as paymoney,
       t1.paytime as paytime
from (select orderid, paymethod, paytime
      from dwd.dwd_trade_orders
      where dt='$do_date') T1
left join
     (select orderid, productid, productnum, productprice
      from ods.ods_trade_order_product
      where dt='$do_date') T2
on t1.orderid = t2.orderid
left join
     (select productid, shopid, categoryid
      from dim.dim_trade_product_info
      where start_dt <= '$do_date'
        and end_dt >= '$do_date') T3
on t2.productid = t3.productid;

insert overwrite table dws.dws_trade_orders_w
partition(dt='$do_date')
select t1.orderid,
       t1.cat_3rd_id,
       t2.thirdname,
       t2.secondname,
       t2.firstname,
       t1.shopid,
       t3.shopname,
       t3.regionname,
       t3.cityname,
       t1.paymethod,
       t1.productsnum,
       t1.paymoney,
       t1.paytime
from (select orderid,
             cat_3rd_id,
             shopid,
             paymethod,
             productsnum,
             paymoney,
             paytime
      from dws.dws_trade_orders
      where dt='$do_date') T1
join
     (select thirdid, thirdname, secondid, secondname,
             firstid, firstname
      from dim.dim_trade_product_cat
      where dt='$do_date') T2
on T1.cat_3rd_id = T2.thirdid
join
     (select shopid, shopname, regionname, cityname
      from dim.dim_trade_shops_org
      where dt='$do_date') T3
on T1.shopid = T3.shopid
"

hive -e "$sql"
### --- 验证测试数据是否准备完成。
~~~     保证测试的日期有数据。

dwd.dwd_trade_orders (拉链表、分区表)
ods.ods_trade_order_product (分区表)
dim.dim_trade_product_info(维表、拉链表)
dim.dim_trade_product_cat(分区表)
dim.dim_trade_shops_org(分区表)
~~~     # 构造测试数据(拉链分区表):

-- Build test data for the partitioned zipper table dwd.dwd_trade_orders:
-- copy every row whose end_date is '9999-12-31' into the static partition
-- dt='2020-07-12', so that the test date has data to load.
set hive.exec.dynamic.partition.mode=nonstrict;
set hive.exec.dynamic.partition=true;

INSERT OVERWRITE TABLE dwd.dwd_trade_orders
PARTITION (dt='2020-07-12')
SELECT
    orderid,
    orderno,
    userid,
    status,
    productmoney,
    totalmoney,
    paymethod,
    ispay,
    areaid,
    tradesrc,
    tradetype,
    isrefund,
    dataflag,
    -- constant written into the column slot between dataflag and paytime;
    -- presumably the order creation time, verify against the table DDL
    '2020-07-12',
    paytime,
    modifiedtime,
    start_date,
    end_date
FROM dwd.dwd_trade_orders
WHERE end_date = '9999-12-31';
### --- DWS层表数据加载
~~~     加载数据到DWS订单明细表

[root@hadoop02 ~]# sh /data/yanqidw/script/trade/dws_load_trade_orders.sh  2020-07-12
### --- 查看DWS订单明细表

~~~     # 订单明细表
hive (default)> show partitions dws.dws_trade_orders;
partition
dt=2020-07-12
~~~     # 订单明细宽表
hive (default)> show partitions dws.dws_trade_orders_w;
partition
dt=2020-07-12

你可能感兴趣的:(数据仓库,大数据,hive,数据分析,人工智能)