-- Month-end balance fact table: stores an amount balance and a quantity
-- balance for each (month_sk, product_sk) row.
-- This fact table only ever receives INSERTs (no UPDATE or DELETE), so it
-- keeps Hive's default text file format instead of ORC.
use dw;

create table month_end_balance_fact (
    month_sk                   int,
    product_sk                 int,
    month_end_amount_balance   decimal(10,2),
    month_end_quantity_balance int
);
-- Initial load of month_end_balance_fact.
-- For every month in month_dim up to the latest month that has sales, sum the
-- monthly sales rows of the same year whose month is not later than that
-- month, grouped by month and product.
-- The equality condition lives in ON and the range conditions in WHERE, which
-- is equivalent to the comma-join form for an inner join.
-- To confirm the initial load is correct, query month_end_sales_order_fact
-- first, then query month_end_balance_fact after the load has finished.
use dw;

insert overwrite table month_end_balance_fact
select
    m.month_sk,
    s.product_sk,
    sum(s.month_order_amount)   as month_order_amount,
    sum(s.month_order_quantity) as month_order_quantity
from month_dim m
inner join (
    select
        f.*,
        d.year,
        d.month,
        max(f.order_month_sk) over () as max_month_sk
    from month_end_sales_order_fact f
    inner join month_dim d
        on f.order_month_sk = d.month_sk
) s
    on m.year = s.year
where m.month_sk <= s.max_month_sk
  and s.month <= m.month
group by
    m.month_sk,
    s.product_sk;
-- Verification query, run before the initial load: list the monthly sales
-- amount and quantity per year, month and product.
-- ORDER BY is used instead of CLUSTER BY because CLUSTER BY only sorts rows
-- within each reducer, so the listing is not globally ordered once more than
-- one reducer is used.
-- The query result is shown in the figure below.
use dw;

select
    d.year                 as year,
    d.month                as month,
    f.product_sk           as psk,
    f.month_order_amount   as amt,
    f.month_order_quantity as qty
from month_end_sales_order_fact f
inner join month_dim d
    on f.order_month_sk = d.month_sk
order by year, month, psk;
-- Verification query, run after the initial load: list the month-end amount
-- and quantity balances per year, month and product.
-- ORDER BY is used instead of CLUSTER BY because CLUSTER BY only sorts rows
-- within each reducer, so the listing is not globally ordered once more than
-- one reducer is used.
-- The query result is shown in the figure below.
use dw;

select
    d.year                       as year,
    d.month                      as month,
    f.product_sk                 as psk,
    f.month_end_amount_balance   as amt,
    f.month_end_quantity_balance as qty
from month_end_balance_fact f
inner join month_dim d
    on f.month_sk = d.month_sk
order by year, month, psk;
-- Periodic load of month_end_balance_fact for the previous calendar month.
--
-- The rows appended are the per-product sums of two inputs:
--   1. the sales rows of month_end_sales_order_fact for the target month
--   2. the rows of month_end_balance_fact for its current highest month_sk,
--      re-keyed to month_sk + 1, which carries the prior balance forward
-- When the target month is January the CASE expression turns every month_sk
-- into 0, so the IN subquery yields 0 and nothing is carried forward.
-- NOTE(review): this assumes month_sk values are consecutive integers, that no
-- balance row has month_sk = 0, and that the highest month_sk in the balance
-- table is the month just before the one being loaded - confirm.
--
-- The hivevar values are substituted as text, so year and month below expand
-- to expressions over add_months(current_date,-1).
--
-- Set variables to enable transaction support.
-- The transaction manager class name is case-sensitive.
set hive.support.concurrency=true;
set hive.exec.dynamic.partition.mode=nonstrict;
set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager;
set hive.compactor.initiator.on=true;
set hive.compactor.worker.threads=1;

use dw;

set hivevar:pre_month_date = add_months(current_date,-1);
set hivevar:year = year(${hivevar:pre_month_date});
set hivevar:month = month(${hivevar:pre_month_date});

insert into month_end_balance_fact
select
    order_month_sk,
    product_sk,
    sum(month_order_amount),
    sum(month_order_quantity)
from (
    select
        a.order_month_sk,
        a.product_sk,
        a.month_order_amount,
        a.month_order_quantity
    from month_end_sales_order_fact a
    inner join month_dim b
        on a.order_month_sk = b.month_sk
    where b.year = ${hivevar:year}
      and b.month = ${hivevar:month}
    union all
    select
        month_sk + 1               as order_month_sk,
        product_sk                 as product_sk,
        month_end_amount_balance   as month_order_amount,
        month_end_quantity_balance as month_order_quantity
    from month_end_balance_fact a
    where a.month_sk in (
        select max(case when ${hivevar:month} = 1 then 0 else month_sk end)
        from month_end_balance_fact
    )
) t
group by
    order_month_sk,
    product_sk;
-- Add two test rows to the monthly sales fact.
-- No column list is given, so the values are positional; presumably
-- (order_month_sk, product_sk, month_order_amount, month_order_quantity).
insert into dw.month_end_sales_order_fact
values
    (200, 1, 1000, 10),
    (200, 6, 1000, 10);
-- (2) Set the time
# Execute month_balance_sum.sql with beeline, connecting to cdh2:10000 and the dw database.
beeline -u "jdbc:hive2://cdh2:10000/dw" -f "month_balance_sum.sql"
# (4) Query the month_end_balance_fact table
-- List every row of the balance fact, ordered by month and product.
-- ORDER BY is used instead of CLUSTER BY because CLUSTER BY only sorts rows
-- within each reducer, so the listing is not globally ordered once more than
-- one reducer is used.
-- The query result is shown in the figure below.
select
    a.month_sk,
    a.product_sk,
    a.month_end_amount_balance,
    a.month_end_quantity_balance
from dw.month_end_balance_fact a
order by a.month_sk, a.product_sk;
-- Add two test rows to the monthly sales fact, one statement per row.
-- No column list is given, so the values are positional; presumably
-- (order_month_sk, product_sk, month_order_amount, month_order_quantity).
insert into dw.month_end_sales_order_fact
values (205, 1, 1000, 10);

insert into dw.month_end_sales_order_fact
values (205, 6, 1000, 10);
-- (2) Use the following commands to add three records to month_end_balance_fact
-- Add three test rows to the balance fact, one statement per row.
-- No column list is given, so the values are positional; presumably
-- (month_sk, product_sk, month_end_amount_balance, month_end_quantity_balance).
insert into dw.month_end_balance_fact
values (204, 1, 1000, 10);

insert into dw.month_end_balance_fact
values (204, 6, 1000, 10);

insert into dw.month_end_balance_fact
values (204, 3, 1000, 10);
-- (3) In the load script, change the line
--       set hivevar:pre_month_date = add_months(current_date,-1)
--     to
--       set hivevar:pre_month_date = add_months('2017-02-01',-1)
--     so that the data for January 2017 is loaded.
# Execute month_balance_sum.sql with beeline, connecting to cdh2:10000 and the dw database.
beeline -u "jdbc:hive2://cdh2:10000/dw" -f "month_balance_sum.sql"
# (5) Query the month_end_balance_fact table
-- List every row of the balance fact, ordered by month and product.
-- ORDER BY is used instead of CLUSTER BY because CLUSTER BY only sorts rows
-- within each reducer, so the listing is not globally ordered once more than
-- one reducer is used.
-- The query result is shown in the figure below.
select
    a.month_sk,
    a.product_sk,
    a.month_end_amount_balance,
    a.month_end_quantity_balance
from dw.month_end_balance_fact a
order by a.month_sk, a.product_sk;
-- Remove the test rows, identified by month surrogate keys of 200 and above.
-- The sales fact is cleaned with DELETE. The balance fact is created without
-- ORC storage and is cleaned differently: the rows to keep are copied into a
-- scratch table and the fact table is overwritten from it.
-- NOTE(review): presumably DELETE is not available on the balance fact because
-- it is not a transactional table - confirm.
-- Every table is qualified with dw so that the statements do not depend on the
-- current database of the session.
delete from dw.month_end_sales_order_fact
where order_month_sk >= 200;

create table dw.t1 as
select *
from dw.month_end_balance_fact
where month_sk < 200;

insert overwrite table dw.month_end_balance_fact
select *
from dw.t1;

drop table dw.t1;
-- Total month-end amount balance per year and month.
-- ORDER BY is used instead of CLUSTER BY because CLUSTER BY only sorts rows
-- within each reducer, so the listing is not globally ordered once more than
-- one reducer is used.
-- The query result is shown in the figure below.
use dw;

select
    year,
    month,
    sum(month_end_amount_balance) as s
from month_end_balance_fact a
inner join month_dim b
    on a.month_sk = b.month_sk
group by year, month
order by year, month;
-- Sum of the month-end amount balance per product name, across all rows of
-- the balance fact.
-- The query result is shown in the figure below.
use dw;

select
    product_name,
    sum(month_end_amount_balance) as s
from month_end_balance_fact bal
inner join product_dim prod
    on bal.product_sk = prod.product_sk
group by product_name;
-- Sum of the monthly order amount per product name, across all rows of the
-- monthly sales fact.
-- The query result is shown in the figure below.
use dw;

select
    product_name,
    sum(month_order_amount) as s
from month_end_sales_order_fact sales
inner join product_dim prod
    on sales.product_sk = prod.product_sk
group by product_name;