-- Add a campaign-session column to the month dimension.
-- The table is rebuilt: the current table is renamed aside, a new table with
-- the extra column is created, and the existing rows are copied back.
USE dw;

ALTER TABLE month_dim RENAME TO month_dim_old;

CREATE TABLE month_dim (
    month_sk         INT         COMMENT 'surrogate key',
    month            TINYINT     COMMENT 'month',
    month_name       VARCHAR(9)  COMMENT 'month name',
    campaign_session VARCHAR(30) COMMENT 'campaign session',
    quarter          TINYINT     COMMENT 'quarter',
    year             SMALLINT    COMMENT 'year'
)
COMMENT 'month dimension table'
CLUSTERED BY (month_sk) INTO 8 BUCKETS
STORED AS ORC
TBLPROPERTIES ('transactional'='true');

-- Copy the old rows across. The new campaign_session column starts out NULL.
INSERT INTO month_dim
SELECT
    month_sk,
    month,
    month_name,
    NULL,
    quarter,
    year
FROM month_dim_old;

DROP TABLE month_dim_old;

-- Create the campaign-session staging table, stored as comma-delimited text.
USE rds;

CREATE TABLE campaign_session (
    campaign_session VARCHAR(30),
    month            TINYINT,
    year             SMALLINT
)
ROW FORMAT DELIMITED FIELDS TERMINATED BY ','
STORED AS TEXTFILE;
修改后的模式如下图所示。
假设所有促销期都不跨年,并且一个促销期可以包含一个或多个年月,但一个年月只能属于一个促销期。为了理解促销期如何工作,看下表的示例。
| Campaign Session | Month |
| --- | --- |
| 2016 First Campaign | January-April |
| 2016 Second Campaign | May-July |
| 2016 Third Campaign | August-August |
| 2016 Last Campaign | September-December |
2016 First Campaign,1,2016
2016 First Campaign,2,2016
2016 First Campaign,3,2016
2016 First Campaign,4,2016
2016 Second Campaign,5,2016
2016 Second Campaign,6,2016
2016 Second Campaign,7,2016
2016 Third Campaign,8,2016
2016 Last Campaign,9,2016
2016 Last Campaign,10,2016
2016 Last Campaign,11,2016
2016 Last Campaign,12,2016

现在可以执行下面的脚本把2016年的促销期数据装载进月维度。
-- Stage the campaign-session file, replacing whatever the staging table held.
USE rds;

LOAD DATA LOCAL INPATH '/root/campaign_session.csv'
OVERWRITE INTO TABLE campaign_session;

USE dw;

-- Build the replacement rows in a scratch table: each month_dim row that has a
-- staged campaign session for the same year and month, with that session filled in.
DROP TABLE IF EXISTS tmp;

CREATE TABLE tmp AS
SELECT
    dim.month_sk         AS month_sk,
    dim.month            AS month,
    dim.month_name       AS month_name,
    stg.campaign_session AS campaign_session,
    dim.quarter          AS quarter,
    dim.year             AS year
FROM month_dim AS dim
INNER JOIN rds.campaign_session AS stg
    ON dim.year = stg.year
    AND dim.month = stg.month;

-- Swap the matched rows: delete the old versions, then insert the rebuilt ones.
DELETE FROM month_dim
WHERE month_dim.month_sk IN (SELECT month_sk FROM tmp);

INSERT INTO month_dim
SELECT
    month_sk,
    month,
    month_name,
    campaign_session,
    quarter,
    year
FROM tmp;

-- Check the result.
SELECT
    year,
    month,
    campaign_session
FROM dw.month_dim;
查询结果如下图所示,2016年的促销期已经有数据,其它年份的campaign_session字段值为null。
-- Drill-down report of 2016 sales per product category along the
-- year -> campaign session -> month hierarchy.
-- Output columns: product_category, time (the label of the hierarchy level),
-- order_amount, order_quantity.
USE dw;

WITH sales_2016 AS (
    -- Monthly fact rows for 2016 joined to their product and month dimension rows.
    -- NOTE(review): assumes product_category lives on product_dim and the
    -- month_order_* measures on the fact table - confirm against the table DDL.
    SELECT
        p.product_category,
        m.year,
        m.month,
        m.month_name,
        m.campaign_session,
        f.month_order_amount,
        f.month_order_quantity
    FROM month_end_sales_order_fact AS f
    INNER JOIN product_dim AS p
        ON f.product_sk = p.product_sk
    INNER JOIN month_dim AS m
        ON f.order_month_sk = m.month_sk
    WHERE m.year = 2016
)
SELECT
    product_category,
    time,
    order_amount,
    order_quantity
FROM (
    SELECT
        product_category,
        year,
        month,
        time,
        sequence,
        order_amount,
        order_quantity
    FROM (
        -- Level 1: yearly total. month is pinned to 1 so the row sorts ahead of
        -- the detail rows. year is cast to a string because the other branches
        -- put VARCHAR values in the time column, and Hive 2.2+ does not convert
        -- between numeric and string types implicitly in UNION ALL.
        SELECT
            product_category,
            year,
            1                         AS month,
            CAST(year AS STRING)      AS time,
            1                         AS sequence,
            SUM(month_order_amount)   AS order_amount,
            SUM(month_order_quantity) AS order_quantity
        FROM sales_2016
        GROUP BY
            product_category,
            year

        UNION ALL

        -- Level 2: one row per month, labelled with its campaign session.
        SELECT
            product_category,
            year,
            month,
            campaign_session          AS time,
            2                         AS sequence,
            SUM(month_order_amount)   AS order_amount,
            SUM(month_order_quantity) AS order_quantity
        FROM sales_2016
        GROUP BY
            product_category,
            year,
            month,
            campaign_session

        UNION ALL

        -- Level 3: one row per month, labelled with the month name.
        SELECT
            product_category,
            year,
            month,
            month_name                AS time,
            3                         AS sequence,
            SUM(month_order_amount)   AS order_amount,
            SUM(month_order_quantity) AS order_quantity
        FROM sales_2016
        GROUP BY
            product_category,
            year,
            month,
            month_name
    ) levels
    -- ORDER BY gives one total order. CLUSTER BY would sort only within each
    -- reducer, so the levels could interleave once more than one reducer runs.
    -- NOTE(review): Hive 3.0+ may drop an ORDER BY inside a subquery when
    -- hive.remove.orderby.in.subquery is true - confirm on newer clusters.
    ORDER BY
        product_category,
        year,
        month,
        sequence
) report;
查询结果如下图所示。
,1,2016
2016 Early Spring Campaign,2,2016
2016 Early Spring Campaign,3,2016
,4,2016
2016 Spring Campaign,5,2016
,6,2016
2016 Last Campaign,7,2016
2016 Last Campaign,8,2016
,9,2016
,10,2016
,11,2016
,12,2016

下面的命令先把campaign_session字段置空,然后向month_dim表装载促销期数据。
-- Stage the ragged campaign file, replacing whatever the staging table held.
USE rds;

LOAD DATA LOCAL INPATH '/root/ragged_campaign.csv'
OVERWRITE INTO TABLE campaign_session;

USE dw;

-- Intentionally unfiltered: clear campaign_session on every month_dim row
-- before the reload.
UPDATE month_dim
SET campaign_session = NULL;

-- Build the replacement rows in a scratch table. A month whose staged campaign
-- session is blank falls back to its own month name.
DROP TABLE IF EXISTS tmp;

CREATE TABLE tmp AS
SELECT
    dim.month_sk   AS month_sk,
    dim.month      AS month,
    dim.month_name AS month_name,
    CASE
        WHEN stg.campaign_session <> '' THEN stg.campaign_session
        ELSE dim.month_name
    END            AS campaign_session,
    dim.quarter    AS quarter,
    dim.year       AS year
FROM month_dim AS dim
INNER JOIN rds.campaign_session AS stg
    ON dim.year = stg.year
    AND dim.month = stg.month;

-- Swap the matched rows: delete the old versions, then insert the rebuilt ones.
DELETE FROM month_dim
WHERE month_dim.month_sk IN (SELECT month_sk FROM tmp);

INSERT INTO month_dim
SELECT
    month_sk,
    month,
    month_name,
    campaign_session,
    quarter,
    year
FROM tmp;

-- Check the result.
SELECT
    year,
    month,
    campaign_session
FROM dw.month_dim;
查询结果如下图所示。