修改数据库模式
图(五)- 1-1 显示了修改后的模式,在它的customer_dim表和sales_order_fact表上增加了新列。customer_dim表增加的新列是shipping_address、shipping_zip_code、shipping_city和shipping_state。sales_order_fact表增加的新列是order_quantity。使用清单(五)-1-1里的SQL脚本修改数据库模式。

-- Schema change script.
-- Adds the four shipping-address columns to the customer tables and the
-- order_quantity column to the sales-order tables, in both the dw and the
-- source databases. Every new column is positioned with AFTER so that the
-- physical column order is the same in the dimension, staging and source tables.

-- ---------------------------------------------------------------------------
-- Data warehouse side
-- ---------------------------------------------------------------------------
USE dw;

-- Customer dimension: shipping address columns follow customer_state.
ALTER TABLE customer_dim
    ADD COLUMN shipping_address  VARCHAR(50) AFTER customer_state,
    ADD COLUMN shipping_zip_code INT(5)      AFTER shipping_address,
    ADD COLUMN shipping_city     VARCHAR(30) AFTER shipping_zip_code,
    ADD COLUMN shipping_state    VARCHAR(2)  AFTER shipping_city;

-- Customer staging table: same four columns, same positions.
ALTER TABLE customer_stg
    ADD COLUMN shipping_address  VARCHAR(50) AFTER customer_state,
    ADD COLUMN shipping_zip_code INT(5)      AFTER shipping_address,
    ADD COLUMN shipping_city     VARCHAR(30) AFTER shipping_zip_code,
    ADD COLUMN shipping_state    VARCHAR(2)  AFTER shipping_city;

-- Fact table: order_quantity follows order_amount.
ALTER TABLE sales_order_fact
    ADD COLUMN order_quantity INT AFTER order_amount;

-- ---------------------------------------------------------------------------
-- Source system side
-- ---------------------------------------------------------------------------
USE source;

-- Source customer table: same four shipping columns.
ALTER TABLE customer
    ADD COLUMN shipping_address  VARCHAR(50) AFTER customer_state,
    ADD COLUMN shipping_zip_code INT(5)      AFTER shipping_address,
    ADD COLUMN shipping_city     VARCHAR(30) AFTER shipping_zip_code,
    ADD COLUMN shipping_state    VARCHAR(2)  AFTER shipping_city;

-- Source sales order table: order_quantity follows order_amount.
ALTER TABLE sales_order
    ADD COLUMN order_quantity INT AFTER order_amount;
-- Regular load script for the dw database.
--
-- Steps, in order:
--   1. fix the SCD cut-off date and the upper bound of the CDC window
--   2. reload customer_stg from source.customer and apply SCD2 on the address
--      columns and SCD1 on customer_name, then add brand-new customers
--   3. reload product_stg from source.product and apply SCD2 on
--      product_name / product_category, then add brand-new products
--   4. add the orders that fall inside the CDC window to order_dim and
--      sales_order_fact
--   5. advance cdc_time.last_load to the end of the window
--
-- Change detection uses MySQL's NULL-safe equality operator (<=>), so a column
-- that is NULL on both sides counts as "unchanged" and a column that is NULL
-- on exactly one side counts as "changed".
--
-- NOTE(review): the INSERT ... SELECT statements into the staging, dimension
-- and fact tables are positional; they rely on the SELECT list matching the
-- physical column order of the target table.

USE dw;

-- Cut-off date used both as the expiry date of superseded dimension rows and
-- as the effective date of the rows that replace them: yesterday.
SET @pre_date = SUBDATE(CURRENT_DATE, 1);

-- Upper bound of the CDC window.
UPDATE cdc_time
SET current_load = CURRENT_DATE;

-- ---------------------------------------------------------------------------
-- Customer dimension
-- ---------------------------------------------------------------------------
TRUNCATE TABLE customer_stg;

INSERT INTO customer_stg
SELECT
    customer_number,
    customer_name,
    customer_street_address,
    customer_zip_code,
    customer_city,
    customer_state,
    shipping_address,
    shipping_zip_code,
    shipping_city,
    shipping_state
FROM source.customer;

-- SCD2 on all address columns, step 1: expire the current row of every
-- customer whose address data differs from staging.
UPDATE customer_dim AS a
INNER JOIN customer_stg AS b
    ON a.customer_number = b.customer_number
SET a.expiry_date = @pre_date
WHERE a.expiry_date = '2200-01-01'
  AND NOT (
          a.customer_street_address <=> b.customer_street_address
      AND a.customer_city           <=> b.customer_city
      AND a.customer_zip_code       <=> b.customer_zip_code
      AND a.customer_state          <=> b.customer_state
      AND a.shipping_address        <=> b.shipping_address
      AND a.shipping_city           <=> b.shipping_city
      AND a.shipping_zip_code       <=> b.shipping_zip_code
      AND a.shipping_state          <=> b.shipping_state
  );

-- SCD2 on all address columns, step 2: add the new version. Alias a is the
-- row that was just expired (expiry_date = @pre_date); the NOT EXISTS guard
-- skips customers that still have a current row.
INSERT INTO customer_dim
SELECT
    NULL,
    b.customer_number,
    b.customer_name,
    b.customer_street_address,
    b.customer_zip_code,
    b.customer_city,
    b.customer_state,
    b.shipping_address,
    b.shipping_zip_code,
    b.shipping_city,
    b.shipping_state,
    a.version + 1,
    @pre_date,
    '2200-01-01'
FROM customer_dim AS a
INNER JOIN customer_stg AS b
    ON a.customer_number = b.customer_number
WHERE a.expiry_date = @pre_date
  AND NOT (
          a.customer_street_address <=> b.customer_street_address
      AND a.customer_city           <=> b.customer_city
      AND a.customer_zip_code       <=> b.customer_zip_code
      AND a.customer_state          <=> b.customer_state
      AND a.shipping_address        <=> b.shipping_address
      AND a.shipping_city           <=> b.shipping_city
      AND a.shipping_zip_code       <=> b.shipping_zip_code
      AND a.shipping_state          <=> b.shipping_state
  )
  AND NOT EXISTS (
      SELECT 1
      FROM customer_dim AS y
      WHERE y.customer_number = b.customer_number
        AND y.expiry_date = '2200-01-01'
  );

-- SCD1 on customer_name: overwrite the name on every version of the customer.
UPDATE customer_dim AS a
INNER JOIN customer_stg AS b
    ON a.customer_number = b.customer_number
SET a.customer_name = b.customer_name
WHERE NOT (a.customer_name <=> b.customer_name);

-- Brand-new customers: staged customer numbers with no dimension row at all.
INSERT INTO customer_dim
SELECT
    NULL,
    s.customer_number,
    s.customer_name,
    s.customer_street_address,
    s.customer_zip_code,
    s.customer_city,
    s.customer_state,
    s.shipping_address,
    s.shipping_zip_code,
    s.shipping_city,
    s.shipping_state,
    1,
    @pre_date,
    '2200-01-01'
FROM customer_stg AS s
WHERE NOT EXISTS (
    SELECT 1
    FROM customer_dim AS x
    WHERE x.customer_number = s.customer_number
);

-- ---------------------------------------------------------------------------
-- Product dimension
-- ---------------------------------------------------------------------------
TRUNCATE TABLE product_stg;

INSERT INTO product_stg
SELECT
    product_code,
    product_name,
    product_category
FROM source.product;

-- SCD2 on product_name and product_category, step 1: expire the current row.
UPDATE product_dim AS a
INNER JOIN product_stg AS b
    ON a.product_code = b.product_code
SET a.expiry_date = @pre_date
WHERE a.expiry_date = '2200-01-01'
  AND NOT (
          a.product_name     <=> b.product_name
      AND a.product_category <=> b.product_category
  );

-- SCD2 on product_name and product_category, step 2: add the new version
-- (same pattern as for customers).
INSERT INTO product_dim
SELECT
    NULL,
    b.product_code,
    b.product_name,
    b.product_category,
    a.version + 1,
    @pre_date,
    '2200-01-01'
FROM product_dim AS a
INNER JOIN product_stg AS b
    ON a.product_code = b.product_code
WHERE a.expiry_date = @pre_date
  AND NOT (
          a.product_name     <=> b.product_name
      AND a.product_category <=> b.product_category
  )
  AND NOT EXISTS (
      SELECT 1
      FROM product_dim AS y
      WHERE y.product_code = b.product_code
        AND y.expiry_date = '2200-01-01'
  );

-- Brand-new products: staged product codes with no dimension row at all.
INSERT INTO product_dim
SELECT
    NULL,
    s.product_code,
    s.product_name,
    s.product_category,
    1,
    @pre_date,
    '2200-01-01'
FROM product_stg AS s
WHERE NOT EXISTS (
    SELECT 1
    FROM product_dim AS x
    WHERE x.product_code = s.product_code
);

-- ---------------------------------------------------------------------------
-- Order dimension: orders entered inside the CDC window
-- [last_load, current_load).
-- ---------------------------------------------------------------------------
INSERT INTO order_dim (
    order_number,
    effective_date,
    expiry_date
)
SELECT
    a.order_number,
    a.order_date,
    '2200-01-01'
FROM source.sales_order AS a
CROSS JOIN cdc_time AS f
WHERE a.entry_date >= f.last_load
  AND a.entry_date < f.current_load;

-- ---------------------------------------------------------------------------
-- Fact table: orders entered inside the CDC window. Customer and product rows
-- are matched on the version whose [effective_date, expiry_date) range
-- contains the order date.
-- ---------------------------------------------------------------------------
INSERT INTO sales_order_fact
SELECT
    order_sk,
    customer_sk,
    product_sk,
    date_sk,
    order_amount,
    order_quantity
FROM source.sales_order AS a
INNER JOIN order_dim AS b
    ON a.order_number = b.order_number
INNER JOIN customer_dim AS c
    ON a.customer_number = c.customer_number
   AND a.order_date >= c.effective_date
   AND a.order_date < c.expiry_date
INNER JOIN product_dim AS d
    ON a.product_code = d.product_code
   AND a.order_date >= d.effective_date
   AND a.order_date < d.expiry_date
INNER JOIN date_dim AS e
    ON a.order_date = e.date
CROSS JOIN cdc_time AS f
WHERE a.entry_date >= f.last_load
  AND a.entry_date < f.current_load;

-- Advance the CDC window: the next run starts where this one ended.
UPDATE cdc_time
SET last_load = current_load;

-- NOTE(review): no explicit transaction is opened in this script, and
-- TRUNCATE TABLE commits implicitly in MySQL, so this COMMIT only matters
-- when autocommit is disabled for the session.
COMMIT;
-- Test data for the source database, to be loaded before the next regular
-- load run.
USE source;

/***
Changes to the customer data:
  - set the shipping address of the eight existing customers
  - add customer 9
***/

-- Intentionally no WHERE clause: every existing customer gets a shipping
-- address copied from its own street address columns.
UPDATE customer
SET
    shipping_address  = customer_street_address,
    shipping_zip_code = customer_zip_code,
    shipping_city     = customer_city,
    shipping_state    = customer_state;

-- New customer; customer_number is not supplied here.
INSERT INTO customer (
    customer_name,
    customer_street_address,
    customer_zip_code,
    customer_city,
    customer_state,
    shipping_address,
    shipping_zip_code,
    shipping_city,
    shipping_state
)
VALUES (
    'Online Distributors',
    '2323 Louise Dr.',
    17055,
    'Pittsburgh',
    'PA',
    '2323 Louise Dr.',
    17055,
    'Pittsburgh',
    'PA'
);

-- One new order per customer (1-9), all dated 2015-03-02. The explicit column
-- list keeps the insert independent of the table's physical column order.
INSERT INTO sales_order (
    order_number,
    customer_number,
    product_code,
    order_date,
    entry_date,
    order_amount,
    order_quantity
)
VALUES
    (38, 1, 1, '2015-03-02', '2015-03-02', 1000, 10),
    (39, 2, 2, '2015-03-02', '2015-03-02', 2000, 20),
    (40, 3, 3, '2015-03-02', '2015-03-02', 4000, 40),
    (41, 4, 4, '2015-03-02', '2015-03-02', 6000, 60),
    (42, 5, 1, '2015-03-02', '2015-03-02', 2500, 25),
    (43, 6, 2, '2015-03-02', '2015-03-02', 5000, 50),
    (44, 7, 3, '2015-03-02', '2015-03-02', 7500, 75),
    (45, 8, 4, '2015-03-02', '2015-03-02', 1000, 10),
    (46, 9, 1, '2015-03-02', '2015-03-02', 1000, 10);

COMMIT;