拉链表设计图:
-- Create the two warehouse databases this script addresses: tables are later
-- referenced as ods.* (raw order data) and dwd.* (detail and zipper tables).
-- "if not exists" lets the script be re-run without failing on databases that
-- are already there.
create database if not exists ods;
create database if not exists dwd;
-- Staging table that receives the raw order rows inserted by this script.
-- The statement is now terminated with ';' so that it no longer runs into the
-- insert that follows it.
-- NOTE(review): the table is created in the current database, while later
-- statements read it as ods.ods_order_init; presumably the session runs under
-- `use ods` -- confirm.
create table ods_order_init (
    `id`           string comment '订单编号',
    `order_status` string comment '订单状态',
    `user_id`      string comment '用户id',
    `create_time`  string comment '创建时间',
    `operate_time` string comment '操作时间'
) comment '订单表';
-- Day-1 (2019-04-15) sample data: three orders, all still unpaid.
-- The 2019-04-16 "paid" rows for orders 1001 and 1002 are inserted by the day-2
-- statements further down in this script; inserting them here as well would
-- load every day-2 change twice and duplicate rows in the zipper table.
insert into ods_order_init
    (`id`, `order_status`, `user_id`, `create_time`, `operate_time`)
values
    ('1001', '未支付', '1001', '2019-04-15', '2019-04-15'),
    ('1002', '未支付', '1002', '2019-04-15', '2019-04-15'),
    ('1003', '未支付', '1003', '2019-04-15', '2019-04-15');
-- Daily-partitioned ODS order table. No DDL for it appears elsewhere in this
-- script, so it is created here; "if not exists" keeps this harmless when the
-- table is already present. The columns mirror ods_order_init, plus the dt
-- partition column that the statements below filter on.
create table if not exists ods.ods_order (
    `id`           string comment '订单编号',
    `order_status` string comment '订单状态',
    `user_id`      string comment '用户id',
    `create_time`  string comment '创建时间',
    `operate_time` string comment '操作时间'
) comment '订单表'
partitioned by (`dt` string);

-- Initial load: copy the rows operated on 2019-04-15 into that day's partition.
-- Columns are listed explicitly so the load does not depend on the column
-- order of the staging table.
insert into table ods.ods_order
partition (dt = '2019-04-15')
select
    `id`,
    `order_status`,
    `user_id`,
    `create_time`,
    `operate_time`
from ods.ods_order_init
where operate_time = '2019-04-15';
-- Daily-partitioned order detail table in the dwd layer.
-- It is created with the dwd. qualifier because the statements that write and
-- read it address it as dwd.dwd_order_info.
-- Column comments follow the wording used by the other order tables in this
-- script.
create table dwd.dwd_order_info (
    `id`           string comment '订单编号',
    `order_status` string comment '订单状态',
    `user_id`      string comment '用户id',
    `create_time`  string comment '创建时间',
    `operate_time` string comment '操作时间'
) comment '订单信息'
partitioned by (`dt` string);
-- Zipper (history) table: one row per version of an order, valid from
-- start_date through end_date inclusive. The currently valid version carries
-- the sentinel end_date '9999-99-99' written by the load statements.
-- It is created with the dwd. qualifier because the initial load and the merge
-- join address it as dwd.dwd_order_his.
create table dwd.dwd_order_his (
    `id`           string comment '订单编号',
    `order_status` string comment '订单状态',
    `user_id`      string comment '用户id',
    `create_time`  string comment '创建时间',
    `operate_time` string comment '操作时间',
    `start_date`   string comment '有效开始日期',
    `end_date`     string comment '有效结束日期'
) comment '订单拉链表';
-- Scratch table with the same column layout as the zipper table; the merge
-- writes the rebuilt history here before it overwrites the zipper table.
-- The sixth column is named start_date (it was misspelled start_end) so that it
-- matches its own comment and the zipper table's column name. Statements in
-- this script only access the table positionally (select * / insert), so the
-- rename does not affect them.
create table dwd_order_tmp (
    `id`           string comment '订单编号',
    `order_status` string comment '订单状态',
    `user_id`      string comment '用户id',
    `create_time`  string comment '创建时间',
    `operate_time` string comment '操作时间',
    `start_date`   string comment '有效开始日期',
    `end_date`     string comment '有效结束日期'
) comment '订单拉链临时表';
-- Initialise the zipper table from the first day's ODS partition. Every order
-- becomes an open version: valid from 2019-04-15, with the sentinel end_date
-- '9999-99-99' (the later merge matches on this exact string, so it must not
-- be changed here alone).
-- The dt filter pins the source to the 2019-04-15 partition; without it, a
-- re-run after later partitions exist would stamp their rows with the same
-- start_date as well.
insert overwrite table dwd.dwd_order_his
select
    `id`,
    `order_status`,
    `user_id`,
    `create_time`,
    `operate_time`,
    '2019-04-15' as start_date,
    '9999-99-99' as end_date
from ods.ods_order
where dt = '2019-04-15';
-- Day-2 (2019-04-16) sample data: orders 1001 and 1002 change to "paid".
-- Both rows are appended with a single multi-row insert.
insert into ods_order_init
    (`id`, `order_status`, `user_id`, `create_time`, `operate_time`)
values
    ('1001', '支付成功', '1001', '2019-04-16', '2019-04-16'),
    ('1002', '支付成功', '1002', '2019-04-16', '2019-04-16');
-- Day-2 load: copy the rows operated on 2019-04-16 into that day's partition
-- of the ODS order table. The five staging columns are spelled out in table
-- order, which selects the same data as select *.
insert into table ods.ods_order
partition (dt = '2019-04-16')
select
    init.`id`,
    init.`order_status`,
    init.`user_id`,
    init.`create_time`,
    init.`operate_time`
from ods.ods_order_init init
where init.operate_time = '2019-04-16';
-- The insert below names its partition column without a value (dynamic
-- partition), so switch dynamic partitioning to nonstrict mode first.
set hive.exec.dynamic.partition.mode=nonstrict;

-- Copy the 2019-04-16 ODS partition into the dwd detail table. The partition
-- column dt is selected last, which is where a dynamic partition insert
-- reads the partition value from.
insert into table dwd.dwd_order_info
partition (dt)
select
    src.`id`,
    src.`order_status`,
    src.`user_id`,
    src.`create_time`,
    src.`operate_time`,
    src.`dt`
from ods.ods_order src
where src.dt = '2019-04-16';
-- Day-2 zipper merge, step 1: rebuild the full history into the temp table.
-- The result is the union of
--   (a) every row in the 2019-04-16 partition of dwd.dwd_order_info, opened as
--       a new version valid from 2019-04-16 with the sentinel end_date
--       '9999-99-99';
--   (b) every existing history row. A row that is still open and whose order
--       appears in today's partition is closed on the day before (dt - 1);
--       all other rows keep their end_date.
-- The open-row test (end_date = '9999-99-99') sits in the ON clause rather than
-- in WHERE, so already-closed history rows survive the left join with
-- oi.id null and are carried over unchanged.
-- Fixes relative to the earlier version: the statement is terminated with ';'
-- so it no longer runs into step 2, and dwd_order_info / dwd_order_his are
-- addressed with the dwd. qualifier everywhere.
insert overwrite table dwd_order_tmp
select
    his.id,
    his.order_status,
    his.user_id,
    his.create_time,
    his.operate_time,
    his.start_date,
    his.end_date
from (
    select
        id,
        order_status,
        user_id,
        create_time,
        operate_time,
        '2019-04-16' as start_date,
        '9999-99-99' as end_date
    from dwd.dwd_order_info
    where dt = '2019-04-16'
    union all
    select
        oh.id,
        oh.order_status,
        oh.user_id,
        oh.create_time,
        oh.operate_time,
        oh.start_date,
        if(oi.id is null, oh.end_date, date_add(oi.dt, -1)) as end_date
    from dwd.dwd_order_his oh
    left join (
        select
            id,
            dt
        from dwd.dwd_order_info
        where dt = '2019-04-16'
    ) oi
        on oh.id = oi.id
        and oh.end_date = '9999-99-99'
) his
order by his.id, his.start_date;

-- Step 2: replace the zipper table with the rebuilt history.
-- select * is positional here: the temp table holds the same seven string
-- columns in the same order as the zipper table.
insert overwrite table dwd.dwd_order_his
select * from dwd_order_tmp;
通过条件“某个日期 >= 生效开始日期 且 某个日期 <= 生效结束日期”,
能够得到某个时间点的全量数据切片。
比如:
select * from dwd_order_his where start_date <= '2019-01-01' and end_date >= '2019-01-01';
就会得到 2019-01-01 的历史切片。