每日增量中没有匹配上的数据,以及拉链表中 endtime < 9999-12-31 的数据保持不变,
其余数据的 endtime 改为每日增量的 starttime - 1。具体做法:如果数据发生修改,
将拉链表和增量数据左连接,把拉链表的 endtime 修改为增量表的 starttime - 1
(左连接匹配上的数据说明数据有变动;
但如果拉链表 endtime < 9999-12-31,说明该记录已经是历史状态,不需要改变)。
-- Zipper (history) table: each row is one version of a user's attributes,
-- valid from starttime through endtime.
CREATE TABLE zipper (
    userid    VARCHAR(20),
    phone     VARCHAR(20),
    nick      VARCHAR(20),
    gender    VARCHAR(20),
    addr      VARCHAR(20),
    starttime DATE,
    endtime   DATE  -- the queries in this file treat 9999-12-31 as "still current"
)
ROW FORMAT DELIMITED
FIELDS TERMINATED BY '\t';
-- Daily incremental data: same column layout as the zipper table.
CREATE TABLE zipper_update (
    userid    VARCHAR(20),
    phone     VARCHAR(20),
    nick      VARCHAR(20),
    gender    VARCHAR(20),
    addr      VARCHAR(20),
    starttime DATE,
    endtime   DATE
)
ROW FORMAT DELIMITED
FIELDS TERMINATED BY '\t';
-- Staging table that receives the merged zipper + increment result.
-- NOTE(review): every column is STRING here, whereas zipper/zipper_update use
-- VARCHAR(20) and DATE, so inserted DATE values are stored as text -- confirm
-- this is intended.
CREATE TABLE tmp_zipper (
    userid    STRING,
    phone     STRING,
    nick      STRING,
    gender    STRING,
    addr      STRING,
    starttime STRING,
    endtime   STRING
)
ROW FORMAT DELIMITED
FIELDS TERMINATED BY '\t';
-- Load the tab-delimited fixture files from the local filesystem:
-- zipper.txt seeds the zipper table, update.txt seeds the daily increment.
LOAD DATA LOCAL INPATH '/hivedata/zipper.txt'
INTO TABLE zipper;

LOAD DATA LOCAL INPATH '/hivedata/update.txt'
INTO TABLE zipper_update;
-- Show the zipper table contents.
SELECT *
FROM zipper;

-- Show the daily incremental contents.
SELECT *
FROM zipper_update;
拉链表和增量表左关联数据
-- Preview the raw left join: every zipper row is kept, and the increment's
-- columns are NULL where no increment row shares the userid.
SELECT
    a.*,
    b.*
FROM zipper AS a
LEFT OUTER JOIN zipper_update AS b
    ON a.userid = b.userid;
结果查询
将拉链表和增量表左关联:关联条件匹配上,说明数据已被修改;同时判断拉链表的 endtime,如果小于 9999-12-31,说明已经是历史状态,不做修改。只有同时满足「on 关联条件匹配」和「拉链表 endtime 不小于 9999-12-31」的数据,才把拉链表的 endtime 修改为增量表的 starttime - 1(第二天同步前一天的数据)。
-- Existing zipper rows with their endtime recomputed:
--   * no matching increment row, or the row is already historical
--     (endtime before 9999-12-31)  -> endtime is left as it is;
--   * otherwise                    -> endtime becomes the day before the
--                                     increment's starttime.
SELECT
    a.userid,
    a.phone,
    a.nick,
    a.gender,
    a.addr,
    a.starttime,
    CASE
        WHEN b.userid IS NULL OR a.endtime < '9999-12-31' THEN a.endtime
        ELSE date_sub(b.starttime, 1)
    END AS endtime
FROM zipper AS a
LEFT OUTER JOIN zipper_update AS b
    ON a.userid = b.userid;
结果查询
-- Rebuild tmp_zipper from two sources:
--   1) every row of the daily increment, unchanged;
--   2) every existing zipper row, where a still-current row (endtime not
--      before 9999-12-31) that has a matching increment row gets its endtime
--      set to the day before the increment's starttime; all other rows keep
--      their endtime.
-- Hive requires the TABLE keyword after INSERT OVERWRITE; without it the
-- statement fails to parse.
INSERT OVERWRITE TABLE tmp_zipper
SELECT
    userid,
    phone,
    nick,
    gender,
    addr,
    starttime,
    endtime
FROM zipper_update
UNION ALL
SELECT
    a.userid,
    a.phone,
    a.nick,
    a.gender,
    a.addr,
    a.starttime,
    CASE
        WHEN b.userid IS NULL OR a.endtime < '9999-12-31' THEN a.endtime
        ELSE date_sub(b.starttime, 1)
    END AS endtime
FROM zipper a
LEFT JOIN zipper_update b
    ON a.userid = b.userid;
结果查询