数仓 SQL 面试题:连续登录天数和连续未登录天数

我也写一个解法。主要的解决思路是让同一用户相邻的两条登录记录之间产生联系。

 

-- Login fixture table: each row records that user_id logged in on login_date.
CREATE TABLE login_log (
    user_id    INTEGER,  -- user the login belongs to
    login_date DATE      -- calendar day of the login
);


-- Sample data, loaded with one multi-row statement instead of one INSERT per
-- row (each separate INSERT is its own write to the table).
--   user 1 logs in on Jan 1-2, 4-10, 12-13 and 15-16 of 2020
--   user 2 logs in on Jan 1-13 and 16-17 of 2020
INSERT INTO login_log (user_id, login_date)
VALUES
    (1, '2020-01-01'),
    (1, '2020-01-02'),
    (1, '2020-01-04'),
    (1, '2020-01-05'),
    (1, '2020-01-06'),
    (1, '2020-01-07'),
    (1, '2020-01-08'),
    (1, '2020-01-09'),
    (1, '2020-01-10'),
    (1, '2020-01-12'),
    (1, '2020-01-13'),
    (1, '2020-01-15'),
    (1, '2020-01-16'),
    (2, '2020-01-01'),
    (2, '2020-01-02'),
    (2, '2020-01-03'),
    (2, '2020-01-04'),
    (2, '2020-01-05'),
    (2, '2020-01-06'),
    (2, '2020-01-07'),
    (2, '2020-01-08'),
    (2, '2020-01-09'),
    (2, '2020-01-10'),
    (2, '2020-01-11'),
    (2, '2020-01-12'),
    (2, '2020-01-13'),
    (2, '2020-01-16'),
    (2, '2020-01-17');

 

-- Number each user's login days chronologically (day_rank = 1 for the earliest).
-- The table is dropped first so the script can be re-run, matching how the
-- other derived tables in this script are rebuilt.
DROP TABLE IF EXISTS login_rank;
CREATE TABLE login_rank AS
SELECT
    d.user_id,
    d.login_date,
    ROW_NUMBER() OVER (PARTITION BY d.user_id ORDER BY d.login_date) AS day_rank
FROM (
    -- login_log does not enforce one row per user and day; a repeated
    -- (user_id, login_date) would consume an extra rank and break the
    -- "login_date - day_rank is constant within a streak" property that the
    -- streak queries rely on, so duplicates are collapsed before ranking.
    SELECT DISTINCT
        user_id,
        login_date
    FROM login_log
) d
;
 

-- Consecutive login days.
-- Within one user, login_date minus day_rank stays constant across a run of
-- back-to-back days, so equal values identify the same streak. Each login row
-- (t1) is paired with every row of its own streak dated on or after it (t2),
-- itself included. The result therefore holds one row per such pair; counting
-- rows per (user_id, login_date) gives the length of the streak measured from
-- that date forward.
-- NOTE(review): the `<=` predicate inside ON needs an engine that accepts
-- non-equi join conditions (presumably Hive 2.2.0 or later) -- confirm.
DROP TABLE IF EXISTS login_rank_num;
CREATE TABLE login_rank_num AS
SELECT
    t1.user_id,
    t1.login_date,
    t1.day_rank
FROM login_rank t1
LEFT JOIN login_rank t2
    ON t1.user_id = t2.user_id
    AND DATE_ADD(t1.login_date, -t1.day_rank) = DATE_ADD(t2.login_date, -t2.day_rank)
    AND t1.login_date <= t2.login_date
;

-- Report, for every (user_id, login_date), how many login_rank_num rows carry
-- a non-NULL login_date; the count is exposed as logdays.
SELECT
    n.user_id,
    n.login_date,
    COUNT(n.login_date) AS logdays
FROM login_rank_num n
GROUP BY
    n.user_id,
    n.login_date
;

-- Consecutive days without a login.
-- For every login day, daysdiff is the number of whole days between it and the
-- same user's next login day: 0 when the next login is the following day, and
-- NULL for the user's last recorded login (there is no next row).
-- LEAD over day_rank reads the row ranked immediately after the current one,
-- which is what a self-join on day_rank = day_rank - 1 would fetch.
DROP TABLE IF EXISTS login_rank_diff;
CREATE TABLE login_rank_diff AS
SELECT
    user_id,
    login_date,
    DATEDIFF(
        LEAD(login_date) OVER (PARTITION BY user_id ORDER BY day_rank),
        login_date
    ) - 1 AS daysdiff
FROM login_rank
;

-- Show every row of login_rank_diff with its columns listed explicitly.
SELECT
    user_id,
    login_date,
    daysdiff
FROM login_rank_diff
;

你可能感兴趣的:(hive)