整体实现思路:
1.用户每天可能不止登陆一次,将登录日期去重,取出当日登陆成功的日期,row_number()函数分组排序并计数
2.日期减去计数得到值
3.根据每个用户count(值)判断连续登陆天数
4.最后取连续登陆天数大于等于7天的用户
示例:
CREATE TABLE db_test.user_log_test(
datestr
string comment ‘日期’,
uid
string comment ‘用户id’,
status
int comment ‘登陆状态 1:成功 0:失败’)
comment ‘用户登陆日志表’
stored as orc;
insert into db_test.user_log_test values(‘2020-08-30’,‘1’,1);
insert into db_test.user_log_test values(‘2020-08-30’,‘1’,0);
insert into db_test.user_log_test values(‘2020-08-29’,‘1’,1);
insert into db_test.user_log_test values(‘2020-08-28’,‘1’,0);
insert into db_test.user_log_test values(‘2020-08-27’,‘1’,1);
insert into db_test.user_log_test values(‘2020-08-26’,‘1’,1);
insert into db_test.user_log_test values(‘2020-08-25’,‘1’,1);
insert into db_test.user_log_test values(‘2020-08-24’,‘1’,1);
insert into db_test.user_log_test values(‘2020-08-23’,‘1’,1);
insert into db_test.user_log_test values(‘2020-08-22’,‘1’,1);
insert into db_test.user_log_test values(‘2020-08-21’,‘1’,1);
insert into db_test.user_log_test values(‘2020-08-20’,‘1’,1);
insert into db_test.user_log_test values(‘2020-08-25’,‘2’,1);
insert into db_test.user_log_test values(‘2020-08-24’,‘2’,1);
insert into db_test.user_log_test values(‘2020-08-23’,‘2’,0);
insert into db_test.user_log_test values(‘2020-08-22’,‘2’,1);
insert into db_test.user_log_test values(‘2020-08-21’,‘2’,1);
insert into db_test.user_log_test values(‘2020-08-20’,‘2’,1);
insert into db_test.user_log_test values(‘2020-08-26’,‘3’,1);
insert into db_test.user_log_test values(‘2020-08-25’,‘3’,1);
insert into db_test.user_log_test values(‘2020-08-24’,‘3’,1);
insert into db_test.user_log_test values(‘2020-08-23’,‘3’,1);
insert into db_test.user_log_test values(‘2020-08-22’,‘3’,1);
insert into db_test.user_log_test values(‘2020-08-21’,‘3’,1);
insert into db_test.user_log_test values(‘2020-08-20’,‘3’,1);
操作步骤:
1.计算出用户登陆成功的日期
select
uid
,datestr
from (
select
uid
,datestr
,row_number()over(partition by uid,datestr order by datestr asc) as rn
–,row_number()over(distribute by uid,datestr sort by datestr asc) as rn
from db_test.user_log_test
where status
=1
) a
where rn=1
;
2.日期减去计数得到值
select
uid,date_sub(datestr,num) date_rn
from (
select
uid
,datestr
,row_number() over(partition by uid order by datestr) as num
from (
select
uid
,datestr
,row_number()over(partition by uid,datestr order by datestr asc) as rn
–,row_number()over(distribute by uid,datestr sort by datestr asc) as rn
from db_test.user_log_test
where status
=1
) a
where rn=1
) b
3.根据每个用户count(值)判断连续登陆天数
select uid,count(*) cnt–连续登陆天数
from (
select
uid,date_sub(datestr,num) date_rn
from (
select
uid
,datestr
,row_number() over(partition by uid order by datestr) as num
from (
select
uid
,datestr
,row_number()over(partition by uid,datestr order by datestr asc) as rn
–,row_number()over(distribute by uid,datestr sort by datestr asc) as rn
from db_test.user_log_test
where status
=1
) a
where rn=1
) b)c
group by uid,date_rn
4.最后取连续登陆天数大于等于7天的用户
select uid
from (
select
uid,date_sub(datestr,num) date_rn
from (
select
uid
,datestr
,row_number() over(partition by uid order by datestr) as num
from (
select
uid
,datestr
,row_number()over(partition by uid,datestr order by datestr asc) as rn
–,row_number()over(distribute by uid,datestr sort by datestr asc) as rn
from db_test.user_log_test
where status
=1
) a
where rn=1
) b)c
group by uid,date_rn
having count(1)>=7
或者用窗口分析函数更快查询出来
–所有用户信息
select
*
from
(
select uid ,datestr, lead(datestr,6,-1) over(partition by uid order by datestr desc ) as date1
from db_test.user_log_test a
group by uid,datestr
) as b
where
date_sub(cast(b.datestr as date),6)=cast(b.date1 as date);
--用户数
select
count(distinct uid)
from
(
select uid ,datestr, lead(datestr,6,-1) over(partition by uid order by datestr desc ) as date1
from db_test.user_log_test a
group by uid,datestr
) as b
where
date_sub(cast(b.datestr as date),6)=cast(b.date1 as date);
统计连续登陆7天的用户个数(n天就只需要把lead(date,6,-1)中的6改成n-1并且把date_sub(cast(b.date as date),6)中的6改成n-1)