-- Sample rows for t1, read by the consecutive-day queries in this file.
-- Rows are kept in their original order; id 3's 2019-03-07 row is listed
-- before its other dates, which is fine because the queries order by date.
INSERT INTO t1 (id, dt)
VALUES
    (1, '2019-01-01 00:00:00'),
    (1, '2019-01-02 00:00:00'),
    (1, '2019-01-03 00:00:00'),
    (3, '2019-03-07 00:00:00'),
    (2, '2019-02-01 00:00:00'),
    (2, '2019-02-02 00:00:00'),
    (3, '2019-03-04 00:00:00'),
    (3, '2019-03-05 00:00:00'),
    (3, '2019-03-06 00:00:00');
解析思路:第二步使用row_number()函数与日期做减法
-- Consecutive-day streaks per id.
--   daily:    keep the first 10 characters of dt (the yyyy-MM-dd part) and
--             de-duplicate, so each id has at most one row per calendar day.
--   anchored: subtract the per-id row number (ordered by day) from the day.
--             Consecutive days all map to the same value (date2), so date2
--             identifies one streak.
--   final:    count the days in each (id, date2) streak and keep streaks
--             with more than 3 days.
-- NOTE(review): `> 3` drops streaks of exactly 3 days; confirm whether
-- `>= 3` was intended.
WITH daily AS (
    SELECT
        id,
        substr(dt, 1, 10) AS date1
    FROM t1
    GROUP BY
        id,
        substr(dt, 1, 10)
),
anchored AS (
    SELECT
        id,
        date1,
        date_sub(date1, row_number() OVER (PARTITION BY id ORDER BY date1)) AS date2
    FROM daily
)
SELECT
    id,
    date2,
    count(*) AS day_cnt
FROM anchored
GROUP BY
    id,
    date2
HAVING count(*) > 3;
-- Longest consecutive-day streak per id.
--   daily:    one row per (id, calendar day), taken from the first 10
--             characters of dt.
--   anchored: day minus its per-id row number; consecutive days share the
--             same date2.
--   streaks:  number of days in each (id, date2) streak.
--   final:    the largest streak length for each id.
WITH daily AS (
    SELECT
        id,
        substr(dt, 1, 10) AS date1
    FROM t1
    GROUP BY
        id,
        substr(dt, 1, 10)
),
anchored AS (
    SELECT
        id,
        date1,
        date_sub(date1, row_number() OVER (PARTITION BY id ORDER BY date1)) AS date2
    FROM daily
),
streaks AS (
    SELECT
        id,
        date2,
        count(*) AS day_cnt
    FROM anchored
    GROUP BY
        id,
        date2
)
SELECT
    id,
    max(day_cnt) AS max_day_cnt
FROM streaks
GROUP BY id;
解析思路:在1.1解题思路的基础上使用Max函数即可
总结:"连续类"问题的解题思路:先用日期减去 row_number() 得到分组标识(连续的日期会得到相同的结果),再按该标识分组 count。
分析:求每个用户最大的连续登陆天数,断一天还算连续登录(两个日期的差小于或等于 2 )。 比如 11-06 号登录,最近的下一次登录是 11-08 号,两个日期的差等于 2 天,因此这两个日期之间的天数都算作连续天数,一共 3 天。
准备数据:
-- 创建测试表test_login
-- Test table for the login-streak exercise.
-- Both columns are declared as string; the sample data stores login_date
-- as yyyy-MM-dd text. Rows are stored as delimited text with a tab
-- between fields.
CREATE TABLE IF NOT EXISTS test_login (
    user_id    string,
    login_date string
)
ROW FORMAT DELIMITED
    FIELDS TERMINATED BY '\t';
-- 装载数据
-- load data local inpath '/opt/module/data/test1_login.txt' into table test_login;
-- Sample logins for two users (1001 and 1002) in November 2021.
-- user_id is declared as string in test_login but is written here as an
-- integer literal, so the insert relies on the engine converting it.
INSERT INTO test_login (user_id, login_date)
VALUES
    (1001, '2021-11-05'),
    (1001, '2021-11-06'),
    (1001, '2021-11-08'),
    (1001, '2021-11-10'),
    (1001, '2021-11-15'),
    (1001, '2021-11-16'),
    (1001, '2021-11-18'),
    (1001, '2021-11-19'),
    (1002, '2021-11-05'),
    (1002, '2021-11-07'),
    (1002, '2021-11-09'),
    (1002, '2021-11-11'),
    (1002, '2021-11-13'),
    (1002, '2021-11-16');
解题思路:
1.相邻两次登录日期差值小于等于2即为同一连续登录组
2.相邻两次登录日期差值大于2更换连续登录组(相邻两个连续登录组的分隔条件为分隔位置登录日期差值大于2)
3.连续登录日期组最大日期-最小日期差值加1即为连续登录天数
代码展示:
-- Longest login streak per user, where a gap of at most 2 days between two
-- adjacent logins (i.e. one missed day) does not break the streak.
--   with_previous: pair every login with the same user's previous login
--                  date. The first login of a user has no previous row, so
--                  lag() falls back to '1970-01-01'; that keeps the gap
--                  non-NULL and far above 2, so the row starts a new streak.
--   grouped:       running count of "gap > 2" rows per user. The counter
--                  goes up by one each time a streak breaks, so it serves
--                  as a streak identifier.
--   streaks:       length of each streak = last date - first date + 1, so
--                  the skipped days inside a streak are counted too.
--   final:         the longest streak for each user.
WITH with_previous AS (
    SELECT
        user_id,
        login_date,
        lag(login_date, 1, '1970-01-01')
            OVER (PARTITION BY user_id ORDER BY login_date) AS prev_login_date
    FROM test_login
),
grouped AS (
    SELECT
        p.user_id,
        CAST(p.login_date AS date) AS login_date,
        sum(
            CASE
                WHEN datediff(p.login_date, p.prev_login_date) <= 2 THEN 0
                ELSE 1
            END
        ) OVER (PARTITION BY p.user_id ORDER BY p.login_date) AS streak_id
    FROM with_previous AS p
),
streaks AS (
    SELECT
        user_id,
        streak_id,
        datediff(max(login_date), min(login_date)) + 1 AS continuous_login_days
    FROM grouped
    GROUP BY
        user_id,
        streak_id
)
SELECT
    user_id,
    max(continuous_login_days) AS max_continuous_login_days
FROM streaks
GROUP BY user_id
;