间断连续登录天数问题

间断连续登录天数问题

问题:
统计每个用户的最大连续登录天数, 中间间隔一天(即相邻两次登录日期相差不超过 2 天)也算连续登录; 例如: 用户在 1、3、5、8 日登录, 其中 1、3、5 日算作一段连续登录, 共 5 天(1 日至 5 日); 8 日与 5 日相差 3 天, 连续中断, 单独算作 1 天

1- 数据准备
-- Sample data: one row per login event, as (user_id, active_date) text pairs.
-- Some pairs appear more than once (e.g. user 10003 on 2023-02-04).
WITH user_active_info AS (
    SELECT
        src.user_id,
        src.active_date
    FROM (
        VALUES
            ('10001', '2023-02-01'),
            ('10001', '2023-02-03'),
            ('10001', '2023-02-08'),
            ('10001', '2023-02-05'),
            ('10002', '2023-02-02'),
            ('10002', '2023-02-10'),
            ('10002', '2023-02-04'),
            ('10002', '2023-02-05'),
            ('10002', '2023-02-07'),
            ('10003', '2023-02-02'),
            ('10003', '2023-02-03'),
            ('10003', '2023-02-04'),
            ('10003', '2023-02-04'),
            ('10003', '2023-02-06'),
            ('10003', '2023-02-09'),
            ('10003', '2023-02-08'),
            ('10004', '2023-02-03'),
            ('10004', '2023-02-04'),
            ('10004', '2023-02-06'),
            ('10004', '2023-02-09'),
            ('10004', '2023-02-08'),
            ('10004', '2023-02-08'),
            ('10005', '2023-02-02'),
            ('10005', '2023-02-05'),
            ('10005', '2023-02-06'),
            ('10005', '2023-02-09')
    ) AS src (user_id, active_date)
)
2- 代码实现
-- Step 1: attach each user's previous login date to every login row.
, t_1 AS (
    SELECT
        dedup.user_id,
        dedup.active_date,
        -- A user's earliest login has no predecessor, so LAG falls back
        -- to the default '1970-01-01'.
        LAG(dedup.active_date, 1, '1970-01-01') OVER (
            PARTITION BY dedup.user_id
            ORDER BY dedup.active_date
        ) AS lag1_date
    FROM (
        -- De-duplicate on (user_id, active_date): keep one row per user per day.
        SELECT DISTINCT
            user_id,
            active_date
        FROM user_active_info
    ) AS dedup
)
user_id active_date lag1_date
10001 2023-02-01 1970-01-01
10001 2023-02-03 2023-02-01
10001 2023-02-05 2023-02-03
10001 2023-02-08 2023-02-05
10002 2023-02-02 1970-01-01
10002 2023-02-04 2023-02-02
10002 2023-02-05 2023-02-04
10002 2023-02-07 2023-02-05
10002 2023-02-10 2023-02-07
10003 2023-02-02 1970-01-01
10003 2023-02-03 2023-02-02
10003 2023-02-04 2023-02-03
-- Step 2: label rows so that logins belonging to the same streak share a label.
, t_2 AS (
    SELECT
        seg.user_id,
        seg.active_date,
        seg.lag1_date,
        seg.flag,
        -- Combined label: equal user_id_flag values mean the same streak.
        CONCAT(seg.user_id, '_', seg.flag) AS user_id_flag
    FROM (
        SELECT
            user_id,
            active_date,
            lag1_date,
            -- Running count of breaks per user: a row whose date is more than
            -- 2 days after the previous login date starts a new segment.
            SUM(
                CASE
                    WHEN DATEDIFF(active_date, lag1_date) > 2 THEN 1
                    ELSE 0
                END
            ) OVER (
                PARTITION BY user_id
                ORDER BY active_date
            ) AS flag
        FROM t_1
    ) AS seg
)
user_id active_date lag1_date flag user_id_flag
10001 2023-02-01 1970-01-01 1 10001_1
10001 2023-02-03 2023-02-01 1 10001_1
10001 2023-02-05 2023-02-03 1 10001_1
10001 2023-02-08 2023-02-05 2 10001_2
10002 2023-02-02 1970-01-01 1 10002_1
10002 2023-02-04 2023-02-02 1 10002_1
10002 2023-02-05 2023-02-04 1 10002_1
10002 2023-02-07 2023-02-05 1 10002_1
10002 2023-02-10 2023-02-07 2 10002_2
10003 2023-02-02 1970-01-01 1 10003_1
10003 2023-02-03 2023-02-02 1 10003_1
-- Step 3: compute the length of every streak, then keep the longest one per user.
SELECT
    streak.user_id,
    MAX(streak.continue_days) AS max_continue_days
FROM (
    -- Streak length = days from the first to the last login of the
    -- streak, counting both ends (hence the + 1).
    SELECT
        user_id,
        user_id_flag,
        DATEDIFF(MAX(active_date), MIN(active_date)) + 1 AS continue_days
    FROM t_2
    GROUP BY
        user_id,
        user_id_flag
) AS streak
GROUP BY
    streak.user_id
;
子查询的中间结果(每个用户每组的连续天数):
user_id user_id_flag continue_days
10001 10001_1 5
10001 10001_2 1
10002 10002_1 6
10002 10002_2 1
10003 10003_1 8
10004 10004_1 7
10005 10005_1 1
10005 10005_2 2
10005 10005_3 1
最终结果(每个用户的最大连续天数):
user_id max_continue_days
10001 5
10002 6
10003 8
10004 7
10005 2
3- 总结
  1. 本题是连续登录天数问题的升级版: 允许中间间隔一天
  2. 解法结合了浏览(会话)窗口划分的思路: 先用 LAG 取上一次登录日期, 再对"与上次登录相差超过 2 天"的标记做累计求和, 得到分组标签
end

你可能感兴趣的:(SQL,大数据,数据库,sql,大数据)