HIVE-SQL求连续天数

需求:求用户连续登录天数

1.数据准备

-- Seed data for the walkthrough: five login rows for user 1001.
-- The dates form three runs of consecutive days: Jan 1-2, Jan 4 and Jan 6-7 of 2017.
-- Both columns are created as strings because they come from string literals.
-- Every union branch repeats the column aliases so the branch schemas match by name.
create table tmpdb.test_01 as
select
	seed.user_id
	,seed.login_date
from
(
	select '1001' as user_id, '2017-01-01' as login_date
	union all
	select '1001' as user_id, '2017-01-02' as login_date
	union all
	select '1001' as user_id, '2017-01-04' as login_date
	union all
	select '1001' as user_id, '2017-01-06' as login_date
	union all
	select '1001' as user_id, '2017-01-07' as login_date
) seed
;

2.步骤拆解

2.1 求差值

-- Step 2.1: tag every login row with a streak key named diff_value.
-- Within one user, rn grows by 1 per distinct login date and day_interval grows
-- by 1 per calendar day, so rn - day_interval stays constant across a run of
-- consecutive dates and changes as soon as a day is skipped.
-- dense_rank is used instead of row_number so that several rows for the same
-- user and date share one rank and therefore one diff_value. With row_number a
-- repeated date would shift the rank and split the streak in two.
-- The anchor date 2017-01-01 is arbitrary: any fixed date gives the same grouping,
-- only the absolute diff_value numbers shift by a constant.
-- The rank and the day offset are computed once in the derived table and
-- subtracted in the outer select, rather than repeating the window expression.
select
	user_id
	,login_date
	,rn
	,day_interval
	,rn - day_interval as diff_value
from
(
	select
		user_id
		,login_date
		,dense_rank()over(partition by user_id order by login_date) as rn
		,datediff(login_date,'2017-01-01') as day_interval
	from tmpdb.test_01
) ranked
;

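查询结果(原文此处为结果截图):

user_id  login_date  rn  day_interval  diff_value
1001     2017-01-01  1   0             1
1001     2017-01-02  2   1             1
1001     2017-01-04  3   3             0
1001     2017-01-06  4   5             -1
1001     2017-01-07  5   6             -1

这里有个规律:同一用户下,连续的日期对应的 diff_value 值是一样的;日期一旦中断,diff_value 就会发生变化。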
2.2 求起始、结束、连续天数

-- Step 2.2: collapse each streak into one row holding its first date, last date
-- and length in days.
-- The derived table assigns a streak key diff_value = dense_rank - day offset.
-- That key is constant across a run of consecutive dates for one user, so grouping
-- by user_id and diff_value yields one row per streak.
-- dense_rank plus count(distinct login_date) keeps the result correct when a user
-- has more than one row for the same date: the duplicates neither split the streak
-- nor inflate running_days.
-- Rows with a NULL login_date are filtered out because they cannot belong to a
-- streak and would otherwise show up as a group with NULL start and end dates.
-- The anchor date 2017-01-01 is arbitrary, any fixed date gives the same groups.
select
	user_id
	,min(login_date) as start_date
	,max(login_date) as end_date
	,count(distinct login_date) as running_days
from
(
	select
		user_id
		,login_date
		,dense_rank()over(partition by user_id order by login_date)
			- datediff(login_date,'2017-01-01') as diff_value
	from tmpdb.test_01
	where login_date is not null
) t
group by
	user_id
	,diff_value
;

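查询结果(原文此处为结果截图,行的顺序不保证):

user_id  start_date  end_date    running_days
1001     2017-01-01  2017-01-02  2
1001     2017-01-04  2017-01-04  1
1001     2017-01-06  2017-01-07  2

结论:用一个连续的序列(行号)去匹配一个部分连续的序列(日期相对基准日的天数),两者差值相同的元素,在部分连续的序列中就是连续的。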

你可能感兴趣的:(大数据)