-- Recreate the scratch table used by the consecutive-login examples.
-- IF EXISTS states explicitly that a missing table is fine, so the script can be re-run safely.
drop table if exists temp.tmp_test_room;

-- Plain-text table: each data line holds the three fields below separated by a tab character.
create table temp.tmp_test_room (
    roomid   string,
    pt_month string,
    pt_day   string  -- day value; stored as a string
)
row format delimited fields terminated by '\t';
在控制台执行命令把文件映射成表
# Copy the local file ./p.txt into the HDFS directory /user/hive/warehouse/temp.db/tmp_test_room.
# NOTE(review): presumably this is the storage directory of table temp.tmp_test_room under the
# default warehouse root, which is what makes the file's rows queryable - confirm the path.
hdfs dfs -put ./p.txt /user/hive/warehouse/temp.db/tmp_test_room
-- Step 1: list the days of one room in chronological order and number them.
-- rn is the 1-based position of each day within the room's ordered days.
select
    roomid,
    to_date(pt_day) as pt_day,
    row_number() over (partition by roomid order by to_date(pt_day) asc) as rn
from temp.tmp_test_room
-- roomid is a string column: compare it with a string literal rather than relying on an
-- implicit string/number conversion
where roomid = '9999589'
    and pt_day between '2017-01-01' and '2017-01-16';
结果:
+----------+-------------+-----+--+
| roomid   | pt_day      | rn  |
+----------+-------------+-----+--+
| 9999589  | 2017-01-01  | 1   |
| 9999589  | 2017-01-02  | 2   |
| 9999589  | 2017-01-04  | 3   |
| 9999589  | 2017-01-05  | 4   |
| 9999589  | 2017-01-06  | 5   |
| 9999589  | 2017-01-07  | 6   |
| 9999589  | 2017-01-08  | 7   |
| 9999589  | 2017-01-09  | 8   |
| 9999589  | 2017-01-10  | 9   |
| 9999589  | 2017-01-12  | 10  |
| 9999589  | 2017-01-13  | 11  |
| 9999589  | 2017-01-14  | 12  |
| 9999589  | 2017-01-15  | 13  |
| 9999589  | 2017-01-16  | 14  |
+----------+-------------+-----+--+
2)对用户登录累计天数进行分组
-- Step 2: group the days into runs of consecutive days.
-- Within an unbroken run both the day and its rank grow by one per row, so (day - rank) is the
-- same anchor date for every day of the run; each distinct anchor therefore marks one run.
-- NOTE(review): this assumes at most one row per (roomid, day) - duplicate days would get
-- different ranks and split a run; confirm against the data.
select
    roomid,
    date_sub(pt_day, rn)
from (
    select
        roomid,
        to_date(pt_day) as pt_day,
        row_number() over (partition by roomid order by to_date(pt_day) asc) as rn
    from temp.tmp_test_room
    -- roomid is a string column: compare it with a string literal rather than relying on an
    -- implicit string/number conversion
    where roomid = '9999589'
        and pt_day between '2017-01-01' and '2017-01-16'
) x
group by
    roomid,
    date_sub(pt_day, rn);
结果:
+----------+-------------+--+
| roomid   | _c1         |
+----------+-------------+--+
| 9999589  | 2016-12-31  |
| 9999589  | 2017-01-01  |
| 9999589  | 2017-01-02  |
+----------+-------------+--+
3)找到用户登录各个时段的最大、最小日期
-- Step 3: for every run of consecutive days report its first day, last day and length.
-- Days of one run share the same (day - rank) anchor date, which is used as the grouping key.
-- NOTE(review): this assumes at most one row per (roomid, day) - duplicate days would get
-- different ranks and split a run; confirm against the data.
select
    roomid,
    min(pt_day) as continuity_first_day,
    max(pt_day) as continuity_last_day,
    count(*)    as continuity_days
from (
    select
        roomid,
        to_date(pt_day) as pt_day,
        row_number() over (partition by roomid order by to_date(pt_day) asc) as rn
    from temp.tmp_test_room
    -- roomid is a string column: compare it with a string literal rather than relying on an
    -- implicit string/number conversion
    where roomid = '9999589'
        and pt_day between '2017-01-01' and '2017-01-16'
) x
group by
    roomid,
    date_sub(pt_day, rn);
结果:
+----------+-----------------------+----------------------+------------------+--+
| roomid   | continuity_first_day  | continuity_last_day  | continuity_days  |
+----------+-----------------------+----------------------+------------------+--+
| 9999589  | 2017-01-01            | 2017-01-02           | 2                |
| 9999589  | 2017-01-04            | 2017-01-10           | 7                |
| 9999589  | 2017-01-12            | 2017-01-16           | 5                |
+----------+-----------------------+----------------------+------------------+--+
-- For every room: split its days into runs of consecutive days and report, per run, the total
-- length and the number of days that are not a Saturday or Sunday.
with t1 as (
    -- One row per run of consecutive days. Days of the same run share the same (day - rank)
    -- anchor date, which serves as the grouping key.
    -- NOTE(review): assumes at most one row per (roomid, day); duplicates would split a run.
    select
        roomid,
        min(pt_day) as continuity_first_day,
        max(pt_day) as continuity_last_day,
        count(*)    as continuity_days
    from (
        select
            roomid,
            to_date(pt_day) as pt_day,
            row_number() over (partition by roomid order by to_date(pt_day) asc) as rn
        from temp.tmp_test_room
    ) x
    group by
        roomid,
        date_sub(pt_day, rn)
)
select
    roomid,
    continuity_first_day,
    continuity_last_day,
    max(continuity_days) as continuity_days,  -- length of the run in days
    -- Days of the run that are neither Saturday nor Sunday. Counted conditionally instead of
    -- being filtered in WHERE, so a run that consists only of weekend days is still reported
    -- (with 0) rather than silently disappearing from the result.
    sum(case when week_which_day in (0, 6) then 0 else 1 end) as except_weekend_continuity_days
from (
    select
        roomid,
        continuity_first_day,
        continuity_last_day,
        continuity_days,
        date_add(continuity_first_day, pos) as login_day,  -- one row per day of the run
        -- Days elapsed since 2012-01-01 (a Sunday) modulo 7: 0 = Sunday ... 6 = Saturday.
        pmod(datediff(date_add(continuity_first_day, pos), '2012-01-01'), 7) as week_which_day
    from t1
    -- space(n) split on ' ' yields n + 1 elements, so pos runs from 0 to
    -- n = continuity_last_day - continuity_first_day, i.e. once for every day of the run.
    lateral view posexplode(split(space(datediff(continuity_last_day, continuity_first_day)), ' ')) tf as pos, val
) t
group by
    roomid,
    continuity_first_day,
    continuity_last_day;
======对多个用户的打卡记录进行统计,并找到用户的累计打卡时间最长的时段===================
-- For every employee: find the run of consecutive attendance days (is_work = 'Y') that contains
-- the most non-weekend days, and return that single run.
with t1 as (
    -- One row per run of consecutive days. Days of the same run share the same (day - rank)
    -- anchor date, which serves as the grouping key.
    -- NOTE(review): assumes at most one row per (employee_no, day); duplicates would split a run.
    select
        employee_no,
        min(date_col) as continuity_first_day,
        max(date_col) as continuity_last_day,
        count(*)      as continuity_days
    from (
        select
            employee_no,
            to_date(date_col) as date_col,
            row_number() over (partition by employee_no order by to_date(date_col) asc) as rn
        from temp.temp_lz_attendance_190723_190822_1
        where is_work = 'Y'
    ) x
    group by
        employee_no,
        date_sub(date_col, rn)
),
-- A CTE definition requires the AS keyword between its name and the parenthesised query.
t2 as (
    select
        employee_no,
        continuity_first_day,
        continuity_last_day,
        max(continuity_days) as continuity_days,  -- length of the run in days
        -- Days of the run that are neither Saturday nor Sunday. Counted conditionally instead
        -- of being filtered in WHERE, so a run that consists only of weekend days is kept
        -- (with 0) and its employee does not vanish from the final result.
        sum(case when week_which_day in (0, 6) then 0 else 1 end) as except_weekend_continuity_days
    from (
        select
            employee_no,
            continuity_first_day,
            continuity_last_day,
            continuity_days,
            date_add(continuity_first_day, pos) as login_day,  -- one row per day of the run
            -- Days elapsed since 2012-01-01 (a Sunday) modulo 7: 0 = Sunday ... 6 = Saturday.
            pmod(datediff(date_add(continuity_first_day, pos), '2012-01-01'), 7) as week_which_day
        from t1
        -- space(n) split on ' ' yields n + 1 elements, so pos runs from 0 to
        -- n = continuity_last_day - continuity_first_day, i.e. once for every day of the run.
        lateral view posexplode(split(space(datediff(continuity_last_day, continuity_first_day)), ' ')) tf as pos, val
    ) t
    group by
        employee_no,
        continuity_first_day,
        continuity_last_day
)
select
    employee_no,
    continuity_first_day,
    continuity_last_day,
    except_weekend_continuity_days,
    rn
from (
    select
        employee_no,
        continuity_first_day,
        continuity_last_day,
        except_weekend_continuity_days,
        -- Rank each employee's runs, longest non-weekend count first. The start date is a
        -- tie-breaker so the chosen run is deterministic when several runs are equally long.
        row_number() over (
            partition by employee_no
            order by except_weekend_continuity_days desc, continuity_first_day asc
        ) as rn
    from t2
) t
where rn = 1;