uid vid starttime endtime
-- Sample input: one row per viewing session (user, video, start time, end time).
-- Times are 'yyyy-MM-dd HH:mm' strings on a 24-hour clock.
-- The two literal rows are distinct, so union all returns the same rows as
-- union without the de-duplication step.
select 'aa' as uid, 'v001' as vid, '2023-10-25 12:00' as starttime, '2023-10-25 12:15' as endtime
union all
select 'bb' as uid, 'v002' as vid, '2023-10-25 12:05' as starttime, '2023-10-25 12:19' as endtime
结果如下:
-- Count the distinct users watching video in each minute (row_number variant).
-- Each viewing session is expanded into one row per minute, then the rows are
-- grouped by minute.
-- Step comments sit on their own lines so that clients which only strip
-- whole-line "--" comments can still run the statement.

-- step6: count distinct users per minute and sort by time
select
    each_online_minute
    -- 'HH' is the 24-hour clock; 'hh' (12-hour) would mis-handle times >= 13:00
    ,from_unixtime(each_online_minute, 'yyyy-MM-dd HH:mm') as minute_label
    ,count(distinct uid) as online_uid_cnt
from
(
    -- step5: build one minute-level watch record per user per minute.
    -- row_number() numbers the exploded rows 1..min_cnt within a session, so
    -- each_online_minute runs from starttime + 1 minute up to endtime.
    select
        uid
        ,vid
        ,starttime
        ,starttimestamp
        ,endtime
        ,endtimestamp
        ,min_cnt
        ,row_number() over (partition by uid, vid, starttime order by endtimestamp) as rk
        ,starttimestamp + 60 * row_number() over (partition by uid, vid, starttime order by endtimestamp) as each_online_minute
    from
    (
        -- step4: explode the array so each session yields min_cnt rows
        select
            uid
            ,vid
            ,starttime
            ,starttimestamp
            ,endtime
            ,endtimestamp
            ,min_cnt
            ,new_repeat
        from
        (
            -- step3: use repeat() to build a string that splits into min_cnt elements.
            -- 'a,' repeated (min_cnt - 1) times, split on ',', gives min_cnt - 1
            -- 'a' items plus one trailing empty string.
            -- min_cnt comes from a "/" division (a double), while repeat()
            -- takes an integer count, hence the explicit cast.
            select
                uid
                ,vid
                ,starttime
                ,starttimestamp
                ,endtime
                ,endtimestamp
                ,min_cnt
                ,repeat('a,', cast(min_cnt as int) - 1) as repeat_str
            from
            (
                -- step2: convert to unix timestamps and compute the watch duration in minutes
                select
                    uid
                    ,vid
                    ,starttime
                    ,unix_timestamp(starttime, 'yyyy-MM-dd HH:mm') as starttimestamp
                    ,endtime
                    ,unix_timestamp(endtime, 'yyyy-MM-dd HH:mm') as endtimestamp
                    ,(unix_timestamp(endtime, 'yyyy-MM-dd HH:mm') - unix_timestamp(starttime, 'yyyy-MM-dd HH:mm')) / 60 as min_cnt
                from
                (
                    -- step1: raw sample data (distinct literal rows, so union all is enough)
                    select 'aa' as uid, 'v001' as vid, '2023-10-25 12:00' as starttime, '2023-10-25 12:15' as endtime
                    union all
                    select 'bb' as uid, 'v002' as vid, '2023-10-25 12:05' as starttime, '2023-10-25 12:19' as endtime
                ) tb_base
            ) tb_tmp
        ) tb_final
        lateral view explode(split(repeat_str, ',')) tb_exploded as new_repeat
    ) tb_outer
) tb
group by each_online_minute
order by each_online_minute asc
-- Count the distinct users watching video in each minute (posexplode variant).
-- posexplode() returns the element position alongside the value, so no
-- row_number() window is needed to number the minutes of a session.
-- Step comments sit on their own lines so that clients which only strip
-- whole-line "--" comments can still run the statement.

-- step6: count distinct users per minute and sort by time
select
    each_online_minute
    -- 'HH' is the 24-hour clock; 'hh' (12-hour) would mis-handle times >= 13:00
    ,from_unixtime(each_online_minute, 'yyyy-MM-dd HH:mm') as minute_label
    ,count(distinct uid) as online_uid_cnt
from
(
    -- step5: build one minute-level watch record per user per minute.
    -- repeat_pos is the 0-based position from posexplode, so
    -- each_online_minute runs from starttime up to endtime - 1 minute.
    select
        uid
        ,vid
        ,starttime
        ,starttimestamp
        ,endtime
        ,endtimestamp
        ,min_cnt
        ,starttimestamp + 60 * repeat_pos as each_online_minute
    from
    (
        -- step4: explode the array (with positions) so each session yields min_cnt rows
        select
            uid
            ,vid
            ,starttime
            ,starttimestamp
            ,endtime
            ,endtimestamp
            ,min_cnt
            ,new_repeat
            ,repeat_pos
        from
        (
            -- step3: use repeat() to build a string that splits into min_cnt elements.
            -- 'a,' repeated (min_cnt - 1) times, split on ',', gives min_cnt - 1
            -- 'a' items plus one trailing empty string.
            -- min_cnt comes from a "/" division (a double), while repeat()
            -- takes an integer count, hence the explicit cast.
            select
                uid
                ,vid
                ,starttime
                ,starttimestamp
                ,endtime
                ,endtimestamp
                ,min_cnt
                ,repeat('a,', cast(min_cnt as int) - 1) as repeat_str
            from
            (
                -- step2: convert to unix timestamps and compute the watch duration in minutes
                select
                    uid
                    ,vid
                    ,starttime
                    ,unix_timestamp(starttime, 'yyyy-MM-dd HH:mm') as starttimestamp
                    ,endtime
                    ,unix_timestamp(endtime, 'yyyy-MM-dd HH:mm') as endtimestamp
                    ,(unix_timestamp(endtime, 'yyyy-MM-dd HH:mm') - unix_timestamp(starttime, 'yyyy-MM-dd HH:mm')) / 60 as min_cnt
                from
                (
                    -- step1: raw sample data (distinct literal rows, so union all is enough)
                    select 'aa' as uid, 'v001' as vid, '2023-10-25 12:00' as starttime, '2023-10-25 12:15' as endtime
                    union all
                    select 'bb' as uid, 'v002' as vid, '2023-10-25 12:05' as starttime, '2023-10-25 12:19' as endtime
                ) tb_base
            ) tb_tmp
        ) tb_final
        lateral view posexplode(split(repeat_str, ',')) tb_pos as repeat_pos, new_repeat
    ) tb_outer
) tb
group by each_online_minute
order by each_online_minute asc
用法:reverse('foobar')参数类型string,返回值string,例如:
-- Example: reverse() applied to the string literal 'foobar'.
select reverse('foobar')
-- returns: raboof
用法:space(10)参数类型int,返回值string 例如:
-- Example: space() called with 10.
select space(10)
-- returns: '          '  (a string of 10 spaces)
4.3、repeat返回字符串重复n次后的字符串
用法:repeat('a',10) 例如:
-- Example: repeat() applied to 'a' with a count of 10.
select repeat('a',10)
-- returns: 'aaaaaaaaaa'
4.4、split将字符串按指定分隔符拆分为数组 用法:
-- Example: split() applied to 'a,b,c,d' with ',' as the separator.
select split('a,b,c,d',',')
-- returns: ["a","b","c","d"]