HIVE中窗口函数的使用

-- Rebuilds tmp_dw_dahai.fact_clue_state_log (Parquet) from dw_dahai.bas_clue_state_log.
-- One output row per source row that survives both filters below, enriched with:
--   * start_time_sub_60s : start_time shifted back by 60 seconds
--   * next_sale_status   : next clue_status within the same (sale_id, clue_id), by start_time
--   * next_clue_status   : next clue_status within the same clue_id, by start_time
--   * num_sale_status    : 1-based sequence number within (sale_id, clue_id), by start_time
--   * num_status         : 1-based sequence number within clue_id, by start_time
-- The window functions are evaluated after the WHERE clause, so they only see the
-- rows that remain once the filters are applied.
-- NOTE(review): rows sharing the same start_time inside a partition are ordered
-- arbitrarily, so lead()/row_number() are not deterministic for such ties;
-- add a tie-breaker column if the source table has one.
drop table if exists tmp_dw_dahai.fact_clue_state_log;
create table if not exists tmp_dw_dahai.fact_clue_state_log stored as parquet as
select
    a.sale_id
    ,a.clue_id
    ,a.clue_status
    ,a.start_time
    ,from_unixtime(unix_timestamp(a.start_time) - 60, 'yyyy-MM-dd HH:mm:ss') as start_time_sub_60s
    ,a.end_time
    ,lead(a.clue_status, 1) over (partition by a.sale_id, a.clue_id order by a.start_time) as next_sale_status -- next status of this clue under the same sale_id
    ,lead(a.clue_status, 1) over (partition by a.clue_id order by a.start_time) as next_clue_status -- next status of this clue regardless of sale_id
    ,row_number() over (partition by a.sale_id, a.clue_id order by a.start_time) as num_sale_status
    ,row_number() over (partition by a.clue_id order by a.start_time) as num_status
from dw_dahai.bas_clue_state_log a
left join (
    -- Clues that have the same clue_status on two or more log rows.
    -- The inner GROUP BY finds repeated (clue_id, clue_status) pairs; the outer
    -- GROUP BY collapses them to one row per clue_id (Hive does not allow
    -- SELECT DISTINCT together with GROUP BY in a single query).
    -- This lookup scans the whole log; the clue_create_time filter below is
    -- applied only to the outer rows.
    select
        d.clue_id
    from (
        select
            s.clue_id
        from dw_dahai.bas_clue_state_log s
        group by
            s.clue_id
            ,s.clue_status
        having count(*) >= 2
    ) d
    group by
        d.clue_id
) b
    on a.clue_id = b.clue_id
where b.clue_id is null -- anti-join: drop clues that entered the same status more than once (per the original author, mostly the "invalid" status)
  and a.clue_create_time >= '2018-07-01'
;
------------------------------------------------------------

上述代码中用到的窗口函数 LEAD 的用法:

LEAD(col, n, DEFAULT) 用于取窗口(分区)内按 ORDER BY 排序后,当前行往下第 n 行的 col 值。
参数1为列名;参数2为往下的行数 n(可选,默认为1);参数3为默认值(可选):当往下第 n 行超出窗口范围、即该行不存在时返回默认值,如不指定则返回 NULL。例如上面的 lead(a.clue_status,1) 在每个分区的最后一行上返回 NULL。

 

你可能感兴趣的:(数据库,HIVE)