
















-- Detail table holding the sample attendance records used by the queries below.
-- Each row is one stay of a person in a class, bounded by two bigint timestamps.
-- NOTE(review): the sample values look like epoch milliseconds - confirm.
create table test_detail (
     id              bigint comment '主键'          -- primary key
    ,num             string comment '班级号码'      -- class number
    ,name            string comment '名字'          -- person name
    ,start_timestamp bigint comment '进入班级时间'  -- time the person entered the class
    ,end_timestamp   bigint comment '离开班级时间'  -- time the person left the class
)
comment '测试数据明细'                              -- table comment: test data detail
row format delimited
    fields terminated by '\t'
stored as textfile;

-- Sample data for test_detail, loaded with one multi-row insert instead of
-- thirteen single-row statements, so the load is a single write.
-- Values are in table column order: id, num, name, start_timestamp, end_timestamp.
-- Ids 1-5 belong to class 01, ids 6-13 to class 02.
insert into table test_detail values
     (1,'01','桑稚',1667516488000,1667516519035)
    ,(2,'01','桑稚',1667516519035,1667516529809)
    ,(3,'01','温以凡',1667516519035,1667516529809)
    ,(4,'01','桑稚',1667516529809,1667516533990)
    ,(5,'01','桑稚',1667516533990,1667516538492)
    ,(6,'02','段嘉许',1667525190365,1667525196616)
    ,(7,'02','桑延',1667525190365,1667525196616)
    ,(8,'02','段嘉许',1667525196616,1667525203375)
    ,(9,'02','桑延',1667525203375,1667525207599)
    ,(10,'02','段嘉许',1667525207599,1667525224663)
    ,(11,'02','桑延',1667525224663,1667525229056)
    ,(12,'02','段嘉许',1667525224663,1667525229056)
    ,(13,'02','段嘉许',1667525229056,1667525232773);



    -- Step 1: flag every record as continued or discontinued.
    -- A record is continued when, within the same class (num) and for the same
    -- person (name), it starts exactly where the previous record of that person
    -- ended, or ends exactly where the next record of that person starts.
    -- The windows are partitioned by (num, name) so that records of other people
    -- sharing a start_timestamp can never sit between two records of one person,
    -- which keeps the flag independent of the order of tied rows.
    select
         id              -- primary key
        ,num             -- class number
        ,name            -- person name
        ,start_timestamp -- time the person entered the class
        ,end_timestamp   -- time the person left the class
        ,case
             -- lag and lead return NULL at the partition edges and a NULL
             -- comparison is not true, so edge rows rely on the other test
             when start_timestamp = lag(end_timestamp)
                                        over (partition by num, name order by start_timestamp)
               or end_timestamp = lead(start_timestamp)
                                        over (partition by num, name order by start_timestamp)
             then 'continued'
             else 'discontinued'
         end as is_continue
    from test_detail
    -- id breaks ties between rows that share a start_timestamp
    order by start_timestamp, id;




-- Step 2: among the continued records, mark the first record of every
-- unbroken run of one person in one class.
with is_continue as (
    -- Flag each record as continued when it touches the previous or next
    -- record of the same person in the same class, otherwise discontinued.
    -- Partitioning by (num, name) keeps the result independent of how rows
    -- of different people with the same start_timestamp happen to be ordered.
    select
         id              -- primary key
        ,num             -- class number
        ,name            -- person name
        ,start_timestamp -- time the person entered the class
        ,end_timestamp   -- time the person left the class
        ,case
             -- NULL from lag or lead at a partition edge never matches
             when start_timestamp = lag(end_timestamp)
                                        over (partition by num, name order by start_timestamp)
               or end_timestamp = lead(start_timestamp)
                                        over (partition by num, name order by start_timestamp)
             then 'continued'
             else 'discontinued'
         end as is_continue
    from test_detail
)
select
     id              -- primary key
    ,num             -- class number
    ,name            -- person name
    ,start_timestamp -- time the person entered the class
    ,end_timestamp   -- time the person left the class
    ,case
         -- no earlier record for this person and the next one starts where this one ends
         when lag(end_timestamp) over (partition by num, name order by start_timestamp) is null
          and end_timestamp = lead(start_timestamp) over (partition by num, name order by start_timestamp)
         then 1
         -- an earlier record exists but does not end where this one starts
         when lag(end_timestamp) over (partition by num, name order by start_timestamp) is not null
          and start_timestamp <> lag(end_timestamp) over (partition by num, name order by start_timestamp)
         then 1
         else 0
     end as start_point -- 1 marks the first record of a run for the same class and person
from is_continue
where is_continue = 'continued' -- only continued records take part
-- id breaks ties between rows that share a start_timestamp
order by start_timestamp, id;



-- Step 3: turn the start markers into a group id, so that all records of one
-- unbroken run share the same id within their (num, name) partition.
with is_continue as (
    -- Flag each record as continued when it touches the previous or next
    -- record of the same person in the same class, otherwise discontinued.
    -- Partitioning by (num, name) keeps the result independent of how rows
    -- of different people with the same start_timestamp happen to be ordered.
    select
         id              -- primary key
        ,num             -- class number
        ,name            -- person name
        ,start_timestamp -- time the person entered the class
        ,end_timestamp   -- time the person left the class
        ,case
             -- NULL from lag or lead at a partition edge never matches
             when start_timestamp = lag(end_timestamp)
                                        over (partition by num, name order by start_timestamp)
               or end_timestamp = lead(start_timestamp)
                                        over (partition by num, name order by start_timestamp)
             then 'continued'
             else 'discontinued'
         end as is_continue
    from test_detail
),
start_point as (
    -- Keep only continued records and mark the first record of each run with 1.
    select
         id              -- primary key
        ,num             -- class number
        ,name            -- person name
        ,start_timestamp -- time the person entered the class
        ,end_timestamp   -- time the person left the class
        ,case
             -- no earlier record for this person and the next one starts where this one ends
             when lag(end_timestamp) over (partition by num, name order by start_timestamp) is null
              and end_timestamp = lead(start_timestamp) over (partition by num, name order by start_timestamp)
             then 1
             -- an earlier record exists but does not end where this one starts
             when lag(end_timestamp) over (partition by num, name order by start_timestamp) is not null
              and start_timestamp <> lag(end_timestamp) over (partition by num, name order by start_timestamp)
             then 1
             else 0
         end as start_point -- 1 marks the first record of a run for the same class and person
    from is_continue
    where is_continue = 'continued'
)
select
     id              -- primary key
    ,num             -- class number
    ,name            -- person name
    ,start_timestamp -- time the person entered the class
    ,end_timestamp   -- time the person left the class
    -- running total of the start markers, explicit ROWS frame so tied rows are not summed together
    ,sum(start_point) over (
         partition by num, name
         order by start_timestamp, end_timestamp
         rows between unbounded preceding and current row
     ) as group_id   -- run id within (num, name)
from start_point
-- id breaks ties between rows that share a start_timestamp
order by start_timestamp, id;



-- Step 4: compute the start and end of every merged period, that is the
-- earliest start_timestamp and latest end_timestamp of each run.
with is_continue as (
    -- Flag each record as continued when it touches the previous or next
    -- record of the same person in the same class, otherwise discontinued.
    -- Partitioning by (num, name) keeps the result independent of how rows
    -- of different people with the same start_timestamp happen to be ordered.
    select
         id              -- primary key
        ,num             -- class number
        ,name            -- person name
        ,start_timestamp -- time the person entered the class
        ,end_timestamp   -- time the person left the class
        ,case
             -- NULL from lag or lead at a partition edge never matches
             when start_timestamp = lag(end_timestamp)
                                        over (partition by num, name order by start_timestamp)
               or end_timestamp = lead(start_timestamp)
                                        over (partition by num, name order by start_timestamp)
             then 'continued'
             else 'discontinued'
         end as is_continue
    from test_detail
),
start_point as (
    -- Keep only continued records and mark the first record of each run with 1.
    select
         id              -- primary key
        ,num             -- class number
        ,name            -- person name
        ,start_timestamp -- time the person entered the class
        ,end_timestamp   -- time the person left the class
        ,case
             -- no earlier record for this person and the next one starts where this one ends
             when lag(end_timestamp) over (partition by num, name order by start_timestamp) is null
              and end_timestamp = lead(start_timestamp) over (partition by num, name order by start_timestamp)
             then 1
             -- an earlier record exists but does not end where this one starts
             when lag(end_timestamp) over (partition by num, name order by start_timestamp) is not null
              and start_timestamp <> lag(end_timestamp) over (partition by num, name order by start_timestamp)
             then 1
             else 0
         end as start_point -- 1 marks the first record of a run for the same class and person
    from is_continue
    where is_continue = 'continued'
),
group_id as (
    -- Running total of the start markers gives every run its own id.
    select
         id              -- primary key
        ,num             -- class number
        ,name            -- person name
        ,start_timestamp -- time the person entered the class
        ,end_timestamp   -- time the person left the class
        ,sum(start_point) over (
             partition by num, name
             order by start_timestamp, end_timestamp
             rows between unbounded preceding and current row
         ) as group_id   -- run id within (num, name)
    from start_point
)
select
     id              -- primary key
    ,num             -- class number
    ,name            -- person name
    ,group_id        -- run id within (num, name)
    ,min(start_timestamp) over (partition by num, name, group_id) as speech_start -- period start
    ,max(end_timestamp)   over (partition by num, name, group_id) as speech_end   -- period end
from group_id
-- id breaks ties between rows that share a start_timestamp
order by start_timestamp, id;



-- Step 5: attach to every record the ids collected so far inside its period.
with is_continue as (
    -- Flag each record as continued when it touches the previous or next
    -- record of the same person in the same class, otherwise discontinued.
    -- Partitioning by (num, name) keeps the result independent of how rows
    -- of different people with the same start_timestamp happen to be ordered.
    select
         id              -- primary key
        ,num             -- class number
        ,name            -- person name
        ,start_timestamp -- time the person entered the class
        ,end_timestamp   -- time the person left the class
        ,case
             -- NULL from lag or lead at a partition edge never matches
             when start_timestamp = lag(end_timestamp)
                                        over (partition by num, name order by start_timestamp)
               or end_timestamp = lead(start_timestamp)
                                        over (partition by num, name order by start_timestamp)
             then 'continued'
             else 'discontinued'
         end as is_continue
    from test_detail
),
start_point as (
    -- Keep only continued records and mark the first record of each run with 1.
    select
         id              -- primary key
        ,num             -- class number
        ,name            -- person name
        ,start_timestamp -- time the person entered the class
        ,end_timestamp   -- time the person left the class
        ,case
             -- no earlier record for this person and the next one starts where this one ends
             when lag(end_timestamp) over (partition by num, name order by start_timestamp) is null
              and end_timestamp = lead(start_timestamp) over (partition by num, name order by start_timestamp)
             then 1
             -- an earlier record exists but does not end where this one starts
             when lag(end_timestamp) over (partition by num, name order by start_timestamp) is not null
              and start_timestamp <> lag(end_timestamp) over (partition by num, name order by start_timestamp)
             then 1
             else 0
         end as start_point -- 1 marks the first record of a run for the same class and person
    from is_continue
    where is_continue = 'continued'
),
group_id as (
    -- Running total of the start markers gives every run its own id.
    select
         id              -- primary key
        ,num             -- class number
        ,name            -- person name
        ,start_timestamp -- time the person entered the class
        ,end_timestamp   -- time the person left the class
        ,sum(start_point) over (
             partition by num, name
             order by start_timestamp, end_timestamp
             rows between unbounded preceding and current row
         ) as group_id   -- run id within (num, name)
    from start_point
),
min_max as (
    -- Earliest start and latest end of each run, repeated on every record of the run.
    select
         id              -- primary key
        ,num             -- class number
        ,name            -- person name
        ,start_timestamp -- time the person entered the class
        ,end_timestamp   -- time the person left the class
        ,group_id        -- run id within (num, name)
        ,min(start_timestamp) over (partition by num, name, group_id) as talk_start -- period start
        ,max(end_timestamp)   over (partition by num, name, group_id) as talk_end   -- period end
    from group_id
)
select
     id              -- primary key
    ,num             -- class number
    ,name            -- person name
    ,start_timestamp -- time the person entered the class
    ,end_timestamp   -- time the person left the class
    ,talk_start      -- period start
    ,talk_end        -- period end
    -- the window has an ORDER BY and no explicit frame, so the collected set
    -- grows row by row and only the last record of a period holds every id
    ,concat_ws(' ', collect_set(cast(id as string))
                        over (partition by num, name, talk_start, talk_end
                              order by start_timestamp asc)) as talk_ids
from min_max
-- id breaks ties between rows that share a start_timestamp
order by start_timestamp, id;




-- Step 6: final result, one row per merged period with the ids of all
-- records that make up the period.
with is_continue as (
    -- Flag each record as continued when it touches the previous or next
    -- record of the same person in the same class, otherwise discontinued.
    -- Partitioning by (num, name) keeps the result independent of how rows
    -- of different people with the same start_timestamp happen to be ordered.
    select
         id              -- primary key
        ,num             -- class number
        ,name            -- person name
        ,start_timestamp -- time the person entered the class
        ,end_timestamp   -- time the person left the class
        ,case
             -- NULL from lag or lead at a partition edge never matches
             when start_timestamp = lag(end_timestamp)
                                        over (partition by num, name order by start_timestamp)
               or end_timestamp = lead(start_timestamp)
                                        over (partition by num, name order by start_timestamp)
             then 'continued'
             else 'discontinued'
         end as is_continue
    from test_detail
),
start_point as (
    -- Keep only continued records and mark the first record of each run with 1.
    select
         id              -- primary key
        ,num             -- class number
        ,name            -- person name
        ,start_timestamp -- time the person entered the class
        ,end_timestamp   -- time the person left the class
        ,case
             -- no earlier record for this person and the next one starts where this one ends
             when lag(end_timestamp) over (partition by num, name order by start_timestamp) is null
              and end_timestamp = lead(start_timestamp) over (partition by num, name order by start_timestamp)
             then 1
             -- an earlier record exists but does not end where this one starts
             when lag(end_timestamp) over (partition by num, name order by start_timestamp) is not null
              and start_timestamp <> lag(end_timestamp) over (partition by num, name order by start_timestamp)
             then 1
             else 0
         end as start_point -- 1 marks the first record of a run for the same class and person
    from is_continue
    where is_continue = 'continued'
),
group_id as (
    -- Running total of the start markers gives every run its own id.
    select
         id              -- primary key
        ,num             -- class number
        ,name            -- person name
        ,start_timestamp -- time the person entered the class
        ,end_timestamp   -- time the person left the class
        ,sum(start_point) over (
             partition by num, name
             order by start_timestamp, end_timestamp
             rows between unbounded preceding and current row
         ) as group_id   -- run id within (num, name)
    from start_point
),
min_max as (
    -- Earliest start and latest end of each run, repeated on every record of the run.
    select
         id              -- primary key
        ,num             -- class number
        ,name            -- person name
        ,start_timestamp -- time the person entered the class
        ,end_timestamp   -- time the person left the class
        ,group_id        -- run id within (num, name)
        ,min(start_timestamp) over (partition by num, name, group_id) as talk_start -- period start
        ,max(end_timestamp)   over (partition by num, name, group_id) as talk_end   -- period end
    from group_id
),
talk_ids as (
    -- Running id list per period. The window has an ORDER BY and no explicit
    -- frame, so only the last record of a period holds every id of the period.
    select
         id              -- primary key
        ,num             -- class number
        ,name            -- person name
        ,start_timestamp -- time the person entered the class
        ,end_timestamp   -- time the person left the class
        ,talk_start      -- period start
        ,talk_end        -- period end
        ,concat_ws(' ', collect_set(cast(id as string))
                            over (partition by num, name, talk_start, talk_end
                                  order by start_timestamp asc)) as talk_ids
    from min_max
)
select
     id              -- primary key
    ,num             -- class number
    ,name            -- person name
    ,start_timestamp -- time the person entered the class
    ,end_timestamp   -- time the person left the class
    ,talk_start      -- period start
    ,talk_end        -- period end
    ,talk_ids        -- ids of the period, concatenated in ascending time order
from talk_ids
-- keep the closing record of each period, the one whose id list is complete
where end_timestamp = talk_end
-- id breaks ties between rows that share a start_timestamp
order by start_timestamp, id;


