当前工作主要是挖掘数据、分析数据、向领导汇报数据,因此已经练就了挖掘各种数据的能力:无论复杂还是简单,只要领导有需求,现有能力基本都可以支撑。现将写过的脚本整理在此,供各位参考。
以下脚本按由简单到复杂的顺序排列,主要参考我司现有业务数据。
表连接(join)方式见我另一篇博文:https://blog.csdn.net/weixin_42388255/article/details/116450944
-- Basic single-table query: project a few columns from one table, restricted
-- to a date window and a list of vids.
-- `select`, `from` and `where` are fixed keywords; only the column list, the
-- table name and the filter values change from request to request.
select
    os,
    ver,
    ip_country,
    cname                 -- last output column: no trailing comma
from dwd.xxx              -- source table; swap in whichever table the request needs
where date between "${startdate}" and "${enddate}"   -- inclusive on both ends
  and vid in (${vidlist})                            -- standard vid filter
-- Two-table pattern: build each input as a named CTE, then join the CTEs on a
-- shared key. `with` opens the CTE list.
with
A as (                    -- CTE "A"
    select
        sid,
        os,
        ver,
        ip_country,
        cname             -- last output column: no trailing comma
    from dwd.xxx          -- source table; replace as needed
    where date between "${startdate}" and "${enddate}"
      and vid in (${vidlist})
),                        -- a comma separates consecutive CTEs
B as (                    -- CTE "B"
    select
        sid,
        os,
        ver,
        ip_country,
        cname             -- last output column: no trailing comma
    from dim.xxx          -- source table; replace as needed
    where date between "${startdate}" and "${enddate}"
      and vid in (${vidlist})
)                         -- no comma after the final CTE
-- Combine A and B into the final result set.
select
    A.sid,
    A.os,
    A.ver,
    A.ip_country,
    A.cname               -- output header; add B columns here for fields A does not have
from A
inner join B              -- the join type can be changed (left join, full join, ...)
    on A.sid = B.sid      -- join key
-- Largest per-sid distinct spk_uid count for every (vid, date, hour, minute):
--   1) the inner query counts distinct spk_uid per (vid, sid, date, hour, minute);
--   2) the outer query keeps the maximum of those counts across sids.
select
    vid,
    date,
    hour,
    minute,
    max(spk_uid_cnt) as max_spk_uid_cnt
from (
    select
        vid,
        sid,
        date,
        hour,
        minute,
        count(distinct spk_uid) as spk_uid_cnt
    from dwd.xxx          -- source table; swap in whichever table the request needs
    -- The date parameters are quoted, as in the other templates: an unquoted
    -- yyyy-MM-dd value would be evaluated as integer subtraction.
    where date >= "${startdate}" and date <= "${enddate}"
      and vid in (${vidlist})
    -- every non-aggregated select column must be listed in group by
    group by vid, sid, date, hour, minute
) as per_sid
group by vid, date, hour, minute
order by date, hour, minute   -- optional; ascending by default
使用分位函数 percentile 求分位数
-- Average and percentiles (10/25/50/75/90) of the per-cname maximum of
-- peak_user_cnt. Passing an array to percentile() returns one value per
-- requested percentage, in the same order.
select
    avg(peak_user_cnt) as peak_user_cnt_avg,
    percentile(peak_user_cnt, array(0.1, 0.25, 0.5, 0.75, 0.9)) as `峰值在线用户数分位数10_25_50_75_90`
from (
    -- One row per cname with its highest peak_user_cnt inside the window.
    select
        cname,
        max(peak_user_cnt) as peak_user_cnt
    from dwd.xxx
    -- The date parameters are quoted so a yyyy-MM-dd value is compared as a
    -- string instead of being evaluated as integer subtraction.
    where date >= "${startdate}" and date <= "${enddate}"
      and vid in (${vidlist})
    group by cname
) as per_cname
-- Share of cnames by native_host_cnt bucket (1, 2, 3, 4, 5, 6-9, 10-19, 20+),
-- relative to all cnames with native_host_cnt > 0.
with cname_user as (
    -- One row per cname: total_user_cnt and native_host_sender_cnt summed over
    -- the date window.
    select
        cname,
        sum(total_user_cnt) as total_user_cnt,
        sum(native_host_sender_cnt) as native_host_cnt
    from dwd.xxxx
    -- The date parameters are quoted so a yyyy-MM-dd value is compared as a
    -- string instead of being evaluated as integer subtraction.
    where date >= "${startdate}" and date <= "${enddate}"
      and vid in (${vidlist})
    group by cname
),
base_date as (
    -- Per-cname bucket flags: count(case ...) yields 1 when the cname falls in
    -- the bucket and 0 otherwise, because count() ignores NULL.
    select
        cname,
        -- The 2_5 bucket is inclusive of 5; the next bucket starts at 6, so a
        -- strict "< 5" would leave total_user_cnt = 5 in no bucket at all.
        count(case when total_user_cnt >= 2 and total_user_cnt <= 5 then 1 end) as cnt_total_list_2_5,
        count(case when total_user_cnt >= 6 and total_user_cnt < 10 then 1 end) as cnt_total_list_6_10,
        count(case when total_user_cnt >= 10 and total_user_cnt < 20 then 1 end) as cnt_total_list_10_20,
        count(case when total_user_cnt >= 20 then 1 end) as cnt_total_list_20plus,
        count(case when total_user_cnt > 0 then 1 end) as cnt_total_list_all,
        sum(case when total_user_cnt > 0 then total_user_cnt end) as sum_total_cnt,
        count(case when native_host_cnt = 1 then 1 end) as cnt_host_list_1,
        count(case when native_host_cnt = 2 then 1 end) as cnt_host_list_2,
        count(case when native_host_cnt = 3 then 1 end) as cnt_host_list_3,
        count(case when native_host_cnt = 4 then 1 end) as cnt_host_list_4,
        count(case when native_host_cnt = 5 then 1 end) as cnt_host_list_5,
        count(case when native_host_cnt >= 6 and native_host_cnt < 10 then 1 end) as cnt_host_list_6_10,
        count(case when native_host_cnt >= 10 and native_host_cnt < 20 then 1 end) as cnt_host_list_10_20,
        count(case when native_host_cnt >= 20 then 1 end) as cnt_host_list_20plus,
        count(case when native_host_cnt > 0 then 1 end) as cnt_host_list_all,
        sum(case when native_host_cnt > 0 then native_host_cnt end) as sum_host_cnt
    from cname_user
    group by cname
),
table_send as (
    -- Collapse the per-cname flags into a single row of bucket totals.
    select
        sum(cnt_total_list_2_5) as cnt_total_list_2_5,
        sum(cnt_total_list_6_10) as cnt_total_list_6_10,
        sum(cnt_total_list_10_20) as cnt_total_list_10_20,
        sum(cnt_total_list_20plus) as cnt_total_list_20plus,
        sum(cnt_total_list_all) as cnt_total_list_all,
        sum(sum_total_cnt) as sum_total_cnt,
        sum(cnt_host_list_1) as cnt_host_list_1,
        sum(cnt_host_list_2) as cnt_host_list_2,
        sum(cnt_host_list_3) as cnt_host_list_3,
        sum(cnt_host_list_4) as cnt_host_list_4,
        sum(cnt_host_list_5) as cnt_host_list_5,
        sum(cnt_host_list_6_10) as cnt_host_list_6_10,
        sum(cnt_host_list_10_20) as cnt_host_list_10_20,
        sum(cnt_host_list_20plus) as cnt_host_list_20plus,
        sum(cnt_host_list_all) as cnt_host_list_all,
        sum(sum_host_cnt) as sum_host_cnt
    from base_date
)
-- nullif() turns a zero denominator into NULL so each ratio is NULL rather
-- than a division-by-zero when no cname has native_host_cnt > 0.
select
    cnt_host_list_1 / nullif(cnt_host_list_all, 0) as ratio_host_list_1,
    cnt_host_list_2 / nullif(cnt_host_list_all, 0) as ratio_host_list_2,
    cnt_host_list_3 / nullif(cnt_host_list_all, 0) as ratio_host_list_3,
    cnt_host_list_4 / nullif(cnt_host_list_all, 0) as ratio_host_list_4,
    cnt_host_list_5 / nullif(cnt_host_list_all, 0) as ratio_host_list_5,
    cnt_host_list_6_10 / nullif(cnt_host_list_all, 0) as ratio_host_list_6_10,
    cnt_host_list_10_20 / nullif(cnt_host_list_all, 0) as ratio_host_list_10_20,
    cnt_host_list_20plus / nullif(cnt_host_list_all, 0) as ratio_host_list_20plus
from table_send
-- Per (vid, ver) and per bucket of s2lv delay / jitter95 / lostRatio: number
-- of distinct (sid, date, hour, minute) samples plus the summed freeze
-- metrics and freeze ratios taken from the two freeze CTEs.
with vid_info as (
    -- Distinct (sid, ver, vid) triples seen inside the date window.
    select distinct sid, ver, vid
    from xxxx
    where date between "${startdate}" and "${enddate}"
),
vid_info2 as (
    -- The same triples, minus every sid that has at least one row whose
    -- spk_screen_share_type_desc is not 'UNKNOWN' inside the window.
    select distinct A1.sid, ver, vid
    from (
        select distinct sid, ver, vid
        from xxxx
        where date between "${startdate}" and "${enddate}"
    ) as A1
    left anti join (
        select distinct date, sid
        from dwd.xxxx
        where date between "${startdate}" and "${enddate}"
          and spk_screen_share_type_desc <> 'UNKNOWN'
    ) as B
        on A1.sid = B.sid
),
s2lv_info as (
    -- Per (date, hour, minute, sid): max, avg and non-null sample count of
    -- delay, jitter95 and lostRatio over rows whose name is 's2lv'.
    -- hour / minute are the 'HH' / 'mm' parts of ts.
    select
        date,
        from_unixtime(floor(ts), 'HH') as hour,
        from_unixtime(floor(ts), 'mm') as minute,
        sid,
        max(delay) as delay_max,
        max(jitter95) as jitter95_max,
        max(400lostRatio) as lost400_max,
        avg(delay) as delay_avg,
        avg(jitter95) as jitter95_avg,
        avg(400lostRatio) as lost400_avg,
        count(delay) as delay_cnt,
        count(jitter95) as jitter95_cnt,
        count(400lostRatio) as 400lostRatio_cnt
    from (
        select
            /*+ broadcast(table_vid)*/
            date,
            ts,
            sid,
            voqaStat.delay as delay,
            voqaStat.jitter95 as jitter95,
            voqaStat.lostRatio as 400lostRatio
        from xxxx table_voqa
        -- NOTE(review): an anti join keeps only the table_voqa rows whose sid
        -- is NOT in vid_info, yet the result is later inner-joined to
        -- vid_info2 on sid. If vid_info2's sids are a subset of vid_info's,
        -- that join returns nothing -- confirm whether `left semi join` was
        -- intended here.
        left anti join (
            select sid
            from vid_info
        ) as table_vid
            on table_voqa.sid = table_vid.sid
        where table_voqa.date >= "${startdate}"
          and table_voqa.date <= "${enddate}"
          and table_voqa.name = 's2lv'
    ) as voqa
    -- positions 1-4 = date, hour, minute, sid (hour / minute are expressions)
    group by 1, 2, 3, 4
),
600ms_video_freeze as (
    -- Per (date, hour, minute, sid, spk_uid): summed 600ms_freeze_ms / 600ms_total_ms.
    select
        date,
        hour,
        minute,
        sid,
        spk_uid,
        sum(600ms_freeze_ms) as 600ms_freeze_ms,
        sum(600ms_total_ms) as 600ms_total_ms
    from dwd.xxxx
    -- Dates quoted like every other filter in this query; an unquoted
    -- yyyy-MM-dd value would be evaluated as integer subtraction.
    where date >= "${startdate}" and date <= "${enddate}"
    group by 1, 2, 3, 4, 5
),
200ms_audio_freeze as (
    -- Per (date, hour, minute, sid, spk_uid): summed 200ms_freeze_ms / 200ms_total_ms.
    select
        date,
        hour,
        minute,
        sid,
        spk_uid,
        sum(200ms_freeze_ms) as 200ms_audio_freeze,
        sum(200ms_total_ms) as 200ms_audio_total
    from dwd.xxx
    where date >= "${startdate}" and date <= "${enddate}"
    group by 1, 2, 3, 4, 5
),
speaker_freeze as (
    -- One row per (date, hour, minute, sid, spk_uid) carrying both the video
    -- and the audio metrics. The two freeze CTEs are matched on spk_uid here
    -- so that joining them to the per-minute network rows below cannot
    -- multiply video rows by audio rows (which would inflate every sum).
    -- A full outer join keeps speakers present in only one of the two inputs.
    select
        coalesce(v.date, a.date) as date,
        coalesce(v.hour, a.hour) as hour,
        coalesce(v.minute, a.minute) as minute,
        coalesce(v.sid, a.sid) as sid,
        coalesce(v.spk_uid, a.spk_uid) as spk_uid,
        v.`600ms_freeze_ms`,
        v.`600ms_total_ms`,
        a.`200ms_audio_freeze`,
        a.`200ms_audio_total`
    from 600ms_video_freeze as v
    full outer join 200ms_audio_freeze as a
        on v.date = a.date
       and v.hour = a.hour
       and v.minute = a.minute
       and v.sid = a.sid
       and v.spk_uid = a.spk_uid
),
base_table as (
    select
        vid,
        ver,
        (case when lost400_max = 0 then 'lost400max_0'
              when lost400_max > 0 and lost400_max <= 10 then 'lost400max_0_10'
              when lost400_max > 10 and lost400_max <= 20 then 'lost400max_10_20'
              when lost400_max > 20 and lost400_max <= 30 then 'lost400max_20_30'
              when lost400_max > 30 and lost400_max <= 40 then 'lost400max_30_40'
              when lost400_max > 40 and lost400_max <= 50 then 'lost400max_40_50'
              when lost400_max > 50 and lost400_max <= 60 then 'lost400max_50_60'
              when lost400_max > 60 and lost400_max <= 70 then 'lost400max_60_70'
              when lost400_max > 70 and lost400_max <= 80 then 'lost400max_70_80'
              when lost400_max > 80 then 'lost400max_80plus'
              else 'unusual'
         end) as lost400max_type,
        (case when lost400_avg = 0 then 'lost400avg_0'
              when lost400_avg > 0 and lost400_avg <= 10 then 'lost400avg_0_10'
              when lost400_avg > 10 and lost400_avg <= 20 then 'lost400avg_10_20'
              when lost400_avg > 20 and lost400_avg <= 30 then 'lost400avg_20_30'
              when lost400_avg > 30 and lost400_avg <= 40 then 'lost400avg_30_40'
              when lost400_avg > 40 and lost400_avg <= 50 then 'lost400avg_40_50'
              when lost400_avg > 50 and lost400_avg <= 60 then 'lost400avg_50_60'
              when lost400_avg > 60 and lost400_avg <= 70 then 'lost400avg_60_70'
              when lost400_avg > 70 and lost400_avg <= 80 then 'lost400avg_70_80'
              -- threshold matches the 80plus label (and the lost400_max chain above)
              when lost400_avg > 80 then 'lost400avg_80plus'
              else 'unusual'
         end) as lost400avg_type,
        -- The numeric prefix on each label below is part of the emitted string.
        (case when delay_jitter95_summax > 0 and delay_jitter95_summax <= 100 then '1delay_jitter95_summax_0_100'
              when delay_jitter95_summax > 100 and delay_jitter95_summax <= 200 then '2delay_jitter95_summax_100_200'
              when delay_jitter95_summax > 200 and delay_jitter95_summax <= 400 then '3delay_jitter95_summax_200_400'
              when delay_jitter95_summax > 400 and delay_jitter95_summax <= 800 then '4delay_jitter95_summax_400_800'
              when delay_jitter95_summax > 800 and delay_jitter95_summax <= 1200 then '5delay_jitter95_summax_800_1200'
              when delay_jitter95_summax > 1200 and delay_jitter95_summax <= 1600 then '6delay_jitter95_summax_1200_1600'
              when delay_jitter95_summax > 1600 and delay_jitter95_summax <= 2000 then '7delay_jitter95_summax_1600_2000'
              when delay_jitter95_summax > 2000 and delay_jitter95_summax <= 2500 then '8delay_jitter95_summax_2000_2500'
              when delay_jitter95_summax > 2500 and delay_jitter95_summax <= 3000 then '9delay_jitter95_summax_2500_3000'
              when delay_jitter95_summax > 3000 then '10delay_jitter95_summax_3000plus'
              else 'unusual'
         end) as delay_jitter95_summaxtype,
        (case when delay_jitter95_sumavg > 0 and delay_jitter95_sumavg <= 100 then '1delay_jitter95_sumavg_0_100'
              when delay_jitter95_sumavg > 100 and delay_jitter95_sumavg <= 200 then '2delay_jitter95_sumavg_100_200'
              when delay_jitter95_sumavg > 200 and delay_jitter95_sumavg <= 400 then '3delay_jitter95_sumavg_200_400'
              when delay_jitter95_sumavg > 400 and delay_jitter95_sumavg <= 800 then '4delay_jitter95_sumavg_400_800'
              when delay_jitter95_sumavg > 800 and delay_jitter95_sumavg <= 1200 then '5delay_jitter95_sumavg_800_1200'
              when delay_jitter95_sumavg > 1200 and delay_jitter95_sumavg <= 1600 then '6delay_jitter95_sumavg_1200_1600'
              when delay_jitter95_sumavg > 1600 and delay_jitter95_sumavg <= 2000 then '7delay_jitter95_sumavg_1600_2000'
              when delay_jitter95_sumavg > 2000 and delay_jitter95_sumavg <= 2500 then '8delay_jitter95_sumavg_2000_2500'
              when delay_jitter95_sumavg > 2500 and delay_jitter95_sumavg <= 3000 then '9delay_jitter95_sumavg_2500_3000'
              when delay_jitter95_sumavg > 3000 then '10delay_jitter95_sumavg_3000plus'
              else 'unusual'
         end) as delay_jitter95_sumavgtype,
        count(distinct sid, date, hour, minute) as sid_cnt,
        sum(600ms_freeze_ms) as 600ms_freeze_ms,
        sum(600ms_total_ms) as 600ms_total_ms,
        -- nullif() makes each ratio NULL instead of dividing by zero
        sum(600ms_freeze_ms) / nullif(sum(600ms_total_ms), 0) as 600ms_video_freeze_ratio,
        sum(200ms_audio_freeze) as 200ms_audio_freeze,
        sum(200ms_audio_total) as 200ms_audio_total,
        sum(200ms_audio_freeze) / nullif(sum(200ms_audio_total), 0) as 200ms_audio_freeze_ratio
    from (
        select
            F.date,
            F.hour,
            F.minute,
            F.sid,
            S.spk_uid,
            ver,
            vid,
            delay_max,
            delay_avg,
            jitter95_max,
            jitter95_avg,
            lost400_max,
            lost400_avg,
            (delay_max + jitter95_max) as delay_jitter95_summax,
            -- delay + jitter, mirroring the max variant on the line above
            (delay_avg + jitter95_avg) as delay_jitter95_sumavg,
            S.`600ms_freeze_ms`,
            S.`600ms_total_ms`,
            S.`200ms_audio_freeze`,
            S.`200ms_audio_total`
        from (
            -- Keep only minutes with at least 25 non-null samples of each
            -- metric, then attach ver / vid through vid_info2.
            select A1.*, ver, vid
            from (
                select *
                from s2lv_info
                where delay_cnt >= 25
                  and jitter95_cnt >= 25
                  and 400lostRatio_cnt >= 25
            ) as A1
            inner join vid_info2
                on A1.sid = vid_info2.sid
        ) as F
        left join speaker_freeze as S
            on F.sid = S.sid
           and F.date = S.date
           and F.hour = S.hour
           and F.minute = S.minute
    ) as per_speaker
    -- positions 1-6 = vid, ver and the four bucket labels
    group by 1, 2, 3, 4, 5, 6
)
select * from base_table