SQL1 各个视频的平均完播率
select
video_id
,round(sum(if(timestampdiff(second,start_time,end_time)>=duration,1,0))/count(1),3) avg_comp_play_rate
from tb_video_info
left join tb_user_video_log using(video_id)
where year(start_time)='2021'
group by video_id
order by avg_comp_play_rate desc;
SQL2 平均播放进度大于60%的视频类别
with temp as(
select
tag
,if(timestampdiff(second,start_time,end_time)/duration*100>100,100,timestampdiff(second,start_time,end_time)/duration*100) as play_progress
from tb_video_info
left join tb_user_video_log using(video_id)
where start_time is not null
)
select
tag
,concat(round(avg(play_progress),2),'%') avg_play_progress
from temp
group by tag
having avg(play_progress)>60
order by avg_play_progress desc;
SQL3 每类视频近一个月的转发量/率
with temp as (
select
tag
,if_retweet
,if(start_time,1,0) if_hudong
,date_format(start_time,'%Y%m%d') now_time
,date_format(date_sub(max(date_format(start_time,'%Y%m%d')) over(),interval 29 day),'%Y%m%d') as starttime
,max(date_format(start_time,'%Y%m%d')) over() as endtime
from tb_video_info
left join tb_user_video_log using(video_id)
where start_time is not null
)
select
tag
,sum(if_retweet) retween_cnt
,round(sum(if_retweet)/sum(if_hudong),3) retweet_rate
from temp
where now_time between starttime and endtime
group by tag
order by retweet_rate desc;
SQL4 每个创作者每月的涨粉率及截止当前的总粉丝量
with temp as(
select
author
,date_format(start_time,'%Y-%m') as month
,sum(case when if_follow=1 then 1 else 0 end) as fans_add
,sum(case when if_follow=2 then 1 else 0 end) as fans_sub
,sum(case when start_time then 1 else 0 end)as play
from tb_video_info
left join tb_user_video_log using(video_id)
where start_time is not null and year(start_time)='2021'
group by author,month
)
select
author
,month
,round((fans_add-fans_sub)/play,3) as fans_growth_rate
,sum(fans_add-fans_sub) over(partition by author order by month asc) as total_fans
from temp
order by author,total_fans
SQL5 国庆期间每类视频点赞量和转发量
with temp as (
select
tag
,date_format(start_time,'%Y-%m-%d') dt
,sum(if_like) like_cnt
,sum(if_retweet) retweet_cnt
from tb_user_video_log
left join tb_video_info using(video_id)
where author is not null and date_format(start_time,'%Y-%m-%d') between '2021-09-25' and '2021-10-03'
group by tag,dt
)
select *
from (
select
tag
,dt
,sum(like_cnt) over(partition by tag order by dt rows between 6 preceding and current row) as sum_like_cnt_7d
,max(retweet_cnt) over(partition by tag order by dt rows between 6 preceding and current row) as max_like_cnt_7d
from temp
) t
where dt='2021-10-01' or dt='2021-10-02'or dt='2021-10-03'
order by tag desc,dt asc;
SQL6 近一个月发布的视频中热度最高的top3视频
select
video_id
,round((100*comp_play_rate+5*like_cnt+3*comment_cnt+2*retweet_cnt)/(timestampdiff(day,recently_end_date,cur_date)+1),0) as hot_index
from(
select
video_id
,avg(if(timestampdiff(second,start_time,end_time)>=duration,1,0)) as comp_play_rate
,sum(if_like) as like_cnt
,count(comment_id) as comment_cnt
,sum(if_retweet) as retweet_cnt
,max(date(end_time)) as recently_end_date
,max(cur_date) as cur_date
,max(date(release_time)) as release_date
from tb_user_video_log
left join tb_video_info using(video_id)
left join(select max(end_time) as cur_date
from tb_user_video_log) as t_max_date on 1
where start_time is not null
group by video_id
having timestampdiff(day,release_date,cur_date) < 30
) t_video_info
order by hot_index desc
limit 3;
SQL7 2021年11月每天的人均浏览文章时长
select
date(in_time) as dt
,round(sum(timestampdiff(second,in_time,out_time))/count(distinct uid),1) as avg_view_len_sec
from tb_user_log
where date_format(in_time,'%Y%m')='202111' and artical_id!=0
group by dt
order by avg_view_len_sec asc
SQL8 每篇文章同一时刻最大在看人数
select
artical_id
,max(instant_viewer_cnt) as max_uv
from(
select
artical_id
,dt
,sum(diff) over(partition by artical_id order by dt,diff desc) as instant_viewer_cnt
from(
select artical_id,in_time as dt, 1 as diff
from tb_user_log
where artical_id != 0
union all
select artical_id,out_time as dt, -1 as diff
from tb_user_log
where artical_id != 0
) t1
) t2
group by artical_id
order by 2 desc
SQL9 2021年11月每天新用户的次日留存率
with temp as (
select
uid
,dt
,count(*) over(partition by uid order by dt) as times
from (
select uid,date(in_time) as dt
from tb_user_log
union
select uid,date(out_time) as dt
from tb_user_log
) t
)
select
today.dt as dt
,round(count(tomorrow.dt)/count(*),2) as uv_left_rate
from temp today
left join temp tomorrow
on today.uid=tomorrow.uid and tomorrow.dt = timestampadd(day,1,today.dt)
where today.times=1 and today.dt like'2021-11%'
group by 1
order by 1
SQL10 统计活跃间隔对用户分级结果
with temp as (
select
uid
,'新晋用户' user_grade
from tb_user_log
group by uid
having min(date(in_time)) between date_sub((select max(date(in_time))from tb_user_log),interval 6 day) and (select max(date(in_time)) from tb_user_log)
union all
select
uid
,'忠实用户' user_grade
from tb_user_log
group by uid
having max(date(in_time)) between date_sub((select max(date(in_time))from tb_user_log),interval 6 day) and (select max(date(in_time)) from tb_user_log)
and min(date(in_time)) not between date_sub((select max(date(in_time))from tb_user_log),interval 6 day) and (select max(date(in_time)) from tb_user_log)
union all
select
uid
,'沉睡用户' user_grade
from tb_user_log
group by uid
having max(date(in_time)) < date_sub((select max(date(in_time))from tb_user_log),interval 6 day)
and min(date(in_time)) >= date_sub((select max(date(in_time))from tb_user_log),interval 29 day)
union all
select
uid
,'流失用户' user_grade
from tb_user_log
group by uid
having max(date(in_time)) < date_sub((select max(date(in_time))from tb_user_log),interval 29 day)
)
select
user_grade
,round(count(*)/(select count(*) from temp),2) as ratio
from temp
group by user_grade
order by 2 desc
SQL11 每天的日活数及新用户占比
with temp as(
select
uid
,dt
,count(*) over(partition by uid order by dt) times
from (
select
uid
,date(in_time) as dt
from tb_user_log
union
select
uid
,date(out_time) as dt
from tb_user_log
) t
)
select
dt
,count(*) dau
,round(avg(case when times=1 then 1 else 0 end),2) uv_new_ratio
from temp
group by dt
order by 1
SQL12 连续签到领金币
with temp as (
select
uid
,dt
,case
when (row_number() over(partition by uid,sub order by dt)%7)=3 then 3
when (row_number() over(partition by uid,sub order by dt)%7)=0 then 7
else 1
end as rn1
from (
select
uid
,dt
,row_number() over(partition by uid order by dt) rn
,date_sub(dt,interval row_number() over(partition by uid order by dt) day) as sub
from(
select
uid
,date(in_time) as dt
from tb_user_log
where artical_id=0 and sign_in=1
and date(in_time) between '2021-07-07' and '2021-10-31'
) t1
)t2
)
select
uid
,date_format(dt,'%Y%m') as month
,sum(rn1) as coin
from temp
group by uid,month
SQL13 计算商城中2021年每月的GMV
select
date_format(event_time,'%Y-%m') as month
,sum(if(total_amount>0,total_amount,0)) as GMV
from tb_order_overall
where year(event_time)='2021'
group by month
having GMV>100000
order by GMV
SQL14 统计2021年10月每个退货率不大于0.5的商品各项指标
select
product_id
,round(avg(if_click),3) as ctr
,round(sum(if_cart)/sum(if_click),3) as cart_rate
,round(sum(if_payment)/sum(if_cart),3) as payment_rate
,round(sum(if_refund)/sum(if_payment),3) as refund_rate
from tb_user_event
where date_format(event_time,'%Y%m')='202110'
group by product_id
order by product_id asc
SQL15 某店铺的各商品毛利率及店铺整体毛利率
(select
'店铺汇总' as product_id
,concat(round((1-sum(d.cnt*i.in_price)/sum(d.price*d.cnt))*100,1),'%') as profit_rate
from tb_product_info i
left join tb_order_detail d on i.product_id=d.product_id
left join tb_order_overall o on o.order_id=d.order_id
where i.shop_id='901' and date_format(event_time,'%Y%m')>='202110'
group by i.shop_id)
union all
(select
d.product_id as product_id
,concat(round((1-sum(d.cnt*i.in_price)/sum(d.price*d.cnt))*100,1),'%') as profit_rate
from tb_product_info i
left join tb_order_detail d on i.product_id=d.product_id
left join tb_order_overall o on o.order_id=d.order_id
where i.shop_id='901' and date_format(event_time,'%Y%m')>='202110'
group by d.product_id
having (1-sum(d.cnt*i.in_price)/sum(d.price*d.cnt))*100>24.9
order by product_id
)
SQL16 零食类商品中复购率top3高的商品
with temp as (
select
i.product_id as product_id
,o.uid as uid
,count(*) as cnt
from tb_product_info i
left join tb_order_detail d on i.product_id=d.product_id
left join tb_order_overall o on o.order_id=d.order_id
where i.tag='零食' and date(o.event_time) >=(select date(date_sub(max(event_time),interval 89 day))
from tb_order_overall)
group by i.product_id,o.uid
)
select
product_id
,round(avg(case when cnt>=2 then 1 else 0 end),3) as repurchase_rate
from temp
group by product_id
order by repurchase_rate desc,product_id asc
limit 3
SQL17 10月的新户客单价和获客成本
select round(sum(total_amount)/count(order_id),1) avg_amount,
round(avg(cost),1) avg_cost
from (select a.order_id,
total_amount,
(sum(price*cnt) - total_amount) as cost
from tb_order_detail a
left join tb_order_overall b
on a.order_id = b.order_id
where date_format(event_time,'%Y-%m') = '2021-10'
and (uid,event_time) in (select uid ,min(event_time)
from tb_order_overall
GROUP BY uid )
GROUP BY a.order_id) a
SQL18 店铺901国庆期间的7日动销率和滞销率
with a as(
select distinct date(event_time) dt
from tb_order_overall
where date(event_time) between '2021-10-01' and '2021-10-03'
),b as (
select date(too.event_time) dt, tod.product_id
from tb_order_detail tod join tb_order_overall too on tod.order_id = too.order_id
join tb_product_info tpi on tod.product_id = tpi.product_id
where tpi.shop_id = 901 and too.status = 1
)
select
a.dt
,round(count(distinct b.product_id)/(select count(product_id) from tb_product_info where date(release_time)<=a.dt and shop_id=901),3) sale_rate
,round(1-(count(distinct b.product_id)/(select count(product_id) from tb_product_info where date(release_time)<=a.dt and shop_id=901)),3) as unsale_rate
from a left join b on datediff(a.dt,b.dt) between 0 and 6
group by a.dt
order by a.dt
SQL19 2021年国庆在北京接单3次及以上的司机统计信息
with temp as (
select
driver_id
,count(order_id) order_num
,sum(fare) income
from tb_get_car_record
left join tb_get_car_order using(order_id)
where date(event_time) >= '2021-10-01' and
date(finish_time) <= '2021-10-07' and finish_time is not null
and city='北京'
group by driver_id
having count(driver_id) >=3
)
select
'北京' as city
,round(avg(order_num),3) as avg_order_num
,round(avg(income),3) as avg_income
from temp
SQL20 有取消订单记录的司机平均评分
select
coalesce(driver_id,"总体")
,round(sum(grade)/count(grade),1) as avg_grade
from tb_get_car_order
where driver_id in (
select driver_id
from tb_get_car_order
where start_time is null
and date_format(order_time,'%Y%m')='202110'
)
group by driver_id with rollup
SQL21 每个城市中评分最高的司机信息
select
t3.city
,t3.driver_id
,round(avg(tb_get_car_order.grade),1) as avg_grade
,round(count(1)/count(distinct date(order_time)),1) as avg_order_num
,round(sum(mileage)/count(distinct date(order_time)),3) as avg_mileage
from(
select
city
,driver_id
from(
select
city
,driver_id
,dense_rank() over(partition by city order by grade desc) rk
from(
select
city
,driver_id
,avg(grade) as grade
from tb_get_car_record
join tb_get_car_order using(order_id)
where fare is not null
group by city,driver_id
) t1
) t2
where rk=1
) t3 left join tb_get_car_order on t3.driver_id=tb_get_car_order.driver_id
group by city,driver_id
order by avg_order_num asc
SQL22 国庆期间近7日日均取消订单量
select
dt, finish_num_7d, cancel_num_7d
from (
select
dt
,round(avg(finish) over(order by dt rows 6 preceding),2) as finish_num_7d
,round(avg(cancel) over(order by dt rows 6 preceding),2) as cancel_num_7d
from(
select
date(order_time) as dt
,count(start_time) as finish
,count(1)-count(start_time) as cancel
from tb_get_car_order
group by date(order_time)
) t
) t1
where dt>='2021-10-01' and dt<='2021-10-03'
SQL23 工作日各时段叫车量、等待接单时间和调度时间
select
dif
,count(*) as get_car_num
,round(avg(wait_time),1) as avg_wait_time
,round(avg(dispatch_time),1) as avg_dispatch_time
from(
select
case
when date_format(event_time,'%H:%i:%s') >= '07:00:00' and date_format(event_time,'%H:%i:%s') < '09:00:00' then '早高峰'
when date_format(event_time,'%H:%i:%s') >= '09:00:00' and date_format(event_time,'%H:%i:%s') < '17:00:00' then '工作时间'
when date_format(event_time,'%H:%i:%s') >= '17:00:00' and date_format(event_time,'%H:%i:%s') < '20:00:00' then '晚高峰'
else '休息时间'
end as dif
,timestampdiff(second,event_time,order_time)/60 as wait_time
,timestampdiff(second,order_time,start_time)/60 as dispatch_time
from tb_get_car_record
left join tb_get_car_order using(order_id)
where date_format(event_time,'%W') not in ('Saturday','Sunday')
) t
group by dif
order by get_car_num asc
SQL24 各城市最大同时等车人数
with a as (
select city,uid,event_time as dt,1 as diff
from tb_get_car_record
where date_format(event_time,'%Y-%m')='2021-10'
union
select city,uid,end_time as dt,-1 as diff
from tb_get_car_record
where date_format(event_time,'%Y-%m')='2021-10' and order_id is null
union
select city,tb_get_car_order.uid,finish_time as dt,-1 as diff
from tb_get_car_order
left join tb_get_car_record using(order_id)
where date_format(event_time,'%Y-%m')='2021-10' and fare is null
union
select city,tb_get_car_order.uid,start_time as dt,-1 as diff
from tb_get_car_order
left join tb_get_car_record using(order_id)
where date_format(event_time,'%Y-%m')='2021-10'
)
select city,max(num) as number
from (
select
city,sum(diff) over(partition by city,date_format(dt,'%Y-%m') order by dt asc,diff desc) as num
from a
) t
group by city
order by number asc
SQL25 某宝店铺的SPU数量
select
style_id
,count(*) as SPU_num
from product_tb
group by style_id
order by SPU_num desc
SQL26 某宝店铺的实际销售额与客单价
select
sum(sales_price) as sales_total
,round(sum(sales_price)/count(distinct user_id),2)per_trans
from sales_tb
SQL27 某宝店铺折扣率
select
round(sum(sales_price)/sum(sales_num*tag_price)*100,2)
from sales_tb
left join product_tb using(item_id)
SQL28 某宝店铺动销率与售罄率
with a as (
select
style_id
,sum(inventory) as total_inventory
,sum(tag_price*inventory) as inventory_price
from product_tb
group by 1
),
b as (
select
style_id
,sum(sales_price) GMV
,sum(sales_num) sales_num_total
from sales_tb
left join product_tb using(item_id)
group by style_id
)
select
style_id
,round(sales_num_total/(total_inventory-sales_num_total)*100,2) pin_rate
,round(GMV/inventory_price*100,2) sell_through_rate
from a join b using(style_id)
order by 1
SQL29 某宝店铺连续2天及以上购物的用户及其对应的天数
select
user_id
,count(*) as days_count
from (
select
user_id
,sales_date
,row_number() over(partition by user_id order by sales_date) as rn
from sales_tb
) t
group by user_id,date_sub(sales_date,interval rn day)
having days_count>=2
SQL30 牛客直播转换率
select
course_id
,course_name
,round(sum(if_sign)/sum(if_vw)*100,2) as 'sign_rate(%)'
from course_tb
left join behavior_tb using(course_id)
group by course_id,course_name
order by course_id
SQL31 牛客直播开始时各直播间在线人数
select
course_id
,course_name
,count(*) as online_num
from course_tb
left join attend_tb using(course_id)
where date_format(in_datetime,'%H%i')<='1900' and
date_format(out_datetime,'%H%i')>='1900'
group by course_id,course_name
SQL32 牛客直播各科目平均观看时长
select
course_name
,round(sum(timestampdiff(minute,in_datetime,out_datetime))/count(*),2) as avg_Len
from course_tb
left join attend_tb using(course_id)
group by course_name
order by avg_Len desc
SQL33 牛客直播各科目出勤率
with a as (
select course_id,sum(if_sign) as sign_total
from behavior_tb
group by course_id
),b as (
select course_id,count(distinct user_id) total_10
from attend_tb
where timestampdiff(minute,in_datetime,out_datetime)>=10
group by course_id
)
select te.course_id,t.course_name,te.rate
from (
select a.course_id as course_id,round(total_10/sign_total*100,2) as rate
from a left join b on a.course_id=b.course_id
) te join course_tb t on t.course_id=te.course_id
SQL34 牛客直播各科目同时在线人数
select course_id,course_name,num_max
from(
select course_id,max(num) as num_max
from (
select
course_id
,sum(diff) over(partition by course_id order by df asc,diff desc) as num
from(
select user_id,course_id,in_datetime as df, 1 as diff
from attend_tb
union
select user_id,course_id,out_datetime as df, -1 as diff
from attend_tb
) t
) t1
group by course_id
) t2 join course_tb using (course_id)
SQL35 某乎问答11月份日人均回答量
select
date(answer_date)
,round(count(issue_id)/count(distinct author_id),2)
from answer_tb
where date_format(answer_date,'%Y%m')='202111'
group by date(answer_date)
order by date(answer_date)
SQL36 某乎问答高质量的回答中用户属于各级别的数量
select
case
when author_level between 1 and 2 then '1-2级'
when author_level between 3 and 4 then '3-4级'
when author_level between 5 and 6 then '5-6级'
end as level_cut,count(issue_id) as num
from author_tb left join answer_tb using(author_id)
where char_len>=100
group by level_cut
order by num desc
SQL37 某乎问答单日回答问题数大于等于3个的所有用户
select
date(answer_date)
,author_id
,count(1) as answer_cnt
from answer_tb
where date_format(answer_date,'%Y%m')='202111'
group by date(answer_date),author_id
having answer_cnt>=3
order by date(answer_date),author_id
SQL38 某乎问答回答过教育类问题的用户里有多少用户回答过职场类问题
select count(distinct author_id)
from answer_tb
where author_id in (select author_id
from answer_tb
where issue_id in (select issue_id
from issue_tb
where issue_type='Education'))
and issue_id in (select issue_id
from issue_tb
where issue_type='Career')
SQL39 某乎问答最大连续回答问题天数大于等于3天的用户及其对应等级
select author_id ,author_level,num
from (
select
author_id ,author_level,date_sub(answer_date,interval rn day),count(*) as num
from (
select
author_id
,author_level
,answer_date
,row_number() over(partition by author_id order by answer_date) rn
from answer_tb
left join author_tb using(author_id)
) t
group by author_id ,author_level,date_sub(answer_date,interval rn day)
having count(*)>=3
) t1