牛客sql_大厂面试真题

SQL1 各个视频的平均完播率

select
    video_id
   ,round(sum(if(timestampdiff(second,start_time,end_time)>=duration,1,0))/count(1),3) avg_comp_play_rate
from tb_video_info
left join tb_user_video_log using(video_id)
where year(start_time)='2021'
group by video_id
order by avg_comp_play_rate desc;

SQL2 平均播放进度大于60%的视频类别

with temp as(
    select 
        tag
        ,if(timestampdiff(second,start_time,end_time)/duration*100>100,100,timestampdiff(second,start_time,end_time)/duration*100) as play_progress
    from tb_video_info
    left join tb_user_video_log using(video_id)
    where start_time is not null 
)

select 
    tag
    ,concat(round(avg(play_progress),2),'%') avg_play_progress
from temp
group by tag
having avg(play_progress)>60
order by avg_play_progress desc;

SQL3 每类视频近一个月的转发量/率

with temp as (
    select 
            tag
            ,if_retweet 
            ,if(start_time,1,0) if_hudong
            ,date_format(start_time,'%Y%m%d') now_time
            ,date_format(date_sub(max(date_format(start_time,'%Y%m%d')) over(),interval 29 day),'%Y%m%d') as starttime
            ,max(date_format(start_time,'%Y%m%d')) over() as endtime
        from tb_video_info 
        left join tb_user_video_log using(video_id)
        where start_time is not null
)

select 
    tag
    ,sum(if_retweet) retween_cnt
    ,round(sum(if_retweet)/sum(if_hudong),3) retweet_rate
from temp
where now_time between starttime and endtime
group by tag
order by retweet_rate desc;

SQL4 每个创作者每月的涨粉率及截止当前的总粉丝量

with temp as(
select
    author
    ,date_format(start_time,'%Y-%m') as month
    ,sum(case when if_follow=1 then 1 else 0 end) as fans_add -- 增加粉丝数量
    ,sum(case when if_follow=2 then 1 else 0 end) as fans_sub -- 减少粉丝数量
    ,sum(case when start_time then 1 else 0 end)as play       -- 播放量
from tb_video_info
left join tb_user_video_log using(video_id)
where start_time is not null and year(start_time)='2021'
group by author,month
)

select 
    author
    ,month
    ,round((fans_add-fans_sub)/play,3) as fans_growth_rate
    ,sum(fans_add-fans_sub) over(partition by author order by month asc) as total_fans
from temp
order by author,total_fans

SQL5 国庆期间每类视频点赞量和转发量

with temp as (
    select 
        tag
        ,date_format(start_time,'%Y-%m-%d') dt
        ,sum(if_like) like_cnt
        ,sum(if_retweet) retweet_cnt
    from tb_user_video_log
    left join tb_video_info using(video_id)
    where author is not null and date_format(start_time,'%Y-%m-%d') between '2021-09-25' and '2021-10-03'
    group by tag,dt
)

select *
    from (
    select 
        tag
        ,dt
        ,sum(like_cnt) over(partition by tag order by dt rows between 6 preceding and current row) as sum_like_cnt_7d
        ,max(retweet_cnt) over(partition by tag order by dt rows between 6 preceding and current row) as max_like_cnt_7d
    from temp
) t
where dt='2021-10-01' or dt='2021-10-02'or dt='2021-10-03'
order by tag desc,dt asc;

SQL6 近一个月发布的视频中热度最高的top3视频

/*
select   timestampdiff(day, '2021-09-30', '2021-10-04 23:00:35') as hot_index; 
-- 结果4 以2021-09-30 00:00:00开始
select   timestampdiff(day, '2021-09-30 00:00:36', '2021-10-04 00:00:35') as ahot_index;
-- 结果3 不到4天
*/
select
	video_id
	,round((100*comp_play_rate+5*like_cnt+3*comment_cnt+2*retweet_cnt)/(timestampdiff(day,recently_end_date,cur_date)+1),0) as hot_index
from(
	select 
	    video_id
	    ,avg(if(timestampdiff(second,start_time,end_time)>=duration,1,0)) as comp_play_rate
	    ,sum(if_like)  as like_cnt
	    ,count(comment_id) as comment_cnt
	    ,sum(if_retweet) as retweet_cnt
	    ,max(date(end_time)) as recently_end_date
	    ,max(cur_date) as cur_date
	    ,max(date(release_time)) as release_date
	from tb_user_video_log
	left join tb_video_info using(video_id)
	left join(select max(end_time) as cur_date
		  from tb_user_video_log) as t_max_date on 1
    where start_time is not null
	group by video_id
	having timestampdiff(day,release_date,cur_date) < 30
) t_video_info
order by hot_index desc
limit 3;

SQL7 2021年11月每天的人均浏览文章时长

select 
    date(in_time) as dt
    ,round(sum(timestampdiff(second,in_time,out_time))/count(distinct uid),1) as avg_view_len_sec
--    ,count(1)
from tb_user_log
where date_format(in_time,'%Y%m')='202111' and artical_id!=0
group by dt
order by avg_view_len_sec asc

SQL8 每篇文章同一时刻最大在看人数

select 
    artical_id
    ,max(instant_viewer_cnt) as max_uv
from(
    select 
        artical_id
        ,dt
        ,sum(diff) over(partition by artical_id order by dt,diff desc) as instant_viewer_cnt
    from(
        select artical_id,in_time as dt, 1 as diff
        from tb_user_log
        where artical_id != 0
        union all 
        select artical_id,out_time as dt, -1 as diff
        from tb_user_log
        where artical_id != 0
        ) t1
) t2
group by artical_id
order by 2 desc

/*
步骤拆解:
    1、对原表编码并联立;
    2、按artical_id维度,dt升序 ,diff降序,对diff进行SUM开窗统计,得到每个artical_id的瞬时观看人数instant_viewer_cnt;
    3、最外层SELECT按artical_id聚合,通过MAX(instant_viewer_cnt)取出瞬时观看最大值max_uv,并排序。
题目要求在瞬时统计时遵循【先进后出】:如果同一时刻有进入也有离开时,先记录用户数增加,再记录减少。
*/

SQL9 2021年11月每天新用户的次日留存率

with temp as (
    select 
        uid
        ,dt
        ,count(*) over(partition by uid order by dt) as times
    from (
        select uid,date(in_time) as dt
        from tb_user_log
        union 
        select uid,date(out_time) as dt
        from tb_user_log
    ) t
)

select 
    today.dt as dt
    ,round(count(tomorrow.dt)/count(*),2) as uv_left_rate
from temp today
left join temp tomorrow 
on today.uid=tomorrow.uid and tomorrow.dt = timestampadd(day,1,today.dt)
where today.times=1 and today.dt like'2021-11%'
group by 1
order by 1

/*
 1、先将跨天的日期表生成,这种简单的行相加就用union函数就可以,其中union all不去重 而union则会自动过滤重复的行 
 2、如何判断新客户,可以理解以前没有出现过,所以可以统计累计出现次数,之前的题目可以学习到 sum+ over(order by)可以求累计和 
 3、一般看留存率的题目都可以使用左连接,这样第二天的数据如果缺少就会是null值,非常的常用 
 4、一个常用技巧,使用with temp as()
*/

SQL10 统计活跃间隔对用户分级结果

-- 当天日期
-- select max(date(in_time)) from tb_user_log
with temp as (
    -- 新晋用户
    select 
        uid
        ,'新晋用户' user_grade
    from tb_user_log
    group by uid
    having min(date(in_time)) between date_sub((select max(date(in_time))from tb_user_log),interval 6 day) and (select max(date(in_time)) from tb_user_log)
    union all
    -- 忠实用户
    select 
         uid
        ,'忠实用户' user_grade
    from tb_user_log
    group by uid
    having max(date(in_time)) between date_sub((select max(date(in_time))from tb_user_log),interval 6 day) and (select max(date(in_time)) from tb_user_log)
    and min(date(in_time)) not between date_sub((select max(date(in_time))from tb_user_log),interval 6 day) and (select max(date(in_time)) from tb_user_log)
    union all
    -- 沉睡用户
    select 
         uid
        ,'沉睡用户' user_grade
    from tb_user_log
    group by uid
    having max(date(in_time)) < date_sub((select max(date(in_time))from tb_user_log),interval 6 day) 
    and min(date(in_time)) >= date_sub((select max(date(in_time))from tb_user_log),interval 29 day) 
    union all
    -- 流失用户
    select 
         uid
        ,'流失用户' user_grade
    from tb_user_log
    group by uid
    having max(date(in_time)) < date_sub((select max(date(in_time))from tb_user_log),interval 29 day) 
)

select 
    user_grade
    ,round(count(*)/(select count(*) from temp),2) as ratio
from temp
group by user_grade
order by 2 desc

SQL11 每天的日活数及新用户占比

with temp as(
    select 
        uid
        ,dt
        ,count(*) over(partition by uid order by dt) times
    from (
        -- 用户每日登记表
        select
            uid
            ,date(in_time) as dt
        from tb_user_log
        union 
        select
            uid
            ,date(out_time) as dt
        from tb_user_log
    ) t
)

select
    dt
    ,count(*) dau
    ,round(avg(case when times=1 then 1 else 0 end),2) uv_new_ratio 
from temp
group by dt
order by 1

SQL12 连续签到领金币

/*
1、先根据日期条件和登录筛选表得到原始表
2、用row_number()和日期的差值来判断是否连续
3、根据签到规则连续七天用求余数的方法,然后case end 判断
注意(坑):分组partition的时候要更具uid,sub进行分组,排除不同用户sub值想要的情况
4、用with as 产生临时表,最后对temp分组累加
*/
with temp as (
    select 
        uid
        ,dt
        ,case 
        when (row_number() over(partition by uid,sub order by dt)%7)=3 then 3 
        when (row_number() over(partition by uid,sub order by dt)%7)=0 then 7
        else 1
        end as rn1
    from (
        select
            uid
            ,dt
            ,row_number() over(partition by uid order by dt) rn
            ,date_sub(dt,interval row_number() over(partition by uid order by dt) day) as sub
        from(
            select 
                uid
                ,date(in_time) as dt
        from tb_user_log
        where artical_id=0 and sign_in=1 
        and date(in_time) between '2021-07-07' and '2021-10-31'
        ) t1
    )t2
)

select 
    uid
    ,date_format(dt,'%Y%m') as month
    ,sum(rn1) as coin
from temp
group by uid,month

SQL13 计算商城中2021年每月的GMV

select 
    date_format(event_time,'%Y-%m') as month
    ,sum(if(total_amount>0,total_amount,0)) as GMV
from tb_order_overall
where year(event_time)='2021'
group by month
having GMV>100000
order by GMV

SQL14 统计2021年10月每个退货率不大于0.5的商品各项指标

select 
    product_id
    ,round(avg(if_click),3) as ctr
    ,round(sum(if_cart)/sum(if_click),3) as cart_rate
    ,round(sum(if_payment)/sum(if_cart),3) as payment_rate
    ,round(sum(if_refund)/sum(if_payment),3) as refund_rate
from tb_user_event
where date_format(event_time,'%Y%m')='202110'
group by product_id
order by product_id asc

SQL15 某店铺的各商品毛利率及店铺整体毛利率

(select 
    '店铺汇总' as product_id
--    ,sum(d.price*d.cnt) -- 销售总额
--    ,sum(d.cnt*i.in_price) -- 进价总额
    ,concat(round((1-sum(d.cnt*i.in_price)/sum(d.price*d.cnt))*100,1),'%') as profit_rate
from tb_product_info i
left join tb_order_detail d on i.product_id=d.product_id
left join tb_order_overall o on o.order_id=d.order_id
where i.shop_id='901' and date_format(event_time,'%Y%m')>='202110'
group by i.shop_id)
union all
(select 
    d.product_id as product_id
--    ,sum(d.price*d.cnt) -- 销售总额
--    ,sum(d.cnt*i.in_price) -- 进价总额
    ,concat(round((1-sum(d.cnt*i.in_price)/sum(d.price*d.cnt))*100,1),'%') as profit_rate
from tb_product_info i
left join tb_order_detail d on i.product_id=d.product_id
left join tb_order_overall o on o.order_id=d.order_id
where i.shop_id='901' and date_format(event_time,'%Y%m')>='202110'
group by d.product_id
 having (1-sum(d.cnt*i.in_price)/sum(d.price*d.cnt))*100>24.9
order by product_id
 )

SQL16 零食类商品中复购率top3高的商品

with temp as (
select 
    i.product_id as product_id -- 商品ID
    ,o.uid as uid  -- 用户ID
    ,count(*) as cnt  -- 商品被用户购买次数
from tb_product_info i 
left join tb_order_detail d on i.product_id=d.product_id
left join tb_order_overall o on o.order_id=d.order_id
where i.tag='零食' and date(o.event_time) >=(select date(date_sub(max(event_time),interval 89 day)) 
                                            from tb_order_overall)
group by i.product_id,o.uid
)

select 
    product_id
    ,round(avg(case when cnt>=2 then 1 else 0 end),3)  as repurchase_rate
from temp
group by product_id
order by repurchase_rate desc,product_id asc
limit 3

SQL17 10月的新户客单价和获客成本

select round(sum(total_amount)/count(order_id),1) avg_amount,
       round(avg(cost),1) avg_cost
from (select a.order_id, 
             total_amount,
             (sum(price*cnt) - total_amount) as cost
      from tb_order_detail a
      left join tb_order_overall b
      on a.order_id = b.order_id
      where date_format(event_time,'%Y-%m') = '2021-10' 
            and (uid,event_time) in (select uid ,min(event_time)     -- 用户和其第一次购买的时间
                                     from tb_order_overall
                                     GROUP BY uid )
      GROUP BY a.order_id) a

SQL18 店铺901国庆期间的7日动销率和滞销率

with a as(
select distinct date(event_time) dt
from tb_order_overall
where date(event_time) between '2021-10-01' and '2021-10-03'
),b as (
select date(too.event_time) dt, tod.product_id
from tb_order_detail tod join tb_order_overall too on tod.order_id = too.order_id
                         join tb_product_info tpi on tod.product_id = tpi.product_id
where tpi.shop_id = 901 and too.status = 1
)

select 
    a.dt
    ,round(count(distinct b.product_id)/(select count(product_id) from tb_product_info where date(release_time)<=a.dt and shop_id=901),3) sale_rate
    ,round(1-(count(distinct b.product_id)/(select count(product_id) from tb_product_info where date(release_time)<=a.dt and shop_id=901)),3) as unsale_rate
from a left join b on datediff(a.dt,b.dt) between 0 and 6
group by a.dt
order by a.dt

SQL19 2021年国庆在北京接单3次及以上的司机统计信息

with temp as (
	select
        driver_id
	    ,count(order_id) order_num
	    ,sum(fare) income
	from tb_get_car_record
	left join tb_get_car_order using(order_id)
	where date(event_time) >= '2021-10-01' and 
	date(finish_time) <= '2021-10-07' and finish_time is not null
    and city='北京'
	group by driver_id
	having count(driver_id) >=3
)


select
        '北京' as city 
        ,round(avg(order_num),3) as avg_order_num
        ,round(avg(income),3) as avg_income
from temp

SQL20 有取消订单记录的司机平均评分

select 
    coalesce(driver_id,"总体")
    ,round(sum(grade)/count(grade),1) as avg_grade
from tb_get_car_order
where driver_id in (
    select driver_id
    from tb_get_car_order
    where start_time is null 
    and date_format(order_time,'%Y%m')='202110'
) 
group by driver_id with rollup

SQL21 每个城市中评分最高的司机信息

select
             t3.city
            ,t3.driver_id
            ,round(avg(tb_get_car_order.grade),1) as avg_grade
            ,round(count(1)/count(distinct date(order_time)),1) as avg_order_num
            ,round(sum(mileage)/count(distinct date(order_time)),3) as avg_mileage
from(
    select 
             city
            ,driver_id
    from(
        select 
             city
            ,driver_id
            ,dense_rank() over(partition by city order by grade desc) rk
        from(
            select 
                city
                ,driver_id
                ,avg(grade) as grade
            from tb_get_car_record
            join tb_get_car_order using(order_id)
            where fare is not null
            group by city,driver_id
        ) t1
    ) t2
    where rk=1
) t3 left join tb_get_car_order on t3.driver_id=tb_get_car_order.driver_id
group by  city,driver_id
order by avg_order_num asc

SQL22 国庆期间近7日日均取消订单量

select
    dt, finish_num_7d, cancel_num_7d
from (
    select 
        dt
        ,round(avg(finish) over(order by dt rows 6 preceding),2) as finish_num_7d
        ,round(avg(cancel) over(order by dt rows 6 preceding),2) as cancel_num_7d
    from(
        select  
            date(order_time) as dt
            ,count(start_time) as finish
            ,count(1)-count(start_time) as cancel
        from tb_get_car_order
        group by date(order_time)
    ) t
) t1
where dt>='2021-10-01' and dt<='2021-10-03'

SQL23 工作日各时段叫车量、等待接单时间和调度时间

select 
    dif
    ,count(*) as get_car_num
    ,round(avg(wait_time),1) as avg_wait_time
    ,round(avg(dispatch_time),1) as avg_dispatch_time
from(
select
    case
    when date_format(event_time,'%H:%i:%s') >= '07:00:00' and date_format(event_time,'%H:%i:%s') < '09:00:00' then '早高峰'
    when date_format(event_time,'%H:%i:%s') >= '09:00:00' and date_format(event_time,'%H:%i:%s') < '17:00:00' then '工作时间'
    when date_format(event_time,'%H:%i:%s') >= '17:00:00' and date_format(event_time,'%H:%i:%s') < '20:00:00' then '晚高峰'
    else '休息时间'
    end as dif
    ,timestampdiff(second,event_time,order_time)/60 as wait_time
    ,timestampdiff(second,order_time,start_time)/60 as dispatch_time
from tb_get_car_record
left join tb_get_car_order using(order_id)
where date_format(event_time,'%W')  not in ('Saturday','Sunday')
) t
group by dif
order by get_car_num asc

SQL24 各城市最大同时等车人数

with a as (
select city,uid,event_time as dt,1 as diff
from tb_get_car_record
where date_format(event_time,'%Y-%m')='2021-10'
union
select city,uid,end_time as dt,-1 as diff
from tb_get_car_record
where date_format(event_time,'%Y-%m')='2021-10' and order_id is null
union
select city,tb_get_car_order.uid,finish_time as dt,-1 as diff
from tb_get_car_order
left join tb_get_car_record using(order_id)
where date_format(event_time,'%Y-%m')='2021-10' and fare is null
union 
select city,tb_get_car_order.uid,start_time as dt,-1 as diff
from tb_get_car_order
left join tb_get_car_record using(order_id)
where date_format(event_time,'%Y-%m')='2021-10'
)

select city,max(num) as number
from (
    select 
        city,sum(diff) over(partition by city,date_format(dt,'%Y-%m') order by dt asc,diff desc) as num
    from a 
) t 
group by city
order by number asc

SQL25 某宝店铺的SPU数量

select 
    style_id
    ,count(*) as SPU_num
from product_tb
group by style_id
order by SPU_num desc

SQL26 某宝店铺的实际销售额与客单价

select 
    sum(sales_price) as sales_total
    ,round(sum(sales_price)/count(distinct user_id),2)per_trans
from sales_tb

SQL27 某宝店铺折扣率

select
    round(sum(sales_price)/sum(sales_num*tag_price)*100,2)
from sales_tb
left join product_tb using(item_id)

SQL28 某宝店铺动销率与售罄率

with a as (
select 
    style_id
    ,sum(inventory) as total_inventory
    ,sum(tag_price*inventory) as inventory_price
from product_tb
group by 1
),
b as (
select 
    style_id
    ,sum(sales_price) GMV
    ,sum(sales_num) sales_num_total
from sales_tb 
left join product_tb using(item_id)
group by style_id
)

select 
    style_id
    ,round(sales_num_total/(total_inventory-sales_num_total)*100,2) pin_rate 
    ,round(GMV/inventory_price*100,2) sell_through_rate
from a join b using(style_id)
order by 1

SQL29 某宝店铺连续2天及以上购物的用户及其对应的天数

select 
    user_id
    ,count(*) as days_count
from (
    select 
        user_id
        ,sales_date
        ,row_number() over(partition by user_id order by sales_date) as rn
    from sales_tb
) t
group by user_id,date_sub(sales_date,interval rn day)
having days_count>=2
-- row_number 窗口函数可以保证在重复的情况下,仍然取得连续值

SQL30 牛客直播转换率

select 
    course_id
    ,course_name
    ,round(sum(if_sign)/sum(if_vw)*100,2) as 'sign_rate(%)' 
from course_tb
left join behavior_tb using(course_id)
group by course_id,course_name
order by course_id

SQL31 牛客直播开始时各直播间在线人数

select 
    course_id
    ,course_name
    ,count(*) as online_num 
from course_tb
left join attend_tb using(course_id)
where date_format(in_datetime,'%H%i')<='1900' and
date_format(out_datetime,'%H%i')>='1900'
group by course_id,course_name

SQL32 牛客直播各科目平均观看时长

select 
    course_name
  ,round(sum(timestampdiff(minute,in_datetime,out_datetime))/count(*),2) as avg_Len 
from course_tb
left join attend_tb using(course_id)
group by course_name 
order by avg_Len desc

SQL33 牛客直播各科目出勤率

with a as (
select course_id,sum(if_sign) as sign_total
from behavior_tb
group by course_id
),b as (
select course_id,count(distinct user_id) total_10
from attend_tb
where timestampdiff(minute,in_datetime,out_datetime)>=10
group by course_id
)
select te.course_id,t.course_name,te.rate
from (
select a.course_id as course_id,round(total_10/sign_total*100,2) as rate
from a left join b on a.course_id=b.course_id
) te join course_tb t on t.course_id=te.course_id

SQL34 牛客直播各科目同时在线人数

select course_id,course_name,num_max
from(
select course_id,max(num) as num_max
from (
select 
    course_id
    ,sum(diff) over(partition by course_id order by df asc,diff desc) as num
from(
select user_id,course_id,in_datetime as df, 1 as diff
from attend_tb
union 
select user_id,course_id,out_datetime as df, -1 as diff
from attend_tb
) t
) t1
group by course_id
) t2 join course_tb using (course_id)

SQL35 某乎问答11月份日人均回答量

select 
    date(answer_date)
    ,round(count(issue_id)/count(distinct author_id),2)
from answer_tb
where date_format(answer_date,'%Y%m')='202111'
group by date(answer_date)
order by date(answer_date)

SQL36 某乎问答高质量的回答中用户属于各级别的数量


select
case 
when author_level between 1 and 2 then '1-2级'
when author_level between 3 and 4 then '3-4级'
when author_level between 5 and 6 then '5-6级'
end as level_cut,count(issue_id) as num
from author_tb left join answer_tb using(author_id)
where char_len>=100
group by level_cut
order by num desc

SQL37 某乎问答单日回答问题数大于等于3个的所有用户

select 
    date(answer_date)
    ,author_id	
    ,count(1) as answer_cnt
from answer_tb
where date_format(answer_date,'%Y%m')='202111'
group by date(answer_date),author_id
having answer_cnt>=3
order by date(answer_date),author_id

SQL38 某乎问答回答过教育类问题的用户里有多少用户回答过职场类问题

select count(distinct author_id)
from answer_tb
where author_id in (select author_id 
                    from answer_tb
                    where issue_id in (select issue_id
                    from issue_tb
                    where issue_type='Education')) 
     and issue_id in (select issue_id
                    from issue_tb
                    where issue_type='Career')

SQL39 某乎问答最大连续回答问题天数大于等于3天的用户及其对应等级

select author_id ,author_level,num
from (
	select
		author_id ,author_level,date_sub(answer_date,interval rn day),count(*) as num
	from (
	    select 
		 author_id 
		,author_level
		,answer_date
		,row_number() over(partition by author_id order by answer_date) rn
	    from answer_tb 
	    left join author_tb using(author_id)
	) t
	group by author_id ,author_level,date_sub(answer_date,interval rn day)
	having count(*)>=3
) t1

你可能感兴趣的:(牛客leetcode--刷题,sql,数据库)