牛客网SQL大厂面试真题(一)

SQL1 各个视频的平均完播率

-- Completion rate per video, for plays that started in 2021.
-- A play is "complete" when the watched span (end_time - start_time, in seconds)
-- is at least the video's duration.
WITH plays_2021 AS (
    SELECT
        l.video_id,
        CASE
            WHEN TIMESTAMPDIFF(SECOND, l.start_time, l.end_time) >= v.duration THEN 1
            ELSE 0
        END AS is_complete
    FROM tb_user_video_log AS l
    INNER JOIN tb_video_info AS v
        ON v.video_id = l.video_id
    WHERE YEAR(l.start_time) = 2021
)
SELECT
    video_id,
    ROUND(SUM(is_complete) / COUNT(*), 3) AS avg_comp_play_rate
FROM plays_2021
GROUP BY video_id
ORDER BY avg_comp_play_rate DESC

SQL2 平均播放进度大于60%的视频类别

-- Video tags whose average play progress is above 60%.
-- Progress of one play = watched seconds / video duration, capped at 1.
WITH tag_progress AS (
    SELECT
        v.tag,
        AVG(
            LEAST(TIMESTAMPDIFF(SECOND, l.start_time, l.end_time) / v.duration, 1)
        ) AS progress_ratio
    FROM tb_user_video_log AS l
    INNER JOIN tb_video_info AS v
        ON v.video_id = l.video_id
    GROUP BY v.tag
)
SELECT
    tag,
    -- Rendered as a percentage string with two decimals, e.g. "81.66%".
    CONCAT(ROUND(progress_ratio * 100, 2), '%') AS avg_play_progress
FROM tag_progress
WHERE progress_ratio > 0.6
-- Sort on the numeric ratio, not on the formatted string.
ORDER BY progress_ratio DESC

SQL3 每类视频近一个月的转发量/率

-- Retweet count and retweet rate per tag over the 30 calendar days ending on
-- the latest end_time found in the play log.
WITH latest AS (
    SELECT MAX(end_time) AS last_end_time
    FROM tb_user_video_log
)
SELECT
    v.tag,
    SUM(l.if_retweet) AS retweet_cut,
    ROUND(SUM(l.if_retweet) / COUNT(*), 3) AS retweet_rate
FROM tb_user_video_log AS l
INNER JOIN tb_video_info AS v
    ON v.video_id = l.video_id
CROSS JOIN latest
-- DATEDIFF compares calendar dates only: 0..29 days back from the latest day.
WHERE DATEDIFF(latest.last_end_time, l.end_time) < 30
GROUP BY v.tag
ORDER BY retweet_rate DESC

SQL4 每个创作者每月的涨粉率及截止当前的总粉丝量

-- Per author and month of 2021: fans growth rate and running fan total.
--   growth rate = (rows with if_follow = '1' minus rows with if_follow = '2') / plays
--   total_fans  = running sum of that net change over the author's months
WITH monthly AS (
    SELECT
        v.author,
        DATE_FORMAT(l.start_time, '%Y-%m') AS month,
        SUM(CASE WHEN l.if_follow = '1' THEN 1 ELSE 0 END)
            - SUM(CASE WHEN l.if_follow = '2' THEN 1 ELSE 0 END) AS net_follow,
        COUNT(*) AS play_cnt
    FROM tb_user_video_log AS l
    INNER JOIN tb_video_info AS v
        ON v.video_id = l.video_id
    WHERE YEAR(l.start_time) = '2021'
    GROUP BY
        v.author,
        DATE_FORMAT(l.start_time, '%Y-%m')
)
SELECT
    author,
    month,
    ROUND(net_follow / play_cnt, 3) AS fans_growth_rate,
    -- ROWS frame: cumulative total up to and including the current month.
    SUM(net_follow) OVER (
        PARTITION BY author
        ORDER BY month
        ROWS UNBOUNDED PRECEDING
    ) AS total_fans
FROM monthly
ORDER BY author, total_fans

SQL5 国庆期间每类视频点赞量和转发量

-- For each tag and each of 2021-10-01..03: total likes over the trailing 7 days
-- and the largest single-day retweet count within those 7 days.
WITH daily AS (
    -- One row per (tag, day). The scan starts on 09-25 so that the 7-day
    -- window ending on 10-01 is fully populated.
    SELECT
        v.tag,
        DATE(l.start_time) AS dt,
        SUM(l.if_like) AS like_cnt,
        SUM(l.if_retweet) AS retweet_cnt
    FROM tb_user_video_log AS l
    INNER JOIN tb_video_info AS v
        ON v.video_id = l.video_id
    -- Half-open range with datetime literals: keeps every instant of 10-03
    -- (including fractional seconds) and avoids comparing a DATETIME column
    -- with bare integer literals.
    WHERE l.start_time >= '2021-09-25 00:00:00'
      AND l.start_time < '2021-10-04 00:00:00'
    GROUP BY
        v.tag,
        DATE(l.start_time)
),
rolling AS (
    SELECT
        tag,
        dt,
        SUM(like_cnt) OVER trailing_7d AS sum_like_cnt_7d,
        MAX(retweet_cnt) OVER trailing_7d AS max_retweet_cnt_7d
    FROM daily
    -- RANGE (not ROWS) so that days with no plays still count towards the 7.
    WINDOW trailing_7d AS (
        PARTITION BY tag
        ORDER BY dt
        RANGE BETWEEN INTERVAL 6 DAY PRECEDING AND CURRENT ROW
    )
)
-- Explicit column list instead of SELECT *; same columns, same order.
SELECT
    tag,
    dt,
    sum_like_cnt_7d,
    max_retweet_cnt_7d
FROM rolling
WHERE dt IN ('2021-10-01', '2021-10-02', '2021-10-03')
ORDER BY tag DESC, dt ASC

SQL6 近一个月发布的视频中热度最高的top3视频

-- Top 3 videos by hot index among videos released within the 30 calendar days
-- ending on the latest end_time in the play log.
--   hot_index = (100*finish_rate + 5*likes + 3*comments + 2*retweets) * fresh_rate
--   fresh_rate = 1 / (1 + days between the latest log day and the video's last play)
WITH latest AS (
    SELECT MAX(end_time) AS last_end_time
    FROM tb_user_video_log
),
video_stats AS (
    SELECT
        l.video_id,
        SUM(
            CASE
                WHEN TIMESTAMPDIFF(SECOND, l.start_time, l.end_time) >= v.duration THEN 1
                ELSE 0
            END
        ) / COUNT(*) AS finish_rate,
        SUM(l.if_like) AS like_cnt,
        SUM(CASE WHEN l.comment_id IS NOT NULL THEN 1 ELSE 0 END) AS comment_cnt,
        SUM(l.if_retweet) AS retweet_cnt,
        1 / (1 + DATEDIFF((SELECT last_end_time FROM latest), MAX(l.end_time))) AS fresh_rate
    FROM tb_user_video_log AS l
    INNER JOIN tb_video_info AS v
        ON v.video_id = l.video_id
    WHERE DATEDIFF((SELECT last_end_time FROM latest), v.release_time) <= 29
    GROUP BY l.video_id
)
SELECT
    video_id,
    ROUND(
        (100 * finish_rate + 5 * like_cnt + 3 * comment_cnt + 2 * retweet_cnt) * fresh_rate,
        0
    ) AS hot_index
FROM video_stats
ORDER BY hot_index DESC
LIMIT 3

SQL7 2021年11月每天的人均浏览文章时长

-- Average viewing seconds per distinct user, for each day of November 2021.
-- Rows with artical_id = '0' are excluded; days are keyed on out_time.
WITH article_views AS (
    SELECT
        uid,
        DATE(out_time) AS dt,
        TIMESTAMPDIFF(SECOND, in_time, out_time) AS view_seconds
    FROM tb_user_log
    WHERE artical_id <> '0'
      AND DATE_FORMAT(out_time, '%Y%m') = '202111'
)
SELECT
    dt,
    ROUND(SUM(view_seconds) / COUNT(DISTINCT uid), 1) AS avg_viiew_len_sec
FROM article_views
GROUP BY dt
ORDER BY avg_viiew_len_sec

SQL8 每篇文章同一时刻最大在看人数

-- Peak number of simultaneous viewing sessions per article.
-- For every session, count the sessions of the same article that are open at
-- the moment it starts (a session ending at that exact moment still counts),
-- then take the largest count per article.
WITH viewers_at_entry AS (
    SELECT
        cur.artical_id,
        (
            SELECT COUNT(*)
            FROM tb_user_log AS other
            WHERE other.artical_id = cur.artical_id
              AND cur.in_time BETWEEN other.in_time AND other.out_time
        ) AS concurrent_cnt
    FROM tb_user_log AS cur
    WHERE cur.artical_id <> '0'
)
SELECT
    artical_id,
    MAX(concurrent_cnt) AS max_uv
FROM viewers_at_entry
GROUP BY artical_id
ORDER BY max_uv DESC

SQL9 2021年11月每天新用户的次日留存率

-- Next-day retention of new users, per first-activity date in November 2021.
-- A user is active on a date if a session starts (in_time) or ends (out_time)
-- on it; UNION removes duplicates, so each (uid, login_date) appears once.
WITH login_ranking AS (
    SELECT
        uid,
        login_date,
        -- ranking = 1 marks the user's first active date.
        RANK() OVER (PARTITION BY uid ORDER BY login_date) AS ranking
    FROM (
        SELECT uid, DATE(in_time) AS login_date
        FROM tb_user_log
        UNION
        SELECT uid, DATE(out_time) AS login_date
        FROM tb_user_log
    ) AS login_log
)
SELECT
    l1.login_date,
    ROUND(SUM(CASE WHEN l2.uid IS NOT NULL THEN 1 ELSE 0 END) / COUNT(*), 2) AS uv_left_rate
FROM login_ranking AS l1
LEFT JOIN login_ranking AS l2
    ON l2.uid = l1.uid
    -- Retained only if the user is active on the calendar day right after the
    -- first day. Matching on "next ranking" instead would count a user who
    -- comes back weeks later as next-day retained.
    AND l2.login_date = DATE_ADD(l1.login_date, INTERVAL 1 DAY)
WHERE DATE_FORMAT(l1.login_date, '%Y%m') = '202111'
  AND l1.ranking = 1
GROUP BY l1.login_date
ORDER BY l1.login_date

SQL10 统计活跃间隔对用户分级结果

-- Share of users in each activity grade, relative to all distinct users.
-- "Today" is the latest out_time in the log; day gaps are calendar-day
-- differences (DATEDIFF). Grades are tested in this order:
--   新晋用户: first active date within the last 7 days (gap <= 6)
--   忠实用户: not new, last active date within the last 7 days
--   沉睡用户: last active date 7..29 days ago
--   流失用户: last active date 30 or more days ago
WITH active_days AS (
    -- A user is active on a date if a session starts or ends on it.
    SELECT uid, DATE(in_time) AS active_date
    FROM tb_user_log
    UNION
    SELECT uid, DATE(out_time) AS active_date
    FROM tb_user_log
),
user_span AS (
    SELECT
        uid,
        MIN(active_date) AS first_date,
        MAX(active_date) AS last_date
    FROM active_days
    GROUP BY uid
),
graded AS (
    SELECT
        s.uid,
        CASE
            WHEN DATEDIFF(t.today, s.first_date) <= 6 THEN '新晋用户'
            WHEN DATEDIFF(t.today, s.last_date) <= 6 THEN '忠实用户'
            -- Based on the LAST active date. Requiring the first-ever date to
            -- fall in the 7..29-day band would push long-standing users who
            -- were last seen 7..29 days ago into the churned grade.
            WHEN DATEDIFF(t.today, s.last_date) <= 29 THEN '沉睡用户'
            ELSE '流失用户'
        END AS user_grade
    FROM user_span AS s
    CROSS JOIN (
        SELECT MAX(out_time) AS today
        FROM tb_user_log
    ) AS t
)
SELECT
    user_grade,
    ROUND(COUNT(*) / (SELECT COUNT(DISTINCT uid) FROM tb_user_log), 2) AS ratio
FROM graded
GROUP BY user_grade
ORDER BY ratio DESC, user_grade

SQL11 每天的日活数及新用户占比

-- Daily active users and the share of them who are new on that day.
-- A user is active on a date if a session starts or ends on it; a user is
-- "new" on the first such date (visit_rank = 1).
WITH active_days AS (
    SELECT uid, DATE(in_time) AS login_date
    FROM tb_user_log
    UNION
    SELECT uid, DATE(out_time) AS login_date
    FROM tb_user_log
),
ranked_days AS (
    SELECT
        uid,
        login_date,
        RANK() OVER (PARTITION BY uid ORDER BY login_date) AS visit_rank
    FROM active_days
)
SELECT
    login_date AS dt,
    COUNT(DISTINCT uid) AS dau,
    ROUND(
        SUM(CASE WHEN visit_rank = 1 THEN 1 ELSE 0 END) / COUNT(DISTINCT uid),
        2
    ) AS uv_new_ratio
FROM ranked_days
GROUP BY login_date
ORDER BY login_date

SQL12 连续签到领金币

-- Coins earned per user and month from sign-ins between 2021-07-07 and
-- 2021-10-31 (rows with artical_id = '0' and sign_in = '1').
-- Each sign-in day pays 1 coin; the 3rd consecutive day pays 3 and the 7th
-- pays 7, after which the streak counter starts over at 1.
--
-- Implemented with window functions (gaps-and-islands) rather than
-- assignments to user variables inside SELECT: the evaluation order of such
-- assignments is not defined, the feature is deprecated in MySQL 8, and an
-- ORDER BY inside a derived table is not guaranteed to drive the row order.
WITH sign_days AS (
    -- DISTINCT: several sign-in rows on one day must count as one day.
    SELECT DISTINCT
        uid,
        DATE(in_time) AS dt
    FROM tb_user_log
    WHERE DATE(in_time) BETWEEN '2021-07-07' AND '2021-10-31'
      AND artical_id = '0'
      AND sign_in = '1'
),
streaks AS (
    SELECT
        uid,
        dt,
        -- Consecutive dates minus a consecutive row number give a constant,
        -- so every unbroken run of days shares one streak_key.
        DATE_SUB(
            dt,
            INTERVAL ROW_NUMBER() OVER (PARTITION BY uid ORDER BY dt) DAY
        ) AS streak_key
    FROM sign_days
),
numbered AS (
    SELECT
        uid,
        dt,
        ROW_NUMBER() OVER (PARTITION BY uid, streak_key ORDER BY dt) AS streak_day
    FROM streaks
)
SELECT
    uid,
    DATE_FORMAT(dt, '%Y%m') AS month,
    SUM(
        -- Position inside the repeating 7-day cycle: 3rd day -> 3, 7th -> 7.
        CASE MOD(streak_day, 7)
            WHEN 3 THEN 3
            WHEN 0 THEN 7
            ELSE 1
        END
    ) AS coin
FROM numbered
GROUP BY
    uid,
    DATE_FORMAT(dt, '%Y%m')
ORDER BY month, uid

SQL13 计算商城中2021年每月的GMV

-- Monthly GMV for 2021 over orders whose status is '0' or '1', keeping only
-- months whose total exceeds 100000.
SELECT
    DATE_FORMAT(event_time, '%Y-%m') AS month,
    ROUND(SUM(total_amount), 0) AS GMV
FROM tb_order_overall
-- IN (...) keeps the status test as one unit. Written as
-- "status = '0' OR status = '1' AND <year test>", AND binds tighter than OR,
-- so status-'0' orders from every year would be included.
WHERE status IN ('0', '1')
  AND YEAR(event_time) = 2021
GROUP BY DATE_FORMAT(event_time, '%Y-%m')
HAVING SUM(total_amount) > 100000
ORDER BY GMV

SQL14 统计2021年10月每个退货率不大于0.5的商品各项指标

-- Funnel rates per product for events in October 2021, keeping products whose
-- refund rate is at most 0.5.
--   ctr          = clicks   / event rows
--   cart_rate    = carts    / clicks
--   payment_rate = payments / carts
--   refund_rate  = refunds  / payments
-- A zero denominator yields NULL in MySQL; such rates are reported as 0.
WITH funnel AS (
    SELECT
        product_id,
        COUNT(*) AS event_cnt,
        SUM(if_click) AS click_cnt,
        SUM(if_cart) AS cart_cnt,
        SUM(if_payment) AS payment_cnt,
        SUM(if_refund) AS refund_cnt
    FROM tb_user_event
    WHERE DATE_FORMAT(event_time, '%Y%m') = '202110'
    GROUP BY product_id
)
SELECT
    product_id,
    ROUND(click_cnt / event_cnt, 3) AS ctr,
    ROUND(COALESCE(cart_cnt / click_cnt, 0), 3) AS cart_rate,
    ROUND(COALESCE(payment_cnt / cart_cnt, 0), 3) AS payment_rate,
    ROUND(COALESCE(refund_cnt / payment_cnt, 0), 3) AS refund_rate
FROM funnel
WHERE COALESCE(refund_cnt / payment_cnt, 0) <= 0.5
ORDER BY product_id

SQL15 某店铺的各商品毛利率及店铺整体毛利率

-- Gross-margin report for shop '901', orders dated 2021-10-01 or later:
-- one summary row for the whole shop, followed by every product whose margin
-- exceeds 24.9%. Margin = 1 - purchase cost / sales revenue, shown as "NN.N%".
WITH stat AS (
    SELECT
        product.product_id,
        SUM(detail.price * detail.cnt) AS sale_total,
        SUM(product.in_price * detail.cnt) AS total_in_price
    FROM tb_product_info AS product
    INNER JOIN tb_order_detail AS detail
        ON detail.product_id = product.product_id
    INNER JOIN tb_order_overall AS overall
        ON overall.order_id = detail.order_id
    WHERE DATE(overall.event_time) >= '2021-10-01'
      AND product.shop_id = '901'
    GROUP BY product.product_id
),
report AS (
    -- section / sort_id exist only to order the final result.
    SELECT
        0 AS section,
        NULL AS sort_id,
        '店铺汇总' AS product_id,
        CONCAT(ROUND(100 * (1 - SUM(total_in_price) / SUM(sale_total)), 1), '%') AS profit_rate
    FROM stat
    UNION ALL
    SELECT
        1 AS section,
        product_id AS sort_id,
        product_id,
        CONCAT(ROUND(100 * (1 - total_in_price / sale_total), 1), '%') AS profit_rate
    FROM stat
    WHERE 1 - total_in_price / sale_total > 0.249
)
SELECT
    product_id,
    profit_rate
FROM report
-- The ordering has to sit on the outermost query: MySQL discards an ORDER BY
-- placed inside a parenthesized UNION branch that has no LIMIT, so a
-- branch-level sort never takes effect (and sorting the formatted "NN.N%"
-- string would be lexicographic anyway).
-- NOTE(review): products are ordered by product_id here — confirm that this,
-- rather than margin, is the intended key.
ORDER BY section, sort_id

SQL16 零食类商品中复购率top3高的商品

-- Top 3 products tagged '零食' by repurchase rate over the 90 calendar days
-- ending on the latest order event_time.
--   repurchase rate = buyers with at least 2 purchase rows / all buyers
WITH latest AS (
    SELECT MAX(event_time) AS last_event_time
    FROM tb_order_overall
),
buyer_purchases AS (
    -- One row per (product, buyer) with the number of order-detail rows.
    SELECT
        p.product_id,
        o.uid,
        COUNT(*) AS purchase_cnt
    FROM tb_product_info AS p
    INNER JOIN tb_order_detail AS d
        ON d.product_id = p.product_id
    INNER JOIN tb_order_overall AS o
        ON o.order_id = d.order_id
    CROSS JOIN latest
    WHERE p.tag = '零食'
      AND DATEDIFF(latest.last_event_time, o.event_time) < 90
    GROUP BY
        p.product_id,
        o.uid
)
SELECT
    product_id,
    ROUND(SUM(CASE WHEN purchase_cnt >= 2 THEN 1 ELSE 0 END) / COUNT(*), 3) AS repurchase_rate
FROM buyer_purchases
GROUP BY product_id
ORDER BY repurchase_rate DESC, product_id ASC
LIMIT 3

SQL17 10月的新户客单价和获客成本

-- New customers in October 2021: average paid amount per first order and
-- average acquisition cost, where cost = list price of the order's items
-- minus the amount actually paid.
WITH ranked_orders AS (
    SELECT
        uid,
        order_id,
        total_amount,
        event_time,
        -- ranking = 1 marks a user's earliest order(s).
        RANK() OVER (PARTITION BY uid ORDER BY event_time) AS ranking
    FROM tb_order_overall
),
order_list_price AS (
    SELECT
        order_id,
        -- Unit price times quantity. Summing price alone under-counts any
        -- line with cnt > 1.
        SUM(price * cnt) AS total_price
    FROM tb_order_detail
    GROUP BY order_id
)
SELECT
    ROUND(SUM(overall.total_amount) / COUNT(*), 1) AS avg_amount,
    ROUND(SUM(detail.total_price - overall.total_amount) / COUNT(*), 1) AS avg_cost
FROM ranked_orders AS overall
INNER JOIN order_list_price AS detail
    ON detail.order_id = overall.order_id
WHERE DATE_FORMAT(overall.event_time, '%Y%m') = '202110'
  AND overall.ranking = 1

SQL18 店铺901国庆期间的7日动销率和滞销率

-- Shop '901', for each of 2021-10-01..03 on which at least one order exists:
--   sale_rate   = distinct shop products ordered in the trailing 7 days
--                 / shop products released on or before that day
--   unsale_rate = 1 - that ratio
WITH dt_list AS (
    -- Holiday dates, restricted to days that appear in tb_order_overall.
    SELECT holiday.dt AS dt
    FROM (
        SELECT '2021-10-01' AS dt
        UNION ALL
        SELECT '2021-10-02' AS dt
        UNION ALL
        SELECT '2021-10-03' AS dt
    ) AS holiday
    INNER JOIN (
        SELECT DISTINCT DATE(event_time) AS dt
        FROM tb_order_overall
    ) AS order_days
        ON order_days.dt = holiday.dt
),
shop_products AS (
    SELECT product_id, release_time
    FROM tb_product_info
    WHERE shop_id = '901'
),
on_sale_stat AS (
    -- Number of shop products already released on each date.
    SELECT
        d.dt,
        COUNT(*) AS on_sale_cnt
    FROM dt_list AS d
    INNER JOIN shop_products AS sp
        ON DATE(sp.release_time) <= d.dt
    GROUP BY d.dt
),
sold_stat AS (
    -- Distinct shop products with an order in the 7 days ending on each date.
    SELECT
        d.dt,
        COUNT(DISTINCT sold.product_id) AS sold_out_cnt
    FROM dt_list AS d
    INNER JOIN (
        SELECT sp.product_id, o.event_time
        FROM shop_products AS sp
        INNER JOIN tb_order_detail AS od
            ON od.product_id = sp.product_id
        INNER JOIN tb_order_overall AS o
            ON o.order_id = od.order_id
    ) AS sold
        ON DATE(sold.event_time) BETWEEN DATE_SUB(d.dt, INTERVAL 6 DAY) AND d.dt
    GROUP BY d.dt
)
SELECT
    a.dt,
    ROUND(IFNULL(s.sold_out_cnt, 0) / a.on_sale_cnt, 3) AS sale_rate,
    ROUND(1 - IFNULL(s.sold_out_cnt, 0) / a.on_sale_cnt, 3) AS unsale_rate
FROM on_sale_stat AS a
-- LEFT JOIN: a date with no sales still reports a 0 sale rate.
LEFT JOIN sold_stat AS s
    ON s.dt = a.dt
ORDER BY a.dt

SQL19 2021年国庆在北京接单3次及以上的司机统计信息

-- Among drivers with at least 3 orders in city '北京' dated 2021-10-01..07:
-- average number of orders and average total fare per driver.
WITH driver_stats AS (
    SELECT
        o.driver_id,
        COUNT(*) AS order_num,
        -- A missing fare contributes 0 to the driver's total.
        SUM(COALESCE(o.fare, 0)) AS total_fare
    FROM tb_get_car_record AS r
    INNER JOIN tb_get_car_order AS o
        ON o.order_id = r.order_id
    WHERE r.city = '北京'
      AND DATE(o.order_time) BETWEEN '2021-10-01' AND '2021-10-07'
    GROUP BY o.driver_id
    HAVING COUNT(*) >= 3
)
SELECT
    '北京' AS city,
    ROUND(AVG(order_num), 3) AS avg_order_num,
    ROUND(AVG(total_fare), 3) AS avg_income
FROM driver_stats

SQL20 有取消订单记录的司机平均评分

-- Average grade of every driver who has an October-2021 order with a NULL
-- start_time, plus one overall row labelled '总体'.
WITH cancel_drivers AS (
    SELECT DISTINCT o.driver_id AS did
    FROM tb_get_car_record AS r
    INNER JOIN tb_get_car_order AS o
        ON o.order_id = r.order_id
    WHERE DATE_FORMAT(o.order_time, '%Y%m') = '202110'
      AND o.start_time IS NULL
),
graded_orders AS (
    -- All graded orders of any date; ungraded orders are left out.
    SELECT
        driver_id AS did,
        grade
    FROM tb_get_car_order
    WHERE grade IS NOT NULL
)
SELECT
    -- WITH ROLLUP emits a super-aggregate row whose grouping column is NULL.
    COALESCE(c.did, '总体') AS driver_id,
    ROUND(AVG(g.grade), 1) AS avg_grade
FROM cancel_drivers AS c
INNER JOIN graded_orders AS g
    ON g.did = c.did
GROUP BY c.did WITH ROLLUP
-- The boolean sorts 0 before 1, which puts the overall row last.
ORDER BY driver_id = '总体', driver_id

SQL21 每个城市中评分最高的司机信息

-- For the best-rated driver(s) of each city: average grade, average orders
-- per active day and average mileage per active day.
WITH driver_city_grade AS (
    SELECT
        r.city,
        o.driver_id,
        AVG(o.grade) AS avg_grade
    FROM tb_get_car_record AS r
    INNER JOIN tb_get_car_order AS o
        ON o.order_id = r.order_id
    WHERE o.grade IS NOT NULL
    GROUP BY
        r.city,
        o.driver_id
),
ranked AS (
    SELECT
        city,
        driver_id,
        avg_grade,
        -- RANK keeps ties: several drivers can share first place in a city.
        RANK() OVER (PARTITION BY city ORDER BY avg_grade DESC) AS city_rank
    FROM driver_city_grade
),
grade_stat AS (
    -- Rounding happens after ranking, so ranks use the unrounded average.
    SELECT
        city,
        driver_id,
        ROUND(avg_grade, 1) AS avg_grade
    FROM ranked
    WHERE city_rank = 1
),
driver_daily AS (
    -- Per driver and day with at least one order, over all orders.
    SELECT
        driver_id,
        DATE(order_time) AS order_dt,
        COUNT(*) AS order_num,
        SUM(COALESCE(mileage, 0)) AS total_mileage
    FROM tb_get_car_order
    GROUP BY
        driver_id,
        DATE(order_time)
),
driver_avg AS (
    SELECT
        driver_id,
        ROUND(AVG(order_num), 1) AS avg_order_num,
        ROUND(AVG(total_mileage), 3) AS avg_mileage
    FROM driver_daily
    GROUP BY driver_id
)
SELECT
    g.city,
    g.driver_id,
    g.avg_grade,
    a.avg_order_num,
    a.avg_mileage
FROM driver_avg AS a
INNER JOIN grade_stat AS g
    ON g.driver_id = a.driver_id
ORDER BY avg_order_num

SQL22 国庆期间近7日日均取消订单量

-- For each of 2021-10-01..03: trailing-7-day daily average of orders with a
-- start_time (finished) and of orders without one (cancelled).
WITH daily AS (
    -- The scan starts on 09-25 so the window ending on 10-01 is complete.
    SELECT
        DATE(order_time) AS dt,
        SUM(CASE WHEN start_time IS NOT NULL THEN 1 ELSE 0 END) AS finish_num,
        SUM(CASE WHEN start_time IS NULL THEN 1 ELSE 0 END) AS cancel_num
    FROM tb_get_car_order
    WHERE DATE(order_time) BETWEEN '2021-09-25' AND '2021-10-03'
    GROUP BY DATE(order_time)
),
rolling AS (
    SELECT
        dt,
        -- Always divided by 7, even when some of the 7 days have no orders.
        SUM(finish_num) OVER trailing_7d / 7 AS finish_num_7d,
        SUM(cancel_num) OVER trailing_7d / 7 AS cancel_num_7d
    FROM daily
    WINDOW trailing_7d AS (
        ORDER BY dt
        RANGE BETWEEN INTERVAL 6 DAY PRECEDING AND CURRENT ROW
    )
),
holiday AS (
    SELECT '2021-10-01' AS dt
    UNION ALL
    SELECT '2021-10-02' AS dt
    UNION ALL
    SELECT '2021-10-03' AS dt
)
SELECT
    h.dt AS dt,
    ROUND(IFNULL(r.finish_num_7d, 0), 2) AS finish_num_7d,
    ROUND(IFNULL(r.cancel_num_7d, 0), 2) AS cancel_num_7d
FROM holiday AS h
-- LEFT JOIN: a holiday date with no orders of its own still gets a row (0, 0).
LEFT JOIN rolling AS r
    ON r.dt = h.dt
ORDER BY h.dt

SQL23 工作日各时段叫车量、等待接单时间和调度时间

-- Weekday (Mon-Fri) ride requests grouped by time-of-day period: request
-- count, average wait (event_time -> order_time) and average dispatch time
-- (order_time -> start_time), both reported in minutes.
WITH weekday_calls AS (
    SELECT
        HOUR(event_time) AS call_hour,
        TIMESTAMPDIFF(SECOND, event_time, order_time) AS wait_time,
        -- NULL when start_time is NULL; AVG skips those rows.
        TIMESTAMPDIFF(SECOND, order_time, start_time) AS dispatch_time
    FROM tb_get_car_record AS records
    INNER JOIN tb_get_car_order AS orders
        ON orders.order_id = records.order_id
    -- WEEKDAY(): 0 = Monday ... 6 = Sunday.
    WHERE WEEKDAY(event_time) BETWEEN 0 AND 4
),
labelled AS (
    SELECT
        CASE
            WHEN call_hour BETWEEN 7 AND 8 THEN '早高峰'
            WHEN call_hour BETWEEN 9 AND 16 THEN '工作时间'
            WHEN call_hour BETWEEN 17 AND 19 THEN '晚高峰'
            ELSE '休息时间'
        END AS period,
        wait_time,
        dispatch_time
    FROM weekday_calls
)
SELECT
    period,
    COUNT(*) AS get_car_num,
    ROUND(AVG(wait_time) / 60, 1) AS avg_wait_time,
    ROUND(AVG(dispatch_time) / 60, 1) AS avg_dispatch_time
FROM labelled
GROUP BY period
ORDER BY get_car_num

SQL24 各城市最大同时等车人数

-- Peak number of riders waiting at the same moment, per city, for requests
-- made in October 2021. Sweep-line: +1 when a request starts, -1 when the
-- wait ends, running sum per city, then the maximum.
WITH waits AS (
    SELECT
        records.city,
        event_time AS wait_start,
        -- Elsewhere in this file a NULL start_time marks a cancelled order.
        -- A NULL wait end must not become a -1 event: NULL sorts first in
        -- ascending order and would lower every running total after it.
        -- NOTE(review): assumes tb_get_car_order.finish_time holds the
        -- cancellation time when start_time is NULL — confirm against the
        -- schema.
        COALESCE(orders.start_time, finish_time) AS wait_end
    FROM tb_get_car_record AS records
    INNER JOIN tb_get_car_order AS orders
        ON orders.order_id = records.order_id
    WHERE DATE_FORMAT(event_time, '%Y%m') = '202110'
),
wait_events AS (
    SELECT city, wait_start AS ts, 1 AS flag
    FROM waits
    UNION ALL
    SELECT city, wait_end AS ts, -1 AS flag
    FROM waits
    WHERE wait_end IS NOT NULL
),
running AS (
    SELECT
        city,
        -- flag DESC: at an identical timestamp arrivals are applied before
        -- departures, so the peak counts both groups.
        SUM(flag) OVER (PARTITION BY city ORDER BY ts ASC, flag DESC) AS wait_uv
    FROM wait_events
)
SELECT
    city,
    MAX(wait_uv) AS max_wait_uv
FROM running
GROUP BY city
ORDER BY max_wait_uv, city

你可能感兴趣的:(数据库,sql,面试,数据库)