SQL1 各个视频的平均完播率
-- Completion rate per video for plays that started in 2021.
-- A play counts as complete when the watched seconds (end_time - start_time)
-- reach the video's duration; the rate is rounded to 3 decimals, highest first.
SELECT
    plays.video_id,
    ROUND(
        SUM(
            CASE
                WHEN TIMESTAMPDIFF(SECOND, plays.start_time, plays.end_time) >= videos.duration THEN 1
                ELSE 0
            END
        ) / COUNT(*),
        3
    ) AS avg_comp_play_rate
FROM tb_user_video_log AS plays
INNER JOIN tb_video_info AS videos
    ON videos.video_id = plays.video_id
WHERE YEAR(plays.start_time) = 2021
GROUP BY plays.video_id
ORDER BY avg_comp_play_rate DESC
SQL2 平均播放进度大于60%的视频类别
-- Tags whose average play progress exceeds 60%.
-- Progress of one play = watched seconds / duration, capped at 1 via LEAST.
-- The output shows the average as a percentage string with 2 decimals; the
-- filter and the sort both use the unrounded numeric ratio.
WITH tag_progress AS (
    SELECT
        videos.tag,
        AVG(
            LEAST(
                TIMESTAMPDIFF(SECOND, plays.start_time, plays.end_time) / videos.duration,
                1
            )
        ) AS progress_ratio
    FROM tb_user_video_log AS plays
    INNER JOIN tb_video_info AS videos
        ON videos.video_id = plays.video_id
    GROUP BY videos.tag
)
SELECT
    tag_progress.tag,
    CONCAT(ROUND(tag_progress.progress_ratio * 100, 2), '%') AS avg_play_progress
FROM tag_progress
WHERE tag_progress.progress_ratio > 0.6
ORDER BY tag_progress.progress_ratio DESC
SQL3 每类视频近一个月的转发量/率
-- Retweet count and retweet rate per tag over the most recent 30 calendar days,
-- where "today" is the date of the latest end_time in the play log.
-- Rate = retweets / plays, rounded to 3 decimals, highest first.
SELECT
    videos.tag,
    SUM(plays.if_retweet) AS retweet_cut,
    ROUND(SUM(plays.if_retweet) / COUNT(*), 3) AS retweet_rate
FROM tb_user_video_log AS plays
INNER JOIN tb_video_info AS videos
    ON videos.video_id = plays.video_id
WHERE
    -- DATEDIFF yields whole days, so "<= 29" is the same window as "< 30".
    DATEDIFF(
        (SELECT MAX(end_time) FROM tb_user_video_log),
        plays.end_time
    ) <= 29
GROUP BY videos.tag
ORDER BY retweet_rate DESC
SQL4 每个创作者每月的涨粉率及截止当前的总粉丝量
-- Monthly fan growth rate and running fan total per author, for plays started in 2021.
-- Per (author, month): rows with if_follow = '1' are counted as gains and rows with
-- if_follow = '2' as losses; growth rate = (gains - losses) / plays, 3 decimals.
-- total_fans is the running sum of net gains per author, in month order.
WITH monthly AS (
    SELECT
        videos.author,
        DATE_FORMAT(plays.start_time, '%Y-%m') AS month,
        SUM(CASE WHEN plays.if_follow = '1' THEN 1 ELSE 0 END) AS gained,
        SUM(CASE WHEN plays.if_follow = '2' THEN 1 ELSE 0 END) AS lost,
        COUNT(*) AS play_cnt
    FROM tb_user_video_log AS plays
    INNER JOIN tb_video_info AS videos
        ON videos.video_id = plays.video_id
    WHERE YEAR(plays.start_time) = '2021'
    GROUP BY
        videos.author,
        DATE_FORMAT(plays.start_time, '%Y-%m')
)
SELECT
    monthly.author,
    monthly.month,
    ROUND((monthly.gained - monthly.lost) / monthly.play_cnt, 3) AS fans_growth_rate,
    SUM(monthly.gained - monthly.lost) OVER (
        PARTITION BY monthly.author
        ORDER BY monthly.month
        ROWS UNBOUNDED PRECEDING
    ) AS total_fans
FROM monthly
ORDER BY
    monthly.author,
    total_fans
SQL5 国庆期间每类视频点赞量和转发量
-- For every tag and each of 2021-10-01..2021-10-03: the trailing 7-day sum of
-- likes and the trailing 7-day maximum of daily retweets.
-- Daily totals are built from 2021-09-25 onward so the 7-day frame of Oct 1
-- is fully populated.
WITH daily AS (
    SELECT
        videos.tag,
        DATE(plays.start_time) AS dt,
        SUM(plays.if_like) AS total_like,
        SUM(plays.if_retweet) AS total_retweet
    FROM tb_user_video_log AS plays
    INNER JOIN tb_video_info AS videos
        ON videos.video_id = plays.video_id
    -- Half-open range with date literals: avoids comparing a datetime to a
    -- numeric literal and does not drop rows in the last second of Oct 3.
    WHERE plays.start_time >= '2021-09-25'
      AND plays.start_time < '2021-10-04'
    GROUP BY
        videos.tag,
        DATE(plays.start_time)
),
rolling AS (
    SELECT
        daily.tag,
        daily.dt,
        SUM(daily.total_like) OVER last_7d AS sum_like_cnt_7d,
        MAX(daily.total_retweet) OVER last_7d AS max_retweet_cnt_7d
    FROM daily
    -- RANGE (not ROWS) so that days without any plays do not widen the window.
    WINDOW last_7d AS (
        PARTITION BY daily.tag
        ORDER BY daily.dt
        RANGE BETWEEN INTERVAL 6 DAY PRECEDING AND CURRENT ROW
    )
)
SELECT
    rolling.tag,
    rolling.dt,
    rolling.sum_like_cnt_7d,
    rolling.max_retweet_cnt_7d
FROM rolling
WHERE rolling.dt IN ('2021-10-01', '2021-10-02', '2021-10-03')
ORDER BY
    rolling.tag DESC,
    rolling.dt ASC
SQL6 近一个月发布的视频中热度最高的top3视频
-- Top 3 videos by hot index among videos released within the last 30 calendar days,
-- where "today" is the date of the latest end_time in the play log.
-- hot_index = (100 * finish_rate + 5 * likes + 3 * comments + 2 * retweets) * fresh_rate
-- fresh_rate = 1 / (1 + days since the video's most recent play ended)
WITH video_stats AS (
    SELECT
        plays.video_id,
        SUM(
            CASE
                WHEN TIMESTAMPDIFF(SECOND, plays.start_time, plays.end_time) >= videos.duration THEN 1
                ELSE 0
            END
        ) / COUNT(*) AS finish_rate,
        SUM(plays.if_like) AS like_cnt,
        -- A play carries a comment when comment_id is present.
        SUM(CASE WHEN plays.comment_id IS NOT NULL THEN 1 ELSE 0 END) AS comment_cnt,
        SUM(plays.if_retweet) AS retweet_cnt,
        1 / (
            1 + DATEDIFF(
                (SELECT MAX(end_time) FROM tb_user_video_log),
                MAX(plays.end_time)
            )
        ) AS fresh_rate
    FROM tb_user_video_log AS plays
    INNER JOIN tb_video_info AS videos
        ON videos.video_id = plays.video_id
    WHERE DATEDIFF(
        (SELECT MAX(end_time) FROM tb_user_video_log),
        videos.release_time
    ) <= 29
    GROUP BY plays.video_id
)
SELECT
    video_stats.video_id,
    ROUND(
        (
            100 * video_stats.finish_rate
            + 5 * video_stats.like_cnt
            + 3 * video_stats.comment_cnt
            + 2 * video_stats.retweet_cnt
        ) * video_stats.fresh_rate,
        0
    ) AS hot_index
FROM video_stats
ORDER BY hot_index DESC
LIMIT 3
SQL7 2021年11月每天的人均浏览文章时长
-- Average article viewing time per user for each day of November 2021.
-- Rows with artical_id '0' are excluded; a row is attributed to the date of its
-- out_time. Value = total viewed seconds / distinct users that day, 1 decimal,
-- sorted ascending by that value.
SELECT
    DATE(out_time) AS dt,
    ROUND(
        SUM(TIMESTAMPDIFF(SECOND, in_time, out_time)) / COUNT(DISTINCT uid),
        1
    ) AS avg_viiew_len_sec
FROM tb_user_log
WHERE artical_id <> '0'
  AND DATE_FORMAT(out_time, '%Y%m') = '202111'
GROUP BY DATE(out_time)
ORDER BY avg_viiew_len_sec
SQL8 每篇文章同一时刻最大在看人数
-- Peak number of simultaneous viewers per article (artical_id '0' excluded).
-- Implemented as an event sweep instead of a per-row correlated subquery:
-- every session contributes +1 at in_time and -1 at out_time, and the running
-- sum per article is the number of viewers at that instant.
-- Ordering by delta DESC applies entries before exits at the same timestamp,
-- which matches the inclusive "out_time >= in_time" test of the correlated form.
-- NOTE(review): assumes in_time/out_time are non-NULL and out_time >= in_time — confirm.
WITH view_events AS (
    SELECT artical_id, in_time AS ts, 1 AS delta
    FROM tb_user_log
    WHERE artical_id <> '0'
    UNION ALL
    SELECT artical_id, out_time AS ts, -1 AS delta
    FROM tb_user_log
    WHERE artical_id <> '0'
),
running AS (
    SELECT
        view_events.artical_id,
        SUM(view_events.delta) OVER (
            PARTITION BY view_events.artical_id
            ORDER BY view_events.ts ASC, view_events.delta DESC
        ) AS viewer_cnt
    FROM view_events
)
SELECT
    running.artical_id,
    MAX(running.viewer_cnt) AS max_uv
FROM running
GROUP BY running.artical_id
ORDER BY max_uv DESC
SQL9 2021年11月每天新用户的次日留存率
-- Next-day retention of new users for each day of November 2021.
-- A user is active on the date of in_time and on the date of out_time, so a
-- session that crosses midnight counts for both days.
-- A user is "new" on their earliest active date, and "retained" when they are
-- also active on the calendar day immediately after it.
WITH active_days AS (
    -- UNION (not UNION ALL): one row per (uid, date), so the join below can
    -- match at most one row per new user.
    SELECT uid, DATE(in_time) AS login_date
    FROM tb_user_log
    UNION
    SELECT uid, DATE(out_time) AS login_date
    FROM tb_user_log
),
first_days AS (
    SELECT uid, MIN(login_date) AS first_date
    FROM active_days
    GROUP BY uid
)
SELECT
    first_days.first_date AS login_date,
    ROUND(COUNT(next_day.uid) / COUNT(*), 2) AS uv_left_rate
FROM first_days
LEFT JOIN active_days AS next_day
    ON next_day.uid = first_days.uid
    -- Match the exact following date; joining on "second active day" would
    -- count a user returning a week later as next-day retained.
    AND next_day.login_date = DATE_ADD(first_days.first_date, INTERVAL 1 DAY)
WHERE first_days.first_date >= '2021-11-01'
  AND first_days.first_date < '2021-12-01'
GROUP BY first_days.first_date
ORDER BY login_date
SQL10 统计活跃间隔对用户分级结果
-- Share of users in each activity grade, relative to all distinct users.
-- "Today" is the latest out_time in the log. Using each user's first and last
-- active date (a session counts for the dates of both in_time and out_time):
--   first active within the last 7 days            -> '新晋用户'
--   otherwise, last active within the last 7 days  -> '忠实用户'
--   otherwise, last active within the last 30 days -> '沉睡用户'
--   otherwise                                      -> '流失用户'
-- The third rule depends only on the last active date, so a long-standing user
-- who was last seen 7-29 days ago is graded '沉睡用户' rather than '流失用户'.
WITH active_days AS (
    SELECT uid, DATE(in_time) AS login_date
    FROM tb_user_log
    UNION ALL
    SELECT uid, DATE(out_time) AS login_date
    FROM tb_user_log
),
user_span AS (
    SELECT
        uid,
        MIN(login_date) AS first_date,
        MAX(login_date) AS last_date
    FROM active_days
    GROUP BY uid
),
today AS (
    SELECT MAX(out_time) AS ts
    FROM tb_user_log
),
graded AS (
    SELECT
        user_span.uid,
        CASE
            WHEN DATEDIFF(today.ts, user_span.first_date) <= 6 THEN '新晋用户'
            WHEN DATEDIFF(today.ts, user_span.last_date) <= 6 THEN '忠实用户'
            WHEN DATEDIFF(today.ts, user_span.last_date) <= 29 THEN '沉睡用户'
            ELSE '流失用户'
        END AS user_grade
    FROM user_span
    CROSS JOIN today
)
SELECT
    graded.user_grade,
    ROUND(
        COUNT(*) / (SELECT COUNT(DISTINCT uid) FROM tb_user_log),
        2
    ) AS ratio
FROM graded
GROUP BY graded.user_grade
ORDER BY
    ratio DESC,
    graded.user_grade
SQL11 每天的日活数及新用户占比
-- Daily active users and the share of them that are new that day.
-- A session makes its user active on the dates of both in_time and out_time;
-- UNION removes duplicates, leaving one row per (uid, date).
-- A user is new on the date ranked first among their active dates.
WITH active_days AS (
    SELECT uid, DATE(in_time) AS login_date
    FROM tb_user_log
    UNION
    SELECT uid, DATE(out_time) AS login_date
    FROM tb_user_log
),
ranked_days AS (
    SELECT
        uid,
        login_date,
        RANK() OVER (PARTITION BY uid ORDER BY login_date) AS day_no
    FROM active_days
)
SELECT
    ranked_days.login_date AS dt,
    COUNT(DISTINCT ranked_days.uid) AS dau,
    ROUND(
        SUM(CASE WHEN ranked_days.day_no = 1 THEN 1 ELSE 0 END)
        / COUNT(DISTINCT ranked_days.uid),
        2
    ) AS uv_new_ratio
FROM ranked_days
GROUP BY ranked_days.login_date
ORDER BY dt
SQL12 连续签到领金币
-- Coins earned per user per month from daily sign-ins between 2021-07-07 and
-- 2021-10-31 (rows with artical_id = '0' and sign_in = '1').
-- Each sign-in day earns 1 coin; the 3rd consecutive day earns 3 and the 7th
-- earns 7, after which the streak counter starts over at 1.
-- Written with window functions rather than user variables assigned inside
-- SELECT, whose evaluation order MySQL does not guarantee.
WITH sign_days AS (
    -- DISTINCT: several sign-in rows on one day count as a single sign-in day.
    SELECT DISTINCT
        uid,
        DATE(in_time) AS dt
    FROM tb_user_log
    WHERE DATE(in_time) BETWEEN '2021-07-07' AND '2021-10-31'
      AND artical_id = '0'
      AND sign_in = '1'
),
numbered AS (
    SELECT
        uid,
        dt,
        ROW_NUMBER() OVER (PARTITION BY uid ORDER BY dt) AS day_seq
    FROM sign_days
),
streaks AS (
    -- Within a run of consecutive dates, dt minus its sequence number is
    -- constant, so this value identifies the streak a day belongs to.
    SELECT
        uid,
        dt,
        DATE_SUB(dt, INTERVAL day_seq DAY) AS streak_anchor
    FROM numbered
),
streak_days AS (
    SELECT
        uid,
        dt,
        ROW_NUMBER() OVER (PARTITION BY uid, streak_anchor ORDER BY dt) AS streak_day
    FROM streaks
)
SELECT
    streak_days.uid,
    DATE_FORMAT(streak_days.dt, '%Y%m') AS month,
    SUM(
        -- streak_day cycles with period 7: remainder 3 is the 3rd day of a
        -- cycle and remainder 0 is the 7th.
        CASE streak_days.streak_day % 7
            WHEN 3 THEN 3
            WHEN 0 THEN 7
            ELSE 1
        END
    ) AS coin
FROM streak_days
GROUP BY
    streak_days.uid,
    DATE_FORMAT(streak_days.dt, '%Y%m')
ORDER BY
    month,
    streak_days.uid
SQL13 计算商城中2021年每月的GMV
-- Monthly GMV for 2021: the sum of total_amount over orders whose status is
-- '0' or '1', keeping only months whose GMV exceeds 100000, sorted ascending.
SELECT
    DATE_FORMAT(event_time, '%Y-%m') AS month,
    ROUND(SUM(total_amount), 0) AS GMV
FROM tb_order_overall
-- IN (...) keeps the status test as one predicate. Written as
-- "a OR b AND c", AND binds first and the year filter would apply only to
-- status '1', letting status '0' rows from every year through.
WHERE status IN ('0', '1')
  AND YEAR(event_time) = 2021
GROUP BY DATE_FORMAT(event_time, '%Y-%m')
HAVING SUM(total_amount) > 100000
ORDER BY GMV
SQL14 统计2021年10月每个退货率不大于0.5的商品各项指标
-- Funnel metrics per product for events in October 2021, keeping products whose
-- refund rate is at most 0.5:
--   ctr          = clicks   / event rows
--   cart_rate    = carts    / clicks
--   payment_rate = payments / carts
--   refund_rate  = refunds  / payments
-- A zero denominator makes the division NULL, which is reported as 0.
-- The HAVING filter tests the unrounded refund rate.
SELECT
    product_id,
    ROUND(SUM(if_click) / COUNT(*), 3) AS ctr,
    ROUND(COALESCE(SUM(if_cart) / SUM(if_click), 0), 3) AS cart_rate,
    ROUND(COALESCE(SUM(if_payment) / SUM(if_cart), 0), 3) AS payment_rate,
    ROUND(COALESCE(SUM(if_refund) / SUM(if_payment), 0), 3) AS refund_rate
FROM tb_user_event
WHERE DATE_FORMAT(event_time, '%Y%m') = '202110'
GROUP BY product_id
HAVING COALESCE(SUM(if_refund) / SUM(if_payment), 0) <= 0.5
ORDER BY product_id
SQL15 某店铺的各商品毛利率及店铺整体毛利率
-- Gross profit rate of shop '901' for orders placed on or after 2021-10-01:
-- one '店铺汇总' row for the whole shop, followed by every product whose profit
-- rate exceeds 24.9%. profit rate = 1 - purchase cost / sales, shown as a
-- percentage string with 1 decimal.
-- The sort is applied to the combined result with explicit numeric keys: an
-- ORDER BY inside a parenthesized UNION branch without LIMIT does not order
-- the final rows, and sorting the '%' strings would be lexicographic.
WITH stat AS (
    SELECT
        product.product_id,
        SUM(detail.price * detail.cnt) AS sale_total,
        SUM(product.in_price * detail.cnt) AS total_in_price
    FROM tb_product_info AS product
    INNER JOIN tb_order_detail AS detail
        ON product.product_id = detail.product_id
    INNER JOIN tb_order_overall AS overall
        ON detail.order_id = overall.order_id
    WHERE DATE(overall.event_time) >= '2021-10-01'
      AND product.shop_id = '901'
    GROUP BY product.product_id
)
SELECT
    report.product_id,
    report.profit_rate
FROM (
    SELECT
        0 AS row_group,
        NULL AS sort_id,
        '店铺汇总' AS product_id,
        CONCAT(ROUND(100 * (1 - SUM(total_in_price) / SUM(sale_total)), 1), '%') AS profit_rate
    FROM stat
    UNION ALL
    SELECT
        1 AS row_group,
        product_id AS sort_id,
        product_id,
        CONCAT(ROUND(100 * (1 - total_in_price / sale_total), 1), '%') AS profit_rate
    FROM stat
    WHERE 1 - total_in_price / sale_total > 0.249
) AS report
-- Summary row first, then products in ascending product_id order.
ORDER BY
    report.row_group,
    report.sort_id
SQL16 零食类商品中复购率top3高的商品
-- Top 3 products tagged '零食' by repurchase rate over the most recent 90
-- calendar days, where "today" is the date of the latest order event_time.
-- repurchase rate = buyers with at least 2 purchase rows / all buyers,
-- rounded to 3 decimals; ties are broken by ascending product_id.
WITH buyer_counts AS (
    SELECT
        products.product_id,
        orders.uid,
        COUNT(*) AS purchase_cnt
    FROM tb_product_info AS products
    INNER JOIN tb_order_detail AS items
        ON items.product_id = products.product_id
    INNER JOIN tb_order_overall AS orders
        ON orders.order_id = items.order_id
    WHERE products.tag = '零食'
      -- DATEDIFF yields whole days, so "<= 89" is the same window as "< 90".
      AND DATEDIFF(
          (SELECT MAX(event_time) FROM tb_order_overall),
          orders.event_time
      ) <= 89
    GROUP BY
        products.product_id,
        orders.uid
)
SELECT
    buyer_counts.product_id,
    ROUND(
        SUM(CASE WHEN buyer_counts.purchase_cnt >= 2 THEN 1 ELSE 0 END) / COUNT(*),
        3
    ) AS repurchase_rate
FROM buyer_counts
GROUP BY buyer_counts.product_id
ORDER BY
    repurchase_rate DESC,
    buyer_counts.product_id ASC
LIMIT 3
SQL17 10月的新户客单价和获客成本
-- Average order amount and average acquisition cost of new customers in
-- October 2021. A new customer's order is the order ranked first by event_time
-- for that uid, and it must fall in October 2021.
--   avg_amount = average total_amount of those first orders
--   avg_cost   = average of (order list price - total_amount)
-- The list price of an order is SUM(price * cnt) over its detail rows, i.e.
-- unit price multiplied by quantity for every line of the order.
SELECT
    ROUND(SUM(first_orders.total_amount) / COUNT(*), 1) AS avg_amount,
    ROUND(SUM(order_totals.list_price - first_orders.total_amount) / COUNT(*), 1) AS avg_cost
FROM (
    SELECT
        uid,
        order_id,
        total_amount,
        event_time,
        RANK() OVER (PARTITION BY uid ORDER BY event_time) AS ranking
    FROM tb_order_overall
) AS first_orders
INNER JOIN (
    SELECT
        order_id,
        SUM(price * cnt) AS list_price
    FROM tb_order_detail
    GROUP BY order_id
) AS order_totals
    ON first_orders.order_id = order_totals.order_id
WHERE DATE_FORMAT(first_orders.event_time, '%Y%m') = '202110'
  AND first_orders.ranking = 1
SQL18 店铺901国庆期间的7日动销率和滞销率
-- 7-day sell-through and unsold rate of shop '901' for 2021-10-01..2021-10-03.
-- A date is reported only when tb_order_overall has at least one order on it.
--   on_sale_cnt = shop products released on or before the date
--   sold_cnt    = distinct shop products ordered in the 7 days ending that date
--   sale_rate   = sold_cnt / on_sale_cnt, unsale_rate = 1 - sale_rate (3 decimals)
WITH dt_list AS (
    SELECT holiday.dt AS dt
    FROM (
        SELECT '2021-10-01' AS dt
        UNION ALL
        SELECT '2021-10-02' AS dt
        UNION ALL
        SELECT '2021-10-03' AS dt
    ) AS holiday
    INNER JOIN (
        SELECT DISTINCT DATE(event_time) AS dt
        FROM tb_order_overall
    ) AS order_days
        ON holiday.dt = order_days.dt
),
listed AS (
    SELECT
        dt_list.dt,
        COUNT(*) AS on_sale_cnt
    FROM dt_list
    INNER JOIN tb_product_info AS products
        ON DATE(products.release_time) <= dt_list.dt
    WHERE products.shop_id = '901'
    GROUP BY dt_list.dt
),
sold AS (
    SELECT
        dt_list.dt,
        COUNT(DISTINCT products.product_id) AS sold_cnt
    FROM dt_list
    INNER JOIN tb_order_overall AS orders
        ON DATE(orders.event_time) BETWEEN DATE_SUB(dt_list.dt, INTERVAL 6 DAY) AND dt_list.dt
    INNER JOIN tb_order_detail AS items
        ON items.order_id = orders.order_id
    INNER JOIN tb_product_info AS products
        ON products.product_id = items.product_id
    WHERE products.shop_id = '901'
    GROUP BY dt_list.dt
)
SELECT
    listed.dt,
    -- A date with no sales in its window has no row in "sold"; treat it as 0.
    ROUND(COALESCE(sold.sold_cnt, 0) / listed.on_sale_cnt, 3) AS sale_rate,
    ROUND(1 - COALESCE(sold.sold_cnt, 0) / listed.on_sale_cnt, 3) AS unsale_rate
FROM listed
LEFT JOIN sold
    ON listed.dt = sold.dt
ORDER BY listed.dt
SQL19 2021年国庆在北京接单3次及以上的司机统计信息
-- Average order count and average income of drivers who took at least 3 orders
-- in '北京' between 2021-10-01 and 2021-10-07 (by order_time date).
-- A NULL fare contributes 0 to a driver's income; both averages use 3 decimals.
WITH busy_drivers AS (
    SELECT
        orders.driver_id,
        COUNT(*) AS order_num,
        SUM(COALESCE(orders.fare, 0)) AS total_fare
    FROM tb_get_car_record AS records
    INNER JOIN tb_get_car_order AS orders
        ON orders.order_id = records.order_id
    WHERE records.city = '北京'
      AND DATE(orders.order_time) BETWEEN '2021-10-01' AND '2021-10-07'
    GROUP BY orders.driver_id
    HAVING COUNT(*) >= 3
)
SELECT
    '北京' AS city,
    ROUND(AVG(busy_drivers.order_num), 3) AS avg_order_num,
    ROUND(AVG(busy_drivers.total_fare), 3) AS avg_income
FROM busy_drivers
SQL20 有取消订单记录的司机平均评分
-- Average grade of every driver who has an order in October 2021 with a NULL
-- start_time, plus an overall '总体' row produced by WITH ROLLUP.
-- Averages cover all of a driver's graded orders (grade IS NOT NULL), 1 decimal.
-- Per-driver rows sort by driver id and the '总体' row sorts last.
WITH cancelling_drivers AS (
    SELECT DISTINCT orders.driver_id AS did
    FROM tb_get_car_record AS records
    INNER JOIN tb_get_car_order AS orders
        ON orders.order_id = records.order_id
    WHERE orders.start_time IS NULL
      AND DATE_FORMAT(orders.order_time, '%Y%m') = '202110'
),
graded_orders AS (
    SELECT
        driver_id AS did,
        grade
    FROM tb_get_car_order
    WHERE grade IS NOT NULL
)
SELECT
    -- The ROLLUP super-aggregate row has a NULL did; label it '总体'.
    COALESCE(cancelling_drivers.did, '总体') AS driver_id,
    ROUND(AVG(graded_orders.grade), 1) AS avg_grade
FROM cancelling_drivers
INNER JOIN graded_orders
    ON graded_orders.did = cancelling_drivers.did
GROUP BY cancelling_drivers.did WITH ROLLUP
ORDER BY
    driver_id = '总体',
    driver_id
SQL21 每个城市中评分最高的司机信息
-- For each city, the driver(s) with the highest average grade (ties kept via
-- RANK), together with that driver's average daily order count and average
-- daily mileage computed over all of the driver's orders.
-- Sorted ascending by average daily order count.
WITH city_driver_grade AS (
    SELECT
        records.city,
        orders.driver_id,
        AVG(orders.grade) AS avg_grade
    FROM tb_get_car_record AS records
    INNER JOIN tb_get_car_order AS orders
        ON orders.order_id = records.order_id
    WHERE orders.grade IS NOT NULL
    GROUP BY
        records.city,
        orders.driver_id
),
ranked_grade AS (
    SELECT
        city,
        driver_id,
        avg_grade,
        RANK() OVER (PARTITION BY city ORDER BY avg_grade DESC) AS city_rank
    FROM city_driver_grade
),
driver_daily AS (
    SELECT
        driver_id,
        DATE(order_time) AS order_dt,
        COUNT(*) AS order_num,
        -- A NULL mileage contributes 0 to the day's total.
        SUM(COALESCE(mileage, 0)) AS total_mileage
    FROM tb_get_car_order
    GROUP BY
        driver_id,
        DATE(order_time)
),
driver_avg AS (
    SELECT
        driver_id,
        ROUND(AVG(order_num), 1) AS avg_order_num,
        ROUND(AVG(total_mileage), 3) AS avg_mileage
    FROM driver_daily
    GROUP BY driver_id
)
SELECT
    best.city,
    best.driver_id,
    ROUND(best.avg_grade, 1) AS avg_grade,
    driver_avg.avg_order_num,
    driver_avg.avg_mileage
FROM ranked_grade AS best
INNER JOIN driver_avg
    ON driver_avg.driver_id = best.driver_id
WHERE best.city_rank = 1
ORDER BY avg_order_num
SQL22 国庆期间近7日日均取消订单量
-- For 2021-10-01..2021-10-03: trailing 7-day daily average of finished orders
-- (start_time present) and cancelled orders (start_time NULL), 2 decimals.
-- Daily counts start at 2021-09-25 so the window of Oct 1 is complete; the sum
-- is always divided by 7, and a target date with no orders at all reports 0.
WITH daily AS (
    SELECT
        DATE(order_time) AS dt,
        SUM(CASE WHEN start_time IS NOT NULL THEN 1 ELSE 0 END) AS finish_num,
        SUM(CASE WHEN start_time IS NULL THEN 1 ELSE 0 END) AS cancel_num
    FROM tb_get_car_order
    WHERE DATE(order_time) BETWEEN '2021-09-25' AND '2021-10-03'
    GROUP BY DATE(order_time)
),
rolling AS (
    SELECT
        daily.dt,
        SUM(daily.finish_num) OVER last_7d / 7 AS finish_num_7d,
        SUM(daily.cancel_num) OVER last_7d / 7 AS cancel_num_7d
    FROM daily
    WINDOW last_7d AS (
        ORDER BY daily.dt
        RANGE BETWEEN INTERVAL 6 DAY PRECEDING AND CURRENT ROW
    )
),
target_days AS (
    SELECT '2021-10-01' AS dt
    UNION ALL
    SELECT '2021-10-02' AS dt
    UNION ALL
    SELECT '2021-10-03' AS dt
)
SELECT
    target_days.dt AS dt,
    ROUND(COALESCE(rolling.finish_num_7d, 0), 2) AS finish_num_7d,
    ROUND(COALESCE(rolling.cancel_num_7d, 0), 2) AS cancel_num_7d
FROM target_days
LEFT JOIN rolling
    ON target_days.dt = rolling.dt
ORDER BY dt
SQL23 工作日各时段叫车量、等待接单时间和调度时间
-- Ride requests on weekdays (WEEKDAY 0-4) grouped by time-of-day period, with
-- the average wait for acceptance (event_time -> order_time) and the average
-- dispatch time (order_time -> start_time), both in minutes with 1 decimal.
-- Periods by hour of event_time: 7-8 '早高峰', 9-16 '工作时间', 17-19 '晚高峰',
-- anything else '休息时间'. Sorted ascending by request count.
WITH weekday_rides AS (
    SELECT
        CASE
            WHEN HOUR(event_time) BETWEEN 7 AND 8 THEN '早高峰'
            WHEN HOUR(event_time) BETWEEN 9 AND 16 THEN '工作时间'
            WHEN HOUR(event_time) BETWEEN 17 AND 19 THEN '晚高峰'
            ELSE '休息时间'
        END AS period,
        TIMESTAMPDIFF(SECOND, event_time, order_time) AS wait_time,
        TIMESTAMPDIFF(SECOND, order_time, start_time) AS dispatch_time
    FROM tb_get_car_record AS records
    INNER JOIN tb_get_car_order AS orders
        ON orders.order_id = records.order_id
    -- WEEKDAY() returns 0-6, so "<= 4" selects the same days as BETWEEN 0 AND 4.
    WHERE WEEKDAY(event_time) <= 4
)
SELECT
    weekday_rides.period,
    COUNT(*) AS get_car_num,
    ROUND(AVG(weekday_rides.wait_time) / 60, 1) AS avg_wait_time,
    ROUND(AVG(weekday_rides.dispatch_time) / 60, 1) AS avg_dispatch_time
FROM weekday_rides
GROUP BY weekday_rides.period
ORDER BY get_car_num
SQL24 各城市最大同时等车人数
-- Peak number of riders waiting at the same time per city, for requests whose
-- event_time is in October 2021.
-- Event sweep: each request adds +1 at event_time and -1 at start_time; the
-- running sum per city is the number waiting at that instant. Sorting by
-- flag DESC applies arrivals before departures at the same timestamp.
-- NOTE(review): a row whose start_time is NULL yields a -1 event with a NULL
-- ts — confirm whether another end timestamp should be used for such rows.
WITH oct_rides AS (
    SELECT
        city,
        event_time,
        start_time
    FROM tb_get_car_record AS records
    INNER JOIN tb_get_car_order AS orders
        ON orders.order_id = records.order_id
    WHERE DATE_FORMAT(event_time, '%Y%m') = '202110'
),
wait_events AS (
    SELECT city, event_time AS ts, 1 AS flag
    FROM oct_rides
    UNION ALL
    SELECT city, start_time AS ts, -1 AS flag
    FROM oct_rides
),
running AS (
    SELECT
        city,
        ts,
        SUM(flag) OVER (PARTITION BY city ORDER BY ts ASC, flag DESC) AS wait_uv
    FROM wait_events
)
SELECT
    running.city,
    MAX(running.wait_uv) AS max_wait_uv
FROM running
GROUP BY running.city
ORDER BY
    max_wait_uv,
    running.city