SQL156 各个视频的平均完播率
-- SQL156: per-video completion rate over plays that ended in 2021.
-- A play counts as complete when the watched time (end_time - start_time, in
-- seconds) is at least the video's duration (duration is compared directly
-- against seconds). The rate is the share of complete plays, rounded to
-- 3 decimals, highest rate first.
select
    video_id,
    round(
        avg(case when timestampdiff(second, start_time, end_time) >= duration then 1 else 0 end),
        3
    ) as avg_comp_play_rate
from tb_user_video_log
inner join tb_video_info using (video_id)
-- Half-open datetime range rather than date_format(end_time, '%Y') = 2021:
-- same rows, but no string-to-number coercion, and the bare column lets the
-- optimizer use an index on end_time if one exists.
where end_time >= '2021-01-01'
  and end_time < '2022-01-01'
group by video_id
order by avg_comp_play_rate desc;
SQL157 平均播放进度大于60%的视频类别
-- SQL157: video tags whose average play progress exceeds 60%, highest first.
-- Progress of a single play = watched seconds / duration, capped at 1 (100%).
-- The result column avg_play_progress is the percentage rounded to 2 decimals
-- with a trailing '%'.
--
-- Filtering and sorting are done on the numeric percentage (progress_pct), not
-- on the formatted text: compared as strings, '100.00%' would sort below
-- '99.00%', and "> 60" on the text relies on implicit string-to-number
-- coercion (which also raises a truncation warning).
select
    tag,
    concat(progress_pct, '%') as avg_play_progress
from (
    select
        tag,
        round(
            avg(
                case
                    when timestampdiff(second, start_time, end_time) > duration then 1
                    else timestampdiff(second, start_time, end_time) / duration
                end
            ) * 100,
            2
        ) as progress_pct
    from tb_user_video_log
    inner join tb_video_info using (video_id)
    group by tag
) as tag_progress
where progress_pct > 60
order by progress_pct desc;
SQL158 每类视频近一个月的转发量/率
(1. datediff(time1, time2) 返回 time1 减去 time2 相差的天数(只比较日期部分);
2.where 不能直接和聚合函数一起使用,因此第6行中date里面使用max要再套用一个select;
3. date里面加入select,要多加一个括号;
4. 为什么不把where 换成 having?
having是group by之后用的过滤语句,where是group by之前的约束语句,使用having还是where取决于题目中要求是先筛选后分组(where)还是先分组后过滤(having))
-- SQL158: retweet count and retweet rate per tag over the latest 30 days of data.
-- "Today" is the date of the latest start_time in the play log (not the wall
-- clock); a play is kept when its start date is at most 29 days before that.
-- retweet_rate = retweets / plays, rounded to 3 decimals; highest rate first.
select tag,
    sum(if_retweet) as retweet_cnt,
    round(sum(if_retweet) / count(start_time), 3) as retweet_rate
from tb_user_video_log
inner join tb_video_info using (video_id)
where datediff(date((select max(start_time) from tb_user_video_log)), date(start_time)) <= 29
group by tag
order by retweet_rate desc;
SQL159 每个创作者每月的涨粉率及截止当前的总粉丝量
方法一
(where子句中不能使用窗口函数和聚合函数。
group by一般会搭配聚合函数一起出现)
-- SQL159 (method 1): monthly fan growth rate and running fan total per author
-- for plays that started in 2021.
-- if_follow = 2 is counted as -1 (a lost fan); any other value contributes
-- itself (presumably 1 = follow, 0 = no change; confirm against the schema).
with monthly_fans as (
    -- One row per author and calendar month: net fan change and play count.
    select
        author,
        date_format(start_time, '%Y-%m') as month,
        sum(case if_follow when 2 then -1 else if_follow end) as add_fans,
        count(*) as counts
    from tb_user_video_log
    left join tb_video_info using (video_id)
    where year(start_time) = 2021
    group by author, month
)
select
    author,
    month,
    -- Net fan change divided by the number of plays in that month.
    round(add_fans / counts, 3) as fans_growth_rate,
    -- Running total: within each author, sums this month and all earlier months.
    sum(add_fans) over (partition by author order by month) as total_fans
from monthly_fans
order by author, total_fans;
方法二:
两层sum是因为第一个sum是针对每一个月内进行计算,第二个sum是为了每一个author在不同月份的累加。
-- SQL159 (method 2): same result as method 1 in a single query level.
-- The inner sum() aggregates the net fan change within each (author, month)
-- group; the outer sum() ... over (...) is a window function that accumulates
-- those monthly values per author in month order.
select
    author,
    date_format(start_time, '%Y-%m') as month,
    round(
        sum(case if_follow when 1 then 1 when 2 then -1 else 0 end) / count(author),
        3
    ) as fans_growth_rate,
    sum(
        sum(case if_follow when 1 then 1 when 2 then -1 else 0 end)
    ) over (
        partition by author
        order by date_format(start_time, '%Y-%m')
    ) as total_fans
from tb_user_video_log as play_log
left join tb_video_info as video on play_log.video_id = video.video_id
where year(start_time) = 2021
group by author, month
order by author, total_fans;
SQL160 国庆期间每类视频点赞量和转发量
(1. rows 和 preceding 是什么意思?
举例:select *, avg(成绩) over (order by 学号 rows 2 preceding) as current_avg from 班级表;
rows和preceding这两个关键字,是“之前~行”的意思,上面的句子中,是之前2行。也就是得到的结果是自身记录及前2行的平均(相对应的preceding是following,表示之后的意思)
题目中,表示的是统计当前行及前6行(共7天)的点赞量之和 sum_like_cnt_7d,以及这7天内单日转发量的最大值 max_retweet_cnt_7d。
2.rows 和 preceding 的用法
ROWS BETWEEN 1 preceding AND current row 是指当前行的上一行(rownum-1)到当前行的汇总
ROWS BETWEEN 1 preceding AND 1 following 是指当前行的上一行(rownum-1)到当前行的下1行(rownum+1)的汇总
ROWS BETWEEN current row AND unbounded following 指当前行到最后一行的汇总
ROWS BETWEEN unbounded preceding AND current row 是指第一行至当前行的汇总
3.答案中的 order by dt rows between 6 preceding and current row(升序)
也可以改为order by dt desc rows between current row and 6 following(降序)
如果rows between a and b中a和b的位置颠倒了,会报错。
4.两次聚合:第一次sum(with 子句中)是求每天的点赞量/转发量,第二次(窗口函数中的 sum / max)才是对近7天求累加或取最大值
5.窗口函数的执行顺序是在having和where之后,所以当窗口函数和where,having同时出现时,一般会给窗口函数嵌套)
-- SQL160: for each tag and each date from 2021-10-01 to 2021-10-03, the total
-- likes over the current and previous 6 daily rows, and the largest single-day
-- retweet count over the same rows. Sorted by tag descending, then date.
with daily_stats as (
    -- Likes and retweets per tag per calendar day.
    select
        tag,
        date_format(start_time, '%Y-%m-%d') as dt,
        sum(if_like) as like_cnt,
        sum(if_retweet) as retweet_cnt
    from tb_user_video_log
    left join tb_video_info using (video_id)
    group by tag, dt
),
rolling_stats as (
    -- The window must be evaluated before the date filter below; otherwise the
    -- rows from late September would be gone and the 7-row frame would be short.
    -- NOTE(review): a rows-based frame equals "7 days" only if every tag has a
    -- row for every day in the period; confirm the data guarantees this.
    select
        tag,
        dt,
        sum(like_cnt) over last_7_rows as sum_like_cnt_7d,
        max(retweet_cnt) over last_7_rows as max_retweet_cnt_7d
    from daily_stats
    window last_7_rows as (
        partition by tag
        order by dt
        rows between 6 preceding and current row
    )
)
select
    tag,
    dt,
    sum_like_cnt_7d,
    max_retweet_cnt_7d
from rolling_stats
where dt between '2021-10-01' and '2021-10-03'
order by tag desc, dt;
SQL161 近一个月发布的视频中热度最高的top3视频
(1. 第7行 最近无播放天数
DATEDIFF((select date(max(end_time)) from tb_user_video_log),date(max(end_time)))
如何理解?
(SELECT MAX(end_time) FROM tb_user_video_log) 是整个表内最大的日期,相当于当前日期,
而 date(max(end_time)) 是按视频分组聚合后每个视频的最后播放日期。
当播放次数为0时,最近无播放天数=当前日期-发布日期;
当播放次数不为0时,最近无播放天数=当前日期-最近一次播放日期
2.倒数第四行筛选语句
where DATEDIFF(DATE((SELECT MAX(end_time) FROM tb_user_video_log)),DATE(release_time)) <= 29
如何理解?
题目要筛选出最近一个月发布的视频:用 (SELECT MAX(end_time) FROM tb_user_video_log) 找到最大时间,把这个最大时间视为现在的日期,往前推一个月;发布日期 DATE(release_time) 距现在不超过29天的视频就是我们要的,即 DATEDIFF(date1, date2) <= 29。)
-- SQL161: top 3 videos by hot index among videos released in the last 30 days.
-- "Today" is the date of the latest end_time in the play log.
-- hot_index = (100 * completion rate + 5 * likes + 3 * comments + 2 * retweets)
--             / (days since the video's last play + 1), rounded to an integer.
select video_id,
    round(
        (sum(case when timestampdiff(second, start_time, end_time) >= duration then 1 else 0 end) / count(*) * 100  -- completion rate as a percentage
         + sum(if_like) * 5        -- likes
         + count(comment_id) * 3   -- comments (rows whose comment_id is not null)
         + sum(if_retweet) * 2)    -- retweets
        / (datediff((select date(max(end_time)) from tb_user_video_log), date(max(end_time))) + 1)  -- freshness: 1 / (days without a play + 1)
    , 0) as hot_index
from tb_video_info
inner join tb_user_video_log using (video_id)
where datediff(date((select max(end_time) from tb_user_video_log)), date(release_time)) <= 29
group by video_id
order by hot_index desc
limit 3;