/*现有试卷信息表examination_info(exam_id试卷ID, tag试卷类别, difficulty试卷难度, duration考试时长, release_time发布时间):
id exam_id tag difficulty duration release_time
1 9001 SQL hard 60 2021-09-01 06:00:00
2 9002 SQL hard 60 2021-09-01 06:00:00
3 9003 算法 medium 80 2021-09-01 10:00:00
试卷作答记录表exam_record(uid用户ID, exam_id试卷ID, start_time开始作答时间, submit_time交卷时间, score得分):
id uid exam_id start_time submit_time score
1 1001 9001 2021-09-01 09:01:01 2021-09-01 09:31:00 78
2 1002 9001 2021-09-01 09:01:01 2021-09-01 09:31:00 81
3 1002 9002 2021-09-01 12:01:01 2021-09-01 12:31:01 81
4 1003 9001 2021-09-01 19:01:01 2021-09-01 19:40:01 86
5 1003 9002 2021-09-01 12:01:01 2021-09-01 12:31:51 89
6 1004 9001 2021-09-01 19:01:01 2021-09-01 19:30:01 85
7 1005 9003 2021-09-01 12:01:01 2021-09-01 12:31:02 85
8 1006 9003 2021-09-07 10:01:01 2021-09-07 10:21:01 87
9 1003 9003 2021-09-08 12:01:01 2021-09-08 12:11:01 40
10 1003 9002 2021-09-01 14:01:01 (NULL) (NULL)
找到每类试卷得分的前3名,如果两人最大分数相同,选择最小分数大者,如果还相同,选择uid大者。由示例数据结果输出如下:
tid uid ranking
SQL 1003 1
SQL 1004 2
SQL 1002 3
算法 1005 1
算法 1006 2
算法 1003 3
**/
-- Top 3 users per exam category (tag).
-- Ranking keys, in priority order: highest best score, then highest worst
-- score, then highest uid. Scores are aggregated per (tag, uid) over all of
-- that user's attempts in the category; NULL scores are ignored by max()/min().
-- NOTE: every "--" comment is followed by a space, because MySQL only treats
-- "--" as a comment marker when whitespace follows it.
select
    tag,
    uid,
    ranking
from (
    -- One row per (tag, uid). The window function is evaluated after grouping,
    -- so max()/min() inside its ORDER BY are the per-user aggregates.
    select
        ei.tag,
        er.uid,
        row_number() over (
            partition by ei.tag
            order by max(er.score) desc, min(er.score) desc, er.uid desc
        ) as ranking
    from exam_record as er
    left join examination_info as ei
        on ei.exam_id = er.exam_id
    group by ei.tag, er.uid
) as ranked_user
where ranking <= 3;
/*找到第二快和第二慢用时之差大于试卷时长的一半的试卷信息,按试卷ID降序排序。
由示例数据结果输出如下:
exam_id duration release_time
9001 60 2021-09-01 06:00:00
**/
-- Exams whose second-slowest and second-fastest completion times differ by
-- MORE than half of the exam duration, ordered by exam_id descending.
-- Only submitted attempts (submit_time is not null) are timed.
select
    exam_id,
    duration,
    release_time
from (
    -- Collapse the ranked attempts to one row per exam.
    -- The two conditional sums are kept separate on purpose: when an exam has
    -- exactly three timed attempts, the same row is both second-slowest and
    -- second-fastest, and a single CASE would only ever take its first branch
    -- (reporting the full cost as the gap instead of 0).
    select
        exam_id,
        duration,
        release_time,
        sum(case when slow_rank = 2 then cost_seconds else 0 end)
            - sum(case when fast_rank = 2 then cost_seconds else 0 end) as gap_seconds
    from (
        -- Time spent per attempt, ranked slowest-first (slow_rank) and
        -- fastest-first (fast_rank) within each exam. Seconds are used so the
        -- gap is not distorted by truncating each attempt to whole minutes.
        select
            ei.exam_id,
            ei.duration,
            ei.release_time,
            timestampdiff(second, er.start_time, er.submit_time) as cost_seconds,
            row_number() over (
                partition by er.exam_id
                order by timestampdiff(second, er.start_time, er.submit_time) desc
            ) as slow_rank,
            row_number() over (
                partition by er.exam_id
                order by timestampdiff(second, er.start_time, er.submit_time) asc
            ) as fast_rank
        from exam_record as er
        inner join examination_info as ei
            on er.exam_id = ei.exam_id
        where er.submit_time is not null
    ) as timed_attempt
    group by exam_id, duration, release_time
) as exam_gap
-- duration is treated as minutes (it was previously compared directly against
-- a minute difference), hence the * 60. Strict ">" matches the stated
-- requirement "greater than half of the duration".
where gap_seconds * 2 > duration * 60
order by exam_id desc;
-- For every user, consider the (up to) three most recent calendar months in
-- which that user has any attempt. Keep only users who have no unfinished
-- attempt in those months and report how many exams they completed.
-- Ordered by completed count, then uid, both descending.
select
    uid,
    count(score) as exam_complete_cnt
from (
    -- dense_rank over the year-month string numbers the user's active months
    -- from most recent (1) backwards; all attempts in one month share a rank.
    select
        er.uid,
        er.score,
        dense_rank() over (
            partition by er.uid
            order by date_format(er.start_time, '%Y%m') desc
        ) as recent_months
    from exam_record as er
) as recent_table
where recent_months <= 3
group by uid
-- count(score) skips NULL scores, so equality with the row count means every
-- attempt in the window has a score (i.e. none is unfinished).
having count(score) = count(*)
order by exam_complete_cnt desc, uid desc;
/*现有用户信息表user_info(uid用户ID,nick_name昵称, achievement成就值, level等级, job职业方向, register_time注册时间):
id uid nick_name achievement level job register_time
1 1001 牛客1号 3200 7 算法 2020-01-01 10:00:00
2 1002 牛客2号 2500 6 算法 2020-01-01 10:00:00
3 1003 牛客3号♂ 2200 5 算法 2020-01-01 10:00:00**/
-- Among the half of users with the highest incompletion rate on SQL-tagged
-- exams, take those at level 6 or 7 and, for each of their three most recent
-- months with any attempt, report how many attempts were started and how many
-- were submitted. Ordered by uid, then month, ascending.
select
    t1.uid,
    -- Year-month bucket of the attempt.
    date_format(t1.start_time, '%Y%m') as start_month,
    -- Attempts started in the month.
    count(t1.start_time) as total_cnt,
    -- Attempts submitted in the month (NULL submit_time is not counted).
    count(t1.submit_time) as complete_cnt
from (
    -- Number each user's active months from most recent (1) backwards.
    select
        uid,
        start_time,
        submit_time,
        dense_rank() over (
            partition by uid
            order by date_format(start_time, '%Y-%m') desc
        ) as time_rk
    from exam_record
) as t1
-- Inner join: the time_rk filter below already discarded any unmatched rows
-- the previous right join could have produced, so the result is the same.
inner join (
    select uid
    from (
        -- Completion rate on SQL exams per user, ranked ascending: the lowest
        -- completion rate (= highest incompletion rate) gets percent_rank 0.
        select
            er.uid,
            percent_rank() over (
                order by count(er.submit_time) / count(er.start_time)
            ) as rate_rk
        from exam_record as er
        where er.exam_id in (
            -- Restrict to exams tagged SQL.
            select ei.exam_id
            from examination_info as ei
            where ei.tag = 'SQL'
        )
        group by er.uid
    ) as a
    -- Keep the half with the highest incompletion rate ...
    where rate_rk <= 0.5
      -- ... and of those, only level 6 and level 7 users.
      and uid in (
          select uid from user_info where level in (6, 7)
      )
) as t2
    on t1.uid = t2.uid
-- Only the three most recent active months of each user.
where t1.time_rk <= 3
group by t1.uid, start_month
order by t1.uid, start_month;
-- For every exam and month: the number of attempts started in that month and
-- the running total of attempts up to and including that month.
select
    exam_id,
    start_month,
    month_cnt,
    -- start_month is unique per exam after grouping, so the default window
    -- frame yields a plain running total.
    sum(month_cnt) over (partition by exam_id order by start_month) as cum_exam_cnt
from (
    -- Attempts per exam per month. '%Y%m' (four-digit year) keeps the month key
    -- sortable across years and consistent with the other queries in this file.
    select
        exam_id,
        date_format(start_time, '%Y%m') as start_month,
        count(start_time) as month_cnt
    from exam_record
    group by exam_id, start_month
) as t1;
-- Alternative formulation of the monthly / cumulative attempt report without a
-- derived table: the window function is applied directly on top of the grouped
-- aggregate, so sum(count(...)) is a running total of the per-month counts.
select
    exam_id,
    date_format(start_time, '%Y%m') as start_month,
    count(start_time) as month_cnt,
    sum(count(start_time)) over (
        partition by exam_id
        order by date_format(start_time, '%Y%m')
    ) as cum_exam_cnt
from exam_record
group by exam_id, start_month;
-- Per month since the first recorded attempt: monthly active users, new users,
-- the largest single-month new-user count seen so far, and the cumulative
-- number of users. Ordered by month ascending.
select
    -- Year-month bucket.
    start_month,
    -- Monthly active users: distinct users with at least one attempt.
    count(distinct uid) as mau,
    -- New users: users whose first-ever attempt falls in this month.
    sum(is_first_attempt) as month_add_uv,
    -- Largest monthly new-user count up to and including this month.
    max(sum(is_first_attempt)) over (order by start_month) as max_month_add_uv,
    -- Cumulative number of users up to and including this month.
    sum(sum(is_first_attempt)) over (order by start_month) as cum_sum_uv
from (
    select
        uid,
        date_format(start_time, '%Y%m') as start_month,
        -- Flag exactly one row per user (the earliest attempt). row_number()
        -- is used instead of comparing against min(start_time) so that a user
        -- with several attempts sharing the earliest timestamp is still
        -- counted as new only once.
        case
            when row_number() over (partition by uid order by start_time) = 1 then 1
            else 0
        end as is_first_attempt
    from exam_record
) as t1
group by start_month
order by start_month;
常用的排序函数
lead的窗口函数用法
datediff 函数
datediff和timestampdiff函数的区别
datediff()函数的作用是求日期差,也就是把一个时间的日期部分取出来求差。例如:'2021-09-05 12:00:00'和'2021-09-04 11:00:00'这两个日期,datediff只取2021-09-05和2021-09-04求日期差,并不会管后面的时间部分。
timestampdiff()函数的作用则是求时间戳的差,例如:'2021-09-05 12:00:00'和'2021-09-04 11:00:00'这两个日期,datediff只会先求出这个日期的时分秒差,之后再转换成天数来求日期差。