窗口函数的基本语法如下:
<窗口函数> over (partition by <用于分组的列名>
order by <用于排序的列名>)
<窗口函数>的位置,可以放以下两种函数:
1)专用窗口函数,例如 row_number、rank、dense_rank 等;
2)聚合函数,例如 sum、avg、count、max、min 等。
注意事项
建表
-- Demo schema used by every example query below.
-- student: one row per student.
CREATE TABLE student (
    sid    CHAR(2),
    sname  CHAR(5),
    sclass CHAR(2)
);

-- course: one row per course.
CREATE TABLE course (
    cid   CHAR(2),
    cname CHAR(10)
);

-- score: the queries below join score.sid to student.sid and score.cid to course.cid.
CREATE TABLE score (
    sid   CHAR(2),
    cid   CHAR(2),
    score INT
);
-- Seed the nine demo students; every row uses sclass '01'.
INSERT INTO student (sid, sname, sclass)
VALUES
    ('01', '崔健', '01'),
    ('02', '李健', '01'),
    ('03', '高虎', '01'),
    ('04', '子健', '01'),
    ('05', '石璐', '01'),
    ('06', '亚千', '01'),
    ('07', '史立', '01'),
    ('08', '窦唯', '01'),
    ('09', '华东', '01');
-- Seed the four demo courses.
INSERT INTO course (cid, cname)
VALUES
    ('01', '金属'),
    ('02', '迷幻'),
    ('03', '朋克'),
    ('04', '后摇');
-- Seed the demo scores, grouped by course. Not every student has a score in
-- every course, and several scores are tied within a course.
INSERT INTO score (sid, cid, score)
VALUES
    -- course '01'
    ('01', '01', 60),
    ('02', '01', 85),
    ('03', '01', 57),
    ('04', '01', 34),
    ('05', '01', 78),
    ('06', '01', 90),
    ('07', '01', 76),
    ('08', '01', 90),
    ('09', '01', 85),
    -- course '02'
    ('01', '02', 78),
    ('02', '02', 59),
    ('03', '02', 59),
    ('04', '02', 79),
    ('05', '02', 88),
    -- course '03'
    ('01', '03', 65),
    ('03', '03', 89),
    ('05', '03', 46),
    ('06', '03', 85),
    ('07', '03', 89),
    ('08', '03', 79),
    -- course '04'
    ('03', '04', 99),
    ('04', '04', 95),
    ('07', '04', 68),
    ('08', '04', 59),
    ('09', '04', 80);
-- Compare the three ranking functions inside each course, highest score first:
--   ROW_NUMBER  gives every row a distinct sequence number, even on ties;
--   RANK        gives tied rows the same number and leaves gaps afterwards;
--   DENSE_RANK  gives tied rows the same number without gaps.
SELECT
    s.sname,
    c.cname,
    sc.score,
    ROW_NUMBER() OVER (PARTITION BY c.cname ORDER BY sc.score DESC) AS row_num,
    RANK()       OVER (PARTITION BY c.cname ORDER BY sc.score DESC) AS ranking,
    DENSE_RANK() OVER (PARTITION BY c.cname ORDER BY sc.score DESC) AS dense_ranking
FROM student s
INNER JOIN score sc
    ON s.sid = sc.sid
INNER JOIN course c
    ON sc.cid = c.cid;
PERCENT_RANK() 的计算公式为:
(rank - 1) / (rows - 1)
其中,rank 为 RANK() 函数产生的序号,rows 为当前窗口(分区)的记录总行数。
-- Show each score's RANK next to its PERCENT_RANK within the course,
-- i.e. (rank - 1) / (rows in the partition - 1), highest score first.
SELECT
    s.sname,
    c.cname,
    sc.score,
    RANK()         OVER (PARTITION BY c.cname ORDER BY sc.score DESC) AS ranking,
    PERCENT_RANK() OVER (PARTITION BY c.cname ORDER BY sc.score DESC) AS percent
FROM student s
INNER JOIN score sc
    ON s.sid = sc.sid
INNER JOIN course c
    ON sc.cid = c.cid;
-- Show each score's RANK next to its CUME_DIST within the course: the share
-- of rows in the partition that sort at or before the current row
-- (highest score first).
SELECT
    s.sname,
    c.cname,
    sc.score,
    RANK()      OVER (PARTITION BY c.cname ORDER BY sc.score DESC) AS ranking,
    CUME_DIST() OVER (PARTITION BY c.cname ORDER BY sc.score DESC) AS cumdist
FROM student s
INNER JOIN score sc
    ON s.sid = sc.sid
INNER JOIN course c
    ON sc.cid = c.cid;
lag和lead函数可以在同一次查询中取出同一字段的前N行数据(lag)和后N行数据(lead)
语法:
LAG(EXP_STR,OFFSET,DEFVAL)OVER()
LEAD(EXP_STR,OFFSET,DEFVAL)OVER()
EXP_STR:要取的列
OFFSET:偏移的行数,即取当前行之前(lag)或之后(lead)第几行的数据
DEFVAL:偏移后的行不存在时返回的值,默认为NULL
应用场景:求每个用户相邻两次浏览的时间差;求每个同学相邻两门考试的成绩差
-- For each student, order their scores from highest to lowest and show the
-- next (LEAD) and previous (LAG) score together with the difference to each.
--
-- A column alias cannot be referenced from another expression in the same
-- SELECT list, so the window columns are computed in a CTE first and the
-- differences are derived from them in the outer query.
WITH neighbours AS (
    SELECT
        s.sname,
        c.cname,
        sc.score,
        LEAD(sc.score, 1) OVER (PARTITION BY s.sname ORDER BY sc.score DESC) AS leadVal,
        LAG(sc.score, 1)  OVER (PARTITION BY s.sname ORDER BY sc.score DESC) AS lagVal
    FROM student s
    INNER JOIN score sc
        ON s.sid = sc.sid
    INNER JOIN course c
        ON sc.cid = c.cid
)
SELECT
    sname,
    cname,
    score,
    leadVal,
    lagVal,
    score - leadVal AS diff1,  -- NULL on the student's last row (no next score)
    score - lagVal  AS diff2   -- NULL on the student's first row (no previous score)
FROM neighbours;
-- For each student, with scores ordered from highest to lowest, show the
-- first and last value of the window frame next to every score.
--
-- NOTE(review): no frame clause is given, so the default frame ends at the
-- current row (including its ties). LAST_VALUE therefore follows the current
-- row rather than returning the student's lowest score. If the partition-wide
-- last value is intended, add
-- ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING to that window.
SELECT
    s.sname,
    c.cname,
    sc.score,
    FIRST_VALUE(sc.score) OVER (PARTITION BY s.sname ORDER BY sc.score DESC) AS firstVal,
    LAST_VALUE(sc.score)  OVER (PARTITION BY s.sname ORDER BY sc.score DESC) AS lastVal
FROM student s
INNER JOIN score sc
    ON s.sid = sc.sid
INNER JOIN course c
    ON sc.cid = c.cid;
-- For each student, with scores ordered from highest to lowest, show the
-- 1st and 2nd value of the window frame next to every score.
--
-- NOTE(review): no frame clause is given, so the default frame ends at the
-- current row; NTH_VALUE(..., 2) is NULL until the frame holds two rows.
-- NOTE(review): the aliases 1th / 2th start with a digit. MySQL accepts them
-- unquoted; other engines may require quoting. Confirm the target engine.
SELECT
    s.sname,
    s.sclass,
    c.cname,
    sc.score,
    NTH_VALUE(sc.score, 1) OVER (PARTITION BY s.sname ORDER BY sc.score DESC) AS 1th,
    NTH_VALUE(sc.score, 2) OVER (PARTITION BY s.sname ORDER BY sc.score DESC) AS 2th
FROM student s
INNER JOIN score sc
    ON s.sid = sc.sid
INNER JOIN course c
    ON sc.cid = c.cid;
-- Split each student's scores, ordered from highest to lowest, into at most
-- four buckets and report the bucket number of every row.
SELECT
    sid,
    cid,
    score,
    NTILE(4) OVER (PARTITION BY sid ORDER BY score DESC) AS tile
FROM score;
聚合函数用作窗口函数分为两种情况
-- Running aggregates over the whole table, ordered by sid only.
-- sid repeats across rows and no frame clause is given, so the default frame
-- includes all rows tied on sid: every row of one student shows the same
-- running values, covering all of that student's rows.
SELECT
    sid,
    cid,
    score,
    SUM(score)   OVER (ORDER BY sid) AS current_sum,
    AVG(score)   OVER (ORDER BY sid) AS current_avg,
    COUNT(score) OVER (ORDER BY sid) AS current_count,
    MAX(score)   OVER (ORDER BY sid) AS current_max,
    MIN(score)   OVER (ORDER BY sid) AS current_min
FROM score;
排序字段没有重复的情况:
-- Running aggregates over the whole table, ordered by (sid, cid).
-- The (sid, cid) pairs in the demo data are all distinct, so each row's
-- running values advance one row at a time.
SELECT
    sid,
    cid,
    score,
    SUM(score)   OVER (ORDER BY sid, cid) AS current_sum,
    AVG(score)   OVER (ORDER BY sid, cid) AS current_avg,
    COUNT(score) OVER (ORDER BY sid, cid) AS current_count,
    MAX(score)   OVER (ORDER BY sid, cid) AS current_max,
    MIN(score)   OVER (ORDER BY sid, cid) AS current_min
FROM score;
-- Running aggregates restarted for every course: rows are partitioned by cid
-- and accumulated in sid order inside each partition.
SELECT
    cid,
    sid,
    score,
    SUM(score)   OVER (PARTITION BY cid ORDER BY sid) AS current_sum,
    AVG(score)   OVER (PARTITION BY cid ORDER BY sid) AS current_avg,
    COUNT(score) OVER (PARTITION BY cid ORDER BY sid) AS current_count,
    MAX(score)   OVER (PARTITION BY cid ORDER BY sid) AS current_max,
    MIN(score)   OVER (PARTITION BY cid ORDER BY sid) AS current_min
FROM score;
体现了窗口
根据cid分窗,在每个cid中执行2.1中不分窗的操作
1、unbounded preceding
:从当前分区的第一行开始,到当前行结束。
2、current row
:从当前行开始,也结束于当前行。
3、[numeric expression] preceding
:对于rows来说,从当前行之前的第[numeric expression]行开始(共[numeric expression]+1行),到当前行结束。对于range来说,范围按排序列的值计算:从排序值与当前行的值相差不超过[numeric expression]的行开始,到当前行(及与其排序值相同的行)结束。
4、[numeric expression] following
:与[numeric expression] preceding相反。