个人博客原文链接
普通的聚合函数每组(group by)只返回一个值,而开窗函数则可以为每行都返回一个值。简而言之,相当于对查询的结果添加新的一列值,这一列可以是聚合值,也可以是排序号。开窗函数的关键字是over()。
建表
create table if not exists student_scores(
id string,
studentId string,
language string,
math string,
english string,
classId string,
departmentId string
);
插入数据
insert into table student_scores values
(‘1’,‘111’,‘68’,‘69’,‘90’,‘class1’,‘department1’),
(‘2’,‘112’,‘73’,‘80’,‘96’,‘class1’,‘department1’),
(‘3’,‘113’,‘90’,‘74’,‘75’,‘class1’,‘department1’),
(‘4’,‘114’,‘89’,‘94’,‘93’,‘class1’,‘department2’),
(‘5’,‘115’,‘99’,‘93’,‘89’,‘class1’,‘department1’),
(‘6’,‘121’,‘96’,‘74’,‘79’,‘class2’,‘department1’),
(‘7’,‘122’,‘89’,‘86’,‘85’,‘class2’,‘department1’),
(‘8’,‘123’,‘70’,‘78’,‘61’,‘class2’,‘department1’),
(‘9’,‘124’,‘76’,‘70’,‘76’,‘class2’,‘department1’),
(‘10’,‘211’,‘89’,‘93’,‘60’,‘class1’,‘department2’),
(‘11’,‘212’,‘76’,‘83’,‘75’,‘class1’,‘department2’),
(‘12’,‘213’,‘71’,‘94’,‘90’,‘class2’,‘department2’),
(‘13’,‘214’,‘94’,‘94’,‘66’,‘class1’,‘department2’),
(‘14’,‘215’,‘84’,‘82’,‘73’,‘class1’,‘department2’),
(‘15’,‘216’,‘85’,‘74’,‘93’,‘class1’,‘department2’),
(‘16’,‘221’,‘77’,‘99’,‘61’,‘class2’,‘department2’),
(‘17’,‘222’,‘80’,‘78’,‘96’,‘class2’,‘department2’),
(‘18’,‘223’,‘79’,‘74’,‘96’,‘class2’,‘department2’),
(‘19’,‘224’,‘75’,‘80’,‘78’,‘class2’,‘department2’),
(‘20’,‘225’,‘82’,‘85’,‘63’,‘class2’,‘department2’);
以符合条件的所有行为作为窗口
select studentId,language,math,english,classId,departmentId,count(math) over() as count1
from student_scores;
结果:
111 68 69 90 class1 department1 20
112 73 80 96 class1 department1 20
113 90 74 75 class1 department1 20
114 89 94 93 class1 department2 20
115 99 93 89 class1 department1 20
121 96 74 79 class2 department1 20
122 89 86 85 class2 department1 20
123 70 78 61 class2 department1 20
124 76 70 76 class2 department1 20
211 89 93 60 class1 department2 20
212 76 83 75 class1 department2 20
213 71 94 90 class2 department2 20
214 94 94 66 class1 department2 20
215 84 82 73 class1 department2 20
216 85 74 93 class1 department2 20
221 77 99 61 class2 department2 20
222 80 78 96 class2 department2 20
223 79 74 96 class2 department2 20
224 75 80 78 class2 department2 20
225 82 85 63 class2 department2 20
以根据classId分组的所有行为作为窗口
select studentId,language,math,english,classId,departmentId,count(math) over(partition by classId) as count2
from student_scores;
结果:
121 96 74 79 class2 department1 10
122 89 86 85 class2 department1 10
123 70 78 61 class2 department1 10
124 76 70 76 class2 department1 10
213 71 94 90 class2 department2 10
221 77 99 61 class2 department2 10
222 80 78 96 class2 department2 10
223 79 74 96 class2 department2 10
224 75 80 78 class2 department2 10
225 82 85 63 class2 department2 10
111 68 69 90 class1 department1 10
112 73 80 96 class1 department1 10
113 90 74 75 class1 department1 10
114 89 94 93 class1 department2 10
115 99 93 89 class1 department1 10
211 89 93 60 class1 department2 10
212 76 83 75 class1 department2 10
214 94 94 66 class1 department2 10
215 84 82 73 class1 department2 10
216 85 74 93 class1 department2 10
以根据classId分组、根据math倒序排序(即count大于等于当前行math的值的个数)的所有行为作为窗口
select studentId,language,math,english,classId,departmentId,count(math) over(partition by classId order by math desc) as count3
from student_scores;
结果:
221 77 99 61 class2 department2 1
213 71 94 90 class2 department2 2
122 89 86 85 class2 department1 3
225 82 85 63 class2 department2 4
224 75 80 78 class2 department2 5
123 70 78 61 class2 department1 7
222 80 78 96 class2 department2 7
121 96 74 79 class2 department1 9
223 79 74 96 class2 department2 9
124 76 70 76 class2 department1 10
114 89 94 93 class1 department2 2
214 94 94 66 class1 department2 2
115 99 93 89 class1 department1 4
211 89 93 60 class1 department2 4
212 76 83 75 class1 department2 5
215 84 82 73 class1 department2 6
112 73 80 96 class1 department1 7
113 90 74 75 class1 department1 9
216 85 74 93 class1 department2 9
111 68 69 90 class1 department1 10
以根据classId分组、根据math排序、从当前行前1行到当前行后2行的所有行为作为窗口
select studentId,language,math,english,classId,departmentId,count(math) over(partition by classId order by math desc rows between 1 preceding and 2 following) as count4
from student_scores;
结果:
221 77 99 61 class2 department2 3
213 71 94 90 class2 department2 4
122 89 86 85 class2 department1 4
225 82 85 63 class2 department2 4
224 75 80 78 class2 department2 4
123 70 78 61 class2 department1 4
222 80 78 96 class2 department2 4
121 96 74 79 class2 department1 4
223 79 74 96 class2 department2 3
124 76 70 76 class2 department1 2
114 89 94 93 class1 department2 3
214 94 94 66 class1 department2 4
115 99 93 89 class1 department1 4
211 89 93 60 class1 department2 4
212 76 83 75 class1 department2 4
215 84 82 73 class1 department2 4
112 73 80 96 class1 department1 4
113 90 74 75 class1 department1 4
216 85 74 93 class1 department2 3
111 68 69 90 class1 department1 2
以根据classId分组、根据math排序、从第一行到最后一行的所有行为作为窗口,默认就是所有行,相当于count3
select studentId,language,math,english,classId,departmentId,count(math) over(partition by classId order by math desc rows between unbounded preceding and unbounded following) as count5
from student_scores;
以根据classId分组、根据math排序、从当前行前1行到当前行的所有行为作为窗口
select studentId,language,math,english,classId,departmentId,count(math) over(partition by classId order by math desc rows between 1 preceding and current row) as count6
from student_scores;
结果:
221 77 99 61 class2 department2 1
213 71 94 90 class2 department2 2
122 89 86 85 class2 department1 2
225 82 85 63 class2 department2 2
224 75 80 78 class2 department2 2
123 70 78 61 class2 department1 2
222 80 78 96 class2 department2 2
121 96 74 79 class2 department1 2
223 79 74 96 class2 department2 2
124 76 70 76 class2 department1 2
114 89 94 93 class1 department2 1
214 94 94 66 class1 department2 2
115 99 93 89 class1 department1 2
211 89 93 60 class1 department2 2
212 76 83 75 class1 department2 2
215 84 82 73 class1 department2 2
112 73 80 96 class1 department1 2
113 90 74 75 class1 department1 2
216 85 74 93 class1 department2 2
111 68 69 90 class1 department1 2
以根据classId分组、根据math排序的所有行为作为窗口,从1开始对分区内的数据排序,相同的值无重复排名,即排名是连续的。
select studentId,language,math,english,classId,departmentId,row_number() over(partition by classId order by math desc) as count1
from student_scores;
结果:
221 77 99 61 class2 department2 1
213 71 94 90 class2 department2 2
122 89 86 85 class2 department1 3
225 82 85 63 class2 department2 4
224 75 80 78 class2 department2 5
123 70 78 61 class2 department1 6
222 80 78 96 class2 department2 7
121 96 74 79 class2 department1 8
223 79 74 96 class2 department2 9
124 76 70 76 class2 department1 10
114 89 94 93 class1 department2 1
214 94 94 66 class1 department2 2
115 99 93 89 class1 department1 3
211 89 93 60 class1 department2 4
212 76 83 75 class1 department2 5
215 84 82 73 class1 department2 6
112 73 80 96 class1 department1 7
113 90 74 75 class1 department1 8
216 85 74 93 class1 department2 9
111 68 69 90 class1 department1 10
以根据math排序的所有行为作为窗口,基于over子句中的orderby来确定一组值中一个值的排名,相同的值有重复排名,即排名可能不是连续的。
select studentId,language,math,english,classId,departmentId,rank() over(order by math desc) as count1
from student_scores;
结果:
221 77 99 61 class2 department2 1
114 89 94 93 class1 department2 2
213 71 94 90 class2 department2 2
214 94 94 66 class1 department2 2
115 99 93 89 class1 department1 5
211 89 93 60 class1 department2 5
122 89 86 85 class2 department1 7
225 82 85 63 class2 department2 8
212 76 83 75 class1 department2 9
215 84 82 73 class1 department2 10
112 73 80 96 class1 department1 11
224 75 80 78 class2 department2 11
123 70 78 61 class2 department1 13
222 80 78 96 class2 department2 13
113 90 74 75 class1 department1 15
121 96 74 79 class2 department1 15
216 85 74 93 class1 department2 15
223 79 74 96 class2 department2 15
124 76 70 76 class2 department1 19
111 68 69 90 class1 department1 20
以根据classId分组、math排序的所有行为作为窗口,基于over子句中的orderby来确定一组值中一个值的排名,相同的值有重复排名,即排名可能不是连续的。
select studentId,language,math,english,classId,departmentId,rank() over(partition by classId order by math desc) as count2
from student_scores;
结果:
221 77 99 61 class2 department2 1
213 71 94 90 class2 department2 2
122 89 86 85 class2 department1 3
225 82 85 63 class2 department2 4
224 75 80 78 class2 department2 5
123 70 78 61 class2 department1 6
222 80 78 96 class2 department2 6
121 96 74 79 class2 department1 8
223 79 74 96 class2 department2 8
124 76 70 76 class2 department1 10
114 89 94 93 class1 department2 1
214 94 94 66 class1 department2 1
115 99 93 89 class1 department1 3
211 89 93 60 class1 department2 3
212 76 83 75 class1 department2 5
215 84 82 73 class1 department2 6
112 73 80 96 class1 department1 7
113 90 74 75 class1 department1 8
216 85 74 93 class1 department2 8
111 68 69 90 class1 department1 10
dense_rank与rank的区别在于,相同的值会有重复排名,且排名也是连续的。如两个行的排名为1,则下一个排名为2。
select studentId,language,math,english,classId,departmentId,dense_rank() over(order by math desc) as count1
from student_scores;
结果:
221 77 99 61 class2 department2 1
114 89 94 93 class1 department2 2
213 71 94 90 class2 department2 2
214 94 94 66 class1 department2 2
115 99 93 89 class1 department1 3
211 89 93 60 class1 department2 3
122 89 86 85 class2 department1 4
225 82 85 63 class2 department2 5
212 76 83 75 class1 department2 6
215 84 82 73 class1 department2 7
112 73 80 96 class1 department1 8
224 75 80 78 class2 department2 8
123 70 78 61 class2 department1 9
222 80 78 96 class2 department2 9
113 90 74 75 class1 department1 10
121 96 74 79 class2 department1 10
216 85 74 93 class1 department2 10
223 79 74 96 class2 department2 10
124 76 70 76 class2 department1 11
111 68 69 90 class1 department1 12
简要介绍一下其它不太常用的
max(取窗口内最大值)
select studentId,language,math,english,classId,departmentId,max(math) over(partition by classId) as count1
from student_scores;
min(取窗口内最小值)
select studentId,language,math,english,classId,departmentId,min(math) over(partition by classId) as count1
from student_scores;
sum(取窗口内的和)
select studentId,language,math,english,classId,departmentId,sum(math) over(partition by classId) as count1
from student_scores;
avg(取窗口内的平均值)
select studentId,language,math,english,classId,departmentId,avg(math) over(partition by classId) as count1
from student_scores;
first_value(取窗口内第一个值)
select studentId,language,math,english,classId,departmentId,first_value(math) over(partition by classId) as count1
from student_scores;
last_value(取窗口内最后一个值)
select studentId,language,math,english,classId,departmentId,last_value(math) over(partition by classId) as count1
from student_scores;
ntile(将分区中已排序的值根据大小分组,返回组的排名)
select studentId,language,math,english,classId,departmentId,ntile(2) over(partition by classId order by math desc) as count1
from student_scores;
percent_rank(返回当前行的百分比排行)
select studentId,language,math,english,classId,departmentId,percent_rank() over(partition by classId order by math desc) as count1
from student_scores;