1.
什么是窗口函数
含义:窗口函数也叫OLAP函数(Online Analytical Processing,联机分析处理),可以对数据进行实时分析处理。
作用:
解决排名,排顺序 问题,分组后的操作
e.g.每个(group by or partition)班级内部按成绩排名
解决TOPN问题,e.g.每个班级前两名的学生(涉及到 分区)
审批记录表一个用户的上个审批记录:e.g 每个用户的当前审批记录和上一次的审批记录
窗口函数和普通聚合函数也很容易混淆,二者区别如下 :
聚合函数是将多条记录聚合为一条;而窗口函数是每条记录都会执行,有几条记录执行完还是几条(eg:根据学生id 分区成为 3 个小组,可以认为 分成了三个窗口)。
聚合函数也可以用于窗口函数中,这个后面会举例说明。
2.
按照功能划分,可以把MySQL支持的窗口函数分为如下几类:
序号函数:row_number() / rank() / dense_rank()
分布函数:percent_rank() / cume_dist()
前后函数:lag() / lead()
头尾函数:first_value() / last_value()
其他函数:nth_value() / ntile()
总结:
分组后累计求和:
# Preview the login fixture; columns are spelled out instead of using *.
select id,
       user_id,
       client_id,
       date
from login;
# +--+-------+---------+----------+
# |id|user_id|client_id|date |
# +--+-------+---------+----------+
# |1 |2 |1 |2020-10-12|
# |2 |3 |2 |2020-10-12|
# |3 |1 |2 |2020-10-12|
# |4 |1 |3 |2020-10-13|
# |5 |3 |2 |2020-10-13|
# +--+-------+---------+----------+
# Preview the passing_number fixture; columns are spelled out instead of using *.
select id,
       user_id,
       number,
       date
from passing_number;
# +--+-------+------+----------+
# |id|user_id|number|date |
# +--+-------+------+----------+
# |1 |2 |4 |2020-10-12|
# |2 |3 |1 |2020-10-12|
# |3 |1 |0 |2020-10-13|
# |4 |3 |2 |2020-10-13|
# +--+-------+------+----------+
# Preview the user fixture; columns are spelled out instead of using *.
select id,
       name
from user;
# +--+--------+
# |id|name |
# +--+--------+
# |1 |tm |
# |2 |fh |
# |3 |wangchao|
# +--+--------+
# 牛客每天有很多人登录,请你统计一下牛客每个用户刷题情况,包括: 用户的名字,以及截止到某天,累计总共通过了多少题。 不存在没有登录却刷题的情况,但存在登录了没刷题的情况,不会存在刷题表里面,
# 会存在提交代码没有通过的情况并记录在刷题表里,通过数目是0。
# 请你写出一个sql语句查询刷题信息,包括: 用户的名字,以及截止到某天,累计总共通过了多少题,并且查询结果先按照日期升序排序,再按照姓名升序排序,有登录却没有刷题的哪一天的数据不需要输出,上面的例子查询结果如下
# (ps:每个用户在每一天的累计数量)
# 方法①
# Method 1: correlated scalar subqueries (no window function needed).
# For every passing_number row:
#   name  - the user's name, looked up in `user` (the table shown above and
#           joined by method 2; the previous `user0` did not match it).
#   total - sum of that user's `number` over all rows dated on or before
#           the current row's date, compared at day granularity.
select (select u.name from user u where u.id = pn1.user_id) as name,
       pn1.user_id,
       pn1.date,
       (select sum(pn2.number)
        from passing_number pn2
        where pn2.user_id = pn1.user_id
          and date_format(pn2.date, '%Y-%m-%d') <= date_format(pn1.date, '%Y-%m-%d')) as total
from passing_number pn1
# The task asks for date ascending first, then name ascending.
order by pn1.date asc, name asc;
# +--------+-------+----------+-----+
# |name    |user_id|date      |total|
# +--------+-------+----------+-----+
# |fh      |2      |2020-10-12|4    |
# |wangchao|3      |2020-10-12|1    |
# |tm      |1      |2020-10-13|0    |
# |wangchao|3      |2020-10-13|3    |
# +--------+-------+----------+-----+
# Method 1, simplified: same correlated-subquery approach, comparing the
# date columns directly instead of formatting both sides.
# The name lookup reads `user` (the table shown above and joined by
# method 2; the previous `user0` did not match it).
select (select u.name from user u where u.id = pn1.user_id) as name,
       pn1.user_id,
       pn1.date,
       (select sum(pn2.number)
        from passing_number pn2
        where pn2.user_id = pn1.user_id
          and pn2.date <= pn1.date) as total
from passing_number pn1
# The task asks for date ascending first, then name ascending.
order by pn1.date asc, name asc;
# Method 2: running total with a window function, partitioned per user and
# ordered by date.
# sum(x) over (partition by ... order by ...) accumulates x row by row inside
# each partition, in the window's order.
# The partition key is the user's id rather than the display name: two
# different users who share a name must not be merged into one running total.
# No explicit frame is given, so rows of one user with the same date are
# peers and share that day's cumulative value (a per-day total).
select u.name as u_n,
       p.date as date,
       p.number,
       sum(p.number) over (partition by p.user_id order by p.date) as ps_num
from passing_number p
inner join user u on p.user_id = u.id
order by p.date, u.name;
# +--------+----------+------+------+
# |u_n     |date      |number|ps_num|
# +--------+----------+------+------+
# |fh      |2020-10-12|4     |4     |
# |wangchao|2020-10-12|1     |1     |
# |tm      |2020-10-13|0     |0     |
# |wangchao|2020-10-13|2     |3     |
# +--------+----------+------+------+
案例二:
# SQL34 每份试卷每月作答数和截止当月的作答总数
#
# Rebuild the exam_record fixture from scratch so the examples below are
# repeatable: drop any previous copy, create the table, load 12 sample rows.
drop table if exists exam_record;

create table exam_record (
    id          int      primary key auto_increment comment '自增ID',
    uid         int      not null                   comment '用户ID',
    exam_id     int      not null                   comment '试卷ID',
    start_time  datetime not null                   comment '开始时间',
    submit_time datetime                            comment '提交时间',
    score       tinyint                             comment '得分'
) character set utf8 collate utf8_general_ci;

# The last row has no submit_time and no score (both null).
insert into exam_record (uid, exam_id, start_time, submit_time, score)
values (1001, 9001, '2020-01-01 09:01:01', '2020-01-01 09:21:59', 90),
       (1002, 9001, '2020-01-20 10:01:01', '2020-01-20 10:10:01', 89),
       (1002, 9001, '2020-02-01 12:11:01', '2020-02-01 12:31:01', 83),
       (1003, 9001, '2020-03-01 19:01:01', '2020-03-01 19:30:01', 75),
       (1004, 9001, '2020-03-01 12:01:01', '2020-03-01 12:11:01', 60),
       (1003, 9001, '2020-03-01 12:01:01', '2020-03-01 12:41:01', 90),
       (1002, 9001, '2020-05-02 19:01:01', '2020-05-02 19:32:00', 90),
       (1001, 9002, '2020-01-02 19:01:01', '2020-01-02 19:59:01', 69),
       (1004, 9002, '2020-02-02 12:01:01', '2020-02-02 12:20:01', 99),
       (1003, 9002, '2020-02-02 12:01:01', '2020-02-02 12:31:01', 68),
       (1001, 9002, '2020-02-02 12:01:01', '2020-02-02 12:43:01', 81),
       (1001, 9002, '2020-03-02 12:11:01', null, null);
# 请输出每份试卷每月作答数和截止当月的作答总数。
# 由示例数据结果输出如下:
# +-------+-----------+---------+------------+
# |exam_id|start_month|month_cnt|cum_exam_cnt|
# +-------+-----------+---------+------------+
# |9001 |202001 |2 |2 |
# |9001 |202002 |1 |3 |
# |9001 |202003 |3 |6 |
# |9001 |202005 |1 |7 |
# |9002 |202001 |1 |1 |
# |9002 |202002 |3 |4 |
# |9002 |202003 |1 |5 |
# +-------+-----------+---------+------------+
# sum() over(partition by 字段1,字段2,...... order by 字段3) 的用法:根据 字段1,字段2 ,...... 分区后 ,区内按 字段3 的顺序逐行累加;如果省略 order by,则每一行得到的都是整个分区的合计,而不是累计值
# 先根据 分组获取核心数据(目的结果数据是 7条,所以构造 7 条数据),获取 每个 exam_id , 每个 start_month 的数量 ,sql 如下
# Step 1: one row per (exam_id, start month) with the number of attempts
# started in that month.
select exam_id,
       date_format(start_time, '%Y%m') as start_month,
       count(*)                        as month_cnt
from exam_record
group by exam_id, date_format(start_time, '%Y%m')
# Sort by month as well, so the order inside each exam is defined.
order by exam_id, start_month;
# +-------+-----------+---------+
# |exam_id|start_month|month_cnt|
# +-------+-----------+---------+
# |9001 |202001 |2 |
# |9001 |202002 |1 |
# |9001 |202003 |3 |
# |9001 |202005 |1 |
# |9002 |202001 |1 |
# |9002 |202002 |3 |
# |9002 |202003 |1 |
# +-------+-----------+---------+
# 正确解法: partition by exam_id ,因为 是在 exam_id 内数据累加的
# Correct solution: accumulate month_cnt inside each exam_id.
# monthly: attempts per exam per start month (one row per pair).
with monthly as (
    select exam_id,
           date_format(start_time, '%Y%m') as start_month,
           count(*)                        as month_cnt
    from exam_record
    group by exam_id, date_format(start_time, '%Y%m')
)
select exam_id,
       start_month,
       month_cnt,
       # exam_id is already the partition key, so only start_month needs to
       # appear in the window ordering.
       sum(month_cnt) over (partition by exam_id order by start_month) as cum_exam_cnt
from monthly
# Sorting is done here, on the final result, so the row order is defined.
order by exam_id, start_month;
# +-------+-----------+---------+------------+
# |exam_id|start_month|month_cnt|cum_exam_cnt|
# +-------+-----------+---------+------------+
# |9001 |202001 |2 |2 |
# |9001 |202002 |1 |3 |
# |9001 |202003 |3 |6 |
# |9001 |202005 |1 |7 |
# |9002 |202001 |1 |1 |
# |9002 |202002 |3 |4 |
# |9002 |202003 |1 |5 |
# +-------+-----------+---------+------------+
# 错误写法: partition by exam_id, start_month 是根据 exam_id, start_month 累加的,所以 得到的结果不是需求需要的
# Counter-example kept on purpose: partitioning by (exam_id, start_month)
# puts every monthly row in its own partition, so the "running" sum is just
# month_cnt again.
select monthly.exam_id,
       monthly.start_month,
       monthly.month_cnt,
       sum(monthly.month_cnt) over w cum_exam_cnt
from (
    select exam_id,
           date_format(start_time, '%Y%m') start_month,
           count(*) month_cnt
    from exam_record
    group by exam_id, date_format(start_time, '%Y%m')
    order by exam_id
) monthly
window w as (partition by exam_id, start_month order by exam_id, start_month);
# +-------+-----------+---------+------------+
# |exam_id|start_month|month_cnt|cum_exam_cnt|
# +-------+-----------+---------+------------+
# |9001 |202001 |2 |2 |
# |9001 |202002 |1 |1 |
# |9001 |202003 |3 |3 |
# |9001 |202005 |1 |1 |
# |9002 |202001 |1 |1 |
# |9002 |202002 |3 |3 |
# |9002 |202003 |1 |1 |
# +-------+-----------+---------+------------+
1. 分组后 组内的总数
# 扩展 count() over(partition by column1 ,column2) 的使用: 根据 column1 ,column2 分组后统计 组内总数,但是呢 结果集的总数和源数据是一样的
# 效果和 count(*) group by 效果类似 ,只是后者会聚合,数据数量可能会改变,前者是窗口函数 ,数据数量不会变
# demo
# Preview the dept_emp fixture; columns are spelled out instead of using *.
select emp_no,
       dept_no,
       from_date,
       to_date
from dept_emp;
# +------+-------+----------+----------+
# |emp_no|dept_no|from_date |to_date |
# +------+-------+----------+----------+
# |10001 |d001 |1986-06-26|9999-01-01|
# |10002 |d001 |1996-08-03|9999-01-01|
# |10003 |d002 |1995-12-03|9999-01-01|
# +------+-------+----------+----------+
# Plain aggregation: group by collapses dept_emp to one row per dept_no.
select dept_no,
       count(dept_no)
from dept_emp
group by dept_no
order by dept_no desc;
# +-------+--------------+
# |dept_no|count(dept_no)|
# +-------+--------------+
# |d002 |1 |
# |d001 |2 |
# +-------+--------------+
# 数据量没变
# Window version: every dept_emp row is kept and carries the row count of
# its department. The window orders by the partition key itself, so all rows
# of a partition are peers and each one sees the whole partition's count.
select dept_no,
       count(dept_no) over w as rn
from dept_emp
window w as (partition by dept_no order by dept_no desc);
# +-------+--+
# |dept_no|rn|
# +-------+--+
# |d001 |2 |
# |d001 |2 |
# |d002 |1 |
# +-------+--+
#
# Same per-department count, without an ordering inside the window; the
# final result is sorted by dept_no descending instead.
select dept_no,
       count(dept_no) over w as rn
from dept_emp
window w as (partition by dept_no)
order by dept_no desc;
# +-------+--+
# |dept_no|rn|
# +-------+--+
# |d002 |1 |
# |d001 |2 |
# |d001 |2 |
# +-------+--+
1.
分组后排序:
# 窗口函数:分组后对组内的数据进行操作
# 需求:在girl表新增一个 字段 num(唯一) ,在createdTime 一样的基础上 , num 按照 202107230001 202107230002 ..... 当天最大的数据是 202107239999
# 第二天 num 202107240001 202107240002 202107240003 ..... 当天最大的数据是 202107249999
# 思路:
# ① 先用 窗口函数 按照日期 分组,同时进行组内排序 生成 code
# ② 拼接 resultCode 结构(if()函数 或者 case when then else end 语法)
# ③ 执行 更新(update A set 字段 = (select newValue from Tem where Tem.id = A.id ) or update ( A inner join Tem on Tem.id = A.id) set A.字段 = Tem.newValue ; )
# 分组并且组内排序
# Step 1: number the rows of each calendar day, starting at 1 per day.
# Many rows share the same createdTime, so id is added as a tiebreaker;
# without it the numbering among tied rows is not defined and can change
# between runs.
select id,
       row_number() over (partition by date_format(createdTime, '%Y-%m-%d')
                          order by createdTime asc, id asc) as code,
       date_format(createdTime, '%Y%m%d') as dataStr,
       name,
       createdTime
from girls;
# +--+----+--------+----+-------------------+
# |id|code|dataStr |name|createdTime |
# +--+----+--------+----+-------------------+
# |2 |1 |20200723|bbb |2020-07-23 14:13:48|
# |1 |2 |20200723|aaa |2020-07-23 15:13:48|
# |3 |3 |20200723|ccc |2020-07-23 15:13:48|
# |4 |4 |20200723|4 |2020-07-23 16:13:48|
# |6 |1 |20200724|6 |2020-07-24 16:13:48|
# |8 |2 |20200724|8 |2020-07-24 17:13:48|
# |9 |3 |20200724|9 |2020-07-24 18:13:48|
# |10|4 |20200724|10 |2020-07-24 19:13:48|
# |11|5 |20200724|10 |2020-07-24 19:13:48|
# |12|6 |20200724|10 |2020-07-24 19:13:48|
# |13|7 |20200724|10 |2020-07-24 19:13:48|
# |14|8 |20200724|10 |2020-07-24 19:13:48|
# |15|9 |20200724|10 |2020-07-24 19:13:48|
# |16|10 |20200724|10 |2020-07-24 19:13:48|
# |17|11 |20200724|10 |2020-07-24 19:13:48|
# |18|12 |20200724|10 |2020-07-24 19:13:48|
# |19|13 |20200724|10 |2020-07-24 19:13:48|
# |20|14 |20200724|10 |2020-07-24 19:13:48|
# |21|15 |20200724|10 |2020-07-24 19:13:48|
# |22|16 |20200724|10 |2020-07-24 19:13:48|
# |23|17 |20200724|10 |2020-07-24 19:13:48|
# |24|18 |20200724|10 |2020-07-24 19:13:48|
# |25|19 |20200724|10 |2020-07-24 19:13:48|
# +--+----+--------+----+-------------------+
# 拼接 code 结构
# Step 2: build resultCode as <yyyymmdd>_<4-digit daily sequence>.
# lpad() replaces the per-length case ladder. The old 4-digit branch dropped
# the '_' separator, so codes 1000-9999 had a different shape from 1-999.
# NOTE(review): the requirement text shows the number without '_'
# (e.g. 202107230001) while the recorded output uses '_'; this keeps the
# recorded output format - confirm which one is wanted.
select Tem.id,
       Tem.createdTime,
       Tem.code,
       length(Tem.code),
       case
           # lpad() truncates values longer than the target width, so
           # sequences above 9999 stay null (as before) rather than
           # colliding with an existing code.
           when Tem.code <= 9999
               then concat(date_format(Tem.createdTime, '%Y%m%d'), '_', lpad(Tem.code, 4, '0'))
           else null
       end as resultCode
from (
    select id,
           # id breaks ties between rows with the same createdTime so the
           # numbering is repeatable.
           row_number() over (partition by date_format(createdTime, '%Y-%m-%d')
                              order by createdTime asc, id asc) as code,
           createdTime as createdTime
    from girls
) as Tem;
# 更新
# Step 3: write the generated code into girls.num.
# Every row of girls is updated on purpose (no where clause): each id
# matches exactly one row of the derived table, which is joined back on id.
# Format: <yyyymmdd>_<4-digit daily sequence>. The separator is now applied
# to 4-digit sequences too (the old case ladder dropped it there).
# NOTE(review): the requirement text shows the number without '_'; this
# keeps the format of the recorded output - confirm which one is wanted.
update girls
inner join (
    select id,
           # id breaks ties between rows with the same createdTime so a
           # rerun assigns the same numbers.
           row_number() over (partition by date_format(createdTime, '%Y-%m-%d')
                              order by createdTime asc, id asc) as code,
           createdTime
    from girls
) as Tem on Tem.id = girls.id
set girls.num = case
                    # lpad() truncates values longer than the target width,
                    # so sequences above 9999 are left null (as before)
                    # rather than colliding with an existing code.
                    when Tem.code <= 9999
                        then concat(date_format(Tem.createdTime, '%Y%m%d'), '_', lpad(Tem.code, 4, '0'))
                    else null
                end;
# 查看结果
# Inspect the outcome of the update: timestamp, its day string, and num.
select createdTime,
       date_format(createdTime, '%Y%m%d'),
       num
from girls;
# +-------------------+----------------------------------+-------------+
# |createdTime |date_format(createdTime, '%Y%m%d')|num |
# +-------------------+----------------------------------+-------------+
# |2020-07-23 15:13:48|20200723 |20200723_0002|
# |2020-07-23 14:13:48|20200723 |20200723_0001|
# |2020-07-23 15:13:48|20200723 |20200723_0003|
# |2020-07-23 16:13:48|20200723 |20200723_0004|
# |2020-07-24 16:13:48|20200724 |20200724_0001|
# |2020-07-24 17:13:48|20200724 |20200724_0002|
# |2020-07-24 18:13:48|20200724 |20200724_0003|
# |2020-07-24 19:13:48|20200724 |20200724_0004|
# |2020-07-24 19:13:48|20200724 |20200724_0005|
# |2020-07-24 19:13:48|20200724 |20200724_0006|
# |2020-07-24 19:13:48|20200724 |20200724_0007|
# |2020-07-24 19:13:48|20200724 |20200724_0008|
# |2020-07-24 19:13:48|20200724 |20200724_0009|
# |2020-07-24 19:13:48|20200724 |20200724_0010|
# |2020-07-24 19:13:48|20200724 |20200724_0011|
# |2020-07-24 19:13:48|20200724 |20200724_0012|
# |2020-07-24 19:13:48|20200724 |20200724_0013|
# |2020-07-24 19:13:48|20200724 |20200724_0014|
# |2020-07-24 19:13:48|20200724 |20200724_0015|
# |2020-07-24 19:13:48|20200724 |20200724_0016|
# |2020-07-24 19:13:48|20200724 |20200724_0017|
# |2020-07-24 19:13:48|20200724 |20200724_0018|
# |2020-07-24 19:13:48|20200724 |20200724_0019|
# +-------------------+----------------------------------+-------------+
demo:
# Preview the order_test fixture; columns are spelled out instead of using *.
select id,
       user_guid,
       user_name,
       created_time,
       modified_time,
       amount
from order_test;
# +--+---------+---------+-------------------+-------------------+------+
# |id|user_guid|user_name|created_time |modified_time |amount|
# +--+---------+---------+-------------------+-------------------+------+
# |0 |1 |蚩尤 |0001-06-11 14:41:21|0001-06-11 14:41:21|10 |
# |2 |1 |蚩尤 |0001-06-12 14:41:31|0050-06-11 14:41:31|10 |
# |3 |1 |蚩尤 |0001-06-13 14:41:34|0100-06-11 14:41:34|10 |
# |4 |2 |姬丹 |0500-06-11 14:41:36|0500-06-11 14:41:36|10 |
# |5 |2 |姬丹 |0500-07-11 14:41:38|0509-06-11 14:41:38|10 |
# |6 |2 |姬丹 |0500-08-11 14:41:38|0510-06-11 14:41:38|10 |
# |7 |3 |李元昊 |1000-06-01 14:41:39|1000-06-11 14:41:39|10 |
# |8 |3 |李元昊 |1000-06-11 14:41:40|1001-06-11 14:41:40|10 |
# |9 |3 |李元昊 |1000-06-22 14:41:41|1009-06-11 14:41:41|10 |
# |10|4 |张三 |2000-06-11 14:41:41|2000-06-11 14:41:41|10 |
# |11|4 |张三 |2000-06-11 14:41:41|2010-06-11 14:41:41|10 |
# |12|5 |李四 |2000-06-11 14:41:41|2020-06-11 14:41:41|10 |
# +--+---------+---------+-------------------+-------------------+------+
# 需求 查询每个用户上一个订单距离当前订单的时间间隔。
# ① 子查询(select 后面的子查询结果作为 新的列)
# Approach 1: a correlated scalar subquery in the select list supplies the
# previous order time as an extra column; the outer query turns it into a
# day difference.
select Tem2.user_guid,
       Tem2.user_name,
       Tem2.created_time,
       Tem2.last_time,
       datediff(Tem2.created_time, Tem2.last_time) as diff
from (
    select cur.user_guid,
           cur.user_name,
           cur.created_time,
           # Latest order of the same user that is strictly earlier than
           # this one; null when there is none. Because the comparison is
           # strict, orders with an identical created_time do not count as
           # each other's predecessor.
           (select max(prev.created_time)
            from order_test as prev
            where prev.user_guid = cur.user_guid
              and prev.created_time < cur.created_time) as last_time
    from order_test as cur
) as Tem2;
# +---------+---------+-------------------+-------------------+----+
# |user_guid|user_name|created_time |last_time |diff|
# +---------+---------+-------------------+-------------------+----+
# |1 |蚩尤 |0001-06-11 14:41:21|NULL |NULL|
# |1 |蚩尤 |0001-06-12 14:41:31|0001-06-11 14:41:21|1 |
# |1 |蚩尤 |0001-06-13 14:41:34|0001-06-12 14:41:31|1 |
# |2 |姬丹 |0500-06-11 14:41:36|NULL |NULL|
# |2 |姬丹 |0500-07-11 14:41:38|0500-06-11 14:41:36|30 |
# |2 |姬丹 |0500-08-11 14:41:38|0500-07-11 14:41:38|31 |
# |3 |李元昊 |1000-06-01 14:41:39|NULL |NULL|
# |3 |李元昊 |1000-06-11 14:41:40|1000-06-01 14:41:39|10 |
# |3 |李元昊 |1000-06-22 14:41:41|1000-06-11 14:41:40|11 |
# |4 |张三 |2000-06-11 14:41:41|NULL |NULL|
# |4 |张三 |2000-06-11 14:41:41|NULL |NULL|
# |5 |李四 |2000-06-11 14:41:41|NULL |NULL|
# +---------+---------+-------------------+-------------------+----+
# ② 窗口函数
# 前后函数——lead(n)/lag(n)
# 用途:返回分区中位于当前行前n行(lag)/后n行(lead)的记录值
# 使用场景:查询上一个订单距离当前订单的时间间隔
# Approach 2: lag() fetches the previous order's created_time inside each
# user's partition; datediff() turns the pair into a gap in days.
# The first order of every user has no predecessor, so its diff is null.
# id is the tiebreaker for orders with an identical created_time; without
# it, which of the tied rows counts as "previous" is not defined.
select user_guid,
       user_name,
       created_time,
       datediff(created_time,
                lag(created_time, 1) over (partition by user_guid
                                           order by created_time asc, id asc)) as diff
from order_test
# Explicit ordering so each user's rows appear in the same sequence the
# window used.
order by user_guid, created_time, id;
# +---------+---------+-------------------+----+
# |user_guid|user_name|created_time |diff|
# +---------+---------+-------------------+----+
# |1 |蚩尤 |0001-06-11 14:41:21|NULL|
# |1 |蚩尤 |0001-06-12 14:41:31|1 |
# |1 |蚩尤 |0001-06-13 14:41:34|1 |
# |2 |姬丹 |0500-06-11 14:41:36|NULL|
# |2 |姬丹 |0500-07-11 14:41:38|30 |
# |2 |姬丹 |0500-08-11 14:41:38|31 |
# |3 |李元昊 |1000-06-01 14:41:39|NULL|
# |3 |李元昊 |1000-06-11 14:41:40|10 |
# |3 |李元昊 |1000-06-22 14:41:41|11 |
# |4 |张三 |2000-06-11 14:41:41|NULL|
# |4 |张三 |2000-06-11 14:41:41|0 |
# |5 |李四 |2000-06-11 14:41:41|NULL|
# +---------+---------+-------------------+----+
# 内层SQL先通过lag函数得到当前用户 当前记录 上一次订单的日期 ,外层SQL再将本次订单和上次订单日期做差得到时间间隔diff
# 如果是 0
# Same query with an offset of 0: lag() then yields the current row's own
# created_time, so every diff comes out as 0.
select tem.user_guid,
       tem.user_name,
       tem.created_time,
       datediff(tem.created_time, tem.last_date) as diff
from (
    select user_guid,
           user_name,
           created_time,
           lag(created_time, 0) over w as last_date
    from order_test
    window w as (partition by user_guid order by created_time asc)
) as tem;
# +---------+---------+-------------------+----+
# |user_guid|user_name|created_time |diff|
# +---------+---------+-------------------+----+
# |1 |蚩尤 |0001-06-11 14:41:21|0 |
# |1 |蚩尤 |0001-06-12 14:41:31|0 |
# |1 |蚩尤 |0001-06-13 14:41:34|0 |
# |2 |姬丹 |0500-06-11 14:41:36|0 |
# |2 |姬丹 |0500-07-11 14:41:38|0 |
# |2 |姬丹 |0500-08-11 14:41:38|0 |
# |3 |李元昊 |1000-06-01 14:41:39|0 |
# |3 |李元昊 |1000-06-11 14:41:40|0 |
# |3 |李元昊 |1000-06-22 14:41:41|0 |
# |4 |张三 |2000-06-11 14:41:41|0 |
# |4 |张三 |2000-06-11 14:41:41|0 |
# |5 |李四 |2000-06-11 14:41:41|0 |
# +---------+---------+-------------------+----+
# 七、头尾函数——first_value(expr)/last_value(expr)
# 用途:得到分区中的第一个/最后一个指定参数的值
# 使用场景:查询截止到当前订单,按照日期排序第一个订单和最后一个订单的下单时间
# For each order: the earliest order time of the user, and the latest order
# time seen so far in created_time order.
# No frame is specified, so with an order by the window stops at the current
# row (and its peers); last_date therefore follows the current row rather
# than the user's overall last order.
select user_guid,
       user_name,
       created_time,
       first_value(created_time) over (partition by user_guid order by created_time asc) as first_date,
       last_value(created_time) over (partition by user_guid order by created_time asc) as last_date
from order_test;
# +---------+---------+-------------------+-------------------+-------------------+
# |user_guid|user_name|created_time |first_date |last_date |
# +---------+---------+-------------------+-------------------+-------------------+
# |1 |蚩尤 |0001-06-11 14:41:21|0001-06-11 14:41:21|0001-06-11 14:41:21|
# |1 |蚩尤 |0001-06-12 14:41:31|0001-06-11 14:41:21|0001-06-12 14:41:31|
# |1 |蚩尤 |0001-06-13 14:41:34|0001-06-11 14:41:21|0001-06-13 14:41:34|
# |2 |姬丹 |0500-06-11 14:41:36|0500-06-11 14:41:36|0500-06-11 14:41:36|
# |2 |姬丹 |0500-07-11 14:41:38|0500-06-11 14:41:36|0500-07-11 14:41:38|
# |2 |姬丹 |0500-08-11 14:41:38|0500-06-11 14:41:36|0500-08-11 14:41:38|
# |3 |李元昊 |1000-06-01 14:41:39|1000-06-01 14:41:39|1000-06-01 14:41:39|
# |3 |李元昊 |1000-06-11 14:41:40|1000-06-01 14:41:39|1000-06-11 14:41:40|
# |3 |李元昊 |1000-06-22 14:41:41|1000-06-01 14:41:39|1000-06-22 14:41:41|
# |4 |张三 |2000-06-11 14:41:41|2000-06-11 14:41:41|2000-06-11 14:41:41|
# |4 |张三 |2000-06-11 14:41:41|2000-06-11 14:41:41|2000-06-11 14:41:41|
# |5 |李四 |2000-06-11 14:41:41|2000-06-11 14:41:41|2000-06-11 14:41:41|
# +---------+---------+-------------------+-------------------+-------------------+
# 九、聚合函数作为窗口函数
# 用途:在窗口中每条记录动态应用聚合函数(sum/avg/max/min/count),可以动态计算在指定的窗口内的各种聚合函数值。
# 应用场景:每个用户按照订单id,截止到当前的累计订单金额/平均订单金额/最大订单金额/最小订单金额/订单数是多少
# 聚合函数作为窗口函数
# 作用:聚合函数作为窗口函数,是起到"累加/累计"的效果,比如,就是截止到本行,最大值?最小值是多少
# Running sum / count / avg / max / min of amount per user, one row per
# order, accumulated up to and including the current order.
# The frame is stated explicitly as ROWS ... CURRENT ROW and id is added as
# a tiebreaker. With the default frame, orders sharing a created_time are
# peers and each of them already includes the other, so both of user 4's
# rows used to report sum 20 / count 2 instead of 10 / 1 and 20 / 2.
select user_guid,
       user_name,
       created_time,
       sum(amount) over w   as sum1,
       count(amount) over w as count1,
       avg(amount) over w   as avg1,
       max(amount) over w   as max1,
       min(amount) over w   as min1
from order_test
window w as (partition by user_guid
             order by created_time asc, id asc
             rows between unbounded preceding and current row)
order by user_guid, created_time, id;
# +---------+---------+-------------------+----+------+-------+----+----+
# |user_guid|user_name|created_time |sum1|count1|avg1 |max1|min1|
# +---------+---------+-------------------+----+------+-------+----+----+
# |1 |蚩尤 |0001-06-11 14:41:21|10 |1 |10.0000|10 |10 |
# |1 |蚩尤 |0001-06-12 14:41:31|20 |2 |10.0000|10 |10 |
# |1 |蚩尤 |0001-06-13 14:41:34|30 |3 |10.0000|10 |10 |
# |2 |姬丹 |0500-06-11 14:41:36|10 |1 |10.0000|10 |10 |
# |2 |姬丹 |0500-07-11 14:41:38|20 |2 |10.0000|10 |10 |
# |2 |姬丹 |0500-08-11 14:41:38|30 |3 |10.0000|10 |10 |
# |3 |李元昊 |1000-06-01 14:41:39|10 |1 |10.0000|10 |10 |
# |3 |李元昊 |1000-06-11 14:41:40|20 |2 |10.0000|10 |10 |
# |3 |李元昊 |1000-06-22 14:41:41|30 |3 |10.0000|10 |10 |
# |4 |张三 |2000-06-11 14:41:41|10 |1 |10.0000|10 |10 |
# |4 |张三 |2000-06-11 14:41:41|20 |2 |10.0000|10 |10 |
# |5 |李四 |2000-06-11 14:41:41|10 |1 |10.0000|10 |10 |
# +---------+---------+-------------------+----+------+-------+----+----+
# 窗口函数和普通聚合函数也很容易混淆,二者区别如下 :
# 聚合函数是将多条记录聚合为一条;而窗口函数是每条记录都会执行,有几条记录执行完还是几条
# 窗口函数可以理解为记录集合,每条记录都要在窗口内执行函数,多行聚合为多行。MYSQL从8.0版本开始才支持窗口函数