mysql 8.0 窗口函数

1.

什么是窗口函数
含义:窗口函数也叫OLAP函数(Online Analytical Processing,联机分析处理),可以对数据进行实时分析处理。

作用:

解决排名,排顺序 问题,分组后的操作

e.g.每个(group by  or partition)班级内部按成绩排名
解决TOPN问题,e.g.每个班级前两名的学生(涉及到 分区)

审批记录表一个用户的上个审批记录:e.g 每个用户的当前审批记录和上一次的审批记录

窗口函数和普通聚合函数也很容易混淆,二者区别如下 :

  • 聚合函数是将多条记录聚合为一条;而窗口函数是每条记录都会执行,有几条记录执行完还是几条(eg:根据学生id 分区成为 3 个小组,可以认为 分成了三个窗口)。

  • 聚合函数也可以用于窗口函数中,这个后面会举例说明。

    2.

  • 按照功能划分,可以把MySQL支持的窗口函数分为如下几类:

  • 序号函数:row_number() / rank() / dense_rank()

  • 分布函数:percent_rank() / cume_dist()

  • 前后函数:lag() / lead()

  • 头尾函数:first_value() / last_value()

  • 其他函数:nth_value() / ntile()

  • 通过partition by将班级分类,相当于之前用过的group by子句功能,但是group by子句分类汇总会改变原数据的行数,而用窗口函数则保持原行数;
  • 通过order by将成绩降序排列,与之前学的order by子句用法一样,后边可以升序asc或者降序desc;

总结:

  • 窗口函数这里的“窗口”表示范围,可以理解为将原数据划分范围,即分组,然后用函数实现某些目的
  • 窗口函数有分组和排序的功能
  • 不减少原表的行数(重要)

分组后累计求和:




# Display the contents of the login sample table.
SELECT id,
       user_id,
       client_id,
       date
FROM login;

# +--+-------+---------+----------+
# |id|user_id|client_id|date      |
# +--+-------+---------+----------+
# |1 |2      |1        |2020-10-12|
# |2 |3      |2        |2020-10-12|
# |3 |1      |2        |2020-10-12|
# |4 |1      |3        |2020-10-13|
# |5 |3      |2        |2020-10-13|
# +--+-------+---------+----------+


# Display the contents of the passing_number sample table.
SELECT id,
       user_id,
       number,
       date
FROM passing_number;

# +--+-------+------+----------+
# |id|user_id|number|date      |
# +--+-------+------+----------+
# |1 |2      |4     |2020-10-12|
# |2 |3      |1     |2020-10-12|
# |3 |1      |0     |2020-10-13|
# |4 |3      |2     |2020-10-13|
# +--+-------+------+----------+

 





# Display the contents of the user sample table.
SELECT id,
       name
FROM user;

# +--+--------+
# |id|name    |
# +--+--------+
# |1 |tm      |
# |2 |fh      |
# |3 |wangchao|
# +--+--------+

# 牛客每天有很多人登录,请你统计一下牛客每个用户刷题情况,包括: 用户的名字,以及截止到某天,累计总共通过了多少题。 不存在没有登录却刷题的情况,但存在登录了没刷题的情况,不会存在刷题表里面,
# 会存在提交代码没有通过的情况并记录在刷题表里,通过数目是0。
# 请你写出一个sql语句查询刷题信息,包括: 用户的名字,以及截止到某天,累计总共通过了多少题,并且查询结果先按照日期升序排序,再按照姓名升序排序,有登录却没有刷题的那一天的数据不需要输出,上面的例子查询结果如下
#  (ps:每个用户在每一天的累计数量)


# 方法①
# Method 1: running total via correlated scalar subqueries.
# For every passing_number row, `total` sums that user's `number` over all of the
# user's rows whose date (rendered as YYYY-MM-DD) is on or before the current row's.
# The name is looked up in the `user` table, and rows are ordered by date and then
# by name, as the task statement requires.
SELECT (SELECT u.name FROM user u WHERE u.id = pn1.user_id) AS name,
       pn1.user_id,
       pn1.date,
       (SELECT sum(pn2.number)
        FROM passing_number pn2
        WHERE pn2.user_id = pn1.user_id
          AND date_format(pn2.date, '%Y-%m-%d') <= date_format(pn1.date, '%Y-%m-%d')) AS total
FROM passing_number pn1
ORDER BY pn1.date ASC, name ASC;


# +--------+-------+----------+-----+
# |name    |user_id|date      |total|
# +--------+-------+----------+-----+
# |fh      |2      |2020-10-12|4    |
# |wangchao|3      |2020-10-12|1    |
# |tm      |1      |2020-10-13|0    |
# |wangchao|3      |2020-10-13|3    |
# +--------+-------+----------+-----+


# Method 1 (simplified): the same correlated-subquery running total, but the date
# columns are compared directly instead of being formatted to strings first.
# The name is looked up in the `user` table, and rows are ordered by date and then
# by name, as the task statement requires.
SELECT (SELECT u.name FROM user u WHERE u.id = pn1.user_id) AS name,
       pn1.user_id,
       pn1.date,
       (SELECT sum(pn2.number)
        FROM passing_number pn2
        WHERE pn2.user_id = pn1.user_id
          AND pn2.date <= pn1.date) AS total
FROM passing_number pn1
ORDER BY pn1.date ASC, name ASC;



# 方法②:    用窗口函数按姓名分区按日期排序计算累计数

# sum(a) over(partition by ... order by ...) 的作用:在每个分区内按 order by 的顺序对 a 字段逐行累计求和(分区内连续累计求和)
# Method 2: running total with a window function.
# The window partitions by the user's id rather than the name: no uniqueness
# constraint on name is shown, and partitioning by name would merge the totals of
# two users who happen to share one. Inside each partition rows are ordered by
# date, so sum() accumulates `number` up to the current row's date.
SELECT u.name   AS u_n,
       p.date   AS date,
       p.number,
       sum(p.number) OVER (PARTITION BY p.user_id ORDER BY p.date) AS ps_num
FROM passing_number p
INNER JOIN user u ON p.user_id = u.id
ORDER BY p.date, u.name;
# +--------+----------+------+------+
# |u_n     |date      |number|ps_num|
# +--------+----------+------+------+
# |fh      |2020-10-12|4     |4     |
# |wangchao|2020-10-12|1     |1     |
# |tm      |2020-10-13|0     |0     |
# |wangchao|2020-10-13|2     |3     |
# +--------+----------+------+------+


 

案例二:

# SQL34 每份试卷每月作答数和截止当月的作答总数

#

# Recreate the exam_record table from scratch.
# submit_time and score are the only nullable columns.
DROP TABLE IF EXISTS exam_record;
CREATE TABLE exam_record
(
    id          int      PRIMARY KEY AUTO_INCREMENT COMMENT '自增ID',
    uid         int      NOT NULL                   COMMENT '用户ID',
    exam_id     int      NOT NULL                   COMMENT '试卷ID',
    start_time  datetime NOT NULL                   COMMENT '开始时间',
    submit_time datetime                            COMMENT '提交时间',
    score       tinyint                             COMMENT '得分'
) CHARACTER SET utf8 COLLATE utf8_general_ci;

# Seed exam_record with twelve sample attempts across exams 9001 and 9002.
# The final row has neither a submit time nor a score.
INSERT INTO exam_record
    (uid, exam_id, start_time, submit_time, score)
VALUES
    (1001, 9001, '2020-01-01 09:01:01', '2020-01-01 09:21:59', 90),
    (1002, 9001, '2020-01-20 10:01:01', '2020-01-20 10:10:01', 89),
    (1002, 9001, '2020-02-01 12:11:01', '2020-02-01 12:31:01', 83),
    (1003, 9001, '2020-03-01 19:01:01', '2020-03-01 19:30:01', 75),
    (1004, 9001, '2020-03-01 12:01:01', '2020-03-01 12:11:01', 60),
    (1003, 9001, '2020-03-01 12:01:01', '2020-03-01 12:41:01', 90),
    (1002, 9001, '2020-05-02 19:01:01', '2020-05-02 19:32:00', 90),
    (1001, 9002, '2020-01-02 19:01:01', '2020-01-02 19:59:01', 69),
    (1004, 9002, '2020-02-02 12:01:01', '2020-02-02 12:20:01', 99),
    (1003, 9002, '2020-02-02 12:01:01', '2020-02-02 12:31:01', 68),
    (1001, 9002, '2020-02-02 12:01:01', '2020-02-02 12:43:01', 81),
    (1001, 9002, '2020-03-02 12:11:01', NULL, NULL);


# 请输出每份试卷每月作答数和截止当月的作答总数。
# 由示例数据结果输出如下:
# +-------+-----------+---------+------------+
# |exam_id|start_month|month_cnt|cum_exam_cnt|
# +-------+-----------+---------+------------+
# |9001   |202001     |2        |2           |
# |9001   |202002     |1        |3           |
# |9001   |202003     |3        |6           |
# |9001   |202005     |1        |7           |
# |9002   |202001     |1        |1           |
# |9002   |202002     |3        |4           |
# |9002   |202003     |1        |5           |
# +-------+-----------+---------+------------+


# sum() over(partition by 字段1,字段2,......) 的用法:根据 字段1,字段2 ,...... 分区后,对区内数据求和(每行得到的是整个分区的合计;加上 order by 之后才是按顺序逐行累加)

# 先根据 分组获取核心数据(目的结果数据是  7条,所以构造 7 条数据),获取 每个 exam_id , 每个 start_month 的数量 ,sql 如下


# Step 1: number of attempts per exam per month (month rendered as YYYYMM).
# Sorting by exam and then month makes the row order deterministic; sorting by
# exam_id alone leaves the order of months within an exam unspecified.
SELECT exam_id,
       date_format(start_time, '%Y%m') AS start_month,
       count(*)                        AS month_cnt
FROM exam_record
GROUP BY exam_id, date_format(start_time, '%Y%m')
ORDER BY exam_id, start_month;

# +-------+-----------+---------+
# |exam_id|start_month|month_cnt|
# +-------+-----------+---------+
# |9001   |202001     |2        |
# |9001   |202002     |1        |
# |9001   |202003     |3        |
# |9001   |202005     |1        |
# |9002   |202001     |1        |
# |9002   |202002     |3        |
# |9002   |202003     |1        |
# +-------+-----------+---------+


# 正确解法: partition by exam_id  ,因为 是在 exam_id 内数据累加的
# Correct solution: attempts per exam per month plus the cumulative attempts of
# that exam up to and including the month.
# monthly_cnt yields one row per (exam_id, month). The window partitions by
# exam_id only and orders by month, so the running sum restarts for every exam.
# The outer ORDER BY fixes the order in which rows are returned.
WITH monthly_cnt AS (
    SELECT exam_id,
           date_format(start_time, '%Y%m') AS start_month,
           count(*)                        AS month_cnt
    FROM exam_record
    GROUP BY exam_id, date_format(start_time, '%Y%m')
)
SELECT exam_id,
       start_month,
       month_cnt,
       sum(month_cnt) OVER (PARTITION BY exam_id ORDER BY start_month) AS cum_exam_cnt
FROM monthly_cnt
ORDER BY exam_id, start_month;

# +-------+-----------+---------+------------+
# |exam_id|start_month|month_cnt|cum_exam_cnt|
# +-------+-----------+---------+------------+
# |9001   |202001     |2        |2           |
# |9001   |202002     |1        |3           |
# |9001   |202003     |3        |6           |
# |9001   |202005     |1        |7           |
# |9002   |202001     |1        |1           |
# |9002   |202002     |3        |4           |
# |9002   |202003     |1        |5           |
# +-------+-----------+---------+------------+



# 错误写法: partition by exam_id, start_month  是根据 exam_id, start_month 累加的,所以 得到的结果不是需求需要的
# Counter-example, kept on purpose: this does NOT produce the required result.
# Partitioning by both exam_id and start_month puts every monthly row into a
# partition of its own, so the "cumulative" value is just that month's count.
WITH monthly_cnt AS (
    SELECT exam_id,
           date_format(start_time, '%Y%m') AS start_month,
           count(*)                        AS month_cnt
    FROM exam_record
    GROUP BY exam_id, date_format(start_time, '%Y%m')
    ORDER BY exam_id
)
SELECT exam_id,
       start_month,
       month_cnt,
       sum(month_cnt) OVER w AS cum_exam_cnt
FROM monthly_cnt
WINDOW w AS (PARTITION BY exam_id, start_month ORDER BY exam_id, start_month);

# +-------+-----------+---------+------------+
# |exam_id|start_month|month_cnt|cum_exam_cnt|
# +-------+-----------+---------+------------+
# |9001   |202001     |2        |2           |
# |9001   |202002     |1        |1           |
# |9001   |202003     |3        |3           |
# |9001   |202005     |1        |1           |
# |9002   |202001     |1        |1           |
# |9002   |202002     |3        |3           |
# |9002   |202003     |1        |1           |
# +-------+-----------+---------+------------+


1. 分组后 组内的总数


#  扩展 count() over(partition by column1 ,column2) 的使用: 根据  column1 ,column2 分组后统计 组内总数,但是呢 结果集的总数和源数据是一样的
#  效果和 count(*) group by 效果类似 ,只是后者会聚合,数据数量可能会改变,前者是窗口函数 ,数据数量不会变
# demo

# Display the contents of the dept_emp sample table.
SELECT emp_no,
       dept_no,
       from_date,
       to_date
FROM dept_emp;

# +------+-------+----------+----------+
# |emp_no|dept_no|from_date |to_date   |
# +------+-------+----------+----------+
# |10001 |d001   |1986-06-26|9999-01-01|
# |10002 |d001   |1996-08-03|9999-01-01|
# |10003 |d002   |1995-12-03|9999-01-01|
# +------+-------+----------+----------+

# Plain aggregation: collapses dept_emp to one row per dept_no with the number of
# non-NULL dept_no values in that group, listed in descending dept_no order.
SELECT dept_no,
       count(dept_no)
FROM dept_emp
GROUP BY dept_no
ORDER BY dept_no DESC;

# +-------+--------------+
# |dept_no|count(dept_no)|
# +-------+--------------+
# |d002   |1             |
# |d001   |2             |
# +-------+--------------+


# 数据量没变
# Window version: no rows are collapsed; each row carries its department's count.
# All rows of a partition share the same dept_no, so the ORDER BY inside the
# window makes them peers and the count covers the whole partition.
SELECT dept_no,
       count(dept_no) OVER w AS rn
FROM dept_emp
WINDOW w AS (PARTITION BY dept_no ORDER BY dept_no DESC);

# +-------+--+
# |dept_no|rn|
# +-------+--+
# |d001   |2 |
# |d001   |2 |
# |d002   |1 |
# +-------+--+

#
# Same per-department count without an ORDER BY inside the window; the final
# result is sorted by dept_no descending by the statement-level ORDER BY.
SELECT dept_no,
       count(dept_no) OVER w AS rn
FROM dept_emp
WINDOW w AS (PARTITION BY dept_no)
ORDER BY dept_no DESC;
# +-------+--+
# |dept_no|rn|
# +-------+--+
# |d002   |1 |
# |d001   |2 |
# |d001   |2 |
# +-------+--+

1.

分组后排序:



#  窗口函数:分组后对组内的数据进行操作

# 需求:在girl表新增一个 字段 num(唯一) ,在createdTime 一样的基础上 , num 按照 202107230001    202107230002  ..... 当天最大的数据是  202107239999

# 第二天 num  202107240001  202107240002   202107240003  ..... 当天最大的数据是  202107249999
# 思路:
# ① 先用 窗口函数  按照日期 分组,同时进行组内排序 生成 code

# ② 拼接 resultCode 结构(if()函数 或者 case when then else end 语法)

# ③ 执行 更新(update A  set  字段  = (select newValue from Tem where Tem.id = A.id ) or update ( A inner   join Tem  on Tem.id = A.id) set A.字段 = Tem.newValue ;  )



#  分组并且组内排序
# Step 1: number the rows of each calendar day, starting from 1, in creation order.
# Several rows share the same createdTime, so id is added as a tie-breaker;
# without it row_number() may hand out codes to tied rows in any order.
SELECT id,
       row_number() OVER (PARTITION BY date(createdTime)
                          ORDER BY createdTime ASC, id ASC) AS code,
       date_format(createdTime, '%Y%m%d')                   AS dataStr,
       name,
       createdTime
FROM girls;

# +--+----+--------+----+-------------------+
# |id|code|dataStr |name|createdTime        |
# +--+----+--------+----+-------------------+
# |2 |1   |20200723|bbb |2020-07-23 14:13:48|
# |1 |2   |20200723|aaa |2020-07-23 15:13:48|
# |3 |3   |20200723|ccc |2020-07-23 15:13:48|
# |4 |4   |20200723|4   |2020-07-23 16:13:48|
# |6 |1   |20200724|6   |2020-07-24 16:13:48|
# |8 |2   |20200724|8   |2020-07-24 17:13:48|
# |9 |3   |20200724|9   |2020-07-24 18:13:48|
# |10|4   |20200724|10  |2020-07-24 19:13:48|
# |11|5   |20200724|10  |2020-07-24 19:13:48|
# |12|6   |20200724|10  |2020-07-24 19:13:48|
# |13|7   |20200724|10  |2020-07-24 19:13:48|
# |14|8   |20200724|10  |2020-07-24 19:13:48|
# |15|9   |20200724|10  |2020-07-24 19:13:48|
# |16|10  |20200724|10  |2020-07-24 19:13:48|
# |17|11  |20200724|10  |2020-07-24 19:13:48|
# |18|12  |20200724|10  |2020-07-24 19:13:48|
# |19|13  |20200724|10  |2020-07-24 19:13:48|
# |20|14  |20200724|10  |2020-07-24 19:13:48|
# |21|15  |20200724|10  |2020-07-24 19:13:48|
# |22|16  |20200724|10  |2020-07-24 19:13:48|
# |23|17  |20200724|10  |2020-07-24 19:13:48|
# |24|18  |20200724|10  |2020-07-24 19:13:48|
# |25|19  |20200724|10  |2020-07-24 19:13:48|
# +--+----+--------+----+-------------------+

# 拼接 code 结构
# Step 2: build resultCode as <yyyymmdd>_<code zero-padded to 4 digits>.
# lpad() replaces the one-branch-per-length CASE ladder, whose 4-digit branch
# dropped the '_' separator used by the other branches.
# A code above 9999 does not fit into four digits; the CASE yields NULL for it
# (lpad alone would silently truncate the value instead).
# id is a tie-breaker so rows sharing a createdTime are numbered deterministically.
SELECT Tem.id,
       Tem.createdTime,
       Tem.code,
       length(Tem.code),
       CASE
           WHEN Tem.code <= 9999
               THEN concat(date_format(Tem.createdTime, '%Y%m%d'), '_', lpad(Tem.code, 4, '0'))
           ELSE NULL
       END AS resultCode
FROM (
         SELECT id,
                createdTime,
                row_number() OVER (PARTITION BY date(createdTime)
                                   ORDER BY createdTime ASC, id ASC) AS code
         FROM girls
     ) AS Tem;




#  更新
# Step 3: write the generated code into girls.num for every row (intentionally no
# WHERE clause: the whole table is renumbered).
# The per-day numbering is computed in a derived table and joined back on id
# (assumes id identifies a single row of girls - confirm against the schema).
# num becomes <yyyymmdd>_<code zero-padded to 4 digits>; a code above 9999 does not
# fit into four digits and is stored as NULL.
# id also acts as the tie-breaker for rows that share the same createdTime.
UPDATE girls
INNER JOIN (
    SELECT id,
           createdTime,
           row_number() OVER (PARTITION BY date(createdTime)
                              ORDER BY createdTime ASC, id ASC) AS code
    FROM girls
) AS Tem ON Tem.id = girls.id
SET girls.num = CASE
                    WHEN Tem.code <= 9999
                        THEN concat(date_format(Tem.createdTime, '%Y%m%d'), '_', lpad(Tem.code, 4, '0'))
                    ELSE NULL
                END;


#  查看结果
# Inspect the outcome: creation time, its yyyymmdd rendering, and the stored num.
SELECT createdTime,
       date_format(createdTime, '%Y%m%d'),
       num
FROM girls;

# +-------------------+----------------------------------+-------------+
# |createdTime        |date_format(createdTime, '%Y%m%d')|num          |
# +-------------------+----------------------------------+-------------+
# |2020-07-23 15:13:48|20200723                          |20200723_0002|
# |2020-07-23 14:13:48|20200723                          |20200723_0001|
# |2020-07-23 15:13:48|20200723                          |20200723_0003|
# |2020-07-23 16:13:48|20200723                          |20200723_0004|
# |2020-07-24 16:13:48|20200724                          |20200724_0001|
# |2020-07-24 17:13:48|20200724                          |20200724_0002|
# |2020-07-24 18:13:48|20200724                          |20200724_0003|
# |2020-07-24 19:13:48|20200724                          |20200724_0004|
# |2020-07-24 19:13:48|20200724                          |20200724_0005|
# |2020-07-24 19:13:48|20200724                          |20200724_0006|
# |2020-07-24 19:13:48|20200724                          |20200724_0007|
# |2020-07-24 19:13:48|20200724                          |20200724_0008|
# |2020-07-24 19:13:48|20200724                          |20200724_0009|
# |2020-07-24 19:13:48|20200724                          |20200724_0010|
# |2020-07-24 19:13:48|20200724                          |20200724_0011|
# |2020-07-24 19:13:48|20200724                          |20200724_0012|
# |2020-07-24 19:13:48|20200724                          |20200724_0013|
# |2020-07-24 19:13:48|20200724                          |20200724_0014|
# |2020-07-24 19:13:48|20200724                          |20200724_0015|
# |2020-07-24 19:13:48|20200724                          |20200724_0016|
# |2020-07-24 19:13:48|20200724                          |20200724_0017|
# |2020-07-24 19:13:48|20200724                          |20200724_0018|
# |2020-07-24 19:13:48|20200724                          |20200724_0019|
# +-------------------+----------------------------------+-------------+

demo:

# Display the contents of the order_test sample table.
SELECT id,
       user_guid,
       user_name,
       created_time,
       modified_time,
       amount
FROM order_test;

# +--+---------+---------+-------------------+-------------------+------+
# |id|user_guid|user_name|created_time       |modified_time      |amount|
# +--+---------+---------+-------------------+-------------------+------+
# |0 |1        |蚩尤       |0001-06-11 14:41:21|0001-06-11 14:41:21|10    |
# |2 |1        |蚩尤       |0001-06-12 14:41:31|0050-06-11 14:41:31|10    |
# |3 |1        |蚩尤       |0001-06-13 14:41:34|0100-06-11 14:41:34|10    |
# |4 |2        |姬丹       |0500-06-11 14:41:36|0500-06-11 14:41:36|10    |
# |5 |2        |姬丹       |0500-07-11 14:41:38|0509-06-11 14:41:38|10    |
# |6 |2        |姬丹       |0500-08-11 14:41:38|0510-06-11 14:41:38|10    |
# |7 |3        |李元昊      |1000-06-01 14:41:39|1000-06-11 14:41:39|10    |
# |8 |3        |李元昊      |1000-06-11 14:41:40|1001-06-11 14:41:40|10    |
# |9 |3        |李元昊      |1000-06-22 14:41:41|1009-06-11 14:41:41|10    |
# |10|4        |张三       |2000-06-11 14:41:41|2000-06-11 14:41:41|10    |
# |11|4        |张三       |2000-06-11 14:41:41|2010-06-11 14:41:41|10    |
# |12|5        |李四       |2000-06-11 14:41:41|2020-06-11 14:41:41|10    |
# +--+---------+---------+-------------------+-------------------+------+

#  需求 查询每个用户上一个订单距离当前订单的时间间隔。

# ① 子查询(select 后面的子查询结果作为 新的列)
# Approach 1: find each order's previous order with a correlated scalar subquery.
# last_time is the latest created_time of the same user that is strictly earlier
# than the current row's; it is NULL when no such row exists, which includes rows
# that tie on created_time. diff is datediff() between the two timestamps.
SELECT user_guid,
       user_name,
       created_time,
       last_time,
       datediff(created_time, last_time) AS diff
FROM (
         SELECT cur.user_guid,
                cur.user_name,
                cur.created_time,
                (SELECT max(prev.created_time)
                 FROM order_test AS prev
                 WHERE prev.user_guid = cur.user_guid
                   AND prev.created_time < cur.created_time) AS last_time
         FROM order_test AS cur
     ) AS Tem2;

# +---------+---------+-------------------+-------------------+----+
# |user_guid|user_name|created_time       |last_time          |diff|
# +---------+---------+-------------------+-------------------+----+
# |1        |蚩尤       |0001-06-11 14:41:21|NULL               |NULL|
# |1        |蚩尤       |0001-06-12 14:41:31|0001-06-11 14:41:21|1   |
# |1        |蚩尤       |0001-06-13 14:41:34|0001-06-12 14:41:31|1   |
# |2        |姬丹       |0500-06-11 14:41:36|NULL               |NULL|
# |2        |姬丹       |0500-07-11 14:41:38|0500-06-11 14:41:36|30  |
# |2        |姬丹       |0500-08-11 14:41:38|0500-07-11 14:41:38|31  |
# |3        |李元昊      |1000-06-01 14:41:39|NULL               |NULL|
# |3        |李元昊      |1000-06-11 14:41:40|1000-06-01 14:41:39|10  |
# |3        |李元昊      |1000-06-22 14:41:41|1000-06-11 14:41:40|11  |
# |4        |张三       |2000-06-11 14:41:41|NULL               |NULL|
# |4        |张三       |2000-06-11 14:41:41|NULL               |NULL|
# |5        |李四       |2000-06-11 14:41:41|NULL               |NULL|
# +---------+---------+-------------------+-------------------+----+

# ② 窗口函数
# 前后函数——lead(n)/lag(n)
# 用途:分区中位于当前行前n行(lag)/后n行(lead)的记录值
# 使用场景:查询上一个订单距离当前订单的时间间隔
# Approach 2: lag() fetches the created_time of the row one position earlier in
# the same user's partition (ordered by created_time); the outer query turns the
# pair into a day difference. The first row of each partition has no predecessor,
# so its diff is NULL.
WITH with_prev AS (
    SELECT user_guid,
           user_name,
           created_time,
           lag(created_time, 1) OVER w AS last_date
    FROM order_test
    WINDOW w AS (PARTITION BY user_guid ORDER BY created_time ASC)
)
SELECT user_guid,
       user_name,
       created_time,
       datediff(created_time, last_date) AS diff
FROM with_prev;

# +---------+---------+-------------------+----+
# |user_guid|user_name|created_time       |diff|
# +---------+---------+-------------------+----+
# |1        |蚩尤       |0001-06-11 14:41:21|NULL|
# |1        |蚩尤       |0001-06-12 14:41:31|1   |
# |1        |蚩尤       |0001-06-13 14:41:34|1   |
# |2        |姬丹       |0500-06-11 14:41:36|NULL|
# |2        |姬丹       |0500-07-11 14:41:38|30  |
# |2        |姬丹       |0500-08-11 14:41:38|31  |
# |3        |李元昊      |1000-06-01 14:41:39|NULL|
# |3        |李元昊      |1000-06-11 14:41:40|10  |
# |3        |李元昊      |1000-06-22 14:41:41|11  |
# |4        |张三       |2000-06-11 14:41:41|NULL|
# |4        |张三       |2000-06-11 14:41:41|0   |
# |5        |李四       |2000-06-11 14:41:41|NULL|
# +---------+---------+-------------------+----+

# 内层SQL先通过lag函数得到当前用户 当前记录 上一次订单的日期 ,外层SQL再将本次订单和上次订单日期做差得到时间间隔diff

#  如果是 0
# Same query with an offset of 0: lag(created_time, 0) is the current row's own
# created_time, so every diff comes out as 0.
WITH with_prev AS (
    SELECT user_guid,
           user_name,
           created_time,
           lag(created_time, 0) OVER w AS last_date
    FROM order_test
    WINDOW w AS (PARTITION BY user_guid ORDER BY created_time ASC)
)
SELECT user_guid,
       user_name,
       created_time,
       datediff(created_time, last_date) AS diff
FROM with_prev;

# +---------+---------+-------------------+----+
# |user_guid|user_name|created_time       |diff|
# +---------+---------+-------------------+----+
# |1        |蚩尤       |0001-06-11 14:41:21|0   |
# |1        |蚩尤       |0001-06-12 14:41:31|0   |
# |1        |蚩尤       |0001-06-13 14:41:34|0   |
# |2        |姬丹       |0500-06-11 14:41:36|0   |
# |2        |姬丹       |0500-07-11 14:41:38|0   |
# |2        |姬丹       |0500-08-11 14:41:38|0   |
# |3        |李元昊      |1000-06-01 14:41:39|0   |
# |3        |李元昊      |1000-06-11 14:41:40|0   |
# |3        |李元昊      |1000-06-22 14:41:41|0   |
# |4        |张三       |2000-06-11 14:41:41|0   |
# |4        |张三       |2000-06-11 14:41:41|0   |
# |5        |李四       |2000-06-11 14:41:41|0   |
# +---------+---------+-------------------+----+


# 七、头尾函数——first_value(expr)/last_value(expr)
# 用途:得到分区中的第一个/最后一个指定参数的值
# 使用场景:查询截止到当前订单,按照日期排序第一个订单和最后一个订单的下单时间(created_time)

# first_value()/last_value() per user, ordered by created_time.
# No frame is given, so with ORDER BY present the window runs from the start of
# the partition up to the current row (and its peers): first_date is the user's
# earliest created_time, last_date is the current row's own created_time.
SELECT user_guid,
       user_name,
       created_time,
       first_value(created_time) OVER (PARTITION BY user_guid ORDER BY created_time ASC) AS first_date,
       last_value(created_time)  OVER (PARTITION BY user_guid ORDER BY created_time ASC) AS last_date
FROM order_test;

# +---------+---------+-------------------+-------------------+-------------------+
# |user_guid|user_name|created_time       |first_date         |last_date          |
# +---------+---------+-------------------+-------------------+-------------------+
# |1        |蚩尤       |0001-06-11 14:41:21|0001-06-11 14:41:21|0001-06-11 14:41:21|
# |1        |蚩尤       |0001-06-12 14:41:31|0001-06-11 14:41:21|0001-06-12 14:41:31|
# |1        |蚩尤       |0001-06-13 14:41:34|0001-06-11 14:41:21|0001-06-13 14:41:34|
# |2        |姬丹       |0500-06-11 14:41:36|0500-06-11 14:41:36|0500-06-11 14:41:36|
# |2        |姬丹       |0500-07-11 14:41:38|0500-06-11 14:41:36|0500-07-11 14:41:38|
# |2        |姬丹       |0500-08-11 14:41:38|0500-06-11 14:41:36|0500-08-11 14:41:38|
# |3        |李元昊      |1000-06-01 14:41:39|1000-06-01 14:41:39|1000-06-01 14:41:39|
# |3        |李元昊      |1000-06-11 14:41:40|1000-06-01 14:41:39|1000-06-11 14:41:40|
# |3        |李元昊      |1000-06-22 14:41:41|1000-06-01 14:41:39|1000-06-22 14:41:41|
# |4        |张三       |2000-06-11 14:41:41|2000-06-11 14:41:41|2000-06-11 14:41:41|
# |4        |张三       |2000-06-11 14:41:41|2000-06-11 14:41:41|2000-06-11 14:41:41|
# |5        |李四       |2000-06-11 14:41:41|2000-06-11 14:41:41|2000-06-11 14:41:41|
# +---------+---------+-------------------+-------------------+-------------------+



# 九、聚合函数作为窗口函数
#  用途:在窗口中每条记录动态应用聚合函数(sum/avg/max/min/count),可以动态计算在指定的窗口内的各种聚合函数值。
#  应用场景:每个用户按照下单时间排序,截止到当前订单的累计订单金额/平均订单金额/最大订单金额/最小订单金额/订单数是多少

# 聚合函数作为窗口函数
# 作用:聚合函数作为窗口函数,是起到"累加/累计"的效果,比如,就是截止到本行,最大值?最小值是多少
# Running sum / count / avg / max / min of amount per user, up to the current order.
# The window orders by created_time with id as a tie-breaker and uses an explicit
# ROWS frame. With the default RANGE frame, orders of one user that share a
# created_time are peers and all receive the same "running" values; the ROWS frame
# advances one row at a time instead.
SELECT user_guid,
       user_name,
       created_time,
       sum(amount)   OVER w AS sum1,
       count(amount) OVER w AS count1,
       avg(amount)   OVER w AS avg1,
       max(amount)   OVER w AS max1,
       min(amount)   OVER w AS min1
FROM order_test
WINDOW w AS (PARTITION BY user_guid
             ORDER BY created_time ASC, id ASC
             ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW);


# +---------+---------+-------------------+----+------+-------+----+----+
# |user_guid|user_name|created_time       |sum1|count1|avg1   |max1|min1|
# +---------+---------+-------------------+----+------+-------+----+----+
# |1        |蚩尤       |0001-06-11 14:41:21|10  |1     |10.0000|10  |10  |
# |1        |蚩尤       |0001-06-12 14:41:31|20  |2     |10.0000|10  |10  |
# |1        |蚩尤       |0001-06-13 14:41:34|30  |3     |10.0000|10  |10  |
# |2        |姬丹       |0500-06-11 14:41:36|10  |1     |10.0000|10  |10  |
# |2        |姬丹       |0500-07-11 14:41:38|20  |2     |10.0000|10  |10  |
# |2        |姬丹       |0500-08-11 14:41:38|30  |3     |10.0000|10  |10  |
# |3        |李元昊      |1000-06-01 14:41:39|10  |1     |10.0000|10  |10  |
# |3        |李元昊      |1000-06-11 14:41:40|20  |2     |10.0000|10  |10  |
# |3        |李元昊      |1000-06-22 14:41:41|30  |3     |10.0000|10  |10  |
# |4        |张三       |2000-06-11 14:41:41|10  |1     |10.0000|10  |10  |
# |4        |张三       |2000-06-11 14:41:41|20  |2     |10.0000|10  |10  |
# |5        |李四       |2000-06-11 14:41:41|10  |1     |10.0000|10  |10  |
# +---------+---------+-------------------+----+------+-------+----+----+

# 窗口函数和普通聚合函数也很容易混淆,二者区别如下 :
# 聚合函数是将多条记录聚合为一条;而窗口函数是每条记录都会执行,有几条记录执行完还是几条
# 窗口函数可以理解为记录集合,每条记录都要在窗口内执行函数,多行聚合为多行。MYSQL从8.0版本开始才支持窗口函数









你可能感兴趣的:(mysql,mysql,数据库,database)