SQL 高阶查询 - 基于 CTE 和 Window Function
阅读须知
- 示例库来源于 .../tidb-ansible/resources/bin/tispark-sample-data/
- 数据库基于 MySQL 8.0
- 对 CTE 有着基本的了解
- 对 Window Function 有着基本的了解
更新列的值为唯一值
假设由于数据质量的问题,CUSTOMER 表的 C_CUSTKEY 列的值不是唯一的。我们的任务是更新所有行,使其值为唯一的整数,并以 C_NAME 倒排序从 1 开始。
准备示例表
CREATE TABLE CUSTOMER_NULLID
AS
SELECT NULL AS 'C_CUSTKEY', C_NAME, C_ADDRESS, C_NATIONKEY, C_PHONE
, C_ACCTBAL, C_MKTSEGMENT, C_COMMENT
FROM CUSTOMER
Dummy
+-----------+--------------------+--------------------------------------+-----+
| C_CUSTKEY | C_NAME | C_ADDRESS | ... |
+-----------+--------------------+--------------------------------------+-----+
| NULL | Customer#000000001 | IVhzIApeRb ot,c,E | ... |
| NULL | Customer#000000002 | XSTf4,NCwDVaWNe6tEgvwfmRchLXak | ... |
| NULL | Customer#000000003 | MG9kdTD2WBHm | ... |
| NULL | Customer#000000004 | XxVSJsLAGtn | ... |
| NULL | Customer#000000005 | KvpyuHCplrB84WgAiGV6sYpZq7Tj | ... |
| NULL | Customer#000000006 | sKZz0CsnMD7mp4Xd0YrBvx,LREYKUWAh yVn | ... |
| NULL | Customer#000000007 | TcGe5gaZNgVePxU5kRrvXBfkasDTea | ... |
+-----------+--------------------+--------------------------------------+-----+
Code
WITH CTE AS (
SELECT
ROW_NUMBER ( ) OVER ( ORDER BY C_NAME DESC) AS ROWID,
C_NAME,
C_ADDRESS,
C_NATIONKEY,
C_PHONE,
C_ACCTBAL,
C_MKTSEGMENT,
C_COMMENT
FROM
CUSTOMER_NULLID
) UPDATE CTE
SET C_CUSTKEY = ROWID
Result
+-------+--------------------+-----------------------------------------+-----+
| ROWID | C_NAME | C_ADDRESS | ... |
+-------+--------------------+-----------------------------------------+-----+
| 1 | Customer#000001500 | 4zaoUzuWUTNFiNPbmu43 | ... |
| 2 | Customer#000001499 | 4,6jWOEqfnuXkwhB7gs0M9TcWJlaJNv4bt | ... |
| 3 | Customer#000001498 | x XToT5oFi7oIsRG2mgIL3ncvYJoWBsufsQ7N,z | ... |
| 4 | Customer#000001497 | D8e2U3gYd57H4grcOr,02 | ... |
| 5 | Customer#000001496 | ZOyMxutVHpJy | ... |
| 6 | Customer#000001495 | 78w5H7VJSo0Ps,jqeoCWS4Kay17ygM4RtIH | ... |
| 7 | Customer#000001494 | 4V71P ku3jrqBfQp | ... |
+-------+--------------------+-----------------------------------------+-----+
删除重复
删除重复数据是一种常见的需求,尤其是由于缺乏强制执行约束的唯一性,致使数据存在重复行。
准备示例表
CREATE TABLE CUSTOMER_DUPKEY
AS
SELECT *
FROM CUSTOMER
UNION ALL
SELECT *
FROM CUSTOMER
Dummy
+-----------+--------------------+--------------------------------+-----+
| C_CUSTKEY | C_NAME | C_ADDRESS | ... |
+-----------+--------------------+--------------------------------+-----+
| 1 | Customer#000000001 | IVhzIApeRb ot,c,E | ... |
| 1 | Customer#000000001 | IVhzIApeRb ot,c,E | ... |
| 2 | Customer#000000002 | XSTf4,NCwDVaWNe6tEgvwfmRchLXak | ... |
| 2 | Customer#000000002 | XSTf4,NCwDVaWNe6tEgvwfmRchLXak | ... |
| 3 | Customer#000000003 | MG9kdTD2WBHm | ... |
| 3 | Customer#000000003 | MG9kdTD2WBHm | ... |
| 4 | Customer#000000004 | XxVSJsLAGtn | ... |
+-----------+--------------------+--------------------------------+-----+
Code
WITH CTE AS
(
SELECT C_CUSTKEY,ROW_NUMBER() OVER(PARTITION BY C_CUSTKEY ORDER BY C_CUSTKEY) AS N FROM CUSTOMER_DUPKEY
)
DELETE FROM CTE WHERE N>1;
Stage
+-----------+---+--------------------+--------------------------------+-----+
| C_CUSTKEY | N | C_NAME | C_ADDRESS | ... |
+-----------+---+--------------------+--------------------------------+-----+
| 1 | 1 | Customer#000000001 | IVhzIApeRb ot,c,E | ... |
| 1 | 2 | Customer#000000001 | IVhzIApeRb ot,c,E | ... |
| 2 | 1 | Customer#000000002 | XSTf4,NCwDVaWNe6tEgvwfmRchLXak | ... |
| 2 | 2 | Customer#000000002 | XSTf4,NCwDVaWNe6tEgvwfmRchLXak | ... |
| 3 | 1 | Customer#000000003 | MG9kdTD2WBHm | ... |
| 3 | 2 | Customer#000000003 | MG9kdTD2WBHm | ... |
| 4 | 1 | Customer#000000004 | XxVSJsLAGtn | ... |
+-----------+---+--------------------+--------------------------------+-----+
每组前 N 行
当需要从每个组或分区中根据某种指定排序筛选出一定数量的行时,通常都会用到每组前 N 行查询。查询 ORDERS 表,并为每一个客户返回最近的 3 个订单,就是每组前 N 行的示例。
CODE
WITH CTE AS (
SELECT O_ORDERKEY, O_CUSTKEY, O_ORDERSTATUS, O_TOTALPRICE, O_ORDERDATE
, O_ORDERPRIORITY, O_CLERK, O_SHIPPRIORITY, O_COMMENT, ROW_NUMBER() OVER (PARTITION BY O_CUSTKEY ORDER BY O_ORDERDATE DESC) AS ROWNUM
FROM ORDERS
)
SELECT *
FROM CTE
WHERE ROWNUM <= 3
ORDER BY O_CUSTKEY, ROWNUM
Result
+------------+-----------+--------------+-------------+-----------------+-----+--------+
| O_ORDERKEY | O_CUSTKEY | O_TOTALPRICE | O_ORDERDATE | O_CLERK | ... | ROWNUM |
+------------+-----------+--------------+-------------+-----------------+-----+--------+
| 34019 | 1 | 89230.03 | 1998-03-29 | Clerk#000000815 | ... | 1 |
| 14656 | 1 | 28599.83 | 1997-11-18 | Clerk#000000270 | ... | 2 |
| 9154 | 1 | 357345.46 | 1997-06-23 | Clerk#000000328 | ... | 3 |
| 16129 | 2 | 50178.44 | 1998-05-18 | Clerk#000000958 | ... | 1 |
| 28167 | 2 | 37156.97 | 1997-05-24 | Clerk#000000638 | ... | 2 |
| 40070 | 2 | 54402.20 | 1996-09-17 | Clerk#000000890 | ... | 3 |
| 54018 | 4 | 50422.56 | 1998-06-24 | Clerk#000000892 | ... | 1 |
+------------+-----------+--------------+-------------+-----------------+-----+--------+
统计总和
计算总和是很普遍的需求。其基本思想是,基于在一列或多列中定义的排序,以及在一列或多列定义的分区行内,累加另一列的值。查询 ORDERS 表,并为每一个客户返回其每次购物后累计的金额。
Code
WITH CTE AS
(
SELECT
O_ORDERKEY,
O_CUSTKEY,
O_TOTALPRICE,
O_ORDERDATE,
SUM(O_TOTALPRICE) OVER(PARTITION BY O_CUSTKEY ORDER BY O_ORDERDATE ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS HIS_TOTALPRICE
FROM ORDERS
)
SELECT * FROM CTE ORDER BY O_CUSTKEY,O_ORDERDATE
Result
+------------+-----------+--------------+-------------+----------------+
| O_ORDERKEY | O_CUSTKEY | O_TOTALPRICE | O_ORDERDATE | HIS_TOTALPRICE |
+------------+-----------+--------------+-------------+----------------+
| 31653 | 1 | 152411.41 | 1993-06-05 | 152411.41 |
| 43879 | 1 | 83095.85 | 1993-08-13 | 235507.26 |
| 52263 | 1 | 51134.82 | 1994-05-08 | 286642.08 |
| 53283 | 1 | 165928.33 | 1995-10-29 | 452570.41 |
| 24322 | 1 | 231040.44 | 1997-01-29 | 683610.85 |
| 36422 | 1 | 270087.44 | 1997-03-04 | 953698.29 |
| 9154 | 1 | 357345.46 | 1997-06-23 | 1311043.75 |
| 14656 | 1 | 28599.83 | 1997-11-18 | 1339643.58 |
| 34019 | 1 | 89230.03 | 1998-03-29 | 1428873.61 |
| 6980 | 2 | 170842.93 | 1993-02-19 | 170842.93 |
| 29408 | 2 | 154867.09 | 1993-05-03 | 325710.02 |
| 10563 | 2 | 143707.70 | 1993-09-30 | 469417.72 |
| 29956 | 2 | 116247.57 | 1994-08-15 | 585665.29 |
| 44962 | 2 | 45657.87 | 1994-12-29 | 631323.16 |
| 20257 | 2 | 181875.60 | 1996-03-04 | 813198.76 |
| 38276 | 2 | 201568.55 | 1996-03-13 | 1014767.31 |
| 40070 | 2 | 54402.20 | 1996-09-17 | 1069169.51 |
| 28167 | 2 | 37156.97 | 1997-05-24 | 1106326.48 |
| 16129 | 2 | 50178.44 | 1998-05-18 | 1156504.92 |
| 22466 | 4 | 130160.51 | 1992-03-29 | 130160.51 |
+------------+-----------+--------------+-------------+----------------+
数据差距和数据岛
数据差距和数据岛是经典的 SQL 问题。其基本概念是,我们有一些数字、日期或时间序列,其中的序列值之间应该是有固定的间隔,但有些序列值可能会丢失。那么数据差距问题就是识别序列中缺失值的所有范围,数据岛问题涉及识别现有值的所有范围。
数据差距
如上所述,数据差距问题涉及识别序列中缺失值的范围。查询 ORDERS 表,查询每一个客户 2 次购物的间隔月数(回头客统计)。
Dummy
+------------+-----------+--------------+-------------+-----+
| O_ORDERKEY | O_CUSTKEY | O_TOTALPRICE | O_ORDERDATE | ... |
+------------+-----------+--------------+-------------+-----+
| 31653 | 1 | 152411.41 | 1993-06-05 | ... |
| 43879 | 1 | 83095.85 | 1993-08-13 | ... |
| 52263 | 1 | 51134.82 | 1994-05-08 | ... |
| 53283 | 1 | 165928.33 | 1995-10-29 | ... |
| 24322 | 1 | 231040.44 | 1997-01-29 | ... |
| 36422 | 1 | 270087.44 | 1997-03-04 | ... |
| 9154 | 1 | 357345.46 | 1997-06-23 | ... |
| 14656 | 1 | 28599.83 | 1997-11-18 | ... |
| 34019 | 1 | 89230.03 | 1998-03-29 | ... |
| 6980 | 2 | 170842.93 | 1993-02-19 | ... |
| 29408 | 2 | 154867.09 | 1993-05-03 | ... |
| 10563 | 2 | 143707.70 | 1993-09-30 | ... |
| 29956 | 2 | 116247.57 | 1994-08-15 | ... |
| 44962 | 2 | 45657.87 | 1994-12-29 | ... |
| 20257 | 2 | 181875.60 | 1996-03-04 | ... |
| 38276 | 2 | 201568.55 | 1996-03-13 | ... |
| 40070 | 2 | 54402.20 | 1996-09-17 | ... |
| 28167 | 2 | 37156.97 | 1997-05-24 | ... |
| 16129 | 2 | 50178.44 | 1998-05-18 | ... |
| 22466 | 4 | 130160.51 | 1992-03-29 | ... |
+------------+-----------+--------------+-------------+-----+
Code
WITH CTE AS (
SELECT O_ORDERKEY, O_CUSTKEY, O_TOTALPRICE, O_ORDERDATE
, LEAD(O_ORDERDATE) OVER (PARTITION BY O_CUSTKEY ORDER BY O_ORDERDATE) AS NXT
FROM TPCH_001.ORDERS
)
SELECT O_ORDERKEY, O_CUSTKEY, O_TOTALPRICE, O_ORDERDATE, NXT
, TIMESTAMPDIFF(MONTH, O_ORDERDATE, NXT) AS DIFF
FROM CTE
WHERE TIMESTAMPDIFF(MONTH, O_ORDERDATE, NXT) > 1
ORDER BY O_CUSTKEY, O_ORDERDATE
Result
+------------+-----------+--------------+-------------+------------+------+-----+
| O_ORDERKEY | O_CUSTKEY | O_TOTALPRICE | O_ORDERDATE | NXT | DIFF | ... |
+------------+-----------+--------------+-------------+------------+------+-----+
| 31653 | 1 | 152411.41 | 1993-06-05 | 1993-08-13 | 2 | ... |
| 43879 | 1 | 83095.85 | 1993-08-13 | 1994-05-08 | 8 | ... |
| 52263 | 1 | 51134.82 | 1994-05-08 | 1995-10-29 | 17 | ... |
| 53283 | 1 | 165928.33 | 1995-10-29 | 1997-01-29 | 15 | ... |
| 36422 | 1 | 270087.44 | 1997-03-04 | 1997-06-23 | 3 | ... |
| 9154 | 1 | 357345.46 | 1997-06-23 | 1997-11-18 | 4 | ... |
| 14656 | 1 | 28599.83 | 1997-11-18 | 1998-03-29 | 4 | ... |
| 6980 | 2 | 170842.93 | 1993-02-19 | 1993-05-03 | 2 | ... |
| 29408 | 2 | 154867.09 | 1993-05-03 | 1993-09-30 | 4 | ... |
| 10563 | 2 | 143707.70 | 1993-09-30 | 1994-08-15 | 10 | ... |
| 29956 | 2 | 116247.57 | 1994-08-15 | 1994-12-29 | 4 | ... |
| 44962 | 2 | 45657.87 | 1994-12-29 | 1996-03-04 | 14 | ... |
| 38276 | 2 | 201568.55 | 1996-03-13 | 1996-09-17 | 6 | ... |
| 40070 | 2 | 54402.20 | 1996-09-17 | 1997-05-24 | 8 | ... |
| 28167 | 2 | 37156.97 | 1997-05-24 | 1998-05-18 | 11 | ... |
| 44486 | 4 | 39197.54 | 1992-07-10 | 1993-01-08 | 5 | ... |
| 38117 | 4 | 148782.33 | 1993-04-21 | 1994-04-03 | 11 | ... |
| 36386 | 4 | 6829.14 | 1994-04-03 | 1994-11-06 | 7 | ... |
| 56196 | 4 | 199826.92 | 1994-11-06 | 1995-05-22 | 6 | ... |
| 25280 | 4 | 115422.02 | 1995-05-22 | 1995-07-30 | 2 | ... |
+------------+-----------+--------------+-------------+------------+------+-----+
数据岛
数据岛的问题包括确定现有序列值的范围,常见的有连续签到、连续还贷等。查询 ORDERS 表,查询每一个客户连续多月持续购物的起止时间(用户活跃)。
Dummy
+------------+-----------+--------------+-------------+-----+
| O_ORDERKEY | O_CUSTKEY | O_TOTALPRICE | O_ORDERDATE | ... |
+------------+-----------+--------------+-------------+-----+
| 22466 | 4 | 130160.51 | 1992-03-29 | ... |
| 10688 | 4 | 43453.24 | 1992-05-13 | ... |
| 26407 | 4 | 275581.07 | 1992-06-20 | ... |
| 44486 | 4 | 39197.54 | 1992-07-10 | ... |
| 54722 | 4 | 208672.53 | 1993-01-08 | ... |
| 20386 | 4 | 78070.68 | 1993-02-28 | ... |
| 26852 | 4 | 77869.29 | 1993-03-07 | ... |
| 38117 | 4 | 148782.33 | 1993-04-21 | ... |
| 36386 | 4 | 6829.14 | 1994-04-03 | ... |
| 56196 | 4 | 199826.92 | 1994-11-06 | ... |
| 25280 | 4 | 115422.02 | 1995-05-22 | ... |
| 14947 | 4 | 165118.45 | 1995-07-30 | ... |
| 59079 | 4 | 29140.97 | 1995-10-05 | ... |
| 46951 | 4 | 32138.87 | 1995-11-18 | ... |
| 13728 | 4 | 123722.52 | 1995-12-11 | ... |
| 6532 | 4 | 129232.21 | 1996-02-18 | ... |
| 15395 | 4 | 37002.64 | 1996-05-14 | ... |
| 26722 | 4 | 173562.06 | 1996-05-16 | ... |
| 29574 | 4 | 112785.17 | 1996-07-12 | ... |
| 24416 | 4 | 184512.04 | 1996-10-18 | ... |
+------------+-----------+--------------+-------------+-----+
Code
WITH CTE1 AS (
SELECT O_ORDERKEY, O_CUSTKEY, O_TOTALPRICE, O_ORDERDATE
, LEAD(O_ORDERDATE) OVER (PARTITION BY O_CUSTKEY ORDER BY O_ORDERDATE) AS NXT
, CASE
WHEN TIMESTAMPDIFF(MONTH, LAG(O_ORDERDATE) OVER (PARTITION BY O_CUSTKEY ORDER BY O_ORDERDATE), O_ORDERDATE) <= 2 THEN 0
ELSE 1
END AS ISSTART
, CASE
WHEN TIMESTAMPDIFF(MONTH, O_ORDERDATE, LEAD(O_ORDERDATE) OVER (PARTITION BY O_CUSTKEY ORDER BY O_ORDERDATE)) <= 2 THEN 0
ELSE 1
END AS ISEND
FROM TPCH_001.ORDERS
WHERE O_CUSTKEY = 4
),
CTE2 AS (
SELECT *, SUM(ISEND) OVER (PARTITION BY O_CUSTKEY ORDER BY O_ORDERDATE ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) - ISEND AS ISGROUP
FROM CTE1
),
CTE3 AS (
SELECT O_CUSTKEY, SUM(O_TOTALPRICE) OVER (PARTITION BY O_CUSTKEY, ISGROUP ) AS SUMTOTALPRICE, FIRST_VALUE(O_ORDERDATE) OVER (PARTITION BY O_CUSTKEY, ISGROUP ) AS FIRSTORDERDATE
, LAST_VALUE(O_ORDERDATE) OVER (PARTITION BY O_CUSTKEY, ISGROUP ) AS LASTORDERDATE
FROM CTE2
ORDER BY O_ORDERDATE
)
SELECT DISTINCT *
FROM CTE3
Stage
# CTE1,使用 lead 和 lag 开窗函数计算两次间隔时间,判断数据岛起始点
+------------+-----------+--------------+-------------+------------+---------+-------+
| O_ORDERKEY | O_CUSTKEY | O_TOTALPRICE | O_ORDERDATE | NXT | ISSTART | ISEND |
+------------+-----------+--------------+-------------+------------+---------+-------+
| 22466 | 4 | 130160.51 | 1992-03-29 | 1992-05-13 | 1 | 0 |
| 10688 | 4 | 43453.24 | 1992-05-13 | 1992-06-20 | 0 | 0 |
| 26407 | 4 | 275581.07 | 1992-06-20 | 1992-07-10 | 0 | 0 |
| 44486 | 4 | 39197.54 | 1992-07-10 | 1993-01-08 | 0 | 1 |
| 54722 | 4 | 208672.53 | 1993-01-08 | 1993-02-28 | 1 | 0 |
| 20386 | 4 | 78070.68 | 1993-02-28 | 1993-03-07 | 0 | 0 |
| 26852 | 4 | 77869.29 | 1993-03-07 | 1993-04-21 | 0 | 0 |
| 38117 | 4 | 148782.33 | 1993-04-21 | 1994-04-03 | 0 | 1 |
| 36386 | 4 | 6829.14 | 1994-04-03 | 1994-11-06 | 1 | 1 |
| 56196 | 4 | 199826.92 | 1994-11-06 | 1995-05-22 | 1 | 1 |
| 25280 | 4 | 115422.02 | 1995-05-22 | 1995-07-30 | 1 | 0 |
| 14947 | 4 | 165118.45 | 1995-07-30 | 1995-10-05 | 0 | 0 |
| 59079 | 4 | 29140.97 | 1995-10-05 | 1995-11-18 | 0 | 0 |
| 46951 | 4 | 32138.87 | 1995-11-18 | 1995-12-11 | 0 | 0 |
| 13728 | 4 | 123722.52 | 1995-12-11 | 1996-02-18 | 0 | 0 |
| 6532 | 4 | 129232.21 | 1996-02-18 | 1996-05-14 | 0 | 0 |
| 15395 | 4 | 37002.64 | 1996-05-14 | 1996-05-16 | 0 | 0 |
| 26722 | 4 | 173562.06 | 1996-05-16 | 1996-07-12 | 0 | 0 |
| 29574 | 4 | 112785.17 | 1996-07-12 | 1996-10-18 | 0 | 1 |
| 24416 | 4 | 184512.04 | 1996-10-18 | 1997-01-15 | 1 | 0 |
| 10788 | 4 | 147767.08 | 1997-01-15 | 1997-02-24 | 0 | 0 |
| 34755 | 4 | 156681.56 | 1997-02-24 | 1997-04-02 | 0 | 0 |
| 58146 | 4 | 241263.01 | 1997-04-02 | 1997-04-06 | 0 | 0 |
| 41600 | 4 | 192516.42 | 1997-04-06 | 1997-08-31 | 0 | 1 |
| 59140 | 4 | 215749.35 | 1997-08-31 | 1997-09-16 | 1 | 0 |
| 25893 | 4 | 150695.21 | 1997-09-16 | 1997-11-21 | 0 | 0 |
| 320 | 4 | 50202.60 | 1997-11-21 | 1998-01-25 | 0 | 0 |
| 28417 | 4 | 209874.16 | 1998-01-25 | 1998-02-13 | 0 | 0 |
| 54404 | 4 | 182306.44 | 1998-02-13 | 1998-05-31 | 0 | 1 |
| 739 | 4 | 226008.80 | 1998-05-31 | 1998-06-24 | 1 | 0 |
| 54018 | 4 | 50422.56 | 1998-06-24 | NULL | 0 | 1 |
+------------+-----------+--------------+-------------+------------+---------+-------+
# CTE2,SUM(ISEND) OVER (PARTITION BY O_CUSTKEY ORDER BY O_ORDERDATE ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) - ISEND,于当前行累加 ISEND 对数据岛进行分组
+------------+-----------+--------------+-------------+------------+---------+-------+---------+
| O_ORDERKEY | O_CUSTKEY | O_TOTALPRICE | O_ORDERDATE | NXT | ISSTART | ISEND | ISGROUP |
+------------+-----------+--------------+-------------+------------+---------+-------+---------+
| 22466 | 4 | 130160.51 | 1992-03-29 | 1992-05-13 | 1 | 0 | 0 |
| 10688 | 4 | 43453.24 | 1992-05-13 | 1992-06-20 | 0 | 0 | 0 |
| 26407 | 4 | 275581.07 | 1992-06-20 | 1992-07-10 | 0 | 0 | 0 |
| 44486 | 4 | 39197.54 | 1992-07-10 | 1993-01-08 | 0 | 1 | 0 |
| 54722 | 4 | 208672.53 | 1993-01-08 | 1993-02-28 | 1 | 0 | 1 |
| 20386 | 4 | 78070.68 | 1993-02-28 | 1993-03-07 | 0 | 0 | 1 |
| 26852 | 4 | 77869.29 | 1993-03-07 | 1993-04-21 | 0 | 0 | 1 |
| 38117 | 4 | 148782.33 | 1993-04-21 | 1994-04-03 | 0 | 1 | 1 |
| 36386 | 4 | 6829.14 | 1994-04-03 | 1994-11-06 | 1 | 1 | 2 |
| 56196 | 4 | 199826.92 | 1994-11-06 | 1995-05-22 | 1 | 1 | 3 |
| 25280 | 4 | 115422.02 | 1995-05-22 | 1995-07-30 | 1 | 0 | 4 |
| 14947 | 4 | 165118.45 | 1995-07-30 | 1995-10-05 | 0 | 0 | 4 |
| 59079 | 4 | 29140.97 | 1995-10-05 | 1995-11-18 | 0 | 0 | 4 |
| 46951 | 4 | 32138.87 | 1995-11-18 | 1995-12-11 | 0 | 0 | 4 |
| 13728 | 4 | 123722.52 | 1995-12-11 | 1996-02-18 | 0 | 0 | 4 |
| 6532 | 4 | 129232.21 | 1996-02-18 | 1996-05-14 | 0 | 0 | 4 |
| 15395 | 4 | 37002.64 | 1996-05-14 | 1996-05-16 | 0 | 0 | 4 |
| 26722 | 4 | 173562.06 | 1996-05-16 | 1996-07-12 | 0 | 0 | 4 |
| 29574 | 4 | 112785.17 | 1996-07-12 | 1996-10-18 | 0 | 1 | 4 |
| 24416 | 4 | 184512.04 | 1996-10-18 | 1997-01-15 | 1 | 0 | 5 |
| 10788 | 4 | 147767.08 | 1997-01-15 | 1997-02-24 | 0 | 0 | 5 |
| 34755 | 4 | 156681.56 | 1997-02-24 | 1997-04-02 | 0 | 0 | 5 |
| 58146 | 4 | 241263.01 | 1997-04-02 | 1997-04-06 | 0 | 0 | 5 |
| 41600 | 4 | 192516.42 | 1997-04-06 | 1997-08-31 | 0 | 1 | 5 |
| 59140 | 4 | 215749.35 | 1997-08-31 | 1997-09-16 | 1 | 0 | 6 |
| 25893 | 4 | 150695.21 | 1997-09-16 | 1997-11-21 | 0 | 0 | 6 |
| 320 | 4 | 50202.60 | 1997-11-21 | 1998-01-25 | 0 | 0 | 6 |
| 28417 | 4 | 209874.16 | 1998-01-25 | 1998-02-13 | 0 | 0 | 6 |
| 54404 | 4 | 182306.44 | 1998-02-13 | 1998-05-31 | 0 | 1 | 6 |
| 739 | 4 | 226008.80 | 1998-05-31 | 1998-06-24 | 1 | 0 | 7 |
| 54018 | 4 | 50422.56 | 1998-06-24 | NULL | 0 | 1 | 7 |
+------------+-----------+--------------+-------------+------------+---------+-------+---------+
# CTE3,按照 ISGROUP 使用 FIRST_VALUE 和 LAST_VALUE 获取最小最大值
+-----------+---------------+----------------+---------------+---------+
| O_CUSTKEY | SUMTOTALPRICE | FIRSTORDERDATE | LASTORDERDATE | ISGROUP |
+-----------+---------------+----------------+---------------+---------+
| 4 | 488392.36 | 1992-03-29 | 1992-07-10 | 0 |
| 4 | 488392.36 | 1992-03-29 | 1992-07-10 | 0 |
| 4 | 488392.36 | 1992-03-29 | 1992-07-10 | 0 |
| 4 | 488392.36 | 1992-03-29 | 1992-07-10 | 0 |
| 4 | 513394.83 | 1993-01-08 | 1993-04-21 | 1 |
| 4 | 513394.83 | 1993-01-08 | 1993-04-21 | 1 |
| 4 | 513394.83 | 1993-01-08 | 1993-04-21 | 1 |
| 4 | 513394.83 | 1993-01-08 | 1993-04-21 | 1 |
| 4 | 6829.14 | 1994-04-03 | 1994-04-03 | 2 |
| 4 | 199826.92 | 1994-11-06 | 1994-11-06 | 3 |
| 4 | 918124.91 | 1995-05-22 | 1996-07-12 | 4 |
| 4 | 918124.91 | 1995-05-22 | 1996-07-12 | 4 |
| 4 | 918124.91 | 1995-05-22 | 1996-07-12 | 4 |
| 4 | 918124.91 | 1995-05-22 | 1996-07-12 | 4 |
| 4 | 918124.91 | 1995-05-22 | 1996-07-12 | 4 |
| 4 | 918124.91 | 1995-05-22 | 1996-07-12 | 4 |
| 4 | 918124.91 | 1995-05-22 | 1996-07-12 | 4 |
| 4 | 918124.91 | 1995-05-22 | 1996-07-12 | 4 |
| 4 | 918124.91 | 1995-05-22 | 1996-07-12 | 4 |
| 4 | 922740.11 | 1996-10-18 | 1997-04-06 | 5 |
| 4 | 922740.11 | 1996-10-18 | 1997-04-06 | 5 |
| 4 | 922740.11 | 1996-10-18 | 1997-04-06 | 5 |
| 4 | 922740.11 | 1996-10-18 | 1997-04-06 | 5 |
| 4 | 922740.11 | 1996-10-18 | 1997-04-06 | 5 |
| 4 | 808827.76 | 1997-08-31 | 1998-02-13 | 6 |
| 4 | 808827.76 | 1997-08-31 | 1998-02-13 | 6 |
| 4 | 808827.76 | 1997-08-31 | 1998-02-13 | 6 |
| 4 | 808827.76 | 1997-08-31 | 1998-02-13 | 6 |
| 4 | 808827.76 | 1997-08-31 | 1998-02-13 | 6 |
| 4 | 276431.36 | 1998-05-31 | 1998-06-24 | 7 |
| 4 | 276431.36 | 1998-05-31 | 1998-06-24 | 7 |
+-----------+---------------+----------------+---------------+---------+
Result
+-----------+---------------+----------------+---------------+
| O_CUSTKEY | SUMTOTALPRICE | FIRSTORDERDATE | LASTORDERDATE |
+-----------+---------------+----------------+---------------+
| 4 | 488392.36 | 1992-03-29 | 1992-07-10 |
| 4 | 513394.83 | 1993-01-08 | 1993-04-21 |
| 4 | 6829.14 | 1994-04-03 | 1994-04-03 |
| 4 | 199826.92 | 1994-11-06 | 1994-11-06 |
| 4 | 918124.91 | 1995-05-22 | 1996-07-12 |
| 4 | 922740.11 | 1996-10-18 | 1997-04-06 |
| 4 | 808827.76 | 1997-08-31 | 1998-02-13 |
| 4 | 276431.36 | 1998-05-31 | 1998-06-24 |
+-----------+---------------+----------------+---------------+