文章目录
- SUM AVG MIN MAX
- rank row_number dense_rank
- ntile - 将数据按照指定的顺序分成几部分
- PERCENT_RANK 百分比rank
- CUME_DIST 小于等于自己的比例
- LAST_VALUE & FIRST_VALUE 截止到当前最后一个值
- LAG & LEAD取前几行的值
- Mysql实现开窗 [不好用建议迁移oracle或者impala]
SUM AVG MIN MAX
-- Sums of pv within each id, ordered by date_time.
-- Every output column uses a different window frame so they can be compared.
SELECT
    id,
    date_time,
    pv,
    -- pv1: ORDER BY without an explicit frame, so the engine's default frame applies
    SUM(pv) OVER (
        PARTITION BY id
        ORDER BY date_time
    ) AS pv1,
    -- pv2: first row of the partition through the current row
    SUM(pv) OVER (
        PARTITION BY id
        ORDER BY date_time
        ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
    ) AS pv2,
    -- pv3: no ORDER BY and no frame, so the whole partition is summed
    SUM(pv) OVER (
        PARTITION BY id
    ) AS pv3,
    -- pv4: the current row plus the 3 rows before it
    SUM(pv) OVER (
        PARTITION BY id
        ORDER BY date_time
        ROWS BETWEEN 3 PRECEDING AND CURRENT ROW
    ) AS pv4,
    -- pv5: 3 rows before the current row through 1 row after it
    SUM(pv) OVER (
        PARTITION BY id
        ORDER BY date_time
        ROWS BETWEEN 3 PRECEDING AND 1 FOLLOWING
    ) AS pv5,
    -- pv6: the current row through the last row of the partition
    SUM(pv) OVER (
        PARTITION BY id
        ORDER BY date_time
        ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING
    ) AS pv6
FROM 表;
id date_time pv pv1 pv2 pv3 pv4 pv5 pv6
1 20191101 1 1 1 26 1 6 26
1 20191102 5 6 6 26 6 13 25
1 20191103 7 13 13 26 13 16 20
1 20191104 3 16 16 26 16 18 13
1 20191105 2 18 18 26 17 21 10
1 20191106 4 22 22 26 16 20 8
1 20191107 4 26 26 26 13 13 4
ROWS BETWEEN ... AND
PRECEDING 往前
FOLLOWING 往后
CURRENT ROW 当前行
UNBOUNDED PRECEDING 表示从分区的第一行开始
UNBOUNDED FOLLOWING 表示到分区的最后一行结束
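ORDER BY:指定了 ORDER BY 但没有写 ROWS BETWEEN 时,默认窗口是从分区第一行到当前行(如 pv1,结果与 pv2 相同);既不指定 ORDER BY 也不指定 ROWS BETWEEN 时,窗口是整个分区(如 pv3)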
rank row_number dense_rank
-- Compares the three ranking functions on pv (highest first) within each id.
SELECT
    id,
    date_time,
    pv,
    -- RANK: equal pv values share a rank and the next rank is skipped (1, 2, 3, 3, 5)
    RANK() OVER (PARTITION BY id ORDER BY pv DESC) AS rk1,
    -- DENSE_RANK: equal pv values share a rank and no rank is skipped (1, 2, 3, 3, 4)
    DENSE_RANK() OVER (PARTITION BY id ORDER BY pv DESC) AS rk2,
    -- ROW_NUMBER: always unique. date_time is added as a tie-breaker so rows
    -- with equal pv are numbered the same way on every run.
    ROW_NUMBER() OVER (PARTITION BY id ORDER BY pv DESC, date_time) AS rk3
FROM 表;
id date_time pv rk1 rk2 rk3
1 20191101 7 1 1 1
1 20191102 5 2 2 2
1 20191103 4 3 3 3
1 20191104 4 3 3 4
1 20191105 3 5 4 5
1 20191106 2 6 5 6
1 20191107 1 7 6 7
ntile - 将数据按照指定的顺序分成几部分
-- NTILE(n) splits the ordered rows of a window into n numbered buckets.
-- When the row count is not divisible by n, the earlier buckets receive the extra rows.
SELECT
    id,
    date_time,
    pv,
    -- n1 / n2: each id's rows split into 2 and 3 buckets by date_time
    NTILE(2) OVER (PARTITION BY id ORDER BY date_time) AS n1,
    NTILE(3) OVER (PARTITION BY id ORDER BY date_time) AS n2,
    -- n3: no PARTITION BY, so the 4 buckets span the rows of all ids.
    -- Different ids share the same date_time, so id is added as a tie-breaker
    -- to make the bucket boundaries reproducible.
    -- NOTE(review): assumes (date_time, id) is unique in the table - confirm.
    NTILE(4) OVER (ORDER BY date_time, id) AS n3
FROM 表
ORDER BY
    id,
    date_time;
id date_time pv n1 n2 n3
1 20191101 1 1 1 1
1 20191102 5 1 1 1
1 20191103 7 1 1 2
1 20191104 3 1 2 2
1 20191105 2 2 2 3
1 20191106 4 2 3 3
1 20191107 4 2 3 4
2 20191101 2 1 1 1
2 20191102 3 1 1 1
2 20191103 5 1 1 2
2 20191104 6 1 2 2
2 20191105 3 2 2 3
2 20191106 9 2 3 4
2 20191107 7 2 3 4
PERCENT_RANK 百分比rank
-- PERCENT_RANK = (RANK - 1) / (rows in the window partition - 1).
-- n11 and n12 expose the two inputs of that formula for the unpartitioned case.
SELECT
    dept,
    id,
    sal,
    -- n1: percent rank of sal over all rows
    PERCENT_RANK() OVER (ORDER BY sal) AS n1,
    -- n11: plain rank of sal over all rows
    RANK() OVER (ORDER BY sal) AS n11,
    -- n12: total number of rows; COUNT(*) over an empty window states this
    -- directly instead of summing the constant 1 over PARTITION BY NULL
    COUNT(*) OVER () AS n12,
    -- n2: percent rank of sal within each dept
    PERCENT_RANK() OVER (PARTITION BY dept ORDER BY sal) AS n2
FROM 表;
dept id sal n1 n11 n12 n2
1 1 1000 0.0 1 5 0.0
1 2 2000 0.25 2 5 0.5
1 3 3000 0.5 3 5 1.0
2 4 4000 0.75 4 5 0.0
2 5 5000 1.0 5 5 1.0
CUME_DIST 小于等于自己的比例
-- CUME_DIST: share of rows in the window whose sal is less than or equal to
-- the current row's sal.
SELECT
    dept,
    id,
    sal,
    -- n1: computed over all rows (no PARTITION BY)
    CUME_DIST() OVER (
        ORDER BY sal
    ) AS n1,
    -- n2: computed separately inside each dept
    CUME_DIST() OVER (
        PARTITION BY dept
        ORDER BY sal
    ) AS n2
FROM 表;
dept id sal n1 n2
1 1 1000 0.2 0.3333333333333333
1 2 2000 0.4 0.6666666666666666
1 3 3000 0.6 1.0
2 4 4000 0.8 0.5
2 5 5000 1.0 1.0
n1: 没有partition,所有数据均为1组,总行数为5,
第一行:小于等于1000的行数为1,因此,1/5=0.2
第三行:小于等于3000的行数为3,因此,3/5=0.6
n2: 按照部门分组,dept=1的行数为3,
第二行:小于等于2000的行数为2,因此,2/3=0.6666666666666666
LAST_VALUE & FIRST_VALUE 截止到当前最后一个值
-- LAST_VALUE: the last url seen in each id's window up to the current row.
SELECT
    id,
    date_time,
    url,
    -- rn: position of the row inside its id, ordered by date_time
    ROW_NUMBER() OVER (PARTITION BY id ORDER BY date_time) AS rn,
    -- last1: the frame is written out as ROWS ... CURRENT ROW. Without it the
    -- default frame also includes every later row that has the same date_time,
    -- so rows with equal date_time would all return the same (last peer) url.
    -- NOTE(review): rows with equal date_time are still ordered arbitrarily;
    -- add a tie-breaker column to ORDER BY if a stable order is required.
    LAST_VALUE(url) OVER (
        PARTITION BY id
        ORDER BY date_time
        ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
    ) AS last1
FROM 表;
id date_time url rn last1
1 20191001 url1 1 url1
1 20191001 url2 2 url2
1 20191001 url3 3 url3
1 20191001 url4 4 url4
1 20191001 url5 5 url5
1 20191001 url6 6 url6
1 20191001 url7 7 url7
2 20191001 url11 1 url11
2 20191001 url22 2 url22
2 20191001 url33 3 url33
2 20191001 url44 4 url44
2 20191001 url55 5 url55
2 20191001 url66 6 url66
2 20191001 url77 7 url77
LAG & LEAD取前几行的值
-- LAG(col, n, default): value of col from the row n positions before the
-- current row in the window; default is returned when no such row exists.
SELECT
    id,
    date_time,
    url,
    -- rn: position of the row inside its id, ordered by date_time
    ROW_NUMBER() OVER (
        PARTITION BY id
        ORDER BY date_time
    ) AS rn,
    -- t1: date_time of the previous row, with an explicit default value
    LAG(date_time, 1, '1970-01-01 00:00:00') OVER (
        PARTITION BY id
        ORDER BY date_time
    ) AS t1,
    -- t2: date_time from two rows back; no default given, so NULL when missing
    LAG(date_time, 2) OVER (
        PARTITION BY id
        ORDER BY date_time
    ) AS t2
FROM 表;
id date_time url rn t1 t2
1 2019-11-10 10:00:00 url1 1 1970-01-01 00:00:00 NULL
1 2019-11-10 10:00:02 url2 2 2019-11-10 10:00:00 NULL
1 2019-11-10 10:03:04 url3 3 2019-11-10 10:00:02 2019-11-10 10:00:00
1 2019-11-10 10:10:00 url4 4 2019-11-10 10:03:04 2019-11-10 10:00:02
1 2019-11-10 10:50:01 url5 5 2019-11-10 10:10:00 2019-11-10 10:03:04
1 2019-11-10 10:50:05 url6 6 2019-11-10 10:50:01 2019-11-10 10:10:00
1 2019-11-10 11:00:00 url7 7 2019-11-10 10:50:05 2019-11-10 10:50:01
2 2019-11-10 10:00:00 url11 1 1970-01-01 00:00:00 NULL
2 2019-11-10 10:00:02 url22 2 2019-11-10 10:00:00 NULL
2 2019-11-10 10:03:04 url33 3 2019-11-10 10:00:02 2019-11-10 10:00:00
2 2019-11-10 10:10:00 url44 4 2019-11-10 10:03:04 2019-11-10 10:00:02
2 2019-11-10 10:50:01 url55 5 2019-11-10 10:10:00 2019-11-10 10:03:04
2 2019-11-10 10:50:05 url66 6 2019-11-10 10:50:01 2019-11-10 10:10:00
2 2019-11-10 11:00:00 url77 7 2019-11-10 10:50:05 2019-11-10 10:50:01
t1: 指定了往上第1行的值,default为'1970-01-01 00:00:00'
1第一行,往上1行为NULL,因此取默认值 1970-01-01 00:00:00
1第三行,往上1行值为第二行值,2019-11-10 10:00:02
1第六行,往上1行值为第五行值,2019-11-10 10:50:01
t2: 指定了往上第2行的值,未指定默认值
1第一行,往上2行为NULL
1第二行,往上2行为NULL
1第四行,往上2行为第二行值,2019-11-10 10:00:02
1第七行,往上2行为第五行值,2019-11-10 10:50:01
Mysql实现开窗 [不好用建议迁移oracle或者impala]
-- Numbers rows in pagemountedtime order using the MySQL user variable @rowNum.
-- Presumably intended for MySQL versions without ROW_NUMBER() (window
-- functions were added in MySQL 8.0) - confirm the target version.
-- NOTE(review): this depends on the derived table being fully evaluated, in
-- its ORDER BY order, before the outer select list runs. MySQL does not
-- guarantee the evaluation order of user-variable assignments - verify.
select
pagemountedtime
-- adds 1 to the counter for every output row; the column has no alias
,@rowNum:=@rowNum+1
from
(
-- derived table: rows sorted by pagemountedtime; each row also assigns 0 to @rowNum
select
pagemountedtime
,@rowNum :=0
from 表
order by
pagemountedtime
) t