-- Aggregate window functions: SUM, MAX, MIN, AVG (only SUM is shown here).
-- Every column sums pv per cookieid in createtime order and differs only in its frame:
--   pv1: no explicit frame (the engine default for an ordered window applies)
--   pv2: from the start of the partition through the current row
--   pv3: from 1 row before through 3 rows after the current row
--   pv4: from the current row through 4 rows after it
SELECT
    cookieid,
    createtime,
    pv,
    SUM(pv) OVER (
        PARTITION BY cookieid
        ORDER BY createtime
    ) AS pv1,
    SUM(pv) OVER (
        PARTITION BY cookieid
        ORDER BY createtime
        ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
    ) AS pv2,
    SUM(pv) OVER (
        PARTITION BY cookieid
        ORDER BY createtime
        ROWS BETWEEN 1 PRECEDING AND 3 FOLLOWING
    ) AS pv3,
    SUM(pv) OVER (
        PARTITION BY cookieid
        ORDER BY createtime
        ROWS BETWEEN CURRENT ROW AND 4 FOLLOWING
    ) AS pv4
FROM lxw1234;
-- Ranking window functions: ROW_NUMBER, NTILE, RANK, DENSE_RANK (ROWS BETWEEN is not supported for these).
-- ROW_NUMBER numbers the rows of each partition, NTILE slices the partition into buckets,
-- RANK ranks with gaps after ties, DENSE_RANK ranks without gaps.
SELECT
    cookieid,
    createtime,
    pv,
    -- One sequential row number per record in the partition, regardless of equal createtime values.
    ROW_NUMBER() OVER w AS rn,
    -- Sorts the partition and cuts it into 3 slices, with leftover rows going to the earlier slices.
    NTILE(3) OVER w AS rn1,
    -- Rank of the row within the partition, where ties share a rank and leave gaps in the numbering.
    RANK() OVER w AS rn2,
    -- Same as RANK except that no gaps are left after ties.
    DENSE_RANK() OVER w AS rn3
FROM lxw1234
WINDOW w AS (PARTITION BY cookieid ORDER BY createtime);
-- Offset and boundary window functions: LAG, LEAD, FIRST_VALUE, LAST_VALUE.
-- LAG(col, n, default) reads col from the row n rows before the current row in the window
-- and falls back to default when no such row exists. LEAD does the same looking n rows after.
-- FIRST_VALUE returns the first value of the window up to the current row,
-- LAST_VALUE returns the last value of the window up to the current row.
SELECT
    cookieid,
    createtime,
    pv,
    -- createtime of the previous row in the partition (epoch placeholder for the first row).
    LAG(createtime, 1, '1970-01-01 00:00:00') OVER (
        PARTITION BY cookieid
        ORDER BY createtime
    ) AS rn,
    -- createtime of the row 3 positions later (epoch placeholder when fewer than 3 rows follow).
    LEAD(createtime, 3, '1970-01-01 00:00:00') OVER (
        PARTITION BY cookieid
        ORDER BY createtime
    ) AS rn1,
    -- FIRST_VALUE and LAST_VALUE require a value expression. The original call had none.
    -- NOTE(review): createtime is used to match LAG and LEAD above - confirm it is the intended column.
    FIRST_VALUE(createtime) OVER (
        PARTITION BY cookieid
        ORDER BY createtime
    ) AS rn3,
    LAST_VALUE(createtime) OVER (
        PARTITION BY cookieid
        ORDER BY createtime
    ) AS rn4
FROM lxw1234;
-- Distribution window functions: CUME_DIST, PERCENT_RANK.
--   CUME_DIST    = (rows with a value less than or equal to the current value) / (total rows in the partition)
--   PERCENT_RANK = (RANK of the current row - 1) / (total rows in the partition - 1)
SELECT
    cookieid,
    createtime,
    pv,
    CUME_DIST() OVER w AS rn,
    PERCENT_RANK() OVER w AS rn1
FROM lxw1234
WINDOW w AS (PARTITION BY cookieid ORDER BY createtime);
-- Grouping extensions: GROUPING SETS, GROUPING__ID, CUBE, ROLLUP.
-- CUBE aggregates over every combination of the GROUP BY dimensions.
-- ROLLUP is a subset of CUBE: it aggregates hierarchically, starting from the leftmost dimension.
-- GROUPING__ID is the number of the grouping set a result row belongs to.
-- Note on CUBE versus ROLLUP: ROLLUP effectively keeps the CUBE group numbers
-- and drops the groups that are not part of the hierarchy.
--
-- Distinct-cookie count (uv) per month and, separately, per day.
SELECT
    month,
    day,
    COUNT(DISTINCT cookieid) AS uv,
    GROUPING__ID
FROM lxw1234
GROUP BY
    month,
    day
GROUPING SETS (month, day)
ORDER BY GROUPING__ID;
-- Distinct-cookie count (uv) for every combination of the month and day dimensions (WITH CUBE).
SELECT
    month,
    day,
    COUNT(DISTINCT cookieid) AS uv,
    GROUPING__ID
FROM lxw1234
GROUP BY
    month,
    day
WITH CUBE
ORDER BY GROUPING__ID;
-- Distinct-cookie count (uv) aggregated hierarchically from the leftmost dimension, month (WITH ROLLUP).
SELECT
    month,
    day,
    COUNT(DISTINCT cookieid) AS uv,
    GROUPING__ID
FROM lxw1234
GROUP BY
    month,
    day
WITH ROLLUP
ORDER BY GROUPING__ID;