1.创建示例数据:
-- Rebuild the SALES_FACT demo table that the queries in these notes read from.
-- On a first run the table does not exist yet, so the DROP reports an error
-- (ORA-00942) that can safely be ignored.
DROP TABLE sales_fact;

-- One row per (country, region, product, year, week) with two measures:
--   sale     : SUM(amount_sold)
--   receipts : SUM(amount_sold * factor); the factor is derived from ROWNUM
--              only so that RECEIPTS differs from SALE in the sample data.
CREATE TABLE sales_fact AS
SELECT
    country_name         AS country,
    country_subregion    AS region,
    prod_name            AS product,
    calendar_year        AS year,
    calendar_week_number AS week,
    SUM(amount_sold)     AS sale,
    SUM(
        amount_sold
        * (
            -- Branches are tested top-down, so a ROWNUM divisible by 10 gets
            -- 1.4 even though it is also divisible by 5 and by 2.
            CASE
                WHEN MOD(ROWNUM, 10) = 0 THEN 1.4
                WHEN MOD(ROWNUM, 5) = 0 THEN 0.6
                WHEN MOD(ROWNUM, 2) = 0 THEN 0.9
                WHEN MOD(ROWNUM, 2) = 1 THEN 1.2
                -- Unreachable: MOD(ROWNUM, 2) is always 0 or 1.
                ELSE 1
            END
        )
    )                    AS receipts
FROM sales
INNER JOIN times
    ON sales.time_id = times.time_id
INNER JOIN products
    ON sales.prod_id = products.prod_id
INNER JOIN customers
    ON sales.cust_id = customers.cust_id
INNER JOIN countries
    ON customers.country_id = countries.country_id
GROUP BY
    country_name,
    country_subregion,
    prod_name,
    calendar_year,
    calendar_week_number;
分析函数具有3个基本组成部分:分区子句,排序子句和开窗子句。
rows BETWEEN unbounded preceding AND CURRENT ROW开窗子句:
-- Running (year-to-date) total of SALE: for each row the window spans from the
-- first row of its (product, country, region, year) partition, ordered by week,
-- up to and including the current row.
SELECT
    year,
    week,
    sale,
    SUM(sale) OVER (
        PARTITION BY product, country, region, year
        ORDER BY week
        ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
    ) AS running_sum_ytd
FROM sales_fact
WHERE country IN ('Australia')
  AND product = 'Xtend Memory'
ORDER BY product, country, year, week;
YEAR WEEK SALE RUNNING_SUM_YTD
---------- ---------- ---------- ---------------
1998 1 58.15 58.15
1998 2 29.39 87.54
1998 3 29.49 117.03
1998 4 29.49 146.52
1998 5 29.8 176.32
1998 6 58.78 235.1
1998 9 58.78 293.88
1998 10 117.76 411.64
1998 12 59.6 471.24
1998 14 58.78 530.02
1998 15 58.78 588.8
求出sale列的最大值:
-- Maximum SALE of the whole (product, country, region, year) partition.
-- The window covers every row of the partition, so each row of a given year
-- reports the same MAX_SALE.
SELECT
    year,
    week,
    sale,
    MAX(sale) OVER (
        PARTITION BY product, country, region, year
        ORDER BY week
        ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING
    ) AS max_sale
FROM sales_fact
WHERE country IN ('Australia')
  AND product = 'Xtend Memory'
ORDER BY product, country, year, week;
YEAR WEEK SALE MAX_SALE
---------- ---------- ---------- ----------
1998 1 58.15 172.56
1998 2 29.39 172.56
1998 3 29.49 172.56
1998 4 29.49 172.56
1998 5 29.8 172.56
1998 6 58.78 172.56
1998 9 58.78 172.56
1998 10 117.76 172.56
1998 12 59.6 172.56
1998 14 58.78 172.56
1998 15 58.78 172.56
细粒度窗口声明
计算2周前和本周及后2周的情况:
-- Fine-grained window: maximum SALE over the two preceding rows, the current
-- row and the two following rows within each (product, country, region, year)
-- partition ordered by week.
-- ROWS counts physical rows, not week numbers: where weeks are missing (e.g.
-- week 6 is followed by week 9) the window still takes the two neighbouring
-- rows on each side.
-- NOTE(review): the result listing that follows this query in the notes shows a
-- constant MAX_SALE of 172.56, i.e. it was captured with an UNBOUNDED window and
-- does not reflect this 5-row window; re-run the query to refresh it.
SELECT
    year,
    week,
    sale,
    MAX(sale) OVER (
        PARTITION BY product, country, region, year
        ORDER BY week
        ROWS BETWEEN 2 PRECEDING AND 2 FOLLOWING
    ) AS max_sale
FROM sales_fact
WHERE country IN ('Australia')
  AND product = 'Xtend Memory'
ORDER BY product, country, year, week;
YEAR WEEK SALE MAX_SALE
---------- ---------- ---------- ----------
1998 1 58.15 172.56
1998 2 29.39 172.56
1998 3 29.49 172.56
1998 4 29.49 172.56
1998 5 29.8 172.56
1998 6 58.78 172.56
1998 9 58.78 172.56
1998 10 117.76 172.56
1998 12 59.6 172.56
1998 14 58.78 172.56
1998 15 58.78 172.56
在指定了ORDER BY而省略开窗子句时,默认窗口的声明子句是range between unbounded preceding and current row。
LEAD和LAG
LAG和LEAD函数提供了跨行引用的能力。LAG函数提供了访问结果集中前面的行的能力,LEAD函数允许访问结果集中后面的行。
-- LAG: fetch SALE from the previous row of the same (product, country, region)
-- partition ordered by year and week.
--   offset 1     -> one row back
--   default sale -> the first row of a partition has no predecessor, so it
--                   falls back to its own SALE instead of NULL
-- YEAR is part of the ORDER BY rather than the PARTITION BY so that week 1 of a
-- year can look back to the last week of the previous year.
-- NOTE(review): the result listing that follows this query in the notes has a
-- MAX_SALE column, i.e. it was captured from the MAX() example rather than from
-- this LAG query; re-run the query to refresh it.
SELECT
    year,
    week,
    sale,
    LAG(sale, 1, sale) OVER (
        PARTITION BY product, country, region
        ORDER BY year, week
    ) AS prior_wk_sales
FROM sales_fact
WHERE country IN ('Australia')
  AND product = 'Xtend Memory'
ORDER BY product, country, year, week;
YEAR WEEK SALE MAX_SALE
---------- ---------- ---------- ----------
1998 1 58.15 172.56
1998 2 29.39 172.56
1998 3 29.49 172.56
1998 4 29.49 172.56
1998 5 29.8 172.56
1998 6 58.78 172.56
1998 9 58.78 172.56
1998 10 117.76 172.56
1998 12 59.6 172.56
1998 14 58.78 172.56
1998 15 58.78 172.56
LEAD
-- LEAD: fetch SALE from the next row of the same (product, country, region)
-- partition ordered by year and week.
--   offset 1     -> one row ahead
--   default sale -> the last row of a partition has no successor, so it falls
--                   back to its own SALE instead of NULL
-- The column is named NEXT_WK_SALES because LEAD looks forward; the earlier
-- alias PRIOR_WK_SALES described the LAG query, not this one.
SELECT
    year,
    week,
    sale,
    LEAD(sale, 1, sale) OVER (
        PARTITION BY product, country, region
        ORDER BY year, week
    ) AS next_wk_sales
FROM sales_fact
WHERE country IN ('Australia')
  AND product = 'Xtend Memory'
ORDER BY product, country, year, week;
FIRST_VALUE 和LAST_VALUE:
-- FIRST_VALUE: within each (product, country, region, year) partition ordered
-- by SALE descending, the first row is the best-selling week.
--   top_sale_value : the SALE of that row
--   top_week_sale  : the WEEK of that row
-- The window covers the whole partition, so every row of a year reports the
-- same pair of values.
-- If several weeks tie for the highest SALE, which of them comes first is not
-- determined by this ORDER BY.
SELECT
    year,
    week,
    sale,
    FIRST_VALUE(sale) OVER (
        PARTITION BY product, country, region, year
        ORDER BY sale DESC
        ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING
    ) AS top_sale_value,
    FIRST_VALUE(week) OVER (
        PARTITION BY product, country, region, year
        ORDER BY sale DESC
        ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING
    ) AS top_week_sale
FROM sales_fact
WHERE country IN ('Australia')
  AND product = 'Xtend Memory'
ORDER BY product, country, year, week;
YEAR WEEK SALE TOP_SALE_VALUE TOP_WEEK_SALE
---------- ---------- ---------- -------------- -------------
1998 1 58.15 172.56 48
1998 2 29.39 172.56 48
1998 3 29.49 172.56 48
1998 4 29.49 172.56 48
1998 5 29.8 172.56 48
1998 6 58.78 172.56 48
1998 9 58.78 172.56 48
1998 10 117.76 172.56 48
1998 12 59.6 172.56 48
1998 14 58.78 172.56 48
1998 15 58.78 172.56 48
-- LAST_VALUE: with the partition ordered by SALE descending, the last row is
-- the lowest SALE of the (product, country, region, year) partition.
-- The explicit UNBOUNDED FOLLOWING bound matters here: the window must reach
-- the end of the partition for LAST_VALUE to see the final row.
SELECT
    year,
    week,
    sale,
    LAST_VALUE(sale) OVER (
        PARTITION BY product, country, region, year
        ORDER BY sale DESC
        ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING
    ) AS low_value
FROM sales_fact
WHERE country IN ('Australia')
  AND product = 'Xtend Memory'
ORDER BY product, country, year, week;
YEAR WEEK SALE LOW_VALUE
---------- ---------- ---------- ----------
1998 1 58.15 28.76
1998 2 29.39 28.76
1998 3 29.49 28.76
1998 4 29.49 28.76
1998 5 29.8 28.76
1998 6 58.78 28.76
1998 9 58.78 28.76
1998 10 117.76 28.76
1998 12 59.6 28.76
1998 14 58.78 28.76
1998 15 58.78 28.76
-- NTH_VALUE(sale, 2): the SALE of the second row of each
-- (product, country, region, year) partition ordered by SALE descending,
-- i.e. the second-highest weekly sale of the year.
-- The window covers the whole partition, so every row reports the same value.
SELECT
    year,
    week,
    sale,
    NTH_VALUE(sale, 2) OVER (
        PARTITION BY product, country, region, year
        ORDER BY sale DESC
        ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING
    ) AS top_2nd_value
FROM sales_fact
WHERE country IN ('Australia')
  AND product = 'Xtend Memory'
ORDER BY product, country, year, week;
YEAR WEEK SALE TOP_2ND_VALUE
---------- ---------- ---------- -------------
1998 1 58.15 117.76
1998 2 29.39 117.76
1998 3 29.49 117.76
1998 4 29.49 117.76
1998 5 29.8 117.76
1998 6 58.78 117.76
1998 9 58.78 117.76
1998 10 117.76 117.76
1998 12 59.6 117.76
1998 14 58.78 117.76
1998 15 58.78 117.76
RANK函数:
求出销售前10名的周:
rank在出现并列时会跳过后续的排名值(例如3个并列第3名之后直接是第6名):
-- Top-10 weeks per year by SALE using RANK().
-- RANK() gives tied rows the same rank and leaves a gap afterwards, so a
-- three-way tie at rank 3 is followed by rank 6.
-- The rank is computed in an inline view because an analytic function cannot
-- be referenced in the WHERE clause of the query block that computes it.
-- No ORDER BY is needed inside the inline view; only the outer ORDER BY
-- determines the order of the final result.
SELECT
    year,
    week,
    sale,
    sales_rank
FROM (
    SELECT
        year,
        week,
        sale,
        RANK() OVER (
            PARTITION BY product, country, region, year
            ORDER BY sale DESC
        ) AS sales_rank
    FROM sales_fact
    WHERE country IN ('Australia')
      AND product = 'Xtend Memory'
)
WHERE sales_rank <= 10
ORDER BY year, sales_rank;
YEAR WEEK SALE SALES_RANK
---------- ---------- ---------- ----------
1998 48 172.56 1
1998 10 117.76 2
1998 18 117.56 3
1998 23 117.56 3
1998 26 117.56 3
1998 38 115.84 6
1998 42 115.84 6
1998 39 115.84 6
1998 34 115.44 9
1998 52 86.38 10
1999 17 148.12 1
DENSE_RANK:不会跳过并列排名的值:
-- Weeks whose SALE is among the 10 highest distinct values of the year, using
-- DENSE_RANK().
-- DENSE_RANK() gives tied rows the same rank and leaves no gap afterwards, so
-- a three-way tie at rank 3 is followed by rank 4. Because of that, the filter
-- below can return more than 10 rows per year.
-- No ORDER BY is needed inside the inline view; only the outer ORDER BY
-- determines the order of the final result.
SELECT
    year,
    week,
    sale,
    sales_rank
FROM (
    SELECT
        year,
        week,
        sale,
        DENSE_RANK() OVER (
            PARTITION BY product, country, region, year
            ORDER BY sale DESC
        ) AS sales_rank
    FROM sales_fact
    WHERE country IN ('Australia')
      AND product = 'Xtend Memory'
)
WHERE sales_rank <= 10
ORDER BY year, sales_rank;
YEAR WEEK SALE SALES_RANK
---------- ---------- ---------- ----------
1998 48 172.56 1
1998 10 117.76 2
1998 18 117.56 3
1998 23 117.56 3
1998 26 117.56 3
1998 38 115.84 4
1998 39 115.84 4
1998 42 115.84 4
1998 34 115.44 5
1998 52 86.38 6
1998 21 59.6 7
ROW_NUMBER:
-- Side-by-side comparison of ROW_NUMBER() and RANK() over the same window
-- (per product/country/region/year, highest SALE first):
--   sales_rn   : a distinct sequence number for every row, ties included
--   sales_rank : tied rows share a rank
SELECT
    year,
    week,
    sale,
    ROW_NUMBER() OVER (
        PARTITION BY product, country, region, year
        ORDER BY sale DESC
    ) AS sales_rn,
    RANK() OVER (
        PARTITION BY product, country, region, year
        ORDER BY sale DESC
    ) AS sales_rank
FROM sales_fact
WHERE country IN ('Australia')
  AND product = 'Xtend Memory'
ORDER BY
    product,
    country,
    year,
    sales_rank;
RATIO_TO_REPORT:
-- SQL*Plus setting: widen the output line so the five columns fit on one row.
SET LINESIZE 100

-- RATIO_TO_REPORT: each row's SALE as a percentage of a partition total,
-- truncated (not rounded) to two decimals.
--   sales_yr   : share of the (product, country, region, year) total
--   sales_grod : share of the (product, country, region) total across all years
SELECT
    year,
    week,
    sale,
    TRUNC(
        100 * RATIO_TO_REPORT(sale) OVER (
            PARTITION BY product, country, region, year
        ),
        2
    ) AS sales_yr,
    TRUNC(
        100 * RATIO_TO_REPORT(sale) OVER (
            PARTITION BY product, country, region
        ),
        2
    ) AS sales_grod
FROM sales_fact
WHERE country IN ('Australia')
  AND product = 'Xtend Memory'
ORDER BY product, country, year, week;
YEAR WEEK SALE SALES_YR SALES_GROD
---------- ---------- ---------- ---------- ----------
1998 1 58.15 2.26 .43
1998 2 29.39 1.14 .21
1998 3 29.49 1.15 .22
1998 4 29.49 1.15 .22
1998 5 29.8 1.16 .22
1998 6 58.78 2.29 .43
1998 9 58.78 2.29 .43
1998 10 117.76 4.59 .88
1998 12 59.6 2.32 .44
1998 14 58.78 2.29 .43
1998 15 58.78 2.29 .43
PERCENT_RANK:
-- PERCENT_RANK: keep the weeks whose SALE falls in the top 50 percent of their
-- (product, country, region, year) partition.
-- PR is PERCENT_RANK() scaled to 0..100; with ORDER BY sale DESC the best week
-- of a year gets 0.
-- The value is computed in an inline view so the outer query can filter on it.
SELECT
    year,
    week,
    sale,
    pr
FROM (
    SELECT
        year,
        week,
        sale,
        100 * PERCENT_RANK() OVER (
            PARTITION BY product, country, region, year
            ORDER BY sale DESC
        ) AS pr
    FROM sales_fact
    WHERE country IN ('Australia')
      AND product = 'Xtend Memory'
)
WHERE pr < 50
ORDER BY
    year,
    sale DESC;
STDDEV,计算标准偏差:
-- STDDEV: standard deviation of SALE over the whole
-- (product, country, region, year) partition.
-- The window spans every row of the partition, so each row of a given year
-- carries the same STDDV value.
SELECT
    year,
    week,
    sale,
    STDDEV(sale) OVER (
        PARTITION BY product, country, region, year
        ORDER BY sale DESC
        ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING
    ) AS stddv
FROM sales_fact
WHERE country IN ('Australia')
  AND product = 'Xtend Memory'
ORDER BY
    year,
    week;
LISTAGG函数:(把国家转换成由逗号分隔的列表)。
-- LISTAGG: collapse the distinct countries in SALES_FACT into a single
-- comma-separated string.
-- The order of the names inside the string is set by WITHIN GROUP
-- (country descending), not by the ORDER BY of the inline view.
SELECT
    LISTAGG(country, ',') WITHIN GROUP (ORDER BY country DESC)
FROM (
    SELECT DISTINCT country
    FROM sales_fact
    ORDER BY country
);