/*
DROP TABLE dimension_tab; CREATE TABLE dimension_tab ( fact_1_id NUMBER NOT NULL, fact_2_id NUMBER NOT NULL, fact_3_id NUMBER NOT NULL, fact_4_id NUMBER NOT NULL, sales_value NUMBER(10,2) NOT NULL ); INSERT INTO dimension_tab SELECT TRUNC(DBMS_RANDOM.value(low => 1, high => 3)) AS fact_1_id, TRUNC(DBMS_RANDOM.value(low => 1, high => 6)) AS fact_2_id, TRUNC(DBMS_RANDOM.value(low => 1, high => 11)) AS fact_3_id, TRUNC(DBMS_RANDOM.value(low => 1, high => 11)) AS fact_4_id, ROUND(DBMS_RANDOM.value(low => 1, high => 100), 2) AS sales_value FROM dual CONNECT BY level <= 1000; COMMIT;
SELECT SUM(sales_value) AS sales_value
FROM dimension_tab;
SALES_VALUE
-----------
50528.39
1 row selected.
SQL>
SELECT SUM(sales_value) AS sales_value FROM dimension_tab; SALES_VALUE ----------- 50528.39 1 row selected. SQL>
SELECT fact_1_id, COUNT(*) AS num_rows, SUM(sales_value) AS sales_value FROM dimension_tab GROUP BY fact_1_id ORDER BY fact_1_id; FACT_1_ID NUM_ROWS SALES_VALUE ---------- ---------- ----------- 1 478 24291.35 2 522 26237.04 2 rows selected. SQL>
SELECT fact_1_id,
fact_2_id,
COUNT(*) AS num_rows,
SUM(sales_value) AS sales_value
FROM dimension_tab
GROUP BY fact_1_id, fact_2_id
ORDER BY fact_1_id, fact_2_id;
FACT_1_ID FACT_2_ID NUM_ROWS SALES_VALUE
---------- ---------- ---------- -----------
1 1 83 4363.55
1 2 96 4794.76
1 3 93 4718.25
1 4 105 5387.45
1 5 101 5027.34
2 1 109 5652.84
2 2 96 4583.02
2 3 110 5555.77
2 4 113 5936.67
2 5 94 4508.74
10 rows selected.
SQL>
SELECT fact_1_id, fact_2_id, SUM(sales_value) AS sales_value FROM dimension_tab GROUP BY ROLLUP (fact_1_id, fact_2_id) ORDER BY fact_1_id, fact_2_id; FACT_1_ID FACT_2_ID SALES_VALUE ---------- ---------- ----------- 1 1 4363.55 1 2 4794.76 1 3 4718.25 1 4 5387.45 1 5 5027.34 1 24291.35 2 1 5652.84 2 2 4583.02 2 3 5555.77 2 4 5936.67 2 5 4508.74 2 26237.04 50528.39 13 rows selected. SQL>以下语句结果:Click Here.当行中包含null的时候,这种并不是一种很好的方法,稍后会讨论.
SELECT fact_1_id,
fact_2_id,
fact_3_id,
SUM(sales_value) AS sales_value
FROM dimension_tab
GROUP BY ROLLUP (fact_1_id, fact_2_id, fact_3_id)
ORDER BY fact_1_id, fact_2_id, fact_3_id;
SELECT fact_1_id,
fact_2_id,
fact_3_id,
SUM(sales_value) AS sales_value
FROM dimension_tab
GROUP BY fact_1_id, ROLLUP (fact_2_id, fact_3_id)
ORDER BY fact_1_id, fact_2_id, fact_3_id;
SELECT fact_1_id, fact_2_id, SUM(sales_value) AS sales_value FROM dimension_tab GROUP BY CUBE (fact_1_id, fact_2_id) ORDER BY fact_1_id, fact_2_id; FACT_1_ID FACT_2_ID SALES_VALUE ---------- ---------- ----------- 1 1 4363.55 1 2 4794.76 1 3 4718.25 1 4 5387.45 1 5 5027.34 1 24291.35 2 1 5652.84 2 2 4583.02 2 3 5555.77 2 4 5936.67 2 5 4508.74 2 26237.04 1 10016.39 2 9377.78 3 10274.02 4 11324.12 5 9536.08 50528.39 18 rows selected. SQL>如果cube中有三组(或n组)时,结果(或需计算):Click Here
SELECT fact_1_id, fact_2_id, fact_3_id, SUM(sales_value) AS sales_value FROM dimension_tab GROUP BY CUBE (fact_1_id, fact_2_id, fact_3_id) ORDER BY fact_1_id, fact_2_id, fact_3_id;
SELECT fact_1_id, fact_2_id, fact_3_id, SUM(sales_value) AS sales_value FROM dimension_tab GROUP BY fact_1_id, CUBE (fact_2_id, fact_3_id) ORDER BY fact_1_id, fact_2_id, fact_3_id;
SELECT fact_1_id, fact_2_id, SUM(sales_value) AS sales_value, GROUPING(fact_1_id) AS f1g, GROUPING(fact_2_id) AS f2g FROM dimension_tab GROUP BY CUBE (fact_1_id, fact_2_id) ORDER BY fact_1_id, fact_2_id; FACT_1_ID FACT_2_ID SALES_VALUE F1G F2G ---------- ---------- ----------- ---------- ---------- 1 1 4363.55 0 0 1 2 4794.76 0 0 1 3 4718.25 0 0 1 4 5387.45 0 0 1 5 5027.34 0 0 1 24291.35 0 1 2 1 5652.84 0 0 2 2 4583.02 0 0 2 3 5555.77 0 0 2 4 5936.67 0 0 2 5 4508.74 0 0 2 26237.04 0 1 1 10016.39 1 0 2 9377.78 1 0 3 10274.02 1 0 4 11324.12 1 0 5 9536.08 1 0 50528.39 1 1 18 rows selected. SQL>
可以看出:(都是由rollup或cube所产生的)
FACT_1_ID
列的统计FACT_2_ID
列的统计FACT_1_ID
和FACT_2_ID
所产生的一个统计grouping函数可以用来筛选排序结果.
SELECT fact_1_id,
fact_2_id,
SUM(sales_value) AS sales_value,
GROUPING(fact_1_id) AS f1g,
GROUPING(fact_2_id) AS f2g
FROM dimension_tab
GROUP BY CUBE (fact_1_id, fact_2_id)
HAVING GROUPING(fact_1_id) = 1 OR GROUPING(fact_2_id) = 1 ORDER BY GROUPING(fact_1_id), GROUPING(fact_2_id);
FACT_1_ID FACT_2_ID SALES_VALUE F1G F2G
---------- ---------- ----------- ---------- ----------
1 24291.35 0 1
2 26237.04 0 1
4 11324.12 1 0
3 10274.02 1 0
2 9377.78 1 0
1 10016.39 1 0
5 9536.08 1 0
50528.39 1 1
8 rows selected.
SQL>
grouping_id函数也是提供了一个可以确定是否为统计的行的辨别方式.grouping_id返回的是group by分组的级别(或者说层次)
SELECT fact_1_id,
fact_2_id,
SUM(sales_value) AS sales_value,
GROUPING_ID(fact_1_id, fact_2_id) AS grouping_id
FROM dimension_tab
GROUP BY CUBE (fact_1_id, fact_2_id)
ORDER BY fact_1_id, fact_2_id;
FACT_1_ID FACT_2_ID SALES_VALUE GROUPING_ID
---------- ---------- ----------- -----------
1 1 4363.55 0
1 2 4794.76 0
1 3 4718.25 0
1 4 5387.45 0
1 5 5027.34 0
1 24291.35 1
2 1 5652.84 0
2 2 4583.02 0
2 3 5555.77 0
2 4 5936.67 0
2 5 4508.74 0
2 26237.04 1
1 10016.39 2
2 9377.78 2
3 10274.02 2
4 11324.12 2
5 9536.08 2
50528.39 3
18 rows selected.
SQL>
重复的统计的结果进行划分.第一次为0,依次出现相同结果,id 1开始递增.
SELECT fact_1_id, fact_2_id, SUM(sales_value) AS sales_value, GROUPING_ID(fact_1_id, fact_2_id) AS grouping_id, GROUP_ID() AS group_id FROM dimension_tab GROUP BY GROUPING SETS(fact_1_id, CUBE (fact_1_id, fact_2_id)) ORDER BY fact_1_id, fact_2_id; FACT_1_ID FACT_2_ID SALES_VALUE GROUPING_ID GROUP_ID ---------- ---------- ----------- ----------- ---------- 1 1 4363.55 0 0 1 2 4794.76 0 0 1 3 4718.25 0 0 1 4 5387.45 0 0 1 5 5027.34 0 0 1 24291.35 1 1 1 24291.35 1 0 2 1 5652.84 0 0 2 2 4583.02 0 0 2 3 5555.77 0 0 2 4 5936.67 0 0 2 5 4508.74 0 0 2 26237.04 1 1 2 26237.04 1 0 1 10016.39 2 0 2 9377.78 2 0 3 10274.02 2 0 4 11324.12 2 0 5 9536.08 2 0 50528.39 3 0 20 rows selected. SQL>
也可对结果进行筛选.
SELECT fact_1_id,
fact_2_id,
SUM(sales_value) AS sales_value,
GROUPING_ID(fact_1_id, fact_2_id) AS grouping_id,
GROUP_ID() AS group_id
FROM dimension_tab
GROUP BY GROUPING SETS(fact_1_id, CUBE (fact_1_id, fact_2_id))
HAVING GROUP_ID() = 0
ORDER BY fact_1_id, fact_2_id;
FACT_1_ID FACT_2_ID SALES_VALUE GROUPING_ID GROUP_ID
---------- ---------- ----------- ----------- ----------
1 1 4363.55 0 0
1 2 4794.76 0 0
1 3 4718.25 0 0
1 4 5387.45 0 0
1 5 5027.34 0 0
1 24291.35 1 0
2 1 5652.84 0 0
2 2 4583.02 0 0
2 3 5555.77 0 0
2 4 5936.67 0 0
2 5 4508.74 0 0
2 26237.04 1 0
1 10016.39 2 0
2 9377.78 2 0
3 10274.02 2 0
4 11324.12 2 0
5 9536.08 2 0
50528.39 3 0
18 rows selected.
SQL>
使用cube,特别是在多列时,分组会很多.以下为例,将会有8个组层次.结果:Click Here
SELECT fact_1_id,
fact_2_id,
fact_3_id,
SUM(sales_value) AS sales_value,
GROUPING_ID(fact_1_id, fact_2_id, fact_3_id) AS grouping_id
FROM dimension_tab
GROUP BY CUBE(fact_1_id, fact_2_id, fact_3_id)
ORDER BY fact_1_id, fact_2_id, fact_3_id;
如果我们只需要其中一些分组结果,用grouping sets来筛选,选出"FACT_1_ID, FACT_2_ID
" and "FACT_1_ID, FACT_3_ID分组的统计
SELECT fact_1_id,
fact_2_id,
fact_3_id,
SUM(sales_value) AS sales_value,
GROUPING_ID(fact_1_id, fact_2_id, fact_3_id) AS grouping_id
FROM dimension_tab
GROUP BY GROUPING SETS((fact_1_id, fact_2_id), (fact_1_id, fact_3_id))
ORDER BY fact_1_id, fact_2_id, fact_3_id;
FACT_1_ID FACT_2_ID FACT_3_ID SALES_VALUE GROUPING_ID
---------- ---------- ---------- ----------- -----------
1 1 4363.55 1
1 2 4794.76 1
1 3 4718.25 1
1 4 5387.45 1
1 5 5027.34 1
1 1 2737.4 2
1 2 1854.29 2
1 3 2090.96 2
1 4 2605.17 2
1 5 2590.93 2
1 6 2506.9 2
1 7 1839.85 2
1 8 2953.04 2
1 9 2778.75 2
1 10 2334.06 2
2 1 5652.84 1
2 2 4583.02 1
2 3 5555.77 1
2 4 5936.67 1
2 5 4508.74 1
2 1 3512.69 2
2 2 2847.94 2
2 3 2972.5 2
2 4 2534.06 2
2 5 3115.99 2
2 6 2775.85 2
2 7 2208.19 2
2 8 2358.55 2
2 9 1884.11 2
2 10 2027.16 2
30 rows selected.
SQL>
8.组合列
使用rollup的列的组合(统计),
ROLLUP (a, b, c) (a, b, c) (a, b) (a) ()使用cube的列的组合(统计),
CUBE (a, b, c) (a, b, c) (a, b) (a, c) (a) (b, c) (b) (c) ()允许使用括号将列括起来.在使用rollup或cube,grouping sets时,会将其作为一个单独的整体,不会拆分括号里面的列再组合(统计).
ROLLUP ((a, b), c) (a, b, c) (a, b) () Not considered: (a)
CUBE ((a, b), c) (a, b, c) (a, b) (c) () Not considered: (a, c) (a) (b, c) (b)
以下使用cube时,括号与不带括号.结果:Click Here For Regular 和Click Here For braces
-- Regular Cube. SELECT fact_1_id, fact_2_id, fact_3_id, SUM(sales_value) AS sales_value, GROUPING_ID(fact_1_id, fact_2_id, fact_3_id) AS grouping_id FROM dimension_tab GROUP BY CUBE(fact_1_id, fact_2_id, fact_3_id) ORDER BY fact_1_id, fact_2_id, fact_3_id; -- Cube with composite column. SELECT fact_1_id, fact_2_id, fact_3_id, SUM(sales_value) AS sales_value, GROUPING_ID(fact_1_id, fact_2_id, fact_3_id) AS grouping_id FROM dimension_tab GROUP BY CUBE((fact_1_id, fact_2_id), fact_3_id) ORDER BY fact_1_id, fact_2_id, fact_3_id;
9.ConcatenatedGroupings(个人理解:组之间的笛卡尔积)
在使用group by时,有GROUPING SETS
, CUBE
和ROLLUP组合时,不同组会产生笛卡尔积.
SELECT fact_1_id, fact_2_id, SUM(sales_value) AS sales_value, GROUPING_ID(fact_1_id, fact_2_id) AS grouping_id FROM dimension_tab GROUP BY GROUPING SETS(fact_1_id, fact_2_id) ORDER BY fact_1_id, fact_2_id; FACT_1_ID FACT_2_ID SALES_VALUE GROUPING_ID ---------- ---------- ----------- ----------- 1 24291.35 1 2 26237.04 1 1 10016.39 2 2 9377.78 2 3 10274.02 2 4 11324.12 2 5 9536.08 2 7 rows selected. SQL>
SELECT fact_3_id, fact_4_id, SUM(sales_value) AS sales_value, GROUPING_ID(fact_3_id, fact_4_id) AS grouping_id FROM dimension_tab GROUP BY GROUPING SETS(fact_3_id, fact_4_id) ORDER BY fact_3_id, fact_4_id; FACT_3_ID FACT_4_ID SALES_VALUE GROUPING_ID ---------- ---------- ----------- ----------- 1 6250.09 1 2 4702.23 1 3 5063.46 1 4 5139.23 1 5 5706.92 1 6 5282.75 1 7 4048.04 1 8 5311.59 1 9 4662.86 1 10 4361.22 1 1 4718.55 2 2 5439.1 2 3 4643.4 2 4 4515.3 2 5 5110.27 2 6 5910.78 2 7 4987.22 2 8 4846.25 2 9 5458.82 2 10 4898.7 2 20 rows selected. SQL>
SELECT fact_1_id, fact_2_id, fact_3_id, fact_4_id, SUM(sales_value) AS sales_value, GROUPING_ID(fact_1_id, fact_2_id, fact_3_id, fact_4_id) AS grouping_id FROM dimension_tab GROUP BY GROUPING SETS(fact_1_id, fact_2_id), GROUPING SETS(fact_3_id, fact_4_id) ORDER BY fact_1_id, fact_2_id, fact_3_id, fact_4_id;
GROUPING SETS(fact_1_id, fact_2_id) (fact_1_id) (fact_2_id) GROUPING SETS(fact_3_id, fact_4_id) (fact_3_id) (fact_4_id) GROUPING SETS(fact_1_id, fact_2_id), GROUPING SETS(fact_3_id, fact_4_id) (fact_1_id, fact_3_id) (fact_1_id, fact_4_id) (fact_2_id, fact_3_id) (fact_2_id, fact_4_id)在两个GROUPING SETS中将产生笛卡尔积.
GROUPING SETS(a, b), GROUPING SETS(c, d) (a, c) (a, d) (b, c) (b, d)