【Microsoft SQL Server 2008 技术内幕:T-SQL语言基础】五、透视和分组

Grouping (分组)

-- 1. create test table
-- Rebuilds dbo.Orders in tempdb from scratch and loads the 11 sample rows
-- that the later pivot / unpivot / grouping examples query.
use tempdb;

-- Drop any previous copy so the script can be re-run.
if OBJECT_ID('dbo.Orders', 'u') is not null drop table dbo.Orders;

create table dbo.Orders (
  orderid int not null
  , orderdate date not null -- use datetime type in earlier Sql version
  , empid int not null
  , custid varchar(5) not null
  , qty int not null
  , constraint PK_Orders primary key(orderid)
);

-- A multi-row VALUES list (table value constructor) needs SQL Server 2008+;
-- on earlier versions use one INSERT per row or SELECT ... UNION ALL.
insert into dbo.Orders(orderid, orderdate, empid, custid, qty)
values
  (30001, '20070802', 3, 'A', 10)
  , (10001, '20071224', 2, 'A', 12)
  , (10005, '20071224', 1, 'B', 20)
  , (40001, '20080109', 2, 'A', 40)
  , (10006, '20080118', 1, 'C', 14)
  , (20001, '20080212', 2, 'B', 12)
  , (40005, '20090212', 3, 'A', 10)
  , (20002, '20090216', 1, 'C', 20)
  , (30003, '20090418', 2, 'B', 15)
  , (30004, '20070418', 3, 'C', 22)
  , (30007, '20090907', 3, 'D', 30);

-- Show the loaded rows.
select * from dbo.Orders;
-- 2. standard T-SQL pivoting
-- One result row per employee (the grouping element). Each customer id
-- (the spreading element) becomes its own column holding the summed qty
-- (the aggregation element). A CASE with no ELSE yields NULL and SUM skips
-- NULLs, so an employee/customer pair without orders shows NULL.
select
  empid
  , sum(case custid when 'A' then qty end) as A
  , sum(case custid when 'B' then qty end) as B
  , sum(case custid when 'C' then qty end) as C
  , sum(case custid when 'D' then qty end) as D
from
  dbo.Orders
group by
  empid;
-- 3. pivot operator pivoting
/* grammar
 * see FROM(Transact-SQL)
select ...
from <source_table_or_table_expression>
  pivot (
    <aggregate_function>(<aggregation_element>)
    for <spreading_element>
    in (<list_of_target_columns>)
  ) as <result_table_alias>;
*/
-- The derived table exposes only empid, custid and qty. PIVOT groups
-- implicitly by every input column that is neither the aggregation element
-- (qty) nor the spreading element (custid), which leaves empid.
select empid
  , A, B, C, D -- same as the column list in the IN clause below
from (
    select empid, custid, qty
    from dbo.Orders
  ) as Src -- not named D, to avoid confusion with the pivoted column D
  pivot (sum(qty) for custid in (A, B, C, D)) as P;

1. Pivot运算符不需要显式指定分组元素,即不需要指定Group By子句;它会把源表中既不是扩展元素也不是聚合元素的所有列隐式地当作分组元素。
2. 所以一般不会把PIVOT运算符直接应用于源表,而是应用于只返回分组、扩展、聚合这三类元素的表表达式。
3. 即使该源表中只包含这三类元素,还是应该把Pivot应用于表表达式,以免日后给源表添加新列时分组结果被悄悄改变。
-- 4. create test table2, a pivot table
-- Stores the pivoted result (one row per employee, one qty column per
-- customer) in dbo.EmpCustOrders so the unpivot examples have an input.
use tempdb;

-- Drop any previous copy; SELECT ... INTO needs the target to be absent.
if OBJECT_ID('dbo.EmpCustOrders', 'u') is not null
  drop table dbo.EmpCustOrders;

-- Only the three elements PIVOT needs are passed on.
with OrderQty as (
  select empid, custid, qty
  from dbo.Orders
)
select P.empid, P.A, P.B, P.C, P.D
into dbo.EmpCustOrders
from OrderQty
  pivot(sum(qty) for custid in (A, B, C, D)) as P;

-- Show the stored pivot table.
select empid, A, B, C, D
from dbo.EmpCustOrders;
-- 5. standard T-SQL unpivoting
-- Three logical steps:
--   a. cross join every pivoted row with one row per customer id (copies)
--   b. pick the qty from the column whose name matches custid (CASE)
--   c. drop the rows whose qty is NULL
with Unpivoted as (
  select E.empid
    , C.custid
    , case C.custid
        when 'A' then E.A
        when 'B' then E.B
        when 'C' then E.C
        when 'D' then E.D
      end as qty
  from dbo.EmpCustOrders as E
    cross join (
      -- Sql earlier than 2008: replace VALUES with select ... union all ...
      values ('A'), ('B'), ('C'), ('D')
    ) as C(custid)
)
select empid, custid, qty
from Unpivoted
-- a NULL qty marks an employee/customer pair that has no orders
where qty is not null;
-- 6. unpivot operator unpivoting
/* grammar
 * see FROM(Transact-SQL)
select ...
from <source_table_or_table_expression>
  unpivot (
    <target_col_to_hold_source_col_values>
    for <target_col_to_hold_source_col_names>
    in (<list_of_source_columns>)
  ) as <result_table_alias>;
*/
-- Turns the columns A..D of dbo.EmpCustOrders back into rows.
select empid
  , custid -- holds the name of the source column (A, B, C or D)
  , qty    -- holds the value taken from that source column
from dbo.EmpCustOrders
  unpivot(qty for custid in (A, B, C, D)) as U;
1. Unpivot运算符需要在圆括号内指定:
   用于保存源表列值的目标列名(qty)、
   用于保存源表列名的目标列名(custid)、
   源表的列名列表(A, B, C, D)
2. Unpivot运算符会经历和前边第5个例子(standard T-SQL unpivoting)中相同的三个步骤:
   生成副本、提取元素、
   删除交叉位置上的null值 (此步骤相对于t-sql方案不可选)
3. 经过透视变换后再逆透视不能得到相同的源表,因为透视时的聚合已经丢失了源表中的明细行。

use tempdb;

-- 1. grouping examples
-- Emulates four grouping sets with four separate aggregate queries glued
-- together by UNION ALL; NULL fills the column a branch does not group by.

-- grouping set (empid, custid)
select
  empid
  , custid
  , sum(qty) as sumqty
from dbo.Orders
group by empid, custid

union all

-- grouping set (empid)
select
  empid
  , null as custid
  , sum(qty) as sumqty
from dbo.Orders
group by empid

union all

-- grouping set (custid)
select
  null as empid
  , custid
  , sum(qty) as sumqty
from dbo.Orders
group by custid

union all

-- grouping set (): grand total
select
  null as empid
  , null as custid
  , sum(qty) as sumqty
from dbo.Orders;
-- 2. grouping sets sub clause
-- logically equals to example 1
-- advantages:
-- a. less table scan
-- b. less code
select empid, custid, SUM(qty) as sumqty
from dbo.Orders
group by GROUPING sets (
  (empid, custid)
  , (empid)
  , (custid)
  , () -- empty grouping set: grand total over all rows
);
-- 3. cube sub clause
-- Same result as example 2.
-- CUBE(a, b, c) expands to the power set of (a, b, c), i.e. every possible
-- combination of the listed elements. For (empid, custid) that is:
--   (empid, custid), (empid), (custid), ()
select
  empid
  , custid
  , sum(qty) as sumqty
from dbo.Orders
group by
  cube(empid, custid); -- ISO-compliant syntax
-- legacy, non-ISO SQL Server form of the same clause:
--group by empid, custid with cube
-- 4. rollup sub clause
-- ROLLUP(a,b,c) differs from CUBE(a,b,c), it only returns such sub sets:
--   (a, b, c), (a, b), (a), ()
-- which means the inputs form a hierarchy: a > b > c
-- The date parts are aliased so the result columns are not left unnamed.
select YEAR(orderdate) as orderyear
  , MONTH(orderdate) as ordermonth
  , DAY(orderdate) as orderday
  , SUM(qty) as sumqty
from dbo.Orders
group by rollup(
  YEAR(orderdate), MONTH(orderdate), DAY(orderdate)
); -- ISO-compliant syntax
-- legacy, non-ISO SQL Server form of the same clause:
--group by YEAR(orderdate), MONTH(orderdate), DAY(orderdate)
--with rollup;
/* equals to:
group by GROUPING sets (
  (YEAR(orderdate), MONTH(orderdate), DAY(orderdate))
  , (YEAR(orderdate), MONTH(orderdate))
  , (YEAR(orderdate))
  , ()
);
*/
-- 5. grouping function
-- Indicates whether a specified column expression in GROUP BY is
--   aggregated or not: 1 = aggregated (the column is not part of the
--   row's grouping set), 0 = not aggregated.
select
  grouping(empid) as grpemp
  , grouping(custid) as grpcust
  , empid, custid, SUM(qty) as sumqty
from dbo.Orders
group by cube(empid, custid);
a. 如果像本文中的例子一样,所有的列都设定为not null,那么在结果中出现的NULL只能是占位符,表示该列没有参与当前的分组集,据此即可判断每一行所属的分组集。
b. 但是如果定义了允许为null的列就无法用上边的方法了,此时只能使用GROUPING函数(或GROUPING_ID函数)来区分。
-- 6. grouping_id function
-- Sql 2008
-- Returns a integer bitmap representing the input columns' presence
--   in grouping operation. The left most bit corresponds to the first
--   (left most)column in the input list...
-- For grouping_id(empid, custid) the value is
--   grouping(empid) * 2 + grouping(custid):
--   0 = (empid, custid), 1 = (empid), 2 = (custid), 3 = ()
select
  grouping_id(empid, custid) as groupingset
  , empid, custid, SUM(qty) as sumqty
from dbo.Orders
group by cube(empid, custid);

