ClickHouse 查询之 ARRAY JOIN

ARRAY JOIN 子句允许在数据表的内部，与数组（Array）或者嵌套（Nested）类型的字段进行 JOIN 操作，从而将一行数据展开为多行（数组中的每个元素对应一行）。适用于把数组列展开成多行的“列转行”操作。


CREATE TABLE city
(
    `province` String, 
    `city` Array(String), 
    `rank` Array(UInt8)
)
ENGINE = Log


Ok.

0 rows in set. Elapsed: 0.012 sec. 

insert into city values('hubei',['wuhan','xiangyang'],[1,2]),
('guangdong',['guangzhou','shenzhen','zhuhai'],[1,2,3]);
 
insert into city values('beijing',[],[10]),('shanghai',[],[20]);


查看原始数据:

Clickhouse> select * from city FORMAT PrettyCompactMonoBlock;

SELECT *
FROM city
FORMAT PrettyCompactMonoBlock

┌─province──┬─city──────────────────────────────┬─rank────┐
│ hubei     │ ['wuhan','xiangyang']             │ [1,2]   │
│ guangdong │ ['guangzhou','shenzhen','zhuhai'] │ [1,2,3] │
│ beijing   │ []                                │ [10]    │
│ shanghai  │ []                                │ [20]    │
└───────────┴───────────────────────────────────┴─────────┘


4 rows in set. Elapsed: 0.003 sec. 



1. INNER ARRAY JOIN（不加修饰词时的默认行为，数组为空的行不会出现在结果中）:

Clickhouse> select province,city from city array join city;

SELECT 
    province, 
    city
FROM city
ARRAY JOIN city

┌─province──┬─city──────┐
│ hubei     │ wuhan     │
│ hubei     │ xiangyang │
│ guangdong │ guangzhou │
│ guangdong │ shenzhen  │
│ guangdong │ zhuhai    │
└───────────┴───────────┘

5 rows in set. Elapsed: 0.003 sec. 

Clickhouse> select province,city original_city,new_city from city array join city as new_city;

SELECT 
    province, 
    city AS original_city, 
    new_city
FROM city
ARRAY JOIN city AS new_city

┌─province──┬─original_city─────────────────────┬─new_city──┐
│ hubei     │ ['wuhan','xiangyang']             │ wuhan     │
│ hubei     │ ['wuhan','xiangyang']             │ xiangyang │
│ guangdong │ ['guangzhou','shenzhen','zhuhai'] │ guangzhou │
│ guangdong │ ['guangzhou','shenzhen','zhuhai'] │ shenzhen  │
│ guangdong │ ['guangzhou','shenzhen','zhuhai'] │ zhuhai    │
└───────────┴───────────────────────────────────┴───────────┘

5 rows in set. Elapsed: 0.006 sec. 

2. LEFT ARRAY JOIN（保留数组为空的行，展开后的列取元素类型的默认值，例如 String 为空字符串、数值类型为 0）:

Clickhouse> select province,city original_city,new_city from city LEFT array join city as new_city FORMAT PrettyCompactMonoBlock;

SELECT 
    province, 
    city AS original_city, 
    new_city
FROM city
LEFT ARRAY JOIN city AS new_city
FORMAT PrettyCompactMonoBlock

┌─province──┬─original_city─────────────────────┬─new_city──┐
│ hubei     │ ['wuhan','xiangyang']             │ wuhan     │
│ hubei     │ ['wuhan','xiangyang']             │ xiangyang │
│ guangdong │ ['guangzhou','shenzhen','zhuhai'] │ guangzhou │
│ guangdong │ ['guangzhou','shenzhen','zhuhai'] │ shenzhen  │
│ guangdong │ ['guangzhou','shenzhen','zhuhai'] │ zhuhai    │
│ beijing   │ []                                │           │
│ shanghai  │ []                                │           │
└───────────┴───────────────────────────────────┴───────────┘

7 rows in set. Elapsed: 0.003 sec. 

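当同时对多个数组字段进行 ARRAY JOIN 操作时，查询的计算逻辑是按行合并（各数组按下标一一对应，默认要求这些数组长度相同），而不是产生笛卡尔积。

注意：从下面的查询和输出可以看出，此处示例所用的表与前文的建表语句并不一致——数组列名为 arr_city、arr_rank（对应前文的 city、rank），并且多了一行 hongkong（两个数组均为空）。如需复现，请先相应调整表结构和数据: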

Clickhouse> select province,arr_city,arr_rank,v ,arrayMap(x->x*x,arr_rank) mapv,v1 from city c left array join arr_rank as v,mapv as v1 FORMAT PrettyCompactMonoBlock;

SELECT 
    province, 
    arr_city, 
    arr_rank, 
    v, 
    arrayMap(x -> (x * x), arr_rank) AS mapv, 
    v1
FROM city AS c
LEFT ARRAY JOIN 
    arr_rank AS v, 
    mapv AS v1
FORMAT PrettyCompactMonoBlock

┌─province──┬─arr_city──────────────────────────┬─arr_rank─┬──v─┬─mapv────┬──v1─┐
│ hubei     │ ['wuhan','xiangyang']             │ [1,2]    │  1 │ [1,4]   │   1 │
│ hubei     │ ['wuhan','xiangyang']             │ [1,2]    │  2 │ [1,4]   │   4 │
│ guangdong │ ['guangzhou','shenzhen','zhuhai'] │ [1,2,3]  │  1 │ [1,4,9] │   1 │
│ guangdong │ ['guangzhou','shenzhen','zhuhai'] │ [1,2,3]  │  2 │ [1,4,9] │   4 │
│ guangdong │ ['guangzhou','shenzhen','zhuhai'] │ [1,2,3]  │  3 │ [1,4,9] │   9 │
│ beijing   │ []                                │ [10]     │ 10 │ [100]   │ 100 │
│ shanghai  │ []                                │ [20]     │ 20 │ [400]   │ 400 │
│ hongkong  │ []                                │ []       │  0 │ []      │   0 │
└───────────┴───────────────────────────────────┴──────────┴────┴─────────┴─────┘

8 rows in set. Elapsed: 0.004 sec. 




--- 
ARRAY JOIN 还可以直接对数组函数的返回值进行展开，例如用 arrayEnumerate 取得元素下标、用 arrayMap 生成派生数组（同样按下标一一对应）:
Clickhouse> SELECT province , arr_rank, a, num, mapped
:-] FROM city
:-] ARRAY JOIN arr_rank AS a, arrayEnumerate(arr_rank) AS num, arrayMap(x -> x + 1, arr_rank) AS mapped;

SELECT 
    province, 
    arr_rank, 
    a, 
    num, 
    mapped
FROM city
ARRAY JOIN 
    arr_rank AS a, 
    arrayEnumerate(arr_rank) AS num, 
    arrayMap(x -> (x + 1), arr_rank) AS mapped

┌─province──┬─arr_rank─┬──a─┬─num─┬─mapped─┐
│ hubei     │ [1,2]    │  1 │   1 │      2 │
│ hubei     │ [1,2]    │  2 │   2 │      3 │
│ guangdong │ [1,2,3]  │  1 │   1 │      2 │
│ guangdong │ [1,2,3]  │  2 │   2 │      3 │
│ guangdong │ [1,2,3]  │  3 │   3 │      4 │
│ beijing   │ [10]     │ 10 │   1 │     11 │
│ shanghai  │ [20]     │ 20 │   1 │     21 │
└───────────┴──────────┴────┴─────┴────────┘

7 rows in set. Elapsed: 0.005 sec. 

参考:

https://clickhouse.tech/docs/en/sql-reference/statements/select/array-join/

你可能感兴趣的:(Clickhouse)