说到位图,我们就不得不从位运算开始。虽然大部分语言都提供了位运算,但并没有提供类似于位数组的类型;要使用这些位运算,我们只能通过数字类型来实现,比如Java中的int/long等类型。而这些数字类型的数组,我们一般可以称之为“位图”(BitMap)。
位图(bitmap)是一种非常常用的结构,在索引、数据压缩等方面有广泛应用。所谓的 bitmap 就是用一个 bit 位来标记某个元素对应的 value,而 key 即是该元素。由于采用 bit 为单位来存储数据,因此可以大大节省存储空间。
位图函数用于对两个位图对象进行计算。对于任何一个位图函数,它都将返回一个位图对象,例如and、or、xor、not等等。比如:x、y 是位图对象,f 是位图函数,那么 f(x,y) 的结果也是位图对象。
位图对象有两种构造方法:一种是由聚合函数 groupBitmapState 构造的,另一种是由数组对象(Array Object)构造的。同时还可以将位图对象转化为数组对象。
我们使用 RoaringBitmap 实际存储位图对象:当基数小于或等于32时,它使用 Set 保存;当基数大于32时,它使用 RoaringBitmap 保存。这也是低基数集存储更快的原因。
-- Demo table holding one user id per row; the bitmap examples aggregate over it.
CREATE TABLE test.bit_map
(
    user_id UInt64
)
ENGINE = MergeTree()
ORDER BY (user_id)
SETTINGS index_granularity = 8192;
数据如下:
┌─user_id─┐
│ 1 │
│ 2 │
│ 3 │
│ 4 │
│ 5 │
│ 6 │
│ 7 │
│ 8 │
│ 9 │
│ 10 │
│ 11 │
└─────────┘
UInt64
。AggregateFunction(groupBitmap,UInt64)
-- Fold every user_id into one bitmap aggregate state and show that state's type.
SELECT
    groupBitmapState(toUInt64(user_id)) AS a,
    toTypeName(a)
FROM test.bit_map;
array
。AggregateFunction(groupBitmap, UInt64)
-- Round trip: aggregate state -> array -> bitmap rebuilt from that array,
-- followed by the type of the rebuilt bitmap.
SELECT
    bitmapBuild(
        bitmapToArray(groupBitmapState(toUInt64(user_id)))
    ) AS user_bit_map,
    toTypeName(user_bit_map)
FROM test.bit_map;
AggregateFunction(groupBitmap, UInt64)
-- Convert the aggregated bitmap into a plain array and show the array's type.
SELECT
    bitmapToArray(
        groupBitmapState(toUInt64(user_id))
    ) AS user_bit_map,
    toTypeName(user_bit_map)
FROM test.bit_map;
bitmapContains(haystack, needle)
haystack
- 位图对象, AggregateFunction(groupBitmap, UInt64)
。needle
- 元素,类型UInt32
。UInt8
类型:包含该元素时返回1,不包含时返回0。
SELECT bitmapContains(bitmapBuild(bitmapToArray(groupBitmapState(toUInt64(user_id)))),toUInt32(9))AS user_bit_map,
toTypeName(user_bit_map)
FROM test.bit_map
bitmapCardinality(bitmap)
UInt64
类型的数值,表示位图对象的基数。AggregateFunction(groupBitmap, UInt64)
。UInt64
,可以理解为位图对象数组中元素的个数。
SELECT bitmapCardinality(bitmapBuild(bitmapToArray(groupBitmapState(toUInt64(user_id))))) AS user_bit_map,
toTypeName(user_bit_map)
FROM test.bit_map
bitmapMin(bitmap)
AggregateFunction(groupBitmap, UInt64)
。UInt64
类型的数值,表示位图中的最小值。如果位图为空则返回UINT32_MAX
-- Smallest element of the full user bitmap, shown next to the same call on an
-- empty bitmap so the empty-input result can be inspected.
SELECT
    bitmapMin(
        bitmapBuild(bitmapToArray(groupBitmapState(toUInt64(user_id))))
    ) AS user_bit_map,
    toTypeName(user_bit_map) AS user_bit_map_type,
    -- Built from an empty array. The earlier toUInt64(nan) argument converts NaN
    -- to an unspecified integer and aggregates it into a one-element bitmap, so
    -- it never exercised the empty case.
    bitmapMin(bitmapBuild(emptyArrayUInt64())) AS user_bit_map_1,
    -- Type of the second result column (previously taken from user_bit_map).
    toTypeName(user_bit_map_1) AS user_bit_map_1_type
FROM test.bit_map;
bitmapMax(bitmap)
AggregateFunction(groupBitmap, UInt64)
。UInt64
类型的数值,表示位图中的最大值。如果位图为空则返回0
-- Largest element of the full user bitmap, shown next to the same call on an
-- empty bitmap so the empty-input result can be inspected.
SELECT
    bitmapMax(
        bitmapBuild(bitmapToArray(groupBitmapState(toUInt64(user_id))))
    ) AS user_bit_map,
    toTypeName(user_bit_map) AS user_bit_map_type,
    -- Built from an empty array. The earlier toUInt64(nan) argument converts NaN
    -- to an unspecified integer and aggregates it into a one-element bitmap, so
    -- it never exercised the empty case.
    bitmapMax(bitmapBuild(emptyArrayUInt64())) AS user_bit_map_1,
    -- Type of the second result column (previously taken from user_bit_map).
    toTypeName(user_bit_map_1) AS user_bit_map_1_type
FROM test.bit_map;
bitmapSubsetInRange(bitmap, range_start, range_end)
bitmap
– 位图对象, AggregateFunction(groupBitmap, UInt64)
。range_start
– 范围起始点(含),类型为UInt32
。range_end
– 范围结束点(不含),类型UInt32
-- Sub-bitmap of the user bitmap restricted to the range [6, 10), plus its type.
SELECT
    bitmapSubsetInRange(
        bitmapBuild(bitmapToArray(groupBitmapState(toUInt64(user_id)))),
        toUInt32(6),
        toUInt32(10)
    ) AS user_bit_map,
    toTypeName(user_bit_map) AS user_bit_map_type
FROM test.bit_map;
bitmapSubsetLimit(bitmap, range_start, limit)
bitmap
– 位图对象,AggregateFunction(groupBitmap, UInt64)
。range_start
– 范围起始点(含),类型为UInt32
。limit
– 子位图基数上限,类型为UInt32
。AggregateFunction(groupBitmap, UInt64)
-- Sub-bitmap starting at 6 (inclusive) and capped at 10 elements, plus its type.
SELECT
    bitmapSubsetLimit(
        bitmapBuild(bitmapToArray(groupBitmapState(toUInt64(user_id)))),
        toUInt32(6),
        toUInt32(10)
    ) AS user_bit_map,
    toTypeName(user_bit_map) AS user_bit_map_type
FROM test.bit_map;
与hasAny(array,array)
类似,判断两个位图对象是否包含相同的元素。bitmapHasAny(bitmap,bitmap)
bitmap
– 位图对象,AggregateFunction(groupBitmap, UInt64)
-- Do the [6, 10) and [1, 7) slices of the user bitmap share any element?
WITH bitmapBuild(bitmapToArray(groupBitmapState(toUInt64(user_id)))) AS all_users
SELECT
    bitmapHasAny(
        bitmapSubsetInRange(all_users, toUInt32(6), toUInt32(10)),
        bitmapSubsetInRange(all_users, toUInt32(1), toUInt32(7))
    ) AS user_bit_map,
    toTypeName(user_bit_map) AS user_bit_map_type
FROM test.bit_map;
与hasAll(array,array)
类似,判断第一个位图是否包含第二个位图的所有元素。bitmapHasAll(bitmap,bitmap)
bitmap
– 位图对象,AggregateFunction(groupBitmap, UInt64)
-- Does the [6, 10) slice of the user bitmap contain every element of the [7, 9) slice?
WITH bitmapBuild(bitmapToArray(groupBitmapState(toUInt64(user_id)))) AS all_users
SELECT
    bitmapHasAll(
        bitmapSubsetInRange(all_users, toUInt32(6), toUInt32(10)),
        bitmapSubsetInRange(all_users, toUInt32(7), toUInt32(9))
    ) AS user_bit_map,
    toTypeName(user_bit_map) AS user_bit_map_type
FROM test.bit_map;
bitmapAnd(bitmap1,bitmap2)
bitmap1
– 位图对象,AggregateFunction(groupBitmap, UInt64)
。bitmap2
– 位图对象,AggregateFunction(groupBitmap, UInt64)
。AggregateFunction(groupBitmap, UInt64)
-- bitmapAnd of the [6, 10) and [7, 9) slices of the user bitmap, plus the result type.
WITH bitmapBuild(bitmapToArray(groupBitmapState(toUInt64(user_id)))) AS all_users
SELECT
    bitmapAnd(
        bitmapSubsetInRange(all_users, toUInt32(6), toUInt32(10)),
        bitmapSubsetInRange(all_users, toUInt32(7), toUInt32(9))
    ) AS user_bit_map,
    toTypeName(user_bit_map) AS user_bit_map_type
FROM test.bit_map;
bitmapOr(bitmap1,bitmap2)
bitmap1
– 位图对象,AggregateFunction(groupBitmap, UInt64)
。bitmap2
– 位图对象,AggregateFunction(groupBitmap, UInt64)
。AggregateFunction(groupBitmap, UInt64)
-- bitmapOr of the [6, 10) and [7, 9) slices of the user bitmap, plus the result type.
WITH bitmapBuild(bitmapToArray(groupBitmapState(toUInt64(user_id)))) AS all_users
SELECT
    bitmapOr(
        bitmapSubsetInRange(all_users, toUInt32(6), toUInt32(10)),
        bitmapSubsetInRange(all_users, toUInt32(7), toUInt32(9))
    ) AS user_bit_map,
    toTypeName(user_bit_map) AS user_bit_map_type
FROM test.bit_map;
bitmapXor(bitmap1,bitmap2)
bitmap1
– 位图对象,AggregateFunction(groupBitmap, UInt64)
。bitmap2
– 位图对象,AggregateFunction(groupBitmap, UInt64)
。AggregateFunction(groupBitmap, UInt64)
-- bitmapXor of the [6, 10) and [7, 9) slices of the user bitmap, plus the result type.
WITH bitmapBuild(bitmapToArray(groupBitmapState(toUInt64(user_id)))) AS all_users
SELECT
    bitmapXor(
        bitmapSubsetInRange(all_users, toUInt32(6), toUInt32(10)),
        bitmapSubsetInRange(all_users, toUInt32(7), toUInt32(9))
    ) AS user_bit_map,
    toTypeName(user_bit_map) AS user_bit_map_type
FROM test.bit_map;
bitmapAndnot(bitmap1,bitmap2)
bitmap1
– 位图对象,AggregateFunction(groupBitmap, UInt64)
。bitmap2
– 位图对象,AggregateFunction(groupBitmap, UInt64)
。AggregateFunction(groupBitmap, UInt64)
-- bitmapAndnot of the [6, 10) slice (first argument) and the [7, 9) slice
-- (second argument) of the user bitmap, plus the result type.
WITH bitmapBuild(bitmapToArray(groupBitmapState(toUInt64(user_id)))) AS all_users
SELECT
    bitmapAndnot(
        bitmapSubsetInRange(all_users, toUInt32(6), toUInt32(10)),
        bitmapSubsetInRange(all_users, toUInt32(7), toUInt32(9))
    ) AS user_bit_map,
    toTypeName(user_bit_map) AS user_bit_map_type
FROM test.bit_map;
bitmapAndCardinality(bitmap1,bitmap2)
bitmap1
– 位图对象,AggregateFunction(groupBitmap, UInt64)
。bitmap2
– 位图对象,AggregateFunction(groupBitmap, UInt64)
。UInt64
-- bitmapAndCardinality over the [6, 10) and [7, 9) slices of the user bitmap,
-- plus the type of the returned value.
WITH bitmapBuild(bitmapToArray(groupBitmapState(toUInt64(user_id)))) AS all_users
SELECT
    bitmapAndCardinality(
        bitmapSubsetInRange(all_users, toUInt32(6), toUInt32(10)),
        bitmapSubsetInRange(all_users, toUInt32(7), toUInt32(9))
    ) AS user_bit_map,
    toTypeName(user_bit_map) AS user_bit_map_type
FROM test.bit_map;
bitmapOrCardinality(bitmap1,bitmap2)
bitmap1
– 位图对象,AggregateFunction(groupBitmap, UInt64)
。bitmap2
– 位图对象,AggregateFunction(groupBitmap, UInt64)
。UInt64
-- bitmapOrCardinality over the [6, 10) and [7, 9) slices of the user bitmap,
-- plus the type of the returned value.
WITH bitmapBuild(bitmapToArray(groupBitmapState(toUInt64(user_id)))) AS all_users
SELECT
    bitmapOrCardinality(
        bitmapSubsetInRange(all_users, toUInt32(6), toUInt32(10)),
        bitmapSubsetInRange(all_users, toUInt32(7), toUInt32(9))
    ) AS user_bit_map,
    toTypeName(user_bit_map) AS user_bit_map_type
FROM test.bit_map;
bitmapXorCardinality(bitmap1,bitmap2)
bitmap1
– 位图对象,AggregateFunction(groupBitmap, UInt64)
。bitmap2
– 位图对象,AggregateFunction(groupBitmap, UInt64)
。UInt64
-- bitmapXorCardinality over the [6, 10) and [7, 9) slices of the user bitmap,
-- plus the type of the returned value.
WITH bitmapBuild(bitmapToArray(groupBitmapState(toUInt64(user_id)))) AS all_users
SELECT
    bitmapXorCardinality(
        bitmapSubsetInRange(all_users, toUInt32(6), toUInt32(10)),
        bitmapSubsetInRange(all_users, toUInt32(7), toUInt32(9))
    ) AS user_bit_map,
    toTypeName(user_bit_map) AS user_bit_map_type
FROM test.bit_map;
bitmapAndnotCardinality(bitmap1,bitmap2)
bitmap1
– 位图对象,AggregateFunction(groupBitmap, UInt64)
。bitmap2
– 位图对象,AggregateFunction(groupBitmap, UInt64)
。UInt64
-- bitmapAndnotCardinality with the [6, 10) slice as first argument and the
-- [7, 9) slice as second argument, plus the type of the returned value.
WITH bitmapBuild(bitmapToArray(groupBitmapState(toUInt64(user_id)))) AS all_users
SELECT
    bitmapAndnotCardinality(
        bitmapSubsetInRange(all_users, toUInt32(6), toUInt32(10)),
        bitmapSubsetInRange(all_users, toUInt32(7), toUInt32(9))
    ) AS user_bit_map,
    toTypeName(user_bit_map) AS user_bit_map_type
FROM test.bit_map;
bitmapTransform(bitmap, from_array, to_array)
bitmap
– 位图对象,类型为AggregateFunction(groupBitmap, UInt64)
。from_array
– 类型为Array(UInt32)
。对于范围[0, from_array.size())
中的idx
,如果bitmap
包含from_array[idx]
,则将其替换为to_array[idx]
。注意,如果from_array
和to_array
之间有公共元素,则结果取决于数组排序。to_array
– 类型为Array(UInt32)
, 它的大小应该与from_array
相同。AggregateFunction(groupBitmap, UInt64)
-- Remap elements of the literal bitmap 1..10 and list the result as an array.
-- The mapping pairs are 5 -> 2, 999 -> 888 (999 is not in the bitmap) and 2 -> 20.
SELECT
    bitmapToArray(
        bitmapTransform(
            bitmapBuild([1, 2, 3, 4, 5, 6, 7, 8, 9, 10]),
            CAST([5, 999, 2] AS Array(UInt32)),
            CAST([2, 888, 20] AS Array(UInt32))
        )
    ) AS res;
通过以上内容我们了解到,位图对象有两种构造方法:其一是由聚合函数groupBitmapState构造,其二是由Array Object构造;同时还可以将位图对象转化为数组对象。位图运算函数(例如And、Or、Xor、Andnot等)返回的是结果位图;带有Has的判断函数返回的是逻辑值0或者1;带有Cardinality后缀的函数返回的是运算后结果位图的基数。到此我们关于ClickHouse位图的讲解就结束了,希望可以帮到大家。