ClickHouse is a column-oriented database management system (DBMS) open-sourced by Russia's Yandex in 2016. It is written in C++ and is mainly used for online analytical processing (OLAP), generating analytical reports in real time with SQL queries.
Different workloads can use different storage engines; there are currently more than 20 engines in four major families: MergeTree, Log, integration (interface), and special engines.
Data is compacted periodically in the background. Thanks to an LSM-tree-like structure, ClickHouse writes imported data purely as sequential appends; once written, a data part is immutable, and background compaction merge-sorts multiple parts and writes them back to disk sequentially.
A single query can use all of the machine's CPU cores.
ClickHouse's strength is aggregation analysis over a single very large, wide table.
Reference: https://clickhouse.tech/docs/en/getting-started/install/#from-rpm-packages
A single node covers most learning scenarios; consider a highly available or distributed cluster only when production requires it (a cluster also adds operational overhead).
sudo yum install yum-utils
sudo rpm --import https://repo.clickhouse.tech/CLICKHOUSE-KEY.GPG
sudo yum-config-manager --add-repo https://repo.clickhouse.tech/rpm/stable/x86_64
sudo yum install clickhouse-server clickhouse-client
sudo systemctl start clickhouse-server.service
clickhouse-client
[root@cloud-mn01 ~]# clickhouse-client
ClickHouse client version 21.7.5.29 (official build).
Connecting to localhost:9000 as user default.
Connected to ClickHouse server version 21.7.5 revision 54449.
cloud-mn01 :) show databases;
SHOW DATABASES
Query id: 7efd3129-2076-4eff-8bac-2a314abf7b78
┌─name────┐
│ default │
│ system │
└─────────┘
2 rows in set. Elapsed: 0.002 sec.
cloud-mn01 :) Bye.
[root@cloud-mn01 ~]# clickhouse-client --query "show databases"
default
system
[root@cloud-mn01 ~]#
/etc/clickhouse-server/config.xml
<zookeeper>
    <node index="1">
        <host>hadoop102</host>
        <port>2181</port>
    </node>
    <node index="2">
        <host>hadoop103</host>
        <port>2181</port>
    </node>
    <node index="3">
        <host>hadoop104</host>
        <port>2181</port>
    </node>
</zookeeper>
Replication only synchronizes data, not table schemas, so the table must be created manually on every node.
In ReplicatedMergeTree, the first argument is the shard's zk_path, usually written in the form /clickhouse/table/{shard}/{table_name}; with a single shard, simply use 01. The second argument is the replica name; replicas of the same shard must have different names.
CREATE TABLE t_order_rep2
(
`id` UInt32,
`sku_id` String,
`total_amount` Decimal(16, 2),
`create_time` Datetime
)
ENGINE = ReplicatedMergeTree('/clickhouse/table/01/t_order_rep', 'rep_101')
PARTITION BY toYYYYMMDD(create_time)
PRIMARY KEY id
ORDER BY (id, sku_id)
CREATE TABLE t_order_rep2
(
`id` UInt32,
`sku_id` String,
`total_amount` Decimal(16, 2),
`create_time` Datetime
)
ENGINE = ReplicatedMergeTree('/clickhouse/table/01/t_order_rep', 'rep_102')
PARTITION BY toYYYYMMDD(create_time)
PRIMARY KEY id
ORDER BY (id, sku_id)
/etc/clickhouse-server/config.xml
<remote_servers>
    <gmall_cluster>
        <shard>
            <internal_replication>true</internal_replication>
            <replica>
                <host>hadoop101</host>
                <port>9000</port>
            </replica>
            <replica>
                <host>hadoop102</host>
                <port>9000</port>
            </replica>
        </shard>
        <shard>
            <internal_replication>true</internal_replication>
            <replica>
                <host>hadoop103</host>
                <port>9000</port>
            </replica>
            <replica>
                <host>hadoop104</host>
                <port>9000</port>
            </replica>
        </shard>
        <shard>
            <internal_replication>true</internal_replication>
            <replica>
                <host>hadoop105</host>
                <port>9000</port>
            </replica>
            <replica>
                <host>hadoop106</host>
                <port>9000</port>
            </replica>
        </shard>
    </gmall_cluster>
</remote_servers>
<zookeeper>
    <node index="1">
        <host>hadoop102</host>
        <port>2181</port>
    </node>
    <node index="2">
        <host>hadoop103</host>
        <port>2181</port>
    </node>
    <node index="3">
        <host>hadoop104</host>
        <port>2181</port>
    </node>
</zookeeper>
<macros>
    <shard>01</shard>
    <replica>rep_1_1</replica>
</macros>
This table only returns the data stored on its own shard.
CREATE TABLE st_order_mt ON CLUSTER gmall_cluster
(
`id` UInt32,
`sku_id` String,
`total_amount` Decimal(16, 2),
`create_time` Datetime
)
ENGINE = ReplicatedMergeTree('/clickhouse/tables/{shard}/st_order_mt', '{replica}')
PARTITION BY toYYYYMMDD(create_time)
PRIMARY KEY id
ORDER BY (id, sku_id)
This table returns data from all shards.
CREATE TABLE st_order_mt_all2 ON CLUSTER gmall_cluster
(
`id` UInt32,
`sku_id` String,
`total_amount` Decimal(16, 2),
`create_time` Datetime
)
ENGINE = Distributed(gmall_cluster, default, st_order_mt, hiveHash(sku_id))
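A quick sketch of how these two tables interact (assuming st_order_mt has been created on every node of gmall_cluster as above; the sample rows are made up):
-- Insert through the Distributed table; hiveHash(sku_id) decides which shard each row lands on
INSERT INTO st_order_mt_all2 VALUES
(201, 'sku_001', 1000.00, '2020-06-01 12:00:00'),
(202, 'sku_002', 2000.00, '2020-06-01 12:00:00'),
(203, 'sku_004', 2500.00, '2020-06-01 12:00:00');
-- The Distributed table aggregates the data of all shards
SELECT * FROM st_order_mt_all2;
-- The local table only returns the rows stored on the current shard
SELECT * FROM st_order_mt;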
Full data type documentation: https://clickhouse.tech/docs/en/sql-reference/data-types/#data_types
Type | Range |
---|---|
Int8 | [-128 : 127] |
Int16 | [-32768 : 32767] |
Int32 | [-2147483648 : 2147483647] |
Int64 | [-9223372036854775808 : 9223372036854775807] |
UInt8 | [0 : 255] |
UInt16 | [0 : 65535] |
UInt32 | [0 : 4294967295] |
UInt64 | [0 : 18446744073709551615] |
Type | Description |
---|---|
Float32 | float |
Float64 | double |
Store data as integers whenever possible, because floating-point arithmetic can introduce rounding errors.
cloud-mn01 :) select 1.0 - 0.9;
SELECT 1. - 0.9
Query id: ec6a31cf-42df-418e-bcd5-63b3732ecb44
┌──────minus(1., 0.9)─┐
│ 0.09999999999999998 │
└─────────────────────┘
1 rows in set. Elapsed: 0.003 sec.
cloud-mn01 :)
There is no dedicated Boolean type. Use UInt8 and restrict the values to 0 or 1.
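For example (a small sketch with a hypothetical table), a flag column can be declared as UInt8 and used like a boolean:
CREATE TABLE t_flag
(
    `id` UInt32,
    `is_vip` UInt8    -- 0 or 1 by convention
)
ENGINE = TinyLog;
INSERT INTO t_flag VALUES (1, 1), (2, 0);
SELECT * FROM t_flag WHERE is_vip = 1;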
Signed fixed-point numbers that keep precision through addition, subtraction, and multiplication. For division, the least significant digits are discarded (not rounded).
Type | Description |
---|---|
Decimal32(s) | Equivalent to Decimal(9-s, s); 1 to 9 significant digits (s is the number of decimal places) |
Decimal64(s) | Equivalent to Decimal(18-s, s); 1 to 18 significant digits |
Decimal128(s) | Equivalent to Decimal(38-s, s); 1 to 38 significant digits |
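A small illustration of the behaviour described above (not part of the original notes):
SELECT
    toDecimal32(2, 4) / 3 AS div_truncated,                 -- 0.6666: the extra digits are dropped, not rounded
    toDecimal32(1.0, 2) - toDecimal32(0.9, 2) AS sub_exact  -- 0.1: addition/subtraction/multiplication keep precision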
Strings can be of arbitrary length and may contain an arbitrary set of bytes, including null bytes.
A fixed-length string of N bytes, where N must be a strictly positive natural number. When the server reads a string shorter than N bytes, it is padded with null bytes to length N. When the server reads a string longer than N bytes, an error is returned.
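For instance (illustrative only), a two-letter country code fits FixedString(2); shorter values are padded with null bytes and longer ones are rejected:
CREATE TABLE t_fixed
(
    `code` FixedString(2)
)
ENGINE = TinyLog;
INSERT INTO t_fixed VALUES ('CN'), ('U');    -- 'U' is padded to 2 bytes
-- INSERT INTO t_fixed VALUES ('USA');       -- would fail: the value is longer than 2 bytes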
Includes the Enum8 and Enum16 types. An Enum stores a mapping of 'string' = integer.
# The -m option enables multi-line statements
[root@cloud-mn01 ~]# clickhouse-client -m
ClickHouse client version 21.7.5.29 (official build).
Connecting to localhost:9000 as user default.
Connected to ClickHouse server version 21.7.5 revision 54449.
# Create
cloud-mn01 :) CREATE TABLE t_enum
:-] (
:-] x Enum8('hello' = 1, 'world' = 2)
:-] )
:-] ENGINE = TinyLog;
CREATE TABLE t_enum
(
`x` Enum8('hello' = 1, 'world' = 2)
)
ENGINE = TinyLog
Query id: b1bdb268-0cd1-4d1a-ad5a-59fc767bb85d
Ok.
0 rows in set. Elapsed: 0.008 sec.
# Insert
cloud-mn01 :) INSERT INTO t_enum VALUES ('hello'), ('world'), ('hello');
INSERT INTO t_enum VALUES
Query id: 16a4ae7c-20a8-4a2c-a4f3-0201823740ca
Ok.
3 rows in set. Elapsed: 0.002 sec.
# Show the numbers the enum values map to
cloud-mn01 :) SELECT CAST(x, 'Int8') FROM t_enum;
SELECT CAST(x, 'Int8')
FROM t_enum
Query id: f9a69904-c5ef-4157-940b-bd171c040063
┌─CAST(x, 'Int8')─┐
│ 1 │
│ 2 │
│ 1 │
└─────────────────┘
3 rows in set. Elapsed: 0.003 sec.
cloud-mn01 :)
Type | Description |
---|---|
Date | Accepts a year-month-day string, e.g. '2019-12-16' |
Datetime | Accepts a year-month-day hour:minute:second string, e.g. '2019-12-16 20:50:10' |
Datetime64 | Accepts a year-month-day hour:minute:second.subsecond string, e.g. '2019-12-16 20:50:10.66' |
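For example (illustrative), strings in these formats can be converted with toDate / toDateTime / toDateTime64:
SELECT
    toDate('2019-12-16')                      AS d,
    toDateTime('2019-12-16 20:50:10')         AS dt,
    toDateTime64('2019-12-16 20:50:10.66', 2) AS dt64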
Array(T): an array of elements of type T.
# Creating an array, method 1: the array function
cloud-mn01 :) SELECT array(1, 2) AS x, toTypeName(x);
SELECT
[1, 2] AS x,
toTypeName(x)
Query id: 30ac6d4c-854e-49b2-bc19-ed1529aa0dde
┌─x─────┬─toTypeName(array(1, 2))─┐
│ [1,2] │ Array(UInt8) │
└───────┴─────────────────────────┘
1 rows in set. Elapsed: 0.002 sec.
# Creating an array, method 2: square brackets
cloud-mn01 :) SELECT [1, 2] AS x, toTypeName(x);
SELECT
[1, 2] AS x,
toTypeName(x)
Query id: 9a6701df-9622-46a3-9a91-a0ad968f6f0a
┌─x─────┬─toTypeName([1, 2])─┐
│ [1,2] │ Array(UInt8) │
└───────┴────────────────────┘
1 rows in set. Elapsed: 0.002 sec.
cloud-mn01 :)
To distinguish null values ClickHouse stores an extra masks file, so nullable columns take more space than ordinary values. Avoid nulls where possible; instead, store a value that is meaningless to the business, such as -1, to represent null.
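A minimal sketch of both options, a Nullable column versus a -1 sentinel (hypothetical table):
CREATE TABLE t_score
(
    `id`    UInt32,
    `score` Nullable(Int32)    -- requires an extra masks (.null.bin) file on disk
)
ENGINE = TinyLog;
INSERT INTO t_score VALUES (1, 100), (2, NULL);
-- Alternative without Nullable: store -1 as a business-level "no value" marker
-- and filter it out explicitly, e.g. WHERE score != -1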
Full list of table engines: https://clickhouse.tech/docs/en/engines/table-engines/#table_engines
Table engines are a distinctive feature of ClickHouse; the table engine determines how a table's data is stored.
Data is saved on disk as column files, with no index support and no concurrency control. Generally used for small tables holding little data; of limited use in production, but fine for practice and testing.
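The table whose definition is shown below can be created and filled like this (a minimal sketch consistent with the SHOW CREATE output that follows; the sample rows are made up):
CREATE TABLE t_tinylog
(
    `id`   String,
    `name` String
)
ENGINE = TinyLog;
INSERT INTO t_tinylog VALUES ('1', 'zhangsan'), ('2', 'lisi');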
cloud-mn01 :) show create table t_tinylog;
SHOW CREATE TABLE t_tinylog
Query id: 9f444ef0-6b2d-4cc7-af79-32e885db9c7a
┌─statement─────────────────────────────────────────────────────────────────────────┐
│ CREATE TABLE default.t_tinylog
(
`id` String,
`name` String
)
ENGINE = TinyLog │
└───────────────────────────────────────────────────────────────────────────────────┘
1 rows in set. Elapsed: 0.003 sec.
cloud-mn01 :)
The MySQL engine allows you to perform SELECT and INSERT queries on data that is stored on a remote MySQL server.
CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]
(
name1 [type1] [DEFAULT|MATERIALIZED|ALIAS expr1] [TTL expr1],
name2 [type2] [DEFAULT|MATERIALIZED|ALIAS expr2] [TTL expr2],
...
) ENGINE = MySQL('host:port', 'database', 'table', 'user', 'password'[, replace_query, 'on_duplicate_clause'])
SETTINGS
[connection_pool_size=16, ]
[connection_max_tries=3, ]
[connection_auto_close=true ]
;
cloud-mn01 :) SHOW CREATE TABLE mysql;
SHOW CREATE TABLE mysql
Query id: 1d8f5ea0-0f46-4ad8-8033-aa96b8cdb2b1
┌─statement───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┐
│ CREATE TABLE default.mysql
(
`name` String,
`age` Int8
)
ENGINE = MySQL('127.0.0.1:3306', 'clickhouse', 'person', 'root', '')
SETTINGS connection_pool_size = 16, connection_max_tries = 3, connection_auto_close = 1 │
└─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┘
1 rows in set. Elapsed: 0.002 sec.
cloud-mn01 :) SELECT * FROM mysql;
SELECT *
FROM mysql
Query id: 724bea04-d126-474b-b814-ab9162f41822
┌─name────┬─age─┐
│ rayslee │ 18 │
└─────────┴─────┘
1 rows in set. Elapsed: 0.002 sec.
cloud-mn01 :) INSERT INTO mysql VALUES('lily', 19);
INSERT INTO mysql VALUES
Query id: 27d72eaa-4c10-461f-ace4-ffab589493a4
Ok.
1 rows in set. Elapsed: 0.003 sec.
cloud-mn01 :)
CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]
(
name1 [type1] [DEFAULT|MATERIALIZED|ALIAS expr1] [TTL expr1],
name2 [type2] [DEFAULT|MATERIALIZED|ALIAS expr2] [TTL expr2],
...
INDEX index_name1 expr1 TYPE type1(...) GRANULARITY value1,
INDEX index_name2 expr2 TYPE type2(...) GRANULARITY value2
) ENGINE = MergeTree()
ORDER BY expr
[PARTITION BY expr]
[PRIMARY KEY expr]
[SAMPLE BY expr]
[TTL expr
[DELETE|TO DISK 'xxx'|TO VOLUME 'xxx' [, ...] ]
[WHERE conditions]
[GROUP BY key_expr [SET v1 = aggr_func(v1) [, v2 = aggr_func(v2) ...]] ] ]
[SETTINGS name=value, ...]
cloud-mn01 :) SHOW CREATE TABLE t_order_mt;
SHOW CREATE TABLE t_order_mt
Query id: e8257846-ed21-40b8-854f-e91ecb6f5a02
┌─statement──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┐
│ CREATE TABLE default.t_order_mt
(
`id` UInt32,
`sku_id` String,
`total_amount` Decimal(16, 2),
`create_time` DateTime
)
ENGINE = MergeTree
PARTITION BY toYYYYMMDD(create_time)
PRIMARY KEY id
ORDER BY (id, sku_id)
SETTINGS index_granularity = 8192 │
└────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┘
1 rows in set. Elapsed: 0.004 sec.
cloud-mn01 :)
cloud-mn01 :) insert into t_order_mt values
:-] (101,'sku_001',1000.00,'2020-06-01 12:00:00') ,
:-] (102,'sku_002',2000.00,'2020-06-01 11:00:00'),
:-] (102,'sku_004',2500.00,'2020-06-01 12:00:00'),
:-] (102,'sku_002',2000.00,'2020-06-01 13:00:00'),
:-] (102,'sku_002',12000.00,'2020-06-01 13:00:00'),
:-] (102,'sku_002',600.00,'2020-06-02 12:00:00');
INSERT INTO t_order_mt VALUES
Query id: 60cdb8e5-3f91-4a42-9d2c-14d93698b23e
Ok.
6 rows in set. Elapsed: 0.002 sec.
cloud-mn01 :) select * from t_order_mt;
SELECT *
FROM t_order_mt
Query id: 96fb5d88-18a8-4bf5-89df-3e87cfaa301f
┌──id─┬─sku_id──┬─total_amount─┬─────────create_time─┐
│ 102 │ sku_002 │ 600.00 │ 2020-06-02 12:00:00 │
└─────┴─────────┴──────────────┴─────────────────────┘
┌──id─┬─sku_id──┬─total_amount─┬─────────create_time─┐
│ 101 │ sku_001 │ 1000.00 │ 2020-06-01 12:00:00 │
│ 102 │ sku_002 │ 2000.00 │ 2020-06-01 11:00:00 │
│ 102 │ sku_002 │ 2000.00 │ 2020-06-01 13:00:00 │
│ 102 │ sku_002 │ 12000.00 │ 2020-06-01 13:00:00 │
│ 102 │ sku_004 │ 2500.00 │ 2020-06-01 12:00:00 │
└─────┴─────────┴──────────────┴─────────────────────┘
6 rows in set. Elapsed: 0.002 sec.
cloud-mn01 :) insert into t_order_mt values
:-] (101,'sku_001',1000.00,'2020-06-01 12:00:00') ,
:-] (102,'sku_002',2000.00,'2020-06-01 11:00:00'),
:-] (102,'sku_004',2500.00,'2020-06-01 12:00:00'),
:-] (102,'sku_002',2000.00,'2020-06-01 13:00:00'),
:-] (102,'sku_002',12000.00,'2020-06-01 13:00:00'),
:-] (102,'sku_002',600.00,'2020-06-02 12:00:00');
INSERT INTO t_order_mt VALUES
Query id: e58fb4bf-2d69-40e6-857e-d91b049a974d
Ok.
6 rows in set. Elapsed: 0.003 sec.
cloud-mn01 :) select * from t_order_mt;
SELECT *
FROM t_order_mt
Query id: 98b097f3-489c-4cc7-81ff-d1fe77e21a50
┌──id─┬─sku_id──┬─total_amount─┬─────────create_time─┐
│ 102 │ sku_002 │ 600.00 │ 2020-06-02 12:00:00 │
└─────┴─────────┴──────────────┴─────────────────────┘
┌──id─┬─sku_id──┬─total_amount─┬─────────create_time─┐
│ 102 │ sku_002 │ 600.00 │ 2020-06-02 12:00:00 │
└─────┴─────────┴──────────────┴─────────────────────┘
┌──id─┬─sku_id──┬─total_amount─┬─────────create_time─┐
│ 101 │ sku_001 │ 1000.00 │ 2020-06-01 12:00:00 │
│ 102 │ sku_002 │ 2000.00 │ 2020-06-01 11:00:00 │
│ 102 │ sku_002 │ 2000.00 │ 2020-06-01 13:00:00 │
│ 102 │ sku_002 │ 12000.00 │ 2020-06-01 13:00:00 │
│ 102 │ sku_004 │ 2500.00 │ 2020-06-01 12:00:00 │
└─────┴─────────┴──────────────┴─────────────────────┘
┌──id─┬─sku_id──┬─total_amount─┬─────────create_time─┐
│ 101 │ sku_001 │ 1000.00 │ 2020-06-01 12:00:00 │
│ 102 │ sku_002 │ 2000.00 │ 2020-06-01 11:00:00 │
│ 102 │ sku_002 │ 2000.00 │ 2020-06-01 13:00:00 │
│ 102 │ sku_002 │ 12000.00 │ 2020-06-01 13:00:00 │
│ 102 │ sku_004 │ 2500.00 │ 2020-06-01 12:00:00 │
└─────┴─────────┴──────────────┴─────────────────────┘
12 rows in set. Elapsed: 0.004 sec.
cloud-mn01 :) optimize table t_order_mt final;
OPTIMIZE TABLE t_order_mt FINAL
Query id: 6c242f60-e096-4293-b936-9df17763ffb1
Ok.
0 rows in set. Elapsed: 0.005 sec.
cloud-mn01 :) select * from t_order_mt;
SELECT *
FROM t_order_mt
Query id: 670d8871-bec7-43d4-9141-1347602ed24a
┌──id─┬─sku_id──┬─total_amount─┬─────────create_time─┐
│ 102 │ sku_002 │ 600.00 │ 2020-06-02 12:00:00 │
│ 102 │ sku_002 │ 600.00 │ 2020-06-02 12:00:00 │
└─────┴─────────┴──────────────┴─────────────────────┘
┌──id─┬─sku_id──┬─total_amount─┬─────────create_time─┐
│ 101 │ sku_001 │ 1000.00 │ 2020-06-01 12:00:00 │
│ 101 │ sku_001 │ 1000.00 │ 2020-06-01 12:00:00 │
│ 102 │ sku_002 │ 2000.00 │ 2020-06-01 11:00:00 │
│ 102 │ sku_002 │ 2000.00 │ 2020-06-01 13:00:00 │
│ 102 │ sku_002 │ 12000.00 │ 2020-06-01 13:00:00 │
│ 102 │ sku_002 │ 2000.00 │ 2020-06-01 11:00:00 │
│ 102 │ sku_002 │ 2000.00 │ 2020-06-01 13:00:00 │
│ 102 │ sku_002 │ 12000.00 │ 2020-06-01 13:00:00 │
│ 102 │ sku_004 │ 2500.00 │ 2020-06-01 12:00:00 │
│ 102 │ sku_004 │ 2500.00 │ 2020-06-01 12:00:00 │
└─────┴─────────┴──────────────┴─────────────────────┘
12 rows in set. Elapsed: 0.003 sec.
cloud-mn01 :)
# Example: 20200602_2_4_1 = PartitionId_MinBlockNum_MaxBlockNum_Level
# => PartitionId
#    the partition value
# => MinBlockNum
#    the minimum block number, an auto-incrementing counter starting at 1; it increases by one every time a new part directory is created
# => MaxBlockNum
#    the maximum block number; for a newly created part, MinBlockNum equals MaxBlockNum
# => Level
#    the merge level, i.e. how many times the part has been merged; more merges mean a higher level
[root@cloud-mn01 t_order_mt]# pwd
/var/lib/clickhouse/data/default/t_order_mt
[root@cloud-mn01 t_order_mt]# ll
total 4
drwxr-x--- 2 clickhouse clickhouse 203 Aug 1 12:32 20200601_1_1_0
drwxr-x--- 2 clickhouse clickhouse 203 Aug 1 12:34 20200601_1_3_1
drwxr-x--- 2 clickhouse clickhouse 203 Aug 1 12:33 20200601_3_3_0
drwxr-x--- 2 clickhouse clickhouse 203 Aug 1 12:32 20200602_2_2_0
drwxr-x--- 2 clickhouse clickhouse 203 Aug 1 12:34 20200602_2_4_1
drwxr-x--- 2 clickhouse clickhouse 203 Aug 1 12:33 20200602_4_4_0
drwxr-x--- 2 clickhouse clickhouse 6 Aug 1 12:29 detached
-rw-r----- 1 clickhouse clickhouse 1 Aug 1 12:29 format_version.txt
[root@cloud-mn01 t_order_mt]#
Primary keys in ClickHouse differ from other databases: they only provide a first-level index and are not a uniqueness constraint, which means rows with the same primary key can exist.
Secondary indexes help queries that filter on non-primary-key columns.
GRANULARITY N sets the granularity of the secondary index relative to the primary index, i.e. one index entry is generated for every N * index_granularity rows.
cloud-mn01 :) SHOW CREATE TABLE t_order_mt2;
SHOW CREATE TABLE t_order_mt2
Query id: d23601cf-3f9a-44f6-bde1-51735d7c31a4
┌─statement──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┐
│ CREATE TABLE default.t_order_mt2
(
`id` UInt32,
`sku_id` String,
`total_amount` Decimal(16, 2),
`create_time` DateTime,
INDEX a total_amount TYPE minmax GRANULARITY 5
)
ENGINE = MergeTree
PARTITION BY toYYYYMMDD(create_time)
PRIMARY KEY id
ORDER BY (id, sku_id)
SETTINGS index_granularity = 8192 │
└────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┘
1 rows in set. Elapsed: 0.002 sec.
cloud-mn01 :)
TTL (Time To Live): MergeTree can manage the lifecycle of a whole table or of individual columns.
# Specified at table creation
CREATE TABLE example_table
(
d DateTime,
a Int TTL d + INTERVAL 1 MONTH,
b Int TTL d + INTERVAL 1 MONTH,
c String
)
ENGINE = MergeTree
PARTITION BY toYYYYMM(d)
ORDER BY d;
# Specified after creation
ALTER TABLE example_table
MODIFY COLUMN
c String TTL d + INTERVAL 1 DAY;
# Specified at table creation
CREATE TABLE example_table
(
d DateTime,
a Int
)
ENGINE = MergeTree
PARTITION BY toYYYYMM(d)
ORDER BY d
TTL d + INTERVAL 1 MONTH [DELETE],
d + INTERVAL 1 WEEK TO VOLUME 'aaa',
d + INTERVAL 2 WEEK TO DISK 'bbb';
# Specified after creation
ALTER TABLE example_table
MODIFY TTL d + INTERVAL 1 DAY;
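A quick way to see table-level TTL in action (a sketch with a hypothetical table; rows expire about 10 seconds after create_time and are removed when parts are merged):
CREATE TABLE t_order_ttl
(
    `id`           UInt32,
    `sku_id`       String,
    `total_amount` Decimal(16, 2),
    `create_time`  DateTime
)
ENGINE = MergeTree
PARTITION BY toYYYYMMDD(create_time)
ORDER BY (id, sku_id)
TTL create_time + INTERVAL 10 SECOND;
INSERT INTO t_order_ttl VALUES (106, 'sku_001', 1000.00, now());
-- After the TTL has expired, force a merge and the row disappears
OPTIMIZE TABLE t_order_ttl FINAL;
SELECT * FROM t_order_ttl;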
create table t_order_rmt(
id UInt32,
sku_id String,
total_amount Decimal(16,2) ,
create_time Datetime
) engine =ReplacingMergeTree(create_time)
partition by toYYYYMMDD(create_time)
primary key (id)
order by (id, sku_id);
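A short demo of the deduplication behaviour (a sketch reusing the earlier sample rows): rows with the same sorting key within a partition are collapsed, keeping the row with the largest create_time. Deduplication only happens when parts are merged, so OPTIMIZE ... FINAL forces it here:
INSERT INTO t_order_rmt VALUES
(101, 'sku_001', 1000.00, '2020-06-01 12:00:00'),
(102, 'sku_002', 2000.00, '2020-06-01 11:00:00'),
(102, 'sku_002', 2000.00, '2020-06-01 13:00:00'),
(102, 'sku_002', 12000.00, '2020-06-01 13:00:00');
OPTIMIZE TABLE t_order_rmt FINAL;
SELECT * FROM t_order_rmt;    -- one row per (id, sku_id) per partition remains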
SummingMergeTree: an engine that "pre-aggregates" data within each partition
create table t_order_smt(
id UInt32,
sku_id String,
total_amount Decimal(16,2) ,
create_time Datetime
) engine =SummingMergeTree(total_amount)
partition by toYYYYMMDD(create_time)
primary key (id)
order by (id,sku_id );
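A sketch of the pre-aggregation behaviour (again forcing the merge with OPTIMIZE so it is visible immediately):
INSERT INTO t_order_smt VALUES
(101, 'sku_001', 1000.00, '2020-06-01 12:00:00'),
(101, 'sku_001', 2000.00, '2020-06-01 13:00:00'),
(102, 'sku_002', 2000.00, '2020-06-01 11:00:00');
OPTIMIZE TABLE t_order_smt FINAL;
-- Rows sharing the sorting key (id, sku_id) in the same partition are collapsed,
-- and total_amount (the column passed to SummingMergeTree) is summed: 101/sku_001 -> 3000.00
SELECT * FROM t_order_smt;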
alter table t_order_smt delete where sku_id = 'sku_001';
alter table t_order_smt update total_amount=toDecimal32(2000.00,2) where id = 102;
GROUP BY supports the WITH ROLLUP / WITH CUBE / WITH TOTALS modifiers for computing subtotals and grand totals.
cloud-mn01 :) select * from t_order_mt;
SELECT *
FROM t_order_mt
Query id: be6ba693-9ad0-45e9-9b05-9cb0aeba1213
┌──id─┬─sku_id──┬─total_amount─┬─────────create_time─┐
│ 105 │ sku_003 │ 600.00 │ 2020-06-02 12:00:00 │
└─────┴─────────┴──────────────┴─────────────────────┘
┌──id─┬─sku_id──┬─total_amount─┬─────────create_time─┐
│ 101 │ sku_001 │ 1000.00 │ 2020-06-01 12:00:00 │
│ 101 │ sku_002 │ 2000.00 │ 2020-06-01 12:00:00 │
│ 103 │ sku_004 │ 2500.00 │ 2020-06-01 12:00:00 │
│ 104 │ sku_002 │ 2000.00 │ 2020-06-01 12:00:00 │
│ 110 │ sku_003 │ 600.00 │ 2020-06-01 12:00:00 │
└─────┴─────────┴──────────────┴─────────────────────┘
┌──id─┬─sku_id──┬─total_amount─┬─────────create_time─┐
│ 106 │ sku_001 │ 1000.00 │ 2020-06-04 12:00:00 │
│ 107 │ sku_002 │ 2000.00 │ 2020-06-04 12:00:00 │
│ 108 │ sku_004 │ 2500.00 │ 2020-06-04 12:00:00 │
│ 109 │ sku_002 │ 2000.00 │ 2020-06-04 12:00:00 │
└─────┴─────────┴──────────────┴─────────────────────┘
10 rows in set. Elapsed: 0.002 sec.
cloud-mn01 :)
cloud-mn01 :) select id , sku_id,sum(total_amount) from t_order_mt group by
:-] id,sku_id with rollup;
SELECT
id,
sku_id,
sum(total_amount)
FROM t_order_mt
GROUP BY
id,
sku_id
WITH ROLLUP
Query id: a43fba98-5bcf-4bf2-96d1-cf2a41b2891d
┌──id─┬─sku_id──┬─sum(total_amount)─┐
│ 110 │ sku_003 │ 600.00 │
│ 109 │ sku_002 │ 2000.00 │
│ 107 │ sku_002 │ 2000.00 │
│ 106 │ sku_001 │ 1000.00 │
│ 104 │ sku_002 │ 2000.00 │
│ 101 │ sku_002 │ 2000.00 │
│ 103 │ sku_004 │ 2500.00 │
│ 108 │ sku_004 │ 2500.00 │
│ 105 │ sku_003 │ 600.00 │
│ 101 │ sku_001 │ 1000.00 │
└─────┴─────────┴───────────────────┘
┌──id─┬─sku_id─┬─sum(total_amount)─┐
│ 110 │ │ 600.00 │
│ 106 │ │ 1000.00 │
│ 105 │ │ 600.00 │
│ 109 │ │ 2000.00 │
│ 107 │ │ 2000.00 │
│ 104 │ │ 2000.00 │
│ 103 │ │ 2500.00 │
│ 108 │ │ 2500.00 │
│ 101 │ │ 3000.00 │
└─────┴────────┴───────────────────┘
┌─id─┬─sku_id─┬─sum(total_amount)─┐
│ 0 │ │ 16200.00 │
└────┴────────┴───────────────────┘
20 rows in set. Elapsed: 0.002 sec.
cloud-mn01 :)
cloud-mn01 :) select id , sku_id,sum(total_amount) from t_order_mt group by
:-] id,sku_id with cube;
SELECT
id,
sku_id,
sum(total_amount)
FROM t_order_mt
GROUP BY
id,
sku_id
WITH CUBE
Query id: df7b72c9-5e06-4ddf-a8f1-5ad1e3e195ad
┌──id─┬─sku_id──┬─sum(total_amount)─┐
│ 110 │ sku_003 │ 600.00 │
│ 109 │ sku_002 │ 2000.00 │
│ 107 │ sku_002 │ 2000.00 │
│ 106 │ sku_001 │ 1000.00 │
│ 104 │ sku_002 │ 2000.00 │
│ 101 │ sku_002 │ 2000.00 │
│ 103 │ sku_004 │ 2500.00 │
│ 108 │ sku_004 │ 2500.00 │
│ 105 │ sku_003 │ 600.00 │
│ 101 │ sku_001 │ 1000.00 │
└─────┴─────────┴───────────────────┘
┌──id─┬─sku_id─┬─sum(total_amount)─┐
│ 110 │ │ 600.00 │
│ 106 │ │ 1000.00 │
│ 105 │ │ 600.00 │
│ 109 │ │ 2000.00 │
│ 107 │ │ 2000.00 │
│ 104 │ │ 2000.00 │
│ 103 │ │ 2500.00 │
│ 108 │ │ 2500.00 │
│ 101 │ │ 3000.00 │
└─────┴────────┴───────────────────┘
┌─id─┬─sku_id──┬─sum(total_amount)─┐
│ 0 │ sku_003 │ 1200.00 │
│ 0 │ sku_004 │ 5000.00 │
│ 0 │ sku_001 │ 2000.00 │
│ 0 │ sku_002 │ 8000.00 │
└────┴─────────┴───────────────────┘
┌─id─┬─sku_id─┬─sum(total_amount)─┐
│ 0 │ │ 16200.00 │
└────┴────────┴───────────────────┘
24 rows in set. Elapsed: 0.003 sec.
cloud-mn01 :)
cloud-mn01 :) select id , sku_id,sum(total_amount) from t_order_mt group by
:-] id,sku_id with totals;
SELECT
id,
sku_id,
sum(total_amount)
FROM t_order_mt
GROUP BY
id,
sku_id
WITH TOTALS
Query id: 0e5d8a29-253e-4f5d-90a1-73c81122837f
┌──id─┬─sku_id──┬─sum(total_amount)─┐
│ 110 │ sku_003 │ 600.00 │
│ 109 │ sku_002 │ 2000.00 │
│ 107 │ sku_002 │ 2000.00 │
│ 106 │ sku_001 │ 1000.00 │
│ 104 │ sku_002 │ 2000.00 │
│ 101 │ sku_002 │ 2000.00 │
│ 103 │ sku_004 │ 2500.00 │
│ 108 │ sku_004 │ 2500.00 │
│ 105 │ sku_003 │ 600.00 │
│ 101 │ sku_001 │ 1000.00 │
└─────┴─────────┴───────────────────┘
Totals:
┌─id─┬─sku_id─┬─sum(total_amount)─┐
│ 0 │ │ 16200.00 │
└────┴────────┴───────────────────┘
10 rows in set. Elapsed: 0.002 sec.
cloud-mn01 :)
clickhouse-client --query "select * from t_order_mt where create_time='2020-06-01 12:00:00'" --format CSVWithNames > /opt/module/data/rs1.csv
alter table tableName add column newcolname String after col1;
alter table tableName modify column newcolname String;
alter table tableName drop column newcolname;
CREATE TABLE t_type2
(
`id` UInt32,
`sku_id` String,
`total_amount` Decimal(16, 2),
`create_time` Int32
)
ENGINE = ReplacingMergeTree(create_time)
PARTITION BY toYYYYMMDD(toDate(create_time)) -- the conversion is required, otherwise an error is thrown
PRIMARY KEY id
ORDER BY (id, sku_id)
CREATE TABLE t_null
(
`x` Int8,
`y` Nullable(Int8)
)
ENGINE = TinyLog
[root@cloud-mn01 t_null]# pwd
/var/lib/clickhouse/data/default/t_null
[root@cloud-mn01 t_null]#
[root@cloud-mn01 t_null]# ll
total 16
-rw-r----- 1 clickhouse clickhouse 91 Aug 10 09:21 sizes.json
-rw-r----- 1 clickhouse clickhouse 28 Aug 10 09:21 x.bin
-rw-r----- 1 clickhouse clickhouse 28 Aug 10 09:21 y.bin
-rw-r----- 1 clickhouse clickhouse 28 Aug 10 09:21 y.null.bin
[root@cloud-mn01 t_null]#
-- Official hits_v1
PARTITION BY toYYYYMM(EventDate)
ORDER BY (CounterID, EventDate, intHash32(UserID))
-- Official visits_v1
PARTITION BY toYYYYMM(EventDate)
ORDER BY (CounterID, EventDate, intHash32(UserID))
1. Code: 252, e.displayText() = DB::Exception: Too many parts(304).
Merges are processing significantly slower than inserts
-- Use the WAL (write-ahead log) to improve write performance (in_memory_parts_enable_wal defaults to true).
2. Code: 241, e.displayText() = DB::Exception: Memory limit (for query)
exceeded:would use 9.37 GiB (attempt to allocate chunk of 301989888
bytes), maximum: 9.31 GiB
-- If the server has spare memory, raise the memory quota, usually via max_memory_usage.
-- If the server is short on memory, let the excess spill to disk instead (slower execution), usually via
-- the max_bytes_before_external_group_by and max_bytes_before_external_sort settings.
Setting | Description |
---|---|
background_pool_size | Size of the background thread pool; merges run in this pool (it is not reserved for merges only). Default 16; if resources allow, raising it to 2x the number of CPU cores (threads) is recommended. |
background_schedule_pool_size | Number of threads for background tasks (replicated tables, Kafka streaming, DNS cache updates). Default 128; 2x the number of CPU cores (threads) is recommended. |
background_distributed_schedule_pool_size | Number of threads for background tasks of distributed sends. Default 16; 2x the number of CPU cores (threads) is recommended. |
max_concurrent_queries | Maximum number of concurrently processed queries (select, insert, etc.). Default 100; 150 (raise further if needed) to 300 is recommended. |
max_threads | Maximum number of CPU cores a single query may use; defaults to the number of CPU cores. |
max_memory_usage | Set in users.xml; the maximum memory a single query may use. It can be set fairly high to raise the cluster's query ceiling, while leaving some memory for the OS, e.g. 100 GB on a 128 GB machine. |
max_bytes_before_external_group_by | Usually set to half of max_memory_usage; when GROUP BY uses more memory than this threshold it spills to disk. ClickHouse aggregation runs in two stages, building intermediate data and merging it. Combined with the previous setting, 50 GB is recommended. |
max_bytes_before_external_sort | When ORDER BY has used this much memory it spills to disk (disk-based sort). If unset, the query simply fails when memory runs out; with it set, ORDER BY completes, only slower than an in-memory sort (in practice very much slower). |
max_table_size_to_drop | Set in config.xml; applies when dropping tables or partitions. Default 50 GB, meaning dropping a table or partition larger than 50 GB fails. Setting it to 0 is recommended so tables/partitions of any size can be dropped. |
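For per-query experiments, some of these limits can also be set at session level (a sketch; the permanent values belong in users.xml / config.xml as described above):
-- Allow a single query to use up to ~100 GB of memory
SET max_memory_usage = 107374182400;
-- Spill GROUP BY / ORDER BY state to disk once ~50 GB has been used
SET max_bytes_before_external_group_by = 53687091200;
SET max_bytes_before_external_sort = 53687091200;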
EXPLAIN [AST | SYNTAX | PLAN | PIPELINE] [setting = value, …] SELECT … [FORMAT …]
EXPLAIN
SELECT number
FROM system.numbers
LIMIT 10
Query id: bbc68c47-0848-4219-ae39-ba0a744df1dd
┌─explain───────────────────────────────────────────────────────────────────┐
│ Expression ((Projection + Before ORDER BY)) │
│ SettingQuotaAndLimits (Set limits and quota after reading from storage) │
│ Limit (preliminary LIMIT) │
│ ReadFromStorage (SystemNumbers) │
└───────────────────────────────────────────────────────────────────────────┘
EXPLAIN header = 1, actions = 1, description = 1
SELECT number
FROM system.numbers
LIMIT 10
Query id: 4d53ac26-5d3e-4217-9ee2-bd798413c1f6
┌─explain───────────────────────────────────────────────────────────────────┐
│ Expression ((Projection + Before ORDER BY)) │
│ Header: number UInt64 │
│ Actions: INPUT :: 0 -> number UInt64 : 0 │
│ Positions: 0 │
│ SettingQuotaAndLimits (Set limits and quota after reading from storage) │
│ Header: number UInt64 │
│ Limit (preliminary LIMIT) │
│ Header: number UInt64 │
│ Limit 10 │
│ Offset 0 │
│ ReadFromStorage (SystemNumbers) │
│ Header: number UInt64 │
└───────────────────────────────────────────────────────────────────────────┘
-- The ternary-operator optimization must be enabled
SET optimize_if_chain_to_multiif = 1;
EXPLAIN SYNTAX
SELECT if(number = 1, 'hello', if(number = 2, 'world', 'atguigu'))
FROM numbers(10)
Query id: d4bd3df8-6a70-4831-8c19-cdfc0ed9da25
┌─explain─────────────────────────────────────────────────────────────┐
│ SELECT multiIf(number = 1, 'hello', number = 2, 'world', 'atguigu') │
│ FROM numbers(10) │
└─────────────────────────────────────────────────────────────────────┘
[root@cloud-mn01 ~]# ll
total 1792572
-rw-r--r-- 1 root root 1271623680 Aug 10 09:58 hits_v1.tar
-rw-r--r-- 1 root root 563968000 Aug 10 09:58 visits_v1.tar
[root@cloud-mn01 ~]# tar -xf hits_v1.tar -C /var/lib/clickhouse # hits_v1 has 130+ columns and 8.8+ million rows
[root@cloud-mn01 ~]# tar -xf visits_v1.tar -C /var/lib/clickhouse # visits_v1 has 180+ columns and 1.6+ million rows
[root@cloud-mn01 ~]# systemctl restart clickhouse-server.service
[root@cloud-mn01 ~]#
When count() or count(*) is used without a WHERE clause, ClickHouse directly reads total_rows from system.tables.
cloud-mn01 :) EXPLAIN SELECT count()FROM datasets.hits_v1;
EXPLAIN
SELECT count()
FROM datasets.hits_v1
Query id: 6ac410cd-81f1-4d96-bd35-2d45bbfb276d
┌─explain──────────────────────────────────────────────┐
│ Expression ((Projection + Before ORDER BY)) │
│ MergingAggregated │
│ ReadFromPreparedSource (Optimized trivial count) │
└──────────────────────────────────────────────────────┘
3 rows in set. Elapsed: 0.002 sec.
cloud-mn01 :)
If a specific column is counted, this optimization is not applied.
cloud-mn01 :) EXPLAIN SELECT count(CounterID)FROM datasets.hits_v1;
EXPLAIN
SELECT count(CounterID)
FROM datasets.hits_v1
Query id: be5182bb-4808-4e8f-a395-1fedd1bdd0be
┌─explain───────────────────────────────────────────────────────────────────────┐
│ Expression ((Projection + Before ORDER BY)) │
│ Aggregating │
│ Expression (Before GROUP BY) │
│ SettingQuotaAndLimits (Set limits and quota after reading from storage) │
│ ReadFromMergeTree │
└───────────────────────────────────────────────────────────────────────────────┘
5 rows in set. Elapsed: 0.002 sec.
cloud-mn01 :)
When a GROUP BY has a HAVING clause and is not modified by WITH CUBE, WITH ROLLUP, or WITH TOTALS, the HAVING filter is pushed down into WHERE so rows are filtered earlier.
EXPLAIN SYNTAX
SELECT UserID
FROM datasets.hits_v1
GROUP BY UserID
HAVING UserID = '8585742290196126178'
Query id: 1ecd2b6b-6a0d-400a-aca1-3e9eac0b7874
┌─explain──────────────────────────────┐
│ SELECT UserID │
│ FROM datasets.hits_v1 │
│ WHERE UserID = '8585742290196126178' │
│ GROUP BY UserID │
└──────────────────────────────────────┘
Subqueries also support predicate pushdown.
EXPLAIN SYNTAX
SELECT *
FROM
(
SELECT UserID
FROM datasets.visits_v1
)
WHERE UserID = '8585742290196126178'
Query id: 4cece210-36f7-45c3-95a3-acb75a72ad09
┌─explain──────────────────────────────────┐
│ SELECT UserID │
│ FROM │
│ ( │
│ SELECT UserID │
│ FROM datasets.visits_v1 │
│ WHERE UserID = '8585742290196126178' │
│ ) │
│ WHERE UserID = '8585742290196126178' │
└──────────────────────────────────────────┘
EXPLAIN SYNTAX
SELECT sum(UserID * 2)
FROM datasets.visits_v1
Query id: bf6d9094-9c2a-4c70-acc4-574dee713e9a
┌─explain─────────────────┐
│ SELECT sum(UserID) * 2 │
│ FROM datasets.visits_v1 │
└─────────────────────────┘
If min, max, or any is applied to an aggregation key (a GROUP BY key), the function call is eliminated.
EXPLAIN SYNTAX
SELECT
sum(UserID * 2),
max(VisitID),
max(UserID)
FROM visits_v1
GROUP BY UserID
Query id: 88dc8579-e110-433b-bc52-46d90078b187
┌─explain──────────────┐
│ SELECT │
│ sum(UserID) * 2, │
│ max(VisitID), │
│ UserID │
│ FROM visits_v1 │
│ GROUP BY UserID │
└──────────────────────┘
If a subquery returns only one row, it is replaced by a scalar where it is referenced.
EXPLAIN SYNTAX
WITH (
SELECT sum(bytes)
FROM system.parts
WHERE active
) AS total_disk_usage
SELECT
(sum(bytes) / total_disk_usage) * 100 AS table_disk_usage,
table
FROM system.parts
GROUP BY table
ORDER BY table_disk_usage DESC
LIMIT 10
Query id: dd48033c-d21b-41f2-87f0-78f3c529dc6b
┌─explain─────────────────────────────────────────────────────────────────────────┐
│ WITH identity(CAST(0, 'UInt64')) AS total_disk_usage │
│ SELECT │
│ (sum(bytes_on_disk AS bytes) / total_disk_usage) * 100 AS table_disk_usage, │
│ table │
│ FROM system.parts │
│ GROUP BY table │
│ ORDER BY table_disk_usage DESC │
│ LIMIT 10 │
└─────────────────────────────────────────────────────────────────────────────────┘
If the optimize_if_chain_to_multiif setting is enabled, chained ternary operators are rewritten into the multiIf function.
EXPLAIN SYNTAX
SELECT if(number = 1, 'hello', if(number = 2, 'world', 'atguigu'))
FROM numbers(10)
SETTINGS optimize_if_chain_to_multiif = 1
Query id: e1cb685f-a2b9-4121-bd8e-374b0c76d735
┌─explain─────────────────────────────────────────────────────────────┐
│ SELECT multiIf(number = 1, 'hello', number = 2, 'world', 'atguigu') │
│ FROM numbers(10) │
│ SETTINGS optimize_if_chain_to_multiif = 1 │
└─────────────────────────────────────────────────────────────────────┘
The SAMPLE modifier only works on MergeTree-family tables, and the sampling expression must be declared when the table is created.
CREATE TABLE datasets.hits_v1
(
`WatchID` UInt64,
`JavaEnable` UInt8,
...
`RequestTry` UInt8
)
ENGINE = MergeTree
...
SAMPLE BY intHash32(UserID)
...
SELECT
Title,
count(*) AS PageViews
FROM hits_v1
SAMPLE 0.1 -- sample 10% of the data; an absolute number of rows can also be given
WHERE CounterID = 57
GROUP BY Title
ORDER BY PageViews DESC
LIMIT 1000
Query id: 269e0b6b-4e78-4282-8e35-f5bdc73c69c3
┌─Title────────────────────────────────────────────────────────────────┬─PageViews─┐
│ │ 77 │
│ Фильмы онлайн на сегодня │ 6 │
│ Сбербанка «Работа, мебель обувь бензор.НЕТ « Новости, аксессионально │ 6 │
└──────────────────────────────────────────────────────────────────────┴───────────┘
cloud-mn01 :) select count(distinct rand()) from hits_v1;
SELECT countDistinct(rand())
FROM hits_v1
Query id: 4c17ea01-14f6-4c83-9990-450bb30a2f59
┌─uniqExact(rand())─┐
│ 8864520 │
└───────────────────┘
1 rows in set. Elapsed: 0.483 sec. Processed 8.87 million rows, 80.31 MB (18.37 million rows/s., 166.26 MB/s.)
cloud-mn01 :) SELECT uniqCombined(rand()) from datasets.hits_v1 ;
SELECT uniqCombined(rand())
FROM datasets.hits_v1
Query id: 63912054-e9ed-47d1-a4db-1923a5c8f9c1
┌─uniqCombined(rand())─┐
│ 8878876 │
└──────────────────────┘
1 rows in set. Elapsed: 0.102 sec. Processed 8.87 million rows, 80.31 MB (86.86 million rows/s., 786.11 MB/s.)
cloud-mn01 :)
ReplacingMergeTree and SummingMergeTree only guarantee eventual consistency; for a while after data is updated, queries may return temporarily inconsistent results.
-- user_id identifies which rows are deduplicated/updated together;
-- create_time is the version column: within each group, the row with the largest create_time is the latest;
-- deleted is a custom flag, e.g. 0 means not deleted and 1 means deleted.
CREATE TABLE test_a
(
`user_id` UInt64,
`score` String,
`deleted` UInt8 DEFAULT 0,
`create_time` DateTime DEFAULT toDateTime(0)
)
ENGINE = ReplacingMergeTree(create_time)
ORDER BY user_id
INSERT INTO test_a (user_id, score) WITH (
SELECT ['A', 'B', 'C', 'D', 'E', 'F', 'G']
) AS dict
SELECT
number AS user_id,
dict[(number % 7) + 1]
FROM numbers(100000)
Adding the FINAL modifier to a query applies the engine's merge logic (deduplication, pre-aggregation, etc.) during the query.
-- Update data
INSERT INTO test_a (user_id, score, create_time) WITH (
SELECT ['AA', 'BB', 'CC', 'DD', 'EE', 'FF', 'GG']
) AS dict
SELECT
number AS user_id,
dict[(number % 7) + 1],
now() AS create_time
FROM numbers(5000)
-- Ordinary query
SELECT COUNT()
FROM test_a
WHERE user_id < 5000
┌─count()─┐
│ 10000 │
└─────────┘
-- Query with FINAL
SELECT COUNT()
FROM test_a
FINAL
WHERE user_id < 5000
┌─count()─┐
│ 5000 │
└─────────┘
-- argMax(field1, field2): the value of field1 at the row where field2 is maximal.
-- An update writes a new row; in the statement above, the modified score is obtained by taking the row with the largest create_time.
SELECT
user_id ,
argMax(score, create_time) AS score,
argMax(deleted, create_time) AS deleted,
max(create_time) AS ctime
FROM test_a
GROUP BY user_id
HAVING deleted = 0;
-- Create a view for convenient testing
CREATE VIEW view_test_a AS
SELECT
user_id,
argMax(score, create_time) AS score,
argMax(deleted, create_time) AS deleted,
max(create_time) AS ctime
FROM test_a
GROUP BY user_id
HAVING deleted = 0
-- Update data
INSERT INTO test_a (user_id, score, create_time) VALUES (0, 'AAAA', now())
-- Query the view: deduplicated
SELECT *
FROM view_test_a
WHERE user_id = 0
Query id: a11e2648-cba4-4fde-9e95-3a6896f0adca
┌─user_id─┬─score─┬─deleted─┬───────────────ctime─┐
│ 0 │ AAAA │ 0 │ 2021-08-10 11:17:49 │
└─────────┴───────┴─────────┴─────────────────────┘
-- Query the base table: all the rows are still there
SELECT *
FROM test_a
WHERE user_id = 0
Query id: 55af213a-f8b7-4238-8456-bc5df1b62562
┌─user_id─┬─score─┬─deleted─┬─────────create_time─┐
│ 0 │ AAAA │ 0 │ 2021-08-10 11:17:49 │
└─────────┴───────┴─────────┴─────────────────────┘
┌─user_id─┬─score─┬─deleted─┬─────────create_time─┐
│ 0 │ A │ 0 │ 1970-01-01 08:00:00 │
└─────────┴───────┴─────────┴─────────────────────┘
-- Delete data
INSERT INTO test_a (user_id, score, deleted, create_time) VALUES (0, 'AAAA', 1, now())
-- Query the view: the record is gone
SELECT *
FROM view_test_a
WHERE user_id = 0
Query id: c6157128-84ac-4e86-92a9-68aad99b539d
Ok.
0 rows in set. Elapsed: 0.006 sec. Processed 8.19 thousand rows, 188.47 KB (1.47 million rows/s., 33.80 MB/s.)
-- Query the base table: all the rows are still there
SELECT *
FROM test_a
WHERE user_id = 0
Query id: 482cbcdb-f2d1-45b4-ba05-7153c0e0a6ef
┌─user_id─┬─score─┬─deleted─┬─────────create_time─┐
│ 0 │ AAAA │ 0 │ 2021-08-10 11:17:49 │
└─────────┴───────┴─────────┴─────────────────────┘
┌─user_id─┬─score─┬─deleted─┬─────────create_time─┐
│ 0 │ AAAA │ 1 │ 2021-08-10 11:19:10 │
└─────────┴───────┴─────────┴─────────────────────┘
┌─user_id─┬─score─┬─deleted─┬─────────create_time─┐
│ 0 │ A │ 0 │ 1970-01-01 08:00:00 │
└─────────┴───────┴─────────┴─────────────────────┘
A materialized view is also created with the CREATE syntax; a hidden target table is created to store the view's data. You can also use TO <table name> to store the data in an explicit table. Without TO, the target table name defaults to .inner.<materialized view name>.
CREATE [MATERIALIZED] VIEW [IF NOT EXISTS] [db.]table_name [TO[db.]name]
[ENGINE = engine] [POPULATE] AS SELECT ...
-- Create the source table
CREATE TABLE hits_test
(
`EventDate` Date,
`CounterID` UInt32,
`UserID` UInt64,
`URL` String,
`Income` UInt8
)
ENGINE = MergeTree
PARTITION BY toYYYYMM(EventDate)
ORDER BY (CounterID, EventDate, intHash32(UserID))
SAMPLE BY intHash32(UserID)
SETTINGS index_granularity = 8192
-- Insert data
INSERT INTO hits_test SELECT
EventDate,
CounterID,
UserID,
URL,
Income
FROM datasets.hits_v1
LIMIT 10000
-- Create the materialized view
CREATE MATERIALIZED VIEW hits_mv
ENGINE = SummingMergeTree
PARTITION BY toYYYYMM(EventDate)
ORDER BY (EventDate, intHash32(UserID)) AS
SELECT
UserID,
EventDate,
count(URL) AS ClickCount,
sum(Income) AS IncomeSum
FROM hits_test
WHERE EventDate >= '2014-03-20'
GROUP BY
UserID,
EventDate
-- Look at the generated inner table
SHOW TABLES
┌─name───────────────────────────────────────────┐
│ .inner_id.069f7d89-bd86-4ae6-869f-7d89bd86fae6 │
│ hits_mv │
│ hits_test │
└────────────────────────────────────────────────┘
-- Load incremental data
INSERT INTO hits_test SELECT
EventDate,
CounterID,
UserID,
URL,
Income
FROM datasets.hits_v1
WHERE EventDate >= '2014-03-23'
LIMIT 10
-- Query the materialized view
SELECT *
FROM hits_mv
Query id: 9af4d8b2-7e9d-48d1-b950-a923e95a047c
┌──────────────UserID─┬──EventDate─┬─ClickCount─┬─IncomeSum─┐
│ 8585742290196126178 │ 2014-03-23 │ 8 │ 16 │
│ 1095363898647626948 │ 2014-03-23 │ 2 │ 0 │
└─────────────────────┴────────────┴────────────┴───────────┘
2 rows in set. Elapsed: 0.002 sec.
_version serves as a version number: it is incremented globally within the database whenever an insert, update, or delete event is observed. _sign marks whether a row has been deleted, with a value of 1 or -1.
binlog and GTID mode must be enabled on MySQL.
[root@cloud-dn02 ~]# cat /etc/my.cnf
[mysqld]
user=root
basedir=/opt/module/mysql
datadir=/opt/module/mysql/data
socket=/tmp/mysql.sock
port=3306
# Make sure MySQL has binlog enabled, with format ROW
server_id=6
log-bin=mysql-bin
binlog_format=ROW
# Enable GTID mode, which keeps replication consistent in a MySQL primary/replica setup (during failover).
gtid-mode=on
enforce-gtid-consistency=1
log-slave-updates=1
# the MaterializeMySQL engine requires default_authentication_plugin='mysql_native_password'.
default_authentication_plugin=mysql_native_password
[mysql]
socket=/tmp/mysql.sock
[root@cloud-dn02 ~]#
CREATE DATABASE testck;
CREATE TABLE `testck`.`t_organization` (
`id` int(11) NOT NULL AUTO_INCREMENT,
`code` int NOT NULL,
`name` text DEFAULT NULL,
`updatetime` datetime DEFAULT NULL,
PRIMARY KEY (`id`),
UNIQUE KEY (`code`)
) ENGINE=InnoDB;
INSERT INTO testck.t_organization (code, name,updatetime) VALUES(1000,'Realinsight',NOW());
INSERT INTO testck.t_organization (code, name,updatetime) VALUES(1001, 'Realindex',NOW());
INSERT INTO testck.t_organization (code, name,updatetime) VALUES(1002,'EDT',NOW());
CREATE TABLE `testck`.`t_user` (
`id` int(11) NOT NULL AUTO_INCREMENT,
`code` int,
PRIMARY KEY (`id`)
) ENGINE=InnoDB;
INSERT INTO testck.t_user (code) VALUES(1);
SET allow_experimental_database_materialize_mysql = 1
CREATE DATABASE test_binlog
ENGINE = MaterializeMySQL('cloud-dn02:3306', 'testck', 'rayslee', 'abcd1234..')
# Note: the root user may hit an unexplained error: Code: 100. DB::Exception: Received from localhost:9000. DB::Exception: Access denied for user root.
cloud-mn01 :) show tables;
SHOW TABLES
Query id: 95891aee-ffbc-4b0c-abc1-881266785c86
┌─name───────────┐
│ t_organization │
│ t_user │
└────────────────┘
2 rows in set. Elapsed: 0.003 sec.
cloud-mn01 :) select * from t_user;
SELECT *
FROM t_user
Query id: 9fa08f8a-4cd3-4c85-a71d-e3a92237caa8
┌─id─┬─code─┐
│ 1 │ 1 │
└────┴──────┘
1 rows in set. Elapsed: 0.003 sec.
cloud-mn01 :) select * from t_organization;
SELECT *
FROM t_organization
Query id: eb1662db-101e-4132-aa7c-0326099ed084
┌─id─┬─code─┬─name────────┬──────────updatetime─┐
│ 1 │ 1000 │ Realinsight │ 2021-08-11 11:22:34 │
│ 2 │ 1001 │ Realindex │ 2021-08-11 11:22:34 │
│ 3 │ 1002 │ EDT │ 2021-08-11 11:22:34 │
└────┴──────┴─────────────┴─────────────────────┘
3 rows in set. Elapsed: 0.003 sec.
cloud-mn01 :)
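The _version and _sign columns described earlier can also be selected explicitly (a sketch; run inside the test_binlog database):
SELECT *, _sign, _version
FROM t_organization
ORDER BY _version DESC;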
[root@cloud-mn01 ~]# tar -zxf /opt/soft/prometheus-2.26.0.linux-amd64.tar.gz -C /opt/module/
[root@cloud-mn01 ~]# ln -s /opt/module/prometheus-2.26.0.linux-amd64 /opt/module/prometheus
[root@cloud-mn01 ~]# vi /opt/module/prometheus/prometheus.yml
[root@cloud-mn01 ~]# tail /opt/module/prometheus/prometheus.yml
# scheme defaults to 'http'.
static_configs:
- targets: ['localhost:9090']
# Add the clickhouse scrape target
- job_name: 'clickhouse'
static_configs:
- targets: ['cloud-mn01:9363']
[root@cloud-mn01 ~]# nohup /opt/module/prometheus/prometheus --config.file=/opt/module/prometheus/prometheus.yml &>> /var/log/prometheus.log &
[1] 1683
[root@cloud-mn01 ~]#
http://192.168.1.101:9090/targets
[root@cloud-mn01 ~]# tar -zxf /opt/soft/grafana-7.5.2.linux-amd64.tar.gz -C /opt/module/
[root@cloud-mn01 ~]# ln -s /opt/module/grafana-7.5.2 /opt/module/grafana
[root@cloud-mn01 ~]# cd /opt/module/grafana
# Note: must be run from the Grafana home directory
[root@cloud-mn01 grafana]# nohup ./bin/grafana-server web &>> /var/log/grafana-server.log &
[2] 1748
[root@cloud-mn01 grafana]#
http://192.168.1.101:3000/ default username/password: admin/admin
[root@cloud-mn01 ~]# vim /etc/clickhouse-server/config.xml
# Enable external access
<listen_host>::</listen_host>
# Expose monitoring metrics
<prometheus>
    <endpoint>/metrics</endpoint>
    <port>9363</port>
    <metrics>true</metrics>
    <events>true</events>
    <asynchronous_metrics>true</asynchronous_metrics>
    <status_info>true</status_info>
</prometheus>
[root@cloud-mn01 ~]# systemctl restart clickhouse-server.service
[root@cloud-mn01 ~]#
https://grafana.com/grafana/dashboards/14432
'+' => Import
https://github.com/AlexAkulov/clickhouse-backup/releases/tag/v1.0.0
## This tool currently has a version-compatibility issue (the latest ClickHouse release is from July, while the tool's latest release is from June).
[root@cloud-mn01 soft]# rpm -ivh clickhouse-backup-1.0.0-1.x86_64.rpm
Preparing... ################################# [100%]
Updating / installing...
1:clickhouse-backup-1.0.0-1 ################################# [100%]
[root@cloud-mn01 soft]# cd /etc/clickhouse-backup/
[root@cloud-mn01 clickhouse-backup]# ll
total 4
-rw-r--r-- 1 root root 1682 Jun 17 00:49 config.yml.example
[root@cloud-mn01 clickhouse-backup]#