# Check firewall status (ClickHouse's ports must be reachable; stop/disable if needed)
systemctl status firewalld.service
# Raise OS resource limits (open files, processes) required by ClickHouse
# vim /etc/security/limits.conf
* soft nofile 65536
* hard nofile 65536
* soft nproc 131072
* hard nproc 131072
# Per-service override file; NOTE(review): this file normally only needs the
# nproc lines — the nofile lines are repeated here from the tutorial; confirm for your distro
# vim /etc/security/limits.d/20-nproc.conf
* soft nofile 65536
* hard nofile 65536
* soft nproc 131072
* hard nproc 131072
# Install all downloaded ClickHouse RPM packages
sudo rpm -ivh *.rpm
# Verify what was installed
sudo rpm -qa|grep clickhouse
# Server configuration file (listen address, data paths, ports, ...)
vim /etc/clickhouse-server/config.xml
# Start the server
systemctl start clickhouse-server
# Disable automatic start on boot
systemctl disable clickhouse-server
# Version of the ClickHouse tgz bundles to install
export LATEST_VERSION=22.3.3.44
# 1. common-static: the main server/client binaries
tar -zxvf clickhouse-common-static-$LATEST_VERSION-amd64.tgz
sudo clickhouse-common-static-$LATEST_VERSION/install/doinst.sh
# 2. debug symbols
tar -zxvf clickhouse-common-static-dbg-$LATEST_VERSION-amd64.tgz
sudo clickhouse-common-static-dbg-$LATEST_VERSION/install/doinst.sh
# 3. client
tar -zxvf clickhouse-client-$LATEST_VERSION-amd64.tgz
sudo clickhouse-client-$LATEST_VERSION/install/doinst.sh
# 4. server — this installer prompts you to set the default user's password
tar -zxvf clickhouse-server-$LATEST_VERSION-amd64.tgz
sudo clickhouse-server-$LATEST_VERSION/install/doinst.sh
# Start the server via the clickhouse launcher
sudo clickhouse start
另外,clickhouse在安装时,会默认创建一个clickhouse用户来部署这些文件。所以,如果不是使用root用户进行操作的话,需要注意下用户权限的问题。
# Run ClickHouse in Docker: 8123 = HTTP interface, 9000 = native TCP protocol;
# raise the open-file ulimit as the image requires; persist data on the host.
# NOTE(review): host path says 'dokcer_data' — likely a typo for 'docker_data';
# confirm the actual directory name before changing it.
docker run -di --name docker_ch -p 8123:8123 -p 9000:9000 --ulimit nofile=262144:262144 --volume=/Volumes/D/dokcer_data/clickhouse:/var/lib/clickhouse clickhouse/clickhouse-server
类型 | ClickHouse 类型 | 数值范围 | Java类型
---|---|---|---
有符号整型 | Int8 | [-128, 127] | byte
| Int16 | [-32768, 32767] | short
| Int32 | [-2147483648, 2147483647] | int
| Int64 | [-9223372036854775808, 9223372036854775807] | long
无符号整型 | UInt8 | [0, 255] |
| UInt16 | [0, 65535] |
| UInt32 | [0, 4294967295] |
| UInt64 | [0, 18446744073709551615] |
-- Create a table with an Enum8 column: each string label maps to an Int8 value
create table t_enum (
x Enum8('hello' = 1, 'world' = 2)
) ENGINE = TinyLog;
-- Insert data (only the declared labels are accepted)
insert into t_enum values('hello'), ('world'), ('hello');
-- Query
select * from t_enum;
-- Inserting any other value raises an error
insert into t_enum values('hehe');
-- Show the underlying integer value stored for each row
select cast(x, 'Int8') from t_enum;
类型 | 说明 | 示例
---|---|---
Date | '年-月-日' 字符串 | 2022-04-12
Datetime | '年-月-日 时:分:秒' 字符串 | 2022-04-12 12:12:12
Datetime64 | '年-月-日 时:分:秒.毫秒' 字符串 | 2022-04-12 12:12:12.123
-- Way 1 to create an array: the array() function
select array(1, 2) as x, toTypeName(x);
-- Way 2: square-bracket literal
select [1, 2] as x, toTypeName(x);
-- Create a database (square brackets mark the optional clause; Atomic is the default engine)
CREATE DATABASE test [ENGINE = Atomic]
-- A MySQL-engine database exposes tables from a remote MySQL server ('host:port', db, user, password)
CREATE DATABASE IF NOT EXISTS mysqldb ENGINE = MySQL('node01:3306', 'test', 'root', '123456');
-- Simple log-engine table
CREATE TABLE t_tinylog (id Int32, name String) ENGINE = TinyLog;
-- General MergeTree DDL template (square brackets mark optional clauses)
CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]
(
name1 [type1] [DEFAULT|MATERIALIZED|ALIAS expr1] [TTL expr1],
name2 [type2] [DEFAULT|MATERIALIZED|ALIAS expr2] [TTL expr2],
...
INDEX index_name1 expr1 TYPE type1(...) GRANULARITY value1,
INDEX index_name2 expr2 TYPE type2(...) GRANULARITY value2
) ENGINE = MergeTree()
ORDER BY expr -- required: the sort key within each data part
[PARTITION BY expr]
[PRIMARY KEY expr]
[SAMPLE BY expr]
[TTL expr [DELETE|TO DISK 'xxx'|TO VOLUME 'xxx'], ...]
[SETTINGS name=value, ...]
-- Create the order table, partitioned by the day of create_time
create table t_order (
id UInt32,
sku_id String,
total_amount Decimal(16, 2),
create_time Datetime
) engine = MergeTree
partition by toYYYYMMDD(create_time)
primary key (id)
order by (id, sku_id);
-- Insert sample rows (dates span two days, so two partitions are produced)
insert into t_order values (101, 'sku_001', 1000.00, '2022-04-12 11:00:00'), (102, 'sku_002', 1500.00, '2022-04-12 10:30:00'), (102, 'sku_003', 2500.00, '2022-04-12 12:30:00'), (102, 'sku_002', 1500.00, '2022-04-12 13:00:00'), (102, 'sku_002', 10000.00, '2022-04-12 13:00:00'), (102, 'sku_002', 800.00, '2022-04-13 12:00:00');
[root@centos128 t_order]# cd /var/lib/clickhouse/data/test/t_order/
[root@centos128 t_order]# tree .
.
├── 20220412_1_1_0
│ ├── checksums.txt # 校验文件,用于校验各个文件的正确性,存放各个文件的 size 以及 hash 值
│ ├── columns.txt # 表的结构信息
│ ├── count.txt # 当前分区的数据条数,所以对于clickhouse来说,查表的行数非常非常快
│ ├── data.bin # 数据文件
│ ├── data.mrk3 # 标记文件,在 idx 索引文件和 bin 数据文件之间起到桥梁作用
│ ├── default_compression_codec.txt
│ ├── minmax_create_time.idx # 分区键的最大最小值
│ ├── partition.dat
│ └── primary.idx # 主键索引文件,用于加快查询效率
├── 20220413_2_2_0
│ ├── checksums.txt
│ ├── columns.txt
│ ├── count.txt
│ ├── data.bin
│ ├── data.mrk3
│ ├── default_compression_codec.txt
│ ├── minmax_create_time.idx
│ ├── partition.dat
│ └── primary.idx
├── detached
└── format_version.txt
PartitionId_MinBlockNum_MaxBlockNum_Level:分区_最小分区块编号_最大分区块编号_合并层级
并行:分区后,面对涉及跨分区的查询统计,ClickHouse 会以分区为单位进行处理。
数据写入和分区合并:任何一个批次的数据写入都会产生一个临时分区,不会纳入任何一个已有的分区。写入后的某个时刻(大概 10-15 分钟后),ClickHouse 会自动执行合并操作(等不及也可以手动通过 optimize 执行),把临时分区的数据,合并到已有分区中。
-- Manually trigger a merge of data parts; FINAL forces merging even when a single part remains
OPTIMIZE TABLE ${table_name} FINAL;
# Server logs directory
cd /var/log/clickhouse-server
# Show 10 lines of context around the given query id.
# grep reads the file directly — piping through 'less' added nothing.
grep -C 10 'c145f3c8-0833-4fa3-9dc1-17adf3ba65b7' clickhouse-server.log
-- Enable data-skipping (secondary) indexes.
-- NOTE(review): this setting was only required on older ClickHouse versions;
-- recent releases support skipping indexes by default — confirm for your version.
set allow_experimental_data_skipping_indices=1;
-- Add a minmax skipping index on total_amount; GRANULARITY 5 = one index entry per 5 index granules
ALTER TABLE t_order ADD INDEX idx_total_amount total_amount TYPE minmax GRANULARITY 5;
# Run the query with trace-level server logs streamed back to the client
# (lets you see whether the skipping index was used); --password prompts interactively.
clickhouse-client --send_logs_level=trace <<< 'select * from test.t_order where total_amount > toDecimal32(1200., 2)' --password
-- Create a test table with a COLUMN-level TTL: total_amount is reset to its
-- default value 10 seconds after create_time
create table t_order2 (
id UInt32,
sku_id String,
total_amount Decimal(16, 2) TTL create_time + interval 10 SECOND,
create_time Datetime
) engine = MergeTree
partition by toYYYYMMDD(create_time)
primary key (id)
order by (id, sku_id);
-- Insert data
insert into t_order2 values (101, 'sku_001', 1000.00, '2022-04-15 09:00:00'), (102, 'sku_002', 1500.00, '2022-04-15 10:30:00'), (102, 'sku_003', 2500.00, '2022-04-15 12:30:00'), (102, 'sku_002', 1500.00, '2022-04-15 13:00:00'), (102, 'sku_002', 10000.00, '2022-04-15 13:00:00'), (102, 'sku_002', 800.00, '2022-04-15 12:00:00');
-- TABLE-level TTL grammar:
TTL expr
[DELETE|TO DISK 'xxx'|TO VOLUME 'xxx'][, DELETE|TO DISK 'aaa'|TO VOLUME 'bbb'] ...
[WHERE conditions]
[GROUP BY key_expr [SET v1 = aggr_func(v1) [, v2 = aggr_func(v2) ...]] ]
-- The rule type follows each TTL expression and controls what happens when it fires:
-- DELETE - remove expired rows (the default action);
-- TO DISK 'aaa' - move the data part to disk aaa;
-- TO VOLUME 'bbb' - move the data part to volume bbb;
-- GROUP BY - aggregate expired rows.
-- Example: whole rows are dropped 10 seconds after create_time
ALTER TABLE t_order2 MODIFY TTL create_time + INTERVAL 10 SECOND;
-- ReplacingMergeTree deduplicates rows sharing the same ORDER BY key during
-- merges, keeping the row with the largest create_time (the version column)
create table t_order_rmt(
id UInt32,
sku_id String,
total_amount Decimal(16, 2),
create_time Datetime
) engine = ReplacingMergeTree(create_time)
partition by toYYYYMMDD(create_time)
primary key (id)
order by (id, sku_id);
insert into t_order_rmt values (101, 'sku_001', 1000.00, '2022-04-15 11:00:00'), (102, 'sku_002', 2000.00, '2022-04-15 12:00:00'), (102, 'sku_003', 2500.00, '2022-04-15 11:20:00'), (102, 'sku_002', 2000.00, '2022-04-15 13:00:00'), (102, 'sku_002', 12000.00, '2022-04-16 12:00:00'), (102, 'sku_002', 800.00, '2022-04-15 13:30:00');
select * from t_order_rmt;
-- Merge manually (dedup happens only at merge time, and only within one partition)
OPTIMIZE TABLE t_order_rmt FINAL;
-- Query again: duplicates within each partition are now collapsed
select * from t_order_rmt;
-- SummingMergeTree pre-aggregates: rows sharing the same ORDER BY key are
-- collapsed during merges, summing the listed column (total_amount)
create table t_order_smt(
id UInt32,
sku_id String,
total_amount Decimal(16, 2),
create_time Datetime
) engine = SummingMergeTree(total_amount)
partition by toYYYYMMDD(create_time)
primary key (id)
order by (id, sku_id);
insert into t_order_smt values (101, 'sku_001', 1000.00, '2022-04-15 11:00:00'), (102, 'sku_002', 2000.00, '2022-04-15 12:00:00'), (102, 'sku_003', 2500.00, '2022-04-15 11:20:00'), (102, 'sku_002', 2000.00, '2022-04-15 13:00:00'), (102, 'sku_002', 12000.00, '2022-04-16 12:00:00'), (102, 'sku_002', 800.00, '2022-04-15 13:30:00');
select * from t_order_smt;
-- Merge manually (summation happens only at merge time)
OPTIMIZE TABLE t_order_smt FINAL;
-- Query again: rows with the same key are summed within each partition
select * from t_order_smt;
能不能直接执行以下 SQL 得到汇总值?
-- Not reliable: merges run at an unpredictable time, so unmerged duplicate
-- rows may still exist and this can return multiple partial values
select total_amount from t_order_smt where sku_id = 'xxx' and create_time = 'yyy';
-- Correct: aggregate explicitly so the result is right regardless of merge state
select sum(total_amount) from t_order_smt where sku_id = 'xxx' and create_time = 'yyy';