-- Standard form: insert one or more literal rows.
-- Square brackets mark optional parts: the database qualifier (together with its dot) and the column list.
INSERT INTO [database_name.]table_name [(c1, c2, ...)] VALUES (v11, v12, ...), (v21, v22, ...), ...;
-- Table-to-table form: insert the rows produced by a SELECT.
INSERT INTO [database_name.]table_name [(c1, c2, ...)] SELECT ...
-- Delete: remove the rows of t_order_smt whose sku_id is 'sku_001'.
ALTER TABLE t_order_smt
    DELETE WHERE sku_id = 'sku_001';
-- Update: set total_amount to 2000.00 (built with toDecimal32, scale 2) on the rows with id = 102.
ALTER TABLE t_order_smt
    UPDATE total_amount = toDecimal32(2000.00, 2)
    WHERE id = 102;
-- Table t_order_mt (original note: clear the table first).
CREATE TABLE t_order_mt
(
    id           UInt32,
    sku_id       String,
    total_amount Decimal(16, 2),
    create_time  Datetime
)
ENGINE = MergeTree
PARTITION BY toYYYYMMDD(create_time)  -- partition key is the calendar day of create_time
PRIMARY KEY (id)
ORDER BY (id, sku_id);
-- Insert sample rows; the column list matches the order of the t_order_mt definition.
INSERT INTO t_order_mt (id, sku_id, total_amount, create_time)
VALUES
    (101, 'sku_001', 1000.00, '2022-04-15 09:00:00'),
    (102, 'sku_002', 1500.00, '2022-04-15 10:30:00'),
    (102, 'sku_003', 2500.00, '2022-04-15 12:30:00'),
    (103, 'sku_004', 1500.00, '2022-04-15 13:00:00'),
    (104, 'sku_001', 10000.00, '2022-04-15 13:00:00'),
    (105, 'sku_002', 800.00, '2022-04-15 12:00:00'),
    (106, 'sku_002', 1500.00, '2022-04-15 10:30:00'),
    (107, 'sku_003', 2500.00, '2022-04-15 12:30:00'),
    (108, 'sku_004', 1500.00, '2022-04-15 13:00:00'),
    (109, 'sku_002', 10000.00, '2022-04-15 13:00:00'),
    (110, 'sku_003', 800.00, '2022-04-15 12:00:00');
-- Sum total_amount grouped by (id, sku_id), using the WITH ROLLUP modifier.
SELECT
    id,
    sku_id,
    sum(total_amount)
FROM t_order_mt
GROUP BY id, sku_id WITH ROLLUP;
-- Same aggregation, using the WITH CUBE modifier.
SELECT
    id,
    sku_id,
    sum(total_amount)
FROM t_order_mt
GROUP BY id, sku_id WITH CUBE;
-- Same aggregation, using the WITH TOTALS modifier.
SELECT
    id,
    sku_id,
    sum(total_amount)
FROM t_order_mt
GROUP BY id, sku_id WITH TOTALS;
-- Add a column; AFTER places it directly after an existing column.
-- Square brackets mark the optional database qualifier (together with its dot).
ALTER TABLE [database_name.]table_name ADD COLUMN ${new_col_name} ${col_type} AFTER ${old_col_name};
-- Change the type of an existing column.
ALTER TABLE [database_name.]table_name MODIFY COLUMN ${col_name} ${col_type};
-- Drop a column; the COLUMN keyword is part of the statement.
ALTER TABLE [database_name.]table_name DROP COLUMN ${col_name};
# Run a query through clickhouse-client and redirect the CSVWithNames output to /bigdata/data/rs.csv.
# --password is passed without a value, exactly as in the original command.
clickhouse-client \
    --query "select * from test.t_order_mt where create_time='2022-04-15 12:00:00'" \
    --format CSVWithNames \
    --password \
    > /bigdata/data/rs.csv
<yandex>
    <zookeeper-servers>
        <node index="1">
            <host>node01</host>
            <port>2181</port>
        </node>
        <node index="2">
            <host>node02</host>
            <port>2181</port>
        </node>
        <node index="3">
            <host>node03</host>
            <port>2181</port>
        </node>
    </zookeeper-servers>
</yandex>
# Give the clickhouse user and group ownership of metrika.xml.
chown clickhouse:clickhouse metrika.xml
<interserver_http_port>9009</interserver_http_port>
<interserver_http_host>192.168.x.x</interserver_http_host>
<zookeeper incl="zookeeper-servers" optional="true" />
<include_from>/etc/clickhouse-server/config.d/metrika.xml</include_from>
# Restart the ClickHouse server (presumably so the configuration edits take effect — confirm).
sudo clickhouse restart
CREATE TABLE table_name ( ... ) ENGINE = ReplicatedMergeTree('/clickhouse/tables/{layer}-{shard}/{table_name}', '{replica}') ...
- /clickhouse/tables/ 是公共前缀,推荐使用这个;
- {layer}-{shard} 是分片标识部分,大多数情况下只需保留 {shard} 占位符即可;
- table_name 是该表在 ZooKeeper 中的名称,最好与 ClickHouse 中的表名相同。这里需要显式指定它,因为与 ClickHouse 表名不同,它不会被 RENAME 语句修改。也可以在前面加上数据库名称,例如:db_name.table_name;
- {replica} 是副本名称占位符。
<macros>
    <shard>01</shard>
    <replica>node01</replica>
</macros>
<macros incl="macros" optional="true" />
-- Run this on each of the three machines: node01, node02 and node03.
CREATE TABLE t_order_rep
(
    id           UInt32,
    sku_id       String,
    total_amount Decimal(16, 2),
    create_time  Datetime
)
-- First argument is the table's ZooKeeper path, second is the replica name; {shard} and {replica} are placeholders.
ENGINE = ReplicatedMergeTree('/clickhouse/tables/{shard}/t_order_rep', '{replica}')
PARTITION BY toYYYYMMDD(create_time)
PRIMARY KEY (id)
ORDER BY (id, sku_id);
-- Insert sample rows on node01; the column list matches the order of the t_order_rep definition.
INSERT INTO t_order_rep (id, sku_id, total_amount, create_time)
VALUES
    (101, 'sku_001', 1000.00, '2022-04-15 09:00:00'),
    (102, 'sku_002', 1500.00, '2022-04-15 10:30:00'),
    (103, 'sku_004', 1500.00, '2022-04-15 13:00:00'),
    (104, 'sku_001', 10000.00, '2022-04-15 13:00:00'),
    (105, 'sku_002', 800.00, '2022-04-15 12:00:00');
<remote_servers>
    <logs>
        <shard>
            <weight>1</weight>
            <internal_replication>false</internal_replication>
            <replica>
                <priority>1</priority>
                <host>example01-01-1</host>
                <port>9000</port>
            </replica>
            <replica>
                <host>example01-01-2</host>
                <port>9000</port>
            </replica>
        </shard>
        <shard>
            <weight>2</weight>
            <internal_replication>false</internal_replication>
            <replica>
                <host>example01-02-1</host>
                <port>9000</port>
            </replica>
            <replica>
                <host>example01-02-2</host>
                <secure>1</secure>
                <port>9440</port>
            </replica>
        </shard>
    </logs>
</remote_servers>
每个服务器(replica)需要指定 host、port,以及可选的 user、password、secure、compression 参数:
- host – 远程服务器地址。可以是域名、IPv4 或 IPv6。如果指定域名,则服务在启动时发起一个 DNS 请求,并且请求结果会在服务器运行期间一直被记录。如果 DNS 请求失败,则服务不会启动。如果你修改了 DNS 记录,则需要重启服务。
- port – 消息传递的 TCP 端口(「tcp_port」配置通常设为 9000)。不要跟 http_port 混淆。
- user – 用于连接远程服务器的用户名。默认值:default。该用户必须有权限访问该远程服务器。访问权限配置在 users.xml 文件中。更多信息,请查看«访问权限»部分。
- password – 用于连接远程服务器的密码。默认值:空字符串。
- secure – 是否使用 ssl 进行连接,设为 true 时,通常也应该设置 port = 9440。服务器也要监听 9440 并有正确的证书。
- compression – 是否使用数据压缩。默认值:true。

<remote_servers>
    <default>
        <shard>
            <internal_replication>true</internal_replication>
            <replica>
                <host>node01</host>
                <port>9000</port>
            </replica>
            <replica>
                <host>node02</host>
                <port>9000</port>
            </replica>
            <replica>
                <host>node03</host>
                <port>9000</port>
            </replica>
        </shard>
    </default>
</remote_servers>
-- Create t_order_cluster on every node of the cluster named `default`.
-- NOTE(review): ReplicatedMergeTree is given no ZooKeeper path / replica arguments here,
-- unlike t_order_rep; presumably this relies on server-side defaults — confirm.
CREATE TABLE t_order_cluster ON CLUSTER default
(
    id           UInt32,
    sku_id       String,
    total_amount Decimal(16, 2),
    create_time  Datetime
)
ENGINE = ReplicatedMergeTree
PARTITION BY toYYYYMMDD(create_time)
PRIMARY KEY (id)
ORDER BY (id, sku_id);
向集群写数据的方法有两种:
<remote_servers>
    <shard_cluster>
        <shard>
            <internal_replication>true</internal_replication>
            <replica>
                <host>node01</host>
                <port>9000</port>
            </replica>
            <replica>
                <host>node02</host>
                <port>9000</port>
            </replica>
        </shard>
        <shard>
            <internal_replication>true</internal_replication>
            <replica>
                <host>node03</host>
                <port>9000</port>
            </replica>
            <replica>
                <host>node04</host>
                <port>9000</port>
            </replica>
        </shard>
        <shard>
            <internal_replication>true</internal_replication>
            <replica>
                <host>node05</host>
                <port>9000</port>
            </replica>
            <replica>
                <host>node06</host>
                <port>9000</port>
            </replica>
        </shard>
    </shard_cluster>
</remote_servers>
node01 | node02 | node03 |
---|---|---|
<yandex>
    <remote_servers>
        <shard_cluster>
            <shard>
                <internal_replication>true</internal_replication>
                <replica>
                    <host>node01</host>
                    <port>9000</port>
                </replica>
                <replica>
                    <host>node02</host>
                    <port>9000</port>
                </replica>
            </shard>
            <shard>
                <internal_replication>true</internal_replication>
                <replica>
                    <host>node03</host>
                    <port>9000</port>
                </replica>
            </shard>
        </shard_cluster>
    </remote_servers>
    <zookeeper-servers>
        <node index="1">
            <host>node01</host>
            <port>2181</port>
        </node>
        <node index="2">
            <host>node02</host>
            <port>2181</port>
        </node>
        <node index="3">
            <host>node03</host>
            <port>2181</port>
        </node>
    </zookeeper-servers>
    <macros>
        <shard>01</shard>
        <replica>rep_1_1</replica>
    </macros>
</yandex>
# Give the clickhouse user and group ownership of metrika-shard.xml.
chown clickhouse:clickhouse metrika-shard.xml
<include_from>/etc/clickhouse-server/config.d/metrika-shard.xml</include_from>
-- Local table t_order_sc, created on every node of the cluster shard_cluster.
-- NOTE(review): ReplicatedMergeTree is given no ZooKeeper path / replica arguments here;
-- presumably this relies on server-side defaults — confirm.
CREATE TABLE t_order_sc ON CLUSTER shard_cluster
(
    id           UInt32,
    sku_id       String,
    total_amount Decimal(16, 2),
    create_time  Datetime
)
ENGINE = ReplicatedMergeTree
PARTITION BY toYYYYMMDD(create_time)
PRIMARY KEY (id)
ORDER BY (id, sku_id);
-- Distributed table over t_order_sc, with the same column definitions.
-- Engine arguments: cluster shard_cluster, database test, table t_order_sc, sharding expression hiveHash(sku_id).
CREATE TABLE t_order_sc_all ON CLUSTER shard_cluster
(
    id           UInt32,
    sku_id       String,
    total_amount Decimal(16, 2),
    create_time  Datetime
)
ENGINE = Distributed(shard_cluster, test, t_order_sc, hiveHash(sku_id));
-- Insert sample rows through the Distributed table; the column list matches its definition order.
INSERT INTO t_order_sc_all (id, sku_id, total_amount, create_time)
VALUES
    (101, 'sku_001', 1000.00, '2022-04-22 09:00:00'),
    (102, 'sku_002', 1500.00, '2022-04-22 10:30:00'),
    (103, 'sku_004', 1500.00, '2022-04-22 13:00:00'),
    (104, 'sku_001', 10000.00, '2022-04-23 13:00:00'),
    (105, 'sku_002', 800.00, '2022-04-23 12:00:00');
-- Read through the Distributed table.
SELECT
    id,
    sku_id,
    total_amount,
    create_time
FROM t_order_sc_all;
-- Read the local table t_order_sc on the node where the query runs.
SELECT
    id,
    sku_id,
    total_amount,
    create_time
FROM t_order_sc;
遇到问题:Received exception from server (version 22.3.3):
Code: 516. DB::Exception: Received from localhost:9000. DB::Exception: Received from node03:9000. DB::Exception: default: Authentication failed: password is incorrect or there is no user with such name. (AUTHENTICATION_FAILED)
需要修改配置: