这个引擎是在MergeTree的基础上,添加了==“处理重复数据”==的功能。该引擎和MergeTree的不同之处在于,它会删除同一分区内排序键(ORDER BY)相同的重复项。数据的去重只会在合并的过程中出现;合并会在未知的时间在后台进行,所以无法预先做出计划,有一些数据可能仍未被处理。因此,ReplacingMergeTree适用于在后台清理重复的数据以节省空间,但它不保证没有重复的数据出现。
-- Example 1: ReplacingMergeTree without a version column.
-- Rows are partitioned by city and sorted by uid; rows that share the same
-- sorting key (uid) inside one partition are treated as duplicates.
create table tb_replace_tree1
(
    uid  UInt8,
    name String,
    city String
)
engine = ReplacingMergeTree()
partition by city
order by uid;

-- Two rows with uid = 1 go into each partition (BJ and SH).
insert into tb_replace_tree1 (uid, name, city) values
    (1, 'zs1', 'BJ'),
    (1, 'zs2', 'BJ'),
    (1, 'ls1', 'SH'),
    (1, 'ls2', 'SH');

-- Deduplication is triggered when the parts of a partition are merged.
-- NOTE(review): seeing one row per partition right after a single INSERT
-- presumably relies on the insert block being collapsed at write time
-- (optimize_on_insert) -- confirm for the server version in use.
select * from tb_replace_tree1;
┌─uid─┬─name─┬─city─┐
│ 1 │ ls2 │ SH │
└─────┴──────┴──────┘
┌─uid─┬─name─┬─city─┐
│ 1 │ zs2 │ BJ │
└─────┴──────┴──────┘
-- Example 2: the primary key (uid) is only a prefix of the sorting key
-- (uid, age). Duplicates are decided by the full sorting key.
create table tb_replace_tree2
(
    uid  UInt8,
    name String,
    age  UInt8,
    city String
)
engine = ReplacingMergeTree()
partition by city
primary key uid
order by (uid, age);

-- BJ: both rows share (uid, age) = (1, 21).
-- SH: the rows share uid = 1 but differ in age (21 vs 22).
insert into tb_replace_tree2 (uid, name, age, city) values
    (1, 'zs1', 21, 'BJ'),
    (1, 'zs2', 21, 'BJ'),
    (1, 'ls1', 21, 'SH'),
    (1, 'ls2', 22, 'SH');

-- Rows with an identical sorting key inside one partition are deduplicated.
-- The two SH rows have different ages, so both are kept even though they
-- share the same primary key value.
select * from tb_replace_tree2;
┌─uid─┬─name─┬─age─┬─city─┐
│ 1 │ zs2 │ 21 │ BJ │
└─────┴──────┴─────┴──────┘
┌─uid─┬─name─┬─age─┬─city─┐
│ 1 │ ls1 │ 21 │ SH │
│ 1 │ ls2 │ 22 │ SH │
└─────┴──────┴─────┴──────┘
-- Example 3: duplicates that arrive through separate INSERT statements.
create table tb_replace_tree3
(
    uid  UInt8,
    name String,
    city String
)
engine = ReplacingMergeTree()
partition by city
order by uid;

-- Both BJ rows (uid = 1) are written by a single INSERT.
insert into tb_replace_tree3 (uid, name, city) values
    (1, 'oldname', 'BJ'),
    (1, 'newname', 'BJ');

-- The two SH rows (uid = 2) are written by two separate INSERTs:
-- 'newname' first, then 'oldname'.
insert into tb_replace_tree3 (uid, name, city) values (2, 'newname', 'SH');
insert into tb_replace_tree3 (uid, name, city) values (2, 'oldname', 'SH');

-- Query the table before any manual merge has been requested.
select * from tb_replace_tree3;
┌─uid─┬─name────┬─city─┐
│ 1 │ newname │ BJ │
└─────┴─────────┴──────┘
┌─uid─┬─name────┬─city─┐
│ 2 │ newname │ SH │
└─────┴─────────┴──────┘
┌─uid─┬─name────┬─city─┐
│ 2 │ oldname │ SH │
└─────┴─────────┴──────┘
-- The table is not fully deduplicated yet, so request a merge manually.
optimize table tb_replace_tree3 final;

-- After the merge, the 'newname' row for uid = 2 that was inserted first
-- has been removed.
select * from tb_replace_tree3;
┌─uid─┬─name────┬─city─┐ //说明未指定ver参数时,保留重复数据中最后插入的那一行数据
│ 2 │ oldname │ SH │
└─────┴─────────┴──────┘
┌─uid─┬─name────┬─city─┐
│ 1 │ newname │ BJ │
└─────┴─────────┴──────┘
-- Example 4: ReplacingMergeTree with an explicit version column.
-- The `version` column is passed to the engine as its ver parameter.
create table tb_replace_tree4
(
    uid     UInt8,
    name    String,
    city    String,
    version UInt8
)
engine = ReplacingMergeTree(version)
partition by city
order by uid;

-- BJ: the higher version (2) is inserted last.
-- SH: the higher version (2) is inserted first.
insert into tb_replace_tree4 (uid, name, city, version) values
    (1, 'zs1', 'BJ', 1),
    (1, 'zs2', 'BJ', 2),
    (1, 'ls1', 'SH', 2),
    (1, 'ls2', 'SH', 1);

-- In both partitions the row with the larger version is the one kept,
-- regardless of insertion order.
select * from tb_replace_tree4;
┌─uid─┬─name─┬─city─┬─version─┐
│ 1 │ zs2 │ BJ │ 2 │
└─────┴──────┴──────┴─────────┘
┌─uid─┬─name─┬─city─┬─version─┐
│ 1 │ ls1 │ SH │ 2 │
└─────┴──────┴──────┴─────────┘
-- Add one more duplicate per partition:
--   BJ gets version 3, higher than the version 2 row already stored;
--   SH gets version 1, lower than the version 2 row already stored.
insert into tb_replace_tree4 (uid, name, city, version) values (1, 'zs3', 'BJ', 3);
insert into tb_replace_tree4 (uid, name, city, version) values (1, 'ls3', 'SH', 1);

-- Request a merge so the new rows are deduplicated against the old ones.
optimize table tb_replace_tree4 final;

-- Inspect which row survived in each partition.
select * from tb_replace_tree4;
┌─uid─┬─name─┬─city─┬─version─┐
│ 1 │ zs3 │ BJ │ 3 │
└─────┴──────┴──────┴─────────┘
┌─uid─┬─name─┬─city─┬─version─┐
│ 1 │ ls1 │ SH │ 2 │
└─────┴──────┴──────┴─────────┘
版本参数(ver)一般可以用时间(例如DateTime类型的列)来表示
-- Example 5: a DateTime column used as the engine's version parameter.
create table tb_replace_tree5
(
    uid  UInt8,
    name String,
    city String,
    time DateTime
)
engine = ReplacingMergeTree(time)
partition by city
order by uid;

-- uid 1 (BJ): one row stamped with the current time, one with a fixed time.
-- uid 2 (SH): two rows with different fixed times.
insert into tb_replace_tree5 (uid, name, city, time) values
    (1, 'zs1', 'BJ', now()),
    (1, 'zs2', 'BJ', '2021-04-27 10:00:00'),
    (2, 'ls1', 'SH', '2021-03-23 04:00:00'),
    (2, 'ls2', 'SH', '2021-07-29 10:00:00');

-- Show which row remains for each uid.
select * from tb_replace_tree5;
┌─uid─┬─name─┬─city─┬────────────────time─┐
│ 1 │ zs1 │ BJ │ 2021-08-31 11:22:53 │
└─────┴──────┴──────┴─────────────────────┘
┌─uid─┬─name─┬─city─┬────────────────time─┐
│ 2 │ ls2 │ SH │ 2021-07-29 10:00:00 │
└─────┴──────┴──────┴─────────────────────┘