接上篇《AntDB重分布(rebalance)性能对比记录一》,AntDB在重分布方面有优化的空间,本文通过记录一组数据,验证优化方案的可能性。
重分布对各个数据节点(datanode)来说,主要包含两个操作:
由通过插入一张新表来模拟重分布带来的启发,AntDB是否可以通过以下方法实现重分布。每个节点扫描所有记录,通过重分布表达式计算,属于本节点的数据保留,不属于本节点的数据,通过Reduce转发到其他节点上,并插入由其他节点Reduce的数据,最终完成数据重分布。
--10万
create table t_one_million_new2(id int8, value int8) distribute by hash(id) to node(dn1, dn2, dn3);
--100万
create table t_ten_million_new2(id int8, value int8) distribute by hash(id) to node(dn1, dn2, dn3);
--1000万
create table t_hundred_million_new2(id int8, value int8) distribute by hash(id) to node(dn1, dn2, dn3);
--1亿
create table t_thousand_million_new2(id int8, value int8) distribute by hash(id) to node(dn1, dn2, dn3);
explain (verbose, analyze, plan_id)
with tmp as (
delete from t_one_million
where ('[0:2]={16388,16386,16387}' ::oid[])[COALESCE(int4abs((hashint8(id) % 3)), 0)] != adb_node_oid()
returning *)
insert into t_one_million_new2
select *
from tmp;
select node_name, count(1)
from (select xc_node_id,*
from t_one_million
union all
select xc_node_id,*
from t_one_million_new2) a,
pgxc_node b
where a.xc_node_id = b.node_id
group by 1
order by 1;
postgres=# explain (verbose, analyze, plan_id)
postgres-# with tmp as (
postgres(# delete from t_one_million
postgres(# where ('[0:2]={16388,16386,16387}' ::oid[])[COALESCE(int4abs((hashint8(id) % 3)), 0)] != adb_node_oid()
postgres(# returning *)
postgres-# insert into t_one_million_new2
postgres-# select *
postgres-# from tmp;
QUERY PLAN
------------------------------------------------------------------------------------------------------------------------------------------------
Cluster Gather (cost=2699.12..3269.39 rows=1841 width=16) (actual time=206.958..206.958 rows=0 loops=1)
Plan id: 0
Remote node: 16385,16386,16387,16388
-> Insert on public.t_one_million_new2 (cost=1.00..18.97 rows=614 width=16) (actual time=0.130..0.130 rows=0 loops=1)
Plan id: 1
Node 16385: (actual time=162.042..162.042 rows=0 loops=1)
Node 16388: (actual time=188.439..188.439 rows=0 loops=1)
Node 16387: (actual time=196.024..196.024 rows=0 loops=1)
Node 16386: (actual time=205.083..205.083 rows=0 loops=1)
CTE tmp
-> Delete on public.t_one_million (cost=0.00..12.91 rows=460 width=18) (actual time=0.046..0.047 rows=0 loops=1)
Plan id: 4
Output: t_one_million.id, t_one_million.value
Node 16385: (actual time=0.190..102.856 rows=25282 loops=1)
Node 16388: (actual time=7.967..82.578 rows=16701 loops=1)
Node 16387: (actual time=8.182..81.934 rows=16699 loops=1)
Node 16386: (actual time=7.412..80.025 rows=16587 loops=1)
-> Seq Scan on public.t_one_million (cost=0.00..12.91 rows=460 width=18) (actual time=0.039..0.039 rows=0 loops=1)
Plan id: 5
Output: t_one_million.id, t_one_million.ctid, t_one_million.xc_node_id
Filter: (('[0:2]={16388,16386,16387}'::oid[])[COALESCE(int4abs((hashint8(t_one_million.id) % 3)), 0)] <> adb_node_oid())
Remote node: 16388,16386,16387,16385
Node 16385: (actual time=0.108..26.268 rows=25282 loops=1)
Node 16388: (actual time=7.866..26.071 rows=16701 loops=1)
Node 16387: (actual time=8.109..26.764 rows=16699 loops=1)
Node 16386: (actual time=7.309..25.298 rows=16587 loops=1)
-> Cluster Reduce (cost=1.00..18.97 rows=614 width=16) (actual time=0.128..0.128 rows=0 loops=1)
Plan id: 2
Reduce: ('[0:2]={16388,16386,16387}'::oid[])[COALESCE(int4abs((hashint8(tmp.id) % 3)), 0)]
Node 16385: (actual time=162.039..162.039 rows=0 loops=1)
Node 16388: (actual time=128.717..139.005 rows=25038 loops=1)
Node 16387: (actual time=125.046..146.805 rows=25174 loops=1)
Node 16386: (actual time=122.414..155.618 rows=25057 loops=1)
-> CTE Scan on tmp (cost=0.00..12.91 rows=460 width=16) (actual time=0.050..0.050 rows=0 loops=1)
Plan id: 3
Output: tmp.id, tmp.value
Node 16385: (actual time=0.198..113.614 rows=25282 loops=1)
Node 16388: (actual time=7.976..89.735 rows=16701 loops=1)
Node 16387: (actual time=8.188..89.122 rows=16699 loops=1)
Node 16386: (actual time=7.418..87.059 rows=16587 loops=1)
Planning time: 1.283 ms
Execution time: 459.616 ms
(42 rows)
Time: 888.970 ms
postgres=# select node_name, count(1)
postgres-# from (select xc_node_id,*
postgres(# from t_one_million
postgres(# union all
postgres(# select xc_node_id,*
postgres(# from t_one_million_new2) a,
postgres-# pgxc_node b
postgres-# where a.xc_node_id = b.node_id
postgres-# group by 1
postgres-# order by 1;
node_name | count
-----------+-------
dn1 | 33343
dn2 | 33209
dn3 | 33448
(3 rows)
Time: 268.398 ms
postgres=# select node_name, count(1) from t_one_million_new a, pgxc_node b where a.xc_node_id = b.node_id group by 1 order by 1;
node_name | count
-----------+-------
dn1 | 33343
dn2 | 33209
dn3 | 33448
(3 rows)
Time: 212.445 ms
explain (verbose, analyze, plan_id)
with tmp as (
delete from t_ten_million
where ('[0:2]={16388,16386,16387}' ::oid[])[COALESCE(int4abs((hashint8(id) % 3)), 0)] != adb_node_oid()
returning *)
insert into t_ten_million_new2
select *
from tmp;
select node_name, count(1)
from (select xc_node_id,*
from t_ten_million
union all
select xc_node_id,*
from t_ten_million_new2) a,
pgxc_node b
where a.xc_node_id = b.node_id
group by 1
order by 1;
postgres=# explain (verbose, analyze, plan_id)
postgres-# with tmp as (
postgres(# delete from t_ten_million
postgres(# where ('[0:2]={16388,16386,16387}' ::oid[])[COALESCE(int4abs((hashint8(id) % 3)), 0)] != adb_node_oid()
postgres(# returning *)
postgres-# insert into t_ten_million_new2
postgres-# select *
postgres-# from tmp;
QUERY PLAN
------------------------------------------------------------------------------------------------------------------------------------------------
Cluster Gather (cost=2699.12..3269.39 rows=1841 width=16) (actual time=2580.157..2580.157 rows=0 loops=1)
Plan id: 0
Remote node: 16385,16386,16387,16388
-> Insert on public.t_ten_million_new2 (cost=1.00..18.97 rows=614 width=16) (actual time=0.127..0.127 rows=0 loops=1)
Plan id: 1
Node 16385: (actual time=2060.965..2060.965 rows=0 loops=1)
Node 16386: (actual time=2111.376..2111.376 rows=0 loops=1)
Node 16387: (actual time=2436.112..2436.112 rows=0 loops=1)
Node 16388: (actual time=2574.979..2574.979 rows=0 loops=1)
CTE tmp
-> Delete on public.t_ten_million (cost=0.00..12.91 rows=460 width=18) (actual time=0.044..0.045 rows=0 loops=1)
Plan id: 4
Output: t_ten_million.id, t_ten_million.value
Node 16385: (actual time=0.187..1179.583 rows=250385 loops=1)
Node 16386: (actual time=63.668..831.904 rows=165932 loops=1)
Node 16387: (actual time=63.455..934.034 rows=167032 loops=1)
Node 16388: (actual time=64.036..929.712 rows=166591 loops=1)
-> Seq Scan on public.t_ten_million (cost=0.00..12.91 rows=460 width=18) (actual time=0.040..0.040 rows=0 loops=1)
Plan id: 5
Output: t_ten_million.id, t_ten_million.ctid, t_ten_million.xc_node_id
Filter: (('[0:2]={16388,16386,16387}'::oid[])[COALESCE(int4abs((hashint8(t_ten_million.id) % 3)), 0)] <> adb_node_oid())
Remote node: 16388,16386,16387,16385
Node 16385: (actual time=0.112..225.628 rows=250385 loops=1)
Node 16386: (actual time=63.608..218.627 rows=165932 loops=1)
Node 16387: (actual time=63.408..234.907 rows=167032 loops=1)
Node 16388: (actual time=63.983..234.140 rows=166591 loops=1)
-> Cluster Reduce (cost=1.00..18.97 rows=614 width=16) (actual time=0.125..0.125 rows=0 loops=1)
Plan id: 2
Reduce: ('[0:2]={16388,16386,16387}'::oid[])[COALESCE(int4abs((hashint8(tmp.id) % 3)), 0)]
Node 16385: (actual time=2060.963..2060.963 rows=0 loops=1)
Node 16386: (actual time=1268.951..1416.964 rows=249790 loops=1)
Node 16387: (actual time=1484.659..1581.653 rows=249980 loops=1)
Node 16388: (actual time=1477.651..1593.620 rows=250170 loops=1)
-> CTE Scan on tmp (cost=0.00..12.91 rows=460 width=16) (actual time=0.047..0.047 rows=0 loops=1)
Plan id: 3
Output: tmp.id, tmp.value
Node 16385: (actual time=0.191..1307.779 rows=250385 loops=1)
Node 16386: (actual time=63.671..920.255 rows=165932 loops=1)
Node 16387: (actual time=63.460..1022.836 rows=167032 loops=1)
Node 16388: (actual time=64.041..1022.762 rows=166591 loops=1)
Planning time: 1.197 ms
Execution time: 2611.913 ms
(42 rows)
Time: 2840.856 ms
postgres=# select node_name, count(1)
postgres-# from (select xc_node_id,*
postgres(# from t_ten_million
postgres(# union all
postgres(# select xc_node_id,*
postgres(# from t_ten_million_new2) a,
postgres-# pgxc_node b
postgres-# where a.xc_node_id = b.node_id
postgres-# group by 1
postgres-# order by 1;
node_name | count
-----------+--------
dn1 | 333622
dn2 | 332909
dn3 | 333469
(3 rows)
Time: 1290.778 ms
postgres=# select node_name, count(1) from t_ten_million_new a, pgxc_node b where a.xc_node_id = b.node_id group by 1 order by 1;
node_name | count
-----------+--------
dn1 | 333622
dn2 | 332909
dn3 | 333469
(3 rows)
Time: 1127.809 ms
explain (verbose, analyze, plan_id)
with tmp as (
delete from t_hundred_million
where ('[0:2]={16388,16386,16387}' ::oid[])[COALESCE(int4abs((hashint8(id) % 3)), 0)] != adb_node_oid()
returning *)
insert into t_hundred_million_new2
select *
from tmp;
select node_name, count(1)
from (select xc_node_id,*
from t_hundred_million
union all
select xc_node_id,*
from t_hundred_million_new2) a,
pgxc_node b
where a.xc_node_id = b.node_id
group by 1
order by 1;
postgres=# explain (verbose, analyze, plan_id)
postgres-# with tmp as (
postgres(# delete from t_hundred_million
postgres(# where ('[0:2]={16388,16386,16387}' ::oid[])[COALESCE(int4abs((hashint8(id) % 3)), 0)] != adb_node_oid()
postgres(# returning *)
postgres-# insert into t_hundred_million_new2
postgres-# select *
postgres-# from tmp;
QUERY PLAN
----------------------------------------------------------------------------------------------------------------------------------------------------
Cluster Gather (cost=2699.12..3269.39 rows=1841 width=16) (actual time=23802.106..23802.106 rows=0 loops=1)
Plan id: 0
Remote node: 16385,16386,16387,16388
-> Insert on public.t_hundred_million_new2 (cost=1.00..18.97 rows=614 width=16) (actual time=0.131..0.131 rows=0 loops=1)
Plan id: 1
Node 16385: (actual time=20565.963..20565.963 rows=0 loops=1)
Node 16387: (actual time=22259.154..22259.154 rows=0 loops=1)
Node 16388: (actual time=22810.022..22810.022 rows=0 loops=1)
Node 16386: (actual time=23784.276..23784.276 rows=0 loops=1)
CTE tmp
-> Delete on public.t_hundred_million (cost=0.00..12.91 rows=460 width=18) (actual time=0.044..0.045 rows=0 loops=1)
Plan id: 4
Output: t_hundred_million.id, t_hundred_million.value
Node 16385: (actual time=0.214..11428.316 rows=2497291 loops=1)
Node 16387: (actual time=486.838..8149.738 rows=1667058 loops=1)
Node 16388: (actual time=473.074..8031.809 rows=1667088 loops=1)
Node 16386: (actual time=476.968..9090.809 rows=1666677 loops=1)
-> Seq Scan on public.t_hundred_million (cost=0.00..12.91 rows=460 width=18) (actual time=0.040..0.040 rows=0 loops=1)
Plan id: 5
Output: t_hundred_million.id, t_hundred_million.ctid, t_hundred_million.xc_node_id
Filter: (('[0:2]={16388,16386,16387}'::oid[])[COALESCE(int4abs((hashint8(t_hundred_million.id) % 3)), 0)] <> adb_node_oid())
Remote node: 16388,16386,16387,16385
Node 16385: (actual time=0.138..2133.664 rows=2497291 loops=1)
Node 16387: (actual time=486.791..1972.714 rows=1667058 loops=1)
Node 16388: (actual time=473.019..1693.478 rows=1667088 loops=1)
Node 16386: (actual time=476.906..1952.976 rows=1666677 loops=1)
-> Cluster Reduce (cost=1.00..18.97 rows=614 width=16) (actual time=0.129..0.129 rows=0 loops=1)
Plan id: 2
Reduce: ('[0:2]={16388,16386,16387}'::oid[])[COALESCE(int4abs((hashint8(tmp.id) % 3)), 0)]
Node 16385: (actual time=20565.962..20565.962 rows=0 loops=1)
Node 16387: (actual time=13131.765..14239.031 rows=2498596 loops=1)
Node 16388: (actual time=13783.951..14884.326 rows=2502704 loops=1)
Node 16386: (actual time=14427.555..15564.555 rows=2496814 loops=1)
-> CTE Scan on tmp (cost=0.00..12.91 rows=460 width=16) (actual time=0.048..0.048 rows=0 loops=1)
Plan id: 3
Output: tmp.id, tmp.value
Node 16385: (actual time=0.219..12673.297 rows=2497291 loops=1)
Node 16387: (actual time=486.841..8941.105 rows=1667058 loops=1)
Node 16388: (actual time=473.080..8689.941 rows=1667088 loops=1)
Node 16386: (actual time=476.972..9927.592 rows=1666677 loops=1)
Planning time: 1.261 ms
Execution time: 23878.725 ms
(42 rows)
Time: 24123.468 ms
postgres=# select node_name, count(1)
postgres-# from (select xc_node_id,*
postgres(# from t_hundred_million
postgres(# union all
postgres(# select xc_node_id,*
postgres(# from t_hundred_million_new2) a,
postgres-# pgxc_node b
postgres-# where a.xc_node_id = b.node_id
postgres-# group by 1
postgres-# order by 1;
node_name | count
-----------+---------
dn1 | 3337145
dn2 | 3330709
dn3 | 3332146
(3 rows)
Time: 10990.656 ms
postgres=# select node_name, count(1) from t_hundred_million_new a, pgxc_node b where a.xc_node_id = b.node_id group by 1 order by 1;
node_name | count
-----------+---------
dn1 | 3337145
dn2 | 3330709
dn3 | 3332146
(3 rows)
Time: 9582.919 ms
explain (verbose, analyze, plan_id)
with tmp as (
delete from t_thousand_million
where ('[0:2]={16388,16386,16387}' ::oid[])[COALESCE(int4abs((hashint8(id) % 3)), 0)] != adb_node_oid()
returning *)
insert into t_thousand_million_new2
select *
from tmp;
select node_name, count(1)
from (select xc_node_id,*
from t_thousand_million
union all
select xc_node_id,*
from t_thousand_million_new2) a,
pgxc_node b
where a.xc_node_id = b.node_id
group by 1
order by 1;
postgres=# explain (verbose, analyze, plan_id)
postgres-# with tmp as (
postgres(# delete from t_thousand_million
postgres(# where ('[0:2]={16388,16386,16387}' ::oid[])[COALESCE(int4abs((hashint8(id) % 3)), 0)] != adb_node_oid()
postgres(# returning *)
postgres-# insert into t_thousand_million_new2
postgres-# select *
postgres-# from tmp;
QUERY PLAN
-----------------------------------------------------------------------------------------------------------------------------------------------------
Cluster Gather (cost=2699.12..3269.39 rows=1841 width=16) (actual time=303480.427..303480.427 rows=0 loops=1)
Plan id: 0
Remote node: 16385,16386,16387,16388
-> Insert on public.t_thousand_million_new2 (cost=1.00..18.97 rows=614 width=16) (actual time=0.122..0.122 rows=0 loops=1)
Plan id: 1
Node 16385: (actual time=233747.394..233747.394 rows=0 loops=1)
Node 16387: (actual time=250731.393..250731.393 rows=0 loops=1)
Node 16386: (actual time=296341.588..296341.588 rows=0 loops=1)
Node 16388: (actual time=303356.287..303356.287 rows=0 loops=1)
CTE tmp
-> Delete on public.t_thousand_million (cost=0.00..12.91 rows=460 width=18) (actual time=0.041..0.042 rows=0 loops=1)
Plan id: 4
Output: t_thousand_million.id, t_thousand_million.value
Node 16385: (actual time=0.207..129183.374 rows=24997800 loops=1)
Node 16387: (actual time=4318.516..90104.208 rows=16672669 loops=1)
Node 16386: (actual time=3918.944..100601.275 rows=16666117 loops=1)
Node 16388: (actual time=3858.099..100402.508 rows=16667591 loops=1)
-> Seq Scan on public.t_thousand_million (cost=0.00..12.91 rows=460 width=18) (actual time=0.038..0.038 rows=0 loops=1)
Plan id: 5
Output: t_thousand_million.id, t_thousand_million.ctid, t_thousand_million.xc_node_id
Filter: (('[0:2]={16388,16386,16387}'::oid[])[COALESCE(int4abs((hashint8(t_thousand_million.id) % 3)), 0)] <> adb_node_oid())
Remote node: 16388,16386,16387,16385
Node 16385: (actual time=0.138..23994.713 rows=24997800 loops=1)
Node 16387: (actual time=4318.478..21444.014 rows=16672669 loops=1)
Node 16386: (actual time=3918.890..19805.282 rows=16666117 loops=1)
Node 16388: (actual time=3858.047..19951.104 rows=16667591 loops=1)
-> Cluster Reduce (cost=1.00..18.97 rows=614 width=16) (actual time=0.120..0.120 rows=0 loops=1)
Plan id: 2
Reduce: ('[0:2]={16388,16386,16387}'::oid[])[COALESCE(int4abs((hashint8(tmp.id) % 3)), 0)]
Node 16385: (actual time=233747.393..233747.393 rows=0 loops=1)
Node 16387: (actual time=150785.753..161866.379 rows=25000997 loops=1)
Node 16386: (actual time=166170.688..178206.491 rows=24998324 loops=1)
Node 16388: (actual time=173194.376..184757.500 rows=25004856 loops=1)
-> CTE Scan on tmp (cost=0.00..12.91 rows=460 width=16) (actual time=0.045..0.045 rows=0 loops=1)
Plan id: 3
Output: tmp.id, tmp.value
Node 16385: (actual time=0.211..143127.262 rows=24997800 loops=1)
Node 16387: (actual time=4318.521..99346.392 rows=16672669 loops=1)
Node 16386: (actual time=3918.948..116278.998 rows=16666117 loops=1)
Node 16388: (actual time=3858.103..115040.120 rows=16667591 loops=1)
Planning time: 1.233 ms
Execution time: 303513.533 ms
(42 rows)
Time: 303742.111 ms
postgres=# select node_name, count(1)
postgres-# from (select xc_node_id,*
postgres(# from t_thousand_million
postgres(# union all
postgres(# select xc_node_id,*
postgres(# from t_thousand_million_new2) a,
postgres-# pgxc_node b
postgres-# where a.xc_node_id = b.node_id
postgres-# group by 1
postgres-# order by 1;
node_name | count
-----------+----------
dn1 | 33340379
dn2 | 33327029
dn3 | 33332592
(3 rows)
Time: 127507.301 ms
postgres=# select node_name, count(1) from t_thousand_million_new a, pgxc_node b where a.xc_node_id = b.node_id group by 1 order by 1;
node_name | count
-----------+----------
dn1 | 33340379
dn2 | 33327029
dn3 | 33332592
(3 rows)
Time: 104912.304 ms
通过对比结果可以发现,本方案结果正确。同等条件下,本方案效率要比插入一张新表低,但比原PGXC的重分布方案高很多,可以尝试本方案。
数据 | 重分布(ms) | 插入新表(ms) | 插入&删除(ms) |
---|---|---|---|
10万 | 1241.957 | 266.723 | 459.616 |
100万 | 7308.235 | 1355.531 | 2611.913 |
1000万 | 67223.163 | 17984.772 | 23878.725 |
1亿 | 682586.634 | 197551.749 | 303513.533 |