PostgreSQL对象重组工具【pg_reorg】

Description

pg_reorg is a utility program to reorganize tables in PostgreSQL databases. Unlike clusterdb, it doesn't block any selections or updates during reorganization. You can choose one of the following methods to reorganize:

 

  • Online CLUSTER (ordered by cluster index)

  • Ordered by specified columns

  • Online VACUUM FULL (packing rows only)

     

     

    NOTICE:

  • Only superusers can use the utility.

  • The target table must have a PRIMARY KEY.

     

1、安装

编译安装

下载地址:http://pgfoundry.org/frs/?group_id=1000411&release_id=2083#pg_reorg-_1.1.10-title-content

 

[root@masterdb ~]# tar -zxvf pg_reorg-1.1.10.tar.gz

[root@masterdb ~]# cd pg_reorg-1.1.10

[root@masterdb pg_reorg-1.1.10]# . /home/postgres/.bash_profile

[root@masterdb pg_reorg-1.1.10]# make

[root@masterdb pg_reorg-1.1.10]# make install

引入扩展

[postgres@masterdb ~]$ createdb tt

[postgres@masterdb ~]$ psql tt

psql (9.3.4)

Type "help" for help.

 

tt=# create extension pg_reorg ;

CREATE EXTENSION

tt=# \dx

                     List of installed extensions

   Name   | Version |   Schema   |            Description            

----------+---------+------------+------------------------------------

 pg_reorg | 1.1.10  | public     | re-organizes a PostgreSQL database

 plpgsql  | 1.0     | pg_catalog | PL/pgSQL procedural language

(2 rows)

 

 

2、测试

建立测试表

tt=# create table t1(id int primary key,name text);

CREATE TABLE

tt=# insert into t1 select generate_series(1,5000000),'HighGo';

INSERT 0 5000000

tt=# \d+

                    List of relations

 Schema | Name | Type  |  Owner   |  Size   | Description

--------+------+-------+----------+---------+-------------

 public | t1   | table | postgres | 211 MB  |

(1 row)

 

 

tt=# select pg_relation_filepath('t1');

 pg_relation_filepath

----------------------

 base/16812/16874

(1 row)

使用vacuum full

tt=# \timing

Timing is on.

tt=# vacuum FULL VERBOSE t1;

INFO:  vacuuming "public.t1"

 

vacuum full操作进行的同时,在另一终端执行:

tt=# select * from t1 limit 5;

[一直等待vacuum full操作完成]

 

 

最终输出信息如下:

tt=# vacuum FULL VERBOSE t1;

INFO:  vacuuming "public.t1"

INFO:  "t1": found 0 removable, 5000000 nonremovable row versions in 27028 pages

DETAIL:  0 dead row versions cannot be removed yet.

CPU 1.11s/2.71u sec elapsed 5.39 sec.

VACUUM

Time: 22358.823 ms

 

tt=# select pg_relation_filepath('t1');

 pg_relation_filepath

----------------------

 base/16812/16878

(1 row)

[表文件发生了变化,索引文件也同时被重组]

 

使用pg_reorg

[postgres@masterdb ~]$ time pg_reorg -n -t t1 -d tt -e -E DEBUG

LOG: (query) SET statement_timeout = 0

LOG: (query) SET search_path = pg_catalog, pg_temp, public

LOG: (query) SET client_min_messages = warning

LOG: (query) SELECT * FROM reorg.tables WHERE relid = $1::regclass

LOG:     (param:0) = t1

INFO: ---- reorganize one table with 7 steps. ----

INFO: target table name    : t1

DEBUG: target_oid     : 16843

DEBUG: target_toast   : 16846

DEBUG: target_tidx    : 16848

DEBUG: pkid           : 16849

DEBUG: ckid           : 0

DEBUG: create_pktype  : CREATE TYPE reorg.pk_16843 AS (id integer)

DEBUG: create_log     : CREATE TABLE reorg.log_16843 (id bigserial PRIMARY KEY, pk reorg.pk_16843, row t1)

DEBUG: create_trigger : CREATE TRIGGER z_reorg_trigger BEFORE INSERT OR DELETE OR UPDATE ON t1 FOR EACH ROW EXECUTE PROCEDURE reorg.reorg_trigger('INSERT INTO reorg.log_16843(pk, row) VALUES( CASE WHEN $1 IS NULL THEN NULL ELSE (ROW($1.id)::reorg.pk_16843) END, $2)')

DEBUG: create_table   : CREATE TABLE reorg.table_16843 WITH (oids=false) TABLESPACE pg_default AS SELECT id,name FROM ONLY t1

DEBUG: drop_columns   : (skipped)

DEBUG: delete_log     : DELETE FROM reorg.log_16843

DEBUG: lock_table     : LOCK TABLE t1 IN ACCESS EXCLUSIVE MODE

DEBUG: sql_peek       : SELECT * FROM reorg.log_16843 ORDER BY id LIMIT $1

DEBUG: sql_insert     : INSERT INTO reorg.table_16843 VALUES ($1.*)

DEBUG: sql_delete     : DELETE FROM reorg.table_16843 WHERE (id) = ($1.id)

DEBUG: sql_update     : UPDATE reorg.table_16843 SET (id, name) = ($2.id, $2.name) WHERE (id) = ($1.id)

DEBUG: sql_pop        : DELETE FROM reorg.log_16843 WHERE id = $1

INFO: ---- STEP1. setup ----

INFO: This needs EXCLUSIVE LOCK against the target table.

LOG: (query) BEGIN ISOLATION LEVEL READ COMMITTED

LOG: (query) SET LOCAL statement_timeout = 100

LOG: (query) LOCK TABLE t1 IN ACCESS EXCLUSIVE MODE

LOG: (query) RESET statement_timeout

LOG: (query) SELECT reorg.conflicted_triggers($1)

LOG:     (param:0) = 16843

LOG: (query) CREATE TYPE reorg.pk_16843 AS (id integer)

LOG: (query) CREATE TABLE reorg.log_16843 (id bigserial PRIMARY KEY, pk reorg.pk_16843, row t1)

LOG: (query) CREATE TRIGGER z_reorg_trigger BEFORE INSERT OR DELETE OR UPDATE ON t1 FOR EACH ROW EXECUTE PROCEDURE reorg.reorg_trigger('INSERT INTO reorg.log_16843(pk, row) VALUES( CASE WHEN $1 IS NULL THEN NULL ELSE (ROW($1.id)::reorg.pk_16843) END, $2)')

LOG: (query) SELECT reorg.disable_autovacuum('reorg.log_16843')

LOG: (query) COMMIT

INFO: ---- STEP2. copy tuples into temp table----

LOG: (query) BEGIN ISOLATION LEVEL SERIALIZABLE

LOG: (query) SELECT set_config('work_mem', current_setting('maintenance_work_mem'), true)

LOG: (query) SET LOCAL synchronize_seqscans = off

LOG: (query) SELECT reorg.array_accum(virtualtransaction) FROM pg_locks WHERE locktype = 'virtualxid' AND pid <> pg_backend_pid() AND (virtualxid, virtualtransaction) <> ('1/1', '-1/0')

LOG: (query) DELETE FROM reorg.log_16843

LOG: (query) CREATE TABLE reorg.table_16843 WITH (oids=false) TABLESPACE pg_default AS SELECT id,name FROM ONLY t1

LOG: (query) SELECT reorg.disable_autovacuum('reorg.table_16843')

LOG: (query) COMMIT

INFO: ---- STEP3. create indexes ----

LOG: (query) SELECT indexrelid, reorg.reorg_indexdef(indexrelid, indrelid), indisvalid, pg_get_indexdef(indexrelid) FROM pg_index WHERE indrelid = $1

LOG:     (param:0) = 16843

DEBUG: [0]

DEBUG: target_oid   : 16849

DEBUG: create_index : CREATE UNIQUE INDEX index_16849 ON reorg.table_16843 USING btree (id)

LOG: (query) CREATE UNIQUE INDEX index_16849 ON reorg.table_16843 USING btree (id)

INFO: ---- STEP4. apply logs  ----

LOG: (query) SELECT reorg.reorg_apply($1, $2, $3, $4, $5, $6)

LOG:     (param:0) = SELECT * FROM reorg.log_16843 ORDER BY id LIMIT $1

LOG:     (param:1) = INSERT INTO reorg.table_16843 VALUES ($1.*)

LOG:     (param:2) = DELETE FROM reorg.table_16843 WHERE (id) = ($1.id)

LOG:     (param:3) = UPDATE reorg.table_16843 SET (id, name) = ($2.id, $2.name) WHERE (id) = ($1.id)

LOG:     (param:4) = DELETE FROM reorg.log_16843 WHERE id = $1

LOG:     (param:5) = 1000

LOG: (query) SELECT pid FROM pg_locks WHERE locktype = 'virtualxid' AND pid <> pg_backend_pid() AND virtualtransaction = ANY($1)

LOG:     (param:0) = {}

INFO: ---- STEP5. swap tables ----

INFO: This needs EXCLUSIVE LOCK against the target table.

LOG: (query) BEGIN ISOLATION LEVEL READ COMMITTED

LOG: (query) SET LOCAL statement_timeout = 100

LOG: (query) LOCK TABLE t1 IN ACCESS EXCLUSIVE MODE

LOG: (query) RESET statement_timeout

LOG: (query) SELECT reorg.reorg_apply($1, $2, $3, $4, $5, $6)

LOG:     (param:0) = SELECT * FROM reorg.log_16843 ORDER BY id LIMIT $1

LOG:     (param:1) = INSERT INTO reorg.table_16843 VALUES ($1.*)

LOG:     (param:2) = DELETE FROM reorg.table_16843 WHERE (id) = ($1.id)

LOG:     (param:3) = UPDATE reorg.table_16843 SET (id, name) = ($2.id, $2.name) WHERE (id) = ($1.id)

LOG:     (param:4) = DELETE FROM reorg.log_16843 WHERE id = $1

LOG:     (param:5) = 0

LOG: (query) SELECT reorg.reorg_swap($1)

LOG:     (param:0) = 16843

LOG: (query) COMMIT

INFO: ---- STEP6. drop old table----

LOG: (query) BEGIN ISOLATION LEVEL READ COMMITTED

LOG: (query) SELECT reorg.reorg_drop($1)

LOG:     (param:0) = 16843

LOG: (query) COMMIT

INFO: ---- STEP7. analyze ----

LOG: (query) BEGIN ISOLATION LEVEL READ COMMITTED

LOG: (query) ANALYZE t1

LOG: (query) COMMIT

 

real  0m21.524s

user 0m0.007s

sys   0m0.006s

[该过程中使用到了一个中间表、一个中间日志表以及一个触发器:重组期间,触发器将业务对原表的行级变更(INSERT/UPDATE/DELETE)记录到日志表中;数据拷贝和索引创建完成后,将日志表中记录的变更应用到中间表;最后将中间表与实际表调换(通过调换两个表在pg_class中的信息实现),结束后将触发器、中间日志表及调换后的中间表(即原业务表的旧数据)删除。具体的过程可查看后附的数据库日志。]

 

pg_reorg执行的过程中查询数据:

tt=# select * from t1 limit 5;

 id |  name 

----+--------

  1 | HighGo

  2 | HighGo

  3 | HighGo

  4 | HighGo

  5 | HighGo

(5 rows)

[不会等待pg_reorg执行完毕,因为在重组的过程中未一直将表锁住]

 

 

tt=# select pg_relation_filepath('t1');

 pg_relation_filepath

----------------------

 base/16812/16900

(1 row)

[数据文件发生了变化,当然,索引文件也同时被重组]

 

对比结果

 表大小 | 索引大小 | pg_reorg  | vacuum full

--------+----------+-----------+--------------

 211MB  | 107MB    | 0m21.524s | 22358.823 ms

 845MB  | 428MB    | 2m42.015s | 99549.960 ms

 

3、限制

Temp tables

pg_reorg cannot reorganize temp tables.

 

GiST indexes

pg_reorg cannot reorganize tables using GiST indexes.

 

DDL commands

You cannot run DDL commands except VACUUM and ANALYZE during pg_reorg. In many cases pg_reorg will fail and roll back correctly, but there are some cases which may result in data corruption.

 

TRUNCATE

TRUNCATE is lost. Deleted rows still exist after pg_reorg.

CREATE INDEX

It causes index corruption.

ALTER TABLE ... ADD COLUMN

It causes loss of data. Newly added columns are initialized with NULLs.

ALTER TABLE ... ALTER COLUMN TYPE

It causes data corruption.

ALTER TABLE ... SET TABLESPACE

It causes data corruption due to a wrong relfilenode.

 

 

注意:

  1. 重组过程中会增大I/O压力,执行重组时应避开系统繁忙的时间段;

  2. 重组过程中需要创建一些临时对象,所以执行重组时应确保有足够的磁盘空间。

     

附:pg_reorg执行期间的数据库日志

LOG:  statement: SET statement_timeout = 0

LOG:  statement: SET search_path = pg_catalog, pg_temp, public

LOG:  statement: SET client_min_messages = warning

LOG:  execute <unnamed>: SELECT * FROM reorg.tables WHERE relid = $1::regclass

DETAIL:  parameters: $1 = 't1'

LOG:  statement: BEGIN ISOLATION LEVEL READ COMMITTED

LOG:  statement: SET LOCAL statement_timeout = 100

LOG:  statement: LOCK TABLE t1 IN ACCESS EXCLUSIVE MODE

LOG:  statement: RESET statement_timeout

LOG:  execute <unnamed>: SELECT reorg.conflicted_triggers($1)

DETAIL:  parameters: $1 = '16843'

LOG:  statement: CREATE TYPE reorg.pk_16843 AS (id integer)

LOG:  statement: CREATE TABLE reorg.log_16843 (id bigserial PRIMARY KEY, pk reorg.pk_16843, row t1)

LOG:  statement: CREATE TRIGGER z_reorg_trigger BEFORE INSERT OR DELETE OR UPDATE ON t1 FOR EACH ROW EXECUTE PROCEDURE reorg.reorg_trigger('INSERT INTO reorg.log_16843(pk, row) VALUES( CASE WHEN $1 IS NULL THEN NULL ELSE (ROW($1.id)::reorg.pk_16843) END, $2)')

LOG:  statement: SELECT reorg.disable_autovacuum('reorg.log_16843')

LOG:  statement: COMMIT

LOG:  statement: BEGIN ISOLATION LEVEL SERIALIZABLE

LOG:  statement: SELECT set_config('work_mem', current_setting('maintenance_work_mem'), true)

LOG:  statement: SET LOCAL synchronize_seqscans = off

LOG:  statement: SELECT reorg.array_accum(virtualtransaction) FROM pg_locks WHERE locktype = 'virtualxid' AND pid <> pg_backend_pid() AND (virtualxid, virtualtransaction) <> ('1/1', '-1/0')

LOG:  statement: DELETE FROM reorg.log_16843

LOG:  statement: CREATE TABLE reorg.table_16843 WITH (oids=false) TABLESPACE pg_default AS SELECT id,name FROM ONLY t1

LOG:  statement: SELECT reorg.disable_autovacuum('reorg.table_16843')

LOG:  statement: COMMIT

LOG:  execute <unnamed>: SELECT indexrelid, reorg.reorg_indexdef(indexrelid, indrelid), indisvalid, pg_get_indexdef(indexrelid) FROM pg_index WHERE indrelid = $1

DETAIL:  parameters: $1 = '16843'

LOG:  statement: CREATE UNIQUE INDEX index_16849 ON reorg.table_16843 USING btree (id)

LOG:  execute <unnamed>: SELECT reorg.reorg_apply($1, $2, $3, $4, $5, $6)

DETAIL:  parameters: $1 = 'SELECT * FROM reorg.log_16843 ORDER BY id LIMIT $1', $2 = 'INSERT INTO reorg.table_16843 VALUES ($1.*)', $3 = 'DELETE FROM reorg.table_16843 WHERE (id) = ($1.id)', $4 = 'UPDATE reorg.table_16843 SET (id, name) = ($2.id, $2.name) WHERE (id) = ($1.id)', $5 = 'DELETE FROM reorg.log_16843 WHERE id = $1', $6 = '1000'

LOG:  execute <unnamed>: SELECT pid FROM pg_locks WHERE locktype = 'virtualxid' AND pid <> pg_backend_pid() AND virtualtransaction = ANY($1)

DETAIL:  parameters: $1 = '{}'

LOG:  statement: BEGIN ISOLATION LEVEL READ COMMITTED

LOG:  statement: SET LOCAL statement_timeout = 100

LOG:  statement: LOCK TABLE t1 IN ACCESS EXCLUSIVE MODE

LOG:  statement: RESET statement_timeout

LOG:  execute <unnamed>: SELECT reorg.reorg_apply($1, $2, $3, $4, $5, $6)

DETAIL:  parameters: $1 = 'SELECT * FROM reorg.log_16843 ORDER BY id LIMIT $1', $2 = 'INSERT INTO reorg.table_16843 VALUES ($1.*)', $3 = 'DELETE FROM reorg.table_16843 WHERE (id) = ($1.id)', $4 = 'UPDATE reorg.table_16843 SET (id, name) = ($2.id, $2.name) WHERE (id) = ($1.id)', $5 = 'DELETE FROM reorg.log_16843 WHERE id = $1', $6 = '0'

LOG:  execute <unnamed>: SELECT reorg.reorg_swap($1)

DETAIL:  parameters: $1 = '16843'

LOG:  statement: COMMIT

LOG:  statement: BEGIN ISOLATION LEVEL READ COMMITTED

LOG:  execute <unnamed>: SELECT reorg.reorg_drop($1)

DETAIL:  parameters: $1 = '16843'

LOG:  statement: COMMIT

LOG:  statement: BEGIN ISOLATION LEVEL READ COMMITTED

LOG:  statement: ANALYZE t1

LOG:  statement: COMMIT

你可能感兴趣的:(pg_reorg)