Description:
pg_reorg is an utility program to reorganize tables in PostgreSQL databases. Unlike clusterdb, it doesn't block any selections and updates during reorganization. You can choose one of the following methods to reorganize.
Online CLUSTER (ordered by cluster index)
Ordered by specified columns
Online VACUUM FULL (packing rows only)
NOTICE:
Only superusers can use the utility.
Target table must have PRIMARY KEY.
下载地址:http://pgfoundry.org/frs/?group_id=1000411&release_id=2083#pg_reorg-_1.1.10-title-content
[root@masterdb ~]# tar -zxvf pg_reorg-1.1.10.tar.gz
[root@masterdb ~]# cd pg_reorg-1.1.10
[root@masterdb pg_reorg-1.1.10]# . /home/postgres/.bash_profile
[root@masterdb pg_reorg-1.1.10]# make
[root@masterdb pg_reorg-1.1.10]# make install
[postgres@masterdb ~]$ createdb tt
[postgres@masterdb ~]$ psql tt
psql (9.3.4)
Type "help" for help.
tt=# create extension pg_reorg ;
CREATE EXTENSION
tt=# \dx
List of installed extensions
Name | Version | Schema | Description
----------+---------+------------+------------------------------------
pg_reorg | 1.1.10 | public | re-organizes a PostgreSQL database
plpgsql | 1.0 | pg_catalog | PL/pgSQL procedural language
(2 rows)
tt=# create table t1(id int primary key,name text);
CREATE TABLE
tt=# insert into t1 select generate_series(1,5000000),'HighGo';
INSERT 0 5000000
tt=# \d+
List of relations
Schema | Name | Type | Owner | Size | Description
--------+------+-------+----------+---------+-------------
public | t1 | table | postgres | 211 MB |
(1 row)
tt=# select pg_relation_filepath('t1');
pg_relation_filepath
----------------------
base/16812/16874
(1 row)
tt=# \timing
Timing is on.
tt=# vacuum FULL VERBOSE t1;
INFO: vacuuming "public.t1"
在vacuum full操作进行的同时,在另一终端执行:
tt=# select * from t1 limit 5;
[一直等待vacuum full操作完成]
最终输出信息如下:
tt=# vacuum FULL VERBOSE t1;
INFO: vacuuming "public.t1"
INFO: "t1": found 0 removable, 5000000 nonremovable row versions in 27028 pages
DETAIL: 0 dead row versions cannot be removed yet.
CPU 1.11s/2.71u sec elapsed 5.39 sec.
VACUUM
Time: 22358.823 ms
tt=# select pg_relation_filepath('t1');
pg_relation_filepath
----------------------
base/16812/16878
(1 row)
[表文件发生了变化,索引文件也同时被重组]
[postgres@masterdb ~]$ time pg_reorg -n -t t1 -d tt -e -E DEBUG
LOG: (query) SET statement_timeout = 0
LOG: (query) SET search_path = pg_catalog, pg_temp, public
LOG: (query) SET client_min_messages = warning
LOG: (query) SELECT * FROM reorg.tables WHERE relid = $1::regclass
LOG: (param:0) = t1
INFO: ---- reorganize one table with 7 steps. ----
INFO: target table name : t1
DEBUG: target_oid : 16843
DEBUG: target_toast : 16846
DEBUG: target_tidx : 16848
DEBUG: pkid : 16849
DEBUG: ckid : 0
DEBUG: create_pktype : CREATE TYPE reorg.pk_16843 AS (id integer)
DEBUG: create_log : CREATE TABLE reorg.log_16843 (id bigserial PRIMARY KEY, pk reorg.pk_16843, row t1)
DEBUG: create_trigger : CREATE TRIGGER z_reorg_trigger BEFORE INSERT OR DELETE OR UPDATE ON t1 FOR EACH ROW EXECUTE PROCEDURE reorg.reorg_trigger('INSERT INTO reorg.log_16843(pk, row) VALUES( CASE WHEN $1 IS NULL THEN NULL ELSE (ROW($1.id)::reorg.pk_16843) END, $2)')
DEBUG: create_table : CREATE TABLE reorg.table_16843 WITH (oids=false) TABLESPACE pg_default AS SELECT id,name FROM ONLY t1
DEBUG: drop_columns : (skipped)
DEBUG: delete_log : DELETE FROM reorg.log_16843
DEBUG: lock_table : LOCK TABLE t1 IN ACCESS EXCLUSIVE MODE
DEBUG: sql_peek : SELECT * FROM reorg.log_16843 ORDER BY id LIMIT $1
DEBUG: sql_insert : INSERT INTO reorg.table_16843 VALUES ($1.*)
DEBUG: sql_delete : DELETE FROM reorg.table_16843 WHERE (id) = ($1.id)
DEBUG: sql_update : UPDATE reorg.table_16843 SET (id, name) = ($2.id, $2.name) WHERE (id) = ($1.id)
DEBUG: sql_pop : DELETE FROM reorg.log_16843 WHERE id = $1
INFO: ---- STEP1. setup ----
INFO: This needs EXCLUSIVE LOCK against the target table.
LOG: (query) BEGIN ISOLATION LEVEL READ COMMITTED
LOG: (query) SET LOCAL statement_timeout = 100
LOG: (query) LOCK TABLE t1 IN ACCESS EXCLUSIVE MODE
LOG: (query) RESET statement_timeout
LOG: (query) SELECT reorg.conflicted_triggers($1)
LOG: (param:0) = 16843
LOG: (query) CREATE TYPE reorg.pk_16843 AS (id integer)
LOG: (query) CREATE TABLE reorg.log_16843 (id bigserial PRIMARY KEY, pk reorg.pk_16843, row t1)
LOG: (query) CREATE TRIGGER z_reorg_trigger BEFORE INSERT OR DELETE OR UPDATE ON t1 FOR EACH ROW EXECUTE PROCEDURE reorg.reorg_trigger('INSERT INTO reorg.log_16843(pk, row) VALUES( CASE WHEN $1 IS NULL THEN NULL ELSE (ROW($1.id)::reorg.pk_16843) END, $2)')
LOG: (query) SELECT reorg.disable_autovacuum('reorg.log_16843')
LOG: (query) COMMIT
INFO: ---- STEP2. copy tuples into temp table----
LOG: (query) BEGIN ISOLATION LEVEL SERIALIZABLE
LOG: (query) SELECT set_config('work_mem', current_setting('maintenance_work_mem'), true)
LOG: (query) SET LOCAL synchronize_seqscans = off
LOG: (query) SELECT reorg.array_accum(virtualtransaction) FROM pg_locks WHERE locktype = 'virtualxid' AND pid <> pg_backend_pid() AND (virtualxid, virtualtransaction) <> ('1/1', '-1/0')
LOG: (query) DELETE FROM reorg.log_16843
LOG: (query) CREATE TABLE reorg.table_16843 WITH (oids=false) TABLESPACE pg_default AS SELECT id,name FROM ONLY t1
LOG: (query) SELECT reorg.disable_autovacuum('reorg.table_16843')
LOG: (query) COMMIT
INFO: ---- STEP3. create indexes ----
LOG: (query) SELECT indexrelid, reorg.reorg_indexdef(indexrelid, indrelid), indisvalid, pg_get_indexdef(indexrelid) FROM pg_index WHERE indrelid = $1
LOG: (param:0) = 16843
DEBUG: [0]
DEBUG: target_oid : 16849
DEBUG: create_index : CREATE UNIQUE INDEX index_16849 ON reorg.table_16843 USING btree (id)
LOG: (query) CREATE UNIQUE INDEX index_16849 ON reorg.table_16843 USING btree (id)
INFO: ---- STEP4. apply logs ----
LOG: (query) SELECT reorg.reorg_apply($1, $2, $3, $4, $5, $6)
LOG: (param:0) = SELECT * FROM reorg.log_16843 ORDER BY id LIMIT $1
LOG: (param:1) = INSERT INTO reorg.table_16843 VALUES ($1.*)
LOG: (param:2) = DELETE FROM reorg.table_16843 WHERE (id) = ($1.id)
LOG: (param:3) = UPDATE reorg.table_16843 SET (id, name) = ($2.id, $2.name) WHERE (id) = ($1.id)
LOG: (param:4) = DELETE FROM reorg.log_16843 WHERE id = $1
LOG: (param:5) = 1000
LOG: (query) SELECT pid FROM pg_locks WHERE locktype = 'virtualxid' AND pid <> pg_backend_pid() AND virtualtransaction = ANY($1)
LOG: (param:0) = {}
INFO: ---- STEP5. swap tables ----
INFO: This needs EXCLUSIVE LOCK against the target table.
LOG: (query) BEGIN ISOLATION LEVEL READ COMMITTED
LOG: (query) SET LOCAL statement_timeout = 100
LOG: (query) LOCK TABLE t1 IN ACCESS EXCLUSIVE MODE
LOG: (query) RESET statement_timeout
LOG: (query) SELECT reorg.reorg_apply($1, $2, $3, $4, $5, $6)
LOG: (param:0) = SELECT * FROM reorg.log_16843 ORDER BY id LIMIT $1
LOG: (param:1) = INSERT INTO reorg.table_16843 VALUES ($1.*)
LOG: (param:2) = DELETE FROM reorg.table_16843 WHERE (id) = ($1.id)
LOG: (param:3) = UPDATE reorg.table_16843 SET (id, name) = ($2.id, $2.name) WHERE (id) = ($1.id)
LOG: (param:4) = DELETE FROM reorg.log_16843 WHERE id = $1
LOG: (param:5) = 0
LOG: (query) SELECT reorg.reorg_swap($1)
LOG: (param:0) = 16843
LOG: (query) COMMIT
INFO: ---- STEP6. drop old table----
LOG: (query) BEGIN ISOLATION LEVEL READ COMMITTED
LOG: (query) SELECT reorg.reorg_drop($1)
LOG: (param:0) = 16843
LOG: (query) COMMIT
INFO: ---- STEP7. analyze ----
LOG: (query) BEGIN ISOLATION LEVEL READ COMMITTED
LOG: (query) ANALYZE t1
LOG: (query) COMMIT
real 0m21.524s
user 0m0.007s
sys 0m0.006s
[该过程中使用到了一个中间临时表和一个中间日志表以及触发器,通过触发器将重组过程业务中发送的请求语句记录到日志表中,在完成时将日志表中记录的变更同步到中间表中,最后将中间表与实际表调换(通过调换两个表在pg_class中的信息实现),结束后将触发器、中间日志表及新的中间表(原业务表)删除。具体的过程可查看后附的数据库日志。]
在pg_reorg执行的过程中查询数据:
tt=# select * from t1 limit 5;
id | name
----+--------
1 | HighGo
2 | HighGo
3 | HighGo
4 | HighGo
5 | HighGo
(5 rows)
[不会等待pg_reorg执行完毕,因为在重组的过程中未一直将表锁住]
tt=# select pg_relation_filepath('t1');
pg_relation_filepath
----------------------
base/16812/16900
(1 row)
[数据文件发生了变化,当然,索引文件也同时被重组]
表大小 |
索引大小 |
pg_reorg |
vacuum full |
211MB |
107MB |
0m21.524s |
22358.823 ms |
845MB |
428MB |
2m42.015s |
99549.960 ms |
pg_reorg cannot reorganize temp tables.
pg_reorg cannot reorganize tables using GiST indexes.
You cannot do DDL commands except VACUUM and ANALYZE during pg_reorg. In many cases pg_reorg will fail and rollback collectly, but there are some cases which may result in data-corruption .
TRUNCATE
TRUNCATE is lost. Deleted rows still exist after pg_reorg.
CREATE INDEX
It causes index corruptions.
ALTER TABLE ... ADD COLUMN
It causes lost of data. Newly added columns are initialized with NULLs.
ALTER TABLE ... ALTER COLUMN TYPE
It causes data corruptions.
ALTER TABLE ... SET TABLESPACE
It causes data corruptions by wrong relfilenode.
重组过程中会增大I/O压力,执行重组时应避开系统繁忙的时间段;
重组过程中需要创建一些临时对象,所以执行重组时应确保有足够的磁盘空间。
LOG: statement: SET statement_timeout = 0
LOG: statement: SET search_path = pg_catalog, pg_temp, public
LOG: statement: SET client_min_messages = warning
LOG: execute <unnamed>: SELECT * FROM reorg.tables WHERE relid = $1::regclass
DETAIL: parameters: $1 = 't1'
LOG: statement: BEGIN ISOLATION LEVEL READ COMMITTED
LOG: statement: SET LOCAL statement_timeout = 100
LOG: statement: LOCK TABLE t1 IN ACCESS EXCLUSIVE MODE
LOG: statement: RESET statement_timeout
LOG: execute <unnamed>: SELECT reorg.conflicted_triggers($1)
DETAIL: parameters: $1 = '16843'
LOG: statement: CREATE TYPE reorg.pk_16843 AS (id integer)
LOG: statement: CREATE TABLE reorg.log_16843 (id bigserial PRIMARY KEY, pk reorg.pk_16843, row t1)
LOG: statement: CREATE TRIGGER z_reorg_trigger BEFORE INSERT OR DELETE OR UPDATE ON t1 FOR EACH ROW EXECUTE PROCEDURE reorg.reorg_trigger('INSERT INTO reorg.log_16843(pk, row) VALUES( CASE WHEN $1 IS NULL THEN NULL ELSE (ROW($1.id)::reorg.pk_16843) END, $2)')
LOG: statement: SELECT reorg.disable_autovacuum('reorg.log_16843')
LOG: statement: COMMIT
LOG: statement: BEGIN ISOLATION LEVEL SERIALIZABLE
LOG: statement: SELECT set_config('work_mem', current_setting('maintenance_work_mem'), true)
LOG: statement: SET LOCAL synchronize_seqscans = off
LOG: statement: SELECT reorg.array_accum(virtualtransaction) FROM pg_locks WHERE locktype = 'virtualxid' AND pid <> pg_backend_pid() AND (virtualxid, virtualtransaction) <> ('1/1', '-1/0')
LOG: statement: DELETE FROM reorg.log_16843
LOG: statement: CREATE TABLE reorg.table_16843 WITH (oids=false) TABLESPACE pg_default AS SELECT id,name FROM ONLY t1
LOG: statement: SELECT reorg.disable_autovacuum('reorg.table_16843')
LOG: statement: COMMIT
LOG: execute <unnamed>: SELECT indexrelid, reorg.reorg_indexdef(indexrelid, indrelid), indisvalid, pg_get_indexdef(indexrelid) FROM pg_index WHERE indrelid = $1
DETAIL: parameters: $1 = '16843'
LOG: statement: CREATE UNIQUE INDEX index_16849 ON reorg.table_16843 USING btree (id)
LOG: execute <unnamed>: SELECT reorg.reorg_apply($1, $2, $3, $4, $5, $6)
DETAIL: parameters: $1 = 'SELECT * FROM reorg.log_16843 ORDER BY id LIMIT $1', $2 = 'INSERT INTO reorg.table_16843 VALUES ($1.*)', $3 = 'DELETE FROM reorg.table_16843 WHERE (id) = ($1.id)', $4 = 'UPDATE reorg.table_16843 SET (id, name) = ($2.id, $2.name) WHERE (id) = ($1.id)', $5 = 'DELETE FROM reorg.log_16843 WHERE id = $1', $6 = '1000'
LOG: execute <unnamed>: SELECT pid FROM pg_locks WHERE locktype = 'virtualxid' AND pid <> pg_backend_pid() AND virtualtransaction = ANY($1)
DETAIL: parameters: $1 = '{}'
LOG: statement: BEGIN ISOLATION LEVEL READ COMMITTED
LOG: statement: SET LOCAL statement_timeout = 100
LOG: statement: LOCK TABLE t1 IN ACCESS EXCLUSIVE MODE
LOG: statement: RESET statement_timeout
LOG: execute <unnamed>: SELECT reorg.reorg_apply($1, $2, $3, $4, $5, $6)
DETAIL: parameters: $1 = 'SELECT * FROM reorg.log_16843 ORDER BY id LIMIT $1', $2 = 'INSERT INTO reorg.table_16843 VALUES ($1.*)', $3 = 'DELETE FROM reorg.table_16843 WHERE (id) = ($1.id)', $4 = 'UPDATE reorg.table_16843 SET (id, name) = ($2.id, $2.name) WHERE (id) = ($1.id)', $5 = 'DELETE FROM reorg.log_16843 WHERE id = $1', $6 = '0'
LOG: execute <unnamed>: SELECT reorg.reorg_swap($1)
DETAIL: parameters: $1 = '16843'
LOG: statement: COMMIT
LOG: statement: BEGIN ISOLATION LEVEL READ COMMITTED
LOG: execute <unnamed>: SELECT reorg.reorg_drop($1)
DETAIL: parameters: $1 = '16843'
LOG: statement: COMMIT
LOG: statement: BEGIN ISOLATION LEVEL READ COMMITTED
LOG: statement: ANALYZE t1
LOG: statement: COMMIT