安装:
# tar -zxvf pg_bulkload-3.1.6.tar.gz # cd pg_bulkload-3.1.6 # . /home/postgres/.bash_profile # make # make install
引入扩展:
$ psql psql (9.3.4) Type "help" for help. postgres=# create database test; CREATE DATABASE postgres=# \c test You are now connected to database "test" as user "postgres". test=# create extension pg_bulkload ; CREATE EXTENSION test=# \dx List of installed extensions Name | Version | Schema | Description -------------+---------+------------+----------------------------------------------------------------- pg_bulkload | 1.0 | public | pg_bulkload is a high speed data loading utility for PostgreSQL plpgsql | 1.0 | pg_catalog | PL/pgSQL procedural language (2 rows) test=# \df List of functions Schema | Name | Result data type | Argum ent data types | Type --------+-------------+------------------+------------------------------------------------------------------------------------------------------------ ---------------------------------------------------------------------------------------------------------------------+-------- public | pg_bulkload | record | options text[], OUT skip bigint, OUT count bigint, OUT parse_errors bigint, OUT duplicate_new bigint, OUT d uplicate_old bigint, OUT system_time double precision, OUT user_time double precision, OUT duration double precision | normal (1 row)
创造测试数据:
test=# create table t1(id int,name text); CREATE TABLE test=# insert into t1 select generate_series(1,50000000),'HighGo'; INSERT 0 50000000 test=# \d+ List of relations Schema | Name | Type | Owner | Size | Description --------+------+-------+----------+---------+------------- public | t1 | table | postgres | 2112 MB | (1 row) test=# copy t1 to '/opt/pg93/data/t1.csv' with(format 'csv'); COPY 50000000
$ du -sh /opt/pg93/data/t1.csv 754M /opt/pg93/data/t1.csv
copy方式载入数据:
(with logged):
test=# truncate t1; TRUNCATE TABLE test=# \timing Timing is on. test=# test=# copy t1 from '/opt/pg93/data/t1.csv' with(format 'csv'); COPY 50000000 Time: 139038.099 ms
(without logged):
test=# truncate t1; TRUNCATE TABLE 修改t1表为unlogged test=# update pg_class set relpersistence='u' where relname='t1'; UPDATE 1 test=# copy t1 from '/opt/pg93/data/t1.csv' with(format 'csv'); COPY 50000000 Time: 110796.480 ms
pg_blukload载入数据:
(without logged):
$ pg_bulkload -i /opt/pg93/data/t1.csv -O t1 -l t1_bulkload.log -o "TYPE=CSV" -o "WRITER=PARALLEL" -d test NOTICE: BULK LOAD START NOTICE: BULK LOAD END 0 Rows skipped. 50000000 Rows successfully loaded. 0 Rows not loaded due to parse errors. 0 Rows not loaded due to duplicate errors. 0 Rows replaced with new rows. Run began on 2014-06-16 05:53:45.025377+08 Run ended on 2014-06-16 05:55:00.625057+08 CPU 2.80s/33.00u sec elapsed 75.60 sec
$ pg_bulkload -i /opt/pg93/data/t1.csv -O t1 -l t1_bulkload.log -o "TYPE=CSV" -o "WRITER=DIRECT" -d test NOTICE: BULK LOAD START NOTICE: BULK LOAD END 0 Rows skipped. 50000000 Rows successfully loaded. 0 Rows not loaded due to parse errors. 0 Rows not loaded due to duplicate errors. 0 Rows replaced with new rows. Run began on 2014-06-16 06:05:40.267198+08 Run ended on 2014-06-16 06:07:05.08921+08 CPU 6.88s/34.25u sec elapsed 84.82 sec
(with logged):
test=# truncate t1; TRUNCATE TABLE 修改t1表为logged test=# update pg_class set relpersistence='p' where relname='t1'; UPDATE 1
$ pg_bulkload -i /opt/pg93/data/t1.csv -O t1 -l t1_bulkload.log -o "TYPE=CSV" -o "WRITER=PARALLEL" -d test NOTICE: BULK LOAD START NOTICE: BULK LOAD END 0 Rows skipped. 50000000 Rows successfully loaded. 0 Rows not loaded due to parse errors. 0 Rows not loaded due to duplicate errors. 0 Rows replaced with new rows. Run began on 2014-06-16 05:57:05.620751+08 Run ended on 2014-06-16 05:58:20.458029+08 CPU 2.80s/33.02u sec elapsed 74.84 sec $ pg_bulkload -i /opt/pg93/data/t1.csv -O t1 -l t1_bulkload.log -o "TYPE=CSV" -o "WRITER=DIRECT" -d test NOTICE: BULK LOAD START NOTICE: BULK LOAD END 0 Rows skipped. 50000000 Rows successfully loaded. 0 Rows not loaded due to parse errors. 0 Rows not loaded due to duplicate errors. 0 Rows replaced with new rows. Run began on 2014-06-16 06:02:10.33344+08 Run ended on 2014-06-16 06:03:36.986382+08 CPU 7.15s/34.93u sec elapsed 86.65 sec
pg_bulkload执行过程中查看进程和连接:
(WRITER=PARALLEL)
$ ps -ef | grep post postgres 24044 22690 0 05:53 pts/2 00:00:00 pg_bulkload -i /opt/pg93/data/t1.csv -O t1 -l t1_bulkload.log -o TYPE=CSV -o WRITER=PARALLEL -d test postgres 24045 2236 48 05:53 ? 00:00:04 postgres: postgres test [local] SELECT postgres 24046 2236 20 05:53 ? 00:00:02 postgres: postgres test [local] SELECT
postgres=# select datname,application_name,query from pg_stat_activity; datname | application_name | query ----------+------------------+------------------------------------------------------------------------------------------------------------------------ ----------------------------------------------------------------------------------------------------------- postgres | psql | select datname,application_name,query from pg_stat_activity; test | pg_bulkload | SELECT * FROM pg_bulkload($1) test | | SELECT * FROM pg_bulkload(ARRAY['TYPE=TUPLE','INPUT=' || $1,'WRITER=DIRECT','OUTPUT=' || $2,'ON_DUPLICATE_KEEP=' || $3, 'DUPLICATE_ERRORS=' || $4,'DUPLICATE_BADFILE=' || $5,'LOGFILE=' || $6,'VERBOSE=' || $7,'TRUNCATE=' || $8]) (3 rows)
(WRITER=PARALLEL)
$ ps -ef | grep post postgres 24124 22690 0 06:02 pts/2 00:00:00 pg_bulkload -i /opt/pg93/data/t1.csv -O t1 -l t1_bulkload.log -o TYPE=CSV -o WRITER=DIRECT -d test postgres 24125 2236 49 06:02 ? 00:00:13 postgres: postgres test [local] SELECT
test=# select datname,application_name,query from pg_stat_activity; datname | application_name | query ---------+------------------+-------------------------------------------------------------- test | pg_bulkload | SELECT * FROM pg_bulkload($1) test | psql | select datname,application_name,query from pg_stat_activity; (2 rows)