参考文献
http://www.oushu.io/docs/ch/g...
http://www.oushu.io/docs/ch/h...
测试的软件版本
PostgreSQL 8.2.15
(OushuDB 3.4.0.0)
(Apache HAWQ 2.4.0.0)
(Greenplum Database 4.2.0 build 1)
on x86_64-unknown-linux-gnu,
compiled by GCC clang version 8.0.1 (tags/RELEASE_801/final)
compiled on Jan 15 2020 05:46:01
语法
DB=# \h CREATE EXTERNAL TABLE
规范
命名规范
ext_XXXXXX
err_XXXXXX
err表需要定期清理
建议做成存储过程定期清理
建立gpfdist外部表
启动gpfdist服务(文件服务器)
nohup gpfdist -d /home/gpadmin -p 8888 > gpfdist.log 2>&1 &
检查gpfdist服务是否存在(文件服务器)
-bash-4.2$ ps -ef | grep gpfdist
gpadmin 507192 493162 0 01:57 pts/0 00:00:00 gpfdist -d /home/gpadmin -p 8888
建立可写gpfdist外部表
create writable external table ext_w_test001(
id bigint,
name varchar(128)
)
location('gpfdist://10.0.0.6:8888/gpextdata/test001.txt')
format 'csv'
encoding 'GB18030';
可写测试
insert into ext_w_test001 select generate_series(1,1000,1),1;
# 插入数据后登陆服务器10.0.0.6(文件服务器)执行下列shell指令
wc -l /home/gpadmin/gpextdata/test001.txt
建立可读gpfdist外部表
create readable external table ext_r_test001(
id bigint,
name varchar(128)
)
location('gpfdist://10.0.0.6:8888/gpextdata/test001.txt')
format 'csv'
encoding 'GB18030'
log errors into err_r_test001
segment reject limit 10 rows;
可读测试
select count(*) from ext_r_test001;
select * from ext_r_test001 limit 5;
删除gpfdist外部表
找出对应的外部表和err表
方案一
External table "public.ext_w_test001"
Column | Type | Modifiers
--------+------------------------+-----------
id | bigint |
name | character varying(128) |
Type: writable
Encoding: GB18030
Format type: csv
Format options: delimiter ',' null '' escape '"' quote '"'
External location: gpfdist://10.0.0.6:8888/gpextdata/test001.txt
Distributed randomly
dw=# \d ext_r_test001
External table "public.ext_r_test001"
Column | Type | Modifiers
--------+------------------------+-----------
id | bigint |
name | character varying(128) |
Type: readable
Encoding: GB18030
Format type: csv
Format options: delimiter ',' null '' escape '"' quote '"'
External location: gpfdist://10.0.0.6:8888/gpextdata/test001.txt
Segment reject limit: 10 rows
Error table: err_r_test001
方案二
SELECT x.location,
x.fmttype,
x.fmtopts,
x.command,
x.rejectlimit,
x.rejectlimittype,
x.writable,
(SELECT relname
FROM pg_class WHERE Oid=x.fmterrtbl) AS errtblname,
pg_catalog.pg_encoding_to_char(x.encoding
),
x.fmterrtbl = x.reloid AS errortofile
FROM pg_catalog.pg_exttable x,
pg_catalog.pg_class c
WHERE x.reloid = c.oid
AND c.oid = 'public.ext_r_test001'::regclass;
删除外部表(删除之前查看建表语法有没有错误表存在)
drop table err_r_test001;
drop external table public.ext_r_test001;
drop external table public.ext_w_test001;
关闭gpfdist服务(文件服务器)
ps -ef | grep gpfdist | grep -v grep | awk '{print $2}' | xargs kill
ps -ef | grep gpfdist
建立web外部表
启动httpd服务(文件服务器)
service httpd start
service httpd status
检查http服务是否可用
echo "1,2" >/var/www/html/text.csv
wget 10.0.0.4:80/text.csv
建立可读web外部表
create readable external web table ext_r_web_test(
id bigint,
name varchar(128)
)
location ('http://10.0.0.4:80/text.csv')
format 'csv'
encoding 'GB18030'
log errors into err_w_web_test
segment reject limit 10 rows;
可读测试
select * from ext_r_web_test;
关闭httpd服务(文件服务器)
service httpd stop
建立cmd外部表
可写cmd外部表对应脚本准备
echo "cat > /tmp/externala" >/home/gpadmin/unload.sh
建立可写cmd外部表
create writable external web table ext_w_cmd_web(
id bigint,
name varchar(128)
)
execute '/home/gpadmin/unload.sh' on 3
format 'csv'
encoding 'GB18030';
可写测试
insert into ext_w_cmd_web select generate_series(1,30,1),1;
## 检查数据文件
gpssh -f all_hosts "cat /tmp/externala"
可读cmd外部表对应脚本准备
echo "cat /tmp/externala" > /home/gpadmin/load.sh
建立可读cmd外部表
create readable external web table ext_r_cmd_web(
id bigint,
name varchar(128)
)
execute '/home/gpadmin/load.sh' on 3
format 'csv'
encoding 'GB18030'
log errors into err_r_cmd_tab
segment reject limit 10 rows;
可读测试
select count(*) from ext_r_cmd_web;
select * from ext_r_cmd_web;
建立hdfs外部表
建立可写hdfs外部表
CREATE writable EXTERNAL TABLE ext_w_hdfs_test
(id int, name text, sponsor text)
LOCATION ('hdfs://oushu/hawq/default_filespace/test' ) FORMAT 'CSV'
encoding 'GB18030';
可写测试
insert into ext_w_hdfs_test select 1,2,3;
建立可读hdfs外部表
CREATE readable EXTERNAL TABLE ext_r_hdfs_test
(id int, name text, sponsor text)
LOCATION ('hdfs://oushu/hawq/default_filespace/test' ) FORMAT 'CSV'
encoding 'GB18030'
log errors into err_r_hdfs_tab
segment reject limit 10 rows;
可读测试
select count(*) from ext_r_hdfs_test;
select * from ext_r_hdfs_test;