为了方便,创建以下合并函数
1、创建合并配置表和合并统计表
-- Create the merge configuration table: one row per target table that can be
-- merged into. IF EXISTS keeps the script re-runnable on a fresh database.
drop table if exists merge_config_table;
-- The primary key is declared inline; the table is brand new at this point,
-- so there is no pre-existing pk_tablename constraint to drop first.
-- The key column is also the distribution key.
-- Column order is kept unchanged so positional inserts keep working.
create table merge_config_table (
    tablename            varchar,
    ispartitiontable     varchar,
    to_merge_temp_table  varchar,
    to_merge_table       varchar,
    merge_field          varchar,
    merge_flag           int,
    constraint pk_tablename primary key (tablename)
) distributed by (tablename);
comment on column merge_config_table.tablename is '需要合并表名称:如需要合并 a 表,那么此处配置为 a 表';
comment on column merge_config_table.ispartitiontable is '是否是分区表: 0 否,1 是';
comment on column merge_config_table.to_merge_table is '待合并表名称:如将 b 表的数据合进 a 表,那么此配置为 b 表';
comment on column merge_config_table.to_merge_temp_table is '待合并临时表名称:如先将 c 表(临时表)的数据合并 b 表,再将 b 表的数据合进 a 表,此处配置为 c 表';
comment on column merge_config_table.merge_field is '合并字段';
comment on column merge_config_table.merge_flag is '是否需要合并:0 否,1 是';
-- Create the merge statistics table: one row is appended per merge run that
-- actually copied rows. IF EXISTS keeps the script re-runnable.
drop table if exists merge_count;
create table merge_count (
    tablename    varchar,
    merge_count  int,
    total_count  int,
    merge_time   date
) distributed randomly;
-- The statement is COMMENT ON COLUMN <table>.<column>; "comment column on"
-- is a syntax error.
comment on column merge_count.tablename is '合并表名';
comment on column merge_count.merge_count is '合并数据统计';
comment on column merge_count.total_count is '表总数据';
comment on column merge_count.merge_time is '合并时间';
2、为合并配置表添加待合并表
-- Register tmp1 as a merge target fed from tmp2, matched on (id, name).
-- Columns are named explicitly so the statement does not depend on the
-- physical column order; ispartitiontable is a varchar column, hence '0'.
insert into merge_config_table
    (tablename, ispartitiontable, to_merge_temp_table, to_merge_table, merge_field, merge_flag)
values
    ('tmp1', '0', null, 'tmp2', 'id,name', 1);
3、创建合并函数
-- proc_merge_table(tabname, uname)
--
-- Copies into table `tabname` every row of its configured source table
-- (merge_config_table.to_merge_table) whose key columns
-- (merge_config_table.merge_field, a comma-separated column list) do not
-- already occur in `tabname`. When at least one row was copied, one row is
-- appended to merge_count.
--
-- Parameters:
--   tabname  target table name; compared lower-cased against
--            merge_config_table.tablename and pg_tables.tablename.
--   uname    owner of the target table; compared lower-cased against
--            pg_tables.tableowner.
-- Returns: void. Progress and problems are reported through RAISE NOTICE.
--
-- Notes:
--   * Matching uses NOT EXISTS instead of NOT IN, so a NULL key value in the
--     target no longer suppresses the whole merge. A source row whose key
--     contains NULL never compares equal and is therefore always copied.
--   * merge_field and to_merge_table are spliced into the statement as
--     configured (to_merge_table may be schema-qualified), so
--     merge_config_table must only be writable by trusted users.
--   * When tabname or uname is empty/NULL nothing is merged; merging all
--     configured tables is not implemented.
create or replace function proc_merge_table(tabname varchar,uname varchar)
returns void as
$FUNCTION$
declare
    target_name  varchar;   -- lower-cased target table name
    is_exist     integer;   -- number of matching config/catalog rows
    source_name  varchar;   -- configured source table (to_merge_table)
    key_fields   varchar;   -- configured key column list (merge_field)
    target_keys  varchar;   -- key columns qualified with alias t
    source_keys  varchar;   -- key columns qualified with alias s
    sqltext      text;      -- dynamic statement being executed
    merged_rows  integer;   -- rows inserted by the merge statement
begin
    if lower(tabname) <> '' and uname <> '' then
        -- Merge one named table.
        target_name := lower(tabname);

        -- The table must be configured and must exist with the given owner.
        select count(*) into is_exist
        from pg_tables pt
        inner join merge_config_table mct
            on pt.tablename = lower(mct.tablename)
        where mct.tablename = target_name
          and pt.tableowner = lower(uname);

        if is_exist = 0 then
            raise notice 'The query table % does not exists , please check tablename or username !',tabname;
            return;
        end if;

        raise notice '合并的表名为: % ',tabname;

        -- Read key columns and source table from the enabled config row.
        select mct.merge_field, mct.to_merge_table
          into key_fields, source_name
          from merge_config_table mct
         where mct.tablename = target_name
           and mct.merge_flag = 1;

        -- Without this guard a disabled or incomplete config row would make
        -- the concatenation below NULL and EXECUTE would fail on it.
        if key_fields is null or btrim(key_fields) = ''
           or source_name is null or btrim(source_name) = '' then
            raise notice 'Table % is not enabled for merging or its merge configuration is incomplete', tabname;
            return;
        end if;

        raise notice '字段为: %' ,key_fields;

        -- 'id,name' becomes 't.id, t.name' / 's.id, s.name'.
        target_keys := 't.' || replace(key_fields, ',', ', t.');
        source_keys := 's.' || replace(key_fields, ',', ', s.');

        sqltext := 'insert into ' || quote_ident(target_name) ||
                   ' select s.* from ' || source_name || ' s' ||
                   ' where not exists (select 1 from ' || quote_ident(target_name) || ' t' ||
                   ' where (' || target_keys || ') = (' || source_keys || '))';
        execute sqltext;
        GET DIAGNOSTICS merged_rows := ROW_COUNT;

        if merged_rows <> 0 then
            raise notice '合并 % 行',merged_rows;
            -- Values are quoted with quote_literal so a table name that
            -- contains a quote cannot break the statement.
            sqltext := 'insert into merge_count(tablename,merge_count,merge_time)' || chr(10) ||
                       'values(' || quote_literal(tabname) || ',' ||
                       quote_literal(merged_rows::text) || ',' ||
                       quote_literal(now()::date::text) || ')';
            execute sqltext;
        else
            raise notice '未合并数据: %',merged_rows;
        end if;

        -- Shows the last statement built (stats insert, or the merge itself
        -- when nothing was copied).
        raise notice 'sqltext : %',sqltext;
    else
        -- Merge all configured tables: not implemented, nothing is changed.
        raise notice 'No table name or user name given: merging all configured tables is not implemented';
    end if;
end;
$FUNCTION$
language plpgsql;
4、初始化表数据
-- Reset the demo tables and the statistics table.
-- NOTE(review): tmp1 and tmp2 are not created anywhere in this script; they
-- are assumed to already exist with two columns matching the configured
-- merge_field 'id,name' -- confirm before running.
TRUNCATE TABLE tmp2;
TRUNCATE TABLE tmp1;
TRUNCATE TABLE merge_count;

-- Target table: already holds ids 1-4 (id 1 three times).
INSERT INTO tmp1 VALUES
    (1, 'a'),
    (1, 'a'),
    (2, 'b'),
    (3, 'c'),
    (4, 'd'),
    (1, 'a');

-- Source table: ids 2-9, of which 2-4 are already present in tmp1.
INSERT INTO tmp2 VALUES
    (2, 'b'),
    (3, 'c'),
    (4, 'd'),
    (5, 'f'),
    (6, 'e'),
    (7, 'e'),
    (8, 'e'),
    (9, 'e');
5、执行合并函数
-- Merge the configured source of tmp1 into tmp1; 'test' is the owner name
-- checked against pg_tables.tableowner.
SELECT proc_merge_table('tmp1', 'test');
NOTICE: 合并的表名为: tmp1
NOTICE: 字段为: id,name
NOTICE: 合并 5 行
NOTICE: sqltext : insert into merge_count(tablename,merge_count,merge_time)
DETAIL: values('tmp1','5','2019-06-30')
proc_merge_table
------------------
(1 row)
6、查看合并统计表记录
select * from merge_count;
tablename | merge_count | total_count | merge_time
-----------+-------------+-------------+------------
tmp1 | 5 | | 2019-06-30
(1 row)
以上函数可根据实际需求稍作修改,即可满足使用要求。