更新时间20210923
目录
1.获取分布键
2. 获取主键
3.查看视图注释
4.改变表所属schema
5.查找某个schema里有哪些表
6.一个merge的举例
7.根据mpp表生成hive的建表语句(表名加_di后缀)
8.分区操作
①.查看表分区
②设置每月一个分区(区间)
③设置上界
④删除分区
⑤新增分区
⑥分区合并
⑦分区分割
⑧重命名分区
9.列操作
①查询列信息
②拼接所有列
10.数据去重(未优化版本)
11.建立与使用序列
12.查看表数据在DN上的分布倾斜情况
-- 1. Distribution key: names of the columns whose attribute numbers are
--    listed in PGXC_CLASS.pcattnum for the given table.
--    Replace '表schema.表名称' (both occurrences) with schema.table.
SELECT
    att.attname
FROM pg_attribute AS att
WHERE att.attrelid = '表schema.表名称'::regclass
  AND NOT att.attisdropped
  AND att.attnum IN (
        SELECT unnest(xc.pcattnum)
        FROM pgxc_class AS xc
        WHERE xc.pcrelid = '表schema.表名称'::regclass
      );
-- 2. Primary key: names of the columns that belong to the table's
--    primary-key index.
--    The two bind parameters are combined as '<first>.<second>' and resolved
--    with ::regclass (presumably schema name, then table name -- confirm
--    against the caller).
SELECT
    col.attname AS pk_name
FROM pg_index AS idx
INNER JOIN pg_attribute AS col
    ON col.attrelid = idx.indrelid
   AND col.attnum = ANY (idx.indkey)
WHERE idx.indisprimary
  AND idx.indrelid = format('%s.%s', ?, ?)::regclass;
-- Method 1: column comments of a view.
-- Returns one row per live user column (attnum > 0, not dropped) that has a
-- comment: attribute number, column name and the comment as a quoted literal.
-- Replace '视图schema.视图名称' with schema.view.
SELECT
    pa.attnum,
    pa.attname,
    quote_literal(col_description(pa.attrelid, pa.attnum)) AS attdesc
FROM pg_attribute AS pa
WHERE pa.attrelid = '视图schema.视图名称'::regclass::oid
  AND pa.attnum > 0
  AND NOT pa.attisdropped
  -- Filter on the expression itself: a select-list alias cannot be relied on
  -- inside WHERE, and the previous "attdescis not null" did not parse.
  AND col_description(pa.attrelid, pa.attnum) IS NOT NULL;
-- Method 2: generate COMMENT ON COLUMN statements for a view.
-- Produces a single text value (comment_ddl) with one statement per line,
-- in column order. Adjust the schema / view name in the WHERE clause.
SELECT
    string_agg(
        format(
            'COMMENT ON COLUMN "%s"."%s"."%s" IS %s;',
            c.table_schema,
            c.table_name,
            c.column_name,
            quote_literal(
                col_description(
                    -- %I quotes the identifiers when needed so the regclass
                    -- lookup also works for mixed-case or unusual names.
                    format('%I.%I', c.table_schema, c.table_name)::regclass::oid,
                    c.ordinal_position
                )
            )
        ),
        E'\n'
        -- Without an explicit ORDER BY the aggregation order is undefined.
        ORDER BY c.ordinal_position
    ) AS comment_ddl
FROM information_schema.columns AS c
WHERE c.table_schema = 'jwy_odm'
  AND c.table_name = 'myv'
  -- Skip columns without a comment: format() renders NULL as an empty
  -- string, which would emit the invalid statement "... IS ;".
  AND col_description(
          format('%I.%I', c.table_schema, c.table_name)::regclass::oid,
          c.ordinal_position
      ) IS NOT NULL;
-- 4. Move a table to another schema: 表 is the table, 表schema the target schema.
ALTER TABLE 表
    SET SCHEMA 表schema;
-- 5. List the base tables of one schema as schema-qualified names.
--    Replace '表schema' (both occurrences) with the schema name.
SELECT
    '表schema.' || t.table_name AS table_name
FROM information_schema.tables AS t
WHERE t.table_type = 'BASE TABLE'
  AND t.table_schema = '表schema'
  -- Optional name filters (disabled):
  -- AND t.table_name ~ 's_[^_]+_t_.*'
  -- AND t.table_name NOT LIKE 's_code%'
;
-- Variant with comments: tables of schema 'odm' together with their
-- table-level comment.
-- Only relations that appear in information_schema.table_privileges are
-- listed; DISTINCT collapses the several privilege rows per table.
SELECT DISTINCT
    pv.table_name,
    -- obj_description() expects the relation's OID. The previous version
    -- passed relfilenode, and took it from a pg_class row matched by name
    -- only, so a same-named relation in another schema could supply the comment.
    CAST(obj_description(pc.oid, 'pg_class') AS varchar) AS comment
FROM pg_class AS pc
INNER JOIN pg_namespace AS ns
    ON ns.oid = pc.relnamespace
INNER JOIN information_schema.table_privileges AS pv
    ON pv.table_schema = ns.nspname
   AND pv.table_name = pc.relname
WHERE pv.table_schema = 'odm'
  -- NOTE(review): this pattern is unanchored, so it matches any name that
  -- contains "o_"; use '^o_' if a prefix match is intended.
  AND pv.table_name ~ 'o_.*'
  -- Optional filters (disabled):
  -- AND pv.grantee <> 'odm'
  -- AND pc.relkind = 'v'
ORDER BY pv.table_name;
-- 6. MERGE example: update matching rows of the target table from the
--    source table and insert the source rows that have no match.
MERGE INTO 表sche.主表名称1 AS tgt
USING 表sche.次表名称2 AS src
    -- The ON clause may combine several conditions.
    ON (tgt.id = src.id AND src.xxrksj > to_date('19001010'))
WHEN MATCHED THEN
    -- The ON condition holds: update the target row.
    UPDATE SET
        tgt.rybh = src.rybh,
        tgt.zpxh = src.zpxh
WHEN NOT MATCHED THEN
    -- The ON condition does not hold: insert a new row.
    INSERT (id, rybh)
    VALUES (src.id, src.rybh);
-- 7. Generate Hive CREATE TABLE statements (table name suffixed with _di)
--    from the MPP tables of one schema.
--    Output: one row per source table with
--      tm  - the table comment as a quoted literal (NULL when there is none)
--      ddl - a "create table if not exists ..." statement followed by a grant.
--    Replace '表schema' with the source schema and 授权用户名 with the grantee.
with cols as (
    -- One row per column: qualified table name, position, column name,
    -- mapped Hive type and the quoted column comment.
    select
        c.table_schema || '.' || c.table_name as sat,
        c.ordinal_position as pos,
        c.column_name as cn,
        case
            when c.data_type in ('text') then 'string'
            when c.data_type in ('nvarchar2', 'character varying', 'character', '"char"')
                 and c.character_maximum_length is null then 'string'
            when c.data_type in ('nvarchar2', 'character varying', 'character', '"char"')
                then 'varchar(' || c.character_maximum_length || ')'
            when c.data_type in ('time without time zone', 'time with time zone') then 'string'
            when c.data_type in ('timestamp with time zone', 'timestamp without time zone') then 'timestamp'
            when c.data_type in ('tinyint', 'smallint', 'integer', 'bigint', 'interval') then c.data_type
            when c.data_type in ('double precision') then 'double'
            when c.data_type = 'bytea' then 'string'
            when c.data_type = 'numeric' and coalesce(c.numeric_scale, 0) <> 0 then 'double'
            when c.data_type = 'numeric' then 'integer'
            -- Unmapped types are flagged so they stand out in the generated DDL.
            else 'null-' || c.data_type
        end as dt,
        -- The expression is spelled out instead of referencing the "sat"
        -- alias, which is not visible inside its own select list in
        -- standard PostgreSQL.
        quote_literal(
            col_description(
                (c.table_schema || '.' || c.table_name)::regclass::oid,
                c.ordinal_position
            )
        ) as cm
    from information_schema.columns as c
    where c.table_schema = '表schema'
      -- Filters
      -- and c.table_name ~ 's_[^_]+_t_.*'
      and c.table_name not like 's_code%'
      -- Special handling: restrict to specific tables
      -- and c.table_name in ('表名称1', '表名称2')
)
select
    quote_literal(obj_description(cols.sat::regclass::oid, 'pg_class')) as tm,
    E'-- ' || cols.sat || E'_di \n'
    || 'create table if not exists ' || cols.sat || '_di' || E'(\n'
    || string_agg(
           ' `' || cols.cn || '` ' || cols.dt
           -- Use '' rather than NULL: outside Oracle-compatible mode
           -- "text || NULL" is NULL and the column would silently disappear.
           || case when cols.cm is null then '' else ' comment ' || cols.cm end,
           E',\n'
           -- An ORDER BY inside the CTE does not fix the aggregation order;
           -- order the aggregate itself to keep the source column order.
           order by cols.pos
       )
    || E')\n'
    || case
           when obj_description(cols.sat::regclass::oid, 'pg_class') is null then ''
           else E'comment '
                || quote_literal(obj_description(cols.sat::regclass::oid, 'pg_class'))
                || E'\n'
       end
    || E'partitioned by (etl_rkrq string)\n'
    || E'stored as orc;\n'
    || E'grant select, update, insert, delete on table ' || cols.sat || '_di to user 授权用户名;'
    || E'\n\n' as ddl
from cols
group by cols.sat
order by cols.sat;
自动建立的分区的命名应该是P_1 , P_2 , P_3 ...
-- Example table "odm"."test_partition", range-partitioned on the timestamp
-- column sj. It starts with a single partition P_MIN that holds every row
-- with sj earlier than 2019-01-01.
CREATE TABLE "odm"."test_partition" (
"id" int4 NOT NULL,
"sj" timestamp(6),
"ms" varchar(255)
)
PARTITION by RANGE(sj) (
PARTITION P_MIN VALUES LESS THAN(DATE '2019-01-01')
);
-- Alternative, begin:
-- The lines up to the end marker are a clause fragment, not a standalone
-- statement: column-oriented storage with low compression, hash distribution
-- on id, and one partition per month from 2018-01-01 up to 2040-01-01.
-- NOTE(review): presumably meant to replace the PARTITION clause of the
-- CREATE TABLE above -- confirm before running.
WITH (ORIENTATION = COLUMN, COMPRESSION = LOW)
DISTRIBUTE BY HASH(id)
PARTITION by RANGE(sj) (
PARTITION P START(DATE '2018-01-01') END(DATE '2040-01-01') EVERY( INTERVAL '1' MONTH)
)
-- Alternative, end
-- Sample rows for test_partition_pd, one statement per row so that a row
-- rejected by the partition bounds does not block the others.
INSERT INTO test_partition_pd (id, sj, ms)
VALUES (1, '1970-01-01 12:12:12', '最小');

INSERT INTO test_partition_pd (id, sj, ms)
VALUES (2, '2020-01-02 01:02:03', '一月');

INSERT INTO test_partition_pd (id, sj, ms)
VALUES (3, '2019-12-31 09:11:33', '十二月');

INSERT INTO test_partition_pd (id, sj, ms)
VALUES (4, '2020-06-17 09:48:12', '六月');
-- Method 1: list the pg_partition entries of a table, looked up by its
-- bare table name (no schema).
SELECT
    c.relname,
    p.relname AS part_relname,
    p.parttype,
    p.partkey,
    p.relfilenode,
    p.boundaries,
    p.reloptions
FROM pg_partition AS p
-- pg_partition.parentid holds the OID of the owning table, so join on
-- pg_class.oid; relfilenode can differ from the OID once the table's
-- storage has been rewritten.
INNER JOIN pg_class AS c
    ON c.oid = p.parentid
WHERE c.relname = '表名称(不加schema)'
ORDER BY c.relname, p.relfilenode;
-- Method 2: partition name, boundaries and tablespace of every
-- pg_partition entry of one table, addressed as schema.table.
SELECT
    p.relname,
    p.boundaries,
    t.spcname
FROM pg_partition AS p
-- LEFT JOIN keeps partitions whose reltablespace has no pg_tablespace row
-- (presumably those stored in the default tablespace, recorded as 0 --
-- confirm); an inner join silently drops them.
LEFT JOIN pg_tablespace AS t
    ON t.oid = p.reltablespace
WHERE p.parentid = '表schema.表名称'::regclass
ORDER BY p.relname;
-- Method 3: read pg_partition directly, matching its relname column
-- against a bare name (no schema).
SELECT
    *
FROM pg_partition
WHERE relname = '表名称(不加schema)';
-- (2) One partition per month: add monthly range partitions covering
--     2019-01-01 up to 2040-01-01.
ALTER TABLE "odm"."test_partition"
    ADD PARTITION P
        START (DATE '2019-01-01')
        END (DATE '2040-01-01')
        EVERY (INTERVAL '1' MONTH);

-- (3) Upper bound: add a partition bounded by MAXVALUE.
ALTER TABLE "odm"."test_partition"
    ADD PARTITION P_MAX VALUES LESS THAN (MAXVALUE);

-- (4) Drop a partition.
ALTER TABLE "odm"."test_partition_pd"
    DROP PARTITION P_MAX;

-- (5) Add a single partition for an explicit range.
ALTER TABLE "odm"."test_partition_pd"
    ADD PARTITION p_123
        START (DATE '2020-05-01')
        END (DATE '2020-06-01 ');
分区合并,分割均为新分区,非已存在分区
-- (6) Merge partitions p1 and p2 into P_MIN.
ALTER TABLE 表schema.表名称
    MERGE PARTITIONS p1, p2
    INTO PARTITION P_MIN;

-- (7) Split, variant 1: cut p_min at 2005-01-01 into P_MIN_S and S.
ALTER TABLE 表schema.表名称
    SPLIT PARTITION p_min AT (DATE '2005-01-01')
    INTO (PARTITION P_MIN_S, PARTITION S);

-- (7) Split, variant 2: split s into monthly partitions from 2005-01-01
--     up to 2014-01-01.
ALTER TABLE 表schema.表名称
    SPLIT PARTITION s
    INTO (
        PARTITION s
            START (DATE '2005-01-01')
            END (DATE '2014-01-01')
            EVERY (INTERVAL '1' MONTH)
    );

-- (8) Rename partition P8 to P_9.
ALTER TABLE 表schema.表名称
    RENAME PARTITION P8 TO P_9;
-- 9.(1) Column metadata: every information_schema.columns attribute for the
--       columns of one table.
SELECT
    *
FROM information_schema.columns
WHERE table_name = '表名称'
  AND table_schema = '表schema'
  -- Optional: restrict to a single column
  -- AND column_name = 'xxrksj'
;
-- 9.(2) Concatenate the column names of one table into a single
--       comma-separated string (val), leaving out the listed columns.
SELECT
    -- Aggregate in column order; without ORDER BY the order of the names
    -- in the result is undefined.
    string_agg(c.column_name, ', ' ORDER BY c.ordinal_position) AS val
FROM information_schema.columns AS c
WHERE c.table_schema = '表schema'
  AND c.table_name = '表名称'
  AND c.column_name NOT IN ('aaa', 'bbb', 'xxx');
-- 10. De-duplication helper (unoptimised): for every key value (主键) that
--     occurs more than once, select the row(s) carrying the greatest value
--     of the sort column (时间或其他排序字段).
WITH dup_keys AS (
    -- Key values that appear on more than one row. The aggregate is repeated
    -- in HAVING because a select-list alias is not portable there.
    SELECT
        主键
    FROM 表schema.表名称
    GROUP BY 主键
    HAVING COUNT(主键) > 1
),
ranked AS (
    -- Rank the rows of each duplicated key, newest first. The ordering must
    -- be part of the window: a query-level ORDER BY does not decide which
    -- row receives rn = 1.
    SELECT
        t.主键,
        t.时间或其他排序字段,
        ROW_NUMBER() OVER (
            PARTITION BY t.主键
            ORDER BY t.时间或其他排序字段 DESC NULLS LAST
        ) AS rn
    FROM 表schema.表名称 AS t
    WHERE t.主键 IN (SELECT d.主键 FROM dup_keys AS d)
)
SELECT *
-- Swap "SELECT *" for "DELETE" to remove the selected rows.
-- NOTE(review): rn = 1 is the newest row per key; to keep the newest row and
-- remove the older duplicates the filter would have to be rn > 1 -- confirm
-- the intent before deleting.
FROM 表schema.表名称
-- Compare (key, sort value) as a row instead of a concatenated string, which
-- can collide ('a' || 'bc' equals 'ab' || 'c').
WHERE (主键, 时间或其他排序字段) IN (
    SELECT
        r.主键,
        r.时间或其他排序字段
    FROM ranked AS r
    WHERE r.rn = 1
);
-- 11. Create the sequence (dropping any previous definition first).
DROP SEQUENCE IF EXISTS "test"."p_rxzt_t_zd_dmb_id_seq";

CREATE SEQUENCE "test"."p_rxzt_t_zd_dmb_id_seq"
    INCREMENT BY 1
    MINVALUE 1
    MAXVALUE 9223372036854775807
    START WITH 1
    CACHE 1;
-- Usage: a table whose "id" column takes its default from the sequence.
CREATE TABLE "test"."p_rxzt_t_zd_dmb" (
    "zdbh"   varchar(30) COLLATE "pg_catalog"."default" NOT NULL,
    "zdmc"   nvarchar2   COLLATE "pg_catalog"."default",
    "dmbh"   varchar(30) COLLATE "pg_catalog"."default",
    "dmmc"   nvarchar2   COLLATE "pg_catalog"."default",
    "dmms"   nvarchar2   COLLATE "pg_catalog"."default",
    "dmsx"   numeric(8),
    "sjdmbh" varchar(30) COLLATE "pg_catalog"."default",
    "yxbz"   char(1)     COLLATE "pg_catalog"."default",
    "sxrq"   timestamp(0),
    "xxrksj" timestamp(3),
    "xxgxsj" timestamp(3),
    -- Filled from the sequence whenever no explicit id is supplied.
    "id"     int8 NOT NULL
             DEFAULT nextval('"test".p_rxzt_t_zd_dmb_id_seq'::regclass)
);
或者这样使用:
-- Draw a sequence value per selected row (10 rows at most).
SELECT
    id,
    nextval('test.p_rxzt_t_zd_dmb_id_seq') AS ccc
FROM jwy_sdm.s_1400_t_kkjbxx
LIMIT 10;
-- 12. Data skew: number of rows of table_name per xc_node_id, with the
--     matching node name from pgxc_node, largest count first.
--     Replace table_name with the table to inspect.
SELECT
    per_node.count,
    nodes.node_name
FROM (
    SELECT
        COUNT(*) AS count,
        xc_node_id
    FROM table_name
    GROUP BY xc_node_id
) AS per_node
INNER JOIN pgxc_node AS nodes
    ON nodes.node_id = per_node.xc_node_id
ORDER BY per_node.count DESC;