根据数据字典创建相关视图,方便查询
create schema dba;
create view dba.ro_conflicts as select datname,pg_stat_get_db_conflict_all(oid) conflict_all,pg_stat_get_db_conflict_bufferpin(oid) conflict_bufferpin,pg_stat_get_db_conflict_lock(oid) conflict_lock,pg_stat_get_db_conflict_snapshot(oid) conflict_snapshot,pg_stat_get_db_conflict_startup_deadlock(oid) conflict_deadlock,pg_stat_get_db_conflict_tablespace(oid) conflict_tbs from pg_database;
create or replace procedure dba.tps() as $$
declare
v1 int8;
v2 int8;
begin
select txid_snapshot_xmax(txid_current_snapshot()) into v1;
commit;
perform pg_sleep(1);
select txid_snapshot_xmax(txid_current_snapshot()) into v2;
commit;
raise notice 'tps: %', v2-v1;
end;
$$ language plpgsql ;
-- 在主节点查询
create view dba.ro_delay as select application_name,client_addr,client_port,write_lag,replay_lag,sync_state from pg_stat_replication ;
-- 在standby节点执行, 检查replay比receive的延迟
create view dba.node_delay as select * from pg_size_pretty(pg_wal_lsn_diff(pg_last_wal_receive_lsn(),pg_last_wal_replay_lsn())) as t(delay);
-- 在standby节点执行, 接收wal的速度。
CREATE OR REPLACE PROCEDURE dba.wal_receive_bw()
LANGUAGE plpgsql
AS $procedure$
declare
v1 pg_lsn;
v2 pg_lsn;
begin
select pg_last_wal_receive_lsn() into v1;
commit;
perform pg_sleep(1);
select pg_last_wal_receive_lsn() into v2;
commit;
raise notice 'wal receive bw: %/s', pg_size_pretty(pg_wal_lsn_diff(v2,v1));
end;
$procedure$;
-- 在standby节点执行, replay wal的速度。
CREATE OR REPLACE PROCEDURE dba.wal_replay_bw()
LANGUAGE plpgsql
AS $procedure$
declare
v1 pg_lsn;
v2 pg_lsn;
begin
select pg_last_wal_replay_lsn() into v1;
commit;
perform pg_sleep(1);
select pg_last_wal_replay_lsn() into v2;
commit;
raise notice 'wal replay bw: %/s', pg_size_pretty(pg_wal_lsn_diff(v2,v1));
end;
$procedure$;
create view dba.topsql as select calls,total_time,total_time/calls,query from pg_stat_statements where query !~ 'rds' order by total_time desc limit 5;
create view dba.qps as with
a as (select sum(calls) s from pg_stat_statements),
b as (select sum(calls) s from pg_stat_statements , pg_sleep(1))
select
b.s-a.s -- QPS
from a,b;
create view dba.session_acting_cnt as select count(*) from pg_stat_activity where wait_event is not null and (backend_xid is not null or backend_xmin is not null);
create view dba.sessions as select * from pg_stat_activity where wait_event is not null and (backend_xid is not null or backend_xmin is not null);
create view dba.locks as with
t_wait as
(
select a.mode,a.locktype,a.database,a.relation,a.page,a.tuple,a.classid,a.granted,
a.objid,a.objsubid,a.pid,a.virtualtransaction,a.virtualxid,a.transactionid,a.fastpath,
b.state,b.query,b.xact_start,b.query_start,b.usename,b.datname,b.client_addr,b.client_port,b.application_name
from pg_locks a,pg_stat_activity b where a.pid=b.pid and not a.granted
),
t_run as
(
select a.mode,a.locktype,a.database,a.relation,a.page,a.tuple,a.classid,a.granted,
a.objid,a.objsubid,a.pid,a.virtualtransaction,a.virtualxid,a.transactionid,a.fastpath,
b.state,b.query,b.xact_start,b.query_start,b.usename,b.datname,b.client_addr,b.client_port,b.application_name
from pg_locks a,pg_stat_activity b where a.pid=b.pid and a.granted
),
t_overlap as
(
select r.* from t_wait w join t_run r on
(
r.locktype is not distinct from w.locktype and
r.database is not distinct from w.database and
r.relation is not distinct from w.relation and
r.page is not distinct from w.page and
r.tuple is not distinct from w.tuple and
r.virtualxid is not distinct from w.virtualxid and
r.transactionid is not distinct from w.transactionid and
r.classid is not distinct from w.classid and
r.objid is not distinct from w.objid and
r.objsubid is not distinct from w.objsubid and
r.pid <> w.pid
)
),
t_unionall as
(
select r.* from t_overlap r
union all
select w.* from t_wait w
)
select locktype,datname,relation::regclass,page,tuple,virtualxid,transactionid::text,classid::regclass,objid,objsubid,
string_agg(
'Pid: '||case when pid is null then 'NULL' else pid::text end||chr(10)||
'Lock_Granted: '||case when granted is null then 'NULL' else granted::text end||' , Mode: '||case when mode is null then 'NULL' else mode::text end||' , FastPath: '||case when fastpath is null then 'NULL' else fastpath::text end||' , VirtualTransaction: '||case when virtualtransaction is null then 'NULL' else virtualtransaction::text end||' , Session_State: '||case when state is null then 'NULL' else state::text end||chr(10)||
'Username: '||case when usename is null then 'NULL' else usename::text end||' , Database: '||case when datname is null then 'NULL' else datname::text end||' , Client_Addr: '||case when client_addr is null then 'NULL' else client_addr::text end||' , Client_Port: '||case when client_port is null then 'NULL' else client_port::text end||' , Application_Name: '||case when application_name is null then 'NULL' else application_name::text end||chr(10)||
'Xact_Start: '||case when xact_start is null then 'NULL' else xact_start::text end||' , Query_Start: '||case when query_start is null then 'NULL' else query_start::text end||' , Xact_Elapse: '||case when (now()-xact_start) is null then 'NULL' else (now()-xact_start)::text end||' , Query_Elapse: '||case when (now()-query_start) is null then 'NULL' else (now()-query_start)::text end||chr(10)||
'SQL (Current SQL in Transaction): '||chr(10)||
case when query is null then 'NULL' else query::text end,
chr(10)||'--------'||chr(10)
order by
( case mode
when 'INVALID' then 0
when 'AccessShareLock' then 1
when 'RowShareLock' then 2
when 'RowExclusiveLock' then 3
when 'ShareUpdateExclusiveLock' then 4
when 'ShareLock' then 5
when 'ShareRowExclusiveLock' then 6
when 'ExclusiveLock' then 7
when 'AccessExclusiveLock' then 8
else 0
end ) desc,
(case when granted then 0 else 1 end)
) as lock_conflict
from t_unionall
group by
locktype,datname,relation,page,tuple,virtualxid,transactionid::text,classid,objid,objsubid ;
create view dba.top10sizetable as
select schemaname,tablename,pg_size_pretty(pg_relation_size((quote_ident(schemaname)||'.'||quote_ident(tablename))::regclass)) from pg_tables order by pg_relation_size((quote_ident(schemaname)||'.'||quote_ident(tablename))::regclass) desc limit 10;
create view dba.top10sizeindex as
select schemaname,tablename,indexname,pg_size_pretty(pg_relation_size((quote_ident(schemaname)||'.'||quote_ident(indexname))::regclass)) from pg_indexes order by pg_relation_size((quote_ident(schemaname)||'.'||quote_ident(indexname))::regclass) desc limit 10;
create view dba.top10sizetableindex as
select schemaname,tablename,pg_size_pretty(pg_total_relation_size((quote_ident(schemaname)||'.'||quote_ident(tablename))::regclass)) from pg_tables order by pg_total_relation_size((quote_ident(schemaname)||'.'||quote_ident(tablename))::regclass) desc limit 10;
create view dba.top10updatetable as
select schemaname,relname,n_tup_upd,n_tup_del,round(n_tup_hot_upd/(case when n_tup_upd=0 then 1.0 else n_tup_upd::numeric end),4) from pg_stat_all_tables order by n_tup_upd+n_tup_del desc limit 10;
create view dba.top10inserttable as
select schemaname,relname,n_tup_ins from pg_stat_all_tables order by n_tup_ins desc limit 10;
create view dba.top10deadtable as
select schemaname,relname,n_dead_tup from pg_stat_all_tables order by n_dead_tup desc limit 10;
create view dba.top10age as
select relnamespace::regnamespace,relname,pg_size_pretty(pg_relation_size(oid)),age(relfrozenxid) from pg_class where relkind='r' and relnamespace<>'pg_catalog'::regnamespace and relnamespace<>'information_schema'::regnamespace order by age(relfrozenxid) desc,pg_relation_size(oid) desc limit 10;
create view dba.oldestxact as
select datname,usename,xact_start,query_start,backend_xid,backend_xmin,
now()-xact_start as old_ts,
txid_current()-least(backend_xid::text::int8,backend_xmin::text::int8) as old_xacts,
query
from pg_stat_activity
order by least(backend_xid::text::int8,backend_xmin::text::int8) limit 1;
-- 查询膨胀空间top 10的表
create view dba.top10bloatsizetable as
SELECT
current_database() AS db, schemaname, tablename, reltuples::bigint AS tups, relpages::bigint AS pages, otta,
ROUND(CASE WHEN otta=0 OR sml.relpages=0 OR sml.relpages=otta THEN 0.0 ELSE sml.relpages/otta::numeric END,1) AS tbloat,
CASE WHEN relpages < otta THEN 0 ELSE relpages::bigint - otta END AS wastedpages,
CASE WHEN relpages < otta THEN 0 ELSE bs*(sml.relpages-otta)::bigint END AS wastedbytes,
CASE WHEN relpages < otta THEN '0 bytes'::text ELSE pg_size_pretty((bs*(relpages-otta))::bigint) END AS wastedsize,
iname, ituples::bigint AS itups, ipages::bigint AS ipages, iotta,
ROUND(CASE WHEN iotta=0 OR ipages=0 OR ipages=iotta THEN 0.0 ELSE ipages/iotta::numeric END,1) AS ibloat,
CASE WHEN ipages < iotta THEN 0 ELSE ipages::bigint - iotta END AS wastedipages,
CASE WHEN ipages < iotta THEN 0 ELSE bs*(ipages-iotta) END AS wastedibytes,
CASE WHEN ipages < iotta THEN '0 bytes' ELSE pg_size_pretty((bs*(ipages-iotta))::bigint) END AS wastedisize,
pg_size_pretty(CASE WHEN relpages < otta THEN
CASE WHEN ipages < iotta THEN 0 ELSE bs*(ipages-iotta::bigint) END
ELSE CASE WHEN ipages < iotta THEN bs*(relpages-otta::bigint)
ELSE bs*(relpages-otta::bigint + ipages-iotta::bigint) END
END) AS totalwastedbytes
FROM (
SELECT
nn.nspname AS schemaname,
cc.relname AS tablename,
COALESCE(cc.reltuples,0) AS reltuples,
COALESCE(cc.relpages,0) AS relpages,
COALESCE(bs,0) AS bs,
COALESCE(CEIL((cc.reltuples*((datahdr+ma-
(CASE WHEN datahdr%ma=0 THEN ma ELSE datahdr%ma END))+nullhdr2+4))/(bs-20::float)),0) AS otta,
COALESCE(c2.relname,'?') AS iname, COALESCE(c2.reltuples,0) AS ituples, COALESCE(c2.relpages,0) AS ipages,
COALESCE(CEIL((c2.reltuples*(datahdr-12))/(bs-20::float)),0) AS iotta -- very rough approximation, assumes all cols
FROM
pg_class cc
JOIN pg_namespace nn ON cc.relnamespace = nn.oid AND nn.nspname <> 'information_schema'
LEFT JOIN
(
SELECT
ma,bs,foo.nspname,foo.relname,
(datawidth+(hdr+ma-(case when hdr%ma=0 THEN ma ELSE hdr%ma END)))::numeric AS datahdr,
(maxfracsum*(nullhdr+ma-(case when nullhdr%ma=0 THEN ma ELSE nullhdr%ma END))) AS nullhdr2
FROM (
SELECT
ns.nspname, tbl.relname, hdr, ma, bs,
SUM((1-coalesce(null_frac,0))*coalesce(avg_width, 2048)) AS datawidth,
MAX(coalesce(null_frac,0)) AS maxfracsum,
hdr+(
SELECT 1+count(*)/8
FROM pg_stats s2
WHERE null_frac<>0 AND s2.schemaname = ns.nspname AND s2.tablename = tbl.relname
) AS nullhdr
FROM pg_attribute att
JOIN pg_class tbl ON att.attrelid = tbl.oid
JOIN pg_namespace ns ON ns.oid = tbl.relnamespace
LEFT JOIN pg_stats s ON s.schemaname=ns.nspname
AND s.tablename = tbl.relname
AND s.inherited=false
AND s.attname=att.attname,
(
SELECT
(SELECT current_setting('block_size')::numeric) AS bs,
CASE WHEN SUBSTRING(SPLIT_PART(v, ' ', 2) FROM '#"[0-9]+.[0-9]+#"%' for '#')
IN ('8.0','8.1','8.2') THEN 27 ELSE 23 END AS hdr,
CASE WHEN v ~ 'mingw32' OR v ~ '64-bit' THEN 8 ELSE 4 END AS ma
FROM (SELECT version() AS v) AS foo
) AS constants
WHERE att.attnum > 0 AND tbl.relkind='r'
GROUP BY 1,2,3,4,5
) AS foo
) AS rs
ON cc.relname = rs.relname AND nn.nspname = rs.nspname
LEFT JOIN pg_index i ON indrelid = cc.oid
LEFT JOIN pg_class c2 ON c2.oid = i.indexrelid
) AS sml order by wastedbytes desc limit 5;
-- 查询膨胀空间top 10的索引
create view dba.top10bloatsizeindex as
SELECT
current_database() AS db, schemaname, tablename, reltuples::bigint AS tups, relpages::bigint AS pages, otta,
ROUND(CASE WHEN otta=0 OR sml.relpages=0 OR sml.relpages=otta THEN 0.0 ELSE sml.relpages/otta::numeric END,1) AS tbloat,
CASE WHEN relpages < otta THEN 0 ELSE relpages::bigint - otta END AS wastedpages,
CASE WHEN relpages < otta THEN 0 ELSE bs*(sml.relpages-otta)::bigint END AS wastedbytes,
CASE WHEN relpages < otta THEN '0 bytes'::text ELSE pg_size_pretty((bs*(relpages-otta))::bigint) END AS wastedsize,
iname, ituples::bigint AS itups, ipages::bigint AS ipages, iotta,
ROUND(CASE WHEN iotta=0 OR ipages=0 OR ipages=iotta THEN 0.0 ELSE ipages/iotta::numeric END,1) AS ibloat,
CASE WHEN ipages < iotta THEN 0 ELSE ipages::bigint - iotta END AS wastedipages,
CASE WHEN ipages < iotta THEN 0 ELSE bs*(ipages-iotta) END AS wastedibytes,
CASE WHEN ipages < iotta THEN '0 bytes' ELSE pg_size_pretty((bs*(ipages-iotta))::bigint) END AS wastedisize,
pg_size_pretty(CASE WHEN relpages < otta THEN
CASE WHEN ipages < iotta THEN 0 ELSE bs*(ipages-iotta::bigint) END
ELSE CASE WHEN ipages < iotta THEN bs*(relpages-otta::bigint)
ELSE bs*(relpages-otta::bigint + ipages-iotta::bigint) END
END) AS totalwastedbytes
FROM (
SELECT
nn.nspname AS schemaname,
cc.relname AS tablename,
COALESCE(cc.reltuples,0) AS reltuples,
COALESCE(cc.relpages,0) AS relpages,
COALESCE(bs,0) AS bs,
COALESCE(CEIL((cc.reltuples*((datahdr+ma-
(CASE WHEN datahdr%ma=0 THEN ma ELSE datahdr%ma END))+nullhdr2+4))/(bs-20::float)),0) AS otta,
COALESCE(c2.relname,'?') AS iname, COALESCE(c2.reltuples,0) AS ituples, COALESCE(c2.relpages,0) AS ipages,
COALESCE(CEIL((c2.reltuples*(datahdr-12))/(bs-20::float)),0) AS iotta -- very rough approximation, assumes all cols
FROM
pg_class cc
JOIN pg_namespace nn ON cc.relnamespace = nn.oid AND nn.nspname <> 'information_schema'
LEFT JOIN
(
SELECT
ma,bs,foo.nspname,foo.relname,
(datawidth+(hdr+ma-(case when hdr%ma=0 THEN ma ELSE hdr%ma END)))::numeric AS datahdr,
(maxfracsum*(nullhdr+ma-(case when nullhdr%ma=0 THEN ma ELSE nullhdr%ma END))) AS nullhdr2
FROM (
SELECT
ns.nspname, tbl.relname, hdr, ma, bs,
SUM((1-coalesce(null_frac,0))*coalesce(avg_width, 2048)) AS datawidth,
MAX(coalesce(null_frac,0)) AS maxfracsum,
hdr+(
SELECT 1+count(*)/8
FROM pg_stats s2
WHERE null_frac<>0 AND s2.schemaname = ns.nspname AND s2.tablename = tbl.relname
) AS nullhdr
FROM pg_attribute att
JOIN pg_class tbl ON att.attrelid = tbl.oid
JOIN pg_namespace ns ON ns.oid = tbl.relnamespace
LEFT JOIN pg_stats s ON s.schemaname=ns.nspname
AND s.tablename = tbl.relname
AND s.inherited=false
AND s.attname=att.attname,
(
SELECT
(SELECT current_setting('block_size')::numeric) AS bs,
CASE WHEN SUBSTRING(SPLIT_PART(v, ' ', 2) FROM '#"[0-9]+.[0-9]+#"%' for '#')
IN ('8.0','8.1','8.2') THEN 27 ELSE 23 END AS hdr,
CASE WHEN v ~ 'mingw32' OR v ~ '64-bit' THEN 8 ELSE 4 END AS ma
FROM (SELECT version() AS v) AS foo
) AS constants
WHERE att.attnum > 0 AND tbl.relkind='r'
GROUP BY 1,2,3,4,5
) AS foo
) AS rs
ON cc.relname = rs.relname AND nn.nspname = rs.nspname
LEFT JOIN pg_index i ON indrelid = cc.oid
LEFT JOIN pg_class c2 ON c2.oid = i.indexrelid
) AS sml order by wastedibytes desc limit 5;
-- 查询膨胀比例top 10的表(浪费空间大于10MB的表)
create view dba.top10bloatratiotable as
SELECT
current_database() AS db, schemaname, tablename, reltuples::bigint AS tups, relpages::bigint AS pages, otta,
ROUND(CASE WHEN otta=0 OR sml.relpages=0 OR sml.relpages=otta THEN 0.0 ELSE sml.relpages/otta::numeric END,1) AS tbloat,
CASE WHEN relpages < otta THEN 0 ELSE relpages::bigint - otta END AS wastedpages,
CASE WHEN relpages < otta THEN 0 ELSE bs*(sml.relpages-otta)::bigint END AS wastedbytes,
CASE WHEN relpages < otta THEN '0 bytes'::text ELSE pg_size_pretty((bs*(relpages-otta))::bigint) END AS wastedsize,
iname, ituples::bigint AS itups, ipages::bigint AS ipages, iotta,
ROUND(CASE WHEN iotta=0 OR ipages=0 OR ipages=iotta THEN 0.0 ELSE ipages/iotta::numeric END,1) AS ibloat,
CASE WHEN ipages < iotta THEN 0 ELSE ipages::bigint - iotta END AS wastedipages,
CASE WHEN ipages < iotta THEN 0 ELSE bs*(ipages-iotta) END AS wastedibytes,
CASE WHEN ipages < iotta THEN '0 bytes' ELSE pg_size_pretty((bs*(ipages-iotta))::bigint) END AS wastedisize,
pg_size_pretty(CASE WHEN relpages < otta THEN
CASE WHEN ipages < iotta THEN 0 ELSE bs*(ipages-iotta::bigint) END
ELSE CASE WHEN ipages < iotta THEN bs*(relpages-otta::bigint)
ELSE bs*(relpages-otta::bigint + ipages-iotta::bigint) END
END) AS totalwastedbytes
FROM (
SELECT
nn.nspname AS schemaname,
cc.relname AS tablename,
COALESCE(cc.reltuples,0) AS reltuples,
COALESCE(cc.relpages,0) AS relpages,
COALESCE(bs,0) AS bs,
COALESCE(CEIL((cc.reltuples*((datahdr+ma-
(CASE WHEN datahdr%ma=0 THEN ma ELSE datahdr%ma END))+nullhdr2+4))/(bs-20::float)),0) AS otta,
COALESCE(c2.relname,'?') AS iname, COALESCE(c2.reltuples,0) AS ituples, COALESCE(c2.relpages,0) AS ipages,
COALESCE(CEIL((c2.reltuples*(datahdr-12))/(bs-20::float)),0) AS iotta -- very rough approximation, assumes all cols
FROM
pg_class cc
JOIN pg_namespace nn ON cc.relnamespace = nn.oid AND nn.nspname <> 'information_schema'
LEFT JOIN
(
SELECT
ma,bs,foo.nspname,foo.relname,
(datawidth+(hdr+ma-(case when hdr%ma=0 THEN ma ELSE hdr%ma END)))::numeric AS datahdr,
(maxfracsum*(nullhdr+ma-(case when nullhdr%ma=0 THEN ma ELSE nullhdr%ma END))) AS nullhdr2
FROM (
SELECT
ns.nspname, tbl.relname, hdr, ma, bs,
SUM((1-coalesce(null_frac,0))*coalesce(avg_width, 2048)) AS datawidth,
MAX(coalesce(null_frac,0)) AS maxfracsum,
hdr+(
SELECT 1+count(*)/8
FROM pg_stats s2
WHERE null_frac<>0 AND s2.schemaname = ns.nspname AND s2.tablename = tbl.relname
) AS nullhdr
FROM pg_attribute att
JOIN pg_class tbl ON att.attrelid = tbl.oid
JOIN pg_namespace ns ON ns.oid = tbl.relnamespace
LEFT JOIN pg_stats s ON s.schemaname=ns.nspname
AND s.tablename = tbl.relname
AND s.inherited=false
AND s.attname=att.attname,
(
SELECT
(SELECT current_setting('block_size')::numeric) AS bs,
CASE WHEN SUBSTRING(SPLIT_PART(v, ' ', 2) FROM '#"[0-9]+.[0-9]+#"%' for '#')
IN ('8.0','8.1','8.2') THEN 27 ELSE 23 END AS hdr,
CASE WHEN v ~ 'mingw32' OR v ~ '64-bit' THEN 8 ELSE 4 END AS ma
FROM (SELECT version() AS v) AS foo
) AS constants
WHERE att.attnum > 0 AND tbl.relkind='r'
GROUP BY 1,2,3,4,5
) AS foo
) AS rs
ON cc.relname = rs.relname AND nn.nspname = rs.nspname
LEFT JOIN pg_index i ON indrelid = cc.oid
LEFT JOIN pg_class c2 ON c2.oid = i.indexrelid
) AS sml
where (CASE WHEN relpages < otta THEN 0 ELSE bs*(sml.relpages-otta)::bigint END) >= 10240000
order by tbloat desc,wastedbytes desc limit 5;
-- 查询膨胀比例top 10的索引(浪费空间大于10MB的索引)
create view dba.top10bloatratioindex as
SELECT
current_database() AS db, schemaname, tablename, reltuples::bigint AS tups, relpages::bigint AS pages, otta,
ROUND(CASE WHEN otta=0 OR sml.relpages=0 OR sml.relpages=otta THEN 0.0 ELSE sml.relpages/otta::numeric END,1) AS tbloat,
CASE WHEN relpages < otta THEN 0 ELSE relpages::bigint - otta END AS wastedpages,
CASE WHEN relpages < otta THEN 0 ELSE bs*(sml.relpages-otta)::bigint END AS wastedbytes,
CASE WHEN relpages < otta THEN '0 bytes'::text ELSE pg_size_pretty((bs*(relpages-otta))::bigint) END AS wastedsize,
iname, ituples::bigint AS itups, ipages::bigint AS ipages, iotta,
ROUND(CASE WHEN iotta=0 OR ipages=0 OR ipages=iotta THEN 0.0 ELSE ipages/iotta::numeric END,1) AS ibloat,
CASE WHEN ipages < iotta THEN 0 ELSE ipages::bigint - iotta END AS wastedipages,
CASE WHEN ipages < iotta THEN 0 ELSE bs*(ipages-iotta) END AS wastedibytes,
CASE WHEN ipages < iotta THEN '0 bytes' ELSE pg_size_pretty((bs*(ipages-iotta))::bigint) END AS wastedisize,
pg_size_pretty(CASE WHEN relpages < otta THEN
CASE WHEN ipages < iotta THEN 0 ELSE bs*(ipages-iotta::bigint) END
ELSE CASE WHEN ipages < iotta THEN bs*(relpages-otta::bigint)
ELSE bs*(relpages-otta::bigint + ipages-iotta::bigint) END
END) AS totalwastedbytes
FROM (
SELECT
nn.nspname AS schemaname,
cc.relname AS tablename,
COALESCE(cc.reltuples,0) AS reltuples,
COALESCE(cc.relpages,0) AS relpages,
COALESCE(bs,0) AS bs,
COALESCE(CEIL((cc.reltuples*((datahdr+ma-
(CASE WHEN datahdr%ma=0 THEN ma ELSE datahdr%ma END))+nullhdr2+4))/(bs-20::float)),0) AS otta,
COALESCE(c2.relname,'?') AS iname, COALESCE(c2.reltuples,0) AS ituples, COALESCE(c2.relpages,0) AS ipages,
COALESCE(CEIL((c2.reltuples*(datahdr-12))/(bs-20::float)),0) AS iotta -- very rough approximation, assumes all cols
FROM
pg_class cc
JOIN pg_namespace nn ON cc.relnamespace = nn.oid AND nn.nspname <> 'information_schema'
LEFT JOIN
(
SELECT
ma,bs,foo.nspname,foo.relname,
(datawidth+(hdr+ma-(case when hdr%ma=0 THEN ma ELSE hdr%ma END)))::numeric AS datahdr,
(maxfracsum*(nullhdr+ma-(case when nullhdr%ma=0 THEN ma ELSE nullhdr%ma END))) AS nullhdr2
FROM (
SELECT
ns.nspname, tbl.relname, hdr, ma, bs,
SUM((1-coalesce(null_frac,0))*coalesce(avg_width, 2048)) AS datawidth,
MAX(coalesce(null_frac,0)) AS maxfracsum,
hdr+(
SELECT 1+count(*)/8
FROM pg_stats s2
WHERE null_frac<>0 AND s2.schemaname = ns.nspname AND s2.tablename = tbl.relname
) AS nullhdr
FROM pg_attribute att
JOIN pg_class tbl ON att.attrelid = tbl.oid
JOIN pg_namespace ns ON ns.oid = tbl.relnamespace
LEFT JOIN pg_stats s ON s.schemaname=ns.nspname
AND s.tablename = tbl.relname
AND s.inherited=false
AND s.attname=att.attname,
(
SELECT
(SELECT current_setting('block_size')::numeric) AS bs,
CASE WHEN SUBSTRING(SPLIT_PART(v, ' ', 2) FROM '#"[0-9]+.[0-9]+#"%' for '#')
IN ('8.0','8.1','8.2') THEN 27 ELSE 23 END AS hdr,
CASE WHEN v ~ 'mingw32' OR v ~ '64-bit' THEN 8 ELSE 4 END AS ma
FROM (SELECT version() AS v) AS foo
) AS constants
WHERE att.attnum > 0 AND tbl.relkind='r'
GROUP BY 1,2,3,4,5
) AS foo
) AS rs
ON cc.relname = rs.relname AND nn.nspname = rs.nspname
LEFT JOIN pg_index i ON indrelid = cc.oid
LEFT JOIN pg_class c2 ON c2.oid = i.indexrelid
) AS sml
where (CASE WHEN ipages < iotta THEN 0 ELSE bs*(ipages-iotta) END) >= 10240000
order by ibloat desc,wastedibytes desc limit 5;
create view dba.seqs as select max_value-last_value,* from pg_sequences order by max_value-last_value ;
-- 查询没有使用过的大于1MB的索引 top 10 (注意, PK、UK如果只是用于约束, 可能不会被统计计数,但是不能删掉)
create view dba.top10notusedidx as
select pg_size_pretty(pg_relation_size(indexrelid)),* from pg_stat_all_indexes where pg_relation_size(indexrelid)>=1024000 and (idx_scan=0 or idx_tup_read=0 or idx_tup_fetch=0)
and schemaname not in ('pg_toast','pg_catalog') order by pg_relation_size(indexrelid) desc limit 10;
-- 查询没有使用过的大于1MB的表 top 10
create view dba.top10notusedtab as
select pg_size_pretty(pg_relation_size(relid)),* from pg_stat_all_tables
where pg_relation_size(relid)>=1024000 and seq_scan=0 and idx_scan=0 and schemaname not in ('pg_toast','pg_catalog','information_schema') order by pg_relation_size(relid) desc limit 10;
-- 查询热表top 10
create view dba.top10hottab as
select pg_size_pretty(pg_relation_size(relid)),* from pg_stat_all_tables where schemaname not in ('pg_toast','pg_catalog','information_schema') order by seq_scan+idx_scan desc, pg_relation_size(relid) desc limit 10;
-- 查询大于1MB的冷表top 10
create view dba.top10coldtab as
select pg_size_pretty(pg_relation_size(relid)),* from pg_stat_all_tables where schemaname not in ('pg_toast','pg_catalog','information_schema') and pg_relation_size(relid)>=1024000 order by seq_scan+idx_scan,pg_relation_size(relid) desc limit 10;
-- 查询热索引top 10
create view dba.top10hotidx as
select pg_size_pretty(pg_relation_size(indexrelid)),* from pg_stat_all_indexes where
schemaname not in ('pg_toast','pg_catalog') order by idx_scan+idx_tup_read+idx_tup_fetch desc, pg_relation_size(indexrelid) desc limit 10;
-- 查询大于1MB的冷索引top 10(注意, PK、UK如果只是用于约束, 可能不会被统计计数,但是不能删掉)
create view dba.top10coldidx as
select pg_size_pretty(pg_relation_size(indexrelid)),* from pg_stat_all_indexes where pg_relation_size(indexrelid)>=1024000
and schemaname not in ('pg_toast','pg_catalog') order by idx_scan+idx_tup_read+idx_tup_fetch , pg_relation_size(indexrelid) desc limit 10;
-- freeze风暴预测相关的3个视图
create view dba.v_freeze as
select
e.*,
a.*
from
(select
current_setting('autovacuum_freeze_max_age')::int as v1, -- 如果表的事务ID年龄大于该值, 即使未开启autovacuum也会强制触发FREEZE, 并告警Preventing Transaction ID Wraparound Failures
current_setting('autovacuum_multixact_freeze_max_age')::int as v2, -- 如果表的并行事务ID年龄大于该值, 即使未开启autovacuum也会强制触发FREEZE, 并告警Preventing Transaction ID Wraparound Failures
current_setting('vacuum_freeze_min_age')::int as v3, -- 手动或自动垃圾回收时, 如果记录的事务ID年龄大于该值, 将被FREEZE
current_setting('vacuum_multixact_freeze_min_age')::int as v4, -- 手动或自动垃圾回收时, 如果记录的并行事务ID年龄大于该值, 将被FREEZE
current_setting('vacuum_freeze_table_age')::int as v5, -- 手动垃圾回收时, 如果表的事务ID年龄大于该值, 将触发FREEZE. 该参数的上限值为 %95 autovacuum_freeze_max_age
current_setting('vacuum_multixact_freeze_table_age')::int as v6, -- 手动垃圾回收时, 如果表的并行事务ID年龄大于该值, 将触发FREEZE. 该参数的上限值为 %95 autovacuum_multixact_freeze_max_age
current_setting('autovacuum_vacuum_cost_delay') as v7, -- 自动垃圾回收时, 每轮回收周期后的一个休息时间, 主要防止垃圾回收太耗资源. -1 表示沿用vacuum_cost_delay的设置
current_setting('autovacuum_vacuum_cost_limit') as v8, -- 自动垃圾回收时, 每轮回收周期设多大限制, 限制由vacuum_cost_page_hit,vacuum_cost_page_missvacuum_cost_page_dirty参数以及周期内的操作决定. -1 表示沿用vacuum_cost_limit的设置
current_setting('vacuum_cost_delay') as v9, -- 手动垃圾回收时, 每轮回收周期后的一个休息时间, 主要防止垃圾回收太耗资源.
current_setting('vacuum_cost_limit') as v10, -- 手动垃圾回收时, 每轮回收周期设多大限制, 限制由vacuum_cost_page_hit,vacuum_cost_page_missvacuum_cost_page_dirty参数以及周期内的操作决定.
current_setting('autovacuum') as autovacuum -- 是否开启自动垃圾回收
) a,
LATERAL ( -- LATERAL 允许你在这个SUBQUERY中直接引用前面的table, subquery中的column
select
pg_size_pretty(pg_total_relation_size(oid)) sz, -- 表的大小(含TOAST, 索引)
oid::regclass as reloid, -- 表名(物化视图)
relkind, -- r=表, m=物化视图
coalesce(
least(
substring(reloptions::text, 'autovacuum_freeze_max_age=(\d+)')::int,
substring(reloptions::text, 'autovacuum_freeze_table_age=(\d+)')::int
),
a.v1
)
-
age(case when relfrozenxid::text::int<3 then null else relfrozenxid end)
as remain_ages_xid, -- 再产生多少个事务后, 自动垃圾回收会触发FREEZE, 起因为事务ID
coalesce(
least(
substring(reloptions::text, 'autovacuum_multixact_freeze_max_age=(\d+)')::int,
substring(reloptions::text, 'autovacuum_multixact_freeze_table_age=(\d+)')::int
),
a.v2
)
-
age(case when relminmxid::text::int<3 then null else relminmxid end)
as remain_ages_mxid, -- 再产生多少个事务后, 自动垃圾回收会触发FREEZE, 起因为并发事务ID
coalesce(
least(
substring(reloptions::text, 'autovacuum_freeze_min_age=(\d+)')::int
),
a.v3
) as xid_lower_to_minage, -- 如果触发FREEZE, 该表的事务ID年龄会降到多少
coalesce(
least(
substring(reloptions::text, 'autovacuum_multixact_freeze_min_age=(\d+)')::int
),
a.v4
) as mxid_lower_to_minage, -- 如果触发FREEZE, 该表的并行事务ID年龄会降到多少
case
when v5 <= age(case when relfrozenxid::text::int<3 then null else relfrozenxid end) then 'YES'
else 'NOT'
end as vacuum_trigger_freeze1, -- 如果手工执行VACUUM, 是否会触发FREEZE, 触发起因(事务ID年龄达到阈值)
case
when v6 <= age(case when relminmxid::text::int<3 then null else relminmxid end) then 'YES'
else 'NOT'
end as vacuum_trigger_freeze2, -- 如果手工执行VACUUM, 是否会触发FREEZE, 触发起因(并行事务ID年龄达到阈值)
reloptions -- 表级参数, 优先. 例如是否开启自动垃圾回收, autovacuum_freeze_max_age, autovacuum_freeze_table_age, autovacuum_multixact_freeze_max_age, autovacuum_multixact_freeze_table_age
from pg_class
where relkind in ('r','m')
) e
order by
least(e.remain_ages_xid , e.remain_ages_mxid), -- 排在越前, 越先触发自动FREEZE, 即风暴来临的预测
pg_total_relation_size(reloid) desc -- 同样剩余年龄, 表越大, 排越前
;
create view dba.v_freeze_stat as
select
wb, -- 第几个BATCH, 每个batch代表流逝100万个事务
cnt, -- 这个batch 有多少表
pg_size_pretty(ssz) as ssz1, -- 这个batch 这些 表+TOAST+索引 有多少容量
pg_size_pretty(ssz) as ssz2, -- 这个batch FREEZE 会导致多少读IO
pg_size_pretty(ssz*3) as ssz3, -- 这个batch FREEZE 最多可能会导致多少写IO (通常三份 : 数据文件, WAL FULL PAGE, WAL)
pg_size_pretty(min_sz) as ssz4, -- 这个batch 最小的表多大
pg_size_pretty(max_sz) as ssz5, -- 这个batch 最大的表多大
pg_size_pretty(avg_sz) as ssz6, -- 这个batch 平均表多大
pg_size_pretty(stddev_sz) as ssz7, -- 这个batch 表大小的方差, 越大, 说明表大小差异化明显
min_rest_age, -- 这个batch 距离自动FREEZE最低剩余事务数
max_rest_age, -- 这个batch 距离自动FREEZE最高剩余事务数
stddev_rest_age, -- 这个batch 距离自动FREEZE剩余事务数的方差, 越小,说明这个batch触发freeze将越平缓, 越大, 说明这个batch将有可能在某些点集中触发freeze (但是可能集中触发的都是小表)
corr_rest_age_sz, -- 表大小与距离自动freeze剩余事务数的相关性,相关性越强(值趋向1或-1) stddev_rest_age 与 sz7 说明的问题越有价值
round(100*(ssz/(sum(ssz) over ())), 2)||' %' as ratio -- 这个BATCH的容量占比,占比如果非常不均匀,说明有必要调整表级FREEZE参数,让占比均匀化
from
(
select a.*, b.* from
(
select
min(least(remain_ages_xid, remain_ages_mxid)) as v_min, -- 整个数据库中离自动FREEZE的 最小 剩余事务ID数
max(least(remain_ages_xid, remain_ages_mxid)) as v_max -- 整个数据库中离自动FREEZE的 最大 剩余事务ID数
from v_freeze
) as a,
LATERAL ( -- 高级SQL
select
width_bucket(
least(remain_ages_xid, remain_ages_mxid),
a.v_min,
a.v_max,
greatest((a.v_max-a.v_min)/1000000, 1) -- 100万个事务, 如果要更改统计例如,修改这个值即可
) as wb,
count(*) as cnt,
sum(pg_total_relation_size(reloid)) as ssz,
stddev_samp(pg_total_relation_size(reloid) order by least(remain_ages_xid, remain_ages_mxid)) as stddev_sz,
min(pg_total_relation_size(reloid)) as min_sz,
max(pg_total_relation_size(reloid)) as max_sz,
avg(pg_total_relation_size(reloid)) as avg_sz,
min(least(remain_ages_xid, remain_ages_mxid)) as min_rest_age,
max(least(remain_ages_xid, remain_ages_mxid)) as max_rest_age,
stddev_samp(least(remain_ages_xid, remain_ages_mxid) order by least(remain_ages_xid, remain_ages_mxid)) as stddev_rest_age,
corr(least(remain_ages_xid, remain_ages_mxid), pg_total_relation_size(reloid)) as corr_rest_age_sz
from v_freeze
group by wb
) as b
) t
order by wb;
create view dba.v_freeze_stat_detail as
select
pg_size_pretty(t.ssz) as ssz2, -- 这个batch FREEZE 会导致多少读IO (表+TOAST+索引)
pg_size_pretty(t.ssz*3) as ssz3, -- 这个batch FREEZE 最多可能会导致多少写IO (通常三份 : 数据文件, WAL FULL PAGE, WAL)
pg_size_pretty(t.ssz_sum) as ssz4, -- 所有batch 所有表的总大小 (表+TOAST+索引)
round(100*(t.ssz/t.ssz_sum), 2)||' %' as ratio_batch, -- 这个BATCH的容量占比,目标是让所有BATCH占比尽量一致
round(100*(pg_total_relation_size(t.reloid)/t.ssz), 2)||' %' as ratio_table, -- 这个表占整个batch的容量占比,大表尽量错开freeze
t.*
from
(
select a.*, b.* from
(
select
min(least(remain_ages_xid, remain_ages_mxid)) as v_min, -- 整个数据库中离自动FREEZE的 最小 剩余事务ID数
max(least(remain_ages_xid, remain_ages_mxid)) as v_max -- 整个数据库中离自动FREEZE的 最大 剩余事务ID数
from v_freeze
) as a,
LATERAL ( -- 高级SQL
select
count(*) over w as cnt, -- 这个batch 有多少表
sum(pg_total_relation_size(reloid)) over () as ssz_sum, -- 所有batch 所有表的总大小 (表+TOAST+索引)
sum(pg_total_relation_size(reloid)) over w as ssz, -- 这个batch 的表大小总和 (表+TOAST+索引)
pg_size_pretty(min(pg_total_relation_size(reloid)) over w) as min_sz, -- 这个batch 最小的表多大
pg_size_pretty(max(pg_total_relation_size(reloid)) over w) as max_sz, -- 这个batch 最大的表多大
pg_size_pretty(avg(pg_total_relation_size(reloid)) over w) as avg_sz, -- 这个batch 平均表多大
pg_size_pretty(stddev_samp(pg_total_relation_size(reloid)) over w) as stddev_sz, -- 这个batch 表大小的方差, 越大, 说明表大小差异化明显
min(least(remain_ages_xid, remain_ages_mxid)) over w as min_rest_age, -- 这个batch 距离自动FREEZE最低剩余事务数
max(least(remain_ages_xid, remain_ages_mxid)) over w as max_rest_age, -- 这个batch 距离自动FREEZE最高剩余事务数
stddev_samp(least(remain_ages_xid, remain_ages_mxid)) over w as stddev_rest_age, -- 这个batch 距离自动FREEZE剩余事务数的方差, 越小,说明这个batch触发freeze将越平缓, 越大, 说明这个batch将有可能在某些点集中触发freeze (但是可能集中触发的都是小表)
corr(least(remain_ages_xid, remain_ages_mxid), pg_total_relation_size(reloid)) over w as corr_rest_age_sz, -- 表大小与距离自动freeze剩余事务数的相关性,相关性越强(值趋向1或-1) stddev_rest_age 与 stddev_sz 说明的问题越有价值
t1.*
from
(
select
width_bucket(
least(tt.remain_ages_xid, tt.remain_ages_mxid),
a.v_min,
a.v_max,
greatest((a.v_max-a.v_min)/1000000, 1) -- 100万个事务, 如果要更改统计例如,修改这个值即可
)
as wb, -- 第几个BATCH, 每个batch代表流逝100万个事务
* from v_freeze tt
) as t1
window w as
(
partition by t1.wb
)
) as b
) t
order by
t.wb,
least(t.remain_ages_xid, t.remain_ages_mxid),
pg_total_relation_size(t.reloid) desc
;
create view dba.top20freezebigtable as
select relowner::regrole, relnamespace::regnamespace, relname,
age(relfrozenxid),pg_size_pretty(pg_total_relation_size(oid)) , -- 当前年龄
coalesce(
least(
substring(reloptions::text, 'autovacuum_freeze_max_age=(\d+)')::int,
substring(reloptions::text, 'autovacuum_freeze_table_age=(\d+)')::int
),
current_setting('autovacuum_freeze_max_age')::int
)
-
age(case when relfrozenxid::text::int<3 then null else relfrozenxid end)
as remain_ages_xid, -- 再产生多少个事务后, 自动垃圾回收会触发FREEZE, 起因为事务ID
coalesce(
least(
substring(reloptions::text, 'autovacuum_freeze_min_age=(\d+)')::int
),
current_setting('vacuum_freeze_min_age')::int
) as xid_lower_to_minage -- 如果触发FREEZE, 该表的事务ID年龄会降到多少
from pg_class where relkind='r' order by pg_total_relation_size(oid) desc limit 20;
常用查询
1、查询只读节点延迟
– 在主节点查询
select * from dba.ro_delay;
– 在只读节点查询
set lock_timeout='10ms';
set statement_timeout='2s';
select * from dba.node_delay;
2、查询top query, 优化之首
select * from dba.topsql;
3、重置top query统计计数器(通常在高峰期来临前可以重置,防止结果干扰)
select pg_stat_statements_reset();
4、查询 qps , 在psql 终端可以每秒打印一次
select * from dba.qps;
\watch 1
4.1、查询tps
call dba.tps();
5、查询活跃会话数, 如果超过CPU核数, 说明数据库非常非常繁忙, 需要注意优化
select * from dba.session_acting_cnt;
6、当前活跃会话
select * from dba.sessions;
7、查询锁等待, 如果有大量长时间等待, 需要注意业务逻辑是否有问题
select * from dba.locks;
8、查询占用空间top 10的表
select * from dba.top10sizetable;
9、查询占用空间top 10的索引
select * from dba.top10sizeindex;
10、查询占用空间top 10的表(含索引)
select * from dba.top10sizetableindex;
11、查询膨胀空间top 10的表
select * from dba.top10bloatsizetable;
12、查询膨胀空间top 10的索引
select * from dba.top10bloatsizeindex;
13、查询膨胀比例top 10的表
select * from dba.top10bloatratiotable;
14、查询膨胀比例top 10的索引
select * from dba.top10bloatratioindex;
15、查询更新和删除记录条数top 10的表
select * from dba.top10updatetable;
16、查询插入记录条数top 10的表
select * from dba.top10inserttable;
17、查询脏记录条数top 10的表
select * from dba.top10deadtable;
18、查询年龄top 10的表
select * from dba.top10age;
19、查询当前的最老事务距离当前时间、距离当前事务数, 说明膨胀空间大小, 越大可能导致越多膨胀垃圾.
select * from dba.oldestxact;
select * from pg_prepared_xacts;
20、查询序列的剩余空间
select * from dba.seqs;
21、PostgreSQL 谁堵塞了谁(锁等待检测)- pg_blocking_pids
PostgreSQL 谁堵塞了谁(锁等待检测)
22、查询没有使用过的大于1MB的索引 top 10 (注意, PK、UK如果只是用于约束, 可能不会被统计计数,但是不能删掉)
select * from dba.top10notusedidx;
23、查询没有使用过的大于1MB的表 top 10
select * from dba.top10notusedtab;
24、查询热表top 10
select * from dba.top10hottab;
25、查询大于1MB的冷表top 10
select * from dba.top10coldtab;
26、查询热索引top 10
select * from dba.top10hotidx;
27、查询大于1MB的冷索引top 10(注意, PK、UK如果只是用于约束, 可能不会被统计计数,但是不能删掉)
select * from dba.top10coldidx;
28、查询数据库freeze风暴预测
select * from dba.v_freeze;
select * from dba.v_freeze_stat;
select * from dba.v_freeze_stat_detail;
查询top 20的大表大freeze剩余年龄。
select * from dba.top20freezebigtable;
– 结合dba.tps, 可以通过remain_ages_xid/dba.tps估算每个表还有多久会发生freeze.
call dba.tps();
29、查询RO节点读与replay冲突次数, 建议高频恢复中的ro节点不要跑长sql。
select * from dba.ro_conflicts;
30、DBA在RO 节点人为执行SQL前, 建议设置sql超时, 避免长时间跑 SQL, 导致不必要的replay延迟和 conflict cancel statement
set statement_timeout ='1s';
set lock_timeout='10ms';
31、RO 节点的conflict容忍时间最长设置, 默认为5 min
show max_standby_streaming_delay ;
max_standby_streaming_delay
-----------------------------
5min
(1 row)
32、清理数据库stat计数器
\df *.*reset*
List of functions
Schema | Name | Result data type | Argument data types | Type
------------+----------------------------------------+--------------------------+---------------------+------
pg_catalog | pg_replication_origin_session_reset | void | | func
pg_catalog | pg_replication_origin_xact_reset | void | | func
pg_catalog | pg_stat_get_bgwriter_stat_reset_time | timestamp with time zone | | func
pg_catalog | pg_stat_get_db_stat_reset_time | timestamp with time zone | oid | func
pg_catalog | pg_stat_reset | void | | func
pg_catalog | pg_stat_reset_shared | void | text | func
pg_catalog | pg_stat_reset_single_function_counters | void | oid | func
pg_catalog | pg_stat_reset_single_table_counters | void | oid | func
public | pg_stat_statements_reset | void | | func
(9 rows)
33、在standby节点执行, 检查当前standby节点接收wal的速度
call dba.wal_receive_bw();
34、在standby节点执行, 检查当前standby节点replay wal的速度
call dba.wal_replay_bw();