最简便的方法是利用pg_class.reltuples,类似oracle的num_rows
postgres=# select reltuples::numeric from pg_class where relname='pgbench_accounts';
reltuples
-----------
20000000
(1 row)
加::numeric是为了防止数字太大,变成科学计数法
postgres=# select reltuples from pg_class where relname='pgbench_accounts';
reltuples
-----------
2e+07
(1 row)
但是这个字段的值需要收集统计信息(ANALYZE或VACUUM)后才有,如果统计信息过旧,也会不准确
-- Demo: copy pgbench_accounts.aid into a new table that has autovacuum
-- disabled, then read the row estimate stored for it in pg_class.
CREATE TABLE tmp001 (
    aid integer
) WITH (autovacuum_enabled = off);
INSERT INTO tmp001 (aid)
SELECT aid
FROM pgbench_accounts;
SELECT reltuples::numeric
FROM pg_class
WHERE relname = 'tmp001';
reltuples
-----------
0
(1 row)
如果没有统计信息或者比较旧了,又不想收集,可以使用explain
postgres=# EXPLAIN SELECT 1 FROM tmp001 limit 1;
QUERY PLAN
------------------------------------------------------------------------
Limit (cost=0.00..0.01 rows=1 width=4)
-> Seq Scan on tmp001 (cost=0.00..314160.80 rows=22566480 width=4)
(2 rows)
看到在完全没有统计信息的情况下,预估值为22566480,比实际约20000000行偏高约13%
收集之后,偏差明显减少
postgres=# analyze tmp001;
ANALYZE
postgres=# EXPLAIN SELECT 1 FROM tmp001 limit 1;
QUERY PLAN
------------------------------------------------------------------------
Limit (cost=0.00..0.01 rows=1 width=4)
-> Seq Scan on tmp001 (cost=0.00..288496.96 rows=20000096 width=4)
(2 rows)
但是注意不要EXPLAIN SELECT count(*),相差很大:顶层聚合节点的rows恒为1,而并行扫描节点的rows(8333373)是按每个worker折算的预估值,都不等于全表行数
postgres=# EXPLAIN SELECT count(*) FROM tmp001;
QUERY PLAN
--------------------------------------------------------------------------------------------
Finalize Aggregate (cost=193663.38..193663.39 rows=1 width=8)
-> Gather (cost=193663.17..193663.38 rows=2 width=8)
Workers Planned: 2
-> Partial Aggregate (cost=192663.17..192663.18 rows=1 width=8)
-> Parallel Seq Scan on tmp001 (cost=0.00..171829.73 rows=8333373 width=0)
(5 rows)
为了方便获取预估值,可以将执行计划输出转为json格式
postgres=# EXPLAIN (FORMAT JSON) SELECT 1 FROM tmp001 limit 1;
QUERY PLAN
-------------------------------------------
[ +
{ +
"Plan": { +
"Node Type": "Limit", +
"Parallel Aware": false, +
"Startup Cost": 0.00, +
"Total Cost": 0.01, +
"Plan Rows": 1, +
"Plan Width": 4, +
"Plans": [ +
{ +
"Node Type": "Seq Scan", +
"Parent Relationship": "Outer",+
"Parallel Aware": false, +
"Relation Name": "tmp001", +
"Alias": "tmp001", +
"Startup Cost": 0.00, +
"Total Cost": 288496.96, +
"Plan Rows": 20000096, +
"Plan Width": 4 +
} +
] +
} +
} +
]
(1 row)
创建函数,将Plan Rows转换成输出:
-- countit(schema, relation): planner row estimate for a whole table or view.
--   $1  schema name
--   $2  relation name
-- Builds "SELECT 1 FROM schema.relation" (both names quoted as identifiers via
-- format's %I), runs it through EXPLAIN (FORMAT JSON) without ANALYZE, and
-- returns the "Plan Rows" value of the top-level plan node.
-- The result type is float4; callers in this file cast it to numeric so large
-- values are not printed in scientific notation.
-- NOTE(review): float4 keeps only about 6 significant decimal digits, so very
-- large estimates are rounded; widening the return type would change the
-- function's interface -- confirm with callers before doing so.
CREATE OR REPLACE FUNCTION countit(name, name)
RETURNS float4
LANGUAGE plpgsql AS
$$
DECLARE
    target_schema   ALIAS FOR $1;
    target_relation ALIAS FOR $2;
    explain_sql     text;
    plan_doc        json;
BEGIN
    explain_sql := format(
        'EXPLAIN (FORMAT JSON) SELECT 1 FROM %I.%I',
        target_schema,
        target_relation
    );
    EXECUTE explain_sql INTO plan_doc;
    -- Array element 0 describes the statement; its "Plan" object is the top node.
    RETURN plan_doc -> 0 -> 'Plan' ->> 'Plan Rows';
END;
$$;
postgres=# select countit('public','tmp001')::numeric;
countit
----------
20011000
(1 row)
-- Approximate row count for every ordinary table ('r') and view ('v').
-- Tables use the stored pg_class.reltuples statistic when it is available.
-- Views have no stored statistic, and on PostgreSQL 14+ a table that has never
-- been vacuumed or analyzed reports reltuples = -1; both cases fall back to
-- the planner estimate from countit(schema, relation) instead of showing -1.
-- Requires the two-argument countit(name, name) function to exist.
SELECT
    c.relname AS "table",
    CASE
        WHEN c.relkind = 'r' AND c.reltuples >= 0
            THEN c.reltuples::numeric
        ELSE countit(n.nspname, c.relname)::numeric
    END AS approximate_count
FROM pg_catalog.pg_class AS c
INNER JOIN pg_catalog.pg_namespace AS n
    ON c.relnamespace = n.oid
WHERE c.relkind IN ('r', 'v')
ORDER BY approximate_count DESC;
table | approximate_count
---------------------------------------+-------------------
tmp001 | 20000000
test | 1608000
pgbench_branches | 5718
-- countit(query): planner row estimate for an arbitrary SQL statement.
--   $1  text of the statement to estimate
-- Prefixes the text with EXPLAIN (FORMAT JSON), executes the result, and
-- returns the "Plan Rows" value of the top-level plan node as float4.
-- NOTE(review): the caller's text is concatenated into the executed command
-- unchanged, so this must only be given trusted SQL; a string carrying extra
-- statements would presumably be executed as well -- confirm before exposing
-- this function to untrusted input.
CREATE OR REPLACE FUNCTION countit(text)
RETURNS float4
LANGUAGE plpgsql AS
$$
DECLARE
    query_text  ALIAS FOR $1;
    explain_sql text;
    plan_doc    json;
BEGIN
    explain_sql := 'EXPLAIN (FORMAT JSON) ' || query_text;
    EXECUTE explain_sql INTO plan_doc;
    -- Array element 0 describes the statement; its "Plan" object is the top node.
    RETURN plan_doc -> 0 -> 'Plan' ->> 'Plan Rows';
END;
$$;
用法测试
postgres=# create table t1234(id int, info text);
CREATE TABLE
postgres=# insert into t1234 select generate_series(1,1000000),'test';
INSERT 0 1000000
postgres=# analyze t1234;
ANALYZE
postgres=# select countit('select * from t1234 where id<1000');
countit
---------
954
(1 row)
postgres=# select countit('select * from t1234 where id between 1 and 1000 or (id between 100000 and 101000)');
countit
---------
1931
(1 row)
参考
https://github.com/digoal/blog/blob/master/201509/20150919_02.md
待解决疑问: