postgresql 数据库执行计划 Hash Join

os: centos 7.4
db: postgresql 10.11

Hash Join (散列连接) 实现可以理解为使用驱动表(小表)用来建立 hash map ,依次读取驱动表的数据,对于每一行数据根据连接条件生成一个 hash map 中的一个元組(生成 hashkey),
再和被驱动表比较 hashkey,如果不相同则直接丢弃,如果相同再次过一遍join condition和filter,满足条件的数据集返回。

Hash Join (散列连接) 不依赖索引,一般会选择个小表当驱动表。

Hash Join (散列连接) 通常都能取得很好的性能。

版本

# cat /etc/centos-release
CentOS Linux release 7.4.1708 (Core) 
# 
# su - postgres
$
$ psql -c "select version();"
                                                 version                                                  
----------------------------------------------------------------------------------------------------------
 PostgreSQL 10.11 on x86_64-pc-linux-gnu, compiled by gcc (GCC) 4.8.5 20150623 (Red Hat 4.8.5-39), 64-bit
(1 row)

create table

$ psql
psql (10.11)
Type "help" for help.

postgres=# 
postgres=# drop table if exists tmp_t4;
drop table if exists tmp_t5;


postgres=# create table tmp_t4( 
	id    int8 primary key,
	name  varchar(100)
);

create table tmp_t5( 
	id    int8 primary key,
	name  varchar(100)
);

postgres=# insert into tmp_t4 
select id,
       md5(id::varchar)
  from generate_series(1,1000000) as id;

insert into tmp_t5 
select id,
       md5(id::varchar)
  from generate_series(1,1000000) as id;

postgres=#  

Hash Join

postgres=# set max_parallel_workers_per_gather=0;

postgres=# show work_mem;
 work_mem 
----------
 4MB
(1 row)

postgres=# explain analyze 
select t4.*,t5.*
  from tmp_t4 t4,
       tmp_t5 t5
 where 1=1
   and t4.id = t5.id
   and t4.id between 1
                 and 9999   
;
                                                                 QUERY PLAN                                                                 
--------------------------------------------------------------------------------------------------------------------------------------------
 Hash Join  (cost=511.90..22482.91 rows=9553 width=82) (actual time=22.403..656.882 rows=9999 loops=1)
   Hash Cond: (t5.id = t4.id)
   ->  Seq Scan on tmp_t5 t5  (cost=0.00..19346.00 rows=1000000 width=41) (actual time=0.007..243.948 rows=1000000 loops=1)
   ->  Hash  (cost=392.48..392.48 rows=9553 width=41) (actual time=22.375..22.375 rows=9999 loops=1)
         Buckets: 16384  Batches: 1  Memory Usage: 841kB
         ->  Index Scan using tmp_t4_pkey on tmp_t4 t4  (cost=0.42..392.48 rows=9553 width=41) (actual time=0.051..6.121 rows=9999 loops=1)
               Index Cond: ((id >= 1) AND (id <= 9999))
 Planning time: 0.448 ms
 Execution time: 658.763 ms
(9 rows)


postgres=# set work_mem = "64MB";
SET
postgres=# show work_mem;
 work_mem 
----------
 64MB
(1 row)

postgres=# explain analyze 
select t4.*,t5.*
  from tmp_t4 t4,
       tmp_t5 t5
 where 1=1
   and t4.id = t5.id
   and t4.id between 1
                 and 9999   
;
  
                                                                 QUERY PLAN                                                                 
--------------------------------------------------------------------------------------------------------------------------------------------
 Hash Join  (cost=511.90..22482.91 rows=9553 width=82) (actual time=10.084..653.416 rows=9999 loops=1)
   Hash Cond: (t5.id = t4.id)
   ->  Seq Scan on tmp_t5 t5  (cost=0.00..19346.00 rows=1000000 width=41) (actual time=0.005..255.684 rows=1000000 loops=1)
   ->  Hash  (cost=392.48..392.48 rows=9553 width=41) (actual time=10.063..10.063 rows=9999 loops=1)
         Buckets: 16384  Batches: 1  Memory Usage: 841kB
         ->  Index Scan using tmp_t4_pkey on tmp_t4 t4  (cost=0.42..392.48 rows=9553 width=41) (actual time=0.038..5.446 rows=9999 loops=1)
               Index Cond: ((id >= 1) AND (id <= 9999))
 Planning time: 0.320 ms
 Execution time: 655.363 ms
(9 rows)

参考:

你可能感兴趣的:(#,postgresql,opti,table,index,pgsql,postgresql,hashjoin)