pg_trgm 处理中间匹配 like '%xxoo%'

os: centos 7.4
db: postgresql 10.10

pg_trgm 模块提供了基于 trigram(三字符组)匹配来衡量字母数字文本相似度的函数和操作符,以及支持快速搜索相似字符串的索引操作符类。
pg_trgm 是 contrib 包自带的 extension 。

yum install postgresql10*

# yum list installed |grep -i postgresql
postgresql10.x86_64                10.10-1PGDG.rhel7                   @pgdg10  
postgresql10-contrib.x86_64        10.10-1PGDG.rhel7                   @pgdg10  
postgresql10-debuginfo.x86_64      10.10-1PGDG.rhel7                   @pgdg10  
postgresql10-devel.x86_64          10.10-1PGDG.rhel7                   @pgdg10  
postgresql10-docs.x86_64           10.10-1PGDG.rhel7                   @pgdg10  
postgresql10-libs.x86_64           10.10-1PGDG.rhel7                   @pgdg10  
postgresql10-odbc.x86_64           11.01.0000-1PGDG.rhel7              @pgdg10  
postgresql10-plperl.x86_64         10.10-1PGDG.rhel7                   @pgdg10  
postgresql10-plpython.x86_64       10.10-1PGDG.rhel7                   @pgdg10  
postgresql10-pltcl.x86_64          10.10-1PGDG.rhel7                   @pgdg10  
postgresql10-server.x86_64         10.10-1PGDG.rhel7                   @pgdg10  
postgresql10-tcl.x86_64            2.4.0-1.rhel7                       @pgdg10  
postgresql10-tcl-debuginfo.x86_64  2.3.1-1.rhel7                       @pgdg10  
postgresql10-test.x86_64           10.10-1PGDG.rhel7                   @pgdg10 

查看安装后的extension文件

# ls -l /usr/pgsql-10/lib/*pg_trgm*
-rwxr-xr-x 1 root root 45224 Oct 27 20:11 /usr/pgsql-10/lib/pg_trgm.so

# ls -l /usr/pgsql-10/share/extension/*pg_trgm*
-rw-r--r-- 1 root root  536 Oct 27 20:11 /usr/pgsql-10/share/extension/pg_trgm--1.0--1.1.sql
-rw-r--r-- 1 root root 2192 Oct 27 20:11 /usr/pgsql-10/share/extension/pg_trgm--1.1--1.2.sql
-rw-r--r-- 1 root root 2903 Oct 27 20:11 /usr/pgsql-10/share/extension/pg_trgm--1.2--1.3.sql
-rw-r--r-- 1 root root 7975 Oct 27 20:11 /usr/pgsql-10/share/extension/pg_trgm--1.3.sql
-rw-r--r-- 1 root root  177 Oct 27 20:11 /usr/pgsql-10/share/extension/pg_trgm.control
-rw-r--r-- 1 root root 4246 Oct 27 20:11 /usr/pgsql-10/share/extension/pg_trgm--unpackaged--1.0.sql

create extension

如果需要让 pg_trgm 支持中文相似查询,数据库的 lc_ctype 不能为 "C":lc_ctype 为 "C" 时,中文等多字节字符不会被识别为字母数字字符,无法被正确切分成 trigram;只有 token 切分正确,查询结果才是正确的。

# su - postgres
$ psql

postgres=# select * from pg_available_extensions where name like '%pg_trgm%';
  name   | default_version | installed_version |                              comment                              
---------+-----------------+-------------------+-------------------------------------------------------------------
 pg_trgm | 1.3             |                   | text similarity measurement and index searching based on trigrams
(1 row)

postgres=# 
postgres=# show lc_ctype;
  lc_ctype   
-------------
 en_US.UTF-8
(1 row)

postgres=# create extension pg_trgm;
CREATE EXTENSION
postgres=# \dx 
                                   List of installed extensions
    Name    | Version |   Schema   |                            Description                            
------------+---------+------------+-------------------------------------------------------------------
 pg_trgm    | 1.3     | public     | text similarity measurement and index searching based on trigrams
(1 row)

postgres=# \dx+ pg_trgm
                                  Objects in extension "pg_trgm"
                                        Object description                                        
--------------------------------------------------------------------------------------------------
 function gin_extract_query_trgm(text,internal,smallint,internal,internal,internal,internal)
 function gin_extract_value_trgm(text,internal)
 function gin_trgm_consistent(internal,smallint,text,integer,internal,internal,internal,internal)
 function gin_trgm_triconsistent(internal,smallint,text,integer,internal,internal,internal)
 function gtrgm_compress(internal)
 function gtrgm_consistent(internal,text,smallint,oid,internal)
 function gtrgm_decompress(internal)
 function gtrgm_distance(internal,text,smallint,oid,internal)
 function gtrgm_in(cstring)
 function gtrgm_out(gtrgm)
 function gtrgm_penalty(internal,internal,internal)
 function gtrgm_picksplit(internal,internal)
 function gtrgm_same(gtrgm,gtrgm,internal)
 function gtrgm_union(internal,internal)
 function set_limit(real)
 function show_limit()
 function show_trgm(text)
 function similarity_dist(text,text)
 function similarity_op(text,text)
 function similarity(text,text)
 function word_similarity_commutator_op(text,text)
 function word_similarity_dist_commutator_op(text,text)
 function word_similarity_dist_op(text,text)
 function word_similarity_op(text,text)
 function word_similarity(text,text)
 operator class gin_trgm_ops for access method gin
 operator class gist_trgm_ops for access method gist
 operator family gin_trgm_ops for access method gin
 operator family gist_trgm_ops for access method gist
 operator <<->(text,text)
 operator <->>(text,text)
 operator <->(text,text)
 operator <%(text,text)
 operator %>(text,text)
 operator %(text,text)
 type gtrgm
(36 rows)

index

postgres=# create table tmp_t0(id int8,name1 varchar(100),name2 varchar(100));
postgres=# 
postgres=# insert into tmp_t0(id,name1,name2) 
select id,md5(id::varchar),md5(md5(id::varchar)) 
from generate_series(1,5000000) as id;

postgres=# \timing

postgres=# select * from tmp_t0 where name2 like '%aaaaaa%';
   id    |              name1               |              name2               
---------+----------------------------------+----------------------------------
 4639283 | ecbb579bba1f3296d066984873bfc485 | 0a388eb41924033749764aaaaaa99755
 2089668 | e3013a6672357c768d3a1de6480b334a | 2aaaaaa184e1d4757dcacac6955dc54a
 2375149 | ee7886eeb94ffa7f8593e5870a98086b | e3e0cfd338265aaaaaa9ff5247eb3f7e
 3912530 | 519924f49b146e87f85a7c9ab239ad24 | d2f3aaaaaac8a84374b270b8a0bc5058
 2324531 | fbed5149a49e4c99a383fe1dcf47af39 | 94e3f45166b3175252e8ddd87aaaaaa6
 3543121 | d35b173585fc4e4f039e4579b05c5eeb | 8627b8186093438a1851aaaaaad68696
 4687675 | d6acea73c0716946d0502a3e36699f07 | e757e2c6b5c718552baaaaaaf560c614
(7 rows)

Time: 644.033 ms

GIST

postgres=# create index idx_tmp_t0_name1 on tmp_t0 USING GIST (name1 gist_trgm_ops);

postgres=# select * from tmp_t0 where name1 like '%aaaaaa%';

GIN

postgres=# create index idx_tmp_t0_name2 on tmp_t0 USING GIN (name2 gin_trgm_ops);

postgres=# select * from tmp_t0 where name2 like '%aaaaaa%';

更多用法可以参考官方文档。另外,可以用 explain (analyze) 对比建索引前后的执行计划和耗时,确认 like '%...%' 查询确实使用了 trigram 索引。

参考:
http://postgres.cn/docs/10/pgtrgm.html

你可能感兴趣的:(#,postgresql,extension)