os: centos 7.4
db: postgresql 10.10
pg_trgm模块提供用于决定基于 trigram 匹配的字母数字文本相似度的函数和操作符,以及支持快速搜索相似字符串的索引操作符类。
pg_trgm 是 contrib 包(postgresql10-contrib)自带的 extension,安装该包后即可使用。
# yum list installed |grep -i postgresql
postgresql10.x86_64 10.10-1PGDG.rhel7 @pgdg10
postgresql10-contrib.x86_64 10.10-1PGDG.rhel7 @pgdg10
postgresql10-debuginfo.x86_64 10.10-1PGDG.rhel7 @pgdg10
postgresql10-devel.x86_64 10.10-1PGDG.rhel7 @pgdg10
postgresql10-docs.x86_64 10.10-1PGDG.rhel7 @pgdg10
postgresql10-libs.x86_64 10.10-1PGDG.rhel7 @pgdg10
postgresql10-odbc.x86_64 11.01.0000-1PGDG.rhel7 @pgdg10
postgresql10-plperl.x86_64 10.10-1PGDG.rhel7 @pgdg10
postgresql10-plpython.x86_64 10.10-1PGDG.rhel7 @pgdg10
postgresql10-pltcl.x86_64 10.10-1PGDG.rhel7 @pgdg10
postgresql10-server.x86_64 10.10-1PGDG.rhel7 @pgdg10
postgresql10-tcl.x86_64 2.4.0-1.rhel7 @pgdg10
postgresql10-tcl-debuginfo.x86_64 2.3.1-1.rhel7 @pgdg10
postgresql10-test.x86_64 10.10-1PGDG.rhel7 @pgdg10
查看安装后的extension文件
# ls -l /usr/pgsql-10/lib/*pg_trgm*
-rwxr-xr-x 1 root root 45224 Oct 27 20:11 /usr/pgsql-10/lib/pg_trgm.so
# ls -l /usr/pgsql-10/share/extension/*pg_trgm*
-rw-r--r-- 1 root root 536 Oct 27 20:11 /usr/pgsql-10/share/extension/pg_trgm--1.0--1.1.sql
-rw-r--r-- 1 root root 2192 Oct 27 20:11 /usr/pgsql-10/share/extension/pg_trgm--1.1--1.2.sql
-rw-r--r-- 1 root root 2903 Oct 27 20:11 /usr/pgsql-10/share/extension/pg_trgm--1.2--1.3.sql
-rw-r--r-- 1 root root 7975 Oct 27 20:11 /usr/pgsql-10/share/extension/pg_trgm--1.3.sql
-rw-r--r-- 1 root root 177 Oct 27 20:11 /usr/pgsql-10/share/extension/pg_trgm.control
-rw-r--r-- 1 root root 4246 Oct 27 20:11 /usr/pgsql-10/share/extension/pg_trgm--unpackaged--1.0.sql
如果需要让 pg_trgm 支持中文相似查询,数据库的 lc_ctype 不能为 "C";只有在 token(trigram)能够被正确切分的情况下,查询结果才是正确的。
# su - postgres
$ psql
postgres=# select * from pg_available_extensions where name like '%pg_trgm%';
name | default_version | installed_version | comment
---------+-----------------+-------------------+-------------------------------------------------------------------
pg_trgm | 1.3 | | text similarity measurement and index searching based on trigrams
(1 row)
postgres=#
postgres=# show lc_ctype;
lc_ctype
-------------
en_US.UTF-8
(1 row)
postgres=# create extension pg_trgm;
CREATE EXTENSION
postgres=# \dx
List of installed extensions
Name | Version | Schema | Description
------------+---------+------------+-------------------------------------------------------------------
pg_trgm | 1.3 | public | text similarity measurement and index searching based on trigrams
(1 row)
postgres=# \dx+ pg_trgm
Objects in extension "pg_trgm"
Object description
--------------------------------------------------------------------------------------------------
function gin_extract_query_trgm(text,internal,smallint,internal,internal,internal,internal)
function gin_extract_value_trgm(text,internal)
function gin_trgm_consistent(internal,smallint,text,integer,internal,internal,internal,internal)
function gin_trgm_triconsistent(internal,smallint,text,integer,internal,internal,internal)
function gtrgm_compress(internal)
function gtrgm_consistent(internal,text,smallint,oid,internal)
function gtrgm_decompress(internal)
function gtrgm_distance(internal,text,smallint,oid,internal)
function gtrgm_in(cstring)
function gtrgm_out(gtrgm)
function gtrgm_penalty(internal,internal,internal)
function gtrgm_picksplit(internal,internal)
function gtrgm_same(gtrgm,gtrgm,internal)
function gtrgm_union(internal,internal)
function set_limit(real)
function show_limit()
function show_trgm(text)
function similarity_dist(text,text)
function similarity_op(text,text)
function similarity(text,text)
function word_similarity_commutator_op(text,text)
function word_similarity_dist_commutator_op(text,text)
function word_similarity_dist_op(text,text)
function word_similarity_op(text,text)
function word_similarity(text,text)
operator class gin_trgm_ops for access method gin
operator class gist_trgm_ops for access method gist
operator family gin_trgm_ops for access method gin
operator family gist_trgm_ops for access method gist
operator <<->(text,text)
operator <->>(text,text)
operator <->(text,text)
operator <%(text,text)
operator %>(text,text)
operator %(text,text)
type gtrgm
(36 rows)
postgres=# create table tmp_t0(id int8,name1 varchar(100),name2 varchar(100));
postgres=#
postgres=# insert into tmp_t0(id,name1,name2)
select id,md5(id::varchar),md5(md5(id::varchar))
from generate_series(1,5000000) as id;
postgres=# \timing
postgres=# select * from tmp_t0 where name2 like '%aaaaaa%';
id | name1 | name2
---------+----------------------------------+----------------------------------
4639283 | ecbb579bba1f3296d066984873bfc485 | 0a388eb41924033749764aaaaaa99755
2089668 | e3013a6672357c768d3a1de6480b334a | 2aaaaaa184e1d4757dcacac6955dc54a
2375149 | ee7886eeb94ffa7f8593e5870a98086b | e3e0cfd338265aaaaaa9ff5247eb3f7e
3912530 | 519924f49b146e87f85a7c9ab239ad24 | d2f3aaaaaac8a84374b270b8a0bc5058
2324531 | fbed5149a49e4c99a383fe1dcf47af39 | 94e3f45166b3175252e8ddd87aaaaaa6
3543121 | d35b173585fc4e4f039e4579b05c5eeb | 8627b8186093438a1851aaaaaad68696
4687675 | d6acea73c0716946d0502a3e36699f07 | e757e2c6b5c718552baaaaaaf560c614
(7 rows)
Time: 644.033 ms
使用 GiST 索引(gist_trgm_ops):
postgres=# create index idx_tmp_t0_name1 on tmp_t0 USING GIST (name1 gist_trgm_ops);
postgres=# select * from tmp_t0 where name1 like '%aaaaaa%';
使用 GIN 索引(gin_trgm_ops):
postgres=# create index idx_tmp_t0_name2 on tmp_t0 USING GIN (name2 gin_trgm_ops);
postgres=# select * from tmp_t0 where name2 like '%aaaaaa%';
更多用法可以参考官方文档。
参考:
http://postgres.cn/docs/10/pgtrgm.html