Oracle全文索引的WORDLIST属性用来设置模糊查询和同词根查询,另外WORDLIST属性还支持通配符查询。
Oracle的WORDLIST属性只有BASIC_WORDLIST一种,下面看一个BASIC_WORDLIST的例子:
1.Stemmer attribute用来查询同词根的数据
SQL> create table stemmer_tbl(id number primary key,docs clob);
Table created.
SQL> insert into stemmer_tbl values(111,'We are testing the Stemmer attribute option');
1 row created.
SQL> insert into stemmer_tbl values(112,'The girl sang like a frog');
1 row created.
SQL> insert into stemmer_tbl values(113,'My import is committing too often');
1 row created.
SQL> commit;
Commit complete.
--
-- Basic WordList Stemmer attribute
-- This example uses English for Stemming.
-- Language choices are: NULL,ENGLISH,DERIVATIONAL,DUTCH,FRENCH,GERMAN,ITALIAN
-- and SPANISH. By default we use the language of the database. If the
-- database is not one of the listed languages then we choose NULL for stemmer
-- and default for fuzzy
--
SQL> begin
2 Ctx_Ddl.Create_Preference('STEMMER_PREF', 'BASIC_WORDLIST');
3 ctx_ddl.set_attribute('STEMMER_PREF','STEMMER','ENGLISH');
4 end;
5 /
PL/SQL procedure successfully completed.
SQL> create index stemmer_idx on stemmer_tbl ( docs )
2 indextype is ctxsys.context
3 parameters ('Wordlist STEMMER_PREF');
Index created.
SQL> column docs format a20
SQL> select id,docs,score(1) from stemmer_tbl
2 where contains ( docs, '$test',1 ) > 0;
ID DOCS SCORE(1)
---------- -------------------- ----------
111 We are testing the S 4
temmer attribute opt
ion
SQL> select id,docs,score(1) from stemmer_tbl
2 where contains ( docs, '$sing',1) >0;
ID DOCS SCORE(1)
---------- -------------------- ----------
112 The girl sang like a 4
frog
第一个查询用$test查找词根为test的数据,找到了包含testing的这一行,这个例子不太明显;第二个查询查找词根为sing的数据,找到了包含sang的这一行。
2.模糊查询的功能
SQL> create table fuzzy_tbl(id number primary key,docs clob);
Table created.
SQL> insert into fuzzy_tbl values(111,'We are testing the Fuzzy attribute option in the baisic wordlist feature.');
1 row created.
SQL> insert into fuzzy_tbl values(112,'Basic is a programming language');
1 row created.
SQL> insert into fuzzy_tbl values(114,'cat');
1 row created.
SQL> insert into fuzzy_tbl values(113,'cats');
1 row created.
SQL> insert into fuzzy_tbl values(115,'calc');
1 row created.
SQL> insert into fuzzy_tbl values(116,'case');
1 row created.
SQL> commit;
Commit complete.
--
-- Basic WordList Fuzzy attribute
-- This example uses English for Fuzzy searching.
-- Language choices are: GENERIC,JAPANESE_VGRAM,KOREAN,CHINESE_VGRAM,ENGLISH,
-- DUTCH,FRENCH,GERMAN,ITALIAN,SPANISH,and OCR.
-- By default we use the GENERIC for fuzzy_match.
--
SQL> begin
2 Ctx_Ddl.Create_Preference('FUZZY_PREF', 'BASIC_WORDLIST');
3 ctx_ddl.set_attribute('FUZZY_PREF','FUZZY_MATCH','ENGLISH');
4 ctx_ddl.set_attribute('FUZZY_PREF','FUZZY_SCORE','0');
5 ctx_ddl.set_attribute('FUZZY_PREF','FUZZY_NUMRESULTS','5000');
6 end;
7 /
PL/SQL procedure successfully completed.
SQL> create index fuzzy_idx on fuzzy_tbl ( docs )
2 indextype is ctxsys.context
3 parameters ('Wordlist FUZZY_PREF');
Index created.
SQL> select id,docs,score(1) from fuzzy_tbl
where contains ( docs, '?basic',1 ) > 0;
ID DOCS SCORE(1)
---------- -------------------- ----------
111 We are testing the F 5
uzzy attribute optio
n in the baisic word
list feature.
112 Basic is a programmi 5
ng language
SQL> select id,docs from fuzzy_tbl where contains ( docs,'FUZZY(basic)')>0;
ID DOCS
---------- --------------------
111 We are testing the F
uzzy attribute optio
n in the baisic word
list feature.
112 Basic is a programmi
ng language
SQL> select * from fuzzy_tbl where contains
(docs,'?cat')>0;
ID DOCS
---------- --------------------
114 cat
113 cats
115 calc
116 case
前两个select分别用?操作符和FUZZY操作符对basic做模糊查询,连拼写错误的baisic也能匹配到;第三个select查询找出类似'cat'的数据。
3.测试词根和模糊组合功能,还是用上面使用的表fuzzy_tbl
--
-- Basic WordList Fuzzy and Stemmer attributes
-- This example uses English for Fuzzy and Stemming searching.
--
SQL> begin
2 Ctx_Ddl.Create_Preference('STEM_FUZZY_PREF', 'BASIC_WORDLIST');
3 ctx_ddl.set_attribute('STEM_FUZZY_PREF','FUZZY_MATCH','ENGLISH');
4 ctx_ddl.set_attribute('STEM_FUZZY_PREF','FUZZY_SCORE','0');
5 ctx_ddl.set_attribute('STEM_FUZZY_PREF','FUZZY_NUMRESULTS','5000');
6 ctx_ddl.set_attribute('STEM_FUZZY_PREF','STEMMER','ENGLISH');
7 end;
8 /
SQL> drop index fuzzy_idx ;
Index dropped.
SQL> create index fuzzy_idx on fuzzy_tbl ( docs )
2 indextype is ctxsys.context
3 parameters ('Wordlist STEM_FUZZY_PREF');
Index created.
SQL> select id,docs,score(1) from fuzzy_tbl
2 where contains ( docs, '?basic',1 ) > 0;
ID DOCS SCORE(1)
---------- -------------------- ----------
111 We are testing the F 5
uzzy attribute optio
n in the baisic word
list feature.
112 Basic is a programmi 5
ng language
SQL> select * from fuzzy_tbl where contains
2 (docs,'?cat')>0;
ID DOCS
---------- --------------------
114 cat
113 cats
115 calc
116 case
模糊查询起作用了,下面试试词根查询(注意:CONTEXT索引默认不会随DML自动同步,更新数据后需要先执行ctx_ddl.sync_index,新数据才能被查到)
SQL> update fuzzy_tbl set docs='cat sang' where id=114;
SQL> commit;
SQL> select * from fuzzy_tbl where contains(docs,'$sing',1)>0;
no rows selected
SQL> exec ctx_ddl.sync_index('fuzzy_idx');
PL/SQL procedure successfully completed.
SQL> select * from fuzzy_tbl where contains(docs,'$sing',1)>0;
ID DOCS
---------- --------------------
114 cat sang
词根和模糊都起作用了。