create table t1(id int,name varchar2(10)); create index ind_t1 on t1(name); insert into t1 values(1,'Tom'); insert into t1 values(1,'Tom Tom'); insert into t1 select empno,ename from emp; commit; create table t2 as select * From t1; create index ind_t2 on t2(name) indextype is ctxsys.context; BEGIN DBMS_STATS.GATHER_TABLE_STATS(ownname => 'scott', tabname => 't1', estimate_percent =>100, method_opt => 'for all columns size 1', degree => 8, cascade=>TRUE ); END; BEGIN DBMS_STATS.GATHER_TABLE_STATS(ownname => 'scott', tabname => 't2', estimate_percent =>100, method_opt => 'for all columns size 1', degree => 8, cascade=>TRUE ); END; SQL> select * From t2 where contains(name,'Tom')>0; ID NAME ---------- ---------- 1 Tom 2 Tom Tom Execution Plan ---------------------------------------------------------- Plan hash value: 1615937155 -------------------------------------------------------------------------------------- | Id | Operation | Name | Rows | Bytes | Cost (%CPU)| Time | -------------------------------------------------------------------------------------- | 0 | SELECT STATEMENT | | 2 | 20 | 2 (0)| 00:00:01 | | 1 | TABLE ACCESS BY INDEX ROWID| T2 | 2 | 20 | 2 (0)| 00:00:01 | |* 2 | DOMAIN INDEX | IND_T2 | | | 0 (0)| 00:00:01 | -------------------------------------------------------------------------------------- Predicate Information (identified by operation id): --------------------------------------------------- 2 - access("CTXSYS"."CONTAINS"("NAME",'Tom')>0) Statistics ---------------------------------------------------------- 11 recursive calls 0 db block gets 22 consistent gets 0 physical reads 0 redo size 660 bytes sent via SQL*Net to client 523 bytes received via SQL*Net from client 2 SQL*Net roundtrips to/from client 0 sorts (memory) 0 sorts (disk) 2 rows processed SQL> select * from t1 where name like '%Tom%'; ID NAME ---------- ---------- 1 Tom 2 Tom Tom Execution Plan ---------------------------------------------------------- Plan hash value: 3418867520 -------------------------------------------------------------------------------------- | Id | Operation | Name | Rows | Bytes | Cost (%CPU)| Time | -------------------------------------------------------------------------------------- | 0 | SELECT STATEMENT | | 1 | 10 | 2 (0)| 00:00:01 | | 1 | TABLE ACCESS BY INDEX ROWID| T1 | 1 | 10 | 2 (0)| 00:00:01 | |* 2 | INDEX FULL SCAN | IND_T1 | 1 | | 1 (0)| 00:00:01 | -------------------------------------------------------------------------------------- Predicate Information (identified by operation id): --------------------------------------------------- 2 - filter("NAME" LIKE '%Tom%' AND "NAME" IS NOT NULL) Statistics ---------------------------------------------------------- 1 recursive calls 0 db block gets 4 consistent gets 0 physical reads 0 redo size 660 bytes sent via SQL*Net to client 523 bytes received via SQL*Net from client 2 SQL*Net roundtrips to/from client 0 sorts (memory) 0 sorts (disk) 2 rows processed insert into t1 select * From t1; insert into t1 select * From t1; insert into t1 select * From t1; insert into t1 select * From t1; insert into t1 select * From t1; insert into t2 select * from t2; insert into t2 select * from t2; insert into t2 select * from t2; insert into t2 select * from t2; insert into t2 select * from t2; BEGIN DBMS_STATS.GATHER_TABLE_STATS(ownname => 'scott', tabname => 't1', estimate_percent =>100, method_opt => 'for all columns size 1', degree => 8, cascade=>TRUE ); END; BEGIN DBMS_STATS.GATHER_TABLE_STATS(ownname => 'scott', tabname => 't2', estimate_percent =>100, method_opt => 'for all columns size 1', degree => 8, cascade=>TRUE ); END; SQL> select * from t1 where name like '%Tom%'; ID NAME ---------- ---------- 1 Tom 1 Tom 1 Tom 1 Tom 1 Tom 1 Tom 1 Tom 1 Tom 1 Tom 1 Tom 1 Tom 1 Tom 1 Tom 1 Tom 1 Tom 1 Tom 1 Tom 1 Tom 1 Tom 1 Tom 1 Tom 1 Tom 1 Tom 1 Tom 1 Tom 1 Tom 1 Tom 1 Tom 1 Tom 1 Tom 1 Tom 1 Tom 1 Tom Tom 1 Tom Tom 1 Tom Tom 1 Tom Tom 1 Tom Tom 1 Tom Tom 1 Tom Tom 1 Tom Tom 1 Tom Tom 1 Tom Tom 1 Tom Tom 1 Tom Tom 1 Tom Tom 1 Tom Tom 1 Tom Tom 1 Tom Tom 1 Tom Tom 1 Tom Tom 1 Tom Tom 1 Tom Tom 1 Tom Tom 1 Tom Tom 1 Tom Tom 1 Tom Tom 1 Tom Tom 1 Tom Tom 1 Tom Tom 1 Tom Tom 1 Tom Tom 1 Tom Tom 1 Tom Tom 1 Tom Tom 64 rows selected. Execution Plan ---------------------------------------------------------- Plan hash value: 3617692013 -------------------------------------------------------------------------- | Id | Operation | Name | Rows | Bytes | Cost (%CPU)| Time | -------------------------------------------------------------------------- | 0 | SELECT STATEMENT | | 339 | 3390 | 7 (0)| 00:00:01 | |* 1 | TABLE ACCESS FULL| T1 | 339 | 3390 | 7 (0)| 00:00:01 | -------------------------------------------------------------------------- Predicate Information (identified by operation id): --------------------------------------------------- 1 - filter("NAME" LIKE '%Tom%' AND "NAME" IS NOT NULL) Statistics ---------------------------------------------------------- 0 recursive calls 0 db block gets 57 consistent gets 0 physical reads 0 redo size 2272 bytes sent via SQL*Net to client 567 bytes received via SQL*Net from client 6 SQL*Net roundtrips to/from client 0 sorts (memory) 0 sorts (disk) 64 rows processed SQL> select * From t2 where contains(name,'Tom')>0; ID NAME ---------- ---------- 1 Tom 1 Tom Tom Execution Plan ---------------------------------------------------------- Plan hash value: 1615937155 -------------------------------------------------------------------------------------- | Id | Operation | Name | Rows | Bytes | Cost (%CPU)| Time | -------------------------------------------------------------------------------------- | 0 | SELECT STATEMENT | | 848 | 8480 | 9 (0)| 00:00:01 | | 1 | TABLE ACCESS BY INDEX ROWID| T2 | 848 | 8480 | 9 (0)| 00:00:01 | |* 2 | DOMAIN INDEX | IND_T2 | | | 0 (0)| 00:00:01 | -------------------------------------------------------------------------------------- Predicate Information (identified by operation id): --------------------------------------------------- 2 - access("CTXSYS"."CONTAINS"("NAME",'Tom')>0) Statistics ---------------------------------------------------------- 11 recursive calls 0 db block gets 19 consistent gets 0 physical reads 0 redo size 660 bytes sent via SQL*Net to client 523 bytes received via SQL*Net from client 2 SQL*Net roundtrips to/from client 0 sorts (memory) 0 sorts (disk) 2 rows processed SQL> alter index ind_t2 rebuild parameters('sync'); Index altered. SQL> select * From t2 where contains(name,'Tom')>0; ID NAME ---------- ---------- 1 Tom 1 Tom Tom 1 Tom 1 Tom Tom 1 Tom 1 Tom Tom 1 Tom 1 Tom Tom 1 Tom 1 Tom Tom 1 Tom 1 Tom Tom 1 Tom 1 Tom Tom 1 Tom 1 Tom Tom 1 Tom 1 Tom Tom 1 Tom 1 Tom Tom 1 Tom 1 Tom Tom 1 Tom 1 Tom Tom 1 Tom 1 Tom Tom 1 Tom 1 Tom Tom 1 Tom 1 Tom Tom 1 Tom 1 Tom Tom 1 Tom 1 Tom Tom 1 Tom 1 Tom Tom 1 Tom 1 Tom Tom 1 Tom 1 Tom Tom 1 Tom 1 Tom Tom 1 Tom 1 Tom Tom 1 Tom 1 Tom Tom 1 Tom 1 Tom Tom 1 Tom 1 Tom Tom 1 Tom 1 Tom Tom 1 Tom 1 Tom Tom 1 Tom 1 Tom Tom 1 Tom 1 Tom Tom 1 Tom 1 Tom Tom 1 Tom 1 Tom Tom 1 Tom 1 Tom Tom 64 rows selected. Execution Plan ---------------------------------------------------------- Plan hash value: 1615937155 -------------------------------------------------------------------------------------- | Id | Operation | Name | Rows | Bytes | Cost (%CPU)| Time | -------------------------------------------------------------------------------------- | 0 | SELECT STATEMENT | | 63 | 630 | 7 (0)| 00:00:01 | | 1 | TABLE ACCESS BY INDEX ROWID| T2 | 63 | 630 | 7 (0)| 00:00:01 | |* 2 | DOMAIN INDEX | IND_T2 | | | 2 (0)| 00:00:01 | -------------------------------------------------------------------------------------- Predicate Information (identified by operation id): --------------------------------------------------- 2 - access("CTXSYS"."CONTAINS"("NAME",'Tom')>0) Statistics ---------------------------------------------------------- 11 recursive calls 0 db block gets 48 consistent gets 0 physical reads 0 redo size 2272 bytes sent via SQL*Net to client 567 bytes received via SQL*Net from client 6 SQL*Net roundtrips to/from client 0 sorts (memory) 0 sorts (disk) 64 rows processed
从上面的测试结果可以看出,全文索引的效果不是很好,不管数量是多还是少,逻辑读都比普通索引的全表扫要高。 为什么会这样呢? 因为全文索引比表要大。 查看全文索引的大小: SQL> SET LONG 2000000000 SQL> SET pagesize 2000 SQL> SELECT ctx_report.index_size('IND_T2') FROM DUAL; CTX_REPORT.INDEX_SIZE('IND_T2') -------------------------------------------------------------------------------- =========================================================================== INDEX SIZE FOR SCOTT.IND_T2 =========================================================================== TABLE: SCOTT.DR$IND_T2$I TABLESPACE NAME: USERS BLOCKS ALLOCATED: 8 BLOCKS USED: 8 BYTES ALLOCATED: 65,536 (64.00 KB) BYTES USED: 65,536 (64.00 KB) LOB SEGMENT: SCOTT.SYS_LOB0000080748C00006$$ TABLE NAME: SCOTT.DR$IND_T2$I LOB COLUMN: TOKEN_INFO TABLESPACE NAME: USERS BLOCKS ALLOCATED: 8 BLOCKS USED: 3 BYTES ALLOCATED: 65,536 (64.00 KB) BYTES USED: 24,576 (24.00 KB) INDEX (NORMAL): SCOTT.DR$IND_T2$X TABLE NAME: SCOTT.DR$IND_T2$I TABLESPACE NAME: USERS BLOCKS ALLOCATED: 8 BLOCKS USED: 4 BYTES ALLOCATED: 65,536 (64.00 KB) BYTES USED: 32,768 (32.00 KB) INDEX (LOB): SCOTT.SYS_IL0000080748C00006$$ TABLE NAME: SCOTT.DR$IND_T2$I TABLESPACE NAME: USERS BLOCKS ALLOCATED: 8 BLOCKS USED: 4 BYTES ALLOCATED: 65,536 (64.00 KB) BYTES USED: 32,768 (32.00 KB) INDEX (IOT): SCOTT.SYS_IOT_TOP_80751 TABLE NAME: SCOTT.DR$IND_T2$K TABLESPACE NAME: USERS BLOCKS ALLOCATED: 24 BLOCKS USED: 24 BYTES ALLOCATED: 196,608 (192.00 KB) BYTES USED: 196,608 (192.00 KB) INDEX (IOT): SCOTT.SYS_IOT_TOP_80756 TABLE NAME: SCOTT.DR$IND_T2$N TABLESPACE NAME: USERS BLOCKS ALLOCATED: 8 BLOCKS USED: 4 BYTES ALLOCATED: 65,536 (64.00 KB) BYTES USED: 32,768 (32.00 KB) TABLE: SCOTT.DR$IND_T2$R TABLESPACE NAME: USERS BLOCKS ALLOCATED: 8 BLOCKS USED: 8 BYTES ALLOCATED: 65,536 (64.00 KB) BYTES USED: 65,536 (64.00 KB) LOB SEGMENT: SCOTT.SYS_LOB0000080753C00002$$ TABLE NAME: SCOTT.DR$IND_T2$R LOB COLUMN: DATA TABLESPACE NAME: USERS BLOCKS ALLOCATED: 24 BLOCKS USED: 24 BYTES ALLOCATED: 196,608 (192.00 KB) BYTES USED: 196,608 (192.00 KB) INDEX (LOB): SCOTT.SYS_IL0000080753C00002$$ TABLE NAME: SCOTT.DR$IND_T2$R TABLESPACE NAME: USERS BLOCKS ALLOCATED: 8 BLOCKS USED: 4 BYTES ALLOCATED: 65,536 (64.00 KB) BYTES USED: 32,768 (32.00 KB) TOTALS FOR INDEX SCOTT.IND_T2 --------------------------------------------------------------------------- TOTAL BLOCKS ALLOCATED: 104 TOTAL BLOCKS USED: 83 TOTAL BYTES ALLOCATED: 851,968 (832.00 KB) TOTAL BYTES USED: 679,936 (664.00 KB) 从上面可以看出,全文索引插入的数据都是在临时表中,需要手动同步到正式表中去alter index ind_t2 rebuild parameters('sync'),这样做的好处是,插入数据后,不会对索引进行锁定,不影正式表中对数据的操作,比较适合于数据仓库。
通过对表中继续增加数据后查看 SQL> select * from t1 where name like '%Tom%'; 8192 rows selected. Execution Plan ---------------------------------------------------------- Plan hash value: 3617692013 -------------------------------------------------------------------------- | Id | Operation | Name | Rows | Bytes | Cost (%CPU)| Time | -------------------------------------------------------------------------- | 0 | SELECT STATEMENT | | 21709 | 212K| 276 (2)| 00:00:04 | |* 1 | TABLE ACCESS FULL| T1 | 21709 | 212K| 276 (2)| 00:00:04 | -------------------------------------------------------------------------- Predicate Information (identified by operation id): --------------------------------------------------- 1 - filter("NAME" LIKE '%Tom%' AND "NAME" IS NOT NULL) Statistics ---------------------------------------------------------- 0 recursive calls 0 db block gets 7410 consistent gets 0 physical reads 0 redo size 216876 bytes sent via SQL*Net to client 6529 bytes received via SQL*Net from client 548 SQL*Net roundtrips to/from client 0 sorts (memory) 0 sorts (disk) 8192 rows processed SQL> select * From t2 where contains(name,'Tom')>0; 8320 rows selected. Execution Plan ---------------------------------------------------------- Plan hash value: 1615937155 -------------------------------------------------------------------------------------- | Id | Operation | Name | Rows | Bytes | Cost (%CPU)| Time | -------------------------------------------------------------------------------------- | 0 | SELECT STATEMENT | | 4119 | 41190 | 1588 (0)| 00:00:20 | | 1 | TABLE ACCESS BY INDEX ROWID| T2 | 4119 | 41190 | 1588 (0)| 00:00:20 | |* 2 | DOMAIN INDEX | IND_T2 | | | 1377 (0)| 00:00:17 | -------------------------------------------------------------------------------------- Predicate Information (identified by operation id): --------------------------------------------------- 2 - access("CTXSYS"."CONTAINS"("NAME",'Tom')>0) Statistics ---------------------------------------------------------- 11 recursive calls 0 db block gets 6638 consistent gets 0 physical reads 0 redo size 219900 bytes sent via SQL*Net to client 6617 bytes received via SQL*Net from client 556 SQL*Net roundtrips to/from client 0 sorts (memory) 0 sorts (disk) 8320 rows processed SQL> select count(8) from t1; COUNT(8) ---------- 868352 通过查看可以看出,t1表和t2表中的数量大小868352,全文索引的逻辑读比普通索引的逻辑读要低,但是代价比较高。