全文索引

-- Baseline table for the comparison: NAME is covered by an ordinary index.
create table t1 (
    id   int,
    name varchar2(10)
);
create index ind_t1 on t1 (name);
-- Two rows containing 'Tom', followed by filler rows copied from EMP.
-- Explicit column lists keep the inserts independent of column order.
insert into t1 (id, name) values (1, 'Tom');
insert into t1 (id, name) values (1, 'Tom Tom');
insert into t1 (id, name)
select empno, ename
from emp;
commit;

-- Copy of T1 whose NAME column is indexed with an Oracle Text CONTEXT
-- (domain) index instead of an ordinary index.
create table t2 as
select id, name
from t1;
create index ind_t2 on t2 (name) indextype is ctxsys.context;

-- Gather optimizer statistics for SCOTT.T1: 100% sample, 'size 1' column
-- statistics, parallel degree 8; cascade => TRUE also requests statistics
-- for the table's indexes.
BEGIN
    DBMS_STATS.GATHER_TABLE_STATS(
        ownname          => 'scott',
        tabname          => 't1',
        estimate_percent => 100,
        method_opt       => 'for all columns size 1',
        degree           => 8,
        cascade          => TRUE
    );
END;
/

-- Gather optimizer statistics for SCOTT.T2 with the same settings as for T1:
-- 100% sample, 'size 1' column statistics, parallel degree 8;
-- cascade => TRUE also requests statistics for the table's indexes.
BEGIN
    DBMS_STATS.GATHER_TABLE_STATS(
        ownname          => 'scott',
        tabname          => 't2',
        estimate_percent => 100,
        method_opt       => 'for all columns size 1',
        degree           => 8,
        cascade          => TRUE
    );
END;
/

SQL> select  * From t2 where contains(name,'Tom')>0;
	ID NAME
---------- ----------
	 1 Tom
	 2 Tom Tom
Execution Plan
----------------------------------------------------------
Plan hash value: 1615937155

--------------------------------------------------------------------------------------
| Id  | Operation		    | Name   | Rows  | Bytes | Cost (%CPU)| Time     |
--------------------------------------------------------------------------------------
|   0 | SELECT STATEMENT	    |	     |	   2 |	  20 |	   2   (0)| 00:00:01 |
|   1 |  TABLE ACCESS BY INDEX ROWID| T2     |	   2 |	  20 |	   2   (0)| 00:00:01 |
|*  2 |   DOMAIN INDEX		    | IND_T2 |	     |	     |	   0   (0)| 00:00:01 |
--------------------------------------------------------------------------------------
Predicate Information (identified by operation id):
---------------------------------------------------
   2 - access("CTXSYS"."CONTAINS"("NAME",'Tom')>0)
Statistics
----------------------------------------------------------
	 11  recursive calls
	  0  db block gets
	 22  consistent gets
	  0  physical reads
	  0  redo size
	660  bytes sent via SQL*Net to client
	523  bytes received via SQL*Net from client
	  2  SQL*Net roundtrips to/from client
	  0  sorts (memory)
	  0  sorts (disk)
	  2  rows processed

SQL> select * from t1 where name like '%Tom%';
	ID NAME
---------- ----------
	 1 Tom
	 2 Tom Tom
Execution Plan
----------------------------------------------------------
Plan hash value: 3418867520
--------------------------------------------------------------------------------------
| Id  | Operation		    | Name   | Rows  | Bytes | Cost (%CPU)| Time     |
--------------------------------------------------------------------------------------
|   0 | SELECT STATEMENT	    |	     |	   1 |	  10 |	   2   (0)| 00:00:01 |
|   1 |  TABLE ACCESS BY INDEX ROWID| T1     |	   1 |	  10 |	   2   (0)| 00:00:01 |
|*  2 |   INDEX FULL SCAN	    | IND_T1 |	   1 |	     |	   1   (0)| 00:00:01 |
--------------------------------------------------------------------------------------
Predicate Information (identified by operation id):
---------------------------------------------------
   2 - filter("NAME" LIKE '%Tom%' AND "NAME" IS NOT NULL)
Statistics
----------------------------------------------------------
	  1  recursive calls
	  0  db block gets
	  4  consistent gets
	  0  physical reads
	  0  redo size
	660  bytes sent via SQL*Net to client
	523  bytes received via SQL*Net from client
	  2  SQL*Net roundtrips to/from client
	  0  sorts (memory)
	  0  sorts (disk)
	  2  rows processed
-- Double T1 five times (row count x 32) so the plans can be compared on more data.
insert into t1 (id, name) select id, name from t1;
insert into t1 (id, name) select id, name from t1;
insert into t1 (id, name) select id, name from t1;
insert into t1 (id, name) select id, name from t1;
insert into t1 (id, name) select id, name from t1;
commit;


-- Double T2 five times (row count x 32), mirroring the growth of T1.
-- In this walkthrough the CONTEXT index IND_T2 does not reflect these new
-- rows until it is synchronized: the CONTAINS query run right after the
-- statistics are gathered still returns only the original two rows.
insert into t2 (id, name) select id, name from t2;
insert into t2 (id, name) select id, name from t2;
insert into t2 (id, name) select id, name from t2;
insert into t2 (id, name) select id, name from t2;
insert into t2 (id, name) select id, name from t2;
commit;

-- Refresh optimizer statistics for SCOTT.T1 after the table was grown:
-- 100% sample, 'size 1' column statistics, parallel degree 8;
-- cascade => TRUE also requests statistics for the table's indexes.
BEGIN
    DBMS_STATS.GATHER_TABLE_STATS(
        ownname          => 'scott',
        tabname          => 't1',
        estimate_percent => 100,
        method_opt       => 'for all columns size 1',
        degree           => 8,
        cascade          => TRUE
    );
END;
/

-- Refresh optimizer statistics for SCOTT.T2 after the table was grown:
-- 100% sample, 'size 1' column statistics, parallel degree 8;
-- cascade => TRUE also requests statistics for the table's indexes.
BEGIN
    DBMS_STATS.GATHER_TABLE_STATS(
        ownname          => 'scott',
        tabname          => 't2',
        estimate_percent => 100,
        method_opt       => 'for all columns size 1',
        degree           => 8,
        cascade          => TRUE
    );
END;
/
SQL> select * from t1 where name like '%Tom%';


	ID NAME
---------- ----------
	 1 Tom
	 1 Tom
	 1 Tom
	 1 Tom
	 1 Tom
	 1 Tom
	 1 Tom
	 1 Tom
	 1 Tom
	 1 Tom
	 1 Tom
	 1 Tom
	 1 Tom
	 1 Tom
	 1 Tom
	 1 Tom
	 1 Tom
	 1 Tom
	 1 Tom
	 1 Tom
	 1 Tom
	 1 Tom
	 1 Tom
	 1 Tom
	 1 Tom
	 1 Tom
	 1 Tom
	 1 Tom
	 1 Tom
	 1 Tom
	 1 Tom
	 1 Tom
	 1 Tom Tom
	 1 Tom Tom
	 1 Tom Tom
	 1 Tom Tom
	 1 Tom Tom
	 1 Tom Tom
	 1 Tom Tom
	 1 Tom Tom
	 1 Tom Tom
	 1 Tom Tom
	 1 Tom Tom
	 1 Tom Tom
	 1 Tom Tom
	 1 Tom Tom
	 1 Tom Tom
	 1 Tom Tom
	 1 Tom Tom
	 1 Tom Tom
	 1 Tom Tom
	 1 Tom Tom
	 1 Tom Tom
	 1 Tom Tom
	 1 Tom Tom
	 1 Tom Tom
	 1 Tom Tom
	 1 Tom Tom
	 1 Tom Tom
	 1 Tom Tom
	 1 Tom Tom
	 1 Tom Tom
	 1 Tom Tom
	 1 Tom Tom
64 rows selected.

Execution Plan
----------------------------------------------------------
Plan hash value: 3617692013
--------------------------------------------------------------------------
| Id  | Operation	  | Name | Rows  | Bytes | Cost (%CPU)| Time	 |
--------------------------------------------------------------------------
|   0 | SELECT STATEMENT  |	 |   339 |  3390 |     7   (0)| 00:00:01 |
|*  1 |  TABLE ACCESS FULL| T1	 |   339 |  3390 |     7   (0)| 00:00:01 |
--------------------------------------------------------------------------
Predicate Information (identified by operation id):
---------------------------------------------------
   1 - filter("NAME" LIKE '%Tom%' AND "NAME" IS NOT NULL)
Statistics
----------------------------------------------------------
	  0  recursive calls
	  0  db block gets
	 57  consistent gets
	  0  physical reads
	  0  redo size
       2272  bytes sent via SQL*Net to client
	567  bytes received via SQL*Net from client
	  6  SQL*Net roundtrips to/from client
	  0  sorts (memory)
	  0  sorts (disk)
	 64  rows processed

SQL> select  * From t2 where contains(name,'Tom')>0;
	ID NAME
---------- ----------
	 1 Tom
	 1 Tom Tom

Execution Plan
----------------------------------------------------------
Plan hash value: 1615937155
--------------------------------------------------------------------------------------
| Id  | Operation		    | Name   | Rows  | Bytes | Cost (%CPU)| Time     |
--------------------------------------------------------------------------------------
|   0 | SELECT STATEMENT	    |	     |	 848 |	8480 |	   9   (0)| 00:00:01 |
|   1 |  TABLE ACCESS BY INDEX ROWID| T2     |	 848 |	8480 |	   9   (0)| 00:00:01 |
|*  2 |   DOMAIN INDEX		    | IND_T2 |	     |	     |	   0   (0)| 00:00:01 |
--------------------------------------------------------------------------------------
Predicate Information (identified by operation id):
---------------------------------------------------
   2 - access("CTXSYS"."CONTAINS"("NAME",'Tom')>0)
Statistics
----------------------------------------------------------
	 11  recursive calls
	  0  db block gets
	 19  consistent gets
	  0  physical reads
	  0  redo size
	660  bytes sent via SQL*Net to client
	523  bytes received via SQL*Net from client
	  2  SQL*Net roundtrips to/from client
	  0  sorts (memory)
	  0  sorts (disk)
	  2  rows processed

SQL> alter index ind_t2 rebuild parameters('sync');
Index altered.

SQL> select  * From t2 where contains(name,'Tom')>0;
	ID NAME
---------- ----------
	 1 Tom
	 1 Tom Tom
	 1 Tom
	 1 Tom Tom
	 1 Tom
	 1 Tom Tom
	 1 Tom
	 1 Tom Tom
	 1 Tom
	 1 Tom Tom
	 1 Tom
	 1 Tom Tom
	 1 Tom
	 1 Tom Tom
	 1 Tom
	 1 Tom Tom
	 1 Tom
	 1 Tom Tom
	 1 Tom
	 1 Tom Tom
	 1 Tom
	 1 Tom Tom
	 1 Tom
	 1 Tom Tom
	 1 Tom
	 1 Tom Tom
	 1 Tom
	 1 Tom Tom
	 1 Tom
	 1 Tom Tom
	 1 Tom
	 1 Tom Tom
	 1 Tom
	 1 Tom Tom
	 1 Tom
	 1 Tom Tom
	 1 Tom
	 1 Tom Tom
	 1 Tom
	 1 Tom Tom
	 1 Tom
	 1 Tom Tom
	 1 Tom
	 1 Tom Tom
	 1 Tom
	 1 Tom Tom
	 1 Tom
	 1 Tom Tom
	 1 Tom
	 1 Tom Tom
	 1 Tom
	 1 Tom Tom
	 1 Tom
	 1 Tom Tom
	 1 Tom
	 1 Tom Tom
	 1 Tom
	 1 Tom Tom
	 1 Tom
	 1 Tom Tom
	 1 Tom
	 1 Tom Tom
	 1 Tom
	 1 Tom Tom


64 rows selected.

Execution Plan
----------------------------------------------------------
Plan hash value: 1615937155
--------------------------------------------------------------------------------------
| Id  | Operation		    | Name   | Rows  | Bytes | Cost (%CPU)| Time     |
--------------------------------------------------------------------------------------
|   0 | SELECT STATEMENT	    |	     |	  63 |	 630 |	   7   (0)| 00:00:01 |
|   1 |  TABLE ACCESS BY INDEX ROWID| T2     |	  63 |	 630 |	   7   (0)| 00:00:01 |
|*  2 |   DOMAIN INDEX		    | IND_T2 |	     |	     |	   2   (0)| 00:00:01 |
--------------------------------------------------------------------------------------

Predicate Information (identified by operation id):
---------------------------------------------------
   2 - access("CTXSYS"."CONTAINS"("NAME",'Tom')>0)
Statistics
----------------------------------------------------------
	 11  recursive calls
	  0  db block gets
	 48  consistent gets
	  0  physical reads
	  0  redo size
       2272  bytes sent via SQL*Net to client
	567  bytes received via SQL*Net from client
	  6  SQL*Net roundtrips to/from client
	  0  sorts (memory)
	  0  sorts (disk)
	 64  rows processed

从上面的测试结果可以看出,在数据量较小时全文索引的效果不是很好:初始数据下,全文索引的逻辑读(22)明显高于普通索引全扫描的逻辑读(4);数据翻倍五次并同步全文索引后,全文索引的逻辑读(48)也只比全表扫描(57)略低。
为什么会这样呢? 因为全文索引比表要大。
查看全文索引的大小:
SQL> SET LONG 2000000000
SQL> SET pagesize 2000
SQL> SELECT ctx_report.index_size('IND_T2') FROM DUAL;

CTX_REPORT.INDEX_SIZE('IND_T2')
--------------------------------------------------------------------------------
===========================================================================
			INDEX SIZE FOR SCOTT.IND_T2
===========================================================================
TABLE:				     SCOTT.DR$IND_T2$I
TABLESPACE NAME:		     USERS
BLOCKS ALLOCATED:							 8
BLOCKS USED:								 8
BYTES ALLOCATED:					 65,536 (64.00 KB)
BYTES USED:						 65,536 (64.00 KB)

LOB SEGMENT:			     SCOTT.SYS_LOB0000080748C00006$$
TABLE NAME:			     SCOTT.DR$IND_T2$I
LOB COLUMN:			     TOKEN_INFO
TABLESPACE NAME:		     USERS
BLOCKS ALLOCATED:							 8
BLOCKS USED:								 3
BYTES ALLOCATED:					 65,536 (64.00 KB)
BYTES USED:						 24,576 (24.00 KB)

INDEX (NORMAL): 		     SCOTT.DR$IND_T2$X
TABLE NAME:			     SCOTT.DR$IND_T2$I
TABLESPACE NAME:		     USERS
BLOCKS ALLOCATED:							 8
BLOCKS USED:								 4
BYTES ALLOCATED:					 65,536 (64.00 KB)
BYTES USED:						 32,768 (32.00 KB)

INDEX (LOB):			     SCOTT.SYS_IL0000080748C00006$$
TABLE NAME:			     SCOTT.DR$IND_T2$I
TABLESPACE NAME:		     USERS
BLOCKS ALLOCATED:							 8
BLOCKS USED:								 4
BYTES ALLOCATED:					 65,536 (64.00 KB)
BYTES USED:						 32,768 (32.00 KB)

INDEX (IOT):			     SCOTT.SYS_IOT_TOP_80751
TABLE NAME:			     SCOTT.DR$IND_T2$K
TABLESPACE NAME:		     USERS
BLOCKS ALLOCATED:							24
BLOCKS USED:								24
BYTES ALLOCATED:				       196,608 (192.00 KB)
BYTES USED:					       196,608 (192.00 KB)

INDEX (IOT):			     SCOTT.SYS_IOT_TOP_80756
TABLE NAME:			     SCOTT.DR$IND_T2$N
TABLESPACE NAME:		     USERS
BLOCKS ALLOCATED:							 8
BLOCKS USED:								 4
BYTES ALLOCATED:					 65,536 (64.00 KB)
BYTES USED:						 32,768 (32.00 KB)

TABLE:				     SCOTT.DR$IND_T2$R
TABLESPACE NAME:		     USERS
BLOCKS ALLOCATED:							 8
BLOCKS USED:								 8
BYTES ALLOCATED:					 65,536 (64.00 KB)
BYTES USED:						 65,536 (64.00 KB)

LOB SEGMENT:			     SCOTT.SYS_LOB0000080753C00002$$
TABLE NAME:			     SCOTT.DR$IND_T2$R
LOB COLUMN:			     DATA
TABLESPACE NAME:		     USERS
BLOCKS ALLOCATED:							24
BLOCKS USED:								24
BYTES ALLOCATED:				       196,608 (192.00 KB)
BYTES USED:					       196,608 (192.00 KB)

INDEX (LOB):			     SCOTT.SYS_IL0000080753C00002$$
TABLE NAME:			     SCOTT.DR$IND_T2$R
TABLESPACE NAME:		     USERS
BLOCKS ALLOCATED:							 8
BLOCKS USED:								 4
BYTES ALLOCATED:					 65,536 (64.00 KB)
BYTES USED:						 32,768 (32.00 KB)

TOTALS FOR INDEX SCOTT.IND_T2
---------------------------------------------------------------------------
TOTAL BLOCKS ALLOCATED: 					       104
TOTAL BLOCKS USED:							83
TOTAL BYTES ALLOCATED:				       851,968 (832.00 KB)
TOTAL BYTES USED:				       679,936 (664.00 KB)

从前面的测试可以看出,向建有全文索引的表插入数据后,新数据并不会立即进入全文索引,而是先记录在临时表中,需要手动同步到正式表中去:alter index ind_t2 rebuild parameters('sync')(该写法仅为向后兼容而保留,Oracle 推荐改用 ctx_ddl.sync_index('IND_T2'))。这样做的好处是,插入数据后不会对索引进行锁定,不影响正式表中对数据的操作,比较适合于数据仓库。


 
 
继续向表中增加数据后,再次查看两种查询的执行情况:
SQL> select * from t1 where name like '%Tom%';
8192 rows selected.
Execution Plan
----------------------------------------------------------
Plan hash value: 3617692013
--------------------------------------------------------------------------
| Id  | Operation	  | Name | Rows  | Bytes | Cost (%CPU)| Time	 |
--------------------------------------------------------------------------
|   0 | SELECT STATEMENT  |	 | 21709 |   212K|   276   (2)| 00:00:04 |
|*  1 |  TABLE ACCESS FULL| T1	 | 21709 |   212K|   276   (2)| 00:00:04 |
--------------------------------------------------------------------------
Predicate Information (identified by operation id):
---------------------------------------------------
   1 - filter("NAME" LIKE '%Tom%' AND "NAME" IS NOT NULL)
Statistics
----------------------------------------------------------
	  0  recursive calls
	  0  db block gets
       7410  consistent gets
	  0  physical reads
	  0  redo size
     216876  bytes sent via SQL*Net to client
       6529  bytes received via SQL*Net from client
	548  SQL*Net roundtrips to/from client
	  0  sorts (memory)
	  0  sorts (disk)
       8192  rows processed
SQL> select  * From t2 where contains(name,'Tom')>0;
8320 rows selected.
Execution Plan
----------------------------------------------------------
Plan hash value: 1615937155
--------------------------------------------------------------------------------------
| Id  | Operation		    | Name   | Rows  | Bytes | Cost (%CPU)| Time     |
--------------------------------------------------------------------------------------
|   0 | SELECT STATEMENT	    |	     |	4119 | 41190 |	1588   (0)| 00:00:20 |
|   1 |  TABLE ACCESS BY INDEX ROWID| T2     |	4119 | 41190 |	1588   (0)| 00:00:20 |
|*  2 |   DOMAIN INDEX		    | IND_T2 |	     |	     |	1377   (0)| 00:00:17 |
--------------------------------------------------------------------------------------
Predicate Information (identified by operation id):
---------------------------------------------------
   2 - access("CTXSYS"."CONTAINS"("NAME",'Tom')>0)

Statistics
----------------------------------------------------------
	 11  recursive calls
	  0  db block gets
       6638  consistent gets
	  0  physical reads
	  0  redo size
     219900  bytes sent via SQL*Net to client
       6617  bytes received via SQL*Net from client
	556  SQL*Net roundtrips to/from client
	  0  sorts (memory)
	  0  sorts (disk)
       8320  rows processed
SQL> select count(8) from t1;
  COUNT(8)
----------
    868352

通过查看可以看出,当 t1 表的数据量增加到 868352 行(t2 表的数据量与之相当)时,全文索引的逻辑读(6638)比 t1 表全表扫描的逻辑读(7410)要低,但是优化器估算的代价比较高(Cost 1588 对 276)。





你可能感兴趣的:(全文索引)