我们假定有一张师范学院学生信息表
-- Recreate the demo table from scratch: a student number plus a one-character gender flag.
drop table student_info purge;

create table student_info (
    student_no number(10),
    gendar     char(1)
);
插入模拟数据:
-- Reload student_info with 20,000 random rows; about 1 row in 100 gets gendar = '1',
-- all others get gendar = '0'.
begin
    -- truncate is DDL, so inside PL/SQL it has to be issued through dynamic SQL
    execute immediate 'truncate table student_info';

    for i in 1 .. 20000 loop
        insert into student_info (student_no, gendar)
        values (
            trunc(dbms_random.value(10000000, 99999999)),
            -- character literals match the char(1) column and avoid an implicit number-to-char conversion
            case
                when mod(trunc(dbms_random.value(1, 99999999)), 100) < 1 then '1'
                else '0'
            end
        );
    end loop;
end;
/
select count(*),gendar from student_info group by gendar
------------
206 1
19794 0
其中99%左右的学生是女性(gendar=0),如果我们要在gendar上建立索引
以下方法是不经济的(对该表的使用不是累计查询)
-- Plain b-tree index on the low-cardinality column (shown as the uneconomical option).
create index idx_student_gendar on student_info(gendar);
因为如果查询女生信息,由于绝大多数学生是女生,所以一般应用都是全表扫描,既如此为何还要在索引中,保存女生的信息呢?
如果建立以下索引
-- Function-based index: rows with gendar = '0' map to null and are therefore not stored in the b-tree.
-- The comparison uses a character literal so that the char(1) column is not implicitly converted to a number.
create index idx_student_gendar_fun on student_info(case when gendar = '0' then null else gendar end);
因为btree索引不保存null信息,男学生人数又很少,所以这个索引又小,又能在查询男学生信息的时候用上索引。
比较索引
-- Build each index in turn, validate it and read index_stats before dropping it;
-- index_stats only holds the result of the most recent "analyze index ... validate structure".
-- Note: if idx_student_gendar_fun or idx_student_gendar were already created by the
-- earlier statements, drop them first (otherwise ORA-00955 / ORA-01408).

-- Function-based index: rows with gendar = '0' map to null and are left out of the b-tree.
create index idx_student_gendar_fun on student_info(case when gendar = '0' then null else gendar end);
analyze index idx_student_gendar_fun validate structure;
select * from index_stats;
drop index idx_student_gendar_fun;

-- Plain index on the column, for comparison.
create index idx_student_gendar_com on student_info(gendar);
analyze index idx_student_gendar_com validate structure;
select * from index_stats;
drop index idx_student_gendar_com;
得到以下结果
HEIGHT |
1 |
2 |
BLOCKS |
8 |
48 |
NAME |
IDX_STUDENT_GENDAR_FUN |
IDX_STUDENT_GENDAR_COM |
PARTITION_NAME |
||
LF_ROWS |
206 |
20000 |
LF_BLKS |
1 |
37 |
LF_ROWS_LEN |
2678 |
260000 |
LF_BLK_LEN |
8000 |
8000 |
BR_ROWS |
0 |
36 |
BR_BLKS |
0 |
1 |
BR_ROWS_LEN |
0 |
538 |
BR_BLK_LEN |
0 |
8032 |
DEL_LF_ROWS |
0 |
0 |
DEL_LF_ROWS_LEN |
0 |
0 |
DISTINCT_KEYS |
1 |
2 |
MOST_REPEATED_KEY |
206 |
19794 |
BTREE_SPACE |
8000 |
304032 |
USED_SPACE |
2678 |
260538 |
PCT_USED |
34 |
86 |
ROWS_PER_KEY |
206 |
10000 |
BLKS_GETS_PER_ACCESS |
104.5 |
5002.5 |
PRE_ROWS |
0 |
0 |
PRE_ROWS_LEN |
0 |
0 |
OPT_CMPR_COUNT |
1 |
1 |
OPT_CMPR_PCTSAVE |
15 |
15 |
看得出无论索引的高度还是索引大小,第一个索引都有明显的优势。
实际上, idx_student_gendar_fun就是一个用oracle内建函数建立的函数索引。
Oracle还允许用用户自定义函数建立函数索引(在讨论表的约束的时候,我们会了解到不能用自定义函数定义约束)。
针对上述函数索引,我们还可以用if来编写自定义函数
-- Maps a gender flag to an index key: returns the flag itself when it is '1',
-- and null for every other value (including null), so that only '1' rows end up
-- in a b-tree index built on this function.
-- Declared deterministic, which Oracle requires for use in a function-based index.
create or replace function f_gendar(p_gendar in varchar2)
    return varchar2
    deterministic
as
begin
    -- Compare against a character literal: "p_gendar = 1" forces to_number(p_gendar)
    -- and raises ORA-06502 for any non-numeric flag.
    if p_gendar = '1' then
        return p_gendar;
    end if;

    return null;
end;
/
比较自定义函数索引与前面两个索引
-- Same measurement for the index built on the user-defined function f_gendar.
create index idx_student_gendar_ufun on student_info(f_gendar(gendar));
analyze index idx_student_gendar_ufun validate structure;
select * from index_stats;
-- Drop the index that was just created so it can be re-created with a different expression later.
drop index idx_student_gendar_ufun;
HEIGHT |
1 |
2 |
1 |
BLOCKS |
8 |
48 |
8 |
NAME |
IDX_STUDENT_GENDAR_FUN |
IDX_STUDENT_GENDAR_COM |
IDX_STUDENT_GENDAR_UFUN |
PARTITION_NAME |
|||
LF_ROWS |
206 |
20000 |
206 |
LF_BLKS |
1 |
37 |
1 |
LF_ROWS_LEN |
2678 |
260000 |
2678 |
LF_BLK_LEN |
8000 |
8000 |
8000 |
BR_ROWS |
0 |
36 |
0 |
BR_BLKS |
0 |
1 |
0 |
BR_ROWS_LEN |
0 |
538 |
0 |
BR_BLK_LEN |
0 |
8032 |
0 |
DEL_LF_ROWS |
0 |
0 |
0 |
DEL_LF_ROWS_LEN |
0 |
0 |
0 |
DISTINCT_KEYS |
1 |
2 |
1 |
MOST_REPEATED_KEY |
206 |
19794 |
206 |
BTREE_SPACE |
8000 |
304032 |
8000 |
USED_SPACE |
2678 |
260538 |
2678 |
PCT_USED |
34 |
86 |
34 |
ROWS_PER_KEY |
206 |
10000 |
206 |
BLKS_GETS_PER_ACCESS |
104.5 |
5002.5 |
104.5 |
PRE_ROWS |
0 |
0 |
0 |
PRE_ROWS_LEN |
0 |
0 |
0 |
OPT_CMPR_COUNT |
1 |
1 |
1 |
OPT_CMPR_PCTSAVE |
15 |
15 |
15 |
我们知道函数返回类型如果是varchar2类型,则缺省长度是4000;在9i数据库中,这个特性会影响索引的尺寸。我们知道gendar是char(1),因此可以用substr(f_gendar(gendar),1,1)只返回1位再建立索引。
-- Index on the first character of f_gendar's result, then validate, inspect and drop it.
create index idx_student_gendar_ufun on student_info(substr(f_gendar(gendar),1,1));
analyze index idx_student_gendar_ufun validate structure;
select * from index_stats;
drop index idx_student_gendar_ufun;
HEIGHT |
1 |
1 |
BLOCKS |
8 |
8 |
NAME |
IDX_STUDENT_GENDAR_UFUN |
IDX_STUDENT_GENDAR_UFUN |
PARTITION_NAME |
||
LF_ROWS |
206 |
206 |
LF_BLKS |
1 |
1 |
LF_ROWS_LEN |
2678 |
2678 |
LF_BLK_LEN |
8000 |
8000 |
BR_ROWS |
0 |
0 |
BR_BLKS |
0 |
0 |
BR_ROWS_LEN |
0 |
0 |
BR_BLK_LEN |
0 |
0 |
DEL_LF_ROWS |
0 |
0 |
DEL_LF_ROWS_LEN |
0 |
0 |
DISTINCT_KEYS |
1 |
1 |
MOST_REPEATED_KEY |
206 |
206 |
BTREE_SPACE |
8000 |
8000 |
USED_SPACE |
2678 |
2678 |
PCT_USED |
34 |
34 |
ROWS_PER_KEY |
206 |
206 |
BLKS_GETS_PER_ACCESS |
104.5 |
104.5 |
PRE_ROWS |
0 |
0 |
PRE_ROWS_LEN |
0 |
0 |
OPT_CMPR_COUNT |
1 |
1 |
OPT_CMPR_PCTSAVE |
15 |
15 |
这是在我11g数据库上测试的结果,在11g数据库中解决了这个问题,无需substr函数
在自定义函数中,我们使用了一个关键字DETERMINISTIC,这个关键字是说,如果函数的输入值相同,则函数返回结果也一定相同。
Deterministic说明
在这里给出一个非deterministic的函数的例子
-- Demonstration only: the result is random, yet the function is declared
-- deterministic so that it can be used in a function-based index.
-- Prints its argument through dbms_output and returns one of 10, 20, 30, 40.
create or replace function f_return(p_1 varchar2)
    return integer
    deterministic
is
begin
    dbms_output.put_line(p_1);
    return floor(dbms_random.value(1, 5)) * 10;
end;
select f_return('jjj') from dual
每一次输出的值 不一样
drop table emp_ttt purge
create table emp_ttt as select ename from emp
select f_return(ename) from emp
每一次输出的值 依旧不一样
create index idx_emp_ttt_emp on emp_ttt(f_return(ename))
select ename,f_return(ename) from emp_ttt
where f_return(ename)=10
select ename,f_return(ename) r from emp_ttt where f_return(ename)=10
SELECT STATEMENT, GOAL = ALL_ROWS 2 2 1
TABLE ACCESS BY INDEX ROWID SCOTT EMP_TTT 2 2 1
INDEX RANGE SCAN SCOTT IDX_EMP_TTT_EMP 1 1 1
每一次输出的值是一样的
这就是deterministic的意思,每一次输入相同的输入值,则返回相同的返回值
如果函数索引的运算中使用的原数据(如基于某表的数据)发生了变化,则函数索引需要重新建立
-- Looks up ename in emp for the given empno; returns null when no row matches.
-- Demonstration only: it is declared deterministic although its result depends on
-- the current contents of emp, so an index built on it goes stale when emp changes.
create or replace function f_return(p_empno number)
    return varchar2
    deterministic
is
    v_ename varchar2(20);
begin
    -- An aggregate without group by always yields exactly one row, so max() already
    -- returns null for a missing empno; no extra "union all select null" branch is needed.
    select max(ename)
      into v_ename
      from emp
     where empno = p_empno;

    return v_ename;
end;
/
-- Change the underlying data that f_return reads.
update emp
   set empno = 7781
 where empno = 7782;

-- f_return takes a number, so pass a numeric literal rather than a string.
select f_return(7781) from dual;
1 CLARK
select * from emp_ttt where f_return(empno)='CLARK'
--------------------
7782 CLARK
alter index idx_emp_ttt_empno rebuild
select * from emp_ttt where f_return(empno)='CLARK'
------------------
返回null
select empno,f_return(empno),empno from emp
函数索引的调用
-- Create the function-based index, gather statistics, then check that a predicate
-- written with exactly the indexed expression uses the index.
create index idx_student_gendar_ufun on student_info(substr(f_gendar(gendar),1,1));

analyze table student_info compute statistics
    for table
    for all indexes
    for all indexed columns;

explain plan for
select * from student_info where substr(f_gendar(gendar),1,1) = '1';

select * from table(dbms_xplan.display());
-------------------------------------------------------------------------------------------------------
| Id | Operation | Name | Rows | Bytes | Cost (%CPU)| Time |
-------------------------------------------------------------------------------------------------------
| 0 | SELECT STATEMENT | | 20 | 220 | 2 (0)| 00:00:01 |
| 1 | TABLE ACCESS BY INDEX ROWID|STUDENT_INFO | 20 | 220 | 2 (0)| 00:00:01 |
|* 2 | INDEX RANGE SCAN | IDX_STUDENT_GENDAR_UFUN | 20 | | 1 (0)| 00:00:01 |
-------------------------------------------------------------------------------------------------------
我们看到这样用函数索引比较麻烦
可以用建立一个包含函数运算的列的视图,来解决这个问题
-- View that exposes the indexed expression as an ordinary column (v_gendar),
-- so queries can filter on it without repeating the expression.
create or replace view v_student_info
as
select
    student_no,
    gendar,
    substr(f_gendar(gendar),1,1) v_gendar
from student_info;

-- Insert through the view, supplying only the real base-table columns.
insert into v_student_info (student_no, gendar) values (12345678, '1');
successful
explain plan for
select * from v_student_info where v_gendar='1';
select * from table(dbms_xplan.display())
-------------------------------------------------------------------------------------------------------
| Id | Operation | Name | Rows | Bytes | Cost (%CPU)| Time |
-------------------------------------------------------------------------------------------------------
| 0 | SELECT STATEMENT | | 20 | 220 | 2 (0)| 00:00:01 |
| 1 | TABLE ACCESS BY INDEX ROWID|STUDENT_INFO | 20 | 220 | 2 (0)| 00:00:01 |
|* 2 | INDEX RANGE SCAN | IDX_STUDENT_GENDAR_UFUN | 20 | | 1 (0)| 00:00:01 |
-------------------------------------------------------------------------------------------------------
-- Plan for an update through the view that filters on the indexed expression column.
explain plan for
update v_student_info
   set gendar = '0'   -- character literal to match the char(1) column
 where v_gendar = '1'
   and student_no = 31061887;
--------------------------------------------------------------------------------------------------------
| Id | Operation | Name | Rows | Bytes | Cost (%CPU)| Time |
--------------------------------------------------------------------------------------------------------
| 0 | UPDATE STATEMENT | | 1 | 11 | 2 (0)| 00:00:01 |
| 1 | UPDATE | STUDENT_INFO | | | | |
|* 2 | TABLE ACCESS BY INDEX ROWID|STUDENT_INFO | 1 | 11 | 2 (0)| 00:00:01 |
|* 3 | INDEX RANGE SCAN | IDX_STUDENT_GENDAR_UFUN | 20 | | 1 (0)| 00:00:01 |
--------------------------------------------------------------------------------------------------------
函数索引的使用实际上用到了oracle的查询重写技术,因此要设定查询重写参数:
alter session set query_rewrite_enabled=true;
alter session set query_rewrite_integrity=trusted;
如果函数索引基于的函数业务规则(逻辑或者所用的数据源)发生变化,函数索引需要重建。
还可以建立函数位图索引
-- Replace the b-tree function-based index with a bitmap one on the same expression,
-- then validate, inspect and drop it.
drop index idx_student_gendar_ufun;
create bitmap index idx_student_gendar_ufun on student_info(substr(f_gendar(gendar),1,1));
analyze index idx_student_gendar_ufun validate structure;
select * from index_stats;
drop index idx_student_gendar_ufun;
函数索引函数被调用次数
建立函数索引后,如果在一个查询中,在where的过滤条件中使用了该函数索引,那么该函数被调用的次数是多少?1次、0次,还是涉及到的行数?
-- Index-organized demo table keyed on empno.
create table emp_deterministic (
    empno number primary key,
    comm  number
)
organization index;

-- Load 1000 rows from all_objects: even rownum -> comm = object_id, odd rownum -> comm is null.
insert into emp_deterministic (empno, comm)
select
    object_id,
    case when mod(rownum, 2) = 0 then object_id end
from all_objects
where rownum <= 1000;

analyze table emp_deterministic compute statistics;
因此表emp_deterministic拥有了1000行数据,empno取自object_id;其中500行的COMM等于object_id,另外500行的COMM为空值。
建立函数
-- Returns comm for the given empno from emp_deterministic and, as a side effect,
-- adds 1 to the session's client_info value so that the number of calls can be counted
-- (the calling script resets client_info to 0 before each measurement).
-- Raises program_error when no row matches p_empno.
create or replace function your_bonus(p_empno in number) return number
    deterministic
as
    l_comm number;
begin
    -- bump the per-session call counter kept in client_info
    dbms_application_info.set_client_info(sys_context('userenv', 'client_info') + 1);

    -- read from the demo table created for this test (emp_deterministic), not emp
    select comm
      into l_comm
      from emp_deterministic
     where empno = p_empno;

    return l_comm;
exception
    when no_data_found then
        raise program_error;
end;
/
可以用这个函数数出这个函数被调用的次数
-- Reset the call counter, then call the function once per row in the select list.
begin
    dbms_application_info.set_client_info(0);
end;
/
select your_bonus(empno) b from emp_deterministic;
1000 rows selected.
ExecutionPlan
----------------------------------------------------------
0 SELECT STATEMENTOptimizer=CHOOSE (Cost=2 Card=1000 Bytes=3000)
1 0 INDEX (FAST FULL SCAN) OF 'SYS_IOT_TOP_38955' (UNIQUE) (Cost=2 Card=1000 Bytes=3000)
select sys_context('userenv', 'client_info') sys_ctx from dual;
SYS_CTX
--------------------
1000
被调用1000次,这个结果是我们所预料到的。
-- Reset the call counter, then use the function in both the select list and the where clause.
begin
    dbms_application_info.set_client_info(0);
end;
/
select your_bonus(empno) b
  from emp_deterministic
 where your_bonus(empno) > 0;
500 rows selected.
ExecutionPlan
----------------------------------------------------------
0 SELECT STATEMENTOptimizer=CHOOSE (Cost=2 Card=50 Bytes=150)
1 0 INDEX (FAST FULL SCAN) OF 'SYS_IOT_TOP_38955' (UNIQUE) (Cost=2 Card=50 Bytes=150)
select sys_context('userenv', 'client_info') sys_ctx from dual;
SYS_CTX
--------------------
1500
这次的结果是函数被调用了1500次,这出乎我们的意料,返回500条数据,应该是500次呀?
实际上,这是因为where条件对1000行各调用了一次函数,满足条件的500行在select列表中又各调用了一次,一共是1500次。
Begin
dbms_application_info.set_client_info(0);
end;
select b
from (select your_bonus(empno) b
from emp_deterministic
where rownum > 0 ) EMP
where b > 0;
500 rows selected.
ExecutionPlan
----------------------------------------------------------
0 SELECT STATEMENTOptimizer=CHOOSE (Cost=2 Card=1000 Bytes=13000)
1 0 VIEW (Cost=2 Card=1000 Bytes=13000)
2 1 COUNT
3 2 FILTER
4 3 INDEX (FAST FULL SCAN) OF 'SYS_IOT_TOP_38955' (UNIQUE) (Cost=2 Card=1000
Bytes=3000)
select sys_context('userenv', 'client_info') sys_ctx from dual;
SYS_CTX
--------------------
1068
rownum is very handy for helping us out there. The optimizer cannot merge the predicate "where b > 0" into the subquery due to that. So, your_bonus(empno) is materialized into "temp" in effect and reused. Now you might wonder why 1,068? Well, see what happens if you increase the array fetch size:
-- Repeat the inline-view test with a larger array fetch size.
set arraysize 1000
begin
    dbms_application_info.set_client_info(0);
end;
/
set autotrace traceonly
-- "rownum > 0" keeps the outer predicate from being merged into the inline view
select b
  from (select your_bonus(empno) b
          from emp_deterministic
         where rownum > 0) emp
 where b > 0;
500 rows selected.
select sys_context('userenv', 'client_info') sys_ctx from dual;
SYS_CTX
--------------------
1002
You can see it is a function of the number of times we fetched from this result set. Larger array fetches will reduce this number
-- Function-based index on the demo table queried in the tests around it.
create index bonus_idx on emp_deterministic(your_bonus(empno));
Index created.
-- Enable query rewrite for the session, reset the call counter, then run the query
-- whose predicate matches the indexed expression.
alter session set query_rewrite_enabled=true;
alter session set query_rewrite_integrity=trusted;
begin
    dbms_application_info.set_client_info(0);
end;
/
select your_bonus(empno) b
  from emp_deterministic
 where your_bonus(empno) > 0;
500 rows selected.
ExecutionPlan
----------------------------------------------------------
0 SELECT STATEMENTOptimizer=CHOOSE (Cost=1 Card=50 Bytes=150)
1 0 INDEX (RANGE SCAN) OF 'BONUS_IDX' (NON-UNIQUE) (Cost=2 Card=50 Bytes=150)
select sys_context('userenv', 'client_info') sys_ctx from dual;
SYS_CTX
--------------------
0
发现这一次调用函数0次。
这是隐藏着的函数索引的价值所在。由于采用查询重写技术,在引用函数索引的时候,避免了每行数据都去调用函数。
原创文章,如果转载,请标注作者:田文 CSDN地址:http://blog.csdn.net/tiwen818