共享池中的Latch争用
共享池中如果存在大量的SQL被反复分析,就会造成很大的Latch争用和长时间的等待,最常见到的现象是没有绑定变量。最常见的几种共享池里的Latch是:
SQL> select * from v$latchname where name like 'library cache%'
2 ;
LATCH# NAME HASH
---------- -------------------------------------------------- ----------
215 library cache 3055961779
216 library cache lock 916468430
217 library cache pin 2802704141
218 library cache pin allocation 4107073322
219 library cache lock allocation 3971284477
220 library cache load lock 2952162927
221 library cache hash chains 1130479025
7 rows selected.
在分析系统性能时,如果看到有library cache这样的Latch争用,就可以断定是共享池中出现了问题,这种问题基本上是由SQL语句导致的,比如没有绑定变量或者一些存储过程被反复分析。
下面是来自asktom.oracle.com网站上给出的一个在有绑定变量和不绑定变量情况下,latch资源争用的对比测试,测试是让一条SQL语句执行10 000次,然后给出各自的执行过程中产生的资源使用情况。
SQL>create user test identified by test default tablespace users ;
User created.
SQL>grant dba to test;
Grant succeeded.
SQL> grant select on sys.v_$statname to test;
Grant succeeded.
SQL> grant select on sys.v_$mystat to test;
Grant succeeded.
SQL> grant select on sys.v_$latch to test;
Grant succeeded.
SQL> grant select on sys.v_$timer to test;
Grant succeeded.
SQL>conn test/test
Connected.
SQL> create global temporary table run_stats
( runid varchar2(15),
name varchar2(80),
value int )
on commit preserve rows;
Table created.
SQL> create or replace view stats
2 as select 'STAT...' || a.name name, b.value
3 from v$statname a, v$mystat b
4 where a.statistic# = b.statistic#
5 union all
6 select 'LATCH.' || name, gets
7 from v$latch
8 union all
9 select 'STAT...Elapsed Time', hsecs from v$timer;
View created
SQL> create or replace package runstats_pkg
2 as
3 procedure rs_start;
4 procedure rs_middle;
5 procedure rs_stop( p_difference_threshold in number default 0 );
6 end;
7 /
Package created
SQL> create or replace package body runstats_pkg as
  -- Compares the resource usage of two consecutive runs in one session.
  -- Usage: rs_start; <run 1>; rs_middle; <run 2>; rs_stop(threshold).
  -- Snapshots of the STATS view are stored in RUN_STATS under the run ids
  -- 'before', 'after 1' and 'after 2'; rs_stop prints the deltas through
  -- dbms_output, so serveroutput must be enabled with a large enough buffer.
  --
  -- Timer state, in the units returned by dbms_utility.get_time (printed
  -- below as "hsecs").
  g_start number; -- timer reading taken at the start of the current run
  g_run1  number; -- elapsed time of run 1, set by rs_middle
  g_run2  number; -- elapsed time of run 2, set by rs_stop
  -- Clears any previous snapshots, stores the 'before' snapshot and starts
  -- the timer for run 1.
  procedure rs_start is
  begin
    delete from run_stats;
    insert into run_stats
      select 'before', stats.* from stats;
    g_start := dbms_utility.get_time;
  end;
  -- Ends run 1: records its elapsed time, stores the 'after 1' snapshot and
  -- restarts the timer for run 2.
  procedure rs_middle is
  begin
    g_run1 := (dbms_utility.get_time - g_start);
    insert into run_stats
      select 'after 1', stats.* from stats;
    g_start := dbms_utility.get_time;
  end;
  -- Ends run 2 and prints the comparison report.
  -- p_difference_threshold: only rows whose |run2 delta - run1 delta| is
  -- greater than this value are listed in the per-statistic section.
  procedure rs_stop(p_difference_threshold in number default 0) is
  begin
    g_run2 := (dbms_utility.get_time - g_start);
    dbms_output.put_line('Run1 ran in ' || g_run1 || ' hsecs');
    dbms_output.put_line('Run2 ran in ' || g_run2 || ' hsecs');
    -- Skip the ratio when run 2 took no measurable time (division by zero).
    if (g_run2 <> 0) then
      dbms_output.put_line('run 1 ran in ' ||
                           round(g_run1 / g_run2 * 100, 2) ||
                           '% of the time');
    end if;
    dbms_output.put_line(chr(9));
    insert into run_stats
      select 'after 2', stats.* from stats;
    dbms_output.put_line(rpad('Name', 30) || lpad('Run1', 12) ||
                         lpad('Run2', 12) || lpad('Diff', 12));
    -- Per-statistic section: a = 'before', b = 'after 1', c = 'after 2',
    -- so (b - a) is the run 1 delta and (c - b) is the run 2 delta.
    -- Rows are listed in ascending order of the absolute difference.
    for x in (select rpad(a.name, 30) ||
                     to_char(b.value - a.value, '999,999,999') ||
                     to_char(c.value - b.value, '999,999,999') ||
                     to_char(((c.value - b.value) - (b.value - a.value)),
                             '999,999,999') data
                from run_stats a, run_stats b, run_stats c
               where a.name = b.name
                 and b.name = c.name
                 and a.runid = 'before'
                 and b.runid = 'after 1'
                 and c.runid = 'after 2'
                 and abs((c.value - b.value) - (b.value - a.value)) >
                     p_difference_threshold
               order by abs((c.value - b.value) - (b.value - a.value))) loop
      dbms_output.put_line(x.data);
    end loop;
    dbms_output.put_line(chr(9));
    dbms_output.put_line('Run1 latches total versus runs -- difference and pct');
    dbms_output.put_line(lpad('Run1', 12) || lpad('Run2', 12) ||
                         lpad('Diff', 12) || lpad('Pct', 10));
    -- Latch summary: totals over every 'LATCH%' row of the snapshots.
    -- nullif(run2, 0) turns a zero run 2 total into NULL so the percentage
    -- comes out empty instead of raising ORA-01476 (divisor is equal to zero).
    for x in (select to_char(run1, '999,999,999') ||
                     to_char(run2, '999,999,999') ||
                     to_char(diff, '999,999,999') ||
                     to_char(round(run1 / nullif(run2, 0) * 100, 2),
                             '99,999.99') || '%' data
                from (select sum(b.value - a.value) run1,
                             sum(c.value - b.value) run2,
                             sum((c.value - b.value) - (b.value - a.value)) diff
                        from run_stats a, run_stats b, run_stats c
                       where a.name = b.name
                         and b.name = c.name
                         and a.runid = 'before'
                         and b.runid = 'after 1'
                         and c.runid = 'after 2'
                         and a.name like 'LATCH%')) loop
      dbms_output.put_line(x.data);
    end loop;
  end;
end;
/
Package body created.
SQL>
上面的操作是创建一个测试环境,包括创建用户、相关的表和视图,以及一个用于捕获SQL执行过程中统计数据的包(runstats_pkg)。
下面开始做测试对比,先创建一个表T用于插入数据:
SQL> create table t ( x int );
Table created
创建第一个存储过程p1,不使用变量绑定方式执行SQL 10 000次:
SQL> create or replace procedure p1 as
2 l_cnt number;
3 begin
4 for i in 1 .. 10000 loop
5 execute immediate 'select count(*) from t where x = ' || i
6 into l_cnt;
7 end loop;
8 end;
9 /
Procedure created
创建第二个存储过程p2,使用变量绑定方式执行SQL 10000次:
SQL> create or replace procedure p2 as
2 l_cnt number;
3 begin
4 for i in 1 .. 10000 loop
5 select count(*) into l_cnt from t where x = i;
6 end loop;
7 end;
8 /
Procedure created
SQL> exec runStats_pkg.rs_start;
PL/SQL procedure successfully completed
SQL> exec p1
PL/SQL procedure successfully completed
SQL> exec runStats_pkg.rs_middle
PL/SQL procedure successfully completed
SQL> exec p2
PL/SQL procedure successfully completed
SQL> exec runStats_pkg.rs_stop(1000)
Run1 ran in 6705 hsecs
Run2 ran in 2851 hsecs
run 1 ran in 235.18% of the time
Name Run1 Run2 Diff
STAT...table fetch by rowid 1,327 52 -1,275
STAT...no work - consistent re 1,952 75 -1,877
STAT...consistent gets - exami 1,978 55 -1,923
STAT...redo size 5,120 3,144 -1,976
STAT...buffer is not pinned co 2,801 108 -2,693
STAT...sorts (rows) 6,740 3,530 -3,210
STAT...Elapsed Time 6,713 2,853 -3,860
STAT...parse count (hard) 10,091 4 -10,087
STAT...sql area evicted 10,150 4 -10,146
STAT...enqueue requests 10,393 6 -10,387
STAT...enqueue releases 10,393 5 -10,388
STAT...parse count (total) 10,440 45 -10,395
STAT...calls to get snapshot s 50,999 40,040 -10,959
STAT...consistent gets 43,970 30,137 -13,833
STAT...consistent gets from ca 43,970 30,137 -13,833
STAT...session logical reads 44,018 30,168 -13,850
LATCH.session allocation 31,538 15,548 -15,990
LATCH.enqueue hash chains 21,783 721 -21,062
LATCH.enqueues 21,739 648 -21,091
LATCH.cache buffers chains 88,391 66,216 -22,175
STAT...recursive calls 33,554 10,367 -23,187
STAT...session uga memory max 334,528 306,904 -27,624
LATCH.library cache pin 61,192 706 -60,486
LATCH.library cache lock 62,195 1,426 -60,769
LATCH.kks stats 65,464 359 -65,105
LATCH.row cache objects 160,611 4,056 -156,555
LATCH.shared pool simulator 177,211 11,445 -165,766
LATCH.library cache 224,449 3,175 -221,274
begin runStats_pkg.rs_stop(1000); end;
ORA-20000: ORU-10027: buffer overflow, limit of 2000 bytes
ORA-06512: at "SYS.DBMS_OUTPUT", line 32
ORA-06512: at "SYS.DBMS_OUTPUT", line 97
ORA-06512: at "SYS.DBMS_OUTPUT", line 112
ORA-06512: at "TEST.RUNSTATS_PKG", line 49
ORA-06512: at line 2
SQL> set serveroutput on size 1000000
SQL> exec runStats_pkg.rs_stop(1000)
Run1 ran in 6705 hsecs
Run2 ran in 13459 hsecs
run 1 ran in 49.82% of the time
Name Run1 Run2 Diff
STAT...table fetch by rowid 1,327 52 -1,275
LATCH.SQL memory manager worka 1,532 3,145 1,613
STAT...undo change vector size 2,792 4,480 1,688
STAT...no work - consistent re 1,952 161 -1,791
STAT...redo size 5,120 7,008 1,888
STAT...consistent gets - exami 1,978 60 -1,918
STAT...buffer is not pinned co 2,801 108 -2,693
STAT...Elapsed Time 6,713 13,461 6,748
STAT...sorts (rows) 6,740 14,065 7,325
STAT...bytes sent via SQL*Net 2,344 9,691 7,347
LATCH.cache buffers chains 88,391 79,727 -8,664
STAT...parse count (hard) 10,091 8 -10,083
STAT...sql area evicted 10,150 23 -10,127
STAT...parse count (total) 10,440 90 -10,350
STAT...enqueue requests 10,393 11 -10,382
STAT...enqueue releases 10,393 10 -10,383
STAT...calls to get snapshot s 50,999 40,062 -10,937
STAT...bytes received via SQL* 2,700 14,269 11,569
LATCH.session allocation 31,538 44,001 12,463
STAT...session logical reads 44,018 30,342 -13,676
STAT...consistent gets from ca 43,970 30,253 -13,717
STAT...consistent gets 43,970 30,253 -13,717
STAT...table scan rows gotten 202 13,950 13,748
LATCH.enqueue hash chains 21,783 2,442 -19,341
LATCH.enqueues 21,739 2,249 -19,490
STAT...recursive calls 33,554 10,383 -23,171
LATCH.library cache lock 62,195 3,158 -59,037
LATCH.library cache pin 61,192 1,593 -59,599
LATCH.kks stats 65,464 888 -64,576
LATCH.row cache objects 160,611 11,034 -149,577
LATCH.shared pool simulator 177,211 13,398 -163,813
LATCH.library cache 224,449 7,511 -216,938
STAT...physical write total by 229,376 0 -229,376
LATCH.shared pool 339,188 14,216 -324,972
STAT...session uga memory 334,528 -7,328 -341,856
STAT...physical read bytes 811,008 0 -811,008
STAT...session uga memory max 334,528 1,446,296 1,111,768
STAT...physical read total byt 1,302,528 0 -1,302,528
STAT...session pga memory 851,968 -524,288 -1,376,256
STAT...session pga memory max 1,900,544 327,680 -1,572,864
Run1 latches total versus runs -- difference and pct
Run1 Run2 Diff Pct
1,259,598 190,572 -1,069,026 660.96%
PL/SQL procedure successfully completed
测试创建的性能采集包 runStats_pkg 分别在测试开始、存储过程p1运行结束、存储过程p2运行结束的3个时间点采集了性能数据,最后在报告中给出了2个存储过程各自的latch资源使用情况及对比情况。从这个结果中可以清楚地看到不绑定变量的Latch争用是非常严重的,请大家注意输出结果中几个library cache的Latch在数值上的差异,可以看到,绑定变量时要比不绑定变量时这些Latch争用小得多。
如果你的数据库存在这几种Latch争用,大多数时候要考察你的系统的SQL变量绑定情况。
关于如何确定系统中是否存在没有使用绑定变量的SQL,ASKTOM网站也提供了一个不错的函数remove_constants()来检查共享池中的SQL的运行情况。
首先创建一个表,用于存放整理过的数据:
SQL> create table t1 as select sql_text from v$sqlarea;
Table created
给表增加一个字段:
SQL> alter table t1 add sql_text_wo_constants varchar2(1000);
Table altered
创建函数remove_constants:
create or replace function
remove_constants( p_query in varchar2 ) return varchar2
as
    -- Normalizes a SQL text so that statements differing only in their
    -- literal values map to the same string:
    --   * the content of every quoted literal is replaced by '#'
    --   * every run of digits is replaced by a single @
    --   * every run of blanks is collapsed to one blank
    --   * the result is upper-cased
    -- Returns NULL when p_query is NULL.
    l_query     varchar2(32767);
    -- CHAR semantics: substr returns one character, which may be several
    -- bytes long in a multi-byte character set.
    l_char      varchar2(1 char);
    l_in_quotes boolean default FALSE;
begin
    -- length(NULL) is NULL and a NULL loop bound raises ORA-06502.
    if p_query is null then
        return null;
    end if;
    for i in 1 .. length( p_query )
    loop
        l_char := substr(p_query,i,1);
        if ( l_char = '''' and l_in_quotes )
        then
            -- closing quote: leave the literal; the quote itself is
            -- appended by the block below
            l_in_quotes := FALSE;
        elsif ( l_char = '''' and NOT l_in_quotes )
        then
            -- opening quote: emit the quote plus the # placeholder and
            -- start skipping the literal's content
            l_in_quotes := TRUE;
            l_query := l_query || '''#';
        end if;
        if ( NOT l_in_quotes ) then
            l_query := l_query || l_char;
        end if;
    end loop;
    l_query := translate( l_query, '0123456789', '@@@@@@@@@@' );
    -- Collapse runs of @ and of blanks, trying run lengths 10 down to 2.
    for i in 0 .. 8 loop
        l_query := replace( l_query, lpad('@',10-i,'@'), '@' );
        l_query := replace( l_query, lpad(' ',10-i,' '), ' ' );
    end loop;
    return upper(l_query);
end;
/
下面是如何使用这个函数。
将t1表中取自v$sqlarea视图的SQL文本用remove_constants处理后,更新到t1表中:
SQL> update t1 set sql_text_wo_constants = remove_constants(sql_text);
649 rows updated
查出除了谓词中的常量不同以外文本完全相同的SQL语句,以及这类语句在共享池中的条数;这里查询的是同类语句超过100条(即没有被重用)的SQL:
-- Normalized statements that occur more than 100 times in t1, least
-- frequent first. The statement is terminated by ";" only: a following "/"
-- would make SQL*Plus execute the buffer a second time.
select sql_text_wo_constants, count(*)
  from t1
 group by sql_text_wo_constants
having count(*) > 100
 order by count(*);
以下是一个测试的例子,我们使用一个循环执行1000次某条SQL,每次执行时只有谓词不同:
SQL> begin
2 for i in 1 .. 1000 loop
3 execute immediate 'select count(*) from t where x = ' || i;
4 end loop;
5 end;
6 /
PL/SQL procedure successfully completed
SQL> select sql_text_wo_constants, count(*)
2 from t1
3 group by sql_text_wo_constants
4 having count(*) > 100 ---可以修改成你希望的次数
5 order by 2
6 ;
SQL_TEXT_WO_CONSTANTS COUNT(*)
-------------------------------------------------------------------------------- ----------
SELECT COUNT(*) FROM T WHERE X = @
1000
可以看到输出结果中,这类语句在共享池中出现了1000条,其中谓词条件里的常量被“@”代替,这样通过这个函数,可以很容易地找到共享池中哪些SQL没有绑定变量。
整理自网络