-- ======================================================================
-- Oracle session triage notes for SQL*Plus: locate sessions, generate
-- kill commands, and find the root of a hang.
-- Every command is on its own line so that a leading "--" comment cannot
-- swallow the statements that follow it. Literal user, machine, object
-- and SID values are examples and must be replaced before use.
-- ======================================================================

-- Find the sessions that are accessing a given object.
col owner for a10
col object for a20
col type for a15
select /*+RULE*/ *
  from v$access a
 where object = 'P_CHK_QUALITY_GMCC';

-- Count current connections per user / OS user / machine / program / event
-- to spot abnormal connection build-up (SYS and TRANSDATA are excluded).
set line 240
col MACHINE for a25
col PROGRAM for a30
col USERNAME for a16
col OSUSER for a12
col event for a30
select username, osuser, machine, program, event, count(*)
  from v$session
 where username not in ('SYS', 'TRANSDATA')
 group by username, osuser, machine, program, event
 order by program;

-- Generate one "kill -9 <spid>" shell command per session of a DB user.
set line 200 pagesize 9999
select 'kill -9 ' || spid
  from v$process p, v$session s
 where p.addr = s.paddr
   and s.username = 'JMCBSPARAM1';

-- Generate one "alter system kill session" statement per session of a DB user.
set line 200 pagesize 9999
select 'alter system kill session ''' || s.sid || ',' || s.serial# || ''';'
  from v$process p, v$session s
 where p.addr = s.paddr
   and s.username = 'CCMNT';

-- Generate, per session of one OS user, a shell command that kills the
-- server process only when its ps line contains LOCAL=NO.
-- awk compares the PID column ($2 of "ps -ef") for exact equality, so a
-- process whose PID or arguments merely contain the same digits is never
-- selected (a plain "grep <spid>" would match such lines).
set line 200 pagesize 9999
select 'ps -ef | grep LOCAL=NO | awk ''$2 == ' || p.spid || ' {print $2}'' | xargs kill -9'
  from v$process p, v$session s
 where p.addr = s.paddr
   and s.osuser = 'zwbill';

-- Generate both the OS kill command and the "alter system kill session"
-- statement for every session coming from one client machine.
col process for 99999
select 'kill -9 ' || spid,
       'alter system kill session ''' || s.sid || ',' || s.serial# || ''';' kill,
       s.machine,
       s.process
  from v$process p, v$session s
 where p.addr = s.paddr
   and s.machine = 'CRMCSWAS1';

-- Generate "alter system kill session" statements for every session that is
-- waiting on a given event.
set line 200 pagesize 9999
select 'alter system kill session ''' || s.sid || ',' || s.serial# || ''';'
  from v$session s
 where s.event = 'library cache lock';

-- **********************************************************************
-- Finding the root blocker
-- **********************************************************************

-- Sessions that block more than 10 other sessions. Sessions that are not
-- blocked have a NULL blocking_session and are filtered out, otherwise they
-- would show up as one large NULL group.
select blocking_session, count(*)
  from v$session
 where blocking_session is not null
 group by blocking_session
having count(*) > 10;

-- OS process id of a given SID.
select p.spid
  from v$session s, v$process p
 where s.paddr = p.addr
   and s.sid in (3209);

-- Sessions currently waiting on library cache lock.
col program for a20
col machine for a20
col event for a30
select sid, serial#, username, program, machine, event
  from v$session
 where event = 'library cache lock';

-- Session, process and current SQL text for one SID / OS pid pair
-- (prompts for the substitution variables sid and spid).
-- p.program is aliased to pprogram so that the "col pprogram" setting applies.
set line 800 pagesize 200 long 99999
col sname for a10
col pname for a10
col machine for a15
col sprogram for a20
col pprogram for a20
col event for a40
select p.spid,
       p.username pname,
       p.program pprogram,
       s.username sname,
       s.status,
       s.machine,
       q.sql_text
  from v$process p, v$session s, v$sql q
 where p.addr = s.paddr
   and s.sql_id = q.sql_id
   and s.sid = &sid
   and p.spid = '&spid';

-- ######################################################################
-- Initial fault localisation (within 15 minutes)
-- ######################################################################

-- a) Check for abnormal waits: active sessions outside wait class 6 (Idle),
--    grouped by instance and event, more than 10 sessions per event.
select inst_id, event, count(*)
  from gv$session
 where status = 'ACTIVE'
   and wait_class# <> 6
 group by inst_id, event
having count(*) > 10;

-- Use this single-instance form when the global view returns no result.
select event, count(*)
  from v$session
 where status = 'ACTIVE'
   and wait_class# <> 6
 group by event
having count(*) > 10;

-- b) If many abnormal waits appear (30 or more abnormal processes), run a
--    hang analysis to capture the blocking situation. Connect first with:
--      $ sqlplus '/as sysdba'
oradebug setmypid
oradebug unlimit
oradebug hanganalyze 3

-- c) Use the trace file to find the process at the root of the hang,
--    including its SID and OS PID. Root processes are generally of these kinds:
--      * its wait event is "NO WAIT";
--      * also analyse root processes that block a large number of others;
--      * analyse the hang root, then terminate that process.
--    Example trace excerpt:
--
--    ==============
--    HANG ANALYSIS:
--    ==============
--    Found 89 objects waiting for <cnode/sid/sess_srno/proc_ptr/ospid/wait_event>
--        <1/4259/39290/0xc9982a0/3102104/enq: DX - contention>
--    Found 41 objects waiting for <cnode/sid/sess_srno/proc_ptr/ospid/wait_event>
--        <1/4108/28291/0x9acbba8/1115164/gc current request>
--    Found 99 objects waiting for <cnode/sid/sess_srno/proc_ptr/ospid/wait_event>
--        <1/4420/44106/0xeaf6b28/684548/gc current request>
--    Found 30 objects waiting for <cnode/sid/sess_srno/proc_ptr/ospid/wait_event>
--        <1/3469/53674/0xca8ad00/4354862/gc current request>
--    Found 58 objects waiting for <cnode/sid/sess_srno/proc_ptr/ospid/wait_event>
--        <1/1944/53978/0xa9e6340/4096640/gc current request>
--
-- d) Analyse the main blocking processes during the incident.
--    Example session (output recorded during an incident):
--
--    SQL> select blocking_session, count(*) from v$session
--           where blocking_session is not null
--           group by blocking_session having count(*) > 10
--
--          3469         30
--          4420         99
--          4108         41
--
--    Look further at these 3 processes (3469, 4420, 4108):
--
--    SQL> select sid, username, sql_id, event from v$session
--           where sid in (3469, 4420, 4108)
--
--           SID USERNAME   SQL_ID        EVENT
--    ---------- ---------- ------------- ----------------------------------- .
--          3469 JMYY       gxn9d9fwz893r enq: TX - index contention
--          4108 JMYY       f7uncmzyf400n gc current request
--          4420 JMYY       gxn9d9fwz893r gc current request
--
--    The SQL text can be looked up from the SQL_ID. Combining the HANG
--    ANALYSIS output with the result above shows that 3469, 4108 and 4420
--    are the root blockers (further analysis can confirm exactly which one
--    is the root). To clear the fault, first consider terminating these 3
--    processes (confirm beforehand that they are non-local processes), then
--    2 minutes after the kill query v$session and check whether the database
--    performance indicators are back to normal.
--
--    If the system has still not recovered after the steps above, obtain
--    approval from the customer's person in charge, terminate all LOCAL=NO
--    processes, and then confirm again whether the database has recovered.
-- Shell command (not SQL): kills every process whose "ps -ef" line contains
-- LOCAL=NO. It is kept inside a comment so it can never be executed by
-- accident when this file is run in SQL*Plus. Run it from the OS shell only.
--   $ ps -ef|grep LOCAL=NO|awk '{print $2}'|xargs kill -9

-- e) Check the wait events of the blocking sessions: active sessions outside
--    wait class 6 (Idle), counted per OS user and event.
set line 200
select osuser, event, count(*)
  from v$session
 where wait_class# <> 6
   and status = 'ACTIVE'
 group by osuser, event;

-- If the blocker shows "latch: cache buffers chains" waits, it is suffering
-- hot-block contention. List every session waiting on that latch and review
-- the execution plan of its SQL (sql_id). The filter names the latch event
-- this step is about.
set line 200 pagesize 9999
col PROGRAM for a20
col machine for a20
col event for a20
select sid, serial#, osuser, program, machine, event, sql_id
  from v$session
 where event = 'latch: cache buffers chains';

-- ######################################################################
-- Session counts per instance / user / machine / program / status
-- (only groups with more than 100 sessions are shown).
-- ######################################################################
col host_name for a12
set line 400 pagesize 200
col MACHINE for a25
col PROGRAM for a50
col USERNAME for a16
col INST_ID for 9999999
select inst_id, username, machine, program, status, count(*)
  from gv$session
-- optional filter, uncomment to count active sessions only: where status = 'ACTIVE'
 group by inst_id, username, machine, program, status
having count(*) > 100
 order by program;

-- Current instance information.
select instance_number, instance_name, host_name, version, status
  from v$instance;

-- ######################################################################
-- Who is blocking the sessions that wait on library cache lock
-- ######################################################################
select blocking_session, event, count(*)
  from v$session
 where event = 'library cache lock'
 group by blocking_session, event;

-- Two-node (RAC) variant:
--   BLOCKING_INSTANCE    = instance of the blocker
--   BLOCKING_SESSION - 1 = SID of the blocker
-- Note from the original author: blocking_session should already be the
-- blocker's session id and should not need the "- 1"; the offset works
-- around Oracle bug 5481650. After upgrading to patch set 10.2.0.4 this
-- query must be changed (drop the "- 1").
select inst_id, sid, serial#, blocking_instance, blocking_session - 1
  from gv$session
 where event = 'library cache lock';

-- The queries above show which SID on which instance blocks the sessions
-- waiting for the TX lock. Use the next query to get the blocker's details,
-- terminate the process once the customer has agreed, and record the
-- relevant information.
-- v$sql is outer-joined on the session's current cursor so that exactly one
-- row per session is returned, and an idle blocker (no current SQL) is still
-- listed with an empty sql_text. Prompts for the substitution variable SID.
col username format a10
col program format a30
col sql_text format a50
col machine format a30
select a.sid, a.serial#, c.spid, a.username, a.program, a.machine, b.sql_text
  from v$session a, v$process c, v$sql b
 where a.paddr = c.addr
   and b.sql_id(+) = a.sql_id
   and b.child_number(+) = a.sql_child_number
   and a.sid = &SID;

-- Terminate the process: generate the OS kill command and the
-- "alter system kill session" statement for one client machine / client
-- process id (prompts for HOSTNAME and OSPID). v$session.process is a
-- character column, so the value is compared as a string to avoid an
-- implicit numeric conversion of every session's process value.
select 'kill -9 ' || spid,
       'alter system kill session ''' || s.sid || ',' || s.serial# || ''';',
       s.machine,
       s.process
  from v$process p, v$session s
 where p.addr = s.paddr
   and s.machine = '&HOSTNAME'
   and s.process = '&OSPID';

-- ######################################################################
-- Library cache lock holders and waiters via x$kgllk (connect as SYS)
-- ######################################################################
set line 200 pagesize 9999

-- All x$kgllk rows that belong to a current session.
select *
  from x$kgllk
 where kgllkses in (select saddr from v$session);

-- Rows with kgllkreq = 0 on a handle for which some current session has a
-- row with kgllkreq > 0 (i.e. the rows these notes treat as blocking).
select *
  from x$kgllk lock_a
 where lock_a.kgllkreq = 0
   and exists (select lock_b.kgllkhdl
                 from x$kgllk lock_b
                where lock_b.kgllkses in (select saddr from v$session)
                  and lock_a.kgllkhdl = lock_b.kgllkhdl
                  and lock_b.kgllkreq > 0);

-- Blocking sessions:
select sid, username, terminal, program
  from v$session
 where saddr in (select lock_a.kgllkses
                   from x$kgllk lock_a
                  where lock_a.kgllkreq = 0
                    and exists (select lock_b.kgllkhdl
                                  from x$kgllk lock_b
                                 where lock_b.kgllkses in (select saddr from v$session)
                                   and lock_a.kgllkhdl = lock_b.kgllkhdl
                                   and lock_b.kgllkreq > 0));

-- Blocked sessions:
select sid, username, terminal, program
  from v$session
 where saddr in (select lock_a.kgllkses
                   from x$kgllk lock_a
                  where lock_a.kgllkreq > 0
                    and exists (select lock_b.kgllkhdl
                                  from x$kgllk lock_b
                                 where lock_b.kgllkses in (select saddr from v$session)
                                   and lock_a.kgllkhdl = lock_b.kgllkhdl
                                   and lock_b.kgllkreq = 0));

-- ######################################################################
-- Steps to resolve the problem
-- ######################################################################

-- 1. Identify the objects involved in the library cache waits, for example
--    which packages and stored procedures.
select kglnaown, kglnaobj
  from x$kglob
 where kglhdadr in (select p1raw
                      from v$session_wait
                     where event like 'library cache%');

-- 2. Identify which sessions performed the operation that causes the
--    library cache lock (sessions with a non-zero pin mode on the handle
--    being waited for).
select sid, program, machine
  from v$session
 where paddr in (select s.paddr
                   from x$kglpn p, v$session s
                  where p.kglpnuse = s.saddr
                    and p.kglpnmod <> 0
                    and p.kglpnhdl in (select p1raw
                                         from v$session_wait
                                        where event = 'library cache lock'));

-- 3. Generate the commands that kill those sessions. Review the results of
--    the previous steps first; this is the point at which to decide whether
--    killing the processes is the way to resolve the problem.
select 'kill -9 ' || spid
  from v$process
 where addr in (select s.paddr
                  from x$kglpn p, v$session s
                 where p.kglpnuse = s.saddr
                   and p.kglpnmod <> 0
                   and p.kglpnhdl in (select p1raw
                                        from v$session_wait
                                       where event = 'library cache lock'));