一、定位数据库故障原因。
定位原因大概可以分三步走:
1、如果有oracle错误号或者alert日志中有详细的出错信息,则可以根据这些去定位数据库故障原因;
2、如果没有,则可以运行awr工具或者statspack工具生成报告,根据报告去定位数据库故障原因;
3、如果没有上述工具,则可以查看相关数据库字典,去定位数据库故障原因。
脚本1:下面语句用v$sysstat视图检索数据库开销和cpu等待时间,从而是判断你的数据库是否发生了资源等待事件。
select
round(100*CPU_sec/available_time,2) "ORACLE CPU TIME AS % AVAIL."
,round(100*(DB_sec - CPU_sec)/available_time,2) "NON-IDLE WAITS AS % AVAIL."
,case
sign(available_time - DB_sec)
when 1 then round(100*(available_time - DB_sec) / available_time, 2)
else 0
end "ORACLE IDLE AS % AVAIL."
from
(
select
(sysdate - i.startup_time) * 86400 * c.cpus available_time
,t.DB_sec
,t.CPU_sec
from v$instance i
,(select value cpus
from v$parameter
where name = 'cpu_count') c
,(select
sum(case name
when 'DB time' then round(value/100)
else 0
end) DB_sec
,sum(case name
when 'DB time' then 0
else round(value/100)
end) CPU_sec
from v$sysstat
where name in ('DB time', 'CPU used by this session')) t
where i.instance_number = userenv('INSTANCE')
);
Here is some output:
ORACLE CPU TIME AS % AVAIL. NON-IDLE WAITS AS % AVAIL. ORACLE IDLE AS % AVAIL.
--------------------------- -------------------------- -----------------------
31.97 129.59
脚本2:上面的查询结果(NON-IDLE WAITS)显示数据库存在大量等待事件,下面的语句可以查出具体的等待事件。
col wait_class format A15
col event format A35 trunc
col "CLASS AS % OF WHOLE" format 990.00 HEAD "CLASS AS|% OF WHOLE"
col "EVENT AS % OF CLASS" like "CLASS AS % OF WHOLE" HEAD "EVENT AS|% OF CLASS"
col "EVENT AS % OF WHOLE" like "CLASS AS % OF WHOLE" HEAD "EVENT AS|% OF WHOLE"
set pagesize 30
break on wait_class on "CLASS AS % OF WHOLE"
--
select
wait_class,
round(100 * time_class / total_waits, 2) "CLASS AS % OF WHOLE",
event,
round(100 * time_waited / time_class, 2) "EVENT AS % OF CLASS",
round(100 * time_waited / total_waits, 2) "EVENT AS % OF WHOLE"
from
(select
wait_class
,event
,time_waited
,sum(time_waited) over (partition by wait_class) time_class
,rank() over (partition by wait_class order by time_waited desc) rank_within_class
,sum(time_waited) over () total_waits
from v$system_event
where wait_class <> 'Idle')
where rank_within_class <= 3
order by time_class desc, rank_within_class;
三、判断在线重做日志大小是否合适。
问题:如果发生log file switch (checkpoint incomplete) 等待事件,需要检测在线重做日志交换的频率。
脚本1:这个脚本可以查看每小时在线日志交换的次数,一般的做法,如果日志交换次数过频繁,可以适当调整一下在线日志的大小。
select count(*), to_char(first_time, 'YYYY:MM:DD:HH24')
from v$log_history
group by to_char(first_time, 'YYYY:MM:DD:HH24')
order by 2;
脚本2:查看相关在线重做日志文件。
select a.group#, a.member, b.status, b.bytes / 1024 / 1024 meg_bytes
from v$logfile a, v$log b
where a.group# = b.group#
order by a.group#;
四、判断undo表空间大小是否合适。
问题:运行长sql语句的时候,发生ORA-01555 “snapshot too old” 错误,需要检查一下undo表空间大小是否合适。
脚本:
select
to_char(begin_time,'MM-DD-YYYY HH24:MI') begin_time
,ssolderrcnt ORA_01555_cnt
,nospaceerrcnt no_space_cnt
,txncount max_num_txns
,maxquerylen max_query_len
,expiredblks blck_in_expired
from v$undostat
where begin_time > sysdate - 1
order by begin_time;
注:
1、ORA_01555_cnt列表明是否发生了ORA-01555错误,如果发生,则判断下面几步:
*Ensure that code does not contain COMMIT statements within cursor loops.
*Tune the SQL statement throwing the error so that it runs faster.
*Ensure that you have good statistics (so your SQL runs efficiently).
*Increase the UNDO_RETENTION initialization parameter.
2、如果NO_SPACE_CNT列非0,需要增大undo表空间大小。
3、现在undo表空间的大小和oracle建议大小。
select sum(bytes) / 1024 / 1024 cur_mb_size,
dbms_undo_adv.required_undo_size(900) req_mb_size
from dba_data_files
where tablespace_name LIKE 'UNDO%';
五、判断临时表空间大小是否合适。
问题:ORA-01652: unable to extend temp segment by 128 in tablespace TEMP
select tablespace_name, sum(bytes_used) / 1024 / 1024 mb_used
from v$temp_extent_pool
group by tablespace_name;
select name, bytes/1024/1024 mb_alloc from v$tempfile;
alter database tempfile '/ora03/DWREP/temp03.dbf' resize 12g; 或者
alter tablespace temp add tempfile '/ora04/DWREP/temp04.dbf' size 2g;
六、检查表空间使用情况。
select a.tablespace_name,
(f.bytes / a.bytes) * 100 pct_free,
f.bytes / 1024 / 1024 mb_free,
a.bytes / 1024 / 1024 mb_allocated
from (select nvl(sum(bytes), 0) bytes, x.tablespace_name
from dba_free_space y, dba_tablespaces x
where x.tablespace_name = y.tablespace_name(+)
and x.contents != 'TEMPORARY'
and x.status != 'READ ONLY'
and x.tablespace_name not like 'UNDO%'
group by x.tablespace_name) f,
(select sum(bytes) bytes, tablespace_name
from dba_data_files
group by tablespace_name) a
where a.tablespace_name = f.tablespace_name
order by 1;
REM *****************************************************
REM * File : freesp.sql
REM *****************************************************
SET PAGESIZE 100 LINES 132 ECHO OFF VERIFY OFF FEEDB OFF SPACE 1 TRIMSP ON
COMPUTE SUM OF a_byt t_byt f_byt ON REPORT
BREAK ON REPORT ON tablespace_name ON pf
COL tablespace_name FOR A17 TRU HEAD 'Tablespace|Name'
COL file_name FOR A40 TRU HEAD 'Filename'
COL a_byt FOR 9,990.999 HEAD 'Allocated|GB'
COL t_byt FOR 9,990.999 HEAD 'Current|Used GB'
COL f_byt FOR 9,990.999 HEAD 'Current|Free GB'
COL pct_free FOR 990.0 HEAD 'File %|Free'
COL pf FOR 990.0 HEAD 'Tbsp %|Free'
COL seq NOPRINT
DEFINE b_div=1073741824
--
COL db_name NEW_VALUE h_db_name NOPRINT
COL db_date NEW_VALUE h_db_date NOPRINT
SELECT name db_name, TO_CHAR(sysdate,'YYYY_MM_DD') db_date FROM v$database;
--
SPO &&h_db_name._&&h_db_date..lis
PROMPT Database: &&h_db_name, Date: &&h_db_date
--
SELECT 1 seq,
b.tablespace_name,
nvl(x.fs, 0) / y.ap * 100 pf,
b.file_name file_name,
b.bytes / &&b_div a_byt,
NVL((b.bytes - SUM(f.bytes)) / &&b_div, b.bytes / &&b_div) t_byt,
NVL(SUM(f.bytes) / &&b_div, 0) f_byt,
NVL(SUM(f.bytes) / b.bytes * 100, 0) pct_free
FROM dba_free_space f,
dba_data_files b,
(SELECT y.tablespace_name, SUM(y.bytes) fs
FROM dba_free_space y
GROUP BY y.tablespace_name) x,
(SELECT x.tablespace_name, SUM(x.bytes) ap
FROM dba_data_files x
GROUP BY x.tablespace_name) y
WHERE f.file_id(+) = b.file_id
AND x.tablespace_name(+) = y.tablespace_name
and y.tablespace_name = b.tablespace_name
AND f.tablespace_name(+) = b.tablespace_name
GROUP BY b.tablespace_name,
nvl(x.fs, 0) / y.ap * 100,
b.file_name,
b.bytes
UNION
SELECT 2 seq,
tablespace_name,
j.bf / k.bb * 100 pf,
b.name file_name,
b.bytes / &&b_div a_byt,
a.bytes_used / &&b_div t_byt,
a.bytes_free / &&b_div f_byt,
a.bytes_free / b.bytes * 100 pct_free
FROM v$temp_space_header a,
v$tempfile b,
(SELECT SUM(bytes_free) bf FROM v$temp_space_header) j,
(SELECT SUM(bytes) bb FROM v$tempfile) k
WHERE a.file_id = b.file#
ORDER BY 1, 2, 4, 3;
--
COLUMN name FORMAT A60 HEAD 'Control Files'
SELECT name FROM v$controlfile;
--
COL member FORMAT A50 HEAD 'Redo log files'
COL status FORMAT A9 HEAD 'Status'
COL archived FORMAT A10 HEAD 'Archived'
--
SELECT
a.group#, a.member, b.status,
b.archived, SUM(b.bytes)/1024/1024 mb
FROM v$logfile a, v$log b
WHERE a.group# = b.group#
GROUP BY a.group#, a.member, b.status, b.archived
ORDER BY 1, 2;
--
七、检查索引使用情况。
1、alter index F_DOWN_DOM_FK9 monitoring usage;
2、select * from v$object_usage;
3、select 'alter index ' || index_name || ' monitoring usage;' from user_indexes;