select T1.INST_ID, t2.sid, t2.SERIAL#, t2.CLIENT_INFO, t3.OBJECT_NAME, t2.STATUS, t2.PROGRAM, t4.SQL_FULLTEXT, 'ALTER SYSTEM KILL SESSION '||''''||T2.SID||','||T2.SERIAL#||',@'||T1.INST_ID||''''||' IMMEDIATE;' SQL_EXEC from gv$locked_object t1, gv$session t2, dba_objects t3,v$sql t4 where t1.SESSION_ID = t2.SID and t1.INST_ID=t2.INST_ID and t1.OBJECT_ID = t3.OBJECT_ID and t2.sql_id=t4.SQL_ID(+) AND T2.STATUS='INACTIVE';
nohup /etc/init.d/ohasd run &
-- Restart the OHASD (HAS) process
This is used when OHASD is running but its agents fail to bring up the cluster stack automatically.
kill -HUP <ohasd process ID>
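For example, a minimal sketch (ohasd.bin is the usual process name on Linux; <pid> is whatever the grep returns):
ps -ef | grep ohasd.bin | grep -v grep    -- find the ohasd.bin PID
kill -HUP <pid>    -- per the note above, this prompts OHASD to restart its agents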
## Cluster name ##
cemutlo -n
crs_stat -v -t
Check the cluster stack status:
crsctl stop cluster -all    -- stops all CRS, CSS and EVM resources on every node
crsctl start cluster -all
crsctl check cluster -all
crsctl stop has    -- stops the entire HA stack (OHASD) on the local node
srvctl status nodeapps
srvctl config scan
srvctl status scan
srvctl status scan_listener
srvctl config vip -n rac01
srvctl status asm -a
srvctl status diskgroup -g data
crsctl status res -t
crsctl status res -t -init
crsctl check ctss
crsctl check crs
ocrcheck
crsctl query css votedisk
./crsctl start res ora.crsd -init    -- 10g equivalent: crs_start "123"
crs_stat -t -v ora.registry.acfs
crsctl start resource ora.cssd -init
Relocate node 1's SCAN listener; it moves to node 2, so node 2 then runs all 3 SCAN listeners.
crsctl relocate res ora.LISTENER_SCAN1.lsnr -f
[grid@myrac01 ~]$ crsctl start res ora.prod.db    -- equivalent to: srvctl start database -d prod
CRS-2672: Attempting to start 'ora.prod.db' on 'myrac01'
CRS-2672: Attempting to start 'ora.prod.db' on 'myrac02'
CRS-2676: Start of 'ora.prod.db' on 'myrac01' succeeded
CRS-2676: Start of 'ora.prod.db' on 'myrac02' succeeded
[grid@myrac01 ~]$ crsctl status res ora.prod.db
appvipcfg create -network=1 \
> -ip=192.168.1.150 \
> -vipname=oggvip \
> -user=root
crsctl setperm resource oggvip -u user:oracle:r-x
#./crsctl add resource oggapp -type cluster_resource -attr "ACTION_SCRIPT=/ogg/action/ogg_action.scr,CHECK_INTERVAL=30, START_DEPENDENCIES='hard(oggvip,ora.asm) pullup(oggvip)', STOP_DEPENDENCIES='hard(oggvip)'"
#./crsctl status resource oggapp
#./crsctl setperm resource oggapp -o oracle
#./crsctl start resource oggapp
#./crsctl relocate resource oggapp -f    -- relocate to another node
#./crsctl delete res oggapp -f
#./crsctl delete res oggvip -f
crsctl start res ora.cluster_interconnect.haip -init
crsctl status res ora.cluster_interconnect.haip -f -init
./crsctl modify res ora.cluster_interconnect.haip -attr "ENABLED=1" -init    -- HAIP is an init-level resource; it is only visible when -init is specified
crsctl status res ora.cluster_interconnect.haip -init -f
select INST_ID,sid,serial# from gv$session where status='INACTIVE' AND TYPE='USER';
ALTER SYSTEM KILL SESSION '57,243,@1' immediate;
Oracle 11g RAC introduces SCAN (Single Client Access Name), the single client access name of the cluster. SCAN gives clients a single host name for accessing the Oracle databases running in the cluster. If nodes are added to or removed from the cluster, clients that use SCAN do not need to change their TNS configuration. Regardless of which nodes are in the cluster, the SCAN resource and its associated IP addresses provide a stable name for clients to connect to. During the Oracle 11g Grid installation, this feature requires either DNS or GNS name resolution. This document describes the DNS configuration used when installing Oracle 11g Grid.
This document sets up a DNS server and tests Oracle 11gR2 client connectivity using SCAN and DNS.
Machines used in this document:
| Role | DNS server      | Cluster node 1                                                    | Cluster node 2                                                    | Client          |
| OS   | RHEL 6.3        | RHEL 6.3                                                          | RHEL 6.3                                                          | Win7            |
| IP   | 192.168.114.138 | IP: 192.168.114.141  VIP: 192.168.114.143  Priv: 172.168.114.141 | IP: 192.168.114.142  VIP: 192.168.114.144  Priv: 172.168.114.142 | 192.168.114.127 |
SCAN IPs: 192.168.114.145, 192.168.114.146, 192.168.114.147
# cat /etc/issue
Red Hat Enterprise Linux Server release 6.3(Santiago)
Kernel \r on an \m
SQL> select * from V$version;
BANNER
--------------------------------------------------------------------------------
Oracle Database 11g Enterprise Edition Release 11.2.0.4.0 - 64bit Production
PL/SQL Release 11.2.0.4.0 - Production
CORE 11.2.0.4.0 Production
TNS for Linux: Version 11.2.0.4.0 - Production
NLSRTL Version 11.2.0.4.0 - Production
[root@dns ~]# yum install bind*
[root@dns ~]# cat /etc/resolv.conf
search localdomain    --- domain suffix used by the client
nameserver 192.168.205.130
Prevent resolv.conf from being modified automatically:
[root@dns ~]#chattr +i /etc/resolv.conf
########################R6####################
[root@dns ~]# vi /etc/named.conf
Modify:
listen-on port 53 { any; };
allow-query { any; };    -- can also be a specific subnet, e.g. 192.168.110.0/24;
To keep the configuration simple, comment out the DNSSEC/key-related entries with //:
options {
listen-on port 53 {any; };
listen-on-v6 port 53 { ::1; };
directory "/var/named";
dump-file "/var/named/data/cache_dump.db";
statistics-file "/var/named/data/named_stats.txt";
memstatistics-file "/var/named/data/named_mem_stats.txt";
allow-query { any; };
recursion yes;
// dnssec-enable yes;
// dnssec-validation yes;
// dnssec-lookaside auto;
/* Path to ISC DLV key */
// bindkeys-file "/etc/named.iscdlv.key";
// managed-keys-directory "/var/named/dynamic";
};
logging {
channel default_debug {
file"data/named.run";
severity dynamic;
};
};
zone "." IN {
type hint;
file "named.ca";
};
include"/etc/named.rfc1912.zones";
//include "/etc/named.root.key";
named.conf contains only the "." zone; the other zones are configured in /etc/named.rfc1912.zones.
Create the forward and reverse zones in /etc/named.rfc1912.zones:
[root@dns ~]# vi /etc/named.rfc1912.zones
Add the following two sections:
zone "clgrac.com"IN {
type master;
file "clgrac.com.zone";
allow-update { none; };
};
zone "214.168.192.in-addr.arpa"IN {
type master;
file "192.168.214.arpa";
allow-update { none; };
};
[root@dns ~]# cd /var/named/
[root@dns named]# cp -p named.localhost clgrac.com.zone
[root@dns named]# cp -p named.localhost 192.168.214.arpa
############################## R6 ############################
Edit the forward zone file:
[root@dns named]# vi /var/named/clgrac.com.zone
$TTL 5M
@ IN SOA @ rname.invalid. (
0 ; serial
1D ; refresh
1H ; retry
1W ; expire
3H) ; minimum
NS @
A 192.168.205.130
AAAA ::1
dr01 A 192.168.205.130
@ MX 5 mail.localdomain
$GENERATE 10-250 stu$ A 192.168.205.$
Edit the reverse zone file:
[root@dns ~]# vi /var/named/192.168.214.arpa
$TTL 1D
@ IN SOA @ rname.invalid. (
0 ; serial
1D ; refresh
1H ; retry
1W ; expire
3H) ; minimum
NS dgtarget.com.
A 127.0.0.1
AAAA ::1
130 PTR dr01.dgtarget.com
[root@dns named]# /etc/rc.d/init.d/named restart
Stopping named: [ OK ]
Starting named: [ OK ]
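Optionally, the configuration and zone files can be validated with the standard BIND utilities (zone and file names as created above):
named-checkconf /etc/named.conf
named-checkzone clgrac.com /var/named/clgrac.com.zone
named-checkzone 214.168.192.in-addr.arpa /var/named/192.168.214.arpa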
######################################R7##################################
[root@rac01 named]# vi clgrac.com.zone
$TTL 1D
@ IN SOA clgrac.com. rname.invalid. (
0 ; serial
1D ; refresh
1H ; retry
1W ; expire
3H ) ; minimum
NS clgrac.com.
A 192.168.214.129
AAAA ::1
rac01 IN A 192.168.214.129
myscan IN A 192.168.214.133
myscan IN A 192.168.214.134
myscan IN A 192.168.214.135
[root@rac01 named]# more 192.168.214.arpa
$TTL 1D
@ IN SOA clgrac.com. rname.invalid. (
0 ; serial
1D ; refresh
1H ; retry
1W ; expire
3H ) ; minimum
NS clgrac.com.
129 IN PTR rac01.clgrac.com.
133 IN PTR myscan.clgrac.com.
134 IN PTR myscan.clgrac.com.
135 IN PTR myscan.clgrac.com.
systemctl enable named
[root@dns ~]# netstat -anp|grep :53
tcp 0 0 192.168.114.138:53 0.0.0.0:* LISTEN 2104/named
tcp 0 0 127.0.0.1:53 0.0.0.0:* LISTEN 2104/named
tcp 0 0 ::1:53 :::* LISTEN 2104/named
udp 0 0 0.0.0.0:5353 0.0.0.0:* 1930/avahi-daemon
udp 0 0 172.168.146.138:53 0.0.0.0:* 2104/named
udp 0 0 192.168.114.138:53 0.0.0.0:* 2104/named
udp 0 0 127.0.0.1:53 0.0.0.0:* 2104/named
udp 0 0 ::1:53 :::* 2104/named
On other Linux clients, only the following needs to be modified:
[root@racnode2 grid]# vi /etc/resolv.conf
search racn1.pera.com
nameserver 192.168.114.138
Prevent resolv.conf from being modified automatically:
chattr +i /etc/resolv.conf
[root@racn2 ~]# more /etc/hosts
127.0.0.1 localhost localhost.pera.com localhost4 localhost4.pera.com4
::1 localhost localhost.pera.com localhost6 localhost6.pera.com6
192.168.114.141 racn1.pera.com racn1
192.168.114.142 racn2.pera.com racn2
192.168.114.143 racn1-vip.pera.com racn1-vip
192.168.114.144 racn2-vip.pera.com racn2-vip
172.168.1.141 racn1-priv.pera.com racn1-priv
172.168.1.142 racn2-priv.pera.com racn2-priv
192.168.114.145 racn-cluster-scan.pera.com racn-cluster-scan
192.168.114.146 racn-cluster-scan.pera.com racn-cluster-scan
192.168.114.147 racn-cluster-scan.pera.com racn-cluster-scan
With the above in place, install the cluster or modify the existing cluster and make sure the SCAN IPs resolve and are reachable (not covered here).
Reference: http://blog.csdn.NET/bamuta/article/details/24410801
--chkconfig --list named
--chkconfig named on
Connecting from Windows:
First, configure DNS on the client.
Edit tnsnames.ora:
vmpera =
(DESCRIPTION =
(ADDRESS_LIST =
(ADDRESS = (PROTOCOL = TCP)(HOST = racn-cluster-scan.pera.com)(PORT =1521))
)
(CONNECT_DATA =
(SERVICE_NAME = pera)
)
)
In addition, add the following entry to the client hosts file:
192.168.114.127 localhost
-- In testing, any IP from the local subnet works here, whether or not it is pingable.
Connect:
SQL> conn system/oracle@vmpera
Connected.
A quick test shows the load_balance feature works: different connections end up on different instances.
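A minimal sketch of such a test from a Linux client (credentials and TNS alias as configured above; purely illustrative):
for i in 1 2 3 4; do
  sqlplus -s system/oracle@vmpera <<EOF
select instance_name from v\$instance;
exit;
EOF
done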
To modify the SCAN, first make sure that the host name and IP addresses the SCAN will use are recorded in DNS, and test the name resolution from the operating system with nslookup.
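For example (assuming the new SCAN name myrac-scan has already been added to the dgtarget.com zone, as shown later in this document):
nslookup myrac-scan.dgtarget.com
-- should return all three SCAN addresses: 192.168.205.133, 192.168.205.134, 192.168.205.135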
$srvctl config scan
srvctl config scan
srvctl status scan
srvctl stop scan_listener    -- stop this first
--- How to stop the SCAN listener
srvctl stop scan_listener -i 1 -n
srvctl stop scan
srvctl stop listener    -- without the -l option, all listeners are stopped by default
srvctl status scan_listener
crs_stat -t | grep scan
[root@rac01 bin]# ./srvctl modify scan -n myrac-scan
[root@rac01 bin]# ./srvctl config scan
SCAN name: myrac-scan, Network: 1/192.168.205.0/255.255.255.0/eth0
SCAN VIP name: scan1, IP: /myrac-scan/192.168.205.135
SCAN VIP name: scan2, IP: /myrac-scan/192.168.205.133
SCAN VIP name: scan3, IP: /myrac-scan/192.168.205.134
The cluster status is as follows.
At this point only node 2 has listener_scan1; add listener_scan2:
[root@db1 bin]# ./srvctl modify scan_listener -u
------ Adds SCAN listeners for the SCAN IPs; without this command only listener_scan1 exists, after running it listeners are created according to the number of SCAN IPs
srvctl start scan
srvctl start scan_listener
Verify:
[grid@rac01 ~]$ ps -ef|grep tns|grep -v grep
root 10 2 0 19:52 ? 00:00:00 [netns]
grid 4047 1 0 19:55 ? 00:00:00 /u01/app/11.2.0/grid/bin/tnslsnr LISTENER -inherit
grid 10428 1 0 20:43 ? 00:00:00 /u01/app/11.2.0/grid/bin/tnslsnr LISTENER_SCAN2 -inherit
grid 10457 1 0 20:43 ? 00:00:00 /u01/app/11.2.0/grid/bin/tnslsnr LISTENER_SCAN3 -inherit
lsnrctl status listener_scan3
Note that with 3 SCAN IPs configured on a 2-node cluster there are still 3 SCAN listeners.
http://www.askmaclean.com/archives/11gr2-rac-add-listener-static-register.html
########################R6####################
1. Add the entries to DNS
vi /var/named/dgtarget.com.zone
$TTL 5M
@ IN SOA dgtarget.com. rname.invalid. (
0 ; serial
1D ; refresh
1H ; retry
1W ; expire
3H) ; minimum
NS dgtarget.com.
A 192.168.205.130
AAAA ::1
dr01 A 192.168.205.130
myrac-scan A 192.168.205.133
myrac-scan A 192.168.205.134
myrac-scan A 192.168.205.135
@ MX 5 mail.localdomain
$GENERATE 10-250 stu$ A 192.168.205.$
vi /var/named/192.168.205.arpa
$TTL 1D
@ IN SOA dgtarget.com. rname.invalid. (
0 ; serial
1D ; refresh
1H ; retry
1W ; expire
3H) ; minimum
NS dgtarget.com.
A 127.0.0.1
AAAA ::1
130 PTR dr01.dgtarget.com
133 PTR myrac-scan.dgtarget.com.
134 PTR myrac-scan.dgtarget.com.
135 PTR myrac-scan.dgtarget.com.
Restart the DNS service:
/etc/rc.d/init.d/named restart
##################################R7##################################
[root@rac01 named]# vi clgrac.com.zone
$TTL 1D
@ IN SOA clgrac.com. rname.invalid. (
0 ; serial
1D ; refresh
1H ; retry
1W ; expire
3H ) ; minimum
NS clgrac.com.
A 192.168.214.129
AAAA ::1
rac01 IN A 192.168.214.129
myscan IN A 192.168.214.133
myscan IN A 192.168.214.134
myscan IN A 192.168.214.135
[root@rac01 named]# more 192.168.214.arpa
$TTL 1D
@ IN SOA clgrac.com. rname.invalid. (
0 ; serial
1D ; refresh
1H ; retry
1W ; expire
3H ) ; minimum
NS clgrac.com.
129 IN PTR rac01.clgrac.com.
133 IN PTR myscan.clgrac.com.
134 IN PTR myscan.clgrac.com.
135 IN PTR myscan.clgrac.com.
chkconfig named on
rpm -ivh bind-9.8.2-0.10.rc1.el6.x86_64.rpm
rpm -ivh bind-chroot-9.8.2-0.10.rc1.el6.x86_64.rpm
chkconfig named on
vi /etc/resolv.conf
search dgtarget.com
nameserver 192.168.205.130
/etc/hosts
chattr +i /etc/resolv.conf
#192.168.205.133 myrac-scan.dgtarget.com myrac-scan
#192.168.205.134 myrac-scan.dgtarget.com myrac-scan
#192.168.205.135 myrac-scan.dgtarget.com myrac-scan
[grid@rac01 ~]$ srvctl config vip -n rac01
To change a VIP address, the database services running on that node must be stopped first. A service is an OCR-managed resource whose purpose is to provide database high availability.
srvctl stop service -d orcl -s orcl -n rac01
srvctl stop vip -n rac01
# srvctl modify nodeapps -n rac01 -A 192.168.205.189/255.255.255.0/en0
srvctl start vip -n rac01
srvctl start service -d orcl -s orcl -n rac01
--- Test case
[grid@rac01 ~]$ srvctl stop vip -n rac01
PRCR-1014 : Failed to stop resource ora.rac01.vip
PRCR-1065 : Failed to stop resource ora.rac01.vip
CRS-2529: Unable to act on 'ora.rac01.vip' because that would require stopping or relocating 'ora.LISTENER.lsnr', but the force option was not specified
--- Stop the listeners
srvctl stop listener -l listener
srvctl stop listener -l listener_prod
srvctl stop vip -n rac01
-- Update the hosts file
192.168.205.136 rac01-vip    -- address changed from .131 to .136
192.168.205.132 rac02-vip
-- Modify
[root@rac01 bin]# ./srvctl modify nodeapps -n rac01 -A 192.168.205.136/255.255.255.0/eth0
-- Confirm
[root@rac01 bin]# ./srvctl config vip -n rac01
VIP exists: /192.168.205.136/192.168.205.136/192.168.205.0/255.255.255.0/eth0, hosting node rac01
./srvctl start vip -n rac01
./srvctl start listener -l listener
./srvctl start listener -l listener_prod
For the private and public networks used by RAC, we can change the IP address, the subnet mask and the interface name. The oifcfg command is used to view and adjust the RAC network configuration.
Run as root.
1. View the current configuration
oifcfg getif
In the output, "public" marks the public network; "global" means the same interface name and subnet are used on every node.
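For reference, typical oifcfg getif output on a cluster like this might look as follows (interface names and subnets are illustrative assumptions):
eth0  192.168.214.0  global  public
eth1  192.168.214.0  global  cluster_interconnect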
2. Modify the public network
$ ./oifcfg setif -global eth0/192.168.214.0:public
3. Modify the private network
$ oifcfg setif -global en0/192.168.214.0:cluster_interconnect
If the IP addresses used by RAC are changed, RAC will not work correctly until CRS is restarted so the new addresses take effect:
crsctl stop crs
crsctl start crs
Move the public and private networks onto different interfaces:
$ oifcfg setif -global en3/192.168.214.0:cluster_interconnect
$ oifcfg setif -global en4/192.168.214.0:public
$ oifcfg delif -global en0/192.168.214.0
crsctl stop crs
crsctl start crs    -- restart CRS so the new configuration takes effect
---- Update the inventory
./crsctl delete node -n rac02
cd $ORACLE_HOME/oui/bin
./runInstaller -updateNodeList ORACLE_HOME=$ORACLE_HOME "CLUSTER_NODES={rac01}" CRS=TRUE -silent
./runInstaller -updateNodeList ORACLE_HOME=$ORACLE_HOME "CLUSTER_NODES={rac01}" CRS=TRUE -silent -local
// The two commands above update the node inventory //
--- Test: deleting node myrac02 ---- node 1 was completely unaffected
I. Back up the OCR
su - root
# $GRID_HOME/bin/ocrconfig -manualbackup
# $GRID_HOME/bin/ocrdump /tmp/ocrdump_ocr.bak
II. Delete the DB instance
Shut down the instance on the node being deleted:
sqlplus / as sysdba
shutdown immediate
@ the retained node
su - oracle
[oracle@myrac01 ~]$ dbca -silent -deleteInstance -nodeList myrac02 -gdbName prod -instanceName prod2 -sysDBAUserName sys -sysDBAPassword 123
Deleting instance
20% complete
21% complete
22% complete
26% complete
33% complete
40% complete
46% complete
53% complete
60% complete
66% complete
Completing instance management.
100% complete
Look at the log file "/u01/app/oracle/cfgtoollogs/dbca/prod.log" for further details.
[oracle@myrac01 ~]$
III. Remove the node at the RAC layer (remove the Oracle home)
1. Stop and disable the listener
@ the node being deleted
# su - oracle
-- Listener status
[oracle@myrac02 ~]$ srvctl stop listener -l listener -n myrac02
-- Stop and disable the listener
srvctl disable listener -n <node being deleted>
srvctl stop listener -n <node being deleted>
-- Confirm
$ srvctl status listener
2. Remove the Oracle home
@ the node being deleted
<1> Update the node list
cd $ORACLE_HOME/oui/bin
./runInstaller -updateNodeList ORACLE_HOME=$ORACLE_HOME "CLUSTER_NODES={myrac02}" -local
The log output is as follows:
[oracle@myrac02 bin]$ ./runInstaller -updateNodeList ORACLE_HOME=$ORACLE_HOME "CLUSTER_NODES={myrac02}" -local
Starting Oracle Universal Installer...
Checking swap space: must be greater than 500 MB. Actual 3754 MB Passed
The inventory pointer is located at /etc/oraInst.loc
The inventory is located at /u01/app/oraInventory
'UpdateNodeList' was successful.
<2> Deinstall the Oracle database software and remove the Oracle home
$ORACLE_HOME/deinstall/deinstall -local
<3> Update the node list on any retained node
@ a retained node
su - oracle
cd $ORACLE_HOME/oui/bin
./runInstaller -updateNodeList ORACLE_HOME=$ORACLE_HOME "CLUSTER_NODES={myrac01}"
[oracle@myrac01 bin]$ ./runInstaller -updateNodeList ORACLE_HOME=$ORACLE_HOME "CLUSTER_NODES={myrac01}"
Starting Oracle Universal Installer...
Checking swap space: must be greater than 500 MB. Actual 3811 MB Passed
The inventory pointer is located at /etc/oraInst.loc
The inventory is located at /u01/app/oraInventory
'UpdateNodeList' was successful.
IV. Remove the node at the Grid Infrastructure layer (remove the grid home)
1. Verify that the node status is Unpinned
su - grid
[grid@rac1 ~]$ olsnodes -s -t
If it is pinned, unpin it:
crsctl unpin css -n <node being deleted>
2. On the node being deleted, disable the Clusterware applications and daemons
@ the node being deleted
su - root
# cd $GRID_HOME/crs/install
# ./rootcrs.pl -deconfig -force
3. Remove the node from the cluster node list
@ a retained node
su - root
# crsctl delete node -n myrac02
root@myrac01 bin]# ./crsctl delete node -n myrac02
CRS-4661: Node myrac02 successfully deleted.
[root@myrac01 bin]# ./olsnodes -s -t
myrac01 Active Unpinned
4. Update the node list
@ the node being deleted
su - grid
cd $Grid_home/oui/bin
$ ./runInstaller -updateNodeList ORACLE_HOME=$ORACLE_HOME "CLUSTER_NODES={myrac02}" CRS=TRUE -silent -local
grid@myrac02 bin]$ ./runInstaller -updateNodeList ORACLE_HOME=$ORACLE_HOME "CLUSTER_NODES={myrac02}" CRS=TRUE -silent -local
Starting Oracle Universal Installer...
Checking swap space: must be greater than 500 MB. Actual 3940 MB Passed
The inventory pointer is located at /etc/oraInst.loc
The inventory is located at /u01/app/oraInventory
'UpdateNodeList' was successful.
5. Update the node list on the retained node
@ a retained node
su - grid
cd $Grid_home/oui/bin
$ ./runInstaller -updateNodeList ORACLE_HOME=$ORACLE_HOME "CLUSTER_NODES={myrac01}" CRS=TRUE -silent
[grid@myrac01 bin]$ ./runInstaller -updateNodeList ORACLE_HOME=$ORACLE_HOME "CLUSTER_NODES={myrac01}" CRS=TRUE -silent
Starting Oracle Universal Installer...
Checking swap space: must be greater than 500 MB. Actual 3632 MB Passed
The inventory pointer is located at /etc/oraInst.loc
The inventory is located at /u01/app/oraInventory
'UpdateNodeList' was successful.
6. Deinstall GI and remove the grid home
@ the node being deleted
su - grid
cd $Grid_home/deinstall
./deinstall -local
1. Enter the listener name
2. Enter the VIP
3. Enter the diagnostic_dest directory
4. Enter the OCR/Voting Disk diskgroup name []: OCR
5. De-configuring ASM will drop the diskgroups at cleanup time. Do you want deconfig tool to drop the diskgroups y|n [y]: n
Then run the root script as prompted; press Enter to continue when it finishes.
7. Update the node list on the retained node
@ a retained node
su - grid
cd $Grid_home/oui/bin
$ ./runInstaller -updateNodeList ORACLE_HOME=$ORACLE_HOME "CLUSTER_NODES={myrac01}" CRS=TRUE -silent
8. Run CVU to confirm the node has been removed from the cluster
su - grid
-- Note: nodedel is the stage name; do not change it
$ cluvfy stage -post nodedel -n <node being deleted>
$ cluvfy stage -post nodedel -n myrac02
grid@myrac01 bin]$ cluvfy stage -post nodedel -n myrac02
Performing post-checks for node removal
Checking CRS integrity...
Clusterware version consistency passed
CRS integrity check passed
Node removal check passed
Post-check for node removal was successful.
$ crsctl status res -t
su - oracle
set linesize 200
select INSTANCE_NUMBER,INSTANCE_NAME,HOST_NAME,VERSION,STARTUP_TIME,STATUS from gv$instance;
--- Try restarting HAS
The SCAN and the listeners are all working normally.
This completes the node removal.
Overview:
Adding a node to Oracle RAC 11.2
1. Stop the applications and back up the current system. Strictly speaking the applications can stay up, but try to get a maintenance window for adding the new node. CRS and the RAC database do not need to be stopped, and all existing nodes must be running.
For example, with a two-node cluster you cannot shut one node down and then add a new node.
2. Attach the shared storage. Note whether ASM uses asmlib, udev or raw devices, and keep the new node consistent with the existing nodes.
3. Kernel parameters, limits.conf, user equivalence, NTP synchronization, directories and environment variables, matching uid/gid, and the same package set must all be in place.
4. New node (2), existing node (1).
On the existing node, run the checks as the grid user:
cluvfy stage -post hwos -n myrac02 -verbose
cluvfy stage -pre crsinst -n myrac02 -verbose
cluvfy comp peer -refnode myrac01 -n myrac02 -verbose    -- compare against the existing node
Pay attention to any items reported as mismatched.
cd $GRID_HOME/bin
cluvfy stage -pre nodeadd -n myrac02 -verbose    -- pre-add-node check
5. On the existing node, run addNode.sh from $GRID_HOME/oui/bin (in Oracle 12c it is under $GRID_HOME/addnode) as the grid user to extend CRS to the new node:
addNode.sh "CLUSTER_NEW_NODES={myrac02}" "CLUSTER_NEW_VIRTUAL_HOSTNAMES={myrac02-vip}"
--- export IGNORE_PREADDNODE_CHECKS=Y    -- set this only when you have confirmed that the cluvfy errors being ignored will not cause a real failure.
Instantiating scripts for add node (Tuesday, July 11, 2017 7:54:32 AM PDT)
. 1% Done.
Instantiation of add node scripts complete
Copying to remote nodes (Tuesday, July 11, 2017 7:54:37 AM PDT)
............................................................................................... 96% Done.
Home copied to new nodes
Saving inventory on nodes (Tuesday, July 11, 2017 8:00:46 AM PDT)
. 100% Done.
Save inventory complete
WARNING:
The following configuration scripts need to be executed as the "root" user in each new cluster node. Each script in the list below is followed by a list of nodes.
/u01/app/11.2.0/grid/root.sh #On nodes myrac02
To execute the configuration scripts:
1. Open a terminal window
2. Log in as "root"
3. Run the scripts in each cluster node
Then wait; the elapsed time depends on server performance and network conditions.
6. After the scripts complete, check the nodeadd result again from myrac01:
[grid@myrac02 bin]$ ./cluvfy stage -post nodeadd -n myrac02 -verbose
[grid@r1 ~]$ olsnodes
r1
r2
[grid@r1 ~]$ crsctl check cluster -all
**************************************************************
myrac01:
CRS-4537: Cluster Ready Services is online
CRS-4529: Cluster Synchronization Services is online
CRS-4533: Event Manager is online
**************************************************************
myrac02:
CRS-4537: Cluster Ready Services is online
CRS-4529: Cluster Synchronization Services is online
CRS-4533: Event Manager is online
**************************************************************
7. Extend the Oracle RAC database to the new node (r2). On the existing node, run addNode.sh from $ORACLE_HOME/oui/bin (in Oracle 12c it is under $ORACLE_HOME/addnode):
[oracle@r1 bin]$ ./addNode.sh "CLUSTER_NEW_NODES={myrac02}"    -- runs in a new X window; takes longer than adding the clusterware node
Partial log:
WARNING:
The following configuration scripts need to be executed as the "root" user in each new cluster node. Each script in the list below is followed by a list of nodes.
/u01/app/oracle/product/11.2.0/db_1/root.sh #On nodes myrac02
To execute the configuration scripts:
1. Open a terminal window
2. Log in as "root"
3. Run the scripts in each cluster node
The Cluster Node Addition of /u01/app/oracle/product/11.2.0/db_1 was successful.
Please check '/tmp/silentInstall.log' for more details.
8. After it completes, add a new instance on the new node to the cluster database:
srvctl add instance -d prod -i prod2 -n myrac02
srvctl add instance -d tt -i tt2 -n r2    -- add the instance for the new node
[oracle@r2 ~]$ srvctl config database -d tt | grep instance    -- confirm the instance was added successfully
Database instances: tt1,tt2
System altered.
10. Start the instance on the new node
1. Modify the pfile/spfile.
2. Add redo log groups for the new thread.
3. Enable the new thread.
SQL> alter database add logfile thread 2 group 3 size 30M;
SQL> alter database add logfile thread 2 group 4 size 30M;
SQL> alter database enable thread 2;
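A quick way to confirm the new thread (illustrative check; run from any instance):
SQL> select thread#, status, enabled from v$thread;
-- after ENABLE THREAD 2, thread 2 shows ENABLED = PUBLIC; its STATUS becomes OPEN once the new instance starts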
$ dbca -silent -addInstance -nodeList myrac02 -gdbName prod -instanceName prod2 -sysDBAUserName sys -sysDBAPassword "1234"
[oracle@r2 ~]$ srvctl start instance -d tt -i tt2
11. Confirm that the instance was added successfully:
[oracle@r1 bin]$ srvctl status database -d tt
Instance tt1 is running on node r1
Instance tt2 is running on node r2
Basic steps:
(1) Configure the network
(2) Add the groups, directories and users
(3) Adjust the relevant OS parameters
(4) Set the user environment variables
(5) Configure the storage
(6) Configure SSH user equivalence between the nodes
(7) Install the required RPM packages
Note: steps 1-7 are omitted here (the changes should mirror the configuration of the existing nodes); a quick equivalence check is sketched below.
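A minimal sketch of such a check from node 1 (node and user names as used elsewhere in this document):
ssh myrac02 date                    -- must return without a password prompt (run as both grid and oracle)
id grid; ssh myrac02 id grid        -- uid/gid must match on both nodes
id oracle; ssh myrac02 id oracle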
(8) Pre-checks before installing GI
cd $GRID_HOME/bin
[grid@myrac01 bin]$ cluvfy stage -pre nodeadd -n myrac02 -fixup -verbose
(9) Add the node
On node 1, go to $GRID_HOME/oui/bin:
$ ./addNode.sh CLUSTER_NEW_NODES={myrac02} cluster_new_virtual_hostnames={myrac02-vip} -ignorePrereq
(10) Verify
cluvfy stage -post nodeadd -n myrac02 -verbose
Step 1: Add an application VIP
The first step is to create an application VIP. The VIP will be used to access Oracle GoldenGate (e.g. by a remote pump or by the Management Pack for Oracle GoldenGate). Oracle Clusterware will assign the VIP to a physical server, and migrate the VIP if that server were to go down or if you instruct Clusterware to do so.
To create the application VIP, login as root and run:
GRID_HOME/bin/appvipcfg create -network=1 \
-ip=192.168.214.190 \
-vipname=mvggatevip \
-user=root
As root, allow the Oracle Grid infrastructure software owner (e.g. oracle) to run the script to start the VIP.
GRID_HOME/bin/crsctl setperm resource mvggatevip -u user:oracle:r-x
Then, as oracle, start the VIP:
GRID_HOME/bin/crsctl start resource mvggatevip
To validate whether the VIP is running and on which node it is running, execute:
GRID_HOME/bin/crsctl status resource mvggatevip
For example:
[oracle@coe-01 ~]$ crsctl status resource mvggatevip
NAME=mvggatevip
TYPE=app.appvip.type
TARGET=ONLINE
STATE=ONLINE on coe-02
See http://www.cnblogs.com/lhrbest/p/4576361.html for a configuration reference.
Step 2: Develop an agent script
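The whitepaper's agent script is not reproduced in these notes. Below is only a minimal sketch of the start/stop/check/clean interface that a Clusterware action script must implement; the OGG_HOME path and the process-check pattern are assumptions, not the actual /ogg/action/ogg_action.scr referenced earlier.
#!/bin/sh
# Illustrative skeleton of a Clusterware action script for GoldenGate (not the original 11gr2_gg_action.scr)
OGG_HOME=/ogg    # assumed GoldenGate installation directory
case "$1" in
  start)
    cd $OGG_HOME && ./ggsci <<EOF
START MGR
EOF
    exit 0
    ;;
  stop|clean)
    cd $OGG_HOME && ./ggsci <<EOF
STOP MGR!
EOF
    exit 0
    ;;
  check)
    # report online only if the GoldenGate manager process is running (heuristic: mgr.prm on its command line)
    if ps -ef | grep "$OGG_HOME/dirprm/mgr.prm" | grep -v grep > /dev/null; then
      exit 0
    else
      exit 1
    fi
    ;;
  *)
    exit 0
    ;;
esac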
Step 3: Register a resource in Oracle Clusterware
Connect as the oracle user and execute:
1. Use an additional dependency on the local resource ora.asm. This resource is available when the ASM instance is running, which introduces a slight change to the crsctl add resource command (changes highlighted):
GRID_HOME/bin/crsctl add resource ggateapp \
  -type cluster_resource \
  -attr "ACTION_SCRIPT=/mnt/acfs/oracle/grid/11gr2_gg_action.scr, CHECK_INTERVAL=30, START_DEPENDENCIES='hard(mvggatevip,ora.asm) pullup(mvggatevip)', STOP_DEPENDENCIES='hard(mvggatevip)'"
Set the resource ownership to the Oracle GoldenGate software owner. Run this command as root.
GRID_HOME/bin/crsctl setperm resource ggateapp -o mvandewiel
Step 4: Start the application
From now on you should always use Oracle Clusterware to start Oracle GoldenGate. Login as oracle and execute:
GRID_HOME/bin/crsctl start resource ggateapp
To check the status of the application:
GRID_HOME/bin/crsctl status resource ggateapp
For example:
[oracle@coe-02 grid]$ crsctl status resource ggateapp
NAME=ggateapp
TYPE=cluster_resource
TARGET=ONLINE
STATE=ONLINE on coe-02
[oracle@coe-02 grid]$
Manage the application
When Oracle GoldenGate is running, and you want to move Oracle GoldenGate to run on a different server, you can use the GRID_HOME/bin/crsctl relocate resource command with the force option to move the VIP as well (as oracle, on any node):
[oracle@coe-02 grid]$ crsctl relocate resource ggateapp -f
CRS-2673: Attempting to stop 'ggateapp' on 'coe-01'
CRS-2677: Stop of 'ggateapp' on 'coe-01' succeeded
CRS-2673: Attempting to stop 'mvggatevip' on 'coe-01'
CRS-2677: Stop of 'mvggatevip' on 'coe-01' succeeded
CRS-2672: Attempting to start 'mvggatevip' on 'coe-02'
CRS-2676: Start of 'mvggatevip' on 'coe-02' succeeded
CRS-2672: Attempting to start 'ggateapp' on 'coe-02'
CRS-2676: Start of 'ggateapp' on 'coe-02' succeeded
[oracle@coe-02 grid]$
Cleanup
If you want to stop Oracle Clusterware from managing Oracle GoldenGate, and you want to cleanup the changes you made, then:
Stop Oracle GoldenGate (login as oracle):
GRID_HOME/bin/crsctl stop resource ggateapp
Stop the VIP (as oracle):
GRID_HOME/bin/crsctl stop resource mvggatevip
Delete the application ggateapp as the application owner (mvandewiel) or root:
GRID_HOME/bin/crsctl delete resource ggateapp
Delete the VIP (login as root):
GRID_HOME/bin/appvipcfg delete -vipname=mvggatevip
Delete the agent action script 11gr2_gg_action.scr at the OS level.
srvctl add listener -l NEW_MACLEAN_LISTENER -o $CRS_HOME -p 1601 -k 1
Add the following entries to the listener.ora file:
SID_LIST_NEW_MACLEAN_LISTENER =
(SID_LIST =
(SID_DESC =
(GLOBAL_DBNAME = VPROD)
(ORACLE_HOME = /g01/11.2.0/grid)
(SID_NAME = VPROD1)
)
)
for ((i=1;i<400;i++));do
sqlplus system/[email protected]/prod < insert into connecttest select 'PROD'||userenv('INSTANCE'),sysdate from dual; commit; exit; eof done 1、本报告内容所使用的环境为OEL 6.5,Oracle RAC 11.2.0.3 PSU 10 2、 为了减少RAC负载均衡带来过多的GC等待,需要将业务进行节点分离。通过Oracle TAF技术,配置Service信息,对不同业务的连接进行管理,使每个业务固定在指定的一个或者多个节点。当节点出现故障时,Service能够自动Failover,而Failover的过程,前端应用是没有影响的。 3、通过Service实现节点的负载均衡。 1、主备模式 配置TAF,可以通过DBCA图形化配置。这里使用命令行的方式。 (1) 添加service [oracle@rac1 ~]$ srvctl add service -d BDNP -s BDSM -r 'BDNP1' -a 'BDNP2' -P PRECONNECT -e SELECT -x TRUE [oracle@rac1 ~]$ srvctl config service -d BDNP -s BDSM -a Warning:-a option has been deprecated and will be ignored. Service name: BDSM Service is enabled Server pool: BDNP_BDSM Cardinality: 1 Disconnect: false Service role: PRIMARY Management policy: AUTOMATIC DTP transaction: true AQ HA notifications: false Failover type: SELECT Failover method: NONE TAF failover retries: 0 TAF failover delay: 0 Connection Load Balancing Goal: LONG Runtime Load Balancing Goal: NONE TAF policy specification: PRECONNECT Edition: Preferred instances: BDNP1 Available instances: BDNP2 (2) 更改service配置 srvctl modify service -d BDNP -s BDSM -m BASIC -e SELECT -q TRUE -j LONG srvctl modify service –s prod –d handtask –a handtask2 –i handtak1 –n srvctl modify service –s prod –d handtask –P BASIC –m BASIC –e select –j long srvctl config service -d BDNP -s BDSM Service name: BDSM Service is enabled Server pool: BDNP_BDSM Cardinality: 1 Disconnect: false Service role: PRIMARY Management policy: AUTOMATIC DTP transaction: true AQ HA notifications: true Failover type: SELECT Failover method: BASIC TAF failover retries: 0 TAF failover delay: 0 Connection Load Balancing Goal: LONG Runtime Load Balancing Goal: NONE TAF policy specification: PRECONNECT Edition: Preferred instances: BDNP1 Available instances: BDNP2 (3)启动service [oracle@rac1 ~]$ srvctl start service -d BDNP -s BDSM (4)关闭和删除service 删除service之前要先关闭,如果不关闭,要用-f参数。 [oracle@rac1 ~]$ srvctl stop service -d BDNP -s BDSM [oracle@rac1 ~]$ srvctl remove service -d BDNP -s BDSM [-f] (5) Switch service 当主节点挂掉后,service会切换到备节点。主节点恢复后,service不会自动切换回来,需要手工干预。-i是service启动节点,-t是切换目标节点。比如要将service从BDNP1切换到BDNP2上: [oracle@rac1 ~]$srvctl relocate service -d BDNP -s BDVMP -i BDNP1 -t BDNP2 注意:不要选择业务高峰期切换,否则切换会超时导致切换失败,同时影响session连接。 2、 负载均衡模式 负载均衡模式相对简单,service会在所有的instance中注册。 (1) 添加service [oracle@rac1 ~]$srvctl add service -d BDNP -s BDVMP -r BDNP1,BDNP2 –P BASIC (2) 修改service [oracle@rac1 ~]$ srvctl config service -d BDNP -s BDPK Service name: BDPK Service is enabled Server pool: BDNP_BDPK Cardinality: 2 Disconnect: false Service role: PRIMARY Management policy: AUTOMATIC DTP transaction: false AQ HA notifications: true Failover type: SELECT Failover method: BASIC TAF failover retries: 0 TAF failover delay: 0 Connection Load Balancing Goal: LONG Runtime Load Balancing Goal: NONE TAF policy specification: BASIC Edition: Preferred instances: BDNP1,BDNP2 Available instances: (3) 关闭和删除service 同主备模式。 1、关闭数据库 通过shutdown immediate关闭数据库,Service可以自行切换到另一个节点。 通过srvctl stop instance关闭数据库,service不自动切换,service挂起,无法提供服务。要通过-f参数才可以实现service切换。 srvctl stop instance –d BDNP –I BDNP1 -f 2、关闭Cluster 关闭cluster,service可以自动切换。 1、 当关闭instance时候,要添加-f参数。 2、 Instance恢复后,及时将service relocate,否则业务繁忙期relocate可能会失败,还会影响业务。 3、 Service name 不会同步到dataguard中。 GOBO4_TAF = (DESCRIPTION = (ADDRESS = (PROTOCOL = TCP)(HOST = 192.168.7.61)(PORT = 1521)) (ADDRESS = (PROTOCOL = TCP)(HOST = 192.168.7.62)(PORT = 1521)) (LOAD_BALANCE = yes) (CONNECT_DATA = (SERVER = DEDICATED) (SERVICE_NAME = GOBO4) 
(FAILOVER_MODE = #FAILOVER_MODE项参数 (TYPE = session) (METHOD = basic) (RETRIES = 180 (DELAY = 5) ) ) ) FAILOVER_MODE项是实现TAF的主要配置内容,下面对其进行描述. METHOD: 用户定义何时创建到其实例的连接,有BASIC 和 PRECONNECT 两种可选值 BASIC: 客户端通过地址列表成功建立连接后,即仅当客户端感知到节点故障时才创建到其他实例的连接 PRECONNECT: 预连接模式,是在最初建立连接时就同时建立到所有实例的连接,当发生故障时,立刻就可以切换到其他链路上 上述两种方式各有优劣,前者建立连接的开销相对较小,但failover时会产生延迟,而后者正好与前者相反 TYPE: 用于定义发生故障时对完成的SQL 语句如何处理,其中有2种类型:session 和select select:使用select方式,Oracle net会跟踪事务期间的所有select语句,并跟踪每一个与当前select相关的游标已返回多少行给客户 端。此时,假定select查询已返回500行,客户端当前连接的节点出现故障,Oracle Net自动建立连接到幸存的实例上并继续返回 剩余的行数给客户端。假定总行数为1500,行,则1000行从剩余节点返回。 session: 使用session方式,所有select查询相关的结果在重新建立新的连接后将全部丢失,需要重新发布select命令。 上述两种方式适用于不同的情形,对于select方式,通常使用与OLAP数据库,而对于session方式则使用与OLTP数据库。因为select 方式,Oracle 必须为每个session保存更多的内容,包括游标,用户上下文等,需要更多的资源。 其次,两种方式期间所有未提交的DML事务将自动回滚且必须重启启动。alter session语句不会failover。 临时对象不会failover也不能被重新启动。 RETRIES: 表示重试的次数 DELAY:表示重试的间隔时间 1 3 5 建议配置个数 crsctl query css votedisk crsctl replace votedisk +DGSYS oracle通过在/etc/oracle/ocr.loc(linux)文件中指定ocr在共享存储上的位置,/var/opt/oracle/ocr.loc(Solaris System系统存放的位置) ocrcheck [root@rac01 bin]# ./ocrconfig –manualbackup --手工备份 异常恢复 crsctl stop crs crsctl start crs –excl crsctl stop resource ora.crsd –init ocrconfig –restore file_name crsctl stop crs –f crsctl start crs ---添加ocr文件 ./ocrconfig -add +fra ./ocrconfig -delete +ocr ./ocrconfig -replace +fra -replacement +ocr Oracle 11g R2 Grid Infrastructure 的安装与配置较之前的版本提供了更多的灵活性。在Grid Infrastructure安装完毕前执行root.sh经常容易出现错误,并且需要修复该错误才能得以继续。在这个版本中我们可以直接通过执行脚本rootcrs.pl来重新配置Grid Infrastructure而无需先卸载Grid Infrastructure,然后修复故障后进行再次安装。下面描述了rootcrs.pl的用法。 #重新配置Grid Infrastructure并不会移除已经复制的二进制文件,仅仅是回复到配置crs之前的状态,下面是其步骤 1 重新创建olr文件。 a、使用root用户登录,并执行下面的命令(所有节点,但最后一个节点除外) #$GRID_HOME/crs/install/rootcrs.pl -verbose -deconfig -force b、同样使用root用户在最后一个节点执行下面的命令。该命令将清空ocr 配置和voting disk # $GRID_HOME/crs/install/rootcrs.pl -verbose -deconfig -force -lastnode CRS-4611: Successful deletion of voting disk +DATA. CRS-4611: Successful deletion of voting disk +OCR.—删除OCR ---最后一个节点将删除OCR磁盘组, ---其他节点在执行时,不会对OCR磁盘组进行删除,只是删除ASM、VIP以及local OLR ---如果节点在 c、如果使用了ASM磁盘,继续下面的操作以使得ASM重新作为候选磁盘 # dd if=/dev/zero of=/dev/sdb1 bs=1024 count=100 --清除对应的ocr磁盘组即可(是否对数据磁盘组有影响) # /etc/init.d/oracleasm deletedisk DATA /dev/sdb1 # /etc/init.d/oracleasm createdisk DATA /dev/sdb1 3.在节点1 执行下面脚本 $GRID_HOME./root.sh 日志: tail -f /u01/app/11.2.0/grid/cfgtoollogs/crsconfig/rootcrs_myrac01.log 1 创建OLR OLR initialization – successful 2 Disk Group OCR created successfully. (第一个节点执行root.sh时) [root@rac1 grid]# ./root.sh Performing root user operation for Oracle 11g The following environment variables are set as: ORACLE_OWNER= grid ORACLE_HOME= /u01/app/11.2.0/grid Enter the full pathname of the local bin directory: [/usr/local/bin]: The contents of "dbhome" have not changed. No need to overwrite. The contents of "oraenv" have not changed. No need to overwrite. The contents of "coraenv" have not changed. No need to overwrite. Entries will be added to the /etc/oratab file as needed by Database Configuration Assistant when a database is created Finished running generic part of root script. Now product-specific root actions will be performed. 
Using configuration parameter file: /u01/app/11.2.0/grid/crs/install/crsconfig_params User ignored Prerequisites during installation Installing Trace File Analyzer OLR initialization - successful Adding Clusterware entries to upstart CRS-2672: Attempting to start 'ora.mdnsd' on 'rac1' CRS-2676: Start of 'ora.mdnsd' on 'rac1' succeeded CRS-2672: Attempting to start 'ora.gpnpd' on 'rac1' CRS-2676: Start of 'ora.gpnpd' on 'rac1' succeeded CRS-2672: Attempting to start 'ora.cssdmonitor' on 'rac1' CRS-2672: Attempting to start 'ora.gipcd' on 'rac1' CRS-2676: Start of 'ora.cssdmonitor' on 'rac1' succeeded CRS-2676: Start of 'ora.gipcd' on 'rac1' succeeded CRS-2672: Attempting to start 'ora.cssd' on 'rac1' CRS-2672: Attempting to start 'ora.diskmon' on 'rac1' CRS-2676: Start of 'ora.diskmon' on 'rac1' succeeded CRS-2676: Start of 'ora.cssd' on 'rac1' succeeded 已成功创建并启动 ASM。 已成功创建磁盘组DATA。 4.在节点2 执行下面脚本 $GRID_HOME/root.sh 监控日志:$GRID_HOME/log/{hostname}|alter* 5. 把其他磁盘组加载 Sqlplus / as sysasm alter diskgroup data mount; alter diskgroup fra mount; ora.DATA.dg ora....up.type 0/5 0/ ONLINE ONLINE myrac02 ora.FRA.dg ora....up.type 0/5 0/ ONLINE ONLINE myrac02 --资源状态正常了 srvctl add database -d pmstest -o /u01/app/oracle/11.2.0/db_1 srvctl add instance -d pmstest -i pmstest1 -n pmstest1 srvctl add instance -d pmstest -i pmstest2 -n pmstest2 srvctl start database -d pmstest 如果节点grid出现问题,可以进行重新配置 # perl $GRID_HOME/crs/install/rootcrs.pl -verbose -deconfig -force #GRID_HOME./root.sh 案例: 主节点出现问题,进行rootcrs.pl -verbose -deconfig –force, [grid@myrac02 ~]$ olsnodes -s -t myrac01 Inactive Unpinned myrac02 Active Unpinned 则节点2资源如下: 在节点1执行root.sh后: 部分日志: Entries will be added to the /etc/oratab file as needed by Database Configuration Assistant when a database is created Finished running generic part of root script. Now product-specific root actions will be performed. 
Using configuration parameter file: /u01/app/11.2.0/grid/crs/install/crsconfig_params User ignored Prerequisites during installation Installing Trace File Analyzer OLR initialization - successful Adding Clusterware entries to upstart CRS-4402: The CSS daemon was started in exclusive mode but found an active CSS daemon on node myrac02, number 2, and is terminating An active cluster was found during exclusive startup, restarting to join the cluster srvctl add database -d prod -o /u01/app/oracle/11.2.0/db_1 srvctl add instance -d prod -i prod1 -n myrac01 只添加到当前的实例 ALTER DATABASE ADD LOGFILE GROUP 5 '+DATA02'; ALTER DATABASE ADD STANDBY LOGFILE GROUP 5 '+DATA02'; SQL> desc v$standby_log 指定实例添加日志组 ALTER DATABASE ADD LOGFILE instance 'orcll2' GROUP 6 '+DATA02' ; alter database add logfile thread 2 group 3 size 30M; ALTER DATABASE ADD STANDBY LOGFILE instance 'orcll2' GROUP 6 '+DATA02' ; alter database add STANDBY logfile thread 1 group 5 ; ALTER SYSTEM ARCHIVE LOG instance 'orcll2' CURRENT; ALTER SYSTTEM CHECKPOINT LOCAL; Alter database enable thread 2; 1、主要步骤: 备份spfile,以防止参数修改失败导致数据库无法启动 修改集群参数cluster_database为false 启动单实例到mount状态 将数据库置于归档模式(alter database archivelog/noarchivelog) 修改集群参数cluster_database为true 关闭单实例 启动集群数据库 select instance_name,host_name,status from gv$instance; create pfile='/u01/oracle/db/dbs/ora10g_robin.ora' from spfile; alter system set cluster_database=false scope=spfile sid='*'; srvctl stop database -d ora10g -->关闭数据库 srvctl start instance -d ora10g -i ora10g1 -o mount -->启动单个实例到mount状态 alter database archivelog; alter system set cluster_database=true scope=spfile sid='*'; ho srvctl stop instance -d ora10g -i ora10g1 ho srvctl start database -d ora10g --调整闪回模式: select flashback_on from v$database; alter system set cluster_database=false scope=spfile; SQL> alter system set db_recovery_file_dest_size=1g scope=spfile; SQL> alter system set db_recovery_file_dest='/ogg' scope=spfile; [grid@myrac02 ~]$ srvctl stop database -d prod [grid@myrac02 ~]$ srvctl start instance -d prod -i prod1 -o mount alter database flashback on; alter system set cluster_database=true scope=spfile; [grid@myrac01 ~]$ crsctl start res ora.prod.db =srvctl start database –d prod CRS-2672: Attempting to start 'ora.prod.db' on 'myrac01' CRS-2672: Attempting to start 'ora.prod.db' on 'myrac02' CRS-2676: Start of 'ora.prod.db' on 'myrac01' succeeded CRS-2676: Start of 'ora.prod.db' on 'myrac02' succeeded [grid@myrac01 ~]$ crsctl status res ora.prod.db NAME=ora.prod.db TYPE=ora.database.type TARGET=ONLINE , ONLINE STATE=ONLINE on myrac01, ONLINE on myrac02 共享存储 RMAN> backup archivelog all format '/u01/app/oracle/bakup/archbak_20151123.arc' delete all input; delete archivelog until sequence 8 thread 1; delete archivelog until sequence 8 thread 2; DELETE ARCHIVELOG FROM SEQUENCE 20 UNTIL SEQUENCE 28 THREAD 1; DELETE ARCHIVELOG FROM SEQUENCE 20 UNTIL SEQUENCE 28 THREAD 2; BACKUP ARCHIVELOG FROM SEQUENCE 20 UNTIL SEQUENCE 28 THREAD 1; BAKUP ARCHIVELOG FROM SEQUENCE 20 UNTIL SEQUENCE 28 THREAD 2; 本地存储 run { allocate channel c1 device type disk connect sys/oracle@orcl1; allocate channel c2 device type disk connect sys/oracle@orcl2; backup archivelog all format '/home/oracle/123_%u.arc'; } RMAN> run { allocate channel c1 device type disk format '/soft/backup/%U' connect sys/6212327@rac1; allocate channel c2 device type disk format '/soft/backup/%U' connect sys/6212327@rac2; allocate auxiliary channel ac1 device type disk format '/soft/backup/%U'; allocate auxiliary channel ac2 device type disk format 
'/soft/backup/%U'; duplicate target database for standby; } oracle官方给出的定义: Oracle Automatic Storage Management Cluster File System (Oracle ACFS) is a multi-platform, scalable file system, and storage management technology that extends Oracle Automatic Storage Management (Oracle ASM) functionality to support customer files maintained outside of Oracle Database. Oracle ACFS supports many database and application files, including executables,database trace files, database alert logs, application reports, BFILEs, and configuration files. Other supported files are video, audio, text, images, engineering drawings, and other general-purpose application file data. 大意是ACFS是一个支持多个平台,可扩展的,基于ASM的集群文件系统,可以用来存储数据库和各种应用的文件,包括数据库的trace文件,alert日志文件和配置文件等等,也可以用来存储视频,音频,文本,图片等文件! 在这之前,集群文件系统使用过redhat的gfs,还有开源的ocfs2,gfs的感觉是配置太繁琐且复杂,因为是基于RHCS套件,所以很多功能对应单纯的集群文件系统来说显得有些冗余;ocfs2属于被放弃的孩子,目前已经停止开发了,不过胜在配置简单;ACFS在11g r2中推出,基于grid infrastructure,配置上也算容易,且在ASM的基础上进行发展,稳定性应该还是有保证的,下一步打算利用ACFS测试下rac环境下的golden gate复制!下面来介绍下11G RAC环境下ACFS的使用! [grid@rac1 ~]$ crs_stat -t -v ora.registry.acfs Name Type R/RA F/FT Target State Host ---------------------------------------------------------------------- ora....ry.acfs ora....fs.type 0/5 0/ ONLINE ONLINE rac1 [grid@rac1 ~]$ crs_stat -t -v ora.ACFS.dg Name Type R/RA F/FT Target State Host ---------------------------------------------------------------------- ora.ACFS.dg ora....up.type 0/5 0/ ONLINE ONLINE rac1 二:使用asmca图形化工具,在asm磁盘组中创建volume并格式化成ACFS文件系统 [root@rac1 ~]# su - grid [grid@rac1 ~]$ !exp export DISPLAY=192.168.1.105:0 [grid@rac1 ~]$ asmca cd /u01/app/grid/cfgtoollogs/asmca/scripts 三:查看两个节点是否均已成功挂载ACFS,并测试读写 [grid@rac1 ~]$ df -h Filesystem Size Used Avail Use% Mounted on /dev/vda3 26G 14G 11G 58% / /dev/vda1 99M 12M 83M 13% /boot tmpfs 1.2G 787M 441M 65% /dev/shm /dev/asm/vol1-330 5.0G 75M 5.0G 2% /u01/app/grid/acfsmounts/acfs_vol1 [grid@rac1 ~]$ ssh rac2 "df -h" Filesystem Size Used Avail Use% Mounted on /dev/vda3 26G 14G 10G 59% / /dev/vda1 99M 12M 83M 13% /boot tmpfs 1.2G 787M 441M 65% /dev/shm /dev/asm/vol1-330 5.0G 75M 5.0G 2% /u01/app/grid/acfsmounts/acfs_vol1 [grid@rac1 ~]$ cd /u01/app/grid/acfsmounts/acfs_vol1 [grid@rac1 acfs_vol1]$ ls lost+found drwx------ 2 root root 65536 Jul 9 09:24 lost+found [grid@rac1 acfs_vol1]$ cp /etc/passwd ./ [grid@rac2 ~]$ cd /u01/app/grid/acfsmounts/acfs_vol1 [grid@rac2 acfs_vol1]$ ls lost+found passwd [grid@rac2 acfs_vol1]$ head -1 passwd root:x:0:0:root:/root:/bin/bash ASMCMD> volcreate –G DATAC1 –s 200G vol1 ASMCMD> volinfo –G DATAC1 vol1 mount -t acfs /dev/asm/vol2-330 /vol2/ mkfs.acfs /dev/asm/vol2-330 [root@rac1 ~]# acfsutil registry -a /dev/asm/vol2-330 /ogg(注册后,节点2会自动挂载) 双节点创建/aradmin目录。将权限给予oracle:oinstall [grid@rac1 ~]$ echo $ORACLE_SID +ASM1 [grid@rac1 ~]$ asmcmd ASMCMD> ls ACFS/ CRS/ DATA/ FRA/ ASMCMD> volcreate //查看帮助命令 usage: volcreate -G diskgroup -s size [ --column number ] [ --width stripe_width ] [--redundancy {high|mirror|unprotected} ] [--primary {hot|cold}] [--secondary {hot|cold}] volume help: help volcreate ASMCMD> volcreate -G ACFS -s 5G vol2 //如果空间不足就会报这个错误 ASMCMD> volcreate –G DATAC1 –s 200G vol1 ASMCMD>mkfs.acfs /dev/asm/vol2-330 ORA-15032: not all alterations performed ORA-15041: diskgroup "ACFS" space exhausted (DBD ERROR: OCIStmtExecute) ASMCMD> volcreate -G ACFS -s 4G vol2 ASMCMD> volinfo -G ACFS vol2 Diskgroup Name: ACFS Volume Name: VOL2 Volume Device: /dev/asm/vol2-330 State: ENABLED Size (MB): 4096 Resize Unit (MB): 32 Redundancy: UNPROT Stripe Columns: 4 Stripe Width (K): 128 Usage: Mountpath: 
ASMCMD> volinfo -G ACFS vol1 Diskgroup Name: ACFS Volume Name: VOL1 Volume Device: /dev/asm/vol1-330 State: ENABLED Size (MB): 5120 Resize Unit (MB): 32 Redundancy: UNPROT Stripe Columns: 4 Stripe Width (K): 128 Usage: ACFS Mountpath: /u01/app/grid/acfsmounts/acfs_vol1 [root@rac1 ~]# mkdir /vol2 (节点2创建相同的挂载点) [root@rac1 ~]# mkfs.acfs: version = 11.2.0.3.0 mkfs.acfs: on-disk version = 39.0 mkfs.acfs: volume = /dev/asm/vol2-330 mkfs.acfs: volume size = 4294967296 mkfs.acfs: Format complete. [root@rac1 ~]# mount -t acfs /dev/asm/vol2-330 /vol2/ [root@rac1 ~]# df -h /vol2 Filesystem Size Used Avail Use% Mounted on /dev/asm/vol2-330 4.0G 45M 4.0G 2% /vol2 [root@rac1 ~]# acfsutil registry -a /dev/asm/vol2-330 /ogg(注册后,节点2会自动挂载) acfsutil registry: mount point /vol2 successfully added to Oracle Registry Asfs资源的启动与停止 crsctl stop res “ora.registry.acfs” crsctl start res “ora.registry.acfs” 挂载与卸载 mount.acfs -o all umount -t acfs –a [root@rac1 ~]# mount -t acfs /dev/asm/vol2-330 /vol2/ 把文件系统扩展到30G; $acfsutil size 30G /ogg $acfsutil size +20m /ogg $acfsutil size -20m /ogg $ acfsutil registry -a /dev/asm/oggvol-48 /ogg gpnp profile是一个xml文件,保存在每个节点的本地,这个文件记录的信息是节点要加入集群中所需要的基础信息,这个文件也需要节点间同步,GRID设计GPNPD进程来进行节点间同步。 $GRID_HOME/gpnp/$HOSTNAME/profile/peer/profile.xml $GRID_HOME/gpnp/profile/peer/profile.xml(全局备份) GPnP Profile 的功能类似于 SPFILE,其用于保存启动数据库集群程序所需的必要信息,如下表所示。 GPnP Profile 参数 集群名称(Cluster name) 网络类型,包含业务及心跳网络(Network classifications, Public/Private) 用于 CSS 进程的存储(Storage to be used for CSS) 用于 ASM 的存储(Storage to be used for ASM : SPFILE location, ASM DiskString etc) 数字签名信息(Digital signature information):GPnP Profile 对安全十分敏感,其可识别根分区的信息,并且保存了数据签名的配置权限。 GPnP Profile 文件是一个保存于 $GRID_HOME/gpnp/ 其用于正确描述 RAC 每个节点的全局特性。每个节点上都会保存一个本地的 GPnP Profile,并且由 GPnP 守护进程(GPnPD)维护。 GPnP Profile 将会在以下情况被更新。 (1).GPnP 守护进程(GPnPD)将在软件安装、系统启动或 Profile 更新时复制 Profile 的变化,以此实现所有节点 GPnP Profile 的同步更新。 (2).当由 oifcfg、crsctl、asmcmd 等命令更改集群配置时,GPnP Profile 都将被同步更新。 启动集群软件时,需要访问仲裁盘(Voting Disk)。当仲裁盘为 ASM 磁盘时,以上仲裁盘的信息需要从 GPnP Profile 中读取,其中 GPnP Profile 对仲裁盘信息描述如下。 即使 ASM 实例没有启动,仲裁盘的信息依旧可以通过 kfed 功能读取。(he voting disk is read using kfed utility even if ASM is not up.) 
随后,集群软件将检查是否所有的 RAC 节点都更新了 GPnP Profile 信息,并且 RAC 各节点将依据 GPnP 的配置信息加入集群中。当一个节点加入集群或在集群中启动时,集群软件将在节点上启动 GPnP agent。当节点已经在集群内部时,GPnP agent 将读取已存在于节点内的 GPnP profile。当节点被新加至集群时,GPnP agent 将通过组播协议(mDNS)定位到一个已存在于集群的节点的 GPnP agent,并且从该源端的 agent 上获取 GPnP profile。 随后,CRSD 进程将需要读取 OCR 信息以启动节点上的多个资源,并根据资源状态更新 OCR 信息。(Next CRSD needs to read OCR to startup various resources on the node and hence update it as status of resources changes.)因为 OCR 信息保存于 ASM 磁盘内,所以 CRSD 读取 OCR 信息前需获知 ASM Spfile 参数文件的路径。需要在不同的参数文件中查找 ASM Spfile 路径,因此查找文件的顺序如下:(1).GPnP Profile;(2).ORACLE_HOME/dbs/spfile 可使用 gpnptool 工具对 GPnP Profile 进行维护,常见命令如下。 (1).$> gpnptool get:读取 GPnP Profile 内容(How to read the profile) (2).$> gpnptool lfind:查看运行于本地节点的 GPnP 守护进程(How to find GPnP Deamons are running on the local node) (3).$> gpnptool find:查看基于 mDNS 协议可以探测到的所有 GPnP 守护进程(How to find all RD-discoverable resources of given type) (4).$> gpnptool getpval - md_backup /tmp/dgbackup20090716 md_restore --full -G oradg /tmp/oradgbackup20110323 Oracle RAC DRM基本概念 Linux社区 2012年07月18日 在Oracle RAC中,每个实例均存在一个数据缓存池,每个block的改变都将实例间进行资源协调以获取最大化性能,从而保证数据的完整性。 在RAC集群中,每个被缓存的数据copy,也叫做缓存资源,均存在一个叫做master节点的实例。 在10.1.0.2中,一旦一个cache resource被master一个实例节点, 对缓存资源的重新remaster或者说master节点的自动改变仅仅会发生在RAC实例节点的正常启停或者集群管理资源间的非正常问题发生。 也就是说,如果NODE B是一个缓存资源的master节点,这个资源将被一直master到NODE B直到发生RAC节点的重新配置操作。 在oracle 10g中引进一个新的资源remaster概念叫做DRM(Dynamic Resource management [ID 390483.1]),通过DRM,RAC实例节点的重新配置 已经不再是cache资源被重新remaster的因素,如果cache resource被节点A频繁访问,一个资源可以从NODE B remaster到NODE A。 其他的一些概念如下: In 10gR1 DRM is driven by affinity of files and in 10gR2 it is based on objects. DRM attributes are intentionally undocumented since they may change depending on the version. These attributes should not be changed without discussing with Support. Two instance will not start a DRM operation at the same time however lmd,lms,lmon processes from all instances collectively take part in the DRM operation. Normal activity on the database is not affected due to DRM. This means users continue insert/update/delete operations without any interruptions. Also DRM operations complete very quickly. DRM many cause" latch: cache buffers chains" and "latch: object queue header operation " wait event, you can go throught this way to disable DRM: _gc_affinity_time=0 _gc_undo_affinity=FALSE also, you can used another two implicit parameters dynamic change _gc_affinity_limit=10000000 _gc_affinity_minimum=10000000 如上的值可以根据实际要求改变。 在10g中,可以采用如下方式禁用DRM(当然你也可以只禁用其中的一个模块object affinity或者undo affinity) 在11g中,使用如下语法创建普通冗余磁盘组用于存放ocr和votedisk CREATE DISKGROUP ocr NORMAL REDUNDANCY FAILGROUP a1 DISK '/dev/asm_a1_ocr2' name a1ocr2 FAILGROUP a2 DISK '/dev/asm_a2_ocr2' name a2ocr2 ATTRIBUTE 'compatible.asm' = '11.2'; 这样可以做到一个存储物理故障时,无缝迁移到第二个存储上 但是在使用这种方法作votedisk盘的迁移工作时,报一下错误: [grid@pay1 ~]$ crsctl replace votedisk +OCR Failed to create voting files on disk group OCR. Change to configuration failed, but was successfully rolled back. CRS-4000: Command Replace failed, or completed with errors. 查找其原因,是因为 NORMAL REDUNDANCY的磁盘组需要有个3个failgroup才能用于存放votedisk. 因此需要想办法弄出第三个failgroup,但是第三个failgroup放置在任何一个存储上都不合适,那个存储上放置了2个failgroup,那么这个存储故障时,整个cluster都将由于votedisk没有超过半数而不能正常运行. 
那如何构造第三个failgroup在此时就变得尤为重要了.以下提供集中办法 1.使用nfs技术新挂载一个磁盘,然后将此盘加载成quorum failgroup组,这样就成3个failgroup.metalink上有关于此的说明(Oracle Support Document 1421588.1 (How to Manually Add NFS voting disk to an Extended Cluster using ASM in 11.2) can be found at: https://support.oracle.com/epmos/faces/DocumentDisplay?id=1421588.1) 2.两个存储上都划一个小盘500M,在os系统中做lv镜像,然后使用lv镜像作quorum failgroup组,这样就成3个failgroup.原理和2相同,就是要一个盘做failgroup. 本文仅将第一种方案提供具体的步骤和说明 1.选择一个nfs服务器,并配置合适的权限.为简单示例,为对访问作任何限制 vi /etc/exports /mypool/oraclevotedisk * (rw) 重启动nfs服务 service portmap restart service nfs restart 2.在rac环境中的每一台都创建一个空目录 mkdir /oracle/votedisk 3.挂载分区 mount -o rw,bg,hard,intr,rsize=32768,wsize=32768,tcp,noac,vers=3,timeo=600 192.168.100.2:/mypool/oraclevotedisk /oracle/votedisk 3.在任意一台机器上使用此分区作一个投票盘,并授予合适的权限 dd if=/dev/zero of=/oracle/votedisk/vote_pay bs=1M count=500 --dd if=/dev/zero of=/oracle/votedisk/vote_pay bs=1M count=1000 授予权限 chown grid:asmadmin /oracle/votedisk/vote_pay 4.在一个asm实例中,修改查找串路径,并增加投票盘 alter system set asm_diskstring='/dev/asm*','/oracle/votedisk/vote_pay'; --看文件是否已经被找到 col path format A40 select group_number,name,path,mount_status,header_status,state,REDUNDANCY,FAILGROUP,voting_file from v$asm_disk; --增加投票盘 alter diskgroup ocr add quorum failgroup nfs disk '/oracle/votedisk/vote_pay'; 5.以grid用户切换votedisk [grid@pay1 ~]$ crsctl replace votedisk +OCR Successful addition of voting disk 58c1ac72dff94f25bffc8e649a36c883. Successful addition of voting disk 076f0b3e9b0a4f5cbf26841c540211a7. Successful addition of voting disk 84cf735c784e4f74bf5d55fc99e98422. Successful deletion of voting disk 73fb4a797e624fa9bf382f841340dfa8. Successfully replaced voting disk group with +OCR. 可以看到,现在就可以成功替换投票盘了 6.检查votedisk情况 [grid@pay1 ~]$ crsctl query css votedisk ## STATE File Universal Id File Name Disk group -- ----- ----------------- --------- --------- 1. ONLINE 58c1ac72dff94f25bffc8e649a36c883 (/dev/asm_a1_ocr3) [OCR] 2. ONLINE 076f0b3e9b0a4f5cbf26841c540211a7 (/dev/asm_a2_ocr3) [OCR] 3. ONLINE 84cf735c784e4f74bf5d55fc99e98422 (/oracle/votedisk/vote_pay) [OCR] Located 3 voting disk(s). How to Manually Add NFS voting disk to an Extended Cluster using ASM in 11.2 (Doc ID 1421588.1) Mount Options for Oracle files when used with NFS on NAS devices (Doc ID 359515.1) RAC: Frequently Asked Questions [ID 220970.1] *********************************************************** 什么是Quorum FailGroup *********************************************************** Quorum FailGroup中,只保存Voting Disk,用于RAC on Extended Distance Clusters,做仲裁盘用 要放在其他存储节点上或者通过NFS共享一个zero-padded文件作为voting disk 对于使用2个阵列提供保护的环境中,也可以使用Quorum FG 假设使用high冗余,一共5个投票盘。那么5个投票盘在2个阵列上的数目肯定不同 假设一个阵列上有3个,另一个阵列上有2个,如果存在3个投票盘的阵列宕机,那么将只有2个投票盘可用 这种情况下,集群的所有节点将宕机 这种情况下,必须在第三个地点放置一个投票盘,在2个阵列上各有2个投票盘。这样任意一个阵列宕机,还可以保证有3个投票盘是可用的 *********************************************************** 如何添加Quorum FailGroup *********************************************************** 一般有3个阵列的情况不多,下面测试通过NFS来配置 1.当前情况: HIGH冗余,5个投票盘 [oracle@database2 ~]$ crsctl query css votedisk ## STATE File Universal Id File Name Disk group -- ----- ----------------- --------- --------- 1. ONLINE 7fc99b8d4dc54f27bf967f23524a19e3 (/dev/asm-crs1) [CRSDG] 2. ONLINE 2ffd39d609bc4f71bf2b19de2e71c7a8 (/dev/asm-crs2) [CRSDG] 3. ONLINE 002eb188e9b14ffbbf4d5a607ade51c2 (/dev/asm-crs3) [CRSDG] 4. ONLINE 2319348cf3cc4f6abf116f973d8fd922 (/dev/asm-crs4) [CRSDG] 5. ONLINE 6c3cb875ba7e4fe2bffe97189e2bae25 (/dev/asm-crs5) [CRSDG] Located 5 voting disk(s). 
SQL> select GROUP_NUMBER,DISK_NUMBER,OS_MB,NAME,PATH,FAILGROUP,state from v$asm_disk order by 1,2; GROUP_NUMBER DISK_NUMBER OS_MB NAME PATH FAILGROUP STATE ------------ ----------- ---------- ------------------------------ --------------- ------------------------------ -------- 1 0 2048 CRSDG_0000 /dev/asm-crs01 CRSDG_0000 NORMAL 1 1 2048 CRSDG_0001 /dev/asm-crs02 CRSDG_0001 NORMAL 1 2 2048 CRSDG_0002 /dev/asm-crs03 CRSDG_0002 NORMAL 1 3 2048 CRSDG_0003 /dev/asm-crs04 CRSDG_0003 NORMAL 1 4 2048 CRSDG_0004 /dev/asm-crs05 CRSDG_0004 NORMAL 2 0 20480 DATADG_0000 /dev/asm-data1 DATADG_0000 NORMAL 2 1 20480 DATADG_0001 /dev/asm-data2 DATADG_0001 NORMAL 2.配置NFS 2.1 NFS服务器端 [root@dm01db01 /]# cat /etc/exports /oracle/votedisk 192.168.123.31(rw,sync,no_root_squash) /oracle/votedisk 192.168.123.32(rw,sync,no_root_squash) [root@dm01db01 /]# /etc/rc.d/init.d/portmap start Starting portmap: [ OK ] [root@dm01db01 /]# /etc/rc.d/init.d/nfs start Starting NFS services: [ OK ] Starting NFS quotas: [ OK ] Starting NFS daemon: [ OK ] Starting NFS mountd: [ OK ] Stopping RPC idmapd: [ OK ] Starting RPC idmapd: [ OK ] 2.2 数据库服务器端MOUNT文件系统 # mount -t nfs -o rw,bg,hard,nointr,rsize=32768,wsize=32768,tcp,noac,vers=3,timeo=600,actimeo=0 192.168.123.139:/oracle/votedisk /u01/app/oracle/votedisk # dd if=/dev/zero of=/u01/app/oracle/votedisk/votedisk01 bs=1M count=2048 # chown -R grid.oinstall /u01/app/oracle/votedisk 2.3 添加 quorum fg disk SQL> alter system set asm_diskstring="/dev/asm*","/u01/app/oracle/votedisk/vote*" sid='*' scope=both; SQL> alter diskgroup CRSDG add quorum failgroup FGQ DISK '/u01/app/oracle/votedisk/votedisk01'; SQL> select GROUP_NUMBER,DISK_NUMBER,OS_MB,NAME,PATH,FAILGROUP,state from v$asm_disk order by 1,2; GROUP_NUMBER DISK_NUMBER OS_MB NAME PATH FAILGROUP STATE ------------ ----------- ---------- ------------------------------ --------------- ------------------------------ -------- 1 0 2048 CRSDG_0000 /dev/asm-crs01 CRSDG_0000 NORMAL 1 1 2048 CRSDG_0001 /dev/asm-crs02 CRSDG_0001 NORMAL 1 2 2048 CRSDG_0002 /dev/asm-crs03 CRSDG_0002 NORMAL 1 3 2048 CRSDG_0003 /dev/asm-crs04 CRSDG_0003 NORMAL 1 4 2048 CRSDG_0004 /dev/asm-crs05 CRSDG_0004 NORMAL 1 5 2048 CRSDG_0005 /u01/app/oracle FGQ NORMAL <== /votedisk/voted isk01 2 0 20480 DATADG_0000 /dev/asm-data1 DATADG_0000 NORMAL 2 1 20480 DATADG_0001 /dev/asm-data2 DATADG_0001 NORMAL 2.4 删除多余的磁盘 在quorum fg添加后,可以看到还没有votedisk切换到其上 这时,我们将删除有3个投票盘阵列上的一个投票盘 [grid@database1 votedisk]$ crsctl query css votedisk ## STATE File Universal Id File Name Disk group -- ----- ----------------- --------- --------- 1. ONLINE 46bf9f1d43574f5bbfefe1377db152c4 (/dev/asm-crs01) [CRSDG] 2. ONLINE 10892f57e2e84ffabfa4a5e6fa86aee5 (/dev/asm-crs02) [CRSDG] 3. ONLINE 0a66ffb250394f13bfcfdb1946056058 (/dev/asm-crs03) [CRSDG] 4. ONLINE 83fa917a0e844f23bf27238aff51b57a (/dev/asm-crs05) [CRSDG] 5. ONLINE 18e561a6a5ff4fc0bf996f740aff70da (/dev/asm-crs04) [CRSDG] SQL> alter diskgroup crsdg drop disk CRSDG_0004; <== [grid@database1 votedisk]$ crsctl query css votedisk ## STATE File Universal Id File Name Disk group -- ----- ----------------- --------- --------- 1. ONLINE 46bf9f1d43574f5bbfefe1377db152c4 (/dev/asm-crs01) [CRSDG] 2. ONLINE 10892f57e2e84ffabfa4a5e6fa86aee5 (/dev/asm-crs02) [CRSDG] 3. ONLINE 0a66ffb250394f13bfcfdb1946056058 (/dev/asm-crs03) [CRSDG] 4. ONLINE 18e561a6a5ff4fc0bf996f740aff70da (/dev/asm-crs04) [CRSDG] 5. 
ONLINE 6fc616a6923a4fb2bffca18e44a58533 (/u01/app/oracle/votedisk/votedisk01) [CRSDG] <== *********************************************************** 相关配置 *********************************************************** 对于数据库DiskGroup,创建时应该为不同阵列的盘,指定好FAILGROUP,以确保2份数据(NORMAL冗余)放在不同的阵列中 修改参数asm_preferred_read_failure_groups,不同的节点,从不同的FG组中读取数据 *********************************************************** 测试 *********************************************************** 1.NFS服务器宕机 2014-02-13 16:56:38.476: [cssd(3577)]CRS-1615:No I/O has completed after 50% of the maximum interval. Voting file /u01/app/oracle/votedisk/votedisk01 will be considered not functional in 99910 milliseconds 2014-02-13 16:57:28.541: [cssd(3577)]CRS-1614:No I/O has completed after 75% of the maximum interval. Voting file /u01/app/oracle/votedisk/votedisk01 will be considered not functional in 49850 milliseconds 2014-02-13 16:57:58.565: [cssd(3577)]CRS-1613:No I/O has completed after 90% of the maximum interval. Voting file /u01/app/oracle/votedisk/votedisk01 will be considered not functional in 19830 milliseconds 2014-02-13 16:58:18.573: [cssd(3577)]CRS-1604:CSSD voting file is offline: /u01/app/oracle/votedisk/votedisk01; details at (:CSSNM00058:) in /u01/app/11.2.0/grid/log/database1/cssd/ocssd.log. [root@database1 ~]# crsctl query css votedisk ## STATE File Universal Id File Name Disk group -- ----- ----------------- --------- --------- 1. ONLINE 46bf9f1d43574f5bbfefe1377db152c4 (/dev/asm-crs01) [CRSDG] 2. ONLINE 10892f57e2e84ffabfa4a5e6fa86aee5 (/dev/asm-crs02) [CRSDG] 3. ONLINE 0a66ffb250394f13bfcfdb1946056058 (/dev/asm-crs03) [CRSDG] 4. ONLINE 18e561a6a5ff4fc0bf996f740aff70da (/dev/asm-crs04) [CRSDG] Located 4 voting disk(s). 集群件运行正常此时 恢复 [root@database1 ~]# umount -f /u01/app/oracle/votedisk [root@database1 ~]# mount -t nfs -o rw,bg,hard,nointr,rsize=32768,wsize=32768,tcp,noac,vers=3,timeo=600,actimeo=0 192.168.123.139:/oracle/votedisk /u01/app/oracle/votedisk GROUP_NUMBER DISK_NUMBER OS_MB NAME PATH FAILGROUP STATE ------------ ----------- ---------- ------------------------------ ------------------------------ ------------------------------ -------- 0 5 2048 /u01/app/oracle/votedisk/voted NORMAL <== 文件此时并不属于CRSDG isk01 0 9 2048 /dev/asm-crs05 NORMAL 1 0 2048 CRSDG_0000 /dev/asm-crs01 CRSDG_0000 NORMAL 1 1 2048 CRSDG_0001 /dev/asm-crs02 CRSDG_0001 NORMAL 1 2 2048 CRSDG_0002 /dev/asm-crs03 CRSDG_0002 NORMAL 1 3 2048 CRSDG_0003 /dev/asm-crs04 CRSDG_0003 NORMAL 1 4 0 CRSDG_0004 FGQ NORMAL <== 2 0 20480 DATADG_0000 /dev/asm-data1 DATADG_0000 NORMAL 2 1 20480 DATADG_0001 /dev/asm-data2 DATADG_0001 NORMAL [root@database1 votedisk]# dd if=/dev/zero of=/u01/app/oracle/votedisk/votedisk01 bs=1M count=2048 <== 重新格式化Voting disk SQL> alter diskgroup CRSDG add quorum failgroup FGQ DISK '/u01/app/oracle/votedisk/votedisk01'; <== 重新添加 GROUP_NUMBER DISK_NUMBER OS_MB NAME PATH FAILGROUP STATE ------------ ----------- ---------- ------------------------------ --------------- ------------------------------ -------- 0 4 2048 /dev/asm-crs05 NORMAL 1 0 2048 CRSDG_0000 /dev/asm-crs01 CRSDG_0000 NORMAL 1 1 2048 CRSDG_0001 /dev/asm-crs02 CRSDG_0001 NORMAL 1 2 2048 CRSDG_0002 /dev/asm-crs03 CRSDG_0002 NORMAL 1 3 2048 CRSDG_0003 /dev/asm-crs04 CRSDG_0003 NORMAL 1 4 2048 CRSDG_0004 /u01/app/oracle FGQ NORMAL <== /votedisk/voted isk01 1 5 0 CRSDG_0005 FGQ NORMAL <== 2 0 20480 DATADG_0000 /dev/asm-data1 DATADG_0000 NORMAL 2 1 20480 DATADG_0001 /dev/asm-data2 DATADG_0001 NORMAL SQL> alter diskgroup crsdg drop quorum disk CRSDG_0005 force; <== 将旧的删除 
GROUP_NUMBER DISK_NUMBER OS_MB NAME PATH FAILGROUP STATE ------------ ----------- ---------- ------------------------------ --------------- ------------------------------ -------- 0 4 2048 /dev/asm-crs05 NORMAL <== 1 0 2048 CRSDG_0000 /dev/asm-crs01 CRSDG_0000 NORMAL 1 1 2048 CRSDG_0001 /dev/asm-crs02 CRSDG_0001 NORMAL 1 2 2048 CRSDG_0002 /dev/asm-crs03 CRSDG_0002 NORMAL 1 3 2048 CRSDG_0003 /dev/asm-crs04 CRSDG_0003 NORMAL 1 4 2048 CRSDG_0004 /u01/app/oracle FGQ NORMAL <== /votedisk/voted isk01 2 0 20480 DATADG_0000 /dev/asm-data1 DATADG_0000 NORMAL 2 1 20480 DATADG_0001 /dev/asm-data2 DATADG_0001 NORMAL 2.先于NFS服务器启动GRID [root@database1 ~]# crsctl query css votedisk ## STATE File Universal Id File Name Disk group -- ----- ----------------- --------- --------- 1. ONLINE 46bf9f1d43574f5bbfefe1377db152c4 (/dev/asm-crs01) [CRSDG] 2. ONLINE 10892f57e2e84ffabfa4a5e6fa86aee5 (/dev/asm-crs02) [CRSDG] 3. ONLINE 0a66ffb250394f13bfcfdb1946056058 (/dev/asm-crs03) [CRSDG] 4. ONLINE 18e561a6a5ff4fc0bf996f740aff70da (/dev/asm-crs04) [CRSDG] 5. OFFLINE ad0268dcec654f14bfe60497d9490780 () [] Located 5 voting disk(s). [root@database1 ~]# crsctl stat res -t <== 无法启动 CRS-4535: Cannot communicate with Cluster Ready Services CRS-4000: Command Status failed, or completed with errors. [root@database2 ~]# crsctl stat res -t -init -------------------------------------------------------------------------------- NAME TARGET STATE SERVER STATE_DETAILS -------------------------------------------------------------------------------- Cluster Resources -------------------------------------------------------------------------------- ora.asm 1 ONLINE INTERMEDIATE database2 OCR not started <== ASM启动,但是CRSDG无法MOUNT ora.cluster_interconnect.haip 1 ONLINE ONLINE database2 ora.crf 1 ONLINE ONLINE database2 ora.crsd 1 ONLINE OFFLINE ora.cssd 1 ONLINE ONLINE database2 ora.cssdmonitor 1 ONLINE ONLINE database2 ora.ctssd 1 ONLINE ONLINE database2 ACTIVE:0 ora.diskmon 1 OFFLINE OFFLINE ora.drivers.acfs 1 ONLINE ONLINE database2 ora.evmd 1 ONLINE INTERMEDIATE database2 ora.gipcd 1 ONLINE ONLINE database2 ora.gpnpd 1 ONLINE ONLINE database2 ora.mdnsd 1 ONLINE ONLINE database2 SQL> alter diskgroup crsdg mount; alter diskgroup crsdg mount * ERROR at line 1: ORA-15032: not all alterations performed ORA-15040: diskgroup is incomplete ORA-15042: ASM disk "4" is missing from group number "1" SQL> alter diskgroup crsdg mount force; <== 强制MOUNT Diskgroup altered. 
[root@database1 ~]# crsctl stat res -t -------------------------------------------------------------------------------- NAME TARGET STATE SERVER STATE_DETAILS -------------------------------------------------------------------------------- Local Resources -------------------------------------------------------------------------------- ora.CRSDG.dg ONLINE ONLINE database1 ONLINE ONLINE database2 ora.DATADG.dg ONLINE ONLINE database1 ONLINE ONLINE database2 ora.LISTENER.lsnr ONLINE ONLINE database1 ONLINE ONLINE database2 ora.LISTENER_1522.lsnr ONLINE ONLINE database1 ONLINE ONLINE database2 ora.LISTENER_1523.lsnr ONLINE ONLINE database1 ONLINE ONLINE database2 ora.asm ONLINE ONLINE database1 Started ONLINE ONLINE database2 Started ora.gsd OFFLINE OFFLINE database1 OFFLINE OFFLINE database2 ora.net1.network ONLINE ONLINE database1 ONLINE ONLINE database2 ora.ons ONLINE ONLINE database1 ONLINE ONLINE database2 ora.registry.acfs ONLINE ONLINE database1 ONLINE ONLINE database2 -------------------------------------------------------------------------------- Cluster Resources -------------------------------------------------------------------------------- ora.LISTENER_SCAN1.lsnr 1 ONLINE ONLINE database1 ora.cvu 1 OFFLINE OFFLINE ora.database1.vip 1 ONLINE ONLINE database1 ora.database2.vip 1 ONLINE ONLINE database2 ora.oc4j 1 ONLINE ONLINE database1 ora.orcl.billa3.svc 1 ONLINE ONLINE database1 ora.orcl.db 1 ONLINE ONLINE database1 Open 2 ONLINE ONLINE database2 Open ora.scan1.vip 1 ONLINE ONLINE database1 [root@database1 ~]# crsctl query css votedisk ## STATE File Universal Id File Name Disk group -- ----- ----------------- --------- --------- 1. ONLINE 46bf9f1d43574f5bbfefe1377db152c4 (/dev/asm-crs01) [CRSDG] 2. ONLINE 10892f57e2e84ffabfa4a5e6fa86aee5 (/dev/asm-crs02) [CRSDG] 3. ONLINE 0a66ffb250394f13bfcfdb1946056058 (/dev/asm-crs03) [CRSDG] 4. ONLINE 18e561a6a5ff4fc0bf996f740aff70da (/dev/asm-crs04) [CRSDG] Located 4 voting disk(s). 恢复和测试1相同
--disable object affinity
alter system set "_gc_affinity_time"=0 scope=spfile ;
--disable undo affinity
alter system set "_gc_undo_affinity"=FALSE scope=spfile;
Then restart all instances at the same time for the change to take effect.
If the instances cannot be restarted for now, the following commands can be used to effectively disable DRM (both parameters can be changed dynamically):
alter system set "_gc_affinity_limit"=10000000 sid='*';
alter system set "_gc_affinity_minimum"=10000000 sid='*';
In 11g, DRM can likewise be disabled as follows; disabling it is strongly recommended here:
alter system set "_gc_policy_time"=0 scope=spfile;
Then restart all instances at the same time for the change to take effect. If you do not want to disable DRM completely but need to disable the read-mostly locking or reader-bypass mechanism, use the following commands:
--disable read-mostly locking
alter system set "_gc_read_mostly_locking"=false scope=spfile sid='*';
--disable reader-bypass
alter system set "_gc_bypass_readers"=false scope=spfile sid='*';