Voting disk 장애 테스트

 

테스트 환경 : redhat linux 5.8

                    Oracle 11gR2 11.2.0.3


===========================================================================================

1개 지웠을 경우

 

[root@rac1 /root]# crsctl query css votedisk
##  STATE    File Universal Id                File Name Disk group
--  -----    -----------------                --------- ---------
 1. ONLINE   85bfd9f8d4bc4fcebfb9068b3945763b (/dev/raw/raw3) []
 2. ONLINE   706c29fba45d4f4cbfee1c9f177fb8ab (/dev/raw/raw4) []
 3. ONLINE   99a7e17b746b4f00bfdc2ca37bc7ec8a (/dev/raw/raw5) []

dd를 빨리 하기 위해 CRS를 내렸음. ONLINE 중에 dd 해도 되나 시간이 오래 걸림

[root@rac1 /root]# crsctl stop crs
CRS-2791: Starting shutdown of Oracle High Availability Services-managed resources on 'rac1'
CRS-2673: Attempting to stop 'ora.crsd' on 'rac1'
CRS-2790: Starting shutdown of Cluster Ready Services-managed resources on 'rac1'
....
이하 생략

[root@rac1 /root]# dd if=/dev/zero of=/dev/raw/raw5 bs=8192
dd: writing `/dev/raw/raw5': No space left on device
38401+0 records in
38400+0 records out
314572800 bytes (315 MB) copied, 20.3524 seconds, 15.5 MB/s

[root@rac1 /root]# crsctl start crs
CRS-4123: Oracle High Availability Services has been started.


[root@rac1 /root]# crsctl query css votedisk
##  STATE    File Universal Id                File Name Disk group
--  -----    -----------------                --------- ---------
 1. ONLINE   85bfd9f8d4bc4fcebfb9068b3945763b (/dev/raw/raw3) []
 2. ONLINE   706c29fba45d4f4cbfee1c9f177fb8ab (/dev/raw/raw4) []
 3. OFFLINE  99a7e17b746b4f00bfdc2ca37bc7ec8a () []
Located 3 voting disk(s).


[root@rac1 /root]# crsctl add css votedisk /dev/raw/raw5
Now formatting voting disk: /dev/raw/raw5.
CRS-4603: Successful addition of voting disk /dev/raw/raw5.

[root@rac1 /root]# crsctl query css votedisk
##  STATE    File Universal Id                File Name Disk group
--  -----    -----------------                --------- ---------
 1. ONLINE   85bfd9f8d4bc4fcebfb9068b3945763b (/dev/raw/raw3) []
 2. ONLINE   706c29fba45d4f4cbfee1c9f177fb8ab (/dev/raw/raw4) []
 3. OFFLINE  99a7e17b746b4f00bfdc2ca37bc7ec8a () []
 4. ONLINE   97431d31cd014fe7bf36a3483b6c653d (/dev/raw/raw5) []
Located 4 voting disk(s).

[root@rac1 /root]# crsctl delete css votedisk 99a7e17b746b4f00bfdc2ca37bc7ec8a
CRS-4611: Successful deletion of voting disk 99a7e17b746b4f00bfdc2ca37bc7ec8a.

[root@rac1 /root]# crsctl query css votedisk
##  STATE    File Universal Id                File Name Disk group
--  -----    -----------------                --------- ---------
 1. ONLINE   85bfd9f8d4bc4fcebfb9068b3945763b (/dev/raw/raw3) []
 2. ONLINE   706c29fba45d4f4cbfee1c9f177fb8ab (/dev/raw/raw4) []
 3. ONLINE   97431d31cd014fe7bf36a3483b6c653d (/dev/raw/raw5) []

===========================================================================================

2개 지웠을 경우

[root@rac1 /root]# crsctl stop crs
CRS-2791: Starting shutdown of Oracle High Availability Services-managed resources on 'rac1'
CRS-2673: Attempting to stop 'ora.crsd' on 'rac1'
CRS-2790: Starting shutdown of Cluster Ready Services-managed resources on 'rac1'
....
이하 생략

[root@rac1 /root]# dd if=/dev/zero of=/dev/raw/raw4 bs=8192
dd: writing `/dev/raw/raw4': No space left on device
38401+0 records in
38400+0 records out
314572800 bytes (315 MB) copied, 19.44 seconds, 16.2 MB/s
[root@rac1 /root]# dd if=/dev/zero of=/dev/raw/raw5 bs=8192
dd: writing `/dev/raw/raw5': No space left on device
38401+0 records in
38400+0 records out
314572800 bytes (315 MB) copied, 20.3099 seconds, 15.5 MB/s
[root@rac1 /root]# crsctl start crs
CRS-4123: Oracle High Availability Services has been started.

[root@rac1 /root]# crsctl query css votedisk
Unable to communicate with the Cluster Synchronization Services daemon.

[root@rac1 /root]# ps -ef |grep d.bin
root      8557     1  2 15:13 ?        00:00:06 /u01/11.2.0/grid/bin/ohasd.bin reboot
oracle    8684     1  0 15:13 ?        00:00:00 /u01/11.2.0/grid/bin/oraagent.bin
oracle    8698     1  0 15:13 ?        00:00:00 /u01/11.2.0/grid/bin/mdnsd.bin
oracle    8710     1  0 15:13 ?        00:00:00 /u01/11.2.0/grid/bin/gpnpd.bin
oracle    8723     1  0 15:13 ?        00:00:01 /u01/11.2.0/grid/bin/gipcd.bin
root      8725     1  0 15:13 ?        00:00:00 /u01/11.2.0/grid/bin/orarootagent.bin
root      8744     1  1 15:13 ?        00:00:04 /u01/11.2.0/grid/bin/osysmond.bin
root      8835     1  2 15:13 ?        00:00:06 /u01/11.2.0/grid/bin/ologgerd -M -d /u01/11.2.0/grid/crf/db/rac1
root      9390     1  0 15:16 ?        00:00:00 /u01/11.2.0/grid/bin/cssdagent
root      9457  4507  0 15:18 pts/2    00:00:00 grep d.bin

[root@rac1 /root]# crsctl stat res -t
CRS-4535: Cannot communicate with Cluster Ready Services
CRS-4000: Command Status failed, or completed with errors.

[root@rac2 /root]# crsctl stop crs -f
CRS-2791: Starting shutdown of Oracle High Availability Services-managed resources on 'rac2'
CRS-2673: Attempting to stop 'ora.crf' on 'rac2'
CRS-2673: Attempting to stop 'ora.mdnsd' on 'rac2'
CRS-2677: Stop of 'ora.crf' on 'rac2' succeeded
CRS-2673: Attempting to stop 'ora.gipcd' on 'rac2'
CRS-2677: Stop of 'ora.mdnsd' on 'rac2' succeeded
CRS-2677: Stop of 'ora.gipcd' on 'rac2' succeeded
CRS-2673: Attempting to stop 'ora.gpnpd' on 'rac2'
CRS-2677: Stop of 'ora.gpnpd' on 'rac2' succeeded
CRS-2793: Shutdown of Oracle High Availability Services-managed resources on 'rac2' has completed
CRS-4133: Oracle High Availability Services has been stopped.


[root@rac2 /root]# crsctl start crs -excl
CRS-4123: Oracle High Availability Services has been started.
CRS-2672: Attempting to start 'ora.mdnsd' on 'rac2'
CRS-2676: Start of 'ora.mdnsd' on 'rac2' succeeded
CRS-2672: Attempting to start 'ora.gpnpd' on 'rac2'
CRS-2676: Start of 'ora.gpnpd' on 'rac2' succeeded
CRS-2672: Attempting to start 'ora.cssdmonitor' on 'rac2'
CRS-2672: Attempting to start 'ora.gipcd' on 'rac2'
CRS-2676: Start of 'ora.cssdmonitor' on 'rac2' succeeded
CRS-2676: Start of 'ora.gipcd' on 'rac2' succeeded
CRS-2672: Attempting to start 'ora.cssd' on 'rac2'
CRS-2672: Attempting to start 'ora.diskmon' on 'rac2'
CRS-2676: Start of 'ora.diskmon' on 'rac2' succeeded
CRS-2676: Start of 'ora.cssd' on 'rac2' succeeded
CRS-2672: Attempting to start 'ora.ctssd' on 'rac2'
CRS-2676: Start of 'ora.ctssd' on 'rac2' succeeded
CRS-2672: Attempting to start 'ora.crsd' on 'rac2'
CRS-2676: Start of 'ora.crsd' on 'rac2' succeeded

-excl 옵션은 cluster 를 exclusive mode 로 시작하게 함. exclusive mode 는 한 노드에서만 기동할 수 있으므로 한쪽 노드에서만 실행해야 함 (다른 노드에서 다시 실행하면 아래와 같이 CRS-4402 에러로 실패함)

[root@rac2 /root]# ps -ef |grep d.bin
root      9000     1  3 15:22 ?        00:00:03 /u01/11.2.0/grid/bin/ohasd.bin exclusive
oracle    9126     1  0 15:22 ?        00:00:00 /u01/11.2.0/grid/bin/oraagent.bin
oracle    9140     1  0 15:22 ?        00:00:00 /u01/11.2.0/grid/bin/mdnsd.bin
oracle    9152     1  0 15:22 ?        00:00:00 /u01/11.2.0/grid/bin/gpnpd.bin
root      9164     1  0 15:22 ?        00:00:00 /u01/11.2.0/grid/bin/cssdmonitor
oracle    9167     1  1 15:22 ?        00:00:01 /u01/11.2.0/grid/bin/gipcd.bin
root      9192     1  0 15:22 ?        00:00:00 /u01/11.2.0/grid/bin/cssdagent
oracle    9206     1  0 15:22 ?        00:00:00 /u01/11.2.0/grid/bin/ocssd.bin -X
root      9262     1  0 15:22 ?        00:00:00 /u01/11.2.0/grid/bin/orarootagent.bin
root      9276     1  0 15:22 ?        00:00:00 /u01/11.2.0/grid/bin/octssd.bin
root      9296     1  0 15:22 ?        00:00:00 /u01/11.2.0/grid/bin/crsd.bin reboot
root      9333 12463  0 15:23 pts/1    00:00:00 grep d.bin

[root@rac2 /root]# crsctl query css votedisk
##  STATE    File Universal Id                File Name Disk group
--  -----    -----------------                --------- ---------
 1. ONLINE   85bfd9f8d4bc4fcebfb9068b3945763b (/dev/raw/raw3) []
 2. OFFLINE  706c29fba45d4f4cbfee1c9f177fb8ab () []
 3. OFFLINE  97431d31cd014fe7bf36a3483b6c653d () []

[root@rac2 /root]# crsctl start crs -excl -nocrs
CRS-4123: Oracle High Availability Services has been started.
CRS-2672: Attempting to start 'ora.mdnsd' on 'rac2'
CRS-2676: Start of 'ora.mdnsd' on 'rac2' succeeded
CRS-2672: Attempting to start 'ora.gpnpd' on 'rac2'
CRS-2676: Start of 'ora.gpnpd' on 'rac2' succeeded
CRS-2672: Attempting to start 'ora.cssdmonitor' on 'rac2'
CRS-2672: Attempting to start 'ora.gipcd' on 'rac2'
CRS-2676: Start of 'ora.cssdmonitor' on 'rac2' succeeded
CRS-2676: Start of 'ora.gipcd' on 'rac2' succeeded
CRS-2672: Attempting to start 'ora.cssd' on 'rac2'
CRS-2672: Attempting to start 'ora.diskmon' on 'rac2'
CRS-2676: Start of 'ora.diskmon' on 'rac2' succeeded
CRS-2676: Start of 'ora.cssd' on 'rac2' succeeded

[root@rac2 /root]# crsctl stat res -t -init
--------------------------------------------------------------------------------
NAME           TARGET  STATE        SERVER                   STATE_DETAILS      
--------------------------------------------------------------------------------
Cluster Resources
--------------------------------------------------------------------------------
ora.asm
      1        OFFLINE OFFLINE                               Instance Shutdown  
ora.cluster_interconnect.haip
      1        ONLINE  ONLINE       rac2                                        
ora.crf
      1        ONLINE  ONLINE       rac2                                        
ora.crsd
      1        ONLINE  ONLINE       rac2                                        
ora.cssd
      1        ONLINE  ONLINE       rac2                                        
ora.cssdmonitor
      1        ONLINE  ONLINE       rac2                                        
ora.ctssd
      1        ONLINE  ONLINE       rac2                     OBSERVER           
ora.diskmon
      1        OFFLINE OFFLINE                                                  
ora.evmd
      1        ONLINE  ONLINE       rac2                                        
ora.gipcd
      1        ONLINE  ONLINE       rac2                                        
ora.gpnpd
      1        ONLINE  ONLINE       rac2                                        
ora.mdnsd
      1        ONLINE  ONLINE       rac2   

-nocrs 옵션은 11.2.0.2 에서 도입된 것으로 ora.crsd resource 의 시작을 방지해 줌. 해당 옵션을

지정하지 않아 ora.crsd resource 가 fail 될 경우 결과적으로 ora.cluster_interconnect.haip resource 가

올라오지 않아 ASM 의 Crash 를 가져올 수 있음.

 

[root@rac2 /root]# ps -ef |grep d.bin
root      9388     1  2 15:26 ?        00:00:03 /u01/11.2.0/grid/bin/ohasd.bin exclusive
oracle    9515     1  0 15:26 ?        00:00:00 /u01/11.2.0/grid/bin/oraagent.bin
oracle    9529     1  0 15:26 ?        00:00:00 /u01/11.2.0/grid/bin/mdnsd.bin
oracle    9541     1  0 15:26 ?        00:00:00 /u01/11.2.0/grid/bin/gpnpd.bin
root      9553     1  0 15:26 ?        00:00:00 /u01/11.2.0/grid/bin/cssdmonitor
oracle    9556     1  0 15:26 ?        00:00:00 /u01/11.2.0/grid/bin/gipcd.bin
root      9581     1  0 15:26 ?        00:00:00 /u01/11.2.0/grid/bin/cssdagent
oracle    9610     1  0 15:26 ?        00:00:00 /u01/11.2.0/grid/bin/ocssd.bin -X

[root@rac2 /root]# crsctl stat res -t
CRS-4535: Cannot communicate with Cluster Ready Services
CRS-4000: Command Status failed, or completed with errors.

[root@rac1 /root]# crsctl start crs -excl -nocrs
CRS-4123: Oracle High Availability Services has been started.
CRS-2672: Attempting to start 'ora.mdnsd' on 'rac1'
CRS-2676: Start of 'ora.mdnsd' on 'rac1' succeeded
CRS-2672: Attempting to start 'ora.gpnpd' on 'rac1'
CRS-2676: Start of 'ora.gpnpd' on 'rac1' succeeded
CRS-2672: Attempting to start 'ora.cssdmonitor' on 'rac1'
CRS-2672: Attempting to start 'ora.gipcd' on 'rac1'
CRS-2676: Start of 'ora.cssdmonitor' on 'rac1' succeeded
CRS-2676: Start of 'ora.gipcd' on 'rac1' succeeded
CRS-2672: Attempting to start 'ora.cssd' on 'rac1'
CRS-2672: Attempting to start 'ora.diskmon' on 'rac1'
CRS-2676: Start of 'ora.diskmon' on 'rac1' succeeded
CRS-4402: The CSS daemon was started in exclusive mode but found an active CSS daemon on node rac2, number 2, and is terminating
CRS-2674: Start of 'ora.cssd' on 'rac1' failed
CRS-2679: Attempting to clean 'ora.cssd' on 'rac1'
CRS-2681: Clean of 'ora.cssd' on 'rac1' succeeded
CRS-2673: Attempting to stop 'ora.gipcd' on 'rac1'
CRS-2677: Stop of 'ora.gipcd' on 'rac1' succeeded
CRS-2673: Attempting to stop 'ora.cssdmonitor' on 'rac1'
CRS-2677: Stop of 'ora.cssdmonitor' on 'rac1' succeeded
CRS-2673: Attempting to stop 'ora.gpnpd' on 'rac1'
CRS-2677: Stop of 'ora.gpnpd' on 'rac1' succeeded
CRS-2673: Attempting to stop 'ora.mdnsd' on 'rac1'
CRS-2677: Stop of 'ora.mdnsd' on 'rac1' succeeded
CRS-4000: Command Start failed, or completed with errors.
[root@rac1 /root]# ps -ef |grep ora_
root      9941  4507  0 15:31 pts/2    00:00:00 grep ora_
[root@rac1 /root]# ps -ef |grep d.bin
root      9626     1  4 15:30 ?        00:00:03 /u01/11.2.0/grid/bin/ohasd.bin exclusive

[root@rac2 /root]# crsctl query css votedisk
##  STATE    File Universal Id                File Name Disk group
--  -----    -----------------                --------- ---------
 1. ONLINE   85bfd9f8d4bc4fcebfb9068b3945763b (/dev/raw/raw3) []
 2. OFFLINE  706c29fba45d4f4cbfee1c9f177fb8ab () []
 3. OFFLINE  97431d31cd014fe7bf36a3483b6c653d () []
Located 3 voting disk(s).
[root@rac2 /root]#
[root@rac2 /root]#
[root@rac2 /root]#
[root@rac2 /root]#
[root@rac2 /root]# crsctl add css votedisk /dev/raw/raw4
Now formatting voting disk: /dev/raw/raw4.
CRS-4603: Successful addition of voting disk /dev/raw/raw4.
[root@rac2 /root]# crsctl add css votedisk /dev/raw/raw5
Now formatting voting disk: /dev/raw/raw5.
CRS-4603: Successful addition of voting disk /dev/raw/raw5.

[root@rac2 /root]# crsctl query css votedisk
##  STATE    File Universal Id                File Name Disk group
--  -----    -----------------                --------- ---------
 1. ONLINE   85bfd9f8d4bc4fcebfb9068b3945763b (/dev/raw/raw3) []
 2. OFFLINE  706c29fba45d4f4cbfee1c9f177fb8ab () []
 3. OFFLINE  97431d31cd014fe7bf36a3483b6c653d () []
 4. ONLINE   87e7d964970a4f91bf8a33217557c04f (/dev/raw/raw4) []
 5. ONLINE   54ddb0c72e2e4f7abfe7684fbe847917 (/dev/raw/raw5) []
Located 5 voting disk(s).
[root@rac2 /root]# crsctl delete css votedisk 706c29fba45d4f4cbfee1c9f177fb8ab
CRS-4611: Successful deletion of voting disk 706c29fba45d4f4cbfee1c9f177fb8ab.
[root@rac2 /root]# crsctl delete css votedisk 97431d31cd014fe7bf36a3483b6c653d
CRS-4611: Successful deletion of voting disk 97431d31cd014fe7bf36a3483b6c653d.
[root@rac2 /root]# crsctl query css votedisk
##  STATE    File Universal Id                File Name Disk group
--  -----    -----------------                --------- ---------
 1. ONLINE   85bfd9f8d4bc4fcebfb9068b3945763b (/dev/raw/raw3) []
 2. ONLINE   87e7d964970a4f91bf8a33217557c04f (/dev/raw/raw4) []
 3. ONLINE   54ddb0c72e2e4f7abfe7684fbe847917 (/dev/raw/raw5) []
Located 3 voting disk(s).
[root@rac2 /root]#

[root@rac2 /root]# crsctl stop crs
CRS-2791: Starting shutdown of Oracle High Availability Services-managed resources on 'rac2'
CRS-2673: Attempting to stop 'ora.cssd' on 'rac2'
CRS-2673: Attempting to stop 'ora.mdnsd' on 'rac2'
CRS-2677: Stop of 'ora.cssd' on 'rac2' succeeded
CRS-2673: Attempting to stop 'ora.gipcd' on 'rac2'
CRS-2677: Stop of 'ora.mdnsd' on 'rac2' succeeded
CRS-2677: Stop of 'ora.gipcd' on 'rac2' succeeded
CRS-2673: Attempting to stop 'ora.gpnpd' on 'rac2'
CRS-2677: Stop of 'ora.gpnpd' on 'rac2' succeeded
CRS-2793: Shutdown of Oracle High Availability Services-managed resources on 'rac2' has completed
CRS-4133: Oracle High Availability Services has been stopped.

[root@rac2 /root]# crsctl start crs
CRS-4123: Oracle High Availability Services has been started

 

=> 결론

노드는 항상 Voting disk의 과반수(절반 초과)를 access할 수 있어야 함
이 최소 개수에 access하지 못하면 노드는 cluster에서 evict 또는 remove 되어버림


예를 들어

3개로 구성했을 경우 적어도 2개의 Voting disk 에 access 할 수 있어야 함
5개로 구성했을 경우 적어도 3개의 Voting disk 에 access 할 수 있어야 함

ps. Oracle Clusterware 11gR2(11.2)는 최대 15개까지의 voting disk 구성을 지원함 (최대 32개는 이전 릴리스 기준)

Posted by pat98

12-27 17:06
Flag Counter
Yesterday
Today
Total

글 보관함

최근에 올라온 글

달력

 « |  » 2024.12
1 2 3 4 5 6 7
8 9 10 11 12 13 14
15 16 17 18 19 20 21
22 23 24 25 26 27 28
29 30 31

최근에 달린 댓글