AIX 下主要用sysdumpdev 命令修改主、次dump设备
root@testdb:/var/adm/ras# sysdumpdev -l //查看dump文件的位置
primary /dev/lg_dumplv
secondary /dev/sysdumpnull
copy directory /var/adm/ras
forced copy flag TRUE
always allow dump FALSE
dump compression ON
type of dump traditional
root@testdb:/var/adm/ras# sysdumpdev -e //确认下需要的最小dump大小,大约为390M左右;
0453-041 Estimated dump size in bytes: 409154355
root@testdb:/var/adm/ras# sysdumpdev -L //查看dump文件的生成状态;
0453-039
Device name: /dev/lg_dumplv
Major device number: 10
Minor device number: 11
Size: 134017536 bytes
Uncompressed Size: 1109719634 bytes
Date/Time: Tue Sep 2 17:00:20 CST 2014
Dump status: 0
Type of dump: traditional
dump completed successfully
root@testdb:/tmp# lsvg rootvg
VOLUME GROUP: rootvg VG IDENTIFIER: 000b08560000d70000000147b7501511
VG STATE: active PP SIZE: 256 megabyte(s)
VG PERMISSION: read/write TOTAL PPs: 546 (139776 megabytes)
MAX LVs: 256 FREE PPs: 342 (87552 megabytes)
LVs: 14 USED PPs: 204 (52224 megabytes)
OPEN LVs: 11 QUORUM: 2 (Enabled)
TOTAL PVs: 1 VG DESCRIPTORS: 2
STALE PVs: 0 STALE PPs: 0
ACTIVE PVs: 1 AUTO ON: yes
MAX PPs per VG: 32512
MAX PPs per PV: 1016 MAX PVs: 32
LTG size (Dynamic): 256 kilobyte(s) AUTO SYNC: no
HOT SPARE: no BB POLICY: relocatable
PV RESTRICTION: none INFINITE RETRY: no
root@testdb:/tmp#
root@testdb:/tmp#
root@testdb:/tmp# lsvg -l rootvg
rootvg:
LV NAME TYPE LPs PPs PVs LV STATE MOUNT POINT
hd5 boot 1 1 1 closed/syncd N/A
hd6 paging 8 8 1 open/syncd N/A
hd8 jfs2log 1 1 1 open/syncd N/A
hd4 jfs2 82 82 1 open/syncd /
hd2 jfs2 9 9 1 open/syncd /usr
hd9var jfs2 50 50 1 open/syncd /var
hd3 jfs2 5 5 1 open/syncd /tmp
hd1 jfs2 37 37 1 open/syncd /home
hd10opt jfs2 3 3 1 open/syncd /opt
hd11admin jfs2 1 1 1 open/syncd /admin
lg_dumplv sysdump 4 4 1 open/syncd N/A
livedump jfs2 1 1 1 open/syncd /var/adm/ras/livedump
paging00 paging 1 1 1 closed/syncd N/A
pagetest paging 1 1 1 closed/syncd N/A
root@testdb:/tmp#
root@testdb:/var/adm/ras# extendlv lg_dumplv 8 //对dump设备扩容;如果dump设备指定的是交换分区(paging space),则直接使用smit chps来更改;如果指定的是逻辑卷,则用extendlv <dump逻辑卷名> n来扩容(n为要增加的LP个数)
root@testdb:/var/adm/ras# lsvg -l rootvg
rootvg:
LV NAME TYPE LPs PPs PVs LV STATE MOUNT POINT
hd5 boot 1 1 1 closed/syncd N/A
hd6 paging 8 8 1 open/syncd N/A
hd8 jfs2log 1 1 1 open/syncd N/A
hd4 jfs2 82 82 1 open/syncd /
hd2 jfs2 17 17 1 open/syncd /usr
hd9var jfs2 50 50 1 open/syncd /var
hd3 jfs2 13 13 1 open/syncd /tmp
hd1 jfs2 37 37 1 open/syncd /home
hd10opt jfs2 3 3 1 open/syncd /opt
hd11admin jfs2 1 1 1 open/syncd /admin
lg_dumplv sysdump 12 12 1 open/syncd N/A
livedump jfs2 1 1 1 open/syncd /var/adm/ras/livedump
paging00 paging 1 1 1 closed/syncd N/A
pagetest paging 1 1 1 closed/syncd N/A
我们可以看到系统目前需要的最小dump空间为390M左右;lg_dumplv扩容前为4个PP(每个PP为256M,共1G),扩容后为12个PP(共3G),足以容纳dump;
修改主dump 到一个指定的逻辑卷 /dev/dumptest,使用
#sysdumpdev -P -p /dev/dumptest;如果逻辑卷不存在,要先创建,然后再指定主dump设备
root@testdb:/# smit lv
Add a Logical Volume
Type or select values in entry fields.
Press Enter AFTER making all desired changes.
[TOP] [Entry Fields]
Logical volume NAME [dumptest]
* VOLUME GROUP name rootvg //必须是rootvg,不能是其他卷组
* Number of LOGICAL PARTITIONS [6]
PHYSICAL VOLUME names [hdisk2]
Logical volume TYPE [sysdump]
.............................
dumptest sysdump 6 6 1 closed/syncd N/A
root@testdb:/# sysdumpdev -l
primary /dev/lg_dumplv
secondary /dev/sysdumpnull
copy directory /var/adm/ras
forced copy flag TRUE
always allow dump FALSE
dump compression ON
type of dump traditional
root@testdb:/# sysdumpdev -P -p /dev/dumptest
primary /dev/dumptest
secondary /dev/sysdumpnull
copy directory /var/adm/ras
forced copy flag TRUE
always allow dump FALSE
dump compression ON
type of dump traditional
root@testdb:/# sysdumpdev -l
primary /dev/dumptest
secondary /dev/sysdumpnull
copy directory /var/adm/ras
forced copy flag TRUE
always allow dump FALSE
dump compression ON
type of dump traditional
root@testdb:/# extendlv dumptest 2
dumptest sysdump 8 8 1 open/syncd N/A
#snap -a 或者snap -Dd /tmp/ibmsupt //用snap收集dump文件,收集结果放在/tmp/ibmsupt/dump目录下
root@testdb:/tmp/ibmsupt/dump# pwd
/tmp/ibmsupt/dump
root@testdb:/tmp/ibmsupt/dump#
root@testdb:/tmp/ibmsupt/dump# ls -al
total 295144
drwx------ 3 root system 4096 Sep 02 21:41 .
drwx------ 32 root system 4096 Sep 02 21:43 ..
drwx------ 2 root system 4096 Sep 02 21:41 autoload
-rw------- 1 root system 134017536 Sep 02 21:41 dump.BZ
-rw------- 1 root system 784 Sep 02 21:41 dump.snap
lrwxrwxrwx 1 root system 6 Sep 02 21:41 errdead -> kdb_64
lrwxrwxrwx 1 root system 6 Sep 02 21:41 kdb -> kdb_64
-r-xr-xr-x 1 bin bin 8250096 Feb 04 2013 kdb_64
lrwxrwxrwx 1 root system 6 Sep 02 21:41 livedumpdead -> kdb_64
-rw------- 1 root system 18433 Sep 02 21:41 mdmprpt.out
-rw------- 1 root system 2197 Sep 02 17:04 minidump_last
lrwxrwxrwx 1 root system 6 Sep 02 21:41 trcdead -> kdb_64
-rw------- 1 root system 8797164 Sep 02 21:41 unix.Z
root@testdb:/tmp/ibmsupt/dump# kdb dump.BZ
dump.BZ mapped from @ a00000000000000 to @ a00000007fcf200
The dump appears to be compressed.
It needs to be uncompressed with the dmpuncompress command before running kdb
root@testdb:/tmp/ibmsupt/dump# dmpuncompress dump.BZ
dump: A file cannot be larger than the value set by ulimit.
dmpuncompress: A file cannot be larger than the value set by ulimit.
Error writing to the dump file.
Check that the ulimit value for the filesystem is set high enough.
Check that the filesystem has large file support enabled.
Run dmpuncompress -p <filename> to preserve the partially
uncompressed dump.
dump: A file cannot be larger than the value set by ulimit
root@testdb:/etc/security# ulimit -a
time(seconds) unlimited
file(blocks) 2097151
data(kbytes) 131072
stack(kbytes) 32768
memory(kbytes) 32768
coredump(blocks) 2097151
nofiles(descriptors) 2000
threads(per process) unlimited
processes(per user) unlimited
修改/etc/security/limits,把default段中的各项限制改为-1(不限制),重新登录后生效:
default:
fsize = -1
core = -1
cpu = -1
data = -1
rss = -1
stack = -1
nofiles = -1
root@testdb:/# ulimit -a
time(seconds) unlimited
file(blocks) unlimited
data(kbytes) unlimited
stack(kbytes) 4194304
memory(kbytes) unlimited
coredump(blocks) unlimited
nofiles(descriptors) unlimited
threads(per process) unlimited
processes(per user) unlimited
root@testdb:/#
root@testdb:/# cd /tmp/ibmsupt
root@testdb:/tmp/ibmsupt# cd dump
root@testdb:/tmp/ibmsupt/dump# ls -l
total 295128
drwx------ 2 root system 4096 Sep 02 21:41 autoload
-rw------- 1 root system 134017536 Sep 02 21:41 dump.BZ
-rw------- 1 root system 784 Sep 02 21:41 dump.snap
lrwxrwxrwx 1 root system 6 Sep 02 21:41 errdead -> kdb_64
lrwxrwxrwx 1 root system 6 Sep 02 21:41 kdb -> kdb_64
-r-xr-xr-x 1 bin bin 8250096 Feb 04 2013 kdb_64
lrwxrwxrwx 1 root system 6 Sep 02 21:41 livedumpdead -> kdb_64
-rw------- 1 root system 18433 Sep 02 21:41 mdmprpt.out
-rw------- 1 root system 2197 Sep 02 17:04 minidump_last
lrwxrwxrwx 1 root system 6 Sep 02 21:41 trcdead -> kdb_64
-rw------- 1 root system 8797164 Sep 02 21:41 unix.Z
root@testdb:/tmp/ibmsupt/dump# dmpuncompress dump.BZ
-- replaced with dump
root@testdb:/tmp/ibmsupt/dump# ls -l
total 2200792
drwx------ 2 root system 4096 Sep 02 21:41 autoload
-rw------- 1 root system 1109719634 Sep 02 21:41 dump
-rw------- 1 root system 784 Sep 02 21:41 dump.snap
lrwxrwxrwx 1 root system 6 Sep 02 21:41 errdead -> kdb_64
lrwxrwxrwx 1 root system 6 Sep 02 21:41 kdb -> kdb_64
-r-xr-xr-x 1 bin bin 8250096 Feb 04 2013 kdb_64
lrwxrwxrwx 1 root system 6 Sep 02 21:41 livedumpdead -> kdb_64
-rw------- 1 root system 18433 Sep 02 21:41 mdmprpt.out
-rw------- 1 root system 2197 Sep 02 17:04 minidump_last
lrwxrwxrwx 1 root system 6 Sep 02 21:41 trcdead -> kdb_64
-rw------- 1 root system 8797164 Sep 02 21:41 unix.Z
root@testdb:/tmp/ibmsupt/dump# kdb dump /unix
dump mapped from @ a00000000000000 to @ a0000004224fa52
START END <name>
0000000000001000 00000000058A0000 start+000FD8
F00000002FF47600 F00000002FFDF9C8 __ublock+000000
000000002FF22FF4 000000002FF22FF8 environ+000000
000000002FF22FF8 000000002FF22FFC errno+000000
F1000F0A00000000 F1000F0A10000000 pvproc+000000
F1000F0A10000000 F1000F0A18000000 pvthread+000000
Dump analysis on CHRP_SMP_PCI POWER_PC POWER_5 machine with 8 available CPU(s) (64-bit registers)
Processing symbol table...
.......................done
read vscsi_scsi_ptrs OK, ptr = 0x0
(0)> //在此执行子命令
(0)> h //输入命令h 提供Help,查看所有子命令
CMD ALIAS ALIAS FUNCTION ARG
*** end-user ***
h ? help help [topic]
set setup display/update kdb toggles [toggle]
varset alias define a user variable var value
varlist list user variables
varrm unalias remove user variable var
his hi hist print history [?][count]
! shell escape command
phyp phyp escape command
*** leaving ***
e q g exit [dump]
*** changing context ***
sw switch switch to thread [th {slot/eaddr} | {u/k}]
cpu switch to cpu [cpu number | any]
runcpu run command on all CPUs cmd
context ctx switch to KDB context [cpu number]
*** calculator/converter ***
(0)> more (^C to quit) ?
(0)> status //查看CPU信息
CPU INTR TID TSLOT PID PSLOT PROC_NAME
0 6F00E1 111 2D007C 45 syncd
1 15002B 21 F001E 15 wait
2 16002D 22 100020 16 wait
3 17002F 23 110022 17 wait
4 1750003 373 200054 32 sysdumpstart
5 1D003B 29 130026 19 wait
6 1E003D 30 140028 20 wait
7 1F003F 31 15002A 21 wait
(0)> stat //查看系统状态
SYSTEM_CONFIGURATION:
CHRP_SMP_PCI POWER_PC POWER_5 machine with 8 available CPU(s) (64-bit registers)
SYSTEM STATUS:
sysname... AIX
nodename.. testdb
release... 1
version... 7
build date Feb 4 2013
build time 11:52:44
label..... 1305B_71J
machine... 000B0856D700
nid....... 0B0856D7
time of crash: Tue Sep 2 17:00:20 2014
age of system: 19 hr., 17 min., 21 sec.
xmalloc debug: enabled
FRRs active... 0
FRRs started.. 0
CRASH INFORMATION:
CPU -1 CSA 053A7E80 at time of crash, error code for LEDs: 00000000
输入e 退出
(0)> f 2
pvthread+000200 STACK:
[00082390]waitproc+0006F0 ()
[00081EA4]waitproc+000204 ()
[00231990]procentry+000010 (??, ??, ??, ??)
[kdb_read_mem] no real storage @ FFFFFFFFFFF9320
使用kdb来分析dump文件,光看分析输出的内容还是有难度的,不容易看懂;遇到这种问题,建议还是开case交给IBM的支持人员来分析。