关于怎么打开core文件,参见上一篇,这里不罗嗦了。
1) 首先,这个panic跟内存使用越界有关。先来看看导致panic的call trace和寄存器。
RIP: 0010:[
RSP: 0018:ffff880142c7da68 EFLAGS: 00010002
RAX: ffff88013f9de800 RBX: ffffffdb6ab92554 RCX: ffff88017d619a70
RDX: 0000000000000001 RSI: ffff88017d619a00 RDI: a800000000000000
RBP: ffff880142c7da70 R08: 8000000000000000 R09: 2aa0000000000000
R10: 5500000000000000 R11: 0000000000000000 R12: ffff880134557660
R13: 0000000000000002 R14: 0000000000000000 R15: 0000000000001000
FS: 0000000000000000(0000) GS:ffff880030440000(0000) knlGS:0000000000000000
CS: 0010 DS: 0018 ES: 0018 CR0: 000000008005003b
CR2: 0000000000000009 CR3: 0000000001001000 CR4: 00000000000406e0
DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400
Process md16_raid4 (pid: 37296, threadinfo ffff880142c7c000, task ffff8801389080c0)
Stack:
ffff88017d619a00 ffff880142c7daa0 ffffffff81237fc4 ffff880142c7daa0
<0> ffff880134557660 0000000000000000 ffff88017d619a00 ffff880142c7db10
<0> ffffffff8124181b ffff880124abaf00 ffff88012701eda8 ffff880124aba900
Call Trace:
[
[
[
[
2) 经过反汇编分析elv_rqhash_add并对照内核代码,发现RAX的值应该是一个指向hlist_head数组的指针。检查这个数组的第一个成员发现,其first成员是1,对于hlist_head的first来说,这是一个不可能存在的值(它只能是NULL或者一个合法的内核指针)。这也与上面寄存器中的CR2(0x9)吻合:把1当作hlist_node指针、访问其偏移8字节处的pprev成员时,出错地址正好是0x9。
crash> struct hlist_head 0xffff88013f9de800
struct hlist_head {
first = 0x1
}
3) 由于这个hlist_head的内容非法,尝试看这个指针本身是否有误。首先看内存指针本身,然后就是从入口参数开始,逐层找到这个指针,看哪个环节首先出错。
通过汇编分析,能够确定寄存器RSI的值(入口参数)是一个request结构体。
crash> struct request ffff88017d619a00
struct request {
queuelist = {
next = 0xffff880137afd860,
prev = 0xffff88018fb86e60
},
csd = {
list = {
next = 0x0,
prev = 0x0
},
func = 0,
info = 0x0,
flags = 0,
priv = 0
},
q = 0xffff880134557660, ;queue
....
crash> kmem 0xffff880134557660 ;从kmem的输出结果看,0xffff880134557660 确实对应一个blkdev_queue结构,看起来合理
CACHE NAME OBJSIZE ALLOCATED TOTAL SLABS SSIZE
ffff8802360818c0 blkdev_queue 2320 994 996 332 8k
SLAB MEMORY TOTAL ALLOCATED FREE
ffff880134556400 ffff880134556440 3 3 0
FREE / [ALLOCATED]
[ffff880134557660]
crash> struct request_queue.elevator 0xffff880134557660 ;找到queue的elevator
elevator = 0xffff88011b4f7a40
crash> struct elevator_queue 0xffff88011b4f7a40 ;查看elevator的内容,从其kobj的name看,没有异常。
struct elevator_queue {
ops = 0xffffffff817771f0,
elevator_data = 0xffff880137afd860,
kobj = {
name = 0xffff880136065b80 "iosched",
....
},
elevator_type = 0xffffffff817771e0,
....
hash = 0xffff88013f9de800 ;这个就是前面提到的hlist_head数组的指针
}
crash> kmem 0xffff88013f9de800 ;从内核记录的内存分配信息看,这是一块从size-512这个slab分配的内存,没有看出指针值本身有什么不合理之处。
CACHE NAME OBJSIZE ALLOCATED TOTAL SLABS SSIZE
ffff88023fcb02c0 size-512 512 4344 4728 591 4k
SLAB MEMORY TOTAL ALLOCATED FREE
ffff88013a96cec0 ffff88013f9de000 8 8 0
FREE / [ALLOCATED]
[ffff88013f9de800]
crash> struct elevator_type 0xffffffff817771e0 ;看看结构体其他兄弟成员吧,内容看起来完全合理,noop这个scheduler也是存在的。
struct elevator_type {
list = {
next = 0xffffffff817772c0,
prev = 0xffffffff81775860
},
ops = {
elevator_merge_fn = 0,
elevator_merged_fn = 0,
elevator_merge_req_fn = 0xffffffff8124d480
elevator_allow_merge_fn = 0,
elevator_bio_merged_fn = 0,
elevator_dispatch_fn = 0xffffffff8124d560
elevator_add_req_fn = 0xffffffff8124d530
elevator_activate_req_fn = 0,
elevator_deactivate_req_fn = 0,
elevator_queue_empty_fn = 0xffffffff8124d4a0
elevator_completed_req_fn = 0,
elevator_former_req_fn = 0xffffffff8124d4c0
elevator_latter_req_fn = 0xffffffff8124d4e0
elevator_set_req_fn = 0,
elevator_put_req_fn = 0,
elevator_may_queue_fn = 0,
elevator_init_fn = 0xffffffff8124d5a0
elevator_exit_fn = 0xffffffff8124d500
trim = 0
},
elevator_attrs = 0x0,
elevator_name = "noop\000\000\000\000\000\000\000\000\000\000\000",
elevator_owner = 0x0
}
4) 从内核代码看,hash这个数组的大小为64,因此读取64个指针,发现只有第1个为1,其他为0(NULL)或者合理的内核指针。注意,对于hlist_head->first,0也是合理的。
crash> rd 0xffff88013f9de800 64
ffff88013f9de800: 0000000000000001 0000000000000000 ................
ffff88013f9de810: 0000000000000000 0000000000000000 ................
ffff88013f9de820: 0000000000000000 0000000000000000 ................
ffff88013f9de830: 0000000000000000 0000000000000000 ................
ffff88013f9de840: 0000000000000000 0000000000000000 ................
ffff88013f9de850: 0000000000000000 0000000000000000 ................
ffff88013f9de860: 0000000000000000 0000000000000000 ................
ffff88013f9de870: 0000000000000000 0000000000000000 ................
ffff88013f9de880: 0000000000000000 0000000000000000 ................
ffff88013f9de890: 0000000000000000 0000000000000000 ................
ffff88013f9de8a0: 0000000000000000 0000000000000000 ................
ffff88013f9de8b0: 0000000000000000 0000000000000000 ................
ffff88013f9de8c0: 0000000000000000 0000000000000000 ................
ffff88013f9de8d0: 0000000000000000 0000000000000000 ................
ffff88013f9de8e0: 0000000000000000 0000000000000000 ................
ffff88013f9de8f0: 0000000000000000 0000000000000000 ................
ffff88013f9de900: 0000000000000000 0000000000000000 ................
ffff88013f9de910: 0000000000000000 0000000000000000 ................
ffff88013f9de920: 0000000000000000 0000000000000000 ................
ffff88013f9de930: 0000000000000000 0000000000000000 ................
ffff88013f9de940: 0000000000000000 0000000000000000 ................
ffff88013f9de950: 0000000000000000 0000000000000000 ................
ffff88013f9de960: 0000000000000000 0000000000000000 ................
ffff88013f9de970: 0000000000000000 0000000000000000 ................
ffff88013f9de980: ffff88018fb86ed0 0000000000000000 .n..............
ffff88013f9de990: 0000000000000000 0000000000000000 ................
ffff88013f9de9a0: 0000000000000000 0000000000000000 ................
ffff88013f9de9b0: 0000000000000000 0000000000000000 ................
ffff88013f9de9c0: 0000000000000000 0000000000000000 ................
ffff88013f9de9d0: 0000000000000000 0000000000000000 ................
ffff88013f9de9e0: 0000000000000000 0000000000000000 ................
ffff88013f9de9f0: 0000000000000000 0000000000000000 ................
5) 到这里没有看出非常明显的规律,只知道一块内存的前8个字节内容不合理。后来又重现了这个panic,分析其他vmcore时也发现了类似的情况,由此想到与这块内存相邻的内存区可能发生了溢出。由于这块内存位于size-512的slab中,根据slab的特点(同一个slab内的对象大小相同、连续排列),前一个对象的起始地址就是 0xffff88013f9de800 - 512,于是直接读取这个地址的数据:
crash> rd ffff88013f9de600 64 ;0xffff88013f9de800 - 512 = ffff88013f9de600
ffff88013f9de600: 0000000000000000 ffffffffa02460e0 .........`$.....
ffff88013f9de610: ffffffffa0245e80 0000000000000000 .^$.............
ffff88013f9de620: ffff880232d88620 ffff88013a6eac20 ..2.... .n:....
ffff88013f9de630: ffff88015ae3f600 ffff88015ae3f540 ...Z....@..Z....
ffff88013f9de640: ffff88015ae3f480 ffff88015ae3f3c0 ...Z.......Z....
ffff88013f9de650: ffff88015ae3f300 ffff88015ae3f240 ...Z....@..Z....
ffff88013f9de660: ffff88015ae3f6c0 0000000000000000 ...Z............
ffff88013f9de670: 00000000170b170b ffff880141931e98 ...........A....
ffff88013f9de680: ffff880141931e98 0000000000000000 ...A............
ffff88013f9de690: 0000000000000000 0000000000000000 ................
ffff88013f9de6a0: 0000000000030003 ffff88013f9de6a8 ...........?....
ffff88013f9de6b0: ffff88013f9de6a8 ffff88015488a080 ...?.......T....
ffff88013f9de6c0: ffff88012b739358 ffff8802368e53a0 X.s+.....S.6....
ffff88013f9de6d0: 00000001010778fd ffffffffa0243570 .x......p5$.....
ffff88013f9de6e0: ffff880133821028 ffff8802368e4000 (..3.....@.6....
ffff88013f9de6f0: 0000000000000000 0000000000000000 ................
ffff88013f9de700: 0000000000000000 00000000ffffffff ................
ffff88013f9de710: 0000000000000000 0000000000000000 ................
ffff88013f9de720: ffff88013f9de720 ffff88013f9de720 ..?.... ..?....
ffff88013f9de730: 0000020000000001 0000000000000200 ................
ffff88013f9de740: 0000000000000000 0000000068676867 ........ghgh....
ffff88013f9de750: ffff88013f9de750 ffff88013f9de750 P..?....P..?....
ffff88013f9de760: 0000000001000000 ffff88012234ec60 ........`.4"....
ffff88013f9de770: 0000002000000000 00001f4000001f40 .... ...@...@...
ffff88013f9de780: 0000000000000040 0000000000000000 @...............
ffff88013f9de790: 0000000000000000 0000000000000018 ................
ffff88013f9de7a0: 0000000000000051 000000000000004e Q.......N.......
ffff88013f9de7b0: 0000000000000062 0000000000000169 b.......i.......
ffff88013f9de7c0: 0000000000000de6 00000000000005e3 ................
ffff88013f9de7d0: 0000000000000587 0000000000000039 ........9.......
ffff88013f9de7e0: 0000000000000140 000000000000000a @...............
ffff88013f9de7f0: 0000000000000024 0000000000000018 $...............
6) 上面读出的内存内容中没有找到非常有意义的字符串,并不能直接告诉我们它对应什么结构体,其大小也不一定正好是512字节。但是我们能够知道该结构体前半部分有很多指针,后半部分有很多整数,看起来像个整数数组。
我们进一步读取该结构体内各个指针(原文中标记为红色)所指向的地址后,找到了一些有特征的字符串,这些字符串属于我们自己开发的模块,具体细节就不在这里列出来了。通过那些有特征的字符串,很快定位到了对应的结构体。发现这个结构体的大小正好为512,该结构体的最后一部分确实是个long型的数组。随后的代码review发现,操作该结构体的一段代码确实存在问题,会越界写入一个long的大小(8字节)。这正好与紧邻其后的hash数组第一个元素(8字节)被改写为1的现象吻合。