Kernel Panic (Kdump) 解析实例之二

关于怎么打开core文件,参见上一篇,这里不啰嗦了。


1) 首先,这个panic跟内存使用越界有关。先来看看导致panic的call trace和寄存器。

 

RIP: 0010:[] [] elv_rqhash_add+0x81/0xa0
 
RSP: 0018:ffff880142c7da68 EFLAGS: 00010002
RAX: ffff88013f9de800 RBX: ffffffdb6ab92554 RCX: ffff88017d619a70
RDX: 0000000000000001 RSI: ffff88017d619a00 RDI: a800000000000000
RBP: ffff880142c7da70 R08: 8000000000000000 R09: 2aa0000000000000
R10: 5500000000000000 R11: 0000000000000000 R12: ffff880134557660
R13: 0000000000000002 R14: 0000000000000000 R15: 0000000000001000
FS: 0000000000000000(0000) GS:ffff880030440000(0000) knlGS:0000000000000000
CS: 0010 DS: 0018 ES: 0018 CR0: 000000008005003b
CR2: 0000000000000009 CR3: 0000000001001000 CR4: 00000000000406e0
DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400
Process md16_raid4 (pid: 37296, threadinfo ffff880142c7c000, task ffff8801389080c0)
Stack:
ffff88017d619a00 ffff880142c7daa0 ffffffff81237fc4 ffff880142c7daa0
<0> ffff880134557660 0000000000000000 ffff88017d619a00 ffff880142c7db10
<0> ffffffff8124181b ffff880124abaf00 ffff88012701eda8 ffff880124aba900
Call Trace:
[] elv_merged_request+0x84/0x90
[] __make_request+0x29b/0x4b0
[] dm_request+0x166/0x230 [dm_mod]
[] generic_make_request+0x1b2/0x4f0
......


2) 通过反汇编elv_rqhash_add并对照内核源码,发现RAX的值应该指向一个类型为hlist_head的数组。检查这个数组的第一个成员发现,其first成员是1。first是指向hlist_node的指针,只能是NULL或者合法的内核地址,因此1是一个不可能存在的值。另外,向链表头插入节点时内核会写first->pprev(64位系统上偏移为8),即访问地址0x1+8=0x9,这与上面CR2的值0x9是吻合的。

  
crash> struct hlist_head 0xffff88013f9de800
struct hlist_head {
  first = 0x1
}


3) 由于这个hlist_head的内容非法,尝试看这个指针本身是否有误。首先看内存指针本身,然后就是从入口参数开始,逐层找到这个指针,看哪个环节首先出错。


通过汇编分析,能够确定寄存器RSI的值(入口参数)是一个request结构体。

crash> struct request ffff88017d619a00 
struct request {
  queuelist = {
    next = 0xffff880137afd860,
    prev = 0xffff88018fb86e60
  },
  csd = {
    list = {
      next = 0x0,
      prev = 0x0
    },
    func = 0,
    info = 0x0,
    flags = 0,
    priv = 0
  },
  q = 0xffff880134557660, ;queue
....

crash> kmem 0xffff880134557660  ;从kmem的输出结果看,0xffff880134557660  确实对应一个blkdev_queue结构,看起来合理
CACHE NAME OBJSIZE ALLOCATED TOTAL SLABS SSIZE
ffff8802360818c0 blkdev_queue 2320 994 996 332 8k
SLAB MEMORY TOTAL ALLOCATED FREE
ffff880134556400 ffff880134556440 3 3 0
FREE / [ALLOCATED]
  [ffff880134557660]



crash> struct request_queue.elevator 0xffff880134557660   ;找到queue的elevator
  elevator = 0xffff88011b4f7a40
  
crash> struct elevator_queue 0xffff88011b4f7a40   ;查看elevator的内容,从其kobj的name看,没有异常。
struct elevator_queue {
  ops = 0xffffffff817771f0,
  elevator_data = 0xffff880137afd860,
  kobj = {
    name = 0xffff880136065b80 "iosched",
    ....
  },
  elevator_type = 0xffffffff817771e0,
    ....
  hash = 0xffff88013f9de800   ;这个就是前面提到的hlist_head数组的指针
}


crash> kmem 0xffff88013f9de800      ;从内核记录的内存分配信息看,这是一块从size-512这个slab分配的内存,未看出指针值本身有不合理之处。
CACHE NAME OBJSIZE ALLOCATED TOTAL SLABS SSIZE
ffff88023fcb02c0 size-512 512 4344 4728 591 4k
SLAB MEMORY TOTAL ALLOCATED FREE
ffff88013a96cec0 ffff88013f9de000 8 8 0
FREE / [ALLOCATED]
  [ffff88013f9de800]


crash> struct elevator_type 0xffffffff817771e0    ;看看结构体其他兄弟成员吧,内容看起来完全合理,noop这个scheduler也是存在的。
struct elevator_type {
  list = {
    next = 0xffffffff817772c0,
    prev = 0xffffffff81775860
  },
  ops = {
    elevator_merge_fn = 0,
    elevator_merged_fn = 0,
    elevator_merge_req_fn = 0xffffffff8124d480 ,
    elevator_allow_merge_fn = 0,
    elevator_bio_merged_fn = 0,
    elevator_dispatch_fn = 0xffffffff8124d560 ,
    elevator_add_req_fn = 0xffffffff8124d530 ,
    elevator_activate_req_fn = 0,
    elevator_deactivate_req_fn = 0,
    elevator_queue_empty_fn = 0xffffffff8124d4a0 ,
    elevator_completed_req_fn = 0,
    elevator_former_req_fn = 0xffffffff8124d4c0 ,
    elevator_latter_req_fn = 0xffffffff8124d4e0 ,
    elevator_set_req_fn = 0,
    elevator_put_req_fn = 0,
    elevator_may_queue_fn = 0,
    elevator_init_fn = 0xffffffff8124d5a0 ,
    elevator_exit_fn = 0xffffffff8124d500 ,
    trim = 0
  },
  elevator_attrs = 0x0,
  elevator_name = "noop\000\000\000\000\000\000\000\000\000\000\000",
  elevator_owner = 0x0
}
  
4) 从上面的分析看,request、queue、elevator_queue以及elevator_queue.hash这些从入口参数推导出来的值本身没有问题。那么,为什么hash指向的hlist_head数组的第一个元素会有问题呢?是否整个数组都有问题?


从内核代码看,hash这个数组的大小为64,因此读取64个指针,发现只有第1个为1,其他为0(NULL)或者合理的内核指针。注意,对于hlist_head->first,0也是合理的。

crash> rd 0xffff88013f9de800 64
ffff88013f9de800: 0000000000000001 0000000000000000 ................
ffff88013f9de810: 0000000000000000 0000000000000000 ................
ffff88013f9de820: 0000000000000000 0000000000000000 ................
ffff88013f9de830: 0000000000000000 0000000000000000 ................
ffff88013f9de840: 0000000000000000 0000000000000000 ................
ffff88013f9de850: 0000000000000000 0000000000000000 ................
ffff88013f9de860: 0000000000000000 0000000000000000 ................
ffff88013f9de870: 0000000000000000 0000000000000000 ................
ffff88013f9de880: 0000000000000000 0000000000000000 ................
ffff88013f9de890: 0000000000000000 0000000000000000 ................
ffff88013f9de8a0: 0000000000000000 0000000000000000 ................
ffff88013f9de8b0: 0000000000000000 0000000000000000 ................
ffff88013f9de8c0: 0000000000000000 0000000000000000 ................
ffff88013f9de8d0: 0000000000000000 0000000000000000 ................
ffff88013f9de8e0: 0000000000000000 0000000000000000 ................
ffff88013f9de8f0: 0000000000000000 0000000000000000 ................
ffff88013f9de900: 0000000000000000 0000000000000000 ................
ffff88013f9de910: 0000000000000000 0000000000000000 ................
ffff88013f9de920: 0000000000000000 0000000000000000 ................
ffff88013f9de930: 0000000000000000 0000000000000000 ................
ffff88013f9de940: 0000000000000000 0000000000000000 ................
ffff88013f9de950: 0000000000000000 0000000000000000 ................
ffff88013f9de960: 0000000000000000 0000000000000000 ................
ffff88013f9de970: 0000000000000000 0000000000000000 ................
ffff88013f9de980: ffff88018fb86ed0 0000000000000000 .n..............
ffff88013f9de990: 0000000000000000 0000000000000000 ................
ffff88013f9de9a0: 0000000000000000 0000000000000000 ................
ffff88013f9de9b0: 0000000000000000 0000000000000000 ................
ffff88013f9de9c0: 0000000000000000 0000000000000000 ................
ffff88013f9de9d0: 0000000000000000 0000000000000000 ................
ffff88013f9de9e0: 0000000000000000 0000000000000000 ................
ffff88013f9de9f0: 0000000000000000 0000000000000000 ................


5) 到这里没有看出非常明显的规律,只知道这块内存的前8个字节内容不合理。后来又重现了这个panic,分析其他vmcore时也发现了类似的情况,由此想到可能是与这块内存相邻的内存区发生了溢出。由于这块内存位于size-512的slab中,而同一个slab内的对象大小相同、连续排列,前一个对象的起始地址就是 0xffff88013f9de800 - 512,因此直接读取这个地址的数据:

crash> rd ffff88013f9de600 64  ;0xffff88013f9de800 - 512 = ffff88013f9de600 
ffff88013f9de600:  0000000000000000 ffffffffa02460e0   .........`$.....
ffff88013f9de610:  ffffffffa0245e80 0000000000000000   .^$.............
ffff88013f9de620:  ffff880232d88620 ffff88013a6eac20    ..2.... .n:....
ffff88013f9de630:  ffff88015ae3f600 ffff88015ae3f540   ...Z....@..Z....
ffff88013f9de640:  ffff88015ae3f480 ffff88015ae3f3c0   ...Z.......Z....
ffff88013f9de650:  ffff88015ae3f300 ffff88015ae3f240   ...Z....@..Z....
ffff88013f9de660:  ffff88015ae3f6c0 0000000000000000   ...Z............
ffff88013f9de670:  00000000170b170b ffff880141931e98   ...........A....
ffff88013f9de680:  ffff880141931e98 0000000000000000   ...A............
ffff88013f9de690:  0000000000000000 0000000000000000   ................
ffff88013f9de6a0:  0000000000030003 ffff88013f9de6a8   ...........?....
ffff88013f9de6b0:  ffff88013f9de6a8 ffff88015488a080   ...?.......T....
ffff88013f9de6c0:  ffff88012b739358 ffff8802368e53a0   X.s+.....S.6....
ffff88013f9de6d0:  00000001010778fd ffffffffa0243570   .x......p5$.....
ffff88013f9de6e0:  ffff880133821028 ffff8802368e4000   (..3.....@.6....
ffff88013f9de6f0:  0000000000000000 0000000000000000   ................
ffff88013f9de700:  0000000000000000 00000000ffffffff   ................
ffff88013f9de710:  0000000000000000 0000000000000000   ................
ffff88013f9de720:  ffff88013f9de720 ffff88013f9de720    ..?.... ..?....
ffff88013f9de730:  0000020000000001 0000000000000200   ................
ffff88013f9de740:  0000000000000000 0000000068676867   ........ghgh....
ffff88013f9de750:  ffff88013f9de750 ffff88013f9de750   P..?....P..?....
ffff88013f9de760:  0000000001000000 ffff88012234ec60   ........`.4"....
ffff88013f9de770:  0000002000000000 00001f4000001f40   .... ...@...@...
ffff88013f9de780:  0000000000000040 0000000000000000   @...............
ffff88013f9de790:  0000000000000000 0000000000000018   ................
ffff88013f9de7a0:  0000000000000051 000000000000004e   Q.......N.......
ffff88013f9de7b0:  0000000000000062 0000000000000169   b.......i.......
ffff88013f9de7c0:  0000000000000de6 00000000000005e3   ................
ffff88013f9de7d0:  0000000000000587 0000000000000039   ........9.......
ffff88013f9de7e0:  0000000000000140 000000000000000a   @...............
ffff88013f9de7f0:  0000000000000024 0000000000000018   $...............


6) 从上面这块内存的内容中,没有找到有明显意义的字符串,无法直接看出它对应什么结构体,其大小也不一定正好是512字节。但是可以看出,它的前半部分有很多指针,后半部分有很多整数,看起来像个整数数组。

  我们进一步读入该结构体内指针(标记为红色)指向的地址后,找到了一些有特征的字符串,这些字符串属于我们自己开发的模块,具体细节就不在这里列出来了。通过那些有特征的字符串,很快定位到了对应的结构体。发现这个结构体的大小正好为512,该结构体的最后一部分确实是个long型的数组。随后的代码review发现了操作该结构体的一段有问题的代码的确会越界一个long的大小(8字节)。



你可能感兴趣的:(kdump)