简单记录一下Linux Oops的定位步骤。首先创建一个demo:一个简单的字符设备驱动模块。
一、创建demo 模块的代码
#include#include #include #include #include #define MINOR_BASE 0 #define COUNT_NUM 3 static struct cdev *cdevp = NULL; static dev_t dev; struct class *demo_class = NULL; struct device *drv_device; int demo_open(struct inode *node, struct file *filp) { printk(KERN_INFO "%s %d demo_open called...\n",__func__,__LINE__); int *p = NULL; *p = 1; /* 空指针赋值 */ return 0; } int demo_release(struct inode *node, struct file *filp) { printk(KERN_INFO "%s %d demo_release called...\n",__func__,__LINE__); return 0; } ssize_t demo_read(struct file *filep, char __user *user, size_t size, loff_t *loffp) { printk(KERN_INFO "%s %d demo_read called...\n",__func__,__LINE__); return 0; } ssize_t demo_write(struct file * filep, const char __user *user, size_t size, loff_t * loffp) { printk(KERN_INFO "%s %d demo_write called...\n",__func__,__LINE__); return size; } struct file_operations opfp = { .owner = THIS_MODULE, .open = demo_open, .release = demo_release, .read = demo_read, .write = demo_write, }; static int __init demo_init(void) { int ret; //dev_t ret = alloc_chrdev_region(&dev,MINOR_BASE,COUNT_NUM,"cdev_demo"); if( ret<0 ){ pr_err("%s %d alloc_chrdev_region error...\n",__func__,__LINE__); goto err_ret; } pr_info("MARJO DEV_T = %d\n",MAJOR(dev)); //cdev cdevp = cdev_alloc(); if( NULL == cdevp ){ pr_err("%s %d cdev_alloc error...\n",__func__,__LINE__); goto err_chrdev_unreg; } //init cdev cdev_init(cdevp, &opfp); //add dev ret = cdev_add(cdevp, dev, COUNT_NUM); if( ret<0 ){ pr_err("cdev_add error...\n"); goto err_chrdev_unreg; } //create class demo_class = class_create(THIS_MODULE, "demo"); if (IS_ERR(demo_class)) { pr_err("%s %d demo: class_create failed for adf_ctl\n",__func__,__LINE__); goto err_cdev_del; } //create dev file drv_device = device_create(demo_class, NULL, dev, NULL, "demo"); if (IS_ERR(drv_device)) { pr_err("demo: failed to create device\n"); goto err_class_destr; } pr_info("device created success\n"); printk("++++++++++++++%s %s 
%d+++++++++++++++\n",__FILE__,__func__,__LINE__); return 0; err_class_destr: class_destroy(demo_class); err_cdev_del: cdev_del(cdevp); err_chrdev_unreg: unregister_chrdev_region(dev,COUNT_NUM); err_ret: return ret; } static void __exit demo_exit(void) { device_destroy(demo_class,dev); cdev_del(cdevp); class_destroy(demo_class); unregister_chrdev_region(dev,COUNT_NUM); printk("+++++++++++++%s %s %d+++++++++++++++\n",__FILE__,__func__,__LINE__); } module_init(demo_init); module_exit(demo_exit); MODULE_LICENSE("GPL"); MODULE_AUTHOR("zxh@2019/1/17"); MODULE_DESCRIPTION("fist test demo");
二、编译demo的makefile //模块的统一格式
# Out-of-tree build of demo.ko against the installed kernel headers.
obj-m := demo.o

# Path to the kernel build tree. The comment is kept on its own line: a
# trailing "# ..." after the value would leave whitespace in $(KDIR).
KDIR := /usr/src/linux-headers-3.16.0-30-generic
PWD := $(shell pwd)

# Neither target produces a file of that name.
.PHONY: all clean

# Default goal: build the module, then remove the intermediate files.
# A leading '@' stops make from echoing the command.
all:
	$(MAKE) -C $(KDIR) M=$(PWD) modules
	@rm -rf *.mod.*
	@rm -rf *.o
	@rm -rf Module.*
	@rm -rf modules.*

clean:
	rm -rf *.ko *.o
三、insmod demo.ko
通过dmesg查看打印信息
[ 527.256991] MARJO DEV_T = 250
[ 527.265068] device created success
[ 527.265081] ++++++++++++++/home/luster/share/BiTree/module_demo/demo.c demo_init 82+++++++++++++++
四、cat /dev/demo
通过dmesg查看打印信息
[ 6319.390421] demo_open 15 demo_open called...// demo_open函数被调用 [ 6319.391910] BUG: unable to handle kernel NULL pointer dereference at (null) [ 6319.392210] IP: [] demo_open+0x29/0x40 [demo] [ 6319.392685] *pdpt = 0000000034814001 *pde = 0000000000000000 [ 6319.392747] Oops: 0002 [#1] SMP [ 6319.392842] Modules linked in: demo(OE) xt_nat xt_REDIRECT xt_tcpudp iptable_nat nf_conntrack_ipv4 nf_defrag_ipv4 nf_nat_ipv4 nf_nat nf_conntrack ip_tables x_tables esn_cfs(OE) snd_ens1371 snd_ac97_codec ac97_bus gameport snd_pcm coretemp crc32_pclmul snd_seq_midi snd_seq_midi_event snd_rawmidi aesni_intel aes_i586 xts lrw bnep rfcomm snd_seq bluetooth gf128mul 6lowpan_iphc vmw_balloon ablk_helper cryptd vmwgfx snd_seq_device serio_raw snd_timer joydev ttm drm_kms_helper snd soundcore binfmt_misc vmw_vmci shpchp drm i2c_piix4 parport_pc ppdev lp parport mac_hid hid_generic usbhid hid psmouse mptspi mptscsih mptbase pcnet32 mii scsi_transport_spi pata_acpi floppy [ 6319.393110] CPU: 0 PID: 3304 Comm: cat Tainted: G OE 3.16.0-30-generic #40~14.04.1-Ubuntu [ 6319.393120] Hardware name: VMware, Inc. 
VMware Virtual Platform/440BX Desktop Reference Platform, BIOS 6.00 07/02/2015 [ 6319.393162] task: e04da520 ti: e4bec000 task.ti: e4bec000 [ 6319.393223] EIP: 0060:[ ] EFLAGS: 00010246 CPU: 0 [ 6319.393246] EIP is at demo_open+0x29/0x40 [demo] [ 6319.393261] EAX: 00000000 EBX: dd92efc0 ECX: f6f6bb88 EDX: f6f6a5c4 [ 6319.393269] ESI: d50dbca4 EDI: e074f3c0 EBP: e4beddbc ESP: e4beddb0 [ 6319.393279] DS: 007b ES: 007b FS: 00d8 GS: 00e0 SS: 0068 [ 6319.393298] CR0: 8005003b CR2: 00000000 CR3: 3486a000 CR4: 001407f0 [ 6319.393365] Stack: [ 6319.393404] f99d6024 f99d623c 0000000f e4beddd8 c11920d7 f99d7000 00000000 e074f3c0 [ 6319.393445] d50dbca4 00000000 e4beddfc c118c34c 00002000 c1192050 e074f3c8 e054b880 [ 6319.393448] 00000000 e3c0a000 e074f3c0 e4bede10 c118c4bc e4bedecc 00000000 00000000 [ 6319.393470] Call Trace: //函数调用关系 [ 6319.393767] [ ] chrdev_open+0x87/0x190 [ 6319.393780] [ ] do_dentry_open+0x1ec/0x2e0 [ 6319.393798] [ ] ? cdev_put+0x20/0x20 [ 6319.393803] [ ] vfs_open+0x3c/0x50 [ 6319.393806] [ ] do_last+0x4c1/0xf10 [ 6319.393807] [ ] ? link_path_walk+0x5f/0x6d0 [ 6319.393829] [ ] ? apparmor_file_alloc_security+0x4a/0x140 [ 6319.393831] [ ] path_openat+0xb0/0x540 [ 6319.393833] [ ] do_filp_open+0x31/0x80 [ 6319.393836] [ ] do_sys_open+0x115/0x260 [ 6319.393866] [ ] ? file_redirect_check+0xaf/0x3a0 [esn_cfs] [ 6319.393871] [ ] ? GetNextDict+0x60/0xb0 [esn_cfs] [ 6319.393881] [ ] SyS_open+0x22/0x30 [ 6319.393885] [ ] efs_sys_open+0x66/0x110 [esn_cfs] [ 6319.393910] [ ] sysenter_do_call+0x12/0x12 [ 6319.393929] Code: 05 00 00 00 00 01 00 00 00 c9 c3 8d 74 26 00 8d bc 27 00 00 00 [ 6319.394128] EIP: [ ] demo_open+0x29/0x40 [demo] SS:ESP 0068:e4beddb0 [ 6319.394152] CR2: 0000000000000000 [ 6319.394381] ---[ end trace fc2838f795f2c84d ]---
五、分析oops
1)cat /dev/demo会调用模块的demo_open函数
[ 6319.390421] demo_open 15 demo_open called...
2)然后就产生了异常
[ 6319.391910] BUG: unable to handle kernel NULL pointer dereference at (null)
通过BUG可以看出是使用空指针引起的异常,这和我们写的代码一致。
[ 6319.392210] IP: [] demo_open+0x29/0x40 [demo]
程序的pc指针(EIP)指向了demo_open函数起始地址偏移0x29的位置;斜杠后面的0x40是demo_open函数的总长度,[demo]表示该函数属于demo模块。
[ 6319.392747] Oops: 0002 [#1] SMP
0002是oops的错误码(即缺页异常的错误码,按位解读:bit0=0表示访问的页不存在,bit1=1表示是写操作,bit2=0表示发生在内核态);[#1]表示这是第1次发生oops;SMP表示内核支持多处理器。
3)通过objdump命令,查看代码的反汇编
objdump -S demo.ko //-S显示源代码的反汇编,比较清晰 demo.ko: file format elf32-i386 Disassembly of section .text: 00000000: 0:55 push %ebp 31:89 e5 add %al,(%eax) 33:83 leave 34:ec ret 35:0c e8 fc ff lea 0x0(%esi,%eiz,1),%esi 39:ff ff c7 44 24 08 0f lea 0x0(%edi,%eiz,1),%edi 40:00 push %ebp 41:00 00 mov %esp,%ebp 43:c7 44 24 sub $0xc,%esp 46:04 36 00 00 00 call 47 4b:c7 04 24 00 00 00 00 movl $0x16,0x8(%esp) 52:e8 53:fc ff ff ff 31 c0 c7 movl $0x29,0x4(%esp) 5a:05 5b:00 00 00 00 01 00 00 movl $0x0,(%esp) 62:00 c9 c3 8d 74 call 63 67:26 00 xor %eax,%eax 69:8d leave 6a:bc ret 6b:27 nop 6c:00 00 00 00 lea 0x0(%esi,%eiz,1),%esi 00000070 : 70:55 push %ebp 71:89 e5 mov %esp,%ebp 73:83 ec 0c sub $0xc,%esp 76:e8 fc ff ff ff call 77 7b:c7 44 24 08 16 00 00 movl $0x1c,0x8(%esp) 82:00 83:c7 44 24 04 29 00 00 movl $0x1f,0x4(%esp) 8a:00 8b:c7 04 24 00 00 00 00 movl $0x1d,(%esp) 92:e8 fc ff ff ff call 93 97:31 c0 xor %eax,%eax 99:c9 leave 9a:c3 ret 9b:90 nop 9c:8d 74 26 00 lea 0x0(%esi,%eiz,1),%esi 000000a0 : a0:55 push %ebp a1:89 e5 mov %esp,%ebp a3:83 push %ebx a4:ec 0c e8 sub $0xc,%esp a7:fc ff ff ff c7 call a8 ac:44 24 08 1c 00 00 00 movl $0x21,0x8(%esp) b3:c7 b4:44 24 mov %ecx,%ebx b6:04 1f 00 00 00 c7 04 movl $0x14,0x4(%esp) bd:24 be:1d 00 00 00 e8 fc ff movl $0x3a,(%esp) c5:ff ff 31 c0 c9 call c6 ca:c3 90 8d add $0xc,%esp cd:74 26 mov %ebx,%eax ... 000000d0 : d0:55 pop %ebp d1:89 ret d2:Address 0x00000000000000d2 is out of bounds. 
Disassembly of section .init.text: 00000132 : 132:55 push %ebp 133:31 d2 xor %edx,%edx 135:89 e5 mov %esp,%ebp 137:b9 03 00 00 00 mov $0x3,%ecx 13c:53 push %ebx 13d:b8 08 00 00 00 mov $0x8,%eax 142:83 ec 14 sub $0x14,%esp 145:c7 04 24 58 00 00 00 movl $0x58,(%esp) 14c:e8 fc ff ff ff call 14d 151:85 c0 test %eax,%eax 153:89 c3 mov %eax,%ebx 155:79 23 jns 17a 157:c7 44 24 08 33 00 00 movl $0x33,0x8(%esp) 15e:00 15f:c7 44 24 04 0a 00 00 movl $0xa,0x4(%esp) 166:00 167:c7 04 24 20 00 00 00 movl $0x20,(%esp) 16e:e8 fc ff ff ff call 16f 173:89 d8 mov %ebx,%eax 175:e9 5b 01 00 00 jmp 2d5 17a:a1 08 00 00 00 mov 0x8,%eax 17f:c7 04 24 62 00 00 00 movl $0x62,(%esp) 186:c1 e8 14 shr $0x14,%eax 189:89 44 24 04 mov %eax,0x4(%esp) 18d:e8 fc ff ff ff call 18e 192:e8 fc ff ff ff call 193 197:85 c0 test %eax,%eax 199:a3 0c 00 00 00 mov %eax,0xc 19e:75 21 jne 1c1 1a0:c7 44 24 08 3a 00 00 movl $0x3a,0x8(%esp) 1a7:00 1a8:c7 44 24 04 0a 00 00 movl $0xa,0x4(%esp) 1af:00 1b0:c7 04 24 76 00 00 00 movl $0x76,(%esp) 1b7:e8 fc ff ff ff call 1b8 1bc:e9 03 01 00 00 jmp 2c4 1c1:ba 00 00 00 00 mov $0x0,%edx 1c6:e8 fc ff ff ff call 1c7 1cb:8b 15 08 00 00 00 mov 0x8,%edx 1d1:b9 03 00 00 00 mov $0x3,%ecx 1d6:a1 0c 00 00 00 mov 0xc,%eax 1db:e8 fc ff ff ff call 1dc 1e0:85 c0 test %eax,%eax 1e2:89 c3 mov %eax,%ebx 1e4:79 11 jns 1f7 1e6:c7 04 24 93 00 00 00 movl $0x93,(%esp) 1ed:e8 fc ff ff ff call 1ee 1f2:e9 cd 00 00 00 jmp 2c4 1f7:b9 08 00 00 00 mov $0x8,%ecx 1fc:ba a8 00 00 00 mov $0xa8,%edx 201:b8 00 00 00 00 mov $0x0,%eax 206:e8 fc ff ff ff call 207 20b:3d 00 f0 ff ff cmp $0xfffff000,%eax 210:a3 00 00 00 00 mov %eax,0x0 215:76 21 jbe 238 217:c7 44 24 08 48 00 00 movl $0x48,0x8(%esp) 21e:00 21f:c7 44 24 04 0a 00 00 movl $0xa,0x4(%esp) 226:00 227:c7 04 24 48 00 00 00 movl $0x48,(%esp) 22e:e8 fc ff ff ff call 22f 233:e9 82 00 00 00 jmp 2ba 238:8b 15 08 00 00 00 mov 0x8,%edx 23e:c7 44 24 10 a8 00 00 movl $0xa8,0x10(%esp) 245:00 246:c7 44 24 0c 00 00 00 movl $0x0,0xc(%esp) 24d:00 24e:c7 44 24 04 00 00 00 
movl $0x0,0x4(%esp) 255:00 256:89 54 24 08 mov %edx,0x8(%esp) 25a:89 04 24 mov %eax,(%esp) 25d:e8 fc ff ff ff call 25e 262:3d 00 f0 ff ff cmp $0xfffff000,%eax 267:a3 00 00 00 00 mov %eax,0x0 26c:76 18 jbe 286 26e:c7 04 24 78 00 00 00 movl $0x78,(%esp) 275:e8 fc ff ff ff call 276 27a:a1 00 00 00 00 mov 0x0,%eax 27f:e8 fc ff ff ff call 280 284:eb 34 jmp 2ba 286:c7 04 24 ad 00 00 00 movl $0xad,(%esp) 28d:e8 fc ff ff ff call 28e 292:c7 44 24 0c 52 00 00 movl $0x52,0xc(%esp) 299:00 29a:c7 44 24 08 0a 00 00 movl $0xa,0x8(%esp) 2a1:00 2a2:c7 44 24 04 9c 00 00 movl $0x9c,0x4(%esp) 2a9:00 2aa:c7 04 24 cc 00 00 00 movl $0xcc,(%esp) 2b1:e8 fc ff ff ff call 2b2 2b6:31 c0 xor %eax,%eax 2b8:eb 1b jmp 2d5 2ba:a1 0c 00 00 00 mov 0xc,%eax 2bf:e8 fc ff ff ff call 2c0 2c4:a1 08 00 00 00 mov 0x8,%eax 2c9:ba 03 00 00 00 mov $0x3,%edx 2ce:e8 fc ff ff ff call 2cf 2d3:89 d8 mov %ebx,%eax 2d5:83 c4 14 add $0x14,%esp 2d8:5b pop %ebx 2d9:5d pop %ebp 2da:c3 ret Disassembly of section .exit.text: 0000040d : 40d:55 push %ebp 40e:89 e5 mov %esp,%ebp 410:83 ec 10 sub $0x10,%esp 413:8b 15 08 00 00 00 mov 0x8,%edx 419:a1 00 00 00 00 mov 0x0,%eax 41e:e8 fc ff ff ff call 41f 423:a1 0c 00 00 00 mov 0xc,%eax 428:e8 fc ff ff ff call 429 42d:a1 00 00 00 00 mov 0x0,%eax 432:e8 fc ff ff ff call 433 437:a1 08 00 00 00 mov 0x8,%eax 43c:ba 03 00 00 00 mov $0x3,%edx 441:e8 fc ff ff ff call 442 446:c7 44 24 0c 63 00 00 movl $0x63,0xc(%esp) 44d:00 44e:c7 44 24 08 00 00 00 movl $0x0,0x8(%esp) 455:00 456:c7 44 24 04 9c 00 00 movl $0x9c,0x4(%esp) 45d:00 45e:c7 04 24 f4 00 00 00 movl $0xf4,(%esp) 465:e8 fc ff ff ff call 466 46a:c9 leave 46b:c3 ret 4)分析反汇编 00000000 : 0:55 push %ebp 31:89 e5 add %al,(%eax) 33:83 leave 34:ec ret 35:0c e8 fc ff lea 0x0(%esi,%eiz,1),%esi 39:ff ff c7 44 24 08 0f lea 0x0(%edi,%eiz,1),%edi 40:00 push %ebp 41:00 00 mov %esp,%ebp 43:c7 44 24 sub $0xc,%esp 46:04 36 00 00 00 call 47 4b:c7 04 24 00 00 00 00 movl $0x16,0x8(%esp) 52:e8 53:fc ff ff ff 31 c0 c7 movl $0x29,0x4(%esp) 5a:05 5b:00 00 00 
00 01 00 00 movl $0x0,(%esp) 62:00 c9 c3 8d 74 call 63 67:26 00 xor %eax,%eax 69:8d leave 6a:bc ret 6b:27 nop 6c:00 00 00 00 lea 0x0(%esi,%eiz,1),%esi
因为错误发生在
demo_open+0x29/0x40
即就是在这一段中间
46:04 36 00 00 00 call 474b:c7 04 24 00 00 00 00 movl $0x16,0x8(%esp) 52:e8 53:fc ff ff ff 31 c0 c7 movl $0x29,0x4(%esp) 5a:05 5b:00 00 00 00 01 00 00 movl $0x0,(%esp) 62:00 c9 c3 8d 74 call 63
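注意:上面这段反汇编在排版时字节列与指令列已经错位,movl $0x0,(%esp) 只是为随后的 call(printk)准备栈上参数,并不是出错的指令。demo_open 从地址 0x0 开始、长度为 0x40,偏移 0x29 处的机器码是 c7 05 00 00 00 00 01 00 00 00,即 movl $0x1,0x0:把立即数 1 写入地址 0,对应源码中的 *p = 1(p 为 NULL),也就是空指针赋值。这与 oops 信息中 Code: 一行的字节以及 CR2: 00000000 是吻合的。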
大概流程就是如此,只是了解个皮毛。
最后,当发生错误时,rmmod demo将发生错误:Module demo is in use
通过查询模块lsmod
module size used
demo 12831 1
说明模块仍被引用(引用计数为1):cat进程在open过程中因oops被内核杀死,打开设备时获得的模块引用没有机会释放,demo也就没有被关闭。
通过查询资料,大概有两种方法:1、通过rmmod -f 2、通过编写另一个模块去将模块的use值改为0,然后再rmmod
第一种:rmmod -f 我的系统不支持
第二种:通过http://www.cppblog.com/csjiaxin/archive/2012/06/06/136382.html?opt=admin方法貌似也不行。
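因为获取不到模块的首地址:如下所示,/proc/kallsyms中的地址全部显示为00000000。这通常是因为以普通用户身份读取时内核指针被隐藏(受kptr_restrict控制),可以尝试用root权限(例如sudo cat /proc/kallsyms)查看真实地址。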
cat /proc/kallsyms | grep modules 00000000 t modules_open 00000000 T set_all_modules_text_rw 00000000 T set_all_modules_text_ro 00000000 T print_modules 00000000 r proc_modules_operations 00000000 r modules_op 00000000 r sih_modules_twl5031 00000000 r sih_modules_twl4030 00000000 d smp_alt_modules 00000000 d randomize_modules 00000000 D kdb_modules 00000000 d modules 00000000 T load_default_modules 00000000 t proc_modules_init 00000000 t __initcall_proc_modules_init6 00000000 B modules_disabled 00000000 B kcore_modules 00000000 b nr_sih_modules 00000000 b sih_modules
所以当发生错误时,我只能重启虚拟机。