前一阵子搞动态替换内核函数,过程十分痛苦,特写此文,纪念那些该死的Oops
一、源代码文件
forward.c 驱动模块的文件
- #define CODESIZE 4
-
- int (* orig_rtc_dev_open) (struct inode *inode, struct file *file) =
- ( int(*)(struct inode *inode, struct file *file))0xc01f5f00;
-
- void* my_memcpy (void *dest, const void *src, int size)
- {
- const char *p = src;
- char *q = dest;
- int i;
- for (i=0; i<size; i++) *q++ = *p++;
- return dest;
- }
-
- int my_rtc_dev_open(struct inode *inode, struct file *file)
- {
- printk("leonlalal\n");
- return 0;
- }
-
- int forward_init(void)
- {
- uint32_t addr = 0;
- int i;
- char rtc_dev_open_buf[CODESIZE];
-
- addr = (uint32_t)my_rtc_dev_open;
-
- addr = (addr<<2) & ~0xfe000003 ;
-
- addr |= 0x48000000;
- printk("fuc %08x\n", addr);
-
- lock_kernel();
-
- for(i = 0; i< CODESIZE; i++){
- printk("%08x \n", *((uint32_t *)orig_rtc_dev_open + i));
- }
-
- //kernel code ---> buf
- my_memcpy(rtc_dev_open_buf, (char *)orig_rtc_dev_open, CODESIZE);
-
-
- printk("rtc_dev_open_buf[%08x]\n", *((uint32_t *)rtc_dev_open_buf));
-
- //new jump code ---> kernel code
- my_memcpy(orig_rtc_dev_open, (char*)addr, CODESIZE);
-
- unlock_kernel();
- return 0;
- }
-
- void forward_exit (void)
- {
- //buf ---> kernel code
- my_memcpy(orig_rtc_dev_open, rtc_dev_open_buf, CODESIZE);
- }
二、看Oops
加载后通过ioctl执行forward_init函数,出现oops
- -bash-3.2# dmesg -c
- dbg exit now
- the major device No. is 215
- fuc 4818a900
- 9421ffe0
- 7c0802a6
- 90010024
- bfa10014
- rtc_dev_open_buf[9421ffe0]
- Oops: kernel access of bad area, sig: 11 [#1]
- PREEMPT
- NIP: f1062a28 LR: f1062b0c CTR: 00000004
- REGS: efec5de0 TRAP: 0300 Not tainted (2.6.24.2)
- MSR: 00029000 CR: 44000422 XER: 00000000
- DEAR: 4818a900, ESR: 00000000
- TASK = efea7aa0[1243] 'my_dump' THREAD: efec4000
- GPR00: f1062afc efec5e90 efea7aa0 c01f5f00 4818a900 00000004 00000000 c033d130 第三步
- GPR08: 00000000 c01f5f00 00000000 c0340000 24000482 100191e8 00000240 00000000
- GPR16: 1010f6b0 1009ea94 100f0000 100f7ac8 00000000 1010f540 100ff0b8 00000000
- GPR24: 3000e86c 00000002 7fefbc24 0000002e 4818a900 f1060000 f1060000 f1060000
- NIP [f1062a28] my_memcpy+0x10/0x28 [enetdbg] 第二步
- LR [f1062b0c] forward_init+0xa0/0xbc [enetdbg]
- Call Trace:
- [efec5e90] [f1062afc] forward_init+0x90/0xbc [enetdbg] (unreliable) 第一步
- [efec5eb0] [f106011c] dbg_ioctl+0x11c/0x148 [enetdbg]
- [efec5ec0] [c0086d04] do_ioctl+0x84/0xc0
- [efec5ee0] [c0086df8] vfs_ioctl+0xb8/0x448
- [efec5f10] [c00871c8] sys_ioctl+0x40/0x74
- [efec5f40] [c0002340] ret_from_syscall+0x0/0x3c
- Instruction dump:
- 409dffac 387a45c0 48000161 80010034 bb210014 38210030 7c0803a6 4e800020
- 2c050000 7c691b78 4c810020 7ca903a6 <88040000> 38840001 98090000 39290001 第四步
- -bash-3.2#
1. 先看 Call Trace,知道是死在了 forward_init() 里
2. 再看 NIP(Next Instruction Pointer):
出错时,下一条要执行的指令的地址为
[f1062a28] my_memcpy+0x10/0x28 [enetdbg]
[f1062a28] 出错指令的绝对地址
my_memcpy+0x10/0x28 出错指令在 my_memcpy 函数中的偏移量为 0x10,函数总长度为 0x28
[enetdbg] 出错指令的所属模块
3. 看看GPR(General Purpose Register):
通用寄存器,其中 r3-r10是用来放形参的
r3 c01f5f00 对应第一个形参 orig_rtc_dev_open,存放指令的地址
r4 4818a900 对应第二个形参 addr,即新指令的值(与打印信息 fuc 4818a900 以及 DEAR: 4818a900 一致),这个值被当成了源地址去读 //错误就在这
r5 00000004 对应第三个形参 CODESIZE
现在基本确认是死在了 forward_init() ---> my_memcpy() 里
4. 死在了哪条语句上呢,就得看 Instruction dump
我们看到执行的最后几条指令是
<88040000> 38840001 98090000 39290001
怎么找这几条指令的对应的C代码呢,就需要反汇编了
三、反汇编找C代码
前面 forward.c 编译后生成了 forward.o
对其进行反汇编 ppc/85xx/bin/ppc_85xx-objdump -S forward.o > forward_debug.txt
看看forward_debug.txt的内容
- forward.o: 文件格式 elf32-powerpc
-
- 反汇编 .text 节:
-
- 00000000 <my_memcpy>:
- {
- const char *p = src;
- char *q = dest;
- int i;
- for (i=0; i<size; i++) *q++ = *p++;
- 0: 2c 05 00 00 cmpwi r5,0
- 4: 7c 69 1b 78 mr r9,r3
- 8: 4c 81 00 20 blelr
- c: 7c a9 03 a6 mtctr r5
- 10: 88 04 00 00 lbz r0,0(r4)
- 14: 38 84 00 01 addi r4,r4,1
- 18: 98 09 00 00 stb r0,0(r9)
- 1c: 39 29 00 01 addi r9,r9,1
- 20: 42 00 ff f0 bdnz+ 10 <my_memcpy+0x10>
- return dest;
- }
- 24: 4e 80 00 20 blr
-
- 00000028 <my_rtc_dev_open>:
-
- int my_rtc_dev_open(struct inode *inode, struct file *file)
- {
- 28: 94 21 ff f0 stwu r1,-16(r1)
- printk("leonlalal\n");
- 2c: 3c 60 00 00 lis r3,0
- 30: 7c 08 02 a6 mflr r0
- 34: 38 63 00 00 addi r3,r3,0
- 38: 90 01 00 14 stw r0,20(r1)
- 3c: 48 00 00 01 bl 3c <my_rtc_dev_open+0x14>
- return 0;
- }
- 40: 80 01 00 14 lwz r0,20(r1)
- 44: 38 60 00 00 li r3,0
- 48: 38 21 00 10 addi r1,r1,16
- 4c: 7c 08 03 a6 mtlr r0
- 50: 4e 80 00 20 blr
-
- 00000054 <forward_init>:
-
- int forward_init(void)
- {
- 54: 94 21 ff e0 stwu r1,-32(r1)
- uint32_t addr = 0;
- int i;
-
- addr = (uint32_t)my_rtc_dev_open;
- 58: 3d 20 00 00 lis r9,0
-
- addr = (addr<<2) & ~0xfe000003 ;
-
- addr |= 0x48000000;
- printk("fuc %08x\n", addr);
- 5c: 3c 60 00 00 lis r3,0
- 60: 7c 08 02 a6 mflr r0
- 64: 38 63 00 0c addi r3,r3,12
- 68: bf 81 00 10 stmw r28,16(r1)
- 6c: 3b 89 00 00 addi r28,r9,0
- 70: 57 9c 11 fa rlwinm r28,r28,2,7,29
- 74: 90 01 00 24 stw r0,36(r1)
- 78: 67 9c 48 00 oris r28,r28,18432
-
- lock_kernel();
-
- for(i = 0; i< CODESIZE; i++){
- 7c: 3b a0 00 00 li r29,0
- 80: 7f 84 e3 78 mr r4,r28
- 84: 4c c6 31 82 crclr 4*cr1+eq
- 88: 48 00 00 01 bl 88 <forward_init+0x34>
- 8c: 3f c0 00 00 lis r30,0
- 90: 48 00 00 01 bl 90 <forward_init+0x3c>
- 94: 3f e0 00 00 lis r31,0
- printk("%08x \n", *((uint32_t *)orig_rtc_dev_open + i));
- 98: 81 7f 00 00 lwz r11,0(r31)
- 9c: 57 a9 10 3a rlwinm r9,r29,2,0,29
- a0: 38 7e 00 18 addi r3,r30,24
- a4: 3b bd 00 01 addi r29,r29,1
- a8: 7c 89 58 2e lwzx r4,r9,r11
- ac: 4c c6 31 82 crclr 4*cr1+eq
- b0: 48 00 00 01 bl b0 <forward_init+0x5c>
- b4: 2f 9d 00 03 cmpwi cr7,r29,3
- b8: 40 9d ff e0 ble+ cr7,98 <forward_init+0x44>
- }
-
- //kernel code ---> buf
- my_memcpy(rtc_dev_open_buf, (char *)orig_rtc_dev_open, CODESIZE);
- bc: 80 9f 00 00 lwz r4,0(r31)
- c0: 3f a0 00 00 lis r29,0
- c4: 38 a0 00 04 li r5,4
- c8: 38 7d 00 00 addi r3,r29,0
- cc: 48 00 00 01 bl cc <forward_init+0x78>
-
-
- printk("rtc_dev_open_buf[%08x]\n", *((uint32_t *)rtc_dev_open_buf));
- d0: 80 9d 00 00 lwz r4,0(r29)
- d4: 3c 60 00 00 lis r3,0
- d8: 38 63 00 20 addi r3,r3,32
- dc: 4c c6 31 82 crclr 4*cr1+eq
- e0: 48 00 00 01 bl e0 <forward_init+0x8c>
-
- //new jump code ---> kernel code
- my_memcpy(orig_rtc_dev_open, (char*)addr, CODESIZE);
- e4: 80 7f 00 00 lwz r3,0(r31)
- e8: 7f 84 e3 78 mr r4,r28
- ec: 38 a0 00 04 li r5,4
- f0: 48 00 00 01 bl f0 <forward_init+0x9c>
-
- unlock_kernel();
- f4: 48 00 00 01 bl f4 <forward_init+0xa0>
- return 0;
- }
- f8: 80 01 00 24 lwz r0,36(r1)
- fc: 38 60 00 00 li r3,0
- 100: bb 81 00 10 lmw r28,16(r1)
- 104: 7c 08 03 a6 mtlr r0
- 108: 38 21 00 20 addi r1,r1,32
- 10c: 4e 80 00 20 blr
-
- 00000110 <forward_exit>:
-
- void forward_exit (void)
- {
- //buf ---> kernel code
- my_memcpy(orig_rtc_dev_open, rtc_dev_open_buf, CODESIZE);
- 110: 3d 20 00 00 lis r9,0
- 114: 3c 80 00 00 lis r4,0
- 118: 80 69 00 00 lwz r3,0(r9)
- 11c: 38 84 00 00 addi r4,r4,0
- 120: 38 a0 00 04 li r5,4
- 124: 48 00 00 00 b 124 <forward_exit+0x14>
在反汇编结果里搜索
<88040000> 38840001 98090000 39290001 这几条指令(尖括号里的 88040000 就是出错的那条,对应偏移 0x10 处的 lbz r0,0(r4))
原来是死在了my_memcpy里的赋值语句上。
赋值操作应该没有错,那就是my_memcpy 形参错了,最后定位到
- //new jump code ---> kernel code
- my_memcpy(orig_rtc_dev_open, (char*)addr, CODESIZE);
我擦,addr丢了一个&,改之
my_memcpy(orig_rtc_dev_open, (char *)&addr, CODESIZE);
---------------------------------------------------------------------------------------------
后话:
编译,运行,那就是另外一个Oops了,不是死在自己的模块函数里了,死在内核函数里了。
反汇编找C代码的时候更麻烦了,不过套路是一样的,反汇编内核就
ppc/85xx/bin/ppc_85xx-objdump -S vmlinux | less
有些时候,Oops的Instruction dump里全是xxxx,那你就得详细的查查看LR,CTR什么的是不是错了,多半你的代码跳到了一个非法的地址。
四、arm的oops调试(没地儿放,就这了吧)
- [root@leonwang]# mmcconfig -a
- Unable to handle kernel NULL pointer dereference at virtual address 00000048
- pgd = dfbac000
- [00000048] *pgd=1fadb831, *pte=00000000, *ppte=00000000
- Internal error: Oops: 817 [#1] PREEMPT SMP
- Modules linked in: mmc_drv(O)
- CPU: 0 Tainted: G O (3.3.0-14.2-build1 #58)
- PC is at xilinx_prepare_hs_sdr+0x30/0xb8
- LR is at xilinx_prepare_hs_sdr+0x28/0xb8
- pc : [<c02a74d8>] lr : [<c02a74d0>] psr: 60000013
- sp : dfae3ec0 ip : dfae2000 fp : 00000000
- r10: 00000000 r9 : dfae2000 r8 : 00000000
- r7 : dfbed400 r6 : 00000000 r5 : df871280 r4 : df871000
- r3 : 00000000 r2 : 00000000 r1 : 017d7840 r0 : 00000006
- Flags: nZCv IRQs on FIQs on Mode SVC_32 ISA ARM Segment user
- Control: 18c5387d Table: 1fbac04a DAC: 00000015
- Process mmcconfig (pid: 555, stack limit = 0xdfae22f0)
- Stack: (0xdfae3ec0 to 0xdfae4000)
- 3ec0: bef41d20 df871000 dfae2000 bf000b0c df030dac dfae3f78 00000001 df9cd000
- 3ee0: ffffff9c c0015d28 00000000 00000007 c04c6084 b6e9a670 00000006 b6f39000
- 3f00: df5704a8 bef41d20 dfa68a40 00000003 c0015d28 c00a9760 00005452 c00aa1e0
- 3f20: 00000000 00000000 df993240 0000001b 00000000 c008a678 dfae13c8 00000000
- 3f40: 00000000 00000000 00000002 00000003 00000003 c0099b98 00000000 00000003
- 3f60: 00000003 dfa68a40 bef41d20 c0084502 00000003 c0015d28 dfae2000 00000000
- 3f80: 00000000 c00aa25c 00000003 00000000 bef41d20 00015a3c 00015a1c 00015a28
- 3fa0: 00000036 c0015b80 00015a3c 00015a1c 00000003 c0084502 bef41d20 00015a60
- 3fc0: 00015a3c 00015a1c 00015a28 00000036 00000000 00000000 00015a34 00000000
- 3fe0: bef41e94 bef41d18 0000c150 b6e9a67c 60000010 00000003 f7fff3ff 6f7ddfff
- [<c02a74d8>] (xilinx_prepare_hs_sdr+0x30/0xb8) from [<bf000b0c>] (emmc_test_ioctl+0x210/0x2e4 [mmc_drv])
- [<bf000b0c>] (emmc_test_ioctl+0x210/0x2e4 [mmc_drv]) from [<c00a9760>] (vfs_ioctl+0x24/0x40)
- [<c00a9760>] (vfs_ioctl+0x24/0x40) from [<c00aa1e0>] (do_vfs_ioctl+0x4cc/0x514)
- [<c00aa1e0>] (do_vfs_ioctl+0x4cc/0x514) from [<c00aa25c>] (sys_ioctl+0x34/0x54)
- [<c00aa25c>] (sys_ioctl+0x34/0x54) from [<c0015b80>] (ret_fast_syscall+0x0/0x30)
- Code: 0a000000 e12fff33 e3a00006 e59f1074 (e5860048)
- ---[ end trace 901cac0a83592b19 ]---
- Segmentation fault
- [root@leonwang]#
看来是内核 built-in 驱动函数 xilinx_prepare_hs_sdr() 出bug了
- #arm-linux-objdump -S sdhci_xilinx.o >> tmp_file
- #cat tmp_file
- ..........略
-
- 710 00000a7c <xilinx_prepare_hs_sdr>:
- 711 a7c: e92d4070 push {r4, r5, r6, lr}
- 712 a80: e1a04000 mov r4, r0
- 713 a84: e2805d0a add r5, r0, #640 ; 0x280
- 714 a88: e5906290 ldr r6, [r0, #656]
- 715 a8c: f57ff04f dsb sy
- 716 a90: e59f3088 ldr r3, [pc, #136] ; b20 <xilinx_prepare_hs_sdr+0xa4>
- 717 a94: e5933018 ldr r3, [r3, #24]
- 718 a98: e3530000 cmp r3, #0 ; 0x0
- 719 a9c: 0a000000 beq aa4 <xilinx_prepare_hs_sdr+0x28>
- 720 aa0: e12fff33 blx r3
- 721 aa4: e3a00006 mov r0, #6 ; 0x6
- 722 aa8: e59f1074 ldr r1, [pc, #116] ; b24 <xilinx_prepare_hs_sdr+0xa8>
- 723 aac: e5860048 str r0, [r6, #72]
- 724 ab0: e1a00004 mov r0, r4
- 725 ab4: ebfffffe bl 2a0 <xilinx_set_clk>
- 726 ab8: e3a0400b mov r4, #11 ; 0xb
- 727 abc: ea000001 b ac8 <xilinx_prepare_hs_sdr+0x4c>
- 728 ac0: e59f0060 ldr r0, [pc, #96] ; b28 <xilinx_prepare_hs_sdr+0xac>
- 729 ac4: ebfffffe bl 0 <__const_udelay>
- 730 ac8: e2544001 subs r4, r4, #1 ; 0x1
- 731 acc: 1afffffb bne ac0 <xilinx_prepare_hs_sdr+0x44>
找到 xilinx_prepare_hs_sdr+0x30 处
xilinx_prepare_hs_sdr 是 00000a7c
xilinx_prepare_hs_sdr+0x30 是 00000aac
00000aac处指令为 e5860048, 与上面oops日志
Code: 0a000000 e12fff33 e3a00006 e59f1074 (e5860048)
相对应
下面对应的是驱动中的C代码
- 42 #define XILINX_EMMC_DMA_RD_LEN 0x40
- 43 #define XILINX_EMMC_DMA_WR_LEN 0x44
- 44 #define XILINX_EMMC_CFG 0x48
-
- ........
-
- 87 //XXX "XILINX_EMMC_CFG" field
- 88
- 89 #define CFG_DDR_ENABLE 0x00000001 // 0.SDR 1.DDR
- 90 #define CFG_DATA_CLK 0x00000002 // 0.400k 1.data transfer clock
- 91 #define CFG_DMA_ENABLE 0x00000004 // 0.disable 1.enable
- 92 #define CFG_HS400_ENABLE 0x00000008 // 0.disable 1.enable
-
- ........
-
- 418 void xilinx_prepare_hs_sdr(struct mmc_host *mmc)
- 419 {
- 420 struct xilinx_emmc_host *host = mmc_priv(mmc);
- 421 uint8_t *base_reg = host->base_reg;
- 422 uint32_t tmp = 0;
- 423 int i = 0;
- 424
- 425 // xilinx host enter hs sdr mode
- 426 if( xilinx_use_dma )
- 427 tmp |= CFG_DMA_ENABLE;
- 428
- 429 writel( tmp | CFG_DATA_CLK, base_reg + XILINX_EMMC_CFG);
- 430
- 431 xilinx_set_clk(mmc, 25000000); // 25MHz
- 432 mdelay(10);
- 433
- 434
- 435 // xilinx host only support 8-bit bus width(single date rate)
- 436 tmp = (MMC_SWITCH_MODE_WRITE_BYTE << 24) | // write
- 437 (EXT_CSD_BUS_WIDTH << 16) | // index
- 438 (2 << 8); // value
- 439
- 440 xilinx_send_pri_cmd(host,MMC_SWITCH, tmp , MMC_RSP_R1);
- 441
- 442 for(i = 0; i < XILINX_RETRY_MAX; i++){
- 443 tmp = xilinx_send_pri_cmd(host,MMC_SEND_STATUS, 0x00010000, MMC_RSP_R1 );
- 444 if(R1_CURRENT_STATE(tmp) != R1_STATE_PRG)
- 445 break;
- 446 }
- 447
- 448 if(i == XILINX_RETRY_MAX)
- 449 printk("send pri switch EXT_CSD_BUS_WIDTH error [%08x]\n", tmp);
- 450
- 451 return ;
- 452 }
汇编中的
- 721 aa4: e3a00006 mov r0, #6 ; 0x6
- 722 aa8: e59f1074 ldr r1, [pc, #116] ; b24 <xilinx_prepare_hs_sdr+0xa8>
- 723 aac: e5860048 str r0, [r6, #72]
对应C中的
- 425 // xilinx host enter hs sdr mode
- 426 if( xilinx_use_dma )
- 427 tmp |= CFG_DMA_ENABLE;
- 428
- 429 writel( tmp | CFG_DATA_CLK, base_reg + XILINX_EMMC_CFG);
因为tmp最终值为6 ( 宏 CFG_DMA_ENABLE | CFG_DATA_CLK的值为6)
宏XILINX_EMMC_CFG 的值为0x48
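汇编代码中其实执行的是 str r0, [r6, #72],即 writel(6, r6 + 0x48);而 Oops 日志里 r6 为 00000000,所以实际写的地址是 0x48,正好对应日志开头的 "NULL pointer dereference at virtual address 00000048"
即base_reg为0,异常。 排查得知xilinx_prepare_hs_sdr传入的形参错误,导致host指针错误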