驱动调试之内核打印信息分析

    为了更好的理解和说明内核打印信息的含义,在这里,我自己写一个驱动first_drv(其实就是一个简单的电灯程序),引入一个段错误,源码如下:

#include 
#include 
#include 
#include 
#include 
#include 
#include 
#include 
#include 
#include 
#include 

static struct class *firstdrv_class;
static struct class_device	*firstdrv_class_dev;

volatile unsigned long *gpfcon = NULL;
volatile unsigned long *gpfdat = NULL;


static int first_drv_open(struct inode *inode, struct file *file)
{
	//printk("first_drv_open\n");
	/* 配置GPF4,5,6为输出 */
	*gpfcon &= ~((0x3<<(4*2)) | (0x3<<(5*2)) | (0x3<<(6*2)));
	*gpfcon |= ((0x1<<(4*2)) | (0x1<<(5*2)) | (0x1<<(6*2)));
	return 0;
}

static ssize_t first_drv_write(struct file *file, const char __user *buf, size_t count, loff_t * ppos)
{
	int val;

	//printk("first_drv_write\n");

	copy_from_user(&val, buf, count); //	copy_to_user();

	if (val == 1)
	{
		// 点灯
		*gpfdat &= ~((1<<4) | (1<<5) | (1<<6));
	}
	else
	{
		// 灭灯
		*gpfdat |= (1<<4) | (1<<5) | (1<<6);
	}
	
	return 0;
}

static struct file_operations first_drv_fops = {
    .owner  =   THIS_MODULE,    /* 这是一个宏,推向编译模块时自动创建的__this_module变量 */
    .open   =   first_drv_open,     
	.write	=	first_drv_write,	   
};


int major;
static int first_drv_init(void)
{
	major = register_chrdev(0, "first_drv", &first_drv_fops); // 注册, 告诉内核

	firstdrv_class = class_create(THIS_MODULE, "firstdrv");

	firstdrv_class_dev = class_device_create(firstdrv_class, NULL, MKDEV(major, 0), NULL, "xyz"); /* /dev/xyz */

	gpfcon = (volatile unsigned long *)0x56000050; //(volatile unsigned long *)ioremap(0x56000050, 16);
	gpfdat = gpfcon + 1;

	return 0;
}

static void first_drv_exit(void)
{
	unregister_chrdev(major, "first_drv"); // 卸载

	class_device_unregister(firstdrv_class_dev);
	class_destroy(firstdrv_class);
	iounmap(gpfcon);
}

module_init(first_drv_init);
module_exit(first_drv_exit);


MODULE_LICENSE("GPL");

在first_drv_init和first_drv_open函数里面,我要去访问一个外部寄存器,但是我并没调用ioremap函数将外部寄存器的地址映射到内存空间,而是直接访问,导致段错误发生。本驱动的测试代码如下:

#include 
#include 
#include 
#include 

/* firstdrvtest on
  * firstdrvtest off
  */
int main(int argc, char **argv)
{
	int fd;
	int val = 1;
	fd = open("/dev/xyz", O_RDWR);
	if (fd < 0)
	{
		printf("can't open!\n");
	}
	if (argc != 2)
	{
		printf("Usage :\n");
		printf("%s \n", argv[0]);
		return 0;
	}

	if (strcmp(argv[1], "on") == 0)
	{
		val  = 1;
	}
	else
	{
		val = 0;
	}
	
	write(fd, &val, 4);
	return 0;
}

下面分两种情景进行分析讨论
1. 驱动作为模块动态加载,insmod first_drv.ko,然后调用测试函数./firstdrvtest on (打开灯),这是内核打印信息如下:

Unable to handle kernel paging request at virtual address 56000050
pgd = c3eb0000
[56000050] *pgd=00000000
Internal error: Oops: 5 [#1]
Modules linked in: first_drv
CPU: 0    Not tainted  (2.6.22.6 #1)
PC is at first_drv_open+0x18/0x3c [first_drv]
LR is at chrdev_open+0x14c/0x164
pc : []    lr : []    psr: a0000013
sp : c3c7be88  ip : c3c7be98  fp : c3c7be94
r10: 00000000  r9 : c3c7a000  r8 : c049abc0
r7 : 00000000  r6 : 00000000  r5 : c3e740c0  r4 : c06d41e0
r3 : bf000000  r2 : 56000050  r1 : bf000964  r0 : 00000000
Flags: NzCv  IRQs on  FIQs on  Mode SVC_32  Segment user
Control: c000717f  Table: 33eb0000  DAC: 00000015
Process firstdrvtest (pid: 777, stack limit = 0xc3c7a258)Stack: (0xc3c7be88 to 0xc3c7c000)
be80:                   c3c7bebc c3c7be98 c008d888 bf000010 00000000 c049abc0 
bea0: c3e740c0 c008d73c c0474e20 c3e766a8 c3c7bee4 c3c7bec0 c0089e48 c008d74c 
bec0: c049abc0 c3c7bf04 00000003 ffffff9c c002c044 c3d10000 c3c7befc c3c7bee8 
bee0: c0089f64 c0089d58 00000000 00000002 c3c7bf68 c3c7bf00 c0089fb8 c0089f40 
bf00: c3c7bf04 c3e766a8 c0474e20 00000000 00000000 c3eb1000 00000101 00000001 
bf20: 00000000 c3c7a000 c04a7468 c04a7460 ffffffe8 c3d10000 c3c7bf68 c3c7bf48 
bf40: c008a16c c009fc70 00000003 00000000 c049abc0 00000002 bec1fee0 c3c7bf94 
bf60: c3c7bf6c c008a2f4 c0089f88 00008520 bec1fed4 0000860c 00008670 00000005 
bf80: c002c044 4013365c c3c7bfa4 c3c7bf98 c008a3a8 c008a2b0 00000000 c3c7bfa8 
bfa0: c002bea0 c008a394 bec1fed4 0000860c 00008720 00000002 bec1fee0 00000001 
bfc0: bec1fed4 0000860c 00008670 00000002 00008520 00000000 4013365c bec1fea8 
bfe0: 00000000 bec1fe84 0000266c 400c98e0 60000010 00008720 00000000 00000000 
从这个Backtrace: (回溯)
[] (first_drv_open+0x0/0x3c [first_drv]) from [] (chrdev_open+0x14c/0x164)
[] (chrdev_open+0x0/0x164) from [] (__dentry_open+0x100/0x1e8)
 r8:c3e766a8 r7:c0474e20 r6:c008d73c r5:c3e740c0 r4:c049abc0
[] (__dentry_open+0x0/0x1e8) from [] (nameidata_to_filp+0x34/0x48)
[] (nameidata_to_filp+0x0/0x48) from [] (do_filp_open+0x40/0x48)
 r4:00000002
[] (do_filp_open+0x0/0x48) from [] (do_sys_open+0x54/0xe4)
 r5:bec1fee0 r4:00000002
[] (do_sys_open+0x0/0xe4) from [] (sys_open+0x24/0x28)
[] (sys_open+0x0/0x28) from [] (ret_fast_syscall+0x0/0x2c)
Code: e24cb004 e59f1024 e3a00000 e5912000 (e5923000) 
Segmentation fault

打印信息,明显知道,内核访问56000050这个地址的时候出错了,并且pc指针位于first_drv_open函数的0x18处(0x3c是这个函数的总大小)pc=bf000018,注意!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
对于这个值正常处理flow如下:

       我们要首先确定这个值是属于内核还是insmod外加的模块(在这里当然是外加的模块),怎么确认呢???先判断这个值是否属于内核地址,可以查看system.map(编译内核源码生成的)来确定内核的函数的地址范围:c0004000~c03265a4,显然pc这个值不属于内核函数范围,则它是insmod加载的驱动程序。

PC is at first_drv_open+0x18/0x3c [first_drv]
        很多时候这句log并不会打印出来,不会告诉你pc值所在函数,需要我们手动去找,正常处理flow为,执行cat /proc/kallsyms 打印出内核的所有函数符号表(这玩意就相当于DN和IP的关系),找到与pc值相近的一个地址(小于等于pc)
bf000000     t      first_drv_open     [first_drv]
这里我们知道first_drv_open的地址为bf000000,我们执行arm-linux-objdump -D first_drv.ko > first_drv.dis,编译出反汇编文件,查看反汇编文件
first_drv.dis文件里              insmod后
00000000 :       bf000000 t first_drv_open	[first_drv]
00000018                         pc = bf000018
first_drv.ko:     file format elf32-littlearm

Disassembly of section .text:

00000000 :
   0:	e1a0c00d 	mov	ip, sp
   4:	e92dd800 	stmdb	sp!, {fp, ip, lr, pc}
   8:	e24cb004 	sub	fp, ip, #4	; 0x4
   c:	e59f1024 	ldr	r1, [pc, #36]	; 38 <__mod_vermagic5>
  10:	e3a00000 	mov	r0, #0	; 0x0
  14:	e5912000 	ldr	r2, [r1]
  18:	e5923000 	ldr	r3, [r2]  // 在这里出错 r2=56000050
根据汇编查找源码,找到出错原因!!!!
2. 驱动直接编译近内核
log如下:
Modules linked in:
CPU: 0    Not tainted  (2.6.22.6 #2)
PC is at first_drv_open+0x18/0x3c
LR is at chrdev_open+0x14c/0x164
pc : []    lr : []    psr: a0000013
sp : c3a03e88  ip : c3a03e98  fp : c3a03e94
r10: 00000000  r9 : c3a02000  r8 : c03f3c60
r7 : 00000000  r6 : 00000000  r5 : c38a0c50  r4 : c3c1e780
r3 : c014e6a8  r2 : 56000050  r1 : c031a47c  r0 : 00000000
Flags: NzCv  IRQs on  FIQs on  Mode SVC_32  Segment user
Control: c000717f  Table: 339f0000  DAC: 00000015
Process firstdrvtest (pid: 750, stack limit = 0xc3a02258)

同样,根据system.map,发现pc是属于内核函数范围,并且在里面找到pc=c014e6c0相近的函数c014e6a8
反汇编内核 arm-linux-objdump -D vmlinux > vmlinux.dis 搜索c014e6c0

c014e6a8 :
c014e6a8:       e1a0c00d        mov     ip, sp
c014e6ac:       e92dd800        stmdb   sp!, {fp, ip, lr, pc}
c014e6b0:       e24cb004        sub     fp, ip, #4      ; 0x4
c014e6b4:       e59f1024        ldr     r1, [pc, #36]   ; c014e6e0 <.text+0x1276e0>
c014e6b8:       e3a00000        mov     r0, #0  ; 0x0
c014e6bc:       e5912000        ldr     r2, [r1]
c014e6c0:       e5923000        ldr     r3, [r2] // 在此出错 r2=56000050

函数调用过程分析(回溯栈分析在反汇编文件去搜索lr寄存器所代表的函数)

Stack: (0xc3e69e88 to 0xc3e6a000)
9e80:                   c3e69ebc c3e69e98 c008c888 bf000010 00000000 c0490620 
                        first_drv_open'sp    lr             chrdev_open'sp

9ea0: c3e320a0 c008c73c c0465e20 c3e36cb4 c3e69ee4 c3e69ec0 c0088e48 c008c74c 
                                                            lr    
                                                                                 
9ec0: c0490620 c3e69f04 00000003 ffffff9c c002b044 c06e0000 c3e69efc c3e69ee8 
      __dentry_open'sp

9ee0: c0088f64 c0088d58 00000000 00000002 c3e69f68 c3e69f00 c0088fb8 c0088f40 
      lr                nameidata_to_filp'sp                lr
      
9f00: c3e69f04 c3e36cb4 c0465e20 00000000 00000000 c3e79000 00000101 00000001 
      do_filp_open'sp

9f20: 00000000 c3e68000 c04c1468 c04c1460 ffffffe8 c06e0000 c3e69f68 c3e69f48 
9f40: c008916c c009ec70 00000003 00000000 c0490620 00000002 be94eee0 c3e69f94 
9f60: c3e69f6c c00892f4 c0088f88 00008520 be94eed4 0000860c 00008670 00000005 
               lr                do_sys_open'sp

9f80: c002b044 4013365c c3e69fa4 c3e69f98 c00893a8 c00892b0 00000000 c3e69fa8 
                                          lr                sys_open'sp

9fa0: c002aea0 c0089394 be94eed4 0000860c 00008720 00000002 be94eee0 00000001 
      lr                ret_fast_syscall'sp
                        
9fc0: be94eed4 0000860c 00008670 00000002 00008520 00000000 4013365c be94eea8 
9fe0: 00000000 be94ee84 0000266c 400c98e0 60000010 00008720 00000000 00000000 

怎么确定那个值是lr寄存器的值呢???这要看汇编了,对汇编基础要求比较高

c014e6ac:       e92dd800        stmdb   sp!, {fp, ip, lr, pc}

往栈里面写了四个值,而且从右边开始入栈,所以pc值最先入栈(这里栈是向下增长的,所以pc的地址值最大)所以lr的值为c008c888,根据这个值再去反汇编代码里面找相应的函数,如果在这里你不懂pc lr等寄存器值代表的含义是什么,我在以后的文章会给大家分析。

你可能感兴趣的:(程序调试)