为了更好的理解和说明内核打印信息的含义,在这里,我自己写一个驱动first_drv(其实就是一个简单的电灯程序),引入一个段错误,源码如下:
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
static struct class *firstdrv_class;
static struct class_device *firstdrv_class_dev;
volatile unsigned long *gpfcon = NULL;
volatile unsigned long *gpfdat = NULL;
static int first_drv_open(struct inode *inode, struct file *file)
{
//printk("first_drv_open\n");
/* 配置GPF4,5,6为输出 */
*gpfcon &= ~((0x3<<(4*2)) | (0x3<<(5*2)) | (0x3<<(6*2)));
*gpfcon |= ((0x1<<(4*2)) | (0x1<<(5*2)) | (0x1<<(6*2)));
return 0;
}
static ssize_t first_drv_write(struct file *file, const char __user *buf, size_t count, loff_t * ppos)
{
int val;
//printk("first_drv_write\n");
copy_from_user(&val, buf, count); // copy_to_user();
if (val == 1)
{
// 点灯
*gpfdat &= ~((1<<4) | (1<<5) | (1<<6));
}
else
{
// 灭灯
*gpfdat |= (1<<4) | (1<<5) | (1<<6);
}
return 0;
}
static struct file_operations first_drv_fops = {
.owner = THIS_MODULE, /* 这是一个宏,推向编译模块时自动创建的__this_module变量 */
.open = first_drv_open,
.write = first_drv_write,
};
int major;
static int first_drv_init(void)
{
major = register_chrdev(0, "first_drv", &first_drv_fops); // 注册, 告诉内核
firstdrv_class = class_create(THIS_MODULE, "firstdrv");
firstdrv_class_dev = class_device_create(firstdrv_class, NULL, MKDEV(major, 0), NULL, "xyz"); /* /dev/xyz */
gpfcon = (volatile unsigned long *)0x56000050; //(volatile unsigned long *)ioremap(0x56000050, 16);
gpfdat = gpfcon + 1;
return 0;
}
static void first_drv_exit(void)
{
unregister_chrdev(major, "first_drv"); // 卸载
class_device_unregister(firstdrv_class_dev);
class_destroy(firstdrv_class);
iounmap(gpfcon);
}
module_init(first_drv_init);
module_exit(first_drv_exit);
MODULE_LICENSE("GPL");
在first_drv_init和first_drv_open函数里面,我要去访问一个外部寄存器,但是我并没调用ioremap函数将外部寄存器的地址映射到内存空间,而是直接访问,导致段错误发生。本驱动的测试代码如下:
#include
#include
#include
#include
/* firstdrvtest on
* firstdrvtest off
*/
int main(int argc, char **argv)
{
int fd;
int val = 1;
fd = open("/dev/xyz", O_RDWR);
if (fd < 0)
{
printf("can't open!\n");
}
if (argc != 2)
{
printf("Usage :\n");
printf("%s \n", argv[0]);
return 0;
}
if (strcmp(argv[1], "on") == 0)
{
val = 1;
}
else
{
val = 0;
}
write(fd, &val, 4);
return 0;
}
下面分两种情景进行分析讨论
1. 驱动作为模块动态加载,insmod first_drv.ko,然后调用测试函数./firstdrvtest on (打开灯),这是内核打印信息如下:
Unable to handle kernel paging request at virtual address 56000050
pgd = c3eb0000
[56000050] *pgd=00000000
Internal error: Oops: 5 [#1]
Modules linked in: first_drv
CPU: 0 Not tainted (2.6.22.6 #1)
PC is at first_drv_open+0x18/0x3c [first_drv]
LR is at chrdev_open+0x14c/0x164
pc : [] lr : [] psr: a0000013
sp : c3c7be88 ip : c3c7be98 fp : c3c7be94
r10: 00000000 r9 : c3c7a000 r8 : c049abc0
r7 : 00000000 r6 : 00000000 r5 : c3e740c0 r4 : c06d41e0
r3 : bf000000 r2 : 56000050 r1 : bf000964 r0 : 00000000
Flags: NzCv IRQs on FIQs on Mode SVC_32 Segment user
Control: c000717f Table: 33eb0000 DAC: 00000015
Process firstdrvtest (pid: 777, stack limit = 0xc3c7a258)Stack: (0xc3c7be88 to 0xc3c7c000)
be80: c3c7bebc c3c7be98 c008d888 bf000010 00000000 c049abc0
bea0: c3e740c0 c008d73c c0474e20 c3e766a8 c3c7bee4 c3c7bec0 c0089e48 c008d74c
bec0: c049abc0 c3c7bf04 00000003 ffffff9c c002c044 c3d10000 c3c7befc c3c7bee8
bee0: c0089f64 c0089d58 00000000 00000002 c3c7bf68 c3c7bf00 c0089fb8 c0089f40
bf00: c3c7bf04 c3e766a8 c0474e20 00000000 00000000 c3eb1000 00000101 00000001
bf20: 00000000 c3c7a000 c04a7468 c04a7460 ffffffe8 c3d10000 c3c7bf68 c3c7bf48
bf40: c008a16c c009fc70 00000003 00000000 c049abc0 00000002 bec1fee0 c3c7bf94
bf60: c3c7bf6c c008a2f4 c0089f88 00008520 bec1fed4 0000860c 00008670 00000005
bf80: c002c044 4013365c c3c7bfa4 c3c7bf98 c008a3a8 c008a2b0 00000000 c3c7bfa8
bfa0: c002bea0 c008a394 bec1fed4 0000860c 00008720 00000002 bec1fee0 00000001
bfc0: bec1fed4 0000860c 00008670 00000002 00008520 00000000 4013365c bec1fea8
bfe0: 00000000 bec1fe84 0000266c 400c98e0 60000010 00008720 00000000 00000000
从这个Backtrace: (回溯)
[] (first_drv_open+0x0/0x3c [first_drv]) from [] (chrdev_open+0x14c/0x164)
[] (chrdev_open+0x0/0x164) from [] (__dentry_open+0x100/0x1e8)
r8:c3e766a8 r7:c0474e20 r6:c008d73c r5:c3e740c0 r4:c049abc0
[] (__dentry_open+0x0/0x1e8) from [] (nameidata_to_filp+0x34/0x48)
[] (nameidata_to_filp+0x0/0x48) from [] (do_filp_open+0x40/0x48)
r4:00000002
[] (do_filp_open+0x0/0x48) from [] (do_sys_open+0x54/0xe4)
r5:bec1fee0 r4:00000002
[] (do_sys_open+0x0/0xe4) from [] (sys_open+0x24/0x28)
[] (sys_open+0x0/0x28) from [] (ret_fast_syscall+0x0/0x2c)
Code: e24cb004 e59f1024 e3a00000 e5912000 (e5923000)
Segmentation fault
打印信息,明显知道,内核访问56000050这个地址的时候出错了,并且pc指针位于first_drv_open函数的0x18处(0x3c是这个函数的总大小)pc=bf000018,注意!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
对于这个值正常处理flow如下:
我们要首先确定这个值是属于内核还是insmod外加的模块(在这里当然是外加的模块),怎么确认呢???先判断这个值是否属于内核地址,可以查看system.map(编译内核源码生成的)来确定内核的函数的地址范围:c0004000~c03265a4,显然pc这个值不属于内核函数范围,则它是insmod加载的驱动程序。
PC is at first_drv_open+0x18/0x3c [first_drv]
很多时候这句log并不会打印出来,不会告诉你pc值所在函数,需要我们手动去找,正常处理flow为,执行cat /proc/kallsyms 打印出内核的所有函数符号表(这玩意就相当于DN和IP的关系),找到与pc值相近的一个地址(小于等于pc)
first_drv.dis文件里 insmod后
00000000 : bf000000 t first_drv_open [first_drv]
00000018 pc = bf000018
first_drv.ko: file format elf32-littlearm
Disassembly of section .text:
00000000 :
0: e1a0c00d mov ip, sp
4: e92dd800 stmdb sp!, {fp, ip, lr, pc}
8: e24cb004 sub fp, ip, #4 ; 0x4
c: e59f1024 ldr r1, [pc, #36] ; 38 <__mod_vermagic5>
10: e3a00000 mov r0, #0 ; 0x0
14: e5912000 ldr r2, [r1]
18: e5923000 ldr r3, [r2] // 在这里出错 r2=56000050
根据汇编查找源码,找到出错原因!!!!
Modules linked in:
CPU: 0 Not tainted (2.6.22.6 #2)
PC is at first_drv_open+0x18/0x3c
LR is at chrdev_open+0x14c/0x164
pc : [] lr : [] psr: a0000013
sp : c3a03e88 ip : c3a03e98 fp : c3a03e94
r10: 00000000 r9 : c3a02000 r8 : c03f3c60
r7 : 00000000 r6 : 00000000 r5 : c38a0c50 r4 : c3c1e780
r3 : c014e6a8 r2 : 56000050 r1 : c031a47c r0 : 00000000
Flags: NzCv IRQs on FIQs on Mode SVC_32 Segment user
Control: c000717f Table: 339f0000 DAC: 00000015
Process firstdrvtest (pid: 750, stack limit = 0xc3a02258)
同样,根据system.map,发现pc是属于内核函数范围,并且在里面找到pc=c014e6c0相近的函数c014e6a8
反汇编内核 arm-linux-objdump -D vmlinux > vmlinux.dis 搜索c014e6c0
c014e6a8 :
c014e6a8: e1a0c00d mov ip, sp
c014e6ac: e92dd800 stmdb sp!, {fp, ip, lr, pc}
c014e6b0: e24cb004 sub fp, ip, #4 ; 0x4
c014e6b4: e59f1024 ldr r1, [pc, #36] ; c014e6e0 <.text+0x1276e0>
c014e6b8: e3a00000 mov r0, #0 ; 0x0
c014e6bc: e5912000 ldr r2, [r1]
c014e6c0: e5923000 ldr r3, [r2] // 在此出错 r2=56000050
函数调用过程分析(回溯栈分析在反汇编文件去搜索lr寄存器所代表的函数)
Stack: (0xc3e69e88 to 0xc3e6a000)
9e80: c3e69ebc c3e69e98 c008c888 bf000010 00000000 c0490620
first_drv_open'sp lr chrdev_open'sp
9ea0: c3e320a0 c008c73c c0465e20 c3e36cb4 c3e69ee4 c3e69ec0 c0088e48 c008c74c
lr
9ec0: c0490620 c3e69f04 00000003 ffffff9c c002b044 c06e0000 c3e69efc c3e69ee8
__dentry_open'sp
9ee0: c0088f64 c0088d58 00000000 00000002 c3e69f68 c3e69f00 c0088fb8 c0088f40
lr nameidata_to_filp'sp lr
9f00: c3e69f04 c3e36cb4 c0465e20 00000000 00000000 c3e79000 00000101 00000001
do_filp_open'sp
9f20: 00000000 c3e68000 c04c1468 c04c1460 ffffffe8 c06e0000 c3e69f68 c3e69f48
9f40: c008916c c009ec70 00000003 00000000 c0490620 00000002 be94eee0 c3e69f94
9f60: c3e69f6c c00892f4 c0088f88 00008520 be94eed4 0000860c 00008670 00000005
lr do_sys_open'sp
9f80: c002b044 4013365c c3e69fa4 c3e69f98 c00893a8 c00892b0 00000000 c3e69fa8
lr sys_open'sp
9fa0: c002aea0 c0089394 be94eed4 0000860c 00008720 00000002 be94eee0 00000001
lr ret_fast_syscall'sp
9fc0: be94eed4 0000860c 00008670 00000002 00008520 00000000 4013365c be94eea8
9fe0: 00000000 be94ee84 0000266c 400c98e0 60000010 00008720 00000000 00000000
怎么确定那个值是lr寄存器的值呢???这要看汇编了,对汇编基础要求比较高
c014e6ac: e92dd800 stmdb sp!, {fp, ip, lr, pc}
往栈里面写了四个值,而且从右边开始入栈,所以pc值最先入栈(这里栈是向下增长的,所以pc的地址值最大)所以lr的值为c008c888,根据这个值再去反汇编代码里面找相应的函数,如果在这里你不懂pc lr等寄存器值代表的含义是什么,我在以后的文章会给大家分析。