最近在linux4.9 arm64遇到了bad mode的kernel oops
oops的内容大概如下,而且oops过后看起来系统并没有异常,oops出现的概率比较小,而且每次oops打印的信息中task 都是不同的,oops之后,看起来大部分时候系统还是可以正常运行的
[ 1259.654597] Bad mode in Error handler detected, code 0xbf000002 -- SError
[ 1259.661357] CPU: 12 PID: 2293 Comm: mate-settings-d Not tainted
4.1.15-1.el7.aarch64 #2
[ 1259.669320] Hardware name: xxxx
[ 1259.675209] task: ffffffc8c9bd1700 ti: ffffffc8c9e4c000
task.ti: ffffffc8c9e4c000
[ 1259.682665] PC is at 0x7f9c51abd8
[ 1259.685961] LR is at 0x7f942f9828
[ 1259.689259] pc : [<0000007f9c51abd8>]
lr : [<0000007f942f9828>] pstate: 80000000
[ 1259.696616] sp : ffffffc8c9e4fff0
首先先定位出现这段LOG的代码位置在traps.c,arm64的异常向量表中才有对这个函数的调用,如下图
bad_mode 打印信息中的 SError 和 code 都是从寄存器 esr_el1 中获取的;能打印出 SError 的只有 el0 和 el1 的 error_invalid 入口(el0_error_invalid / el1_error_invalid,即 reason 为 BAD_ERROR 的情况)
/arch/arm64/kernel/traps.c
/*
 * bad_mode - report an exception that arrived through a vector entry which
 * has no real handler. This is always treated as fatal.
 *
 * @regs:   register frame forwarded to die()
 * @reason: index into handler[] used to name the vector in the message
 * @esr:    syndrome value; printed raw and decoded through
 *          esr_get_class_string()
 *
 * Prints one critical-level line describing the event, calls die(), and,
 * should die() return, masks local interrupts and panics.
 */
asmlinkage void bad_mode(struct pt_regs *regs, int reason, unsigned int esr)
{
	const char *vector_name;
	const char *esr_class;
	int cpu;

	/* Raise console verbosity before anything is printed. */
	console_verbose();

	vector_name = handler[reason];
	cpu = smp_processor_id();
	esr_class = esr_get_class_string(esr);

	pr_crit("Bad mode in %s handler detected on CPU%d, code 0x%08x -- %s\n",
		vector_name, cpu, esr, esr_class);

	die("Oops - bad mode", regs, 0);

	/* Only reached if die() returned. */
	local_irq_disable();
	panic("bad mode");
}
/arch/arm64/kernel/entry.S
/*
* Bad Abort numbers
*-----------------
* Reason codes loaded into x1 by the invalid-vector stubs before they branch
* to bad_mode(); bad_mode() uses the value as an index into handler[] to
* name the vector in its message.
*/
#define BAD_SYNC 0	/* used by the *_sync_invalid stubs and el1_inv */
#define BAD_IRQ 1	/* used by the *_irq_invalid stubs */
#define BAD_FIQ 2	/* used by the *_fiq_invalid stubs */
#define BAD_ERROR 3	/* used by the *_error_invalid stubs */
// el1_sync function
// Sets up the three arguments of bad_mode(regs, reason, esr) in x0-x2 and
// branches to it with reason = BAD_SYNC.
// NOTE(review): this looks like an abridged excerpt - el1_sync and el1_inv
// appear back to back here with nothing in between; confirm against the
// full entry code before relying on it.
el1_sync:
el1_inv:
// TODO: add support for undefined instructions in kernel mode
enable_dbg
mov x0, sp		// arg 0 (regs): current stack pointer
mov x2, x1		// arg 2 (esr): copied from x1 - presumably loaded with ESR earlier; not shown here
mov x1, #BAD_SYNC	// arg 1 (reason); written after x1 has been copied to x2
b bad_mode		// plain branch, no link register update
ENDPROC(el1_sync)
// inv_entry macro
/*
* Invalid mode handlers
*
* inv_entry el, reason, regsize=64
*   el      - forwarded to kernel_entry
*   reason  - one of the BAD_* codes, passed to bad_mode() as 'reason'
*   regsize - forwarded to kernel_entry (defaults to 64)
*
* Expands to: kernel_entry, then a branch to bad_mode(regs, reason, esr)
* with the arguments placed in x0, x1 and x2.
*/
.macro inv_entry, el, reason, regsize = 64
kernel_entry \el, \regsize
mov x0, sp		// arg 0 (regs): current stack pointer
mov x1, #\reason	// arg 1 (reason)
mrs x2, esr_el1 // arg 2 (esr): argument passing for bad_mode, read from ESR_EL1
b bad_mode
.endm
/*
* Invalid-vector stubs. Each one is a single inv_entry expansion that selects
* the el argument (0 or 1) and the BAD_* reason handed to bad_mode().
*/
el0_sync_invalid:
inv_entry 0, BAD_SYNC
ENDPROC(el0_sync_invalid)
el0_irq_invalid:
inv_entry 0, BAD_IRQ
ENDPROC(el0_irq_invalid)
el0_fiq_invalid:
inv_entry 0, BAD_FIQ
ENDPROC(el0_fiq_invalid)
el0_error_invalid:
inv_entry 0, BAD_ERROR
ENDPROC(el0_error_invalid)
/* Compat variants: same stubs, but with regsize = 32 passed to inv_entry. */
#ifdef CONFIG_COMPAT
el0_fiq_invalid_compat:
inv_entry 0, BAD_FIQ, 32
ENDPROC(el0_fiq_invalid_compat)
el0_error_invalid_compat:
inv_entry 0, BAD_ERROR, 32
ENDPROC(el0_error_invalid_compat)
#endif
/* el = 1 variants. */
el1_sync_invalid:
inv_entry 1, BAD_SYNC
ENDPROC(el1_sync_invalid)
el1_irq_invalid:
inv_entry 1, BAD_IRQ
ENDPROC(el1_irq_invalid)
el1_fiq_invalid:
inv_entry 1, BAD_FIQ
ENDPROC(el1_fiq_invalid)
el1_error_invalid:
inv_entry 1, BAD_ERROR
ENDPROC(el1_error_invalid)
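到底SError是ARM的一种什么异常?AArch64(ARM64)架构中,主要包括如下4种类型的异常:同步异常(Synchronous exception)、IRQ、FIQ 和 SError(System Error),分别对应上面的 BAD_SYNC、BAD_IRQ、BAD_FIQ 和 BAD_ERROR。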
SError本质上是一种异步外部abort(asynchronous external abort)。所谓异步,就是说发生异常时硬件(相关的寄存器)不能提供有效信息用于分析定位,异常发生时正在执行的指令,并不一定是导致异常的指令。外部意味着异常来自于外部存储系统(相较于CPU来说,MMU是内部的)。通常是硬件触发的问题,比如硬件的clock出问题,或者硬件本身的问题导致通过bus访问硬件时出错。
Linux内核中,对SError进行了捕获,设置了相应的中断向量,但并未做实际的处理,只是上报异常,并终止进程或内核,因为对于内核来说,SError是致命的,内核自身无法做相应的修复操作,内核不知道具体原因,也不知道如何修复。
分析错误打印: Bad mode in Error handler detected, code 0xbf000002 -- SError
可以知道,发生了SError(也就是System Error异常),错误码(ESR寄存器内容)为:0xbf000002
从ARM developer上查到的ESR寄存器的描述
The ESR_EL1 holds syndrome information for an exception taken to EL1.
ESR_EL1 is a 32-bit register, and is part of the Exception and fault handling registers functional group.
Figure B2-34 ESR_EL1 bit assignments
EC, [31:26]
Exception Class. Indicates the reason for the exception that this register holds information about.
IL, [25]
Instruction Length for synchronous exceptions. The possible values are:
| Value | Meaning |
| 0 | 16-bit. |
| 1 | 32-bit. |
This field is 1 for the SError interrupt, instruction aborts, misaligned PC, Stack pointer misalignment, data aborts for which the ISV bit is 0, exceptions caused by an illegal instruction set state, and exceptions using the 0x00 Exception Class.
ISS, [24:0]
Syndrome information.
由以上信息得出前六位bit[31:26]为101111,对应具体的异常类型,查看ArmV8手册:
101111 SError interrupt即系统异常中断。
ISV, bit [24] Instruction syndrome valid. Indicates whether the rest of the syndrome information in this register is valid. 0 No valid instruction syndrome. ISS[23:0] are RES0. 1 ISS[23:0] hold a valid instruction syndrome.
本code(0xbf000002)中,该位为1,说明bit[23:0]有效;对于SError,其中存放的是IMPLEMENTATION DEFINED的syndrome信息(这里为0x000002,具体含义取决于CPU实现),并不是出错指令本身的信息
IS, bits [23:0] IMPLEMENTATION DEFINED syndrome information that can be used to provide additional information about the SError interrupt. Only valid if bit[24] of this register is 1. If bit[24] is 0, this field is RES0.
ELR_ELn 中的 n 代表异常级别(Exception Level),这个寄存器保存异常返回地址,也就是触发异常时的指令地址(对于异步的SError,该地址处的指令并不一定是导致异常的指令)
如何dump该寄存器:尝试改了下entry.S,只有inv_entry这里会走到BAD_ERROR的异常,所以做了如下修改;结果发现show_regs打印的异常发生时的PC,就是elr_el1的一份拷贝。
所以bad_mode dump的信息就是all kernel have了,没有想到有效的方法,只能压测看下error附近的LOG试着夹下code
/*
* Invalid mode handlers
*
* Debug variant of inv_entry: in addition to regs (x0), reason (x1) and
* esr (x2), it reads ELR_EL1 into x3 so that a bad_mode() extended with a
* fourth parameter can print it.
*/
.macro inv_entry, el, reason, regsize = 64
kernel_entry \el, \regsize
mov x0, sp		// arg 0 (regs): current stack pointer
mov x1, #\reason	// arg 1 (reason)
mrs x2, esr_el1		// arg 2 (esr)
mrs x3, elr_el1		// arg 3: ELR_EL1, the extra value to dump
// The mrs x3 line above is the added one: it passes the extra parameter to bad_mode.
// Per the author, the SError case always goes through this macro.
b bad_mode
.endm
asmlinkage void bad_mode(struct pt_regs *regs, int reason, unsigned int esr, u64 elr_regs)
//add a para
{
siginfo_t info;
void __user *pc = (void __user *)instruction_pointer(regs);
console_verbose();
pr_crit("Bad mode in %s handler detected on CPU%d, code 0x%08x -- %s\n",
handler[reason], smp_processor_id(), esr,
esr_get_class_string(esr));
pr_cirt("dump elr %llx\n",elr_regs);
//add printk to dump this register
__show_regs(regs);