Oops 信息打印之后,还有 crash dump 和系统重启的过程。本例采用系统重启的方式,由 U-Boot 保存 core dump。
die-> crash_kexec
kernel/kexec.c:
/*
 * crash_kexec - save crash state, stop the machine and hand over control.
 * @regs: register state handed in by the caller; forwarded to
 *        crash_setup_regs() to build the snapshot that gets saved.
 *
 * If a crash kernel image is loaded (kexec_crash_image is non-NULL) it is
 * started through machine_kexec().  Otherwise the function falls back to
 * machine_crash_swreset().  Both paths run the same save/shutdown sequence
 * first.  Nothing is done when kexec_mutex cannot be taken.
 */
void crash_kexec(struct pt_regs *regs)
{
	/* Implemented outside this file; declared here as in the original. */
	extern void machine_crash_swreset(void);
	struct pt_regs fixed_regs;

	/* Take the kexec_mutex here to prevent sys_kexec_load
	 * running on one cpu from replacing the crash kernel
	 * we are using after a panic on a different cpu.
	 *
	 * If the crash kernel was not located in a fixed area
	 * of memory the xchg(&kexec_crash_image) would be
	 * sufficient. But since I reuse the memory...
	 */
	if (!mutex_trylock(&kexec_mutex))
		return;

	/* Only the fallback (no crash image) path announces itself. */
	if (!kexec_crash_image)
		printk(KERN_ERR "Enter crash kexec !!\n");

	/* Common to both paths: capture registers, vmcoreinfo, stop the CPUs. */
	crash_setup_regs(&fixed_regs, regs);
	crash_save_vmcoreinfo();
	machine_crash_shutdown(&fixed_regs);

	if (kexec_crash_image)
		machine_kexec(kexec_crash_image);
	else
		machine_crash_swreset();

	mutex_unlock(&kexec_mutex);
}
die-> crash_kexec->machine_crash_shutdown
如果使用 U-Boot 作为 crash kernel(即没有加载 kexec_crash_image),代码会走 else 分支。
arch/arm/kernel/machine_kexec.c
/*
 * machine_crash_shutdown - quiesce the system before leaving the crashed kernel.
 * @regs: register snapshot of the crashing CPU, saved via crash_save_cpu().
 *
 * Disables local interrupts, asks every other online CPU to run
 * machine_crash_nonpanic_core(), waits up to one second for them to
 * acknowledge, then saves this CPU's state and masks interrupts.
 */
void machine_crash_shutdown(struct pt_regs *regs)
{
	unsigned long remaining_ms;

	local_irq_disable();

	/* Every online CPU except this one is expected to check in. */
	atomic_set(&waiting_for_crash_ipi, num_online_cpus() - 1);
	smp_call_function(machine_crash_nonpanic_core, NULL, false);

	/* Wait at most a second for the other cpus to stop, in 1 ms steps. */
	for (remaining_ms = 1000;
	     (atomic_read(&waiting_for_crash_ipi) > 0) && remaining_ms;
	     remaining_ms--)
		mdelay(1);

	/* Still non-zero after the timeout: some CPU never decremented it. */
	if (atomic_read(&waiting_for_crash_ipi) > 0)
		printk(KERN_WARNING "Non-crashing CPUs did not react to IPI\n");

	crash_save_cpu(regs, smp_processor_id());
	machine_kexec_mask_interrupts();

	printk(KERN_INFO "Loading crashdump kernel...\n");
}
这里有个SMP相关的操作:smp_call_function
kernel/smp.c
/**
 * smp_call_function() - invoke a function on every other CPU.
 * @func: Function to invoke. It has to be fast and must not block.
 * @info: Opaque pointer handed to @func unchanged.
 * @wait: When true, wait (atomically) until @func has completed on the
 *        other CPUs.
 *
 * With @wait set, the call returns only after @func has returned;
 * without it, the call returns just before the target cpu calls @func.
 *
 * Context: must not be called with interrupts disabled, from a hardware
 * interrupt handler, or from a bottom half handler.
 *
 * Return: always 0.
 */
int smp_call_function(smp_call_func_t func, void *info, int wait)
{
	/* The cross-call is issued with preemption disabled. */
	preempt_disable();
	smp_call_function_many(cpu_online_mask, func, info, wait);
	preempt_enable();

	return 0;
}
/*
 * machine_crash_nonpanic_core - handler run on a CPU that did not crash.
 * @unused: ignored; present to match the smp_call_function() callback type.
 *
 * Saves this CPU's registers, flushes caches, signals completion by
 * decrementing waiting_for_crash_ipi, and then spins forever.
 */
void machine_crash_nonpanic_core(void *unused)
{
	struct pt_regs regs;

	/* NULL: no register set is supplied, crash_setup_regs() fills @regs. */
	crash_setup_regs(&regs, NULL);
	printk(KERN_DEBUG "CPU %u will stop doing anything useful since another CPU has crashed\n",
	       smp_processor_id());
	crash_save_cpu(&regs, smp_processor_id());
	flush_cache_all();

	/* Tell the crashing CPU that this one has finished saving its state. */
	atomic_dec(&waiting_for_crash_ipi);

	/* Never returns. */
	while (1)
		cpu_relax();
}
其它的CPU保存寄存器和flush cache 后,进入死循环cpu_relax。
软件复位
machine_kexec.c文件中实现machine_crash_swreset,其中有个定义在 arch/arm/include/asm/system_misc.h
中的全局变量:extern void (*arm_pm_restart)(char str, const char *cmd);
/*
 * machine_crash_swreset - software-reset the machine after a crash.
 *
 * Logs a message, flushes caches and then calls the platform restart
 * hook arm_pm_restart with mode 0 and no command string.
 */
void machine_crash_swreset(void)
{
	printk(KERN_INFO "Software reset on panic!\n");

	/*
	 * Flush before disabling; keep this order.
	 * NOTE(review): outer_* presumably act on the outer (L2) cache - confirm.
	 */
	flush_cache_all();
	outer_flush_all();
	outer_disable();

	arm_pm_restart(0, NULL);
}
在mach相关的代码中进行赋值
Core.c (arch\arm\mach-xxx): arm_pm_restart = xxx_restart;
/*
 * xxx_restart - platform restart hook assigned to arm_pm_restart.
 * @mode: ignored.
 * @cmd:  ignored.
 *
 * Delegates the restart to prcm_glb_soft_reset().
 */
static void xxx_restart(char mode, const char *cmd)
{
	prcm_glb_soft_reset();
}
至此,kernel crash 发生后的所有事情分析完毕。后面会介绍怎样保存有效的kernel dump文件。