kernel crash 发生后的那些事(四)

Oops信息打印后,后面还有一个 crash dump 和系统重启的过程,本例为系统重启,在UBoot中保存core dump.

die-> crash_kexec

kernel/kexec.c:
void crash_kexec(struct pt_regs *regs)
{
    /* Take the kexec_mutex here to prevent sys_kexec_load
     * running on one cpu from replacing the crash kernel
     * we are using after a panic on a different cpu.
     *
     * If the crash kernel was not located in a fixed area
     * of memory the xchg(&kexec_crash_image) would be
     * sufficient.  But since I reuse the memory...
     */
    if (mutex_trylock(&kexec_mutex)) {
        if (kexec_crash_image) {
            struct pt_regs fixed_regs;

            crash_setup_regs(&fixed_regs, regs);
            crash_save_vmcoreinfo();
            machine_crash_shutdown(&fixed_regs);
            machine_kexec(kexec_crash_image);
        }
        else{
            printk(KERN_ERR "Enter crash kexec !!\n");
            struct pt_regs fixed_regs;
            extern void machine_crash_swreset(void);
            crash_setup_regs(&fixed_regs, regs);
            crash_save_vmcoreinfo();
            machine_crash_shutdown(&fixed_regs);
            machine_crash_swreset();
        }
        mutex_unlock(&kexec_mutex);
    }
}

die-> crash_kexec->machine_crash_shutdown

如果使用 U-Boot 作为 crash kernel(即没有加载 kexec_crash_image),crash_kexec 会执行上面的 else 分支。
arch/arm/kernel/machine_kexec.c
void machine_crash_shutdown(struct pt_regs *regs)
{
    unsigned long msecs;

    local_irq_disable();

    atomic_set(&waiting_for_crash_ipi, num_online_cpus() - 1);
    smp_call_function(machine_crash_nonpanic_core, NULL, false);
    msecs = 1000; /* Wait at most a second for the other cpus to stop */
    while ((atomic_read(&waiting_for_crash_ipi) > 0) && msecs) {
        mdelay(1);
        msecs--;
    }
    if (atomic_read(&waiting_for_crash_ipi) > 0)
        printk(KERN_WARNING "Non-crashing CPUs did not react to IPI\n");

    crash_save_cpu(regs, smp_processor_id());
    machine_kexec_mask_interrupts();

    printk(KERN_INFO "Loading crashdump kernel...\n");
}

这里有个SMP相关的操作:smp_call_function

kernel/smp.c
/**
 * smp_call_function(): Run a function on all other CPUs.
 * @func: The function to run. This must be fast and non-blocking.
 * @info: An arbitrary pointer to pass to the function.
 * @wait: If true, wait (atomically) until function has completed
 *        on other CPUs.
 *
 * Returns 0.
 *
 * If @wait is true, then returns once @func has returned; otherwise
 * it returns just before the target cpu calls @func.
 *
 * You must not call this function with disabled interrupts or from a
 * hardware interrupt handler or from a bottom half handler.
 *
 * Thin wrapper: forwards @func, @info and @wait to
 * smp_call_function_many() with cpu_online_mask as the target mask.
 */
int smp_call_function(smp_call_func_t func, void *info, int wait)
{
    /*
     * Preemption stays off for the duration of the cross-call.
     * NOTE(review): presumably so the caller cannot migrate to another
     * CPU while the request is issued; confirm against
     * smp_call_function_many().
     */
    preempt_disable();
    smp_call_function_many(cpu_online_mask, func, info, wait);
    preempt_enable();

    /* The result of smp_call_function_many() is not propagated. */
    return 0;
}


void machine_crash_nonpanic_core(void *unused)
{
    struct pt_regs regs;

    crash_setup_regs(&regs, NULL);
    printk(KERN_DEBUG "CPU %u will stop doing anything useful since another CPU has crashed\n",
           smp_processor_id());
    crash_save_cpu(&regs, smp_processor_id());
    flush_cache_all();

    atomic_dec(&waiting_for_crash_ipi);
    while (1)
        cpu_relax();
}

其它的CPU保存寄存器和flush cache 后,进入死循环cpu_relax。

软件复位

machine_kexec.c 文件中实现了 machine_crash_swreset,其中用到一个声明在 arch/arm/include/asm/system_misc.h
中的全局函数指针:extern void (*arm_pm_restart)(char str, const char *cmd);
/*
 * machine_crash_swreset() - software-reset the machine after a panic.
 *
 * Logs a message, calls flush_cache_all() and outer_flush_all(),
 * disables the outer cache with outer_disable(), and finally invokes
 * the arm_pm_restart hook with mode 0 and a NULL command string.
 *
 * NOTE(review): the flushes presumably ensure RAM holds up-to-date
 * contents for whoever collects the dump after the reset; confirm.
 * NOTE(review): nothing here handles arm_pm_restart() returning;
 * verify whether the hook is guaranteed not to return.
 */
void machine_crash_swreset(void)
{
    printk(KERN_INFO "Software reset on panic!\n");

    /* Flush before the outer cache is disabled; the order matters. */
    flush_cache_all();
    outer_flush_all();
    outer_disable();
    /* Hand over to the machine-specific restart hook. */
    arm_pm_restart(0, NULL);
}
在mach相关的代码中进行赋值
Core.c (arch\arm\mach-xxx):    arm_pm_restart = xxx_restart;

/*
 * xxx_restart() - machine-specific restart hook assigned to arm_pm_restart.
 * @mode: restart mode passed by the caller; not used here.
 * @cmd:  optional command string passed by the caller; not used here.
 *
 * Delegates to prcm_glb_soft_reset().
 * NOTE(review): presumably this triggers a global soft reset through the
 * SoC's PRCM block; confirm against the platform code.
 */
static void xxx_restart(char mode, const char *cmd)
{
    prcm_glb_soft_reset();

}

至此,kernel crash 发生后的所有事情分析完毕。后面会介绍怎样保存有效的kernel dump文件。

你可能感兴趣的:(Crash,kenerl)