When the kernel hits an error it prints a stack trace, and from that trace we can work back to where the problem originated. The function that does this is dump_stack(), implemented in lib/dump_stack.c as follows:
/**
 * dump_stack - dump the current task information and its stack trace
 *
 * Architectures can override this implementation by implementing its own.
 */
#ifdef CONFIG_SMP
static atomic_t dump_lock = ATOMIC_INIT(-1);

asmlinkage __visible void dump_stack(void)
{
        unsigned long flags;
        int was_locked;
        int old;
        int cpu;

        /*
         * Permit this cpu to perform nested stack dumps while serialising
         * against other CPUs
         */
retry:
        local_irq_save(flags);
        cpu = smp_processor_id();
        old = atomic_cmpxchg(&dump_lock, -1, cpu);
        if (old == -1) {
                was_locked = 0;
        } else if (old == cpu) {
                was_locked = 1;
        } else {
                local_irq_restore(flags);
                cpu_relax();
                goto retry;
        }

        __dump_stack();

        if (!was_locked)
                atomic_set(&dump_lock, -1);

        local_irq_restore(flags);
}
#else
asmlinkage __visible void dump_stack(void)
{
        __dump_stack();
}
#endif
EXPORT_SYMBOL(dump_stack);
When tracing how a piece of code gets called, we can also invoke dump_stack() ourselves to help with the analysis, for example:
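A minimal sketch of that kind of ad-hoc tracing; the module name trace_demo and its init function are made up for illustration, only dump_stack() itself is the kernel API quoted above:

#include <linux/kernel.h>    /* pr_info(), dump_stack() */
#include <linux/init.h>
#include <linux/module.h>

static int __init trace_demo_init(void)
{
        pr_info("trace_demo: dumping the call chain that reached init\n");
        dump_stack();        /* prints the current task's backtrace to the kernel log */
        return 0;
}

static void __exit trace_demo_exit(void)
{
}

module_init(trace_demo_init);
module_exit(trace_demo_exit);
MODULE_LICENSE("GPL");

Loading such a module (insmod trace_demo.ko) prints a backtrace showing the path from the system-call entry down to trace_demo_init().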
Similar facilities are BUG, BUG_ON, WARN and WARN_ON. Note that calling BUG()/BUG_ON() brings the system down. When HAVE_ARCH_BUG / HAVE_ARCH_BUG_ON are not defined, BUG() and BUG_ON() fall back to the generic definitions:
#ifdef CONFIG_BUG
#ifndef HAVE_ARCH_BUG
#define BUG() do { \
        printk("BUG: failure at %s:%d/%s()!\n", __FILE__, __LINE__, __func__); \
        panic("BUG!"); \
} while (0)
#endif

#ifndef HAVE_ARCH_BUG_ON
#define BUG_ON(condition) do { if (unlikely(condition)) BUG(); } while (0)
#endif
#endif
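For reference, a small sketch of how BUG_ON() is normally used to guard an invariant that must never be violated; validate_buf() and its arguments are hypothetical:

#include <linux/kernel.h>
#include <linux/bug.h>       /* BUG_ON() */

/* Hypothetical helper: the buffer pointer must never be NULL here.
 * If it is, the kernel prints the file/line and the machine goes down. */
static void validate_buf(const char *buf, size_t len)
{
        BUG_ON(buf == NULL);          /* unrecoverable: halt the system */
        pr_debug("buffer of %zu bytes looks sane\n", len);
}

The generic BUG() above ends in panic(); the implementation of panic() (kernel/panic.c) follows: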
/**
 * panic - halt the system
 * @fmt: The text string to print
 *
 * Display a message, then perform cleanups.
 *
 * This function never returns.
 */
void panic(const char *fmt, ...)
{
        static DEFINE_SPINLOCK(panic_lock);
        static char buf[1024];
        va_list args;
        long i, i_next = 0;
        int state = 0;

        /*
         * Disable local interrupts. This will prevent panic_smp_self_stop
         * from deadlocking the first cpu that invokes the panic, since
         * there is nothing to prevent an interrupt handler (that runs
         * after the panic_lock is acquired) from invoking panic again.
         */
        local_irq_disable();

        /*
         * It's possible to come here directly from a panic-assertion and
         * not have preempt disabled. Some functions called from here want
         * preempt to be disabled. No point enabling it later though...
         *
         * Only one CPU is allowed to execute the panic code from here. For
         * multiple parallel invocations of panic, all other CPUs either
         * stop themself or will wait until they are stopped by the 1st CPU
         * with smp_send_stop().
         */
        if (!spin_trylock(&panic_lock))
                panic_smp_self_stop();

        console_verbose();
        bust_spinlocks(1);
        va_start(args, fmt);
        vsnprintf(buf, sizeof(buf), fmt, args);
        va_end(args);
        pr_emerg("Kernel panic - not syncing: %s\n", buf);
#ifdef CONFIG_DEBUG_BUGVERBOSE
        /*
         * Avoid nested stack-dumping if a panic occurs during oops processing
         */
        if (!test_taint(TAINT_DIE) && oops_in_progress <= 1)
                dump_stack();
#endif

#ifdef CONFIG_SPRD_SYSDUMP
        sysdump_enter(0, buf, NULL);
#else
        /*
         * If we have crashed and we have a crash kernel loaded let it handle
         * everything else.
         * If we want to run this after calling panic_notifiers, pass
         * the "crash_kexec_post_notifiers" option to the kernel.
         */
        if (!crash_kexec_post_notifiers)
                crash_kexec(NULL);

        /*
         * Note smp_send_stop is the usual smp shutdown function, which
         * unfortunately means it may not be hardened to work in a panic
         * situation.
         */
        smp_send_stop();
#endif

        /*
         * Run any panic handlers, including those that might need to
         * add information to the kmsg dump output.
         */
        atomic_notifier_call_chain(&panic_notifier_list, 0, buf);

        kmsg_dump(KMSG_DUMP_PANIC);

        /*
         * If you doubt kdump always works fine in any situation,
         * "crash_kexec_post_notifiers" offers you a chance to run
         * panic_notifiers and dumping kmsg before kdump.
         * Note: since some panic_notifiers can make crashed kernel
         * more unstable, it can increase risks of the kdump failure too.
         */
        if (crash_kexec_post_notifiers)
                crash_kexec(NULL);

        bust_spinlocks(0);

        /*
         * We may have ended up stopping the CPU holding the lock (in
         * smp_send_stop()) while still having some valuable data in the console
         * buffer. Try to acquire the lock then release it regardless of the
         * result. The release will also print the buffers out. Locks debug
         * should be disabled to avoid reporting bad unlock balance when
         * panic() is not being callled from OOPS.
         */
        debug_locks_off();
        console_flush_on_panic();

        if (!panic_blink)
                panic_blink = no_blink;

        if (panic_timeout > 0) {
                /*
                 * Delay timeout seconds before rebooting the machine.
                 * We can't use the "normal" timers since we just panicked.
                 */
                pr_emerg("Rebooting in %d seconds..\n", panic_timeout);

                for (i = 0; i < panic_timeout * 1000; i += PANIC_TIMER_STEP) {
                        touch_nmi_watchdog();
                        if (i >= i_next) {
                                i += panic_blink(state ^= 1);
                                i_next = i + 3600 / PANIC_BLINK_SPD;
                        }
                        mdelay(PANIC_TIMER_STEP);
                }
        }
        if (panic_timeout != 0) {
                /*
                 * This will not be a clean reboot, with everything
                 * shutting down. But if there is a chance of
                 * rebooting the system it will be rebooted.
                 */
                emergency_restart();
        }
#ifdef __sparc__
        {
                extern int stop_a_enabled;
                /* Make sure the user can actually press Stop-A (L1-A) */
                stop_a_enabled = 1;
                pr_emerg("Press Stop-A (L1-A) to return to the boot prom\n");
        }
#endif
#if defined(CONFIG_S390)
        {
                unsigned long caller;

                caller = (unsigned long)__builtin_return_address(0);
                disabled_wait(caller);
        }
#endif
        pr_emerg("---[ end Kernel panic - not syncing: %s\n", buf);
        local_irq_enable();
        for (i = 0; ; i += PANIC_TIMER_STEP) {
                touch_softlockup_watchdog();
                if (i >= i_next) {
                        i += panic_blink(state ^= 1);
                        i_next = i + 3600 / PANIC_BLINK_SPD;
                }
                mdelay(PANIC_TIMER_STEP);
        }
}
EXPORT_SYMBOL(panic);
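Before it finally stops the machine, panic() runs every callback registered on panic_notifier_list (the atomic_notifier_call_chain() call above), which is how drivers hook in their own last-gasp logging. A minimal sketch of such a hook; the module name my_panic and the handler my_panic_notify are hypothetical, only the notifier API and panic_notifier_list come from the kernel code quoted above:

#include <linux/kernel.h>    /* panic_notifier_list on the kernel version quoted here */
#include <linux/module.h>
#include <linux/notifier.h>

/* Hypothetical notifier: invoked from panic() with the panic message as data. */
static int my_panic_notify(struct notifier_block *nb, unsigned long action,
                           void *data)
{
        pr_emerg("my_panic_notify: panic reason: %s\n", (char *)data);
        /* flush hardware state, write a breadcrumb to persistent storage, ... */
        return NOTIFY_DONE;
}

static struct notifier_block my_panic_nb = {
        .notifier_call = my_panic_notify,
};

static int __init my_panic_init(void)
{
        atomic_notifier_chain_register(&panic_notifier_list, &my_panic_nb);
        return 0;
}

static void __exit my_panic_exit(void)
{
        atomic_notifier_chain_unregister(&panic_notifier_list, &my_panic_nb);
}

module_init(my_panic_init);
module_exit(my_panic_exit);
MODULE_LICENSE("GPL");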
Back to BUG(): on ARM, HAVE_ARCH_BUG is defined by default, and arch/arm/include/asm/bug.h provides the architecture-specific version:
#define BUG() _BUG(__FILE__, __LINE__, BUG_INSTR_VALUE)
#define _BUG(file, line, value) __BUG(file, line, value)

#ifdef CONFIG_DEBUG_BUGVERBOSE
/*
 * The extra indirection is to ensure that the __FILE__ string comes through
 * OK. Many version of gcc do not support the asm %c parameter which would be
 * preferable to this unpleasantness. We use mergeable string sections to
 * avoid multiple copies of the string appearing in the kernel image.
 */
#define __BUG(__file, __line, __value)                                  \
do {                                                                    \
        asm volatile("1:\t" BUG_INSTR(__value) "\n"                     \
                ".pushsection .rodata.str, \"aMS\", %progbits, 1\n"     \
                "2:\t.asciz " #__file "\n"                              \
                ".popsection\n"                                         \
                ".pushsection __bug_table,\"a\"\n"                      \
                ".align 2\n"                                            \
                "3:\t.word 1b, 2b\n"                                    \
                "\t.hword " #__line ", 0\n"                             \
                ".popsection");                                         \
        unreachable();                                                  \
} while (0)
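Each __BUG() expansion plants an undefined instruction at label 1 and emits one record into the __bug_table section: the address of that instruction (1b), the address of the file-name string (2b), the line number, and a flags halfword. As far as I can tell this matches the generic struct bug_entry layout that the trap handler later looks up; shown here for reference, omitting the CONFIG_GENERIC_BUG_RELATIVE_POINTERS variant:

struct bug_entry {
        unsigned long   bug_addr;   /* 1b: address of the BUG instruction */
#ifdef CONFIG_DEBUG_BUGVERBOSE
        const char      *file;      /* 2b: __FILE__ string in .rodata.str */
        unsigned short  line;       /* __LINE__                           */
#endif
        unsigned short  flags;
};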
WARN() and WARN_ON(), unlike BUG(), only print a warning plus a backtrace and let the system keep running; the generic definitions in include/asm-generic/bug.h are:
#define WARN(condition, format...) ({                                   \
        int __ret_warn_on = !!(condition);                              \
        if (unlikely(__ret_warn_on))                                    \
                __WARN_printf(format);                                  \
        unlikely(__ret_warn_on);                                        \
})

#ifndef WARN_ON
#define WARN_ON(condition) ({                                           \
        int __ret_warn_on = !!(condition);                              \
        if (unlikely(__ret_warn_on))                                    \
                __WARN();                                               \
        unlikely(__ret_warn_on);                                        \
})
#endif
#define __WARN() warn_slowpath_null(__FILE__, __LINE__)
__WARN() expands to warn_slowpath_null(), which is defined in kernel/panic.c and forwards to warn_slowpath_common():
void warn_slowpath_null(const char *file, int line)
{
        warn_slowpath_common(file, line, __builtin_return_address(0),
                             TAINT_WARN, NULL);
}
static void warn_slowpath_common(const char *file, int line, void *caller,
                                 unsigned taint, struct slowpath_args *args)
{
        disable_trace_on_warning();

        pr_warn("------------[ cut here ]------------\n");
        pr_warn("WARNING: CPU: %d PID: %d at %s:%d %pS()\n",
                raw_smp_processor_id(), current->pid, file, line, caller);

        if (args)
                vprintk(args->fmt, args->args);

        if (panic_on_warn) {
                /*
                 * This thread may hit another WARN() in the panic path.
                 * Resetting this prevents additional WARN() from panicking the
                 * system on this thread. Other threads are blocked by the
                 * panic_mutex in panic().
                 */
                panic_on_warn = 0;
                panic("panic_on_warn set ...\n");
        }

        print_modules();
        dump_stack();
        print_oops_end_marker();

        /* Just a warning, don't kill lockdep. */
        add_taint(taint, LOCKDEP_STILL_OK);
}
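Because both macros evaluate to the truth value of the condition, WARN_ON()/WARN() are handy in "complain but keep going" error paths. A small sketch; my_set_rate() and its parameters are made up for illustration:

#include <linux/kernel.h>
#include <linux/bug.h>       /* WARN_ON(), WARN() */
#include <linux/errno.h>

/* Hypothetical clock helper: warn (with a backtrace) on a bad request,
 * but return an error instead of halting the machine. */
static int my_set_rate(unsigned long rate_hz, unsigned long max_hz)
{
        if (WARN_ON(rate_hz == 0))
                return -EINVAL;

        if (WARN(rate_hz > max_hz, "rate %lu Hz above limit %lu Hz\n",
                 rate_hz, max_hz))
                return -ERANGE;

        /* ... program the hardware ... */
        return 0;
}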