@Linux内核之BUG_ON()和WARN_ON()
内核中有许多地方调用类似BUG_ON()
的语句,它非常像一个内核运行时的断言,意味着本来不该执行到BUG_ON()
这条语句,一旦执行即抛出Oops
,导致栈的回溯和错误信息的打印。为什么这些声明会导致 oops
跟硬件的体系结构是相关的。大部分体系结构把BUG()
和BUG_ON()
定义成某种非法操作,这样自然会产生需要的oops
。你可以把这些调用当作断言使用,想要断言某种情况不该发生。
BUG_ON()
的定义为:
/*include/asm-generic/bug.h*/
/*
* Don't use BUG() or BUG_ON() unless there's really no way out; one
* example might be detecting data structure corruption in the middle
* of an operation that can't be backed out of. If the (sub)system
* can somehow continue operating, perhaps with reduced functionality,
* it's probably not BUG-worthy.
*
* If you're tempted to BUG(), think again: is completely giving up
* really the *only* solution? There are usually better options, where
* users don't need to reboot ASAP and can mostly shut down cleanly.
*/
#ifndef HAVE_ARCH_BUG
#define BUG() do { \
printk("BUG: failure at %s:%d/%s()!\n", __FILE__, __LINE__, __func__); \
barrier_before_unreachable(); \
panic("BUG!"); \
} while (0)
#endif
#ifndef HAVE_ARCH_BUG_ON
#define BUG_ON(condition) do { if (unlikely(condition)) BUG(); } while (0)
#endif
其中panic()
定义在kernel/panic.c
中,会导致内核崩溃,并打印Oops
。
/**
* panic - halt the system
* @fmt: The text string to print
*
* Display a message, then perform cleanups.
*
* This function never returns.
*/
void panic(const char *fmt, ...)
{
static char buf[1024];
va_list args;
long i, i_next = 0, len;
int state = 0;
int old_cpu, this_cpu;
bool _crash_kexec_post_notifiers = crash_kexec_post_notifiers;
if (panic_on_warn) {
/*
* This thread may hit another WARN() in the panic path.
* Resetting this prevents additional WARN() from panicking the
* system on this thread. Other threads are blocked by the
* panic_mutex in panic().
*/
panic_on_warn = 0;
}
/*
* Disable local interrupts. This will prevent panic_smp_self_stop
* from deadlocking the first cpu that invokes the panic, since
* there is nothing to prevent an interrupt handler (that runs
* after setting panic_cpu) from invoking panic() again.
*/
local_irq_disable();
preempt_disable_notrace();
/*
* It's possible to come here directly from a panic-assertion and
* not have preempt disabled. Some functions called from here want
* preempt to be disabled. No point enabling it later though...
*
* Only one CPU is allowed to execute the panic code from here. For
* multiple parallel invocations of panic, all other CPUs either
* stop themself or will wait until they are stopped by the 1st CPU
* with smp_send_stop().
*
* `old_cpu == PANIC_CPU_INVALID' means this is the 1st CPU which
* comes here, so go ahead.
* `old_cpu == this_cpu' means we came from nmi_panic() which sets
* panic_cpu to this CPU. In this case, this is also the 1st CPU.
*/
this_cpu = raw_smp_processor_id();
old_cpu = atomic_cmpxchg(&panic_cpu, PANIC_CPU_INVALID, this_cpu);
if (old_cpu != PANIC_CPU_INVALID && old_cpu != this_cpu)
panic_smp_self_stop();
console_verbose();
bust_spinlocks(1);
va_start(args, fmt);
len = vscnprintf(buf, sizeof(buf), fmt, args);
va_end(args);
if (len && buf[len - 1] == '\n')
buf[len - 1] = '\0';
pr_emerg("Kernel panic - not syncing: %s\n", buf);
#ifdef CONFIG_DEBUG_BUGVERBOSE
/*
* Avoid nested stack-dumping if a panic occurs during oops processing
*/
if (!test_taint(TAINT_DIE) && oops_in_progress <= 1)
dump_stack();
#endif
/*
* If kgdb is enabled, give it a chance to run before we stop all
* the other CPUs or else we won't be able to debug processes left
* running on them.
*/
kgdb_panic(buf);
/*
* If we have crashed and we have a crash kernel loaded let it handle
* everything else.
* If we want to run this after calling panic_notifiers, pass
* the "crash_kexec_post_notifiers" option to the kernel.
*
* Bypass the panic_cpu check and call __crash_kexec directly.
*/
if (!_crash_kexec_post_notifiers) {
__crash_kexec(NULL);
/*
* Note smp_send_stop is the usual smp shutdown function, which
* unfortunately means it may not be hardened to work in a
* panic situation.
*/
smp_send_stop();
} else {
/*
* If we want to do crash dump after notifier calls and
* kmsg_dump, we will need architecture dependent extra
* works in addition to stopping other CPUs.
*/
crash_smp_send_stop();
}
/*
* Run any panic handlers, including those that might need to
* add information to the kmsg dump output.
*/
atomic_notifier_call_chain(&panic_notifier_list, 0, buf);
panic_print_sys_info(false);
kmsg_dump(KMSG_DUMP_PANIC);
/*
* If you doubt kdump always works fine in any situation,
* "crash_kexec_post_notifiers" offers you a chance to run
* panic_notifiers and dumping kmsg before kdump.
* Note: since some panic_notifiers can make crashed kernel
* more unstable, it can increase risks of the kdump failure too.
*
* Bypass the panic_cpu check and call __crash_kexec directly.
*/
if (_crash_kexec_post_notifiers)
__crash_kexec(NULL);
#ifdef CONFIG_VT
unblank_screen();
#endif
console_unblank();
/*
* We may have ended up stopping the CPU holding the lock (in
* smp_send_stop()) while still having some valuable data in the console
* buffer. Try to acquire the lock then release it regardless of the
* result. The release will also print the buffers out. Locks debug
* should be disabled to avoid reporting bad unlock balance when
* panic() is not being callled from OOPS.
*/
debug_locks_off();
console_flush_on_panic(CONSOLE_FLUSH_PENDING);
panic_print_sys_info(true);
if (!panic_blink)
panic_blink = no_blink;
if (panic_timeout > 0) {
/*
* Delay timeout seconds before rebooting the machine.
* We can't use the "normal" timers since we just panicked.
*/
pr_emerg("Rebooting in %d seconds..\n", panic_timeout);
for (i = 0; i < panic_timeout * 1000; i += PANIC_TIMER_STEP) {
touch_nmi_watchdog();
if (i >= i_next) {
i += panic_blink(state ^= 1);
i_next = i + 3600 / PANIC_BLINK_SPD;
}
mdelay(PANIC_TIMER_STEP);
}
}
if (panic_timeout != 0) {
/*
* This will not be a clean reboot, with everything
* shutting down. But if there is a chance of
* rebooting the system it will be rebooted.
*/
if (panic_reboot_mode != REBOOT_UNDEFINED)
reboot_mode = panic_reboot_mode;
emergency_restart();
}
#ifdef __sparc__
{
extern int stop_a_enabled;
/* Make sure the user can actually press Stop-A (L1-A) */
stop_a_enabled = 1;
pr_emerg("Press Stop-A (L1-A) from sun keyboard or send break\n"
"twice on console to return to the boot prom\n");
}
#endif
#if defined(CONFIG_S390)
disabled_wait();
#endif
pr_emerg("---[ end Kernel panic - not syncing: %s ]---\n", buf);
/* Do not scroll important messages printed above */
suppress_printk = 1;
local_irq_enable();
for (i = 0; ; i += PANIC_TIMER_STEP) {
touch_softlockup_watchdog();
if (i >= i_next) {
i += panic_blink(state ^= 1);
i_next = i + 3600 / PANIC_BLINK_SPD;
}
mdelay(PANIC_TIMER_STEP);
}
}
EXPORT_SYMBOL(panic);
调用panic()
不但会打印错误消息(Oops
)而且还会挂起整个系统。显然,你只应该在极端恶劣的情况下使用它。
但是对于BUG_ON()
,只有当括号内的条件成立时,才抛出Oops
。
除了BUG_ON()
外,内核有个稍微弱一些WARN_ON()
,在括号中的条件成立时,内核会抛出栈回溯,但是不会panic()
,这通常用于内核抛出一个警告,暗示某种不太合理的事情发生了。
而WARN_ON
则是调用dump_stack
,打印堆栈信息,不会OOPS
,具体的调用情况与中断有关,可以通过linux中断机制来学习。
有时,WARN_ON()
也可以作为一个调试技巧。比如,进到内核某个函数后,不知道这个函数是怎么一级一级被调用进来的,可以在该函数中加入一个WARN_ON(1)
。
/*
* Don't use BUG() or BUG_ON() unless there's really no way out; one
* example might be detecting data structure corruption in the middle
* of an operation that can't be backed out of. If the (sub)system
* can somehow continue operating, perhaps with reduced functionality,
* it's probably not BUG-worthy.
*
* If you're tempted to BUG(), think again: is completely giving up
* really the *only* solution? There are usually better options, where
* users don't need to reboot ASAP and can mostly shut down cleanly.
*/
#ifndef HAVE_ARCH_BUG
#define BUG() do { \
printk("BUG: failure at %s:%d/%s()!\n", __FILE__, __LINE__, __func__); \
barrier_before_unreachable(); \
panic("BUG!"); \
} while (0)
#endif
#ifndef HAVE_ARCH_BUG_ON
#define BUG_ON(condition) do { if (unlikely(condition)) BUG(); } while (0)
#endif
/*
* WARN(), WARN_ON(), WARN_ON_ONCE, and so on can be used to report
* significant kernel issues that need prompt attention if they should ever
* appear at runtime.
*
* Do not use these macros when checking for invalid external inputs
* (e.g. invalid system call arguments, or invalid data coming from
* network/devices), and on transient conditions like ENOMEM or EAGAIN.
* These macros should be used for recoverable kernel issues only.
* For invalid external inputs, transient conditions, etc use
* pr_err[_once/_ratelimited]() followed by dump_stack(), if necessary.
* Do not include "BUG"/"WARNING" in format strings manually to make these
* conditions distinguishable from kernel issues.
*
* Use the versions with printk format strings to provide better diagnostics.
*/
#ifndef __WARN_FLAGS
extern __printf(4, 5)
void warn_slowpath_fmt(const char *file, const int line, unsigned taint,
const char *fmt, ...);
#define __WARN() __WARN_printf(TAINT_WARN, NULL)
#define __WARN_printf(taint, arg...) do { \
instrumentation_begin(); \
warn_slowpath_fmt(__FILE__, __LINE__, taint, arg); \
instrumentation_end(); \
} while (0)
#else
extern __printf(1, 2) void __warn_printk(const char *fmt, ...);
#define __WARN() __WARN_FLAGS(BUGFLAG_TAINT(TAINT_WARN))
#define __WARN_printf(taint, arg...) do { \
instrumentation_begin(); \
__warn_printk(arg); \
__WARN_FLAGS(BUGFLAG_NO_CUT_HERE | BUGFLAG_TAINT(taint));\
instrumentation_end(); \
} while (0)
#define WARN_ON_ONCE(condition) ({ \
int __ret_warn_on = !!(condition); \
if (unlikely(__ret_warn_on)) \
__WARN_FLAGS(BUGFLAG_ONCE | \
BUGFLAG_TAINT(TAINT_WARN)); \
unlikely(__ret_warn_on); \
})
#endif
/* used internally by panic.c */
#ifndef WARN_ON
#define WARN_ON(condition) ({ \
int __ret_warn_on = !!(condition); \
if (unlikely(__ret_warn_on)) \
__WARN(); \
unlikely(__ret_warn_on); \
})
#endif
#ifndef WARN
#define WARN(condition, format...) ({ \
int __ret_warn_on = !!(condition); \
if (unlikely(__ret_warn_on)) \
__WARN_printf(TAINT_WARN, format); \
unlikely(__ret_warn_on); \
})
#endif
#define WARN_TAINT(condition, taint, format...) ({ \
int __ret_warn_on = !!(condition); \
if (unlikely(__ret_warn_on)) \
__WARN_printf(taint, format); \
unlikely(__ret_warn_on); \
})
#ifndef WARN_ON_ONCE
#define WARN_ON_ONCE(condition) \
DO_ONCE_LITE_IF(condition, WARN_ON, 1)
#endif
#define WARN_ONCE(condition, format...) \
DO_ONCE_LITE_IF(condition, WARN, 1, format)
#define WARN_TAINT_ONCE(condition, taint, format...) \
DO_ONCE_LITE_IF(condition, WARN_TAINT, 1, taint, format)
#ifndef __WARN_FLAGS
void warn_slowpath_fmt(const char *file, int line, unsigned taint,
const char *fmt, ...)
{
struct warn_args args;
pr_warn(CUT_HERE);
if (!fmt) {
__warn(file, line, __builtin_return_address(0), taint,
NULL, NULL);
return;
}
args.fmt = fmt;
va_start(args.args, fmt);
__warn(file, line, __builtin_return_address(0), taint, NULL, &args);
va_end(args.args);
}
EXPORT_SYMBOL(warn_slowpath_fmt);
void __warn(const char *file, int line, void *caller, unsigned taint,
struct pt_regs *regs, struct warn_args *args)
{
disable_trace_on_warning();
if (file)
pr_warn("WARNING: CPU: %d PID: %d at %s:%d %pS\n",
raw_smp_processor_id(), current->pid, file, line,
caller);
else
pr_warn("WARNING: CPU: %d PID: %d at %pS\n",
raw_smp_processor_id(), current->pid, caller);
if (args)
vprintk(args->fmt, args->args);
print_modules();
if (regs)
show_regs(regs);
if (panic_on_warn)
panic("panic_on_warn set ...\n");
if (!regs)
dump_stack();
print_irqtrace_events(current);
print_oops_end_marker();
trace_error_report_end(ERROR_DETECTOR_WARN, (unsigned long)caller);
/* Just a warning, don't kill lockdep. */
add_taint(taint, LOCKDEP_STILL_OK);
}