Linux: the soft lockup detection mechanism

1. Preface

Given the limits of the author's ability, this article may contain mistakes; the author makes no promises regarding any loss they may cause the reader.

2. Analysis background

The analysis in this article is based on the linux-4.14.132 kernel source. The runtime environment is Ubuntu 16.04.4 LTS + QEMU + ARM vexpress-a9, with a rootfs built from ubuntu-base-16.04-core-armhf.tar.gz.

3. The soft lockup mechanism

3.1 What is a soft lockup?

A soft lockup is the situation in which a piece of code keeps occupying the current CPU, so that no other task can be scheduled on that CPU. What can lead to a soft lockup differs between the kernel preemption configurations.

3.2 soft lockup under the various preemption configurations

3.2.1 soft lockup under CONFIG_PREEMPT_NONE

CONFIG_PREEMPT_NONE does not allow preemption in kernel mode; it aims at maximum throughput and is typically used on servers. Under CONFIG_PREEMPT_NONE, code that contains an infinite loop (or an equivalent construct) can cause a soft lockup.
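
A minimal sketch of such code, using a hypothetical kernel thread function hog_cpu_fn() (not from the kernel tree; section 4 builds a complete test module around the same idea):

#include <linux/kthread.h>

/*
 * A kernel thread that spins without ever reaching a scheduling point.
 * Under CONFIG_PREEMPT_NONE (and likewise CONFIG_PREEMPT_VOLUNTARY)
 * nothing preempts it, so no other task can run on this CPU and the
 * soft lockup detector eventually fires.
 */
static int hog_cpu_fn(void *unused)
{
	while (!kthread_should_stop())
		;	/* no cond_resched(), no sleep: no scheduling point */
	return 0;
}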

3.2.2 soft lockup under CONFIG_PREEMPT

CONFIG_PREEMPT allows preemption in kernel mode and targets low-latency desktop systems. Under CONFIG_PREEMPT, a task that keeps preemption disabled for a long time can cause a soft lockup (the example module in section 4 demonstrates this with preempt_disable() plus a long busy delay).

3.2.3 soft lockup under CONFIG_PREEMPT_VOLUNTARY

CONFIG_PREEMPT_VOLUNTARY does not allow preemption in kernel mode either and is typically used on desktop systems. Compared with CONFIG_PREEMPT_NONE, it inserts explicit scheduling points on code paths that may sleep, in order to reduce latency:

/*
 * include/linux/kernel.h
 */

...

#ifdef CONFIG_PREEMPT_VOLUNTARY
extern int _cond_resched(void);
# define might_resched() _cond_resched()
#else
# define might_resched() do { } while (0)
#endif

#ifdef CONFIG_DEBUG_ATOMIC_SLEEP
  void ___might_sleep(const char *file, int line, int preempt_offset);
  void __might_sleep(const char *file, int line, int preempt_offset);
/**
 * might_sleep - annotation for functions that can sleep
 *
 * this macro will print a stack trace if it is executed in an atomic
 * context (spinlock, irq-handler, ...).
 *
 * This is a useful debugging help to be able to catch problems early and not
 * be bitten later when the calling function happens to sleep when it is not
 * supposed to.
 */ 
# define might_sleep() \
	do { __might_sleep(__FILE__, __LINE__, 0); might_resched(); } while (0)
# define sched_annotate_sleep()	(current->task_state_change = 0)
#else
  static inline void ___might_sleep(const char *file, int line,
				   int preempt_offset) { }
  static inline void __might_sleep(const char *file, int line,
				   int preempt_offset) { }
# define might_sleep() do { might_resched(); } while (0)
# define sched_annotate_sleep() do { } while (0)
#endif

#define might_sleep_if(cond) do { if (cond) might_sleep(); } while (0)

...

Under CONFIG_PREEMPT_VOLUNTARY, code that contains an infinite loop (or an equivalent construct that never reaches one of these scheduling points) can therefore still cause a soft lockup.
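
Conversely, a long-running kernel loop can avoid a soft lockup under CONFIG_PREEMPT_NONE / CONFIG_PREEMPT_VOLUNTARY by inserting an explicit scheduling point itself. A minimal sketch, with a hypothetical worker function long_worker_fn() (not from the kernel tree):

#include <linux/kthread.h>
#include <linux/sched.h>

static int long_worker_fn(void *unused)
{
	while (!kthread_should_stop()) {
		/* ... do one bounded chunk of work ... */

		/*
		 * Explicit scheduling point: gives other tasks on this CPU,
		 * including the soft lockup watchdog thread, a chance to run.
		 */
		cond_resched();
	}
	return 0;
}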

3.3 Implementation of the soft lockup detector

3.3.1 Creating the per-CPU soft lockup watchdog threads

A watchdog kernel thread is created for every CPU. Each time this thread is woken and scheduled (which happens when the check timer fires), it refreshes that CPU's watchdog timestamp. The timer callback, in addition to waking the thread, re-arms the timer and compares the watchdog timestamp with the current time: if (current time - watchdog timestamp >= the configured soft lockup threshold), the task currently running on that CPU has been holding it for too long, and a soft lockup is reported. The kernel-thread creation details that appear below are covered in the article Linux: 内核线程简析.

void __init lockup_detector_init(void)
{
	...
	cpumask_copy(&watchdog_cpumask, cpu_possible_mask);

	...
	lockup_detector_setup();
}
static struct smp_hotplug_thread watchdog_threads = {
	.store			= &softlockup_watchdog,
	.thread_should_run	= watchdog_should_run,
	.thread_fn		= watchdog,
	.thread_comm		= "watchdog/%u",
	.setup			= watchdog_enable,
	.cleanup		= watchdog_cleanup,
	.park			= watchdog_disable,
	.unpark			= watchdog_enable,
};

static __init void lockup_detector_setup(void)
{
	...

	lockup_detector_update_enable();

	...
	/*
	 * Create the per-CPU watchdog kernel threads:
	 * . each thread refreshes its own CPU's watchdog timestamp;
	 * . each timestamp refresh goes hand in hand with a timer that
	 *   checks the interval between timestamp updates; if the
	 *   interval exceeds the configured threshold, a soft lockup
	 *   is reported.
	 */
	ret = smpboot_register_percpu_thread_cpumask(&watchdog_threads,
						     &watchdog_allowed_mask);
	if (ret) {
		pr_err("Failed to initialize soft lockup detector threads\n");
		return;
	}

	mutex_lock(&watchdog_mutex);
	softlockup_threads_initialized = true;
	lockup_detector_reconfigure();
	mutex_unlock(&watchdog_mutex);
}
static void lockup_detector_update_enable(void)
{
	watchdog_enabled = 0;
	...
	if (soft_watchdog_user_enabled)
		watchdog_enabled |= SOFT_WATCHDOG_ENABLED;
}
int smpboot_register_percpu_thread_cpumask(struct smp_hotplug_thread *plug_thread,
					   const struct cpumask *cpumask)
{
	...
	for_each_online_cpu(cpu) {
		ret = __smpboot_create_thread(plug_thread, cpu); /* create the kernel thread for @cpu */
		...
		if (cpumask_test_cpu(cpu, cpumask))
			smpboot_unpark_thread(plug_thread, cpu);  /* unpark @cpu's kernel thread; it enters smpboot_thread_fn() */
	}
	...
}
static int
__smpboot_create_thread(struct smp_hotplug_thread *ht, unsigned int cpu)
{
	struct task_struct *tsk = *per_cpu_ptr(ht->store, cpu);
	struct smpboot_thread_data *td;

	td = kzalloc_node(sizeof(*td), GFP_KERNEL, cpu_to_node(cpu));
	td->cpu = cpu;
	td->ht = ht;

	/*
	 * The common entry point of SMP hotplug kernel threads is
	 * smpboot_thread_fn(), which in turn calls the thread-specific
	 * entry point.
	 */
	tsk = kthread_create_on_cpu(smpboot_thread_fn, td, cpu,
				    ht->thread_comm);
	kthread_park(tsk); /* park the kernel thread for now */
	get_task_struct(tsk);
	*per_cpu_ptr(ht->store, cpu) = tsk;
	...
	return 0;
}

3.3.2 Running the soft lockup watchdog kernel threads

static int smpboot_thread_fn(void *data)
{
	struct smpboot_thread_data *td = data;
	struct smp_hotplug_thread *ht = td->ht; /* watchdog_threads */

	while (1) {
		set_current_state(TASK_INTERRUPTIBLE);
		preempt_disable();
		if (kthread_should_stop()) {
			__set_current_state(TASK_RUNNING);
			preempt_enable();
			/* cleanup must mirror setup */
			if (ht->cleanup && td->status != HP_THREAD_NONE)
				ht->cleanup(td->cpu, cpu_online(td->cpu));
			kfree(td);
			return 0;
		}

		if (kthread_should_park()) {
			__set_current_state(TASK_RUNNING);
			preempt_enable();
			if (ht->park && td->status == HP_THREAD_ACTIVE) {
				BUG_ON(td->cpu != smp_processor_id());
				ht->park(td->cpu);
				td->status = HP_THREAD_PARKED;
			}
			kthread_parkme();
			/* We might have been woken for stop */
			continue;
		}

		BUG_ON(td->cpu != smp_processor_id());

		/* Check for state change setup */
		switch (td->status) {
		case HP_THREAD_NONE:
			__set_current_state(TASK_RUNNING);
			preempt_enable();
			if (ht->setup)
				ht->setup(td->cpu);  /* watchdog_enable(): initialize the watchdog timestamp and start the interval-check timer */
			td->status = HP_THREAD_ACTIVE;
			continue;

		case HP_THREAD_PARKED:
			__set_current_state(TASK_RUNNING);
			preempt_enable();
			if (ht->unpark)
				ht->unpark(td->cpu);
			td->status = HP_THREAD_ACTIVE;
			continue;
		}

		if (!ht->thread_should_run(td->cpu)) {
			preempt_enable_no_resched();
			schedule();
		} else {
			__set_current_state(TASK_RUNNING);
			preempt_enable();
			/* refresh the current CPU's watchdog timestamp */
			ht->thread_fn(td->cpu); /* kernel/watchdog.c: watchdog() */
		}
	}
}
static void watchdog_enable(unsigned int cpu)
{
	/* start the timer that checks the timestamp update interval */
	hrtimer_init(hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
	hrtimer->function = watchdog_timer_fn;
	hrtimer_start(hrtimer, ns_to_ktime(sample_period),
		      HRTIMER_MODE_REL_PINNED);
	
	__touch_watchdog(); /* initialize the watchdog timestamp */
	watchdog_set_prio(SCHED_FIFO, MAX_RT_PRIO - 1); /* switch to the RT scheduling class, policy SCHED_FIFO */
}
static void watchdog(unsigned int cpu)
{
	__this_cpu_write(soft_lockup_hrtimer_cnt,
			 __this_cpu_read(hrtimer_interrupts));
	__touch_watchdog(); /* the watchdog thread refreshes this CPU's watchdog timestamp from time to time */
}

3.3.3 Triggering the soft lockup report

When the timer expires, watchdog_timer_fn() runs and, if needed, reports a soft lockup:

/* watchdog kicker functions */
static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
{
	unsigned long touch_ts = __this_cpu_read(watchdog_touch_ts); /* read this CPU's most recently updated watchdog timestamp */
	...

	/* kick the softlockup detector */
	wake_up_process(__this_cpu_read(softlockup_watchdog)); /* wake this CPU's timestamp-refresh (watchdog) thread */

	/* .. and repeat */
	hrtimer_forward_now(hrtimer, ns_to_ktime(sample_period));

	if (touch_ts == 0) { /* the timestamp is reset to 0 in cases such as suspend/resume */
		...
		__touch_watchdog(); /* re-initialize the timestamp */
		return HRTIMER_RESTART; /* re-arm the timer */
	}

	duration = is_softlockup(touch_ts); /* difference between the last timestamp update and now */
	if (unlikely(duration)) { /* the difference exceeds the configured threshold */
		...
		
		/* report the soft lockup */
		pr_emerg("BUG: soft lockup - CPU#%d stuck for %us! [%s:%d]\n",
			smp_processor_id(), duration,
			current->comm, task_pid_nr(current));
		__this_cpu_write(softlockup_task_ptr_saved, current);
		print_modules();
		print_irqtrace_events(current);
		if (regs)
			show_regs(regs);
		else
			dump_stack();
		
		...

		add_taint(TAINT_SOFTLOCKUP, LOCKDEP_STILL_OK);
		if (softlockup_panic)
			panic("softlockup: hung tasks");
		__this_cpu_write(soft_watchdog_warn, true);
	}  else
		__this_cpu_write(soft_watchdog_warn, false);
	
	return HRTIMER_RESTART;
}
static int is_softlockup(unsigned long touch_ts)
{
	unsigned long now = get_timestamp();

	if ((watchdog_enabled & SOFT_WATCHDOG_ENABLED) && watchdog_thresh){
		/* Warn about unreasonable delays. */
		/* the timestamp has not been refreshed within the configured threshold */
		if (time_after(now, touch_ts + get_softlockup_thresh()))
			return now - touch_ts;
	}
	return 0;
}
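
For reference, the timestamps being compared here are coarse values of roughly one-second granularity. A lightly abridged view of the helpers in kernel/watchdog.c (the exact clock source, running_clock(), is quoted from memory here and should be treated as an assumption):

/* kernel/watchdog.c (abridged) */
static DEFINE_PER_CPU(unsigned long, watchdog_touch_ts);

static unsigned long get_timestamp(void)
{
	return running_clock() >> 30LL; /* 2^30 ns ~= 1 s, i.e. roughly seconds */
}

/* refresh the current CPU's watchdog timestamp */
static void __touch_watchdog(void)
{
	__this_cpu_write(watchdog_touch_ts, get_timestamp());
}

This is also why the duration in the soft lockup report is printed in whole seconds.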

Now think about this: when a soft lockup is reported, why does the report point straight at current? Couldn't it be that some earlier task hogged the CPU for a long time (longer than the configured threshold), then released it, and current happened to be scheduled right afterwards, taking the blame for the task that actually misbehaved? To prevent exactly that, the soft lockup watchdog kernel thread must have a higher claim on the CPU than ordinary tasks, so that as soon as a scheduling opportunity appears, the watchdog thread runs before any of them. Look at this code fragment:

static void watchdog_enable(unsigned int cpu)
{
	...
	watchdog_set_prio(SCHED_FIFO, MAX_RT_PRIO - 1); /* switch to the RT scheduling class, policy SCHED_FIFO */
}

See it? The soft lockup watchdog kernel thread is moved into the high-priority real-time scheduling class with policy SCHED_FIFO. Tasks of a higher-priority scheduling class are always picked before tasks of lower-priority classes (such as CFS), so the scenario above cannot happen: if the watchdog timestamp was not refreshed, it is because not even the real-time watchdog thread could get onto the CPU, which means the CPU really was held the whole time. But if the watchdog thread always wins over tasks of lower scheduling classes, what about those tasks, when do they get to run? The watchdog thread is only a supervisor and does not need to run constantly, so whenever it has nothing to do it voluntarily gives up the CPU, letting tasks of lower-priority scheduling classes run. See the code below:

static int smpboot_thread_fn(void *data)
{
	while (1) {
		set_current_state(TASK_INTERRUPTIBLE);
		preempt_disable();
		...

		/*
		 * When the timer-interrupt count (hrtimer_interrupts) and the
		 * timestamp-update count (soft_lockup_hrtimer_cnt) are in sync
		 * (equal), we are in the window between the last watchdog
		 * timestamp update and the next timer expiry. Nothing needs to
		 * be done in this window, so the thread yields the CPU to other
		 * tasks and goes to TASK_INTERRUPTIBLE sleep; the timer callback
		 * will wake it up to refresh the watchdog timestamp.
		 */
		if (!ht->thread_should_run(td->cpu)) { /* kernel/watchdog.c: watchdog_should_run() */
			preempt_enable_no_resched();
			schedule();
		} else { /* the timer has fired since the last update: refresh the timestamp */
			__set_current_state(TASK_RUNNING);
			preempt_enable();
			ht->thread_fn(td->cpu); /* kernel/watchdog.c: watchdog() */
		}
	}
}

/* both hrtimer_interrupts and soft_lockup_hrtimer_cnt are, of course, per-CPU */
static DEFINE_PER_CPU(unsigned long, hrtimer_interrupts);
static DEFINE_PER_CPU(unsigned long, soft_lockup_hrtimer_cnt);

/* the timer has fired: the watchdog thread should run and refresh the timestamp */
static int watchdog_should_run(unsigned int cpu)
{
	return __this_cpu_read(hrtimer_interrupts) !=
		__this_cpu_read(soft_lockup_hrtimer_cnt);
}

/*
 * Each time the watchdog timestamp is refreshed, synchronize
 * soft_lockup_hrtimer_cnt with hrtimer_interrupts (the number of times
 * the check timer has fired).
 */
static void watchdog(unsigned int cpu)
{
	/* synchronize soft_lockup_hrtimer_cnt with hrtimer_interrupts (the check-timer fire count) */
	__this_cpu_write(soft_lockup_hrtimer_cnt,
			 __this_cpu_read(hrtimer_interrupts));
	__touch_watchdog(); /* the watchdog thread refreshes the timestamp from time to time */
}

/* the periodic check timer */
static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
{
	...
	
	watchdog_interrupt_count(); /* increment the timer-fire count @hrtimer_interrupts */

	wake_up_process(__this_cpu_read(softlockup_watchdog)); /* wake this CPU's timestamp-refresh watchdog thread */

	hrtimer_forward_now(hrtimer, ns_to_ktime(sample_period)); /* re-arm the timer */
	
	...

	duration = is_softlockup(touch_ts);
	if (unlikely(duration)) {
		/* report the soft lockup */
	}
	...
}

At this point another question comes up: watchdog_timer_fn() both wakes the watchdog thread to refresh the timestamp and checks for a soft lockup. Don't these two things contradict each other? First, why wake the watchdog thread at all? As shown above, the watchdog thread voluntarily yields the CPU and sleeps when there is nothing to do, so it has to be woken at the right moment (when the timer expires) to refresh the watchdog timestamp. Running "wake the watchdog thread to refresh the timestamp" and "check for a soft lockup" in the same timer callback is not contradictory; the key is that the timer period and the soft lockup threshold are different. Look at the code:

static void lockup_detector_reconfigure(void)
{
	...
	set_sample_period();
	...
}

static void set_sample_period(void)
{
	sample_period = get_softlockup_thresh() * ((u64)NSEC_PER_SEC / 5);
	...
}

static void watchdog_enable(unsigned int cpu)
{
	...
	
	hrtimer_init(hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
	hrtimer->function = watchdog_timer_fn;
	hrtimer_start(hrtimer, ns_to_ktime(sample_period),
		      HRTIMER_MODE_REL_PINNED);
	
	...
}

So the timer period is one fifth of the detection threshold: the watchdog thread normally gets several chances to refresh the timestamp within one threshold interval, and only when it gets none of them is a soft lockup reported. That answers the question.
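
For concrete numbers: in this kernel version get_softlockup_thresh() simply doubles the user-visible watchdog_thresh (kernel/watchdog.c):

static int get_softlockup_thresh(void)
{
	return watchdog_thresh * 2;
}

With the default watchdog_thresh of 10 s, the soft lockup threshold is 20 s and sample_period is 20 s / 5 = 4 s, which is consistent with the "stuck for 23s" report shown in section 4.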

4. A soft lockup example

#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/kthread.h>
#include <linux/delay.h>


static struct task_struct *softlockup_task;


static int softlockup_task_fn(void *ignored)
{
	int ret = 0;

	while (!kthread_should_stop()) {
	#if (defined(CONFIG_PREEMPT_NONE) || defined(CONFIG_PREEMPT_VOLUNTARY))
		asm("nop");
	#else /* CONFIG_PREEMPT */
		preempt_disable();
		mdelay(30 * 1000);
		preempt_enable();
	#endif
	}

	return ret;
}


static int __init softlockup_task_demo_init(void)
{
	int ret = 0;

	softlockup_task = kthread_run(softlockup_task_fn, NULL, "softlockup_task");
	if (IS_ERR(softlockup_task)) {
		ret = PTR_ERR(softlockup_task);
		printk(KERN_ERR "%s: Failed to create kernel thread, ret = [%d]\n", __func__, ret);
	}

	printk(KERN_INFO "soft lockup task example module loaded.\n");

	return ret;
}

static void __exit softlockup_task_demo_exit(void)
{
	if (softlockup_task) {
		kthread_stop(softlockup_task);
		softlockup_task = NULL;
	}

	printk(KERN_INFO "soft lockup task example module exited.\n");
}

module_init(softlockup_task_demo_init);
module_exit(softlockup_task_demo_exit);

MODULE_LICENSE("GPL");

The complete code can be found here or here. Configure and build the test module:

#
# Configuration
#

# Pick one of the three preemption models
CONFIG_PREEMPT_NONE=y
#CONFIG_PREEMPT_VOLUNTARY=y
#CONFIG_PREEMPT=y

CONFIG_DEBUG_KERNEL=y
CONFIG_LOCKUP_DETECTOR=y
CONFIG_SOFTLOCKUP_DETECTOR=y
CONFIG_SAMPLE_SOFTLOCKUP=m
cd linux-4.14.132
make ARCH=arm CROSS_COMPILE=arm-linux-gnueabi- -j8 O=output

sudo mount rootfs.img temp
cd linux-4.14.132
sudo make ARCH=arm CROSS_COMPILE=arm-linux-gnueabi- O=output INSTALL_MOD_PATH=/path/to/temp modules_install
cd -
sudo umount temp

Then boot the system with QEMU and load the test module:

sudo qemu-system-arm \
     -M vexpress-a9 \
     -smp 4 \
     -m 512M \
     -kernel /path/to/zImage \
     -dtb /path/to/vexpress-v2p-ca9.dtb \
     -nographic \
     -append "root=/dev/mmcblk0 rw rootfstype=ext4 console=ttyAMA0" \
     -sd rootfs.img
# now inside the QEMU guest, running as root

# ps -eo pid,class,rtprio,pri,psr,comm | grep watchdog
   12 FF      99 139   0 watchdog/0
   15 FF      99 139   1 watchdog/1
   21 FF      99 139   2 watchdog/2
   27 FF      99 139   3 watchdog/3

# modprobe softlockup_example

A while after the module is loaded, the kernel reports the soft lockup BUG (preceded in this run by an RCU stall warning):

[  157.356865] soft lockup task example module loaded.
[  176.383362] INFO: rcu_sched self-detected stall on CPU
[  176.384981] 	1-...: (2113 ticks this GP) idle=a42/140000000000001/0 softirq=2326/2326 fqs=1019 
[  176.385513] 	 (t=2100 jiffies g=903 c=902 q=636)
[  176.387315] NMI backtrace for cpu 1
[  176.387904] CPU: 1 PID: 939 Comm: softlockup_task Not tainted 4.14.132 #34
[  176.387942] Hardware name: ARM-Versatile Express
[  176.391912] [<8011149c>] (unwind_backtrace) from [<8010c330>] (show_stack+0x20/0x24)
[  176.393289] [<8010c330>] (show_stack) from [<806ddfd8>] (dump_stack+0x8c/0xa0)
[  176.393356] [<806ddfd8>] (dump_stack) from [<806e3dc4>] (nmi_cpu_backtrace+0xc0/0xc4)
[  176.393417] [<806e3dc4>] (nmi_cpu_backtrace) from [<806e3eb0>] (nmi_trigger_cpumask_backtrace+0xe8/0x12c)
[  176.393474] [<806e3eb0>] (nmi_trigger_cpumask_backtrace) from [<8010f490>] (arch_trigger_cpumask_backtrace+0x20/0x24)
[  176.393534] [<8010f490>] (arch_trigger_cpumask_backtrace) from [<80182ea0>] (rcu_dump_cpu_stacks+0xac/0xd8)
[  176.393672] [<80182ea0>] (rcu_dump_cpu_stacks) from [<80182478>] (rcu_check_callbacks+0x7f8/0x9f8)
[  176.393725] [<80182478>] (rcu_check_callbacks) from [<80187f84>] (update_process_times+0x44/0x6c)
[  176.393775] [<80187f84>] (update_process_times) from [<80197144>] (tick_periodic+0x4c/0xcc)
[  176.393828] [<80197144>] (tick_periodic) from [<80197368>] (tick_handle_periodic+0x38/0x98)
[  176.393877] [<80197368>] (tick_handle_periodic) from [<8010ffe4>] (twd_handler+0x40/0x50)
[  176.393924] [<8010ffe4>] (twd_handler) from [<80172128>] (handle_percpu_devid_irq+0x98/0x24c)
[  176.393979] [<80172128>] (handle_percpu_devid_irq) from [<8016c728>] (generic_handle_irq+0x34/0x44)
[  176.394031] [<8016c728>] (generic_handle_irq) from [<8016cd3c>] (__handle_domain_irq+0x6c/0xc4)
[  176.394125] [<8016cd3c>] (__handle_domain_irq) from [<80101508>] (gic_handle_irq+0x5c/0xa0)
[  176.394166] [<80101508>] (gic_handle_irq) from [<8010d10c>] (__irq_svc+0x6c/0x90)
[  176.394240] Exception stack(0x9e90df18 to 0x9e90df60)
[  176.394533] df00:                                                       00000000 9e983bc0
[  176.394835] df20: 00000000 9e983bc0 9e983bc0 00000000 9f5fc940 9e90c000 00000000 9e983bdc
[  176.394945] df40: 9e8fdd30 9e90df74 9e90df68 9e90df68 7f000020 7f000020 00000013 ffffffff
[  176.396350] [<8010d10c>] (__irq_svc) from [<7f000020>] (softlockup_task_fn+0x20/0x30 [softlockup_example])
[  176.396667] [<7f000020>] (softlockup_task_fn [softlockup_example]) from [<80143ae0>] (kthread+0x144/0x174)
[  176.396718] [<80143ae0>] (kthread) from [<80107ee8>] (ret_from_fork+0x14/0x2c)
[  204.214887] watchdog: BUG: soft lockup - CPU#1 stuck for 23s! [softlockup_task:939]
[  204.215332] Modules linked in: softlockup_example
[  204.215593] CPU: 1 PID: 939 Comm: softlockup_task Not tainted 4.14.132 #34
[  204.215604] Hardware name: ARM-Versatile Express
[  204.215648] task: 9f700c00 task.stack: 9e90c000
[  204.215701] PC is at kthread_should_stop+0x30/0x54
[  204.215736] LR is at softlockup_task_fn+0x20/0x30 [softlockup_example]
[  204.215755] pc : [<80143524>]    lr : [<7f000020>]    psr: 00000013
[  204.215770] sp : 9e90df50  ip : 9e90df68  fp : 9e90df64
[  204.215785] r10: 9e8fdd30  r9 : 9e983bdc  r8 : 00000000
[  204.215804] r7 : 9e90c000  r6 : 9f5fc940  r5 : 00000000  r4 : 9f700c00
[  204.215821] r3 : 00208040  r2 : 00000000  r1 : 9e983bc0  r0 : 00000000
[  204.215875] Flags: nzcv  IRQs on  FIQs on  Mode SVC_32  ISA ARM  Segment none
[  204.215899] Control: 10c5387d  Table: 7e92806a  DAC: 00000051
[  204.215927] CPU: 1 PID: 939 Comm: softlockup_task Not tainted 4.14.132 #34
[  204.215936] Hardware name: ARM-Versatile Express
[  204.215979] [<8011149c>] (unwind_backtrace) from [<8010c330>] (show_stack+0x20/0x24)
[  204.216011] [<8010c330>] (show_stack) from [<806ddfd8>] (dump_stack+0x8c/0xa0)
[  204.216039] [<806ddfd8>] (dump_stack) from [<80108acc>] (show_regs+0x1c/0x20)
[  204.216067] [<80108acc>] (show_regs) from [<801b5f08>] (watchdog_timer_fn+0x248/0x2c0)
[  204.216095] [<801b5f08>] (watchdog_timer_fn) from [<80188f48>] (hrtimer_run_queues+0x1b8/0x370)
[  204.216147] [<80188f48>] (hrtimer_run_queues) from [<80187f14>] (run_local_timers+0x24/0x50)
[  204.216182] [<80187f14>] (run_local_timers) from [<80187f7c>] (update_process_times+0x3c/0x6c)
[  204.216207] [<80187f7c>] (update_process_times) from [<80197144>] (tick_periodic+0x4c/0xcc)
[  204.216232] [<80197144>] (tick_periodic) from [<80197368>] (tick_handle_periodic+0x38/0x98)
[  204.216256] [<80197368>] (tick_handle_periodic) from [<8010ffe4>] (twd_handler+0x40/0x50)
[  204.216282] [<8010ffe4>] (twd_handler) from [<80172128>] (handle_percpu_devid_irq+0x98/0x24c)
[  204.216312] [<80172128>] (handle_percpu_devid_irq) from [<8016c728>] (generic_handle_irq+0x34/0x44)
[  204.216338] [<8016c728>] (generic_handle_irq) from [<8016cd3c>] (__handle_domain_irq+0x6c/0xc4)
[  204.216363] [<8016cd3c>] (__handle_domain_irq) from [<80101508>] (gic_handle_irq+0x5c/0xa0)
[  204.216385] [<80101508>] (gic_handle_irq) from [<8010d10c>] (__irq_svc+0x6c/0x90)
[  204.216399] Exception stack(0x9e90df00 to 0x9e90df48)
[  204.216511] df00: 00000000 9e983bc0 00000000 00208040 9f700c00 00000000 9f5fc940 9e90c000
[  204.216617] df20: 00000000 9e983bdc 9e8fdd30 9e90df64 9e90df68 9e90df50 7f000020 80143524
[  204.216657] df40: 00000013 ffffffff
[  204.216697] [<8010d10c>] (__irq_svc) from [<80143524>] (kthread_should_stop+0x30/0x54)
[  204.216730] [<80143524>] (kthread_should_stop) from [<7f000020>] (softlockup_task_fn+0x20/0x30 [softlockup_example])
[  204.216766] [<7f000020>] (softlockup_task_fn [softlockup_example]) from [<80143ae0>] (kthread+0x144/0x174)
[  204.216791] [<80143ae0>] (kthread) from [<80107ee8>] (ret_from_fork+0x14/0x2c)

The output above shows that the soft lockup mechanism depends on interrupts: when a CPU is held and cannot schedule, only an interrupt can break in, and the check-timer callback used for detection runs exactly in interrupt context. It also shows that only kernel code can cause a soft lockup: userspace code cannot disable preemption and can always be preempted. So once a soft lockup occurs, you can be sure that buggy kernel code caused it.

5. soft lockup userspace interfaces

/proc/sys/kernel/soft_watchdog                # enable or disable the soft lockup detector
/proc/sys/kernel/softlockup_panic             # whether a soft lockup triggers a kernel panic
/proc/sys/kernel/softlockup_all_cpu_backtrace # dump the backtraces of all CPUs, not only the locked-up one
/proc/sys/kernel/watchdog_thresh              # the soft lockup time threshold
/proc/sys/kernel/watchdog_cpumask             # mask of CPUs on which the detector is enabled
