最近在一次稳定性测试中发现了如下问题:
c7 BUG: spinlock bad magic on CPU#7, Binder_5/6373
c7 lock: system_int_lock+0x0/0x18, .magic: 00000000, .owner: <none>/-1, .owner_cpu: 0
c7 CPU: 7 PID: 6373 Comm: Binder_5 Tainted:
c7 Call trace:
c7 [] dump_backtrace+0x0/0x164
c7 [] show_stack+0x1c/0x28
c7 [] dump_stack+0x74/0xb8
c7 [] spin_dump+0x78/0xa0
c7 [] spin_bug+0x2c/0x3c
c7 [] do_raw_spin_lock+0xac/0x1bc
c7 [] _raw_spin_lock_irq+0x2c/0x3c
c7 [] wait_for_irq+0x12c/0x420
c7 [] ispdrv_open+0xbf8/0xfd4
c7 [] isp_ioctl+0x1594/0x2478
c7 [] compat_isp_ioctl+0x864/0x1e78
c7 [] compat_SyS_ioctl+0xbc/0x1688
/*
 * Clear the bits given in `event` from g_latest_system_int while holding
 * system_int_lock, so the read-modify-write cannot interleave with other
 * users of the lock.
 * NOTE(review): system_int_lock must have been initialized (for example with
 * DEFINE_SPINLOCK() or spin_lock_init()) before this runs; with
 * CONFIG_DEBUG_SPINLOCK an uninitialized lock fails the "bad magic" check.
 */
spin_lock_irq(&system_int_lock);
g_latest_system_int &= ~event;
spin_unlock_irq(&system_int_lock);
可以看到,这段代码只是用自旋锁来保证对 g_latest_system_int 的修改是原子的而已。
/*
 * do_raw_spin_lock - acquire a raw spinlock with debug checks around it.
 * @lock: the raw spinlock to acquire.
 *
 * Runs the pre-acquisition checks, attempts the architecture trylock once,
 * falls back to __spin_lock_debug() if that attempt fails, and finally runs
 * the post-acquisition hook.
 */
void do_raw_spin_lock(raw_spinlock_t *lock)
{
	/* Validate the lock before touching it (magic / recursion checks). */
	debug_spin_lock_before(lock);

	/* Contended (or otherwise failed) trylock: take the debug slow path. */
	if (unlikely(!arch_spin_trylock(&lock->raw_lock))) {
		__spin_lock_debug(lock);
	}

	debug_spin_lock_after(lock);
}
上面是 do_raw_spin_lock 函数的实现。如果开启了 CONFIG_DEBUG_SPINLOCK 配置项,就会先进入 debug_spin_lock_before 函数做检查。
/*
 * debug_spin_lock_before - sanity-check a spinlock before acquiring it.
 * @lock: the raw spinlock about to be taken.
 *
 * Reports (via SPIN_BUG_ON) when:
 *   - the magic field does not equal SPINLOCK_MAGIC ("bad magic"),
 *   - the recorded owner is the current task ("recursion"),
 *   - the recorded owner CPU is the CPU we are running on ("cpu recursion").
 */
static inline void debug_spin_lock_before(raw_spinlock_t *lock)
{
	SPIN_BUG_ON(lock->magic != SPINLOCK_MAGIC, lock, "bad magic");
	SPIN_BUG_ON(lock->owner == current, lock, "recursion");
	SPIN_BUG_ON(lock->owner_cpu == raw_smp_processor_id(), lock,
		    "cpu recursion");
}
/*
 * SPIN_BUG_ON - report a spinlock bug when @cond is true.
 * @cond: condition indicating a corrupted or misused lock.
 * @lock: the lock being checked, passed through to spin_bug().
 * @msg:  short description of the failure, passed through to spin_bug().
 *
 * Wrapped in do { } while (0) so the macro behaves as a single statement:
 * a bare `if` here would capture a following `else` at the call site.
 */
#define SPIN_BUG_ON(cond, lock, msg)			\
	do {						\
		if (unlikely(cond))			\
			spin_bug(lock, msg);		\
	} while (0)
/*
 * spin_bug - handle a failed spinlock debug check.
 * @lock: the offending lock.
 * @msg:  short description of the failed check.
 *
 * Dumps the lock state only when debug_locks_off() returns non-zero;
 * otherwise returns silently.
 * NOTE(review): presumably debug_locks_off() succeeds only for the first
 * reporter, so just one report is printed - confirm against its definition.
 */
static void spin_bug(raw_spinlock_t *lock, const char *msg)
{
	if (debug_locks_off())
		spin_dump(lock, msg);
}
/*
 * spin_dump - print a diagnostic report for a spinlock that failed a check.
 * @lock: the offending lock.
 * @msg:  short description of the failed check (e.g. "bad magic").
 *
 * Emits two KERN_EMERG lines - the current CPU/task, then the lock's symbol,
 * magic, owner and owner CPU - followed by a stack dump.
 */
static void spin_dump(raw_spinlock_t *lock, const char *msg)
{
	struct task_struct *owner = NULL;

	/* Treat ->owner as a task only if it is neither NULL nor the init sentinel. */
	if (lock->owner && lock->owner != SPINLOCK_OWNER_INIT)
		owner = lock->owner;

	printk(KERN_EMERG "BUG: spinlock %s on CPU#%d, %s/%d\n",
		msg, raw_smp_processor_id(),
		current->comm, task_pid_nr(current));
	/* With no valid owner, print the "<none>" placeholder and pid -1. */
	printk(KERN_EMERG " lock: %pS, .magic: %08x, .owner: %s/%d, "
			".owner_cpu: %d\n",
		lock, lock->magic,
		owner ? owner->comm : "<none>",
		owner ? task_pid_nr(owner) : -1,
		lock->owner_cpu);
	dump_stack();
}
可以得出:lock 是 system_int_lock,lock->magic=00000000(而不是 SPINLOCK_MAGIC),owner 为空(打印为 <none>,pid 为 -1),owner_cpu=0。
那为什么是这样的? 是什么导致出现这个问题的?
不妨看下spin_lock的结构体定义:
/*
 * raw_spinlock - the lock word plus optional debug bookkeeping.
 * The debug fields exist only when CONFIG_DEBUG_SPINLOCK is enabled.
 */
typedef struct raw_spinlock {
/* The underlying lock word handed to arch_spin_trylock(). */
arch_spinlock_t raw_lock;
#ifdef CONFIG_DEBUG_SPINLOCK
/*
 * magic: compared against SPINLOCK_MAGIC before locking ("bad magic").
 * owner_cpu: compared against the current CPU id ("cpu recursion").
 */
unsigned int magic, owner_cpu;
/* owner: compared against `current` before locking ("recursion"). */
void *owner;
#endif
} raw_spinlock_t;
在开启CONFIG_DEBUG_SPINLOCK的情况下, magic, owner_cpu, owner是有意义的。
那这几个值是在什么地方设置的?
/*
 * SPIN_DEBUG_INIT - designated-initializer fragment for the debug fields.
 * With CONFIG_DEBUG_SPINLOCK it sets magic to SPINLOCK_MAGIC, owner_cpu to -1
 * and owner to SPINLOCK_OWNER_INIT (note the trailing comma, so it can be
 * followed by further initializers); otherwise it expands to nothing.
 */
#ifdef CONFIG_DEBUG_SPINLOCK
# define SPIN_DEBUG_INIT(lockname) \
.magic = SPINLOCK_MAGIC, \
.owner_cpu = -1, \
.owner = SPINLOCK_OWNER_INIT,
#else
# define SPIN_DEBUG_INIT(lockname)
#endif
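可以看到,SPIN_DEBUG_INIT 宏会给这几个变量赋初值。再沿着这个宏的使用处找下去。
最终确认是 system_int_lock 这个 spinlock 变量没有初始化:它的 magic 仍然是 0,不等于 SPINLOCK_MAGIC,于是触发了 bad magic。如果正确初始化(例如使用 DEFINE_SPINLOCK),将走如下流程: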
/*
 * Static spinlock initialization chain:
 *   DEFINE_SPINLOCK(x)            defines spinlock_t x with an initializer,
 *   __SPIN_LOCK_UNLOCKED          casts the initializer to spinlock_t,
 *   __SPIN_LOCK_INITIALIZER       wraps the raw-lock initializer in .rlock,
 *   __RAW_SPIN_LOCK_INITIALIZER   sets raw_lock to the unlocked value and
 *                                 appends SPIN_DEBUG_INIT / SPIN_DEP_MAP_INIT.
 * SPIN_DEBUG_INIT is what stores SPINLOCK_MAGIC into the lock.
 * NOTE(review): SPIN_DEP_MAP_INIT is defined outside this excerpt.
 */
#define DEFINE_SPINLOCK(x) spinlock_t x = __SPIN_LOCK_UNLOCKED(x)
#define __SPIN_LOCK_UNLOCKED(lockname) \
(spinlock_t ) __SPIN_LOCK_INITIALIZER(lockname)
#define __SPIN_LOCK_INITIALIZER(lockname) \
{ { .rlock = __RAW_SPIN_LOCK_INITIALIZER(lockname) } }
#define __RAW_SPIN_LOCK_INITIALIZER(lockname) \
{ \
.raw_lock = __ARCH_SPIN_LOCK_UNLOCKED, \
SPIN_DEBUG_INIT(lockname) \
SPIN_DEP_MAP_INIT(lockname) }
/*
 * SPIN_DEBUG_INIT - initializer fragment for the spinlock debug fields.
 * Enabled build: magic = SPINLOCK_MAGIC, owner_cpu = -1,
 * owner = SPINLOCK_OWNER_INIT. Disabled build: expands to nothing.
 */
#ifdef CONFIG_DEBUG_SPINLOCK
# define SPIN_DEBUG_INIT(lockname) \
.magic = SPINLOCK_MAGIC, \
.owner_cpu = -1, \
.owner = SPINLOCK_OWNER_INIT,
#else
# define SPIN_DEBUG_INIT(lockname)
#endif