鄙人孤陋寡闻,以为linux里死锁检测就只有rtmutex里用到一点。先给出rtmutex被阻塞的一个场景,这里直接给出简化后的代码:
int rtmutex_lock(struct rt_mutex *lock) { if (rt_mutex_cmpxchg(lock, NULL, current)) { return 0; } else return rt_mutex_slowlock(lock, state, NULL, detect_deadlock); } # define rt_mutex_cmpxchg(l,c,n) (cmpxchg(&l->owner, c, n) == c) unsigned long rt_mutex_cmpxchg(struct rt_mutex *lock,unsigned long old, unsigned long new) { unsigned long retval; retval = lock->owner; if (retval == old){ *lock->owner = new; return 1; }else return 0; } static int __sched __rt_mutex_slowlock(struct rt_mutex *lock, int state, struct hrtimer_sleeper *timeout, struct rt_mutex_waiter *waiter, int detect_deadlock, unsigned long flags) { for (;;) { if (try_to_take_rt_mutex(lock)) break; if (!waiter->task) { ret = task_blocks_on_rt_mutex(lock, waiter, current, detect_deadlock, flags); } } }真正的阻塞函数是task_blocks_on_rt_mutex,这个函数就是把当前进程阻塞到资源lock上,
如果有必要还要调整lock持有者的优先级。调整了lock持有者的优先级,还要看lock持有者是否
被另外一把锁lock2阻塞,如果是的话,还要调整lock2持有者的优先级,依次类推,原则就是
让锁持有者尽快完成任务,就可以释放资源了。
static int task_blocks_on_rt_mutex(struct rt_mutex *lock, struct rt_mutex_waiter *waiter, struct task_struct *task, int detect_deadlock, unsigned long flags) { struct task_struct* owner = rtmutex_owner(lock); struct rt_mutex_waiter waiter; struct rt_mutex_waiter *top_waiter = rt_mutex_top_waiter(lock); waiter.task = current; waiter.lock = lock; plist_add(&waiter.list_entry,&lock->wait_list); if(waiter == rt_mutex_top_waiter(lock)){ plist_del(&top_waiter->pi_list_entry, &owner->pi_waiters); plist_add(&waiter->pi_list_entry, &owner->pi_waiters); owner->prio = min(task_top_pi_waiter(owner)->pi_list_entry.prio, task->normal_prio); } rt_mutex_adjust_prio_chain(owner); }连环提升优先级:
static int rt_mutex_adjust_prio_chain(struct task_struct *task) { if(!task->state) // running return 0; struct rt_mutex *new_lock = task->pi_blocked_on.lock; struct rt_mutex_waiter *waiter = task->waiter; struct rt_mutex_waiter *old_lock_waiter = rt_mutex_top_waiter(new_lock); struct rt_mutex_waiter *old_task_resource; //因为task的优先级变化了,我们需要将他在lock的wait_list(排序)的位置移动一下 plist_del(&waiter.list_entry,&new_lock->wait_list); plist_add(&waiter.list_entry,&new_lock->wait_list); //1) 如果这个task由于提升优先级,成为了new_lock的最高优先级等待者,或者, //2) 如果这个task曾经是new_lock最高优先级等待者,由于优先级降低,他不再是new_lock的最高等待者 //3) 那么都需要调整一下new_lock持有者的资源链表的最高优先级等待者集合的顺序 struct task_struct* new_owner = rtmutex_owner(new_lock); old_task_resource = task_top_pi_waiter(new_owner); if(waiter == rt_mutex_top_waiter(lock)){ //情况1 //对new_lock持有者的资源链表重新排序 plist_del(&waiter->pi_list_entry, &new_owner->pi_waiters); plist_add(&waiter->pi_list_entry, &new_owner->pi_waiters); //如果排序后,这个task等待的锁成为了new_lock持有者new_owner的最高优先级资源 //(不管以前它是不是new_owner的最高资源), //则需要修改new_lock持有者的优先级。由于修改了优先级,需要继续往下传递。 if(waiter == task_top_pi_waiter(new_owner)) { new_owner->prio = waiter->prio; return rt_mutex_adjust_prio_chain(new_owner); } }else if(waiter == old_lock_waiter){ //情况2,曾经是lock的最高等待者,现在退位了 //对new_lock持有者的资源链表重新排序, //删除以前的lock最高等待者,把目前lock的最高等待者加入链表 plist_del(&waiter->pi_list_entry, &new_owner->pi_waiters); plist_add(&rt_mutex_top_waiter(lock)->pi_list_entry, &new_owner->pi_waiters); //如果以前task等待的那把锁,是new_owner的最高资源, //则这次无论如何new_owner的优先级都要降低了 //将其优先级降低到目前owner持有的资源里最高者, //然后往下传递 if(waiter == old_task_resource) { new_owner->prio = task_top_pi_waiter(new_owner)->pi_list_entry.prio; return rt_mutex_adjust_prio_chain(new_owner); } } return 0; }从上面的流程可以大体看出,死锁环成立的条件,
情形一,死锁阻塞链的第二个元素,和最后一个元素,都被同一个锁阻塞 (lock == orig_lock)
情形二,首尾相连的环,即最后一把锁的持有者是第一个被阻塞的进程(rt_mutex_owner(lockn) == top_task)
是否还少了一种检测:阻塞链中间的第n个到第n+x个元素形成死锁环?
其实不需要。如果环出现在阻塞链的中间,那么在构成这个环的最后一个进程被阻塞的那一刻,它自己就是链首,当时的检测(情形一或情形二)就应该已经把这个死锁检测出来了。
学过操作系统的应该知道,处理死锁的机制有多种(例如属于死锁避免的银行家算法、基于资源分配图的死锁检测等),这点linux可以进一步优化。
下面分析。
未完待续