rtmutex 的死锁检测

鄙人孤陋寡闻,以为linux里死锁检测就只有rtmutex里用到一点。先给出rtmutex被阻塞的一个场景,这里直接给出简化后的代码:

/*
 * Simplified illustration of rt_mutex locking (not the real kernel code).
 * Fast path: one atomic compare-and-swap claims a free lock.
 * Slow path: block on the lock, possibly boosting the owner's priority.
 * Returns 0 on success, otherwise the slow-path result.
 */
int rtmutex_lock(struct rt_mutex *lock)
{
	int state = 0;           /* sleep state for the slow path (simplified) */
	int detect_deadlock = 0; /* whether the slow path runs deadlock detection */

	/* Fast path: lock free (owner == NULL) -> claim it for current task. */
	if (rt_mutex_cmpxchg(lock, NULL, current))
		return 0;

	/* Contended: fall back to the blocking slow path. */
	return rt_mutex_slowlock(lock, state, NULL, detect_deadlock);
}

# define rt_mutex_cmpxchg(l,c,n)	(cmpxchg(&l->owner, c, n) == c)
unsigned long rt_mutex_cmpxchg(struct rt_mutex *lock,unsigned long old, unsigned long new)
{
	unsigned long retval;
	retval = lock->owner;
	if (retval == old){
	    *lock->owner = new;
            return 1;
        }else
	    return 0;
}


/*
 * Simplified slow path: loop trying to take the lock; on the first
 * failed attempt, enqueue ourselves on the lock's wait list (which may
 * boost the owner's priority).
 *
 * NOTE(review): heavily simplified — the real function also checks
 * signals/timeouts, schedules out between attempts, and breaks out of
 * the loop on error; those parts are omitted by the original author.
 */
static int __sched
__rt_mutex_slowlock(struct rt_mutex *lock, int state,
		    struct hrtimer_sleeper *timeout,
		    struct rt_mutex_waiter *waiter,
		    int detect_deadlock, unsigned long flags)
{
	int ret = 0; /* was used without a declaration in the original */

	for (;;) {
		/* Try to grab the lock; stop once we own it. */
		if (try_to_take_rt_mutex(lock))
			break;

		/* First iteration only: block this task on the lock. */
		if (!waiter->task) {
			ret = task_blocks_on_rt_mutex(lock, waiter, current,
						      detect_deadlock, flags);
		}
	}

	return ret;
}
真正的阻塞函数是task_blocks_on_rt_mutex,这个函数就是把当前进程阻塞到资源lock上,

如果有必要还要调整lock持有者的优先级。调整了lock持有者的优先级,还要看lock持有者是否

 被另外一把锁lock2阻塞,如果是的话,还要调整lock2持有者的优先级,依次类推,原则就是

让锁持有者尽快完成任务,就可以释放资源了。

/*
 * Block `task` on `lock`. If it becomes the lock's highest-priority
 * waiter, re-order the owner's pi_waiters list and boost the owner's
 * priority, then propagate the boost down the blocking chain.
 *
 * Fixes vs. the original sketch: the local `struct rt_mutex_waiter waiter`
 * redeclared (and shadowed) the `waiter` parameter — a hard error in C;
 * the passed-in waiter is used throughout instead, which also fixes the
 * struct-vs-pointer comparison with rt_mutex_top_waiter() and lets the
 * function actually return a value.
 */
static int task_blocks_on_rt_mutex(struct rt_mutex *lock,
				   struct rt_mutex_waiter *waiter,
				   struct task_struct *task,
				   int detect_deadlock, unsigned long flags)
{
	struct task_struct *owner = rtmutex_owner(lock);
	/* Top waiter BEFORE we enqueue, so it can be removed if displaced. */
	struct rt_mutex_waiter *top_waiter = rt_mutex_top_waiter(lock);

	/* Enqueue this task on the lock's (priority-sorted) wait list. */
	waiter->task = current;
	waiter->lock = lock;
	plist_add(&waiter->list_entry, &lock->wait_list);

	/*
	 * If we are now the lock's top waiter, replace the old top waiter
	 * in the owner's pi_waiters list and boost the owner's priority
	 * (lower value == higher priority in the kernel).
	 */
	if (waiter == rt_mutex_top_waiter(lock)) {
		plist_del(&top_waiter->pi_list_entry, &owner->pi_waiters);
		plist_add(&waiter->pi_list_entry, &owner->pi_waiters);
		owner->prio = min(task_top_pi_waiter(owner)->pi_list_entry.prio,
				  task->normal_prio);
	}

	/* Walk the chain: the owner may itself be blocked on another lock. */
	return rt_mutex_adjust_prio_chain(owner);
}
连环提升优先级:

/*
 * Propagate a priority change down the blocking chain: `task` just had
 * its priority adjusted; if it is itself blocked on a lock, re-sort that
 * lock's wait list and, when the top waiter changed, adjust that lock's
 * owner too — recursing until a running (unblocked) task is reached.
 *
 * Fixes vs. the original sketch: `waiter` is a pointer, so `->` access
 * is required; the undefined `lock` was replaced by `new_lock`; case 1
 * now boosts with `waiter->pi_list_entry.prio` for consistency with the
 * deboost branch.
 */
static int rt_mutex_adjust_prio_chain(struct task_struct *task)
{
	/* A running task is not blocked on anything — chain ends here. */
	if (!task->state) /* running */
		return 0;

	struct rt_mutex *new_lock = task->pi_blocked_on.lock;
	struct rt_mutex_waiter *waiter = task->waiter;

	/* Top waiter of new_lock BEFORE re-sorting, to detect a demotion. */
	struct rt_mutex_waiter *old_lock_waiter = rt_mutex_top_waiter(new_lock);
	struct rt_mutex_waiter *old_task_resource;

	/* task's priority changed: re-position it in new_lock's sorted wait_list. */
	plist_del(&waiter->list_entry, &new_lock->wait_list);
	plist_add(&waiter->list_entry, &new_lock->wait_list);

	/*
	 * 1) If the boost made task the top waiter of new_lock, or
	 * 2) if task used to be the top waiter and a deboost demoted it,
	 * 3) then the owner's pi_waiters list must be re-ordered as well.
	 */
	struct task_struct *new_owner = rtmutex_owner(new_lock);
	old_task_resource = task_top_pi_waiter(new_owner);

	if (waiter == rt_mutex_top_waiter(new_lock)) { /* case 1 */
		/* Re-sort the owner's pi_waiters list. */
		plist_del(&waiter->pi_list_entry, &new_owner->pi_waiters);
		plist_add(&waiter->pi_list_entry, &new_owner->pi_waiters);

		/*
		 * If after re-sorting this waiter tops the owner's pi list
		 * (whether or not it did before), raise the owner's priority
		 * and keep propagating down the chain.
		 */
		if (waiter == task_top_pi_waiter(new_owner)) {
			new_owner->prio = waiter->pi_list_entry.prio;
			return rt_mutex_adjust_prio_chain(new_owner);
		}
	} else if (waiter == old_lock_waiter) { /* case 2: demoted from top */
		/*
		 * Re-sort the owner's pi_waiters list: remove the former
		 * top waiter, insert the lock's current top waiter.
		 */
		plist_del(&waiter->pi_list_entry, &new_owner->pi_waiters);
		plist_add(&rt_mutex_top_waiter(new_lock)->pi_list_entry,
			  &new_owner->pi_waiters);

		/*
		 * If the lock task waits on used to be the owner's highest
		 * resource, the owner's priority must now drop to the
		 * highest remaining pi waiter — then propagate further.
		 */
		if (waiter == old_task_resource) {
			new_owner->prio = task_top_pi_waiter(new_owner)->pi_list_entry.prio;
			return rt_mutex_adjust_prio_chain(new_owner);
		}
	}
	return 0;
}
从上面的流程可以大体看出,死锁环成立的条件,

情形一,死锁阻塞链的第二个元素,和最后一个元素,都被同一个锁阻塞 (lock == orig_lock) 
情形二,首尾相连的环,即最后一把锁的持有者是第一个被阻塞的进程(rt_mutex_owner(lockn) == top_task)
是否还少了一种检测:链表中间第 n 个到第 n+x 个元素之间形成死锁环?
其实,如果是链表中间形成死锁环,那么在该环形成之时就应该已经被检测出来了。

学过操作系统的应该知道,有多种死锁检测机制,比如银行家算法等,这点linux可以进一步优化。

下面分析。

未完待续



你可能感兴趣的:(死锁检测,rtmutex)