Linux offers two kinds of semaphores:
• Kernel semaphores, which are used by kernel control paths
• System V IPC semaphores, which are used by User Mode processes
struct semaphore {
	spinlock_t		lock;
	unsigned int		count;
	struct list_head	wait_list;
};
struct mutex {
	/* 1: unlocked, 0: locked, negative: locked, possible waiters */
	atomic_t		count;
	spinlock_t		wait_lock;	/* spinlock that keeps access to the wait list safe on SMP */
	struct list_head	wait_list;
#if defined(CONFIG_DEBUG_MUTEXES) || defined(CONFIG_SMP)
	struct task_struct	*owner;
#endif
#ifdef CONFIG_DEBUG_MUTEXES
	const char		*name;
	void			*magic;
#endif
#ifdef CONFIG_DEBUG_LOCK_ALLOC
	struct lockdep_map	dep_map;
#endif
};
How do we use the semaphore?
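Before reading the implementation, here is a minimal usage sketch (the names my_sem, res_sem and use_resource() are invented for illustration):

#include <linux/semaphore.h>

static DEFINE_SEMAPHORE(my_sem);	/* static binary semaphore, count = 1 */
static struct semaphore res_sem;

static int my_init(void)
{
	sema_init(&res_sem, 4);		/* allow up to 4 concurrent holders */
	return 0;
}

static int use_resource(void)
{
	if (down_interruptible(&res_sem))
		return -EINTR;		/* a signal woke us before we got the semaphore */
	/* ... critical section ... */
	up(&res_sem);
	return 0;
}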
down_interruptible() differs from down() mainly in that it puts the current task to sleep in the TASK_INTERRUPTIBLE state, so a signal can wake it:

/**
 * down_interruptible - acquire the semaphore unless interrupted
 * @sem: the semaphore to be acquired
 *
 * Attempts to acquire the semaphore.  If no more tasks are allowed to
 * acquire the semaphore, calling this function will put the task to sleep.
 * If the sleep is interrupted by a signal, this function will return -EINTR.
 * If the semaphore is successfully acquired, this function returns 0.
 */
int down_interruptible(struct semaphore *sem)
{
	unsigned long flags;
	int result = 0;

	spin_lock_irqsave(&sem->lock, flags);
	if (likely(sem->count > 0))
		sem->count--;
	else
		result = __down_interruptible(sem);
	spin_unlock_irqrestore(&sem->lock, flags);

	return result;
}

static noinline int __sched __down_interruptible(struct semaphore *sem)
{
	return __down_common(sem, TASK_INTERRUPTIBLE, MAX_SCHEDULE_TIMEOUT);
}
/**
 * down - acquire the semaphore
 * @sem: the semaphore to be acquired
 *
 * Acquires the semaphore.  If no more tasks are allowed to acquire the
 * semaphore, calling this function will put the task to sleep until the
 * semaphore is released.
 *
 * Use of this function is deprecated, please use down_interruptible() or
 * down_killable() instead.
 */
void down(struct semaphore *sem)
{
	unsigned long flags;

	spin_lock_irqsave(&sem->lock, flags);
	if (likely(sem->count > 0))
		sem->count--;	/* count is nonzero: take the semaphore */
	else
		__down(sem);	/* otherwise sleep until the semaphore is released */
	spin_unlock_irqrestore(&sem->lock, flags);
}

static noinline void __sched __down(struct semaphore *sem)
{
	__down_common(sem, TASK_UNINTERRUPTIBLE, MAX_SCHEDULE_TIMEOUT);
}
__down_common():
/*
 * Because this function is inlined, the 'state' parameter will be
 * constant, and thus optimised away by the compiler.  Likewise the
 * 'timeout' parameter for the cases without timeouts.
 */
static inline int __sched __down_common(struct semaphore *sem, long state,
								long timeout)
{
	struct task_struct *task = current;	/* pointer to the current task */
	struct semaphore_waiter waiter;

	list_add_tail(&waiter.list, &sem->wait_list);	/* queue the waiter on the semaphore */
	waiter.task = task;
	waiter.up = 0;	/* each sleeping task has its own waiter.up flag */

	for (;;) {
		if (signal_pending_state(state, task))
			goto interrupted;
		if (timeout <= 0)
			goto timed_out;
		__set_task_state(task, state);
		spin_unlock_irq(&sem->lock);
		timeout = schedule_timeout(timeout);
		spin_lock_irq(&sem->lock);
		if (waiter.up)	/* set by up(): this sleeper has been woken and owns the semaphore */
			return 0;
	}

 timed_out:
	list_del(&waiter.list);
	return -ETIME;

 interrupted:
	list_del(&waiter.list);
	return -EINTR;
}
For up():
/**
 * up - release the semaphore
 * @sem: the semaphore to release
 *
 * Release the semaphore.  Unlike mutexes, up() may be called from any
 * context and even by tasks which have never called down().
 */
void up(struct semaphore *sem)
{
	unsigned long flags;

	spin_lock_irqsave(&sem->lock, flags);
	if (likely(list_empty(&sem->wait_list)))
		sem->count++;	/* nobody is waiting: just give the count back */
	else
		__up(sem);	/* somebody is sleeping on the semaphore: wake it up */
	spin_unlock_irqrestore(&sem->lock, flags);
}

static noinline void __sched __up(struct semaphore *sem)
{
	struct semaphore_waiter *waiter = list_first_entry(&sem->wait_list,
						struct semaphore_waiter, list);
	list_del(&waiter->list);
	waiter->up = 1;
	wake_up_process(waiter->task);	/* wake the sleeping task */
}

If A holds the semaphore and B and C are both waiting, what happens when A releases it? The wakeup is not random: waiters are queued with list_add_tail() and __up() takes list_first_entry(), so the waiter at the head of the list, i.e. the one that has waited longest (FIFO order), gets the semaphore.
/**
* down_trylock - try to acquire the semaphore, without waiting
* @sem: the semaphore to be acquired
*
 * Try to acquire the semaphore atomically. Returns 0 if the semaphore has
 * been acquired successfully or 1 if it cannot be acquired.
*
* NOTE: This return value is inverted from both spin_trylock and
* mutex_trylock! Be careful about this when converting code.
*
* Unlike mutex_trylock, this function can be used from interrupt context,
* and the semaphore can be released by any task or interrupt.
*/
int down_trylock(struct semaphore *sem)
{
	unsigned long flags;
	int count;

	spin_lock_irqsave(&sem->lock, flags);
	count = sem->count - 1;
	if (likely(count >= 0))
		sem->count = count;
	spin_unlock_irqrestore(&sem->lock, flags);

	return (count < 0);
}
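Because down_trylock() never sleeps, it is usable where down() is not, for example in interrupt context. A hedged sketch (the handler and res_sem are made up; note the inverted return value):

#include <linux/interrupt.h>
#include <linux/semaphore.h>

static struct semaphore res_sem;	/* assume sema_init(&res_sem, 1) at setup time */

static irqreturn_t my_irq_handler(int irq, void *dev_id)
{
	if (down_trylock(&res_sem))	/* nonzero means FAILURE, unlike mutex_trylock */
		return IRQ_NONE;	/* busy: give up rather than sleep */
	/* ... short critical section ... */
	up(&res_sem);
	return IRQ_HANDLED;
}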
For the mutex:
Mutex initialization:
# define mutex_init(mutex)				\
do {							\
	static struct lock_class_key __key;		\
							\
	__mutex_init((mutex), #mutex, &__key);		\
} while (0)
void
__mutex_init(struct mutex *lock, const char *name, struct lock_class_key *key)
{
	atomic_set(&lock->count, 1);
	spin_lock_init(&lock->wait_lock);
	INIT_LIST_HEAD(&lock->wait_list);
	mutex_clear_owner(lock);

	debug_mutex_init(lock, name, key);
}
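A minimal usage sketch (my_dev and its functions are invented): a mutex can be initialized statically with DEFINE_MUTEX() or dynamically with mutex_init():

#include <linux/mutex.h>

static DEFINE_MUTEX(list_mutex);	/* static initialization */

struct my_dev {
	struct mutex lock;
	int state;
};

static void my_dev_setup(struct my_dev *dev)
{
	mutex_init(&dev->lock);		/* dynamic initialization */
}

static void my_dev_update(struct my_dev *dev, int v)
{
	mutex_lock(&dev->lock);		/* may sleep: never call from interrupt context */
	dev->state = v;
	mutex_unlock(&dev->lock);	/* only the owner may unlock */
}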
#define mutex_lock(lock) mutex_lock_nested(lock, 0)
void __sched
mutex_lock_nested(struct mutex *lock, unsigned int subclass)
{
	might_sleep();
	__mutex_lock_common(lock, TASK_UNINTERRUPTIBLE, subclass, NULL, _RET_IP_);
}
Via might_resched(), might_sleep() can end up calling _cond_resched():

int __sched _cond_resched(void)
{
	if (should_resched()) {
		__cond_resched();
		return 1;
	}
	return 0;
}

static inline int should_resched(void)
{
	return need_resched() && !(preempt_count() & PREEMPT_ACTIVE);
}

static void __cond_resched(void)
{
	add_preempt_count(PREEMPT_ACTIVE);
	__schedule();
	sub_preempt_count(PREEMPT_ACTIVE);
}
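might_sleep() is also useful as an annotation in one's own code: it documents (and, with the kernel's sleep-in-atomic debugging options enabled, verifies at run time) that a function may sleep and therefore must not be called from atomic context. A hypothetical sketch:

#include <linux/kernel.h>

/* Invented helper that may block waiting for hardware. */
static int my_wait_for_hw(void)
{
	might_sleep();	/* complain if called with preemption or interrupts disabled */
	/* ... sleep until the hardware is ready ... */
	return 0;
}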
/*
 * Lock a mutex (possibly interruptible), slowpath:
 */
static inline int __sched
__mutex_lock_common(struct mutex *lock, long state, unsigned int subclass,
		    struct lockdep_map *nest_lock, unsigned long ip)
{
	struct task_struct *task = current;
	struct mutex_waiter waiter;
	unsigned long flags;

	preempt_disable();	/* disable kernel preemption */
	mutex_acquire_nest(&lock->dep_map, subclass, 0, nest_lock, ip);

#ifdef CONFIG_MUTEX_SPIN_ON_OWNER
	/*
	 * Optimistic spinning.
	 *
	 * We try to spin for acquisition when we find that there are no
	 * pending waiters and the lock owner is currently running on a
	 * (different) CPU.
	 *
	 * The rationale is that if the lock owner is running, it is likely to
	 * release the lock soon.
	 *
	 * Since this needs the lock owner, and this mutex implementation
	 * doesn't track the owner atomically in the lock field, we need to
	 * track it non-atomically.
	 *
	 * We can't do this for DEBUG_MUTEXES because that relies on wait_lock
	 * to serialize everything.
	 */
	for (;;) {
		struct task_struct *owner;

		/*
		 * If there's an owner, wait for it to either
		 * release the lock or go to sleep.
		 */
		owner = ACCESS_ONCE(lock->owner);
		if (owner && !mutex_spin_on_owner(lock, owner))
			break;

		if (atomic_cmpxchg(&lock->count, 1, 0) == 1) {
			lock_acquired(&lock->dep_map, ip);
			mutex_set_owner(lock);
			preempt_enable();
			return 0;
		}

		/*
		 * When there's no owner, we might have preempted between the
		 * owner acquiring the lock and setting the owner field. If
		 * we're an RT task that will live-lock because we won't let
		 * the owner complete.
		 */
		if (!owner && (need_resched() || rt_task(task)))
			break;

		/*
		 * The cpu_relax() call is a compiler barrier which forces
		 * everything in this loop to be re-loaded. We don't need
		 * memory barriers as we'll eventually observe the right
		 * values at the cost of a few extra spins.
		 */
		arch_mutex_cpu_relax();
	}
#endif
	spin_lock_mutex(&lock->wait_lock, flags);

	debug_mutex_lock_common(lock, &waiter);
	debug_mutex_add_waiter(lock, &waiter, task_thread_info(task));

	/* add waiting tasks to the end of the waitqueue (FIFO): */
	list_add_tail(&waiter.list, &lock->wait_list);
	waiter.task = task;

	/* a single xchg instruction sets lock->count = -1 atomically */
	if (atomic_xchg(&lock->count, -1) == 1)
		goto done;

	lock_contended(&lock->dep_map, ip);

	for (;;) {
		/*
		 * Lets try to take the lock again - this is needed even if
		 * we get here for the first time (shortly after failing to
		 * acquire the lock), to make sure that we get a wakeup once
		 * it's unlocked. Later on, if we sleep, this is the
		 * operation that gives us the lock. We xchg it to -1, so
		 * that when we release the lock, we properly wake up the
		 * other waiters:
		 */
		if (atomic_xchg(&lock->count, -1) == 1)
			break;

		/*
		 * atomic_xchg() boils down to an xchg instruction (the byte
		 * variant is shown here; the other sizes are analogous):
		 *
		 *	asm volatile("xchgb %0,%1"
		 *		     : "=q" (__x), "+m" (*__ptr)
		 *		     : "0" (__x)
		 *		     : "memory");
		 *
		 * The "memory" clobber makes it act like a memory barrier.
		 */

		/*
		 * got a signal? (This code gets eliminated in the
		 * TASK_UNINTERRUPTIBLE case.)
		 *
		 * If the task sleeps interruptibly or killably and a signal
		 * is delivered to it, remove it from the wait list and bail
		 * out with -EINTR.
		 */
		if (unlikely(signal_pending_state(state, task))) {
			mutex_remove_waiter(lock, &waiter,
					    task_thread_info(task));
			mutex_release(&lock->dep_map, 1, ip);
			spin_unlock_mutex(&lock->wait_lock, flags);

			debug_mutex_free_waiter(&waiter);
			preempt_enable();
			return -EINTR;
		}
		__set_task_state(task, state);

		/* didn't get the lock, go to sleep: */
		spin_unlock_mutex(&lock->wait_lock, flags);
		preempt_enable_no_resched();
		schedule();
		preempt_disable();
		spin_lock_mutex(&lock->wait_lock, flags);
	}

done:	/* the lock has been acquired */
	lock_acquired(&lock->dep_map, ip);
	/* got the lock - rejoice! */
	mutex_remove_waiter(lock, &waiter, current_thread_info());	/* off the wait list */
	mutex_set_owner(lock);	/* record current as the owner */

	/* set it to 0 if there are no waiters left: */
	if (likely(list_empty(&lock->wait_list)))
		atomic_set(&lock->count, 0);

	spin_unlock_mutex(&lock->wait_lock, flags);

	debug_mutex_free_waiter(&waiter);
	preempt_enable();

	return 0;
}
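When the slowpath is entered via mutex_lock_interruptible(), state is TASK_INTERRUPTIBLE and the -EINTR branch above becomes reachable, so callers must check the return value. A hedged sketch (cfg_mutex and set_cfg() are invented):

#include <linux/mutex.h>

static DEFINE_MUTEX(cfg_mutex);
static int cfg_value;

static int set_cfg(int v)
{
	if (mutex_lock_interruptible(&cfg_mutex))
		return -ERESTARTSYS;	/* a signal arrived before we got the mutex */
	cfg_value = v;
	mutex_unlock(&cfg_mutex);
	return 0;
}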
/**
* mutex_unlock - release the mutex
* @lock: the mutex to be released
*
* Unlock a mutex that has been locked by this task previously.
*
* This function must not be used in interrupt context. Unlocking
* of a not locked mutex is not allowed.
*
* This function is similar to (but not equivalent to) up().
*/
void __sched mutex_unlock(struct mutex *lock)
{
	/*
	 * The unlocking fastpath is the 0->1 transition from 'locked'
	 * into 'unlocked' state:
	 */
#ifndef CONFIG_DEBUG_MUTEXES
	/*
	 * When debugging is enabled we must not clear the owner before time,
	 * the slow path will always be taken, and that clears the owner field
	 * after verifying that it was indeed current.
	 */
	mutex_clear_owner(lock);
#endif
	__mutex_fastpath_unlock(&lock->count, __mutex_unlock_slowpath);
}
/**
 * __mutex_fastpath_unlock - increment and call function if nonpositive
 * @v: pointer of type atomic_t
 * @fail_fn: function to call if the result is nonpositive
 *
 * Atomically increments @v and calls <fail_fn> if the result is nonpositive.
 */
#define __mutex_fastpath_unlock(v, fail_fn)			\
do {								\
	unsigned long dummy;					\
								\
	typecheck(atomic_t *, v);				\
	typecheck_fn(void (*)(atomic_t *), fail_fn);		\
								\
	asm volatile(LOCK_PREFIX "   incl (%%rdi)\n"		\
		     "   jg 1f\n"				\
		     "   call " #fail_fn "\n"			\
		     "1:"					\
		     : "=D" (dummy)				\
		     : "D" (v)					\
		     : "rax", "rsi", "rdx", "rcx",		\
		       "r8", "r9", "r10", "r11", "memory");	\
} while (0)

/* fail_fn here is __mutex_unlock_slowpath: */
static __used noinline void
__mutex_unlock_slowpath(atomic_t *lock_count)
{
	__mutex_unlock_common_slowpath(lock_count, 1);
}
The difference between a mutex and a semaphore:
The following answers are from Quora and Stack Overflow.
One difference between a mutex and a semaphore is that a mutex allows only one thread into the critical section, whereas a semaphore (with the exception of a binary semaphore) allows multiple accesses to shared resources. That raises the next natural question: what is the difference between a binary semaphore and a mutex? The subtle thing people don't usually know about a mutex is that a mutex has an "ownership" property: only the thread that locks a mutex can unlock it, whereas for a semaphore it is possible for thread A to acquire the semaphore and some other thread B to release it.
The main difference between a mutex and a semaphore is ownership. A mutex provides ownership to the thread: only the thread that has locked the mutex is allowed to release it; no other thread can. The semaphore, on the other hand, has no such ownership concept: any other process or thread can release it simply by incrementing the count. One more prominent difference is that the CPU scheduler plays no special role in the implementation of a mutex, whereas it does for a semaphore: since a semaphore can be released by any other process or thread, the scheduler needs to act after the counter has been incremented.
Thread A                     Thread B
Take Mutex
access data                  ...
...                          Take Mutex  <== Will block
Give Mutex                   ...
                             access data  <== Unblocks
                             ...
                             Give Mutex
A binary semaphore addresses a totally different question:
Task A                       Task B
...                          Take BinSemaphore  <== wait for something
Do Something Noteworthy
Give BinSemaphore            do something  <== unblocks
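In kernel terms this signaling pattern looks roughly like the sketch below (names invented). The semaphore starts at 0 and is released by a task that never called down(), which would be illegal for a mutex:

#include <linux/semaphore.h>

static struct semaphore evt_sem;	/* assume sema_init(&evt_sem, 0) at setup time */

/* Task B: wait for the event. */
static int consumer(void *unused)
{
	down(&evt_sem);		/* blocks until the event is signaled */
	/* ... handle the event ... */
	return 0;
}

/* Task A, or even an interrupt handler: signal the event. */
static void producer(void)
{
	/* ... do something noteworthy ... */
	up(&evt_sem);		/* legal although we never called down() */
}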
Completions:
The completion is designed to solve synchronization among processes; the completion data structure includes a wait queue head and a done flag.
/*
 * struct completion - structure used to maintain state for a "completion"
 *
 * This is the opaque structure used to maintain the state for a "completion".
 * Completions currently use a FIFO to queue threads that have to wait for
 * the "completion" event.
 *
 * See also:  complete(), wait_for_completion() (and friends _timeout,
 * _interruptible, _interruptible_timeout, and _killable), init_completion(),
 * and macros DECLARE_COMPLETION(), DECLARE_COMPLETION_ONSTACK(), and
 * INIT_COMPLETION().
 */
struct completion {
	unsigned int done;
	wait_queue_head_t wait;
};

The function corresponding to up() is called complete(). It receives as an argument the address of a completion data structure, invokes spin_lock_irqsave() on the spin lock of the completion's wait queue, increments the done field, wakes up the exclusive process sleeping in the wait queue, and finally invokes spin_unlock_irqrestore().
The function corresponding to down() is called wait_for_completion(). It receives as an argument the address of a completion data structure and checks the value of the done flag. If it is greater than zero, wait_for_completion() terminates, because complete() has already been executed on another CPU. Otherwise, the function adds current to the tail of the wait queue as an exclusive process and puts current to sleep in the TASK_UNINTERRUPTIBLE state. Once woken up, the function checks the value of the done flag again: if it is still zero, the current process is suspended again; otherwise, current is removed from the wait queue and the function terminates. As in the case of the complete() function, wait_for_completion() makes use of the spin lock in the completion's wait queue.
The real difference between completions and semaphores is how the spin lock included in the wait queue is used. In completions, the spin lock is used to ensure that complete() and wait_for_completion() cannot execute concurrently. In semaphores, the spin lock is used to keep concurrent invocations of down() from messing up the semaphore data structure.
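A typical usage sketch, e.g. waiting for an (invented) helper thread to finish its setup:

#include <linux/completion.h>

static DECLARE_COMPLETION(setup_done);	/* static; or init_completion() at run time */

static int helper_thread(void *unused)
{
	/* ... perform the setup ... */
	complete(&setup_done);		/* done++ and wake one exclusive waiter */
	return 0;
}

static void wait_for_helper(void)
{
	wait_for_completion(&setup_done);	/* sleep until done becomes positive */
}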
init_completion() initializes the done flag and the wait queue head:

static inline void init_completion(struct completion *x)
{
	x->done = 0;
	init_waitqueue_head(&x->wait);
}

#define init_waitqueue_head(q)				\
	do {						\
		static struct lock_class_key __key;	\
							\
		__init_waitqueue_head((q), &__key);	\
	} while (0)

To wait for the event:
/**
 * wait_for_completion: - waits for completion of a task
 * @x:  holds the state of this particular completion
 *
 * This waits to be signaled for completion of a specific task. It is NOT
 * interruptible and there is no timeout.
 *
 * See also similar routines (i.e. wait_for_completion_timeout()) with timeout
 * and interrupt capability. Also see complete().
 */
void __sched wait_for_completion(struct completion *x)
{
	wait_for_common(x, MAX_SCHEDULE_TIMEOUT, TASK_UNINTERRUPTIBLE);
}
static long __sched
wait_for_common(struct completion *x, long timeout, int state)
{
	might_sleep();	/* scheduling may happen here */

	spin_lock_irq(&x->wait.lock);	/* take the lock in the wait queue head */
	timeout = do_wait_for_common(x, timeout, state);
	spin_unlock_irq(&x->wait.lock);
	return timeout;
}
static inline long __sched
do_wait_for_common(struct completion *x, long timeout, int state)
{
	if (!x->done) {
		DECLARE_WAITQUEUE(wait, current);	/* initialize a wait queue entry */

		__add_wait_queue_tail_exclusive(&x->wait, &wait);
		do {
			/* bail out if this sleep is interruptible and a signal is pending */
			if (signal_pending_state(state, current)) {
				timeout = -ERESTARTSYS;
				break;
			}
			__set_current_state(state);
			spin_unlock_irq(&x->wait.lock);
			timeout = schedule_timeout(timeout);
			spin_lock_irq(&x->wait.lock);
		} while (!x->done && timeout);
		__remove_wait_queue(&x->wait, &wait);
		if (!x->done)
			return timeout;
	}
	x->done--;
	return timeout ?: 1;
}
void complete(struct completion *x)
{
	unsigned long flags;

	spin_lock_irqsave(&x->wait.lock, flags);
	x->done++;
	__wake_up_common(&x->wait, TASK_NORMAL, 1, 0, NULL);
	spin_unlock_irqrestore(&x->wait.lock, flags);
}
static void __wake_up_common(wait_queue_head_t *q, unsigned int mode,
			int nr_exclusive, int wake_flags, void *key)
{
	wait_queue_t *curr, *next;

	list_for_each_entry_safe(curr, next, &q->task_list, task_list) {
		unsigned flags = curr->flags;

		/* run each entry's callback, which wakes the corresponding task */
		if (curr->func(curr, mode, wake_flags, key) &&
				(flags & WQ_FLAG_EXCLUSIVE) && !--nr_exclusive)
			break;
	}
}
Local interrupt disabling does not protect against concurrent accesses to data structures by interrupt handlers running on other CPUs, so in multiprocessor systems, local interrupt disabling is often coupled with spin locks.
The macro local_irq_disable() disables interrupts with the cli instruction, and local_irq_enable() enables them with sti; both operate on the IF flag of the eflags register.
The use of local_irq_save() and local_irq_restore() is worth examining: when should they be used, and how do they work?
#define local_irq_enable() \
	do { trace_hardirqs_on(); raw_local_irq_enable(); } while (0)
#define local_irq_disable() \
	do { raw_local_irq_disable(); trace_hardirqs_off(); } while (0)
#define local_irq_save(flags) \
	do { raw_local_irq_save(flags); trace_hardirqs_off(); } while (0)
#define local_irq_restore(flags)				\
	do {							\
		if (raw_irqs_disabled_flags(flags)) {		\
			raw_local_irq_restore(flags);		\
			trace_hardirqs_off();			\
		} else {					\
			trace_hardirqs_on();			\
			raw_local_irq_restore(flags);		\
		}						\
	} while (0)
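As to the question above: local_irq_save() is the right choice when the caller cannot know whether interrupts are already disabled, because it saves the current IF state into flags, and local_irq_restore() puts exactly that state back instead of unconditionally re-enabling interrupts. Combined with a spin lock for SMP (as noted above), a sketch might look like this (dev_lock and dev_state are invented):

#include <linux/spinlock.h>

static DEFINE_SPINLOCK(dev_lock);
static unsigned int dev_state;	/* also touched by an interrupt handler */

static void update_dev_state(unsigned int v)
{
	unsigned long flags;

	/* disable local interrupts and take the lock: safe against the
	 * local IRQ handler and against handlers running on other CPUs */
	spin_lock_irqsave(&dev_lock, flags);
	dev_state = v;
	spin_unlock_irqrestore(&dev_lock, flags);	/* restore the saved IF state */
}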
The simplest way to forbid deferrable functions from running on a CPU is to disable interrupts on that CPU. In practice, however, the kernel often only needs to disable deferrable functions without disabling interrupts. Deferrable functions can be activated or disabled on the local CPU by acting on the softirq counter stored in the preempt_count field of the thread_info descriptor: if the softirq counter is positive, do_softirq() does not execute softirqs.
Moreover, tasklets are implemented on top of softirqs, so setting this counter to a positive value disables the execution of all deferrable functions on a given CPU, not just softirqs.
local_bh_disable() and local_bh_enable() come in matched pairs: local_bh_disable() adds one to the CPU's softirq counter, and local_bh_enable() subtracts one. The kernel can nest several local_bh_disable() invocations; deferrable functions are reactivated only when every local_bh_disable() has been matched by a corresponding local_bh_enable().
local_bh_enable() performs two important operations that help ensure timely execution of long-waiting threads:
1. Checks the softirq and hardirq counters in the preempt_count field of the local CPU; if both are zero and there are pending softirqs to be executed, invokes do_softirq().
2. Checks whether the TIF_NEED_RESCHED flag of the local CPU is set; if so, a process switch request is pending, and preempt_schedule() is invoked.
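A sketch of protecting data shared with a softirq or tasklet on the local CPU (stats_counter is invented):

#include <linux/interrupt.h>

static unsigned long stats_counter;	/* also updated from a tasklet */

static void update_stats(void)
{
	local_bh_disable();	/* softirq counter++: no deferrable functions can run here */
	stats_counter++;
	local_bh_enable();	/* softirq counter--: pending softirqs may now run */
}

Note that this only protects against deferrable functions on the local CPU; if the data is shared across CPUs, a spin lock is still needed.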
Synchronization rules:
To make the most of the CPU, we should avoid using spin locks wherever possible.
How kernel control paths protect their data structures:
The Big Kernel Lock (BKL), i.e. the global kernel lock; not covered here.