Linux进程间通信之信号量
作者:bullbat
Linux进程间通信包括管道、消息队列、System V等等,其中System V包括三种:信号量、消息队列、共享内存,这里只简单介绍信号量机制。
在Linux编程中,要运用信号量实现互斥操作,用户空间需要调用几个系统调用,如下是一个用户空间例子。
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/ipc.h>
#include <sys/sem.h>
/* Key naming the semaphore set shared by every instance of this program. */
#define SEMKEY 1234L
/* Permission bits for the set: read/alter for user, group and others. */
#define PERMS 0666

/*
 * Single-operation vectors handed to semop().  Designated initialisers are
 * used because POSIX does not specify the order of the members of
 * struct sembuf, so a positional {0,-1,0} is only correct by accident of
 * the platform's layout.
 */
/* P operation: decrement semaphore 0 by one. */
struct sembuf op_down[1] = {
    { .sem_num = 0, .sem_op = -1, .sem_flg = 0 }
};
/* V operation: increment semaphore 0 by one. */
struct sembuf op_up[1] = {
    { .sem_num = 0, .sem_op = 1, .sem_flg = 0 }
};

/* Identifier returned by semget(); -1 until init_sem() has succeeded. */
int semid = -1;
/* Result of the most recent semctl()/semop() call. */
int res;
/*
 * init_sem - attach to the semaphore set named by SEMKEY, creating it and
 * setting semaphore 0 to 1 when it does not exist yet.
 *
 * On success the global semid holds the set identifier.  On failure a
 * message is printed and the process exits with status -1.
 *
 * Only the process that actually creates the set initialises its value:
 * creation uses IPC_EXCL so that a second process racing through here
 * cannot run SETVAL again and reset a semaphore that is already held.
 * NOTE(review): a process that merely attaches may still call semop()
 * before the creator has executed SETVAL; closing that window needs extra
 * coordination that this example does not implement.
 */
void init_sem(void)
{
    /*
     * Fourth argument of semctl() for SETVAL.  The semctl(2) manual page
     * requires the caller to define this union; passing a bare int through
     * the variadic parameter is not what the call expects.
     */
    union sem_arg {
        int val;
        struct semid_ds *buf;
        unsigned short *array;
    } arg;

    /* Plain lookup: no IPC_CREAT, and nsems == 0 means "any size". */
    semid = semget(SEMKEY, 0, PERMS);
    if (semid >= 0)
        return;

    printf("create semaphore\n");
    semid = semget(SEMKEY, 1, IPC_CREAT | IPC_EXCL | PERMS);
    if (semid < 0) {
        /* Possibly lost the creation race: retry the plain lookup. */
        semid = semget(SEMKEY, 0, PERMS);
        if (semid < 0) {
            printf("couldn't create semaphore\n");
            exit(-1);
        }
        return;
    }

    /* We created the set, so we are responsible for its initial value. */
    arg.val = 1;
    res = semctl(semid, 0, SETVAL, arg);
    if (res < 0) {
        printf("couldn't initialize semaphore\n");
        exit(-1);
    }
}
/*
 * down - P operation: apply op_down to the semaphore set semid.
 *
 * The call is retried when it is interrupted by a signal (EINTR).  Any
 * other failure is fatal: returning would let the caller run its critical
 * section without holding the semaphore.  The result of the last semop()
 * call is left in the global res.
 */
void down(void)
{
    do {
        res = semop(semid, op_down, 1);
    } while (res < 0 && errno == EINTR);

    if (res < 0) {
        perror("semop(down)");
        exit(-1);
    }
}
/*
 * up - V operation: apply op_up to the semaphore set semid.
 *
 * The result of semop() is stored in the global res.  A failure is
 * reported on stderr instead of being silently dropped; the process is
 * not terminated because the caller has already left its critical section.
 */
void up(void)
{
    res = semop(semid, op_up, 1);
    if (res < 0)
        perror("semop(up)");
}
/*
 * Demo driver: attach to (or create) the semaphore, then hold it for ten
 * seconds so that a second instance started meanwhile blocks in down().
 */
int main(void)
{
    init_sem();

    printf("before critical code\n");
    down();

    /* Critical section: sleep long enough to observe a second instance waiting. */
    printf("in critical code\n");
    sleep(10);

    up();
    return 0;
}
用户空间的程序中分为三步:
1, 调用semget系统调用创建信号量;
2, 调用semctl系统调用设置信号量初始值;
3, 调用semop系统调用实现同步互斥控制;
下面我们一步步看看内核中都是怎么实现的,内核中涉及到的关键数据结构、它们之间的主要关系及其基本操作如下图所示:
基本思路为:
一、从进程的相关命名空间中可以定位到信号量子空间,信号量ID值由IDR机制实现,semget从该IDR中获取信号量ID。
二、信号量的核心数据结构为sem_array,所有的与同一个sem_array相关联的sem_undo结构组成一个链表,所有的与同一个sem_array相关联的待决定信号列表sem_queue组成另一个链表,sem_queue的项目列表在更新semval为负数值时加入(semop系统调用P操作),其部分参数为从用户空间传入,sem结构的定位由sem_array结构和用户空间传入的参数确定,当更新semval为正数时(semop系统调用V操作),唤醒对应睡眠的进程。
1,semget系统调用
/*
 * semget system call: return the identifier of the semaphore set for @key.
 *
 * @key:    IPC key supplied by user space.
 * @nsems:  number of semaphores requested; must lie in [0, ns->sc_semmsl].
 * @semflg: flags supplied by user space, forwarded unchanged.
 *
 * Returns -EINVAL when @nsems is out of range; otherwise whatever ipcget()
 * returns for the caller's IPC namespace.
 */
SYSCALL_DEFINE3(semget, key_t, key, int, nsems, int, semflg)
{
	struct ipc_namespace *ns;
	struct ipc_ops sem_ops;
	struct ipc_params sem_params;

	/* Semaphore sets live in the IPC namespace of the calling task. */
	ns = current->nsproxy->ipc_ns;

	if (nsems < 0 || nsems > ns->sc_semmsl)
		return -EINVAL;

	/* Semaphore-specific callbacks used by the generic ipcget(). */
	sem_ops.getnew = newary;
	sem_ops.associate = sem_security;
	sem_ops.more_checks = sem_more_checks;

	/* Bundle the user-supplied arguments for ipcget(). */
	sem_params.key = key;
	sem_params.flg = semflg;
	sem_params.u.nsems = nsems;

	return ipcget(ns, &sem_ids(ns), &sem_ops, &sem_params);
}
semget的目的是得到信号量id号。ipcget函数会调用idr系列函数从idr中得到id号,关于idr的介绍,前面两篇文章(转载的)写得很详细,这里就不再赘述了;
2,semctl系统调用
SYSCALL_DEFINE(semctl)(int semid, intsemnum, int cmd, unionsemun arg)
{
int err = -EINVAL;
intversion;
struct ipc_namespace *ns;
if(semid < 0)
return-EINVAL;
version = ipc_parse_version(&cmd);
ns = current->nsproxy->ipc_ns;
switch(cmd){
caseIPC_INFO:
caseSEM_INFO:
caseIPC_STAT:
caseSEM_STAT:
err = semctl_nolock(ns,semid, cmd, version, arg);
returnerr;
caseGETALL:
caseGETVAL:
caseGETPID:
caseGETNCNT:
caseGETZCNT:
case SETVAL:
caseSETALL:
err =semctl_main(ns,semid,semnum,cmd,version,arg);
returnerr;
caseIPC_RMID:
caseIPC_SET:
err = semctl_down(ns, semid,cmd, version, arg);
returnerr;
default:
return-EINVAL;
}
}
SETVAL是我们程序的调用路径。
static int semctl_main(struct ipc_namespace *ns, intsemid, int semnum,
intcmd, int version, unionsemun arg)
{
structsem_array *sma;
structsem* curr;
interr;
ushort fast_sem_io[SEMMSL_FAST];
ushort* sem_io = fast_sem_io;
intnsems;
sma = sem_lock_check(ns, semid);
if(IS_ERR(sma))
returnPTR_ERR(sma);
nsems = sma->sem_nsems;
err = -EACCES;
if(ipcperms (&sma->sem_perm, (cmd==SETVAL||cmd==SETALL)?S_IWUGO:S_IRUGO))
gotoout_unlock;
err = security_sem_semctl(sma, cmd);
if(err)
gotoout_unlock;
err = -EACCES;
……
err = -EINVAL;
if(semnum< 0 || semnum >= nsems)
gotoout_unlock;
curr = &sma->sem_base[semnum];
switch(cmd) {
caseGETVAL:
err = curr->semval;
gotoout_unlock;
……
caseSETVAL:
{
intval = arg.val;
structsem_undo *un;
err = -ERANGE;
if(val > SEMVMX || val < 0)
goto out_unlock;
assert_spin_locked(&sma->sem_perm.lock);
list_for_each_entry(un,&sma->list_id, list_id)
un->semadj[semnum]= 0;
curr->semval = val;
curr->sempid= task_tgid_vnr(current);
sma->sem_ctime =get_seconds();
/*maybe some queued-up processes were waiting for this */
update_queue(sma);
err = 0;
gotoout_unlock;
}
}
out_unlock:
sem_unlock(sma);
out_free:
if(sem_io!= fast_sem_io)
ipc_free(sem_io, sizeof(ushort)*nsems);
returnerr;
}
设置VAL很简单,首先从传入的参数中获得val的值,然后设置sem_array结构list_id链表中每个sem_undo结构对应信号量的调整值为0,最后设置信号量的值后调用update_queue(sma)处理对应信号量的待决操作列表,在我们这里的执行流程中sem_queue中还没有可用项,该待决操作列表在semtimedop系统调用启用进程等待时添加具体的项。
/* Gothrough the pending queue for the indicated semaphore
* looking for tasks that can be completed.
*/
static voidupdate_queue (struct sem_array * sma)
{
interror;
structsem_queue * q;
q =list_entry(sma->sem_pending.next, structsem_queue, list);
while (&q->list!= &sma->sem_pending) {
error = try_atomic_semop(sma,q->sops, q->nsops,
q->undo, q->pid);
/*Does q->sleeper still need to sleep? */
if(error <= 0) {
struct sem_queue *n;
/*
* Continue scanning. The next operation
* that must be checked depends on the type ofthe
* completed operation:
* - if the operation modified the array, then
* restart from the head of the queue and
* check for threads that might be waiting
* forsemaphore values to become 0.
* - if the operation didn't modify the array,
* thenjust continue.
* The order of list_del() and reading->next
* is crucial: In the former case, thelist_del()
* must be done first [because we might be the
* first entry in ->sem_pending], in thelatter
* case the list_del() must be done last
* [because the list is invalid after thelist_del()]
*/
if (q->alter) {
list_del(&q->list);
n =list_entry(sma->sem_pending.next,
struct sem_queue, list);
} else {
n =list_entry(q->list.next, struct sem_queue,
list);
list_del(&q->list);
}
/* wake up the waiting thread */
q->status =IN_WAKEUP;
wake_up_process(q->sleeper);
/* hands-off: q will disappear immediately after
* writing q->status.
*/
smp_wmb();
q->status =error;
q = n;
} else{
q =list_entry(q->list.next, struct sem_queue,list);
}
}
}
遍历sem_array对应sem_queue链表中的所有项,对链表中的每一项sem进行操作,具体值的更改操作由函数try_atomic_semop完成,当try_atomic_semop返回非正值时,表示不需要再等待,此时唤醒等待进程。
/*
* Determine whether a sequence of semaphoreoperations would succeed
* all at once. Return 0 if yes, 1 if need tosleep, else return error code.
*/
static inttry_atomic_semop (struct sem_array * sma, struct sembuf * sops,
int nsops,struct sem_undo *un, intpid)
{
intresult, sem_op;
structsembuf *sop;
structsem * curr;
for(sop = sops; sop < sops + nsops; sop++) {
curr = sma->sem_base +sop->sem_num;
sem_op = sop->sem_op;
result = curr->semval;
if(!sem_op && result)
goto would_block;
result += sem_op;
if(result < 0)
goto would_block;
if(result > SEMVMX)
goto out_of_range;
if(sop->sem_flg & SEM_UNDO) {
int undo = un->semadj[sop->sem_num] - sem_op;
/*
* Exceedingthe undo range is an error.
*/
if (undo < (-SEMAEM - 1) || undo > SEMAEM)
goto out_of_range;
}
curr->semval = result;
}
sop--;
while(sop >= sops) {
sma->sem_base[sop->sem_num].sempid= pid;
if(sop->sem_flg & SEM_UNDO)
un->semadj[sop->sem_num]-= sop->sem_op;
sop--;
}
sma->sem_otime = get_seconds();
return0;
out_of_range:
result = -ERANGE;
gotoundo;
would_block:
if(sop->sem_flg & IPC_NOWAIT)
result = -EAGAIN;
else
result = 1;
undo:
sop--;
while(sop >= sops) {
sma->sem_base[sop->sem_num].semval-= sop->sem_op;
sop--;
}
returnresult;
}
由curr =sma->sem_base + sop->sem_num;定位到具体的sem项,然后result = curr->semval;result += sem_op;两条语句用于对semval值进行操作。
3, semop系统调用
最终都调用semtimedop系统调用实现,
/*
 * semtimedop system call: perform @nsops semaphore operations on set
 * @semid, sleeping until they can all be applied.
 *
 * @semid:   semaphore set identifier.
 * @tsops:   user-space array of operations.
 * @nsops:   number of operations in @tsops.
 * @timeout: optional user-space timeout; when NULL the sleep is unbounded.
 *
 * Returns 0 on success or a negative errno, among them:
 *   -EINVAL  nsops < 1, semid < 0 or a malformed timeout,
 *   -E2BIG   nsops exceeds ns->sc_semopm,
 *   -ENOMEM  the operation buffer could not be allocated,
 *   -EFAULT  copying from user space failed,
 *   -EIDRM   the set could not be re-locked after sleeping, or the undo
 *            structure was invalidated (un->semid == -1),
 *   -EFBIG   an operation names a semaphore index outside the set,
 *   -EACCES  ipcperms() refused access,
 *   -EAGAIN  the timeout expired (or IPC_NOWAIT, via try_atomic_semop()),
 *   -EINTR   the sleep ended without another task completing the operation.
 */
SYSCALL_DEFINE4(semtimedop, int, semid, struct sembuf __user *, tsops,
		unsigned, nsops, const struct timespec __user *, timeout)
{
	int error = -EINVAL;
	struct sem_array *sma;
	struct sembuf fast_sops[SEMOPM_FAST];
	struct sembuf *sops = fast_sops, *sop;
	struct sem_undo *un;
	int undos = 0, alter = 0, max;
	struct sem_queue queue;
	unsigned long jiffies_left = 0;
	struct ipc_namespace *ns;

	ns = current->nsproxy->ipc_ns;

	if (nsops < 1 || semid < 0)
		return -EINVAL;
	if (nsops > ns->sc_semopm)
		return -E2BIG;
	/* Use the on-stack buffer unless more than SEMOPM_FAST operations were passed. */
	if (nsops > SEMOPM_FAST) {
		sops = kmalloc(sizeof(*sops) * nsops, GFP_KERNEL);
		if (sops == NULL)
			return -ENOMEM;
	}
	if (copy_from_user(sops, tsops, nsops * sizeof(*tsops))) {
		error = -EFAULT;
		goto out_free;
	}
	if (timeout) {
		struct timespec _timeout;
		if (copy_from_user(&_timeout, timeout, sizeof(*timeout))) {
			error = -EFAULT;
			goto out_free;
		}
		if (_timeout.tv_sec < 0 || _timeout.tv_nsec < 0 ||
		    _timeout.tv_nsec >= 1000000000L) {
			error = -EINVAL;
			goto out_free;
		}
		jiffies_left = timespec_to_jiffies(&_timeout);
	}

	/*
	 * Scan the operations once: remember the highest semaphore index,
	 * whether any operation requests SEM_UNDO, and whether any operation
	 * has a non-zero sem_op (i.e. alters the array).
	 */
	max = 0;
	for (sop = sops; sop < sops + nsops; sop++) {
		if (sop->sem_num >= max)
			max = sop->sem_num;
		if (sop->sem_flg & SEM_UNDO)
			undos = 1;
		if (sop->sem_op != 0)
			alter = 1;
	}

	if (undos) {
		un = find_alloc_undo(ns, semid);
		if (IS_ERR(un)) {
			error = PTR_ERR(un);
			goto out_free;
		}
	} else
		un = NULL;

	sma = sem_lock_check(ns, semid);
	if (IS_ERR(sma)) {
		if (un)
			rcu_read_unlock();
		error = PTR_ERR(sma);
		goto out_free;
	}

	/*
	 * semid identifiers are not unique - find_alloc_undo may have
	 * allocated an undo structure, it was invalidated by an RMID
	 * and now a new array received the same id. Check and fail.
	 * This case can be detected checking un->semid. The existence of
	 * "un" itself is guaranteed by rcu.
	 */
	error = -EIDRM;
	if (un) {
		if (un->semid == -1) {
			rcu_read_unlock();
			goto out_unlock_free;
		} else {
			/*
			 * rcu lock can be released, "un" cannot disappear:
			 * - sem_lock is acquired, thus IPC_RMID is
			 *   impossible.
			 * - exit_sem is impossible, it always operates on
			 *   current (or a dead task).
			 */

			rcu_read_unlock();
		}
	}

	error = -EFBIG;
	if (max >= sma->sem_nsems)
		goto out_unlock_free;

	error = -EACCES;
	if (ipcperms(&sma->sem_perm, alter ? S_IWUGO : S_IRUGO))
		goto out_unlock_free;

	error = security_sem_semop(sma, sops, nsops, alter);
	if (error)
		goto out_unlock_free;

	error = try_atomic_semop(sma, sops, nsops, un, task_tgid_vnr(current));
	if (error <= 0) {
		/* Our change may allow queued operations to proceed. */
		if (alter && error == 0)
			update_queue(sma);
		goto out_unlock_free;
	}

	/* We need to sleep on this operation, so we put the current
	 * task into the pending queue and go to sleep.
	 */

	queue.sops = sops;
	queue.nsops = nsops;
	queue.undo = un;
	queue.pid = task_tgid_vnr(current);
	queue.alter = alter;
	/* Altering operations are queued at the tail, non-altering ones at the head. */
	if (alter)
		list_add_tail(&queue.list, &sma->sem_pending);
	else
		list_add(&queue.list, &sma->sem_pending);

	queue.status = -EINTR;
	queue.sleeper = current;
	current->state = TASK_INTERRUPTIBLE;
	sem_unlock(sma);

	if (timeout)
		jiffies_left = schedule_timeout(jiffies_left);
	else
		schedule();

	/*
	 * update_queue() stores IN_WAKEUP before the final status; spin
	 * until the final value is visible.
	 */
	error = queue.status;
	while (unlikely(error == IN_WAKEUP)) {
		cpu_relax();
		error = queue.status;
	}

	if (error != -EINTR) {
		/* fast path: update_queue already obtained all requested
		 * resources */
		goto out_free;
	}

	sma = sem_lock(ns, semid);
	if (IS_ERR(sma)) {
		error = -EIDRM;
		goto out_free;
	}

	/*
	 * If queue.status != -EINTR we are woken up by another process
	 */
	error = queue.status;
	if (error != -EINTR) {
		goto out_unlock_free;
	}

	/*
	 * If an interrupt occurred we have to clean up the queue
	 */
	if (timeout && jiffies_left == 0)
		error = -EAGAIN;
	list_del(&queue.list);

out_unlock_free:
	sem_unlock(sma);
out_free:
	/* Release the operation buffer only when it was heap-allocated. */
	if (sops != fast_sops)
		kfree(sops);
	return error;
}
semtimedop系统调用首先从用户空间得到参数值,然后进行权限检查,再调用try_atomic_semop函数尝试对各个sem的semval进行设置。如果需要休眠(即try_atomic_semop返回1:某个操作会使semval变为负值,或者等待semval为0而其当前值不为0),则将该sem_queue项初始化并添加到待决操作列表,然后重新进行调度;否则,若操作成功且修改了信号量的值,则调用update_queue对队列中的其他项进行操作并唤醒相应进程。