linux SysV IPC msg消息队列实现

消息队列是由存放在内核中的消息组成的链表,由IPC id标识。
由msgget创建新队列或打开已经存在的队列
由msgsnd将消息添加到消息队列尾,每个消息包括正整数标识的类型,非负的长度,及数据。
由msgrcv从消息队列中取消息,不必按FIFO取消息,可以通过类型字段取相应的消息。

 

I.数据结构
i.msg_queue

 87 /* one msg_queue structure for each present queue on the system */
 88 struct msg_queue {
 89         struct kern_ipc_perm q_perm;    /* embedded IPC header (mode, key, id, deleted); freeque() recovers the queue from it via container_of() */
 90         time_t q_stime;                 /* last msgsnd time */
 91         time_t q_rtime;                 /* last msgrcv time */
 92         time_t q_ctime;                 /* last change time */
 93         unsigned long q_cbytes;         /* current number of bytes on queue */
 94         unsigned long q_qnum;           /* number of messages in queue */
 95         unsigned long q_qbytes;         /* max number of bytes on queue */
 96         pid_t q_lspid;                  /* pid of last msgsnd */
 97         pid_t q_lrpid;                  /* last receive pid */
 98 
 99         struct list_head q_messages;    /* queued messages, linked through msg_msg.m_list */
100         struct list_head q_receivers;   /* sleeping receivers, linked through msg_receiver.r_list */
101         struct list_head q_senders;     /* sleeping senders; presumably linked through msg_sender.list by ss_add() */
102 };

msg_queue:一个msg_queue对应一个消息队列
q_messages:消息链表
q_receivers:被阻塞的接收消息进程链表
q_senders:被阻塞的发送消息进程链表

ii.msg_msg

 77 /* one msg_msg structure for each message */
 78 struct msg_msg {
 79         struct list_head m_list;    /* node on msg_queue.q_messages while the message is queued */
 80         long  m_type;               /* message type; filled in by do_msgsnd(), not by load_msg() */
 81         int m_ts;           /* message text size */
 82         struct msg_msgseg* next;    /* first overflow segment, or NULL when the whole text fits behind this header */
 83         void *security;             /* opaque pointer for the security_msg_msg_* hooks; load_msg() starts it at NULL */
 84         /* the actual message follows immediately */
 85 };

 39 struct msg_msgseg {     /* header of one overflow segment of a message longer than DATALEN_MSG */
 40         struct msg_msgseg* next;    /* following segment, or NULL for the last one */
 41         /* the next part of the message follows immediately */
 42 };
 43 
 44 #define DATALEN_MSG     (PAGE_SIZE-sizeof(struct msg_msg))     /* most text bytes stored directly behind the msg_msg header */
 45 #define DATALEN_SEG     (PAGE_SIZE-sizeof(struct msg_msgseg))  /* most text bytes stored behind one msg_msgseg header */
 46 

msg_msg:一个msg_msg对应一条消息;一条消息由通过next指针串联起来的内存块链表组成,第一个结点由msg_msg与数据组成,其它结点由msg_msgseg与数据组成;除最后一个结点外,其它结点的大小均为PAGE_SIZE,最后一个结点的大小取决于消息长度。

iii.msg_sender

 60 /* one msg_sender for each sleeping sender */
 61 struct msg_sender {
 62         struct list_head        list;   /* list node; presumably put on msg_queue.q_senders by ss_add() */
 63         struct task_struct      *tsk;   /* the sleeping sender; presumably set by ss_add() - confirm there */
 64 };

msg_sender:表示一个被阻塞的发送消息进程;通过list链入消息队列的被阻塞发送进程链表(q_senders),tsk指向被阻塞的进程

 

iv. msg_receiver

 46 /*
 47  * one msg_receiver structure for each sleeping receiver:
     * do_msgrcv() keeps it on its own stack and links it into
     * msg_queue.q_receivers before it sleeps.
 48  */
 49 struct msg_receiver {
 50         struct list_head        r_list;     /* node on msg_queue.q_receivers */
 51         struct task_struct      *r_tsk;     /* the sleeping receiver (current in do_msgrcv()) */
 52 
 53         int                     r_mode;     /* search mode computed by convert_mode() */
 54         long                    r_msgtype;  /* message type the receiver is waiting for */
 55         long                    r_maxsize;  /* largest acceptable message: msgsz, or INT_MAX with MSG_NOERROR */
 56 
 57         struct msg_msg          *volatile r_msg;    /* hand-off slot: ERR_PTR(-EAGAIN) while waiting, later a message or an error pointer */
 58 };

msg_receiver:表示被阻塞的接收消息进程,及接收消息的属性
r_msg:当有消息发送、且该消息满足此接收进程的接收要求时,发送方直接通过r_msg把消息交给接收进程,而不需要先放入消息队列中。

 

v.结构关系图

linux SysV IPC msg消息队列实现_第1张图片

II.消息队列的创建
消息队列由newque创建:

174 /**
175  * newque - Create a new msg queue
176  * @ns: namespace
177  * @params: ptr to the structure that contains the key and msgflg
178  *
179  * Called with msg_ids.rw_mutex held (writer)
     *
     * Returns the id stored in q_perm.id on success, -ENOMEM when the
     * allocation fails, or the error reported by security_msg_queue_alloc()
     * or ipc_addid().
180  */
181 static int newque(struct ipc_namespace *ns, struct ipc_params *params)
182 {
183         struct msg_queue *msq;
184         int id, retval;
185         key_t key = params->key;
186         int msgflg = params->flg;
187 
188         msq = ipc_rcu_alloc(sizeof(*msq));
189         if (!msq)
190                 return -ENOMEM;
191 
192         msq->q_perm.mode = msgflg & S_IRWXUGO;  /* keep only the permission bits of msgflg */
193         msq->q_perm.key = key;
194 
195         msq->q_perm.security = NULL;
196         retval = security_msg_queue_alloc(msq);
197         if (retval) {
198                 ipc_rcu_putref(msq);            /* give back the allocation made above */
199                 return retval;
200         }
201 
202         /*
203          * ipc_addid() locks msq
204          */
205         id = ipc_addid(&msg_ids(ns), &msq->q_perm, ns->msg_ctlmni);
206         if (id < 0) {
207                 security_msg_queue_free(msq);   /* undo security_msg_queue_alloc() */
208                 ipc_rcu_putref(msq);
209                 return id;
210         }
211 
            /* msq is locked from here on (see above); finish the setup before unlocking */
212         msq->q_stime = msq->q_rtime = 0;
213         msq->q_ctime = get_seconds();
214         msq->q_cbytes = msq->q_qnum = 0;
215         msq->q_qbytes = ns->msg_ctlmnb;         /* byte limit of the queue comes from the namespace */
216         msq->q_lspid = msq->q_lrpid = 0;
217         INIT_LIST_HEAD(&msq->q_messages);
218         INIT_LIST_HEAD(&msq->q_receivers);
219         INIT_LIST_HEAD(&msq->q_senders);
220 
221         msg_unlock(msq);
222 
223         return msq->q_perm.id;
224 }

1.创建msg_queue结构
2.将msg_queue添加到消息队列基数树中,并取回基数树id
3.初始化msg_queue结构,如初始化消息链表、被阻塞接收进程链表等

 

III.消息队列的移除

272 /*
273  * freeque() wakes up waiters on the sender and receiver waiting queue,
274  * removes the message queue from message queue ID IDR, and cleans up all the
275  * messages associated with this queue.
276  *
277  * msg_ids.rw_mutex (writer) and the spinlock for this message queue are held
278  * before freeque() is called. msg_ids.rw_mutex remains locked on exit.
     * The queue spinlock is dropped inside (msg_unlock() below).
279  */
280 static void freeque(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp)
281 {
282         struct list_head *tmp;
283         struct msg_queue *msq = container_of(ipcp, struct msg_queue, q_perm);
284 
285         expunge_all(msq, -EIDRM);               /* sleeping receivers get -EIDRM */
286         ss_wakeup(&msq->q_senders, 1);          /* wake the sleeping senders */
287         msg_rmid(ns, msq);                      /* take the queue out of the ID IDR */
288         msg_unlock(msq);
289 
            /* release every message that is still queued */
290         tmp = msq->q_messages.next;
291         while (tmp != &msq->q_messages) {
292                 struct msg_msg *msg = list_entry(tmp, struct msg_msg, m_list);
293 
294                 tmp = tmp->next;                /* step first: free_msg() releases the node tmp came from */
295                 atomic_dec(&ns->msg_hdrs);
296                 free_msg(msg);
297         }
298         atomic_sub(msq->q_cbytes, &ns->msg_bytes);      /* the queue's bytes leave the namespace total */
299         security_msg_queue_free(msq);
300         ipc_rcu_putref(msq);
301 }

1.唤醒所有被阻塞的消息接收进程,并通知消息队列被移除EIDRM
2.唤醒所有被阻塞的消息发送进程
3.将消息队列从消息队列基数树中移除,并置msq->q_perm.deleted=1,以通知被唤醒的发送进程消息队列已被删除(freeque此时持有spinlock,被唤醒的发送进程必须先获取spinlock,才能检查msq->q_perm.deleted)
4.释放消息队列中消息所使用的内存
5.从系统消息字节数计数(ns->msg_bytes)中减去该消息队列的消息字节数(q_cbytes)
6.删除msg_queue

 

IV.消息

i.load_msg

 47 struct msg_msg *load_msg(const void __user *src, int len)
 48 {
            /*
             * Copy len bytes of message text from user space into a newly
             * allocated msg_msg, continued in chained msg_msgseg segments
             * when the text does not fit behind the header.
             *
             * Returns the message, or ERR_PTR(-ENOMEM), ERR_PTR(-EFAULT) or
             * ERR_PTR() of the security_msg_msg_alloc() error. m_type and
             * m_ts are not set here.
             */
 49         struct msg_msg *msg;
 50         struct msg_msgseg **pseg;
 51         int err;
 52         int alen;
 53 
 54         alen = len;
 55         if (alen > DATALEN_MSG)
 56                 alen = DATALEN_MSG;             /* first chunk is capped at DATALEN_MSG */
 57 
 58         msg = kmalloc(sizeof(*msg) + alen, GFP_KERNEL);
 59         if (msg == NULL)
 60                 return ERR_PTR(-ENOMEM);
 61 
 62         msg->next = NULL;                       /* keeps free_msg() safe on the error path */
 63         msg->security = NULL;
 64 
 65         if (copy_from_user(msg + 1, src, alen)) {       /* msg + 1: the text starts right behind the header */
 66                 err = -EFAULT;
 67                 goto out_err;
 68         }
 69 
 70         len -= alen;
 71         src = ((char __user *)src) + alen;
 72         pseg = &msg->next;                      /* where the next segment pointer gets stored */
 73         while (len > 0) {
 74                 struct msg_msgseg *seg;
 75                 alen = len;
 76                 if (alen > DATALEN_SEG)
 77                         alen = DATALEN_SEG;
 78                 seg = kmalloc(sizeof(*seg) + alen,
 79                                                  GFP_KERNEL);
 80                 if (seg == NULL) {
 81                         err = -ENOMEM;
 82                         goto out_err;
 83                 }
 84                 *pseg = seg;                    /* linked in before the copy, so out_err frees it as well */
 85                 seg->next = NULL;
 86                 if (copy_from_user(seg + 1, src, alen)) {
 87                         err = -EFAULT;
 88                         goto out_err;
 89                 }
 90                 pseg = &seg->next;
 91                 len -= alen;
 92                 src = ((char __user *)src) + alen;
 93         }
 94 
 95         err = security_msg_msg_alloc(msg);
 96         if (err)
 97                 goto out_err;
 98 
 99         return msg;
100 
101 out_err:
102         free_msg(msg);                          /* releases the header and every segment linked so far */
103         return ERR_PTR(err);
104 }

load_msg用于将用户空间的信息数据复制到内核内存中
注:
一条消息是由内存链表组成,每个结点内存从通用slab中获取;
每个结点均由管理信息与数据组成,第一个结点由msg_msg管理,其它由msg_msgseg管理;
除最后一个结点外,其它结点大小均为PAGE_SIZE大小;不直接取页帧是因为如果有很多小消息(远小于PAGE_SIZE)的话会浪费内存


ii.store_msg

106 int store_msg(void __user *dest, struct msg_msg *msg, int len)
107 {
            /*
             * Copy the first len bytes of the text of msg to the user
             * buffer dest, walking the header chunk and then the segment
             * chain. Returns 0 on success and -1 when copy_to_user() fails.
             * len is trusted: the loop does not check seg for NULL.
             */
108         int alen;
109         struct msg_msgseg *seg;
110 
111         alen = len;
112         if (alen > DATALEN_MSG)
113                 alen = DATALEN_MSG;
114         if (copy_to_user(dest, msg + 1, alen))  /* chunk stored behind the msg_msg header */
115                 return -1;
116 
117         len -= alen;
118         dest = ((char __user *)dest) + alen;
119         seg = msg->next;
120         while (len > 0) {
121                 alen = len;
122                 if (alen > DATALEN_SEG)
123                         alen = DATALEN_SEG;
124                 if (copy_to_user(dest, seg + 1, alen))  /* chunk stored behind the segment header */
125                         return -1;
126                 len -= alen;
127                 dest = ((char __user *)dest) + alen;
128                 seg = seg->next;
129         }
130         return 0;
131 }

store_msg用于将消息数据从内核内存中复制到进程用户空间中

 

iii.free_msg

133 void free_msg(struct msg_msg *msg)
134 {
            /* Release a message: its security state, its header chunk and all of its segments. */
135         struct msg_msgseg *seg;
136 
137         security_msg_msg_free(msg);
138 
139         seg = msg->next;                /* read the chain head before the header is freed */
140         kfree(msg);
141         while (seg != NULL) {
142                 struct msg_msgseg *tmp = seg->next;     /* fetch the successor before freeing seg */
143                 kfree(seg);
144                 seg = tmp;
145         }
146 }

free_msg用于释放消息所使用的内核slab内存


IV.发送消息

636 long do_msgsnd(int msqid, long mtype, void __user *mtext,
637                 size_t msgsz, int msgflg)
638 {
            /*
             * Send one message of type mtype and msgsz text bytes (read from
             * the user buffer mtext) to queue msqid.
             *
             * Returns 0 on success or a negative error: -EINVAL (bad size,
             * id or type), the load_msg() or msg_lock_check() error, -EACCES,
             * the security hook error, -EAGAIN (queue full with IPC_NOWAIT),
             * -EIDRM (queue removed while sleeping) or -ERESTARTNOHAND
             * (signal pending after a sleep).
             */
639         struct msg_queue *msq;
640         struct msg_msg *msg;
641         int err;
642         struct ipc_namespace *ns;
643 
644         ns = current->nsproxy->ipc_ns;
645 
646         if (msgsz > ns->msg_ctlmax || (long) msgsz < 0 || msqid < 0)
647                 return -EINVAL;
648         if (mtype < 1)
649                 return -EINVAL;
650 
            /* the text is copied in from user space before the queue lock is taken */
651         msg = load_msg(mtext, msgsz);
652         if (IS_ERR(msg))
653                 return PTR_ERR(msg);
654 
655         msg->m_type = mtype;
656         msg->m_ts = msgsz;
657 
658         msq = msg_lock_check(ns, msqid);
659         if (IS_ERR(msq)) {
660                 err = PTR_ERR(msq);
661                 goto out_free;
662         }
663 
            /* every pass re-checks permission and free space with the queue locked */
664         for (;;) {
665                 struct msg_sender s;
666 
667                 err = -EACCES;
668                 if (ipcperms(&msq->q_perm, S_IWUGO))
669                         goto out_unlock_free;
670 
671                 err = security_msg_queue_msgsnd(msq, msg, msgflg);
672                 if (err)
673                         goto out_unlock_free;
674 
                    /*
                     * Room for the message: the byte total must stay within
                     * q_qbytes, and the message count is bounded by q_qbytes
                     * as well.
                     */
675                 if (msgsz + msq->q_cbytes <= msq->q_qbytes &&
676                                 1 + msq->q_qnum <= msq->q_qbytes) {
677                         break;
678                 }
679 
680                 /* queue full, wait: */
681                 if (msgflg & IPC_NOWAIT) {
682                         err = -EAGAIN;
683                         goto out_unlock_free;
684                 }
685                 ss_add(msq, &s);
686                 ipc_rcu_getref(msq);    /* reference held across the sleep; presumably keeps msq valid if the queue is removed */
687                 msg_unlock(msq);
688                 schedule();
689 
690                 ipc_lock_by_ptr(&msq->q_perm);
691                 ipc_rcu_putref(msq);
692                 if (msq->q_perm.deleted) {      /* the queue was removed while we slept */
693                         err = -EIDRM;
694                         goto out_unlock_free;
695                 }
696                 ss_del(&s);
697 
698                 if (signal_pending(current)) {
699                         err = -ERESTARTNOHAND;
700                         goto out_unlock_free;
701                 }
702         }
703 
704         msq->q_lspid = task_tgid_vnr(current);
705         msq->q_stime = get_seconds();
706 
707         if (!pipelined_send(msq, msg)) {
708                 /* no one is waiting for this message, enqueue it */
709                 list_add_tail(&msg->m_list, &msq->q_messages);
710                 msq->q_cbytes += msgsz;
711                 msq->q_qnum++;
712                 atomic_add(msgsz, &ns->msg_bytes);
713                 atomic_inc(&ns->msg_hdrs);
714         }
715 
716         err = 0;
717         msg = NULL;     /* the message is no longer ours, so out_free must not free it */
718 
719 out_unlock_free:
720         msg_unlock(msq);
721 out_free:
722         if (msg != NULL)
723                 free_msg(msg);
724         return err;
725 }
726 
727 SYSCALL_DEFINE4(msgsnd, int, msqid, struct msgbuf __user *, msgp, size_t, msgsz,
728                 int, msgflg)
729 {
            /* msgsnd entry point: read the type from the user msgbuf, then let do_msgsnd() do the work. */
730         long mtype;
731 
732         if (get_user(mtype, &msgp->mtype))
733                 return -EFAULT;
734         return do_msgsnd(msqid, mtype, msgp->mtext, msgsz, msgflg);     /* msgp->mtext is passed on as a user-space address */
735 }

1.参数检查
2.分配消息内存,并将消息复制到内核内存中
3.权限检查
4.检查消息队列是否已满
  A.如果消息队列已满
    a.如果IPC_NOWAIT置位,返回EAGAIN通知用户进程再次尝试发送;
    b.如果IPC_NOWAIT未置位,阻塞发送进程;
    c.阻塞进程被唤醒时检查消息队列是否被删除,如果被删除返回EIDRM通知用户进程消息队列被删除;如果有信号待处理则返回ERESTARTNOHAND;否则继续检查消息队列是否已满
  B.如果消息队列未满
    a.如果有被阻塞的接收进程,且消息满足接收要求,则将消息直接发送给被阻塞的接收进程
    b.否则,将消息排入消息队列尾


V.接收消息

756 long do_msgrcv(int msqid, long *pmtype, void __user *mtext,
757                 size_t msgsz, long msgtyp, int msgflg)
758 {
            /*
             * Receive one message from queue msqid into the user buffer
             * mtext (at most msgsz bytes) and store its type in *pmtype.
             *
             * Returns the number of text bytes delivered, or a negative
             * error: -EINVAL, the msg_lock_check() error, -EACCES, -E2BIG
             * (message longer than msgsz without MSG_NOERROR), -ENOMSG
             * (nothing suitable with IPC_NOWAIT), -ERESTARTNOHAND (signal
             * pending after a sleep), an error handed over through r_msg,
             * or -EFAULT when store_msg() fails.
             */
759         struct msg_queue *msq;
760         struct msg_msg *msg;
761         int mode;
762         struct ipc_namespace *ns;
763 
764         if (msqid < 0 || (long) msgsz < 0)
765                 return -EINVAL;
766         mode = convert_mode(&msgtyp, msgflg);   /* msgtyp is passed by address and may be adjusted */
767         ns = current->nsproxy->ipc_ns;
768 
769         msq = msg_lock_check(ns, msqid);
770         if (IS_ERR(msq))
771                 return PTR_ERR(msq);
772 
773         for (;;) {
774                 struct msg_receiver msr_d;
775                 struct list_head *tmp;
776 
777                 msg = ERR_PTR(-EACCES);
778                 if (ipcperms(&msq->q_perm, S_IRUGO))
779                         goto out_unlock;
780 
781                 msg = ERR_PTR(-EAGAIN);         /* stays -EAGAIN when the scan finds nothing */
782                 tmp = msq->q_messages.next;
783                 while (tmp != &msq->q_messages) {
784                         struct msg_msg *walk_msg;
785 
786                         walk_msg = list_entry(tmp, struct msg_msg, m_list);
787                         if (testmsg(walk_msg, msgtyp, mode) &&
788                             !security_msg_queue_msgrcv(msq, walk_msg, current,
789                                                        msgtyp, mode)) {
790 
791                                 msg = walk_msg;
                                    /*
                                     * SEARCH_LESSEQUAL: remember this candidate and
                                     * keep scanning with msgtyp lowered below its
                                     * type (presumably testmsg() then matches only
                                     * smaller types). A type of 1, or any other
                                     * mode, ends the scan at once.
                                     */
792                                 if (mode == SEARCH_LESSEQUAL &&
793                                                 walk_msg->m_type != 1) {
794                                         msg = walk_msg;
795                                         msgtyp = walk_msg->m_type - 1;
796                                 } else {
797                                         msg = walk_msg;
798                                         break;
799                                 }
800                         }
801                         tmp = tmp->next;
802                 }
803                 if (!IS_ERR(msg)) {
804                         /*
805                          * Found a suitable message.
806                          * Unlink it from the queue.
807                          */
808                         if ((msgsz < msg->m_ts) && !(msgflg & MSG_NOERROR)) {
809                                 msg = ERR_PTR(-E2BIG);  /* too long for the buffer; the message stays queued */
810                                 goto out_unlock;
811                         }
812                         list_del(&msg->m_list);
813                         msq->q_qnum--;
814                         msq->q_rtime = get_seconds();
815                         msq->q_lrpid = task_tgid_vnr(current);
816                         msq->q_cbytes -= msg->m_ts;
817                         atomic_sub(msg->m_ts, &ns->msg_bytes);
818                         atomic_dec(&ns->msg_hdrs);
819                         ss_wakeup(&msq->q_senders, 0);  /* space was freed: wake sleeping senders */
820                         msg_unlock(msq);
821                         break;
822                 }
823                 /* No message waiting. Wait for a message */
824                 if (msgflg & IPC_NOWAIT) {
825                         msg = ERR_PTR(-ENOMSG);
826                         goto out_unlock;
827                 }
                    /* register the on-stack msr_d as a waiting receiver */
828                 list_add_tail(&msr_d.r_list, &msq->q_receivers);
829                 msr_d.r_tsk = current;
830                 msr_d.r_msgtype = msgtyp;
831                 msr_d.r_mode = mode;
832                 if (msgflg & MSG_NOERROR)
833                         msr_d.r_maxsize = INT_MAX;
834                 else
835                         msr_d.r_maxsize = msgsz;
836                 msr_d.r_msg = ERR_PTR(-EAGAIN);         /* "nothing handed over yet" */
837                 current->state = TASK_INTERRUPTIBLE;    /* set before the lock is dropped */
838                 msg_unlock(msq);
839 
840                 schedule();
841 
842                 /* Lockless receive, part 1:
843                  * Disable preemption.  We don't hold a reference to the queue
844                  * and getting a reference would defeat the idea of a lockless
845                  * operation, thus the code relies on rcu to guarantee the
846                  * existence of msq:
847                  * Prior to destruction, expunge_all(-EIDRM) changes r_msg.
848                  * Thus if r_msg is -EAGAIN, then the queue not yet destroyed.
849                  * rcu_read_lock() prevents preemption between reading r_msg
850                  * and the spin_lock() inside ipc_lock_by_ptr().
851                  */
852                 rcu_read_lock();
853 
854                 /* Lockless receive, part 2:
855                  * Wait until pipelined_send or expunge_all are outside of
856                  * wake_up_process(). There is a race with exit(), see
857                  * ipc/mqueue.c for the details.
858                  */
859                 msg = (struct msg_msg*)msr_d.r_msg;
860                 while (msg == NULL) {
861                         cpu_relax();
862                         msg = (struct msg_msg *)msr_d.r_msg;
863                 }
864 
865                 /* Lockless receive, part 3:
866                  * If there is a message or an error then accept it without
867                  * locking.
868                  */
869                 if (msg != ERR_PTR(-EAGAIN)) {
870                         rcu_read_unlock();
871                         break;
872                 }
873 
874                 /* Lockless receive, part 4:
875                  * Acquire the queue spinlock.
876                  */
877                 ipc_lock_by_ptr(&msq->q_perm);
878                 rcu_read_unlock();
879 
880                 /* Lockless receive, part 5:
881                  * Repeat test after acquiring the spinlock.
882                  */
883                 msg = (struct msg_msg*)msr_d.r_msg;
884                 if (msg != ERR_PTR(-EAGAIN))
885                         goto out_unlock;
886 
                    /* still nothing handed over: leave the receiver list, then retry unless a signal is pending */
887                 list_del(&msr_d.r_list);
888                 if (signal_pending(current)) {
889                         msg = ERR_PTR(-ERESTARTNOHAND);
                            /* the label sits inside this branch so that every "goto out_unlock" shares the unlock and break */
890 out_unlock:
891                         msg_unlock(msq);
892                         break;
893                 }
894         }
895         if (IS_ERR(msg))
896                 return PTR_ERR(msg);
897 
898         msgsz = (msgsz > msg->m_ts) ? msg->m_ts : msgsz;        /* deliver at most the message's own length */
899         *pmtype = msg->m_type;
900         if (store_msg(mtext, msg, msgsz))
901                 msgsz = -EFAULT;
902 
903         free_msg(msg);          /* the message is released even when the copy to user space failed */
904 
905         return msgsz;
906 }
907 
908 SYSCALL_DEFINE5(msgrcv, int, msqid, struct msgbuf __user *, msgp, size_t, msgsz,
909                 long, msgtyp, int, msgflg)
910 {
            /*
             * msgrcv entry point: do_msgrcv() fills the user mtext buffer and
             * reports the type in mtype. The type is written back to
             * msgp->mtype only when do_msgrcv() did not fail.
             */
911         long err, mtype;
912 
913         err =  do_msgrcv(msqid, &mtype, msgp->mtext, msgsz, msgtyp, msgflg);
914         if (err < 0)
915                 goto out;
916 
917         if (put_user(mtype, &msgp->mtype))
918                 err = -EFAULT;  /* replaces the byte count returned by do_msgrcv() */
919 out:
920         return err;
921 }

1.参数检查及权限检查
2.如果有满足接收要求的消息(消息队列中有消息,且类型、长度都满足要求)
  a.将消息从消息队列中取出,并复制到用户地址空间
  b.释放消息所占用的内核内存
  c.唤醒被阻塞的消息发送进程(ss_wakeup),被唤醒的发送进程会重新检查消息队列是否有足够空间
3.如果没有满足接收要求的消息
  A.如果IPC_NOWAIT置位,返回ENOMSG通知用户进程没有消息
  B.如果IPC_NOWAIT未置位,阻塞消息接收进程
  C.阻塞进程被唤醒
    a.如果因有满足接收要求的消息发送,同2的a和b的处理;
    b.如果是被信号唤醒,则返回ERESTARTNOHAND:若该信号没有安装处理函数,msgrcv会被自动重新执行;若有处理函数,则先执行信号处理函数,msgrcv以EINTR返回,不会自动重启
    c.否则接收进程继续被阻塞
 

你可能感兴趣的:(linux SysV IPC msg消息队列实现)