消息队列是由存放在内核中的消息组成的链表,由IPC id标识。
由msgget创建新队列或打开已经存在的队列
由msgsnd将消息添加到消息队列尾,每个消息包括正整数标识的类型,非负的长度,及数据。
由msgrcv从消息队列中取消息,不必按FIFO取消息,可以通过类型字段取相应的消息。
I.数据结构
i.msg_queue
87 /*
    * One msg_queue structure for each message queue present on the system.
    * All fields are initialised in newque(); the counters and timestamps are
    * updated by do_msgsnd() and do_msgrcv().
    */
88 struct msg_queue {
89     struct kern_ipc_perm q_perm;    /* IPC header: newque() sets its mode, key and security, and returns its id */
90     time_t q_stime;                 /* last msgsnd time */
91     time_t q_rtime;                 /* last msgrcv time */
92     time_t q_ctime;                 /* last change time */
93     unsigned long q_cbytes;         /* current number of bytes on queue */
94     unsigned long q_qnum;           /* number of messages in queue */
95     unsigned long q_qbytes;         /* max number of bytes on queue (set from ns->msg_ctlmnb in newque()) */
96     pid_t q_lspid;                  /* pid of last msgsnd */
97     pid_t q_lrpid;                  /* last receive pid */
98
99     struct list_head q_messages;    /* queued struct msg_msg entries, linked through m_list */
100     struct list_head q_receivers;  /* sleeping struct msg_receiver entries, linked through r_list */
101     struct list_head q_senders;    /* sleeping senders; presumably struct msg_sender entries (ss_add() not shown) */
102 };
msg_queue:一个msg_queue对应一个消息队列
q_messages:消息链表
q_receivers:被阻塞的接收消息进程链表
q_senders:被阻塞的发送消息进程链表
ii.msg_msg
77 /* one msg_msg structure for each message; it heads the message's chunk chain */
78 struct msg_msg {
79     struct list_head m_list;    /* links the message into msg_queue.q_messages */
80     long m_type;                /* message type; do_msgsnd() rejects values below 1 */
81     int m_ts;                   /* message text size */
82     struct msg_msgseg* next;    /* first continuation chunk, NULL when there is none */
83     void *security;             /* cleared by load_msg(); presumably filled by security_msg_msg_alloc() - confirm */
84     /* the actual message follows immediately */
85 };
39 struct msg_msgseg {
40     struct msg_msgseg* next;    /* following continuation chunk, NULL on the last one */
41     /* the next part of the message follows immediately */
42 };
43
44 #define DATALEN_MSG (PAGE_SIZE-sizeof(struct msg_msg))       /* payload bytes that fit behind a msg_msg header within PAGE_SIZE */
45 #define DATALEN_SEG (PAGE_SIZE-sizeof(struct msg_msgseg))    /* payload bytes that fit behind a msg_msgseg header within PAGE_SIZE */
46
msg_msg:一个msg_msg对应一条消息。一条消息由通过next指针串接起来的内存结点链表组成:第一个结点由msg_msg头和紧随其后的数据组成,其余结点由msg_msgseg头和紧随其后的数据组成;除最后一个结点外,其它结点的大小均为PAGE_SIZE,最后一个结点的大小取决于剩余的消息长度。
iii.msg_sender
60 /* one msg_sender for each sleeping sender; do_msgsnd() keeps it on its own stack */
61 struct msg_sender {
62     struct list_head list;      /* list linkage; presumably chained on msg_queue.q_senders by ss_add() - confirm */
63     struct task_struct *tsk;    /* presumably the sleeping sender task - confirm against ss_add()/ss_wakeup() */
64 };
msg_sender:表示一个被阻塞的发送消息进程,通过list链入被阻塞的发送进程链表,tsk指向被阻塞的进程
iv. msg_receiver
46 /*
47  * one msg_receiver structure for each sleeping receiver:
    * do_msgrcv() fills it in on its own stack before going to sleep.
48  */
49 struct msg_receiver {
50     struct list_head r_list;            /* links the entry into msg_queue.q_receivers */
51     struct task_struct *r_tsk;          /* the sleeping receiver (set to current) */
52
53     int r_mode;                         /* search mode computed by convert_mode() */
54     long r_msgtype;                     /* message type the receiver asked for */
55     long r_maxsize;                     /* receiver's buffer size, or INT_MAX when MSG_NOERROR is set */
56
57     struct msg_msg *volatile r_msg;     /* ERR_PTR(-EAGAIN) while sleeping; do_msgrcv() re-reads it after wake-up for a message or error */
58 };
msg_receiver:表示被阻塞的接收消息进程,及接收消息的属性
r_msg:发送消息时,如果消息满足某个被阻塞接收进程的接收要求,就通过r_msg把消息直接交给该接收进程,而不需要先放入消息队列。
v.结构关系图
II.消息队列的创建
消息队列由newque创建:
174 /**
175  * newque - Create a new msg queue
176  * @ns: namespace
177  * @params: ptr to the structure that contains the key and msgflg
178  *
179  * Called with msg_ids.rw_mutex held (writer)
     *
     * Returns the new queue's id (msq->q_perm.id) on success, -ENOMEM when the
     * queue cannot be allocated, or the error reported by
     * security_msg_queue_alloc() or ipc_addid().
180  */
181 static int newque(struct ipc_namespace *ns, struct ipc_params *params)
182 {
183     struct msg_queue *msq;
184     int id, retval;
185     key_t key = params->key;
186     int msgflg = params->flg;
187
188     msq = ipc_rcu_alloc(sizeof(*msq));
189     if (!msq)
190         return -ENOMEM;
191
192     msq->q_perm.mode = msgflg & S_IRWXUGO;    /* keep only the permission bits of msgflg */
193     msq->q_perm.key = key;
194
195     msq->q_perm.security = NULL;
196     retval = security_msg_queue_alloc(msq);
197     if (retval) {
198         ipc_rcu_putref(msq);                  /* drop the allocation made above */
199         return retval;
200     }
201
202     /*
203      * ipc_addid() locks msq
         * (the lock is released by msg_unlock() once the fields are set up)
204      */
205     id = ipc_addid(&msg_ids(ns), &msq->q_perm, ns->msg_ctlmni);
206     if (id < 0) {
207         security_msg_queue_free(msq);         /* undo security_msg_queue_alloc() */
208         ipc_rcu_putref(msq);
209         return id;
210     }
211     /* The queue starts empty: no send/receive yet, no bytes, no messages. */
212     msq->q_stime = msq->q_rtime = 0;
213     msq->q_ctime = get_seconds();
214     msq->q_cbytes = msq->q_qnum = 0;
215     msq->q_qbytes = ns->msg_ctlmnb;           /* byte limit comes from the namespace setting */
216     msq->q_lspid = msq->q_lrpid = 0;
217     INIT_LIST_HEAD(&msq->q_messages);
218     INIT_LIST_HEAD(&msq->q_receivers);
219     INIT_LIST_HEAD(&msq->q_senders);
220
221     msg_unlock(msq);
222
223     return msq->q_perm.id;
224 }
1.创建msg_queue结构
2.将msg_queue添加到消息队列基数树中,并取回基数树id
3.初始化msg_queue结构,如初始化消息链表、被阻塞接收进程链表等
III.消息队列的移除
272 /*
273  * freeque() wakes up waiters on the sender and receiver waiting queue,
274  * removes the message queue from message queue ID IDR, and cleans up all the
275  * messages associated with this queue.
276  *
277  * msg_ids.rw_mutex (writer) and the spinlock for this message queue are held
278  * before freeque() is called. msg_ids.rw_mutex remains locked on exit.
     * The queue spinlock is dropped here via msg_unlock() before the queued
     * messages are freed.
279  */
280 static void freeque(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp)
281 {
282     struct list_head *tmp;
283     struct msg_queue *msq = container_of(ipcp, struct msg_queue, q_perm);    /* ipcp is the q_perm member embedded in the queue */
284
285     expunge_all(msq, -EIDRM);          /* hand -EIDRM to the waiting receivers */
286     ss_wakeup(&msq->q_senders, 1);     /* wake the waiting senders; meaning of the 1 flag is not visible here */
287     msg_rmid(ns, msq);
288     msg_unlock(msq);
289     /* Free every message still queued and keep the namespace header count in step. */
290     tmp = msq->q_messages.next;
291     while (tmp != &msq->q_messages) {
292         struct msg_msg *msg = list_entry(tmp, struct msg_msg, m_list);
293         /* advance first: free_msg() releases the node that tmp points into */
294         tmp = tmp->next;
295         atomic_dec(&ns->msg_hdrs);
296         free_msg(msg);
297     }
298     atomic_sub(msq->q_cbytes, &ns->msg_bytes);    /* remove this queue's bytes from the namespace total */
299     security_msg_queue_free(msq);
300     ipc_rcu_putref(msq);
301 }
1.唤醒所有被阻塞的消息接收进程,并通知消息队列被移除EIDRM
2.唤醒所有被阻塞的消息发送进程
3.将消息队列从消息队列基数树中移除;msq->q_perm.deleted=1,通知被唤醒的发送进程消息队列被删除(freeque已经获取了spinlock,而被唤醒的发送进程要先获取spinlock再检查msq->q_perm.deleted)
4.释放消息队列中消息所使用的内存
5.将消息队列的消息长度计数从系统消息长度计数中删除
6.删除msg_queue
IV.消息
i.load_msg
/*
 * load_msg - copy a message body from user space into a new msg_msg chain.
 * @src: user-space source buffer
 * @len: number of bytes to copy
 *
 * The first chunk carries up to DATALEN_MSG bytes directly behind the msg_msg
 * header; the remaining bytes go into msg_msgseg chunks of up to DATALEN_SEG
 * bytes each, linked through ->next.  m_type and m_ts are not set here.
 *
 * Returns the msg_msg on success.  On failure everything allocated so far is
 * released with free_msg() and ERR_PTR() is returned with -ENOMEM, -EFAULT or
 * the error from security_msg_msg_alloc().
 */
47 struct msg_msg *load_msg(const void __user *src, int len)
48 {
49     struct msg_msg *msg;
50     struct msg_msgseg **pseg;
51     int err;
52     int alen;
53     /* size of the first chunk: at most DATALEN_MSG bytes */
54     alen = len;
55     if (alen > DATALEN_MSG)
56         alen = DATALEN_MSG;
57
58     msg = kmalloc(sizeof(*msg) + alen, GFP_KERNEL);
59     if (msg == NULL)
60         return ERR_PTR(-ENOMEM);
61     /* initialise the fields free_msg() reads, so the error path is safe from here on */
62     msg->next = NULL;
63     msg->security = NULL;
64
65     if (copy_from_user(msg + 1, src, alen)) {    /* msg + 1: the data area right behind the header */
66         err = -EFAULT;
67         goto out_err;
68     }
69
70     len -= alen;
71     src = ((char __user *)src) + alen;
72     pseg = &msg->next;                           /* where the next chunk pointer has to be stored */
73     while (len > 0) {
74         struct msg_msgseg *seg;
75         alen = len;
76         if (alen > DATALEN_SEG)
77             alen = DATALEN_SEG;
78         seg = kmalloc(sizeof(*seg) + alen,
79                       GFP_KERNEL);
80         if (seg == NULL) {
81             err = -ENOMEM;
82             goto out_err;
83         }
84         *pseg = seg;                             /* link before copying so out_err frees this chunk too */
85         seg->next = NULL;
86         if (copy_from_user(seg + 1, src, alen)) {
87             err = -EFAULT;
88             goto out_err;
89         }
90         pseg = &seg->next;
91         len -= alen;
92         src = ((char __user *)src) + alen;
93     }
94
95     err = security_msg_msg_alloc(msg);
96     if (err)
97         goto out_err;
98
99     return msg;
100
101 out_err:
102     free_msg(msg);
103     return ERR_PTR(err);
104 }
load_msg用于将用户空间的信息数据复制到内核内存中
注:
一条消息是由内存链表组成,每个结点内存从通用slab中获取;
每个结点均由管理信息与数据组成,第一个结点由msg_msg管理,其它由msg_msgseg管理;
除最后一个结点外,其它结点大小均为PAGE_SIZE大小;不直接取页帧是因为如果有很多小消息(远小于PAGE_SIZE)的话会浪费内存
ii.store_msg
/*
 * store_msg - copy the first @len bytes of a message to user space.
 * @dest: user-space destination buffer
 * @msg:  message whose chunk chain is read
 * @len:  number of bytes to copy
 *
 * Walks the chain in the layout load_msg() builds: up to DATALEN_MSG bytes
 * from behind the msg_msg header, then up to DATALEN_SEG bytes per segment.
 * Returns 0 on success and -1 when copy_to_user() reports a failure.
 */
106 int store_msg(void __user *dest, struct msg_msg *msg, int len)
107 {
108     int alen;
109     struct msg_msgseg *seg;
110
111     alen = len;
112     if (alen > DATALEN_MSG)
113         alen = DATALEN_MSG;
114     if (copy_to_user(dest, msg + 1, alen))      /* msg + 1: data stored right behind the header */
115         return -1;
116
117     len -= alen;
118     dest = ((char __user *)dest) + alen;
119     seg = msg->next;
120     while (len > 0) {                           /* assumes the chain holds at least len bytes - seg is not NULL-checked */
121         alen = len;
122         if (alen > DATALEN_SEG)
123             alen = DATALEN_SEG;
124         if (copy_to_user(dest, seg + 1, alen))
125             return -1;
126         len -= alen;
127         dest = ((char __user *)dest) + alen;
128         seg = seg->next;
129     }
130     return 0;
131 }
store_msg用于将消息数据从内核内存中复制到进程用户空间中
iii.free_msg
/*
 * free_msg - release a message: its security state, the msg_msg head chunk
 * and every msg_msgseg chunk linked behind it.
 * @msg: message to free; must not be used afterwards
 */
133 void free_msg(struct msg_msg *msg)
134 {
135     struct msg_msgseg *seg;
136
137     security_msg_msg_free(msg);
138
139     seg = msg->next;                            /* read the chain head before the header is freed */
140     kfree(msg);
141     while (seg != NULL) {
142         struct msg_msgseg *tmp = seg->next;     /* remember the successor before freeing this chunk */
143         kfree(seg);
144         seg = tmp;
145     }
146 }
free_msg用于释放消息所使用的内核slab内存
IV.发送消息
/*
 * do_msgsnd - send one message to the queue identified by @msqid.
 * @msqid:  queue id
 * @mtype:  message type, must be >= 1
 * @mtext:  user-space pointer to the message body
 * @msgsz:  length of the body in bytes
 * @msgflg: flags; IPC_NOWAIT is the one tested here
 *
 * Returns 0 on success, otherwise a negative error: -EINVAL for bad
 * arguments, an error from load_msg() or msg_lock_check(), -EACCES, an error
 * from security_msg_queue_msgsnd(), -EAGAIN (queue full with IPC_NOWAIT),
 * -EIDRM (queue removed while sleeping) or -ERESTARTNOHAND (signal pending
 * after wake-up).
 */
636 long do_msgsnd(int msqid, long mtype, void __user *mtext,
637                size_t msgsz, int msgflg)
638 {
639     struct msg_queue *msq;
640     struct msg_msg *msg;
641     int err;
642     struct ipc_namespace *ns;
643
644     ns = current->nsproxy->ipc_ns;
645
646     if (msgsz > ns->msg_ctlmax || (long) msgsz < 0 || msqid < 0)
647         return -EINVAL;
648     if (mtype < 1)
649         return -EINVAL;
650     /* copy the body into kernel memory before the queue lock is taken */
651     msg = load_msg(mtext, msgsz);
652     if (IS_ERR(msg))
653         return PTR_ERR(msg);
654
655     msg->m_type = mtype;
656     msg->m_ts = msgsz;
657
658     msq = msg_lock_check(ns, msqid);            /* on success the queue is locked (see msg_unlock() on every exit) */
659     if (IS_ERR(msq)) {
660         err = PTR_ERR(msq);
661         goto out_free;
662     }
663     /* Each pass re-checks permissions and space; it loops only after having slept. */
664     for (;;) {
665         struct msg_sender s;
666
667         err = -EACCES;
668         if (ipcperms(&msq->q_perm, S_IWUGO))
669             goto out_unlock_free;
670
671         err = security_msg_queue_msgsnd(msq, msg, msgflg);
672         if (err)
673             goto out_unlock_free;
674         /* NOTE(review): the message count is compared against q_qbytes (the byte limit) as well. */
675         if (msgsz + msq->q_cbytes <= msq->q_qbytes &&
676                 1 + msq->q_qnum <= msq->q_qbytes) {
677             break;
678         }
679
680         /* queue full, wait: */
681         if (msgflg & IPC_NOWAIT) {
682             err = -EAGAIN;
683             goto out_unlock_free;
684         }
685         ss_add(msq, &s);
686         ipc_rcu_getref(msq);                    /* hold a reference across the sleep; dropped after re-locking */
687         msg_unlock(msq);
688         schedule();
689
690         ipc_lock_by_ptr(&msq->q_perm);
691         ipc_rcu_putref(msq);
692         if (msq->q_perm.deleted) {              /* queue was removed while this sender slept */
693             err = -EIDRM;
694             goto out_unlock_free;
695         }
696         ss_del(&s);
697
698         if (signal_pending(current)) {
699             err = -ERESTARTNOHAND;
700             goto out_unlock_free;
701         }
702     }
703
704     msq->q_lspid = task_tgid_vnr(current);
705     msq->q_stime = get_seconds();
706
707     if (!pipelined_send(msq, msg)) {
708         /* no one is waiting for this message, enqueue it */
709         list_add_tail(&msg->m_list, &msq->q_messages);
710         msq->q_cbytes += msgsz;
711         msq->q_qnum++;
712         atomic_add(msgsz, &ns->msg_bytes);
713         atomic_inc(&ns->msg_hdrs);
714     }
715
716     err = 0;
717     msg = NULL;                                 /* message is no longer ours to free: keeps out_free from freeing it */
718
719 out_unlock_free:
720     msg_unlock(msq);
721 out_free:
722     if (msg != NULL)
723         free_msg(msg);
724     return err;
725 }
726 /* msgsnd system call: read mtype from the user's msgbuf, then let do_msgsnd() do the work. */
727 SYSCALL_DEFINE4(msgsnd, int, msqid, struct msgbuf __user *, msgp, size_t, msgsz,
728                 int, msgflg)
729 {
730     long mtype;
731
732     if (get_user(mtype, &msgp->mtype))
733         return -EFAULT;
734     return do_msgsnd(msqid, mtype, msgp->mtext, msgsz, msgflg);
735 }
1.参数检查
2.分配消息内存,并将消息复制到内核内存中
3.权限检查
4.检查消息队列是否已满
A.如果消息队列已满
a.如果IPC_NOWAIT置位,返回EAGAIN通知用户进程再次尝试发送;
b.如果IPC_NOWAIT未置位,阻塞发送进程;
c.阻塞进程被唤醒时检查消息队列是否被删除,如果被删除返回EIDRM通知用户进程消息队列被删除;如果有待处理的信号,返回ERESTARTNOHAND;否则继续检查消息队列是否已满
B.如果消息队列未满
a.如果有被阻塞的接收进程,且消息满足接收要求,则将消息直接发送给被阻塞的接收进程
b.否则,将消息排入消息队列尾
V.接收消息
/*
 * do_msgrcv - receive one message from the queue identified by @msqid.
 * @msqid:  queue id
 * @pmtype: out: type of the received message
 * @mtext:  user-space buffer for the message body
 * @msgsz:  size of that buffer in bytes
 * @msgtyp: requested type; convert_mode() may rewrite it while deriving the search mode
 * @msgflg: flags; IPC_NOWAIT and MSG_NOERROR are tested here
 *
 * Returns the number of bytes copied out (at most @msgsz), or a negative
 * error: -EINVAL, an error from msg_lock_check(), -EACCES, -E2BIG (message
 * larger than @msgsz without MSG_NOERROR), -ENOMSG (nothing suitable with
 * IPC_NOWAIT), -ERESTARTNOHAND (signal pending after wake-up), an error left
 * in r_msg by the waker, or -EFAULT when the copy to user space fails.
 */
756 long do_msgrcv(int msqid, long *pmtype, void __user *mtext,
757                size_t msgsz, long msgtyp, int msgflg)
758 {
759     struct msg_queue *msq;
760     struct msg_msg *msg;
761     int mode;
762     struct ipc_namespace *ns;
763
764     if (msqid < 0 || (long) msgsz < 0)
765         return -EINVAL;
766     mode = convert_mode(&msgtyp, msgflg);
767     ns = current->nsproxy->ipc_ns;
768
769     msq = msg_lock_check(ns, msqid);
770     if (IS_ERR(msq))
771         return PTR_ERR(msq);
772     /* msg doubles as the result: an ERR_PTR() until a real message is found. */
773     for (;;) {
774         struct msg_receiver msr_d;
775         struct list_head *tmp;
776
777         msg = ERR_PTR(-EACCES);
778         if (ipcperms(&msq->q_perm, S_IRUGO))
779             goto out_unlock;
780
781         msg = ERR_PTR(-EAGAIN);                 /* stays -EAGAIN when the scan below finds nothing */
782         tmp = msq->q_messages.next;
783         while (tmp != &msq->q_messages) {
784             struct msg_msg *walk_msg;
785
786             walk_msg = list_entry(tmp, struct msg_msg, m_list);
787             if (testmsg(walk_msg, msgtyp, mode) &&
788                     !security_msg_queue_msgrcv(msq, walk_msg, current,
789                                                msgtyp, mode)) {
790                 /* SEARCH_LESSEQUAL: remember this match, then keep scanning for a strictly smaller type */
791                 msg = walk_msg;
792                 if (mode == SEARCH_LESSEQUAL &&
793                         walk_msg->m_type != 1) {
794                     msg = walk_msg;
795                     msgtyp = walk_msg->m_type - 1;
796                 } else {
797                     msg = walk_msg;
798                     break;
799                 }
800             }
801             tmp = tmp->next;
802         }
803         if (!IS_ERR(msg)) {
804             /*
805              * Found a suitable message.
806              * Unlink it from the queue.
                 * (unless it does not fit and truncation was not allowed)
807              */
808             if ((msgsz < msg->m_ts) && !(msgflg & MSG_NOERROR)) {
809                 msg = ERR_PTR(-E2BIG);
810                 goto out_unlock;
811             }
812             list_del(&msg->m_list);
813             msq->q_qnum--;
814             msq->q_rtime = get_seconds();
815             msq->q_lrpid = task_tgid_vnr(current);
816             msq->q_cbytes -= msg->m_ts;
817             atomic_sub(msg->m_ts, &ns->msg_bytes);
818             atomic_dec(&ns->msg_hdrs);
819             ss_wakeup(&msq->q_senders, 0);      /* space was freed: wake waiting senders */
820             msg_unlock(msq);
821             break;
822         }
823         /* No message waiting. Wait for a message */
824         if (msgflg & IPC_NOWAIT) {
825             msg = ERR_PTR(-ENOMSG);
826             goto out_unlock;
827         }
828         list_add_tail(&msr_d.r_list, &msq->q_receivers);
829         msr_d.r_tsk = current;
830         msr_d.r_msgtype = msgtyp;
831         msr_d.r_mode = mode;
832         if (msgflg & MSG_NOERROR)
833             msr_d.r_maxsize = INT_MAX;          /* truncation allowed: any message size is acceptable */
834         else
835             msr_d.r_maxsize = msgsz;
836         msr_d.r_msg = ERR_PTR(-EAGAIN);         /* marker for "nothing delivered yet" */
837         current->state = TASK_INTERRUPTIBLE;
838         msg_unlock(msq);
839
840         schedule();
841
842         /* Lockless receive, part 1:
843          * Disable preemption. We don't hold a reference to the queue
844          * and getting a reference would defeat the idea of a lockless
845          * operation, thus the code relies on rcu to guarantee the
846          * existence of msq:
847          * Prior to destruction, expunge_all(-EIDRM) changes r_msg.
848          * Thus if r_msg is -EAGAIN, then the queue not yet destroyed.
849          * rcu_read_lock() prevents preemption between reading r_msg
850          * and the spin_lock() inside ipc_lock_by_ptr().
851          */
852         rcu_read_lock();
853
854         /* Lockless receive, part 2:
855          * Wait until pipelined_send or expunge_all are outside of
856          * wake_up_process(). There is a race with exit(), see
857          * ipc/mqueue.c for the details.
858          */
859         msg = (struct msg_msg*)msr_d.r_msg;
860         while (msg == NULL) {
861             cpu_relax();
862             msg = (struct msg_msg *)msr_d.r_msg;
863         }
864
865         /* Lockless receive, part 3:
866          * If there is a message or an error then accept it without
867          * locking.
868          */
869         if (msg != ERR_PTR(-EAGAIN)) {
870             rcu_read_unlock();
871             break;
872         }
873
874         /* Lockless receive, part 3:
875          * Acquire the queue spinlock.
876          */
877         ipc_lock_by_ptr(&msq->q_perm);
878         rcu_read_unlock();
879
880         /* Lockless receive, part 4:
881          * Repeat test after acquiring the spinlock.
882          */
883         msg = (struct msg_msg*)msr_d.r_msg;
884         if (msg != ERR_PTR(-EAGAIN))
885             goto out_unlock;
886         /* still nothing delivered: leave the receiver list, then either bail out on a signal or rescan */
887         list_del(&msr_d.r_list);
888         if (signal_pending(current)) {
889             msg = ERR_PTR(-ERESTARTNOHAND);
890 out_unlock:
891             msg_unlock(msq);
892             break;
893         }
894     }
895     if (IS_ERR(msg))
896         return PTR_ERR(msg);
897     /* copy out at most the buffer size, then release the kernel copy */
898     msgsz = (msgsz > msg->m_ts) ? msg->m_ts : msgsz;
899     *pmtype = msg->m_type;
900     if (store_msg(mtext, msg, msgsz))
901         msgsz = -EFAULT;
902
903     free_msg(msg);
904
905     return msgsz;
906 }
907 /* msgrcv system call: do_msgrcv() does the work; on success the message type is written back to the user's msgbuf. */
908 SYSCALL_DEFINE5(msgrcv, int, msqid, struct msgbuf __user *, msgp, size_t, msgsz,
909                 long, msgtyp, int, msgflg)
910 {
911     long err, mtype;
912
913     err = do_msgrcv(msqid, &mtype, msgp->mtext, msgsz, msgtyp, msgflg);
914     if (err < 0)
915         goto out;
916
917     if (put_user(mtype, &msgp->mtype))
918         err = -EFAULT;
919 out:
920     return err;
921 }
1.参数检查及权限检查
2.如果有满足接收要求的消息(消息队列中有消息,且类型、长度都满足要求)
a.将消息从消息队列中取出,并复制到用户地址空间
b.释放消息所占用的内核内存
c.尝试唤醒被阻塞的第一个消息发送进程
3.如果没有满足接收要求的消息
A.如果IPC_NOWAIT置位,返回ENOMSG通知用户进程没有消息
B.如果IPC_NOWAIT未置位,阻塞消息接收进程
C.阻塞进程被唤醒
a.如果因有满足接收要求的消息发送,同2的a和b的处理;
b.如果是因为收到信号,则将接收进程从被阻塞的接收进程链表中移除并返回ERESTARTNOHAND:若该信号安装了处理函数,执行处理函数后msgrcv以EINTR失败返回;若没有安装处理函数,则由内核自动重新执行msgrcv
c.否则接收进程继续被阻塞