本篇博客所涉及的 Linux 源码来自 Linux 2.6,按照从应用层到底层实现的顺序,分析整个消息队列是如何搭建的。
首先我们来看看消息队列是如何应用的,因为应用层反映的是整个消息队列的大致逻辑,有利于我们对底层代码的理解。
1、首先什么是消息队列?
消息队列,是消息的链接表,存放在内核中。一个消息队列由一个标识符(即ID)来标识。(那我们思绪扩展一下:如果系统中创建了很多消息队列,每一个消息队列都对应着一个标识符,我们想通过标识符找到相应的队列,那内核中是不是应该有一个类似于数组的结构把这些队列按标识符组织起来,方便查找?)
2、那消息队列的标识符是什么
与消息队列标识符对应的是键 key,它的基本类型是 key_t,在头文件 <sys/types.h> 中定义。进程用 key 调用 msgget(),由内核返回该队列的标识符(ID)。
明白了这两个概念我们来看看应用层,是如何建立起两个进程间的队列通信的吧
图中所提到的消息队列的应用层函数如下所示:
#include <sys/msg.h>
// 创建或打开消息队列:成功返回队列ID,失败返回-1
int msgget(key_t key, int flag);
// 添加消息:成功返回0,失败返回-1
int msgsnd(int msqid, const void *ptr, size_t size, int flag);
// 读取消息:成功返回消息数据的长度,失败返回-1
ssize_t msgrcv(int msqid, void *ptr, size_t size, long type, int flag);
// 控制消息队列:成功返回0,失败返回-1
int msgctl(int msqid, int cmd, struct msqid_ds *buf);
// 系统IPC键值的格式转换函数:根据路径名和项目ID生成key(声明于 <sys/ipc.h>)
key_t ftok(const char *fname, int id);
再来看一个实例
read.c文件
#include <stdio.h>
#include <string.h>
#include <sys/types.h>
#include <sys/ipc.h>
#include <sys/msg.h>

/* Message layout expected by msgsnd()/msgrcv(): a long type followed by the payload. */
struct msgbuf {
    long mtype;       /* message type, must be > 0 */
    char mtext[128];  /* message data */
};

/*
 * read.c - receiver side of the message-queue example.
 *
 * Blocks until a type-888 message arrives on the queue identified by
 * ftok(".", 'z'), prints it, then answers with a type-999 message.
 *
 * Returns 0 on success, 1 if any IPC call fails.
 */
int main(void)
{
    struct msgbuf sendbuf = {999, "888 message already received"};
    struct msgbuf readbuf = {0};
    int msgid;
    key_t key;
    ssize_t nread;

    /* Derive the IPC key shared with the sender. */
    key = ftok(".", 'z');
    if (key == (key_t)-1) {
        perror("ftok");
        return 1;
    }
    printf("key=%x\n", (unsigned int)key);

    /* Open the queue for this key, creating it with mode 0777 if needed. */
    msgid = msgget(key, IPC_CREAT | 0777);
    if (msgid == -1) {
        printf("create msgq failure\n");
        return 1; /* never use an invalid queue id below */
    }

    /* Block until a message of type 888 is available. */
    nread = msgrcv(msgid, &readbuf, sizeof(readbuf.mtext), 888, 0);
    if (nread == -1) {
        perror("msgrcv");
        return 1;
    }
    /*
     * The peer sends strlen() bytes, i.e. without the terminating NUL,
     * so print exactly the number of bytes received instead of relying
     * on a terminator being present in mtext.
     */
    printf("read from que:%.*s\n", (int)nread, readbuf.mtext);

    /* Reply with a type-999 message. */
    if (msgsnd(msgid, &sendbuf, strlen(sendbuf.mtext), 0) == -1) {
        perror("msgsnd");
        return 1;
    }

    /*
     * Do not remove the queue here: IPC_RMID destroys the queue together
     * with any message still in it, so removing it right after msgsnd()
     * could discard the reply before the other process has received it.
     * Removal is left to the process that consumes the last message.
     */
    return 0;
}
send.c文件
#include <stdio.h>
#include <string.h>
#include <sys/types.h>
#include <sys/ipc.h>
#include <sys/msg.h>

/* Message layout expected by msgsnd()/msgrcv(): a long type followed by the payload. */
struct msgbuf {
    long mtype;       /* message type, must be > 0 */
    char mtext[128];  /* message data */
};

/*
 * send.c - sender side of the message-queue example.
 *
 * Sends a type-888 message on the queue identified by ftok(".", 'z'),
 * blocks until a type-999 reply arrives, prints it, and finally removes
 * the queue.
 *
 * Returns 0 on success, 1 if ftok/msgget/msgsnd/msgrcv fails.
 */
int main(void)
{
    struct msgbuf sendbuf = {888, "this is message from que"};
    struct msgbuf readbuf = {0};
    int msgid;
    int status = 0;
    key_t key;
    ssize_t nread;

    /* Derive the IPC key shared with the receiver. */
    key = ftok(".", 'z');
    if (key == (key_t)-1) {
        perror("ftok");
        return 1;
    }
    printf("key=%x\n", (unsigned int)key);

    /* Open the queue for this key, creating it with mode 0777 if needed. */
    msgid = msgget(key, IPC_CREAT | 0777);
    if (msgid == -1) {
        printf("create msgq failure\n");
        return 1; /* never use an invalid queue id below */
    }

    /* Send the type-888 request. */
    if (msgsnd(msgid, &sendbuf, strlen(sendbuf.mtext), 0) == -1) {
        perror("msgsnd");
        return 1;
    }

    /* Block until the type-999 reply is available. */
    nread = msgrcv(msgid, &readbuf, sizeof(readbuf.mtext), 999, 0);
    if (nread == -1) {
        perror("msgrcv");
        status = 1;
    } else {
        /*
         * The payload is sent without a terminating NUL, so print
         * exactly the number of bytes received.
         */
        printf("%.*s\n", (int)nread, readbuf.mtext);
    }

    /*
     * Remove the queue from the kernel. Removal can fail if the queue has
     * already been removed by another process; report it but do not treat
     * it as a failure of the exchange itself.
     */
    if (msgctl(msgid, IPC_RMID, NULL) == -1)
        perror("msgctl(IPC_RMID)");

    return status;
}
输出结果如下所示:
**因为本文重点讲述底层原理,应用层所用到的函数在这里不展开说明,需要的朋友请看:**Liunx系统编程篇—进程通信(三)消息队列(原理、创建、实战)_阿——波罗的博客-CSDN博客_linux消息队列原理。上面所示实例也引自此文。
消息队列的建立:
应用层函数 msgget() 最终会跳转到系统调用函数 sys_msgget(),在内核中完成消息队列的打开或者创建。
消息队列的建立,最终是通过函数 newque(struct ipc_namespace *ns, key_t key, int msgflg) 来实现的,如下所示:
// Abridged excerpt: only the key statements are kept (allocation-failure and
// other error checks are omitted); see msg.c for the complete code.
//
// newque - create a new message queue for `key` in IPC namespace `ns`.
// The low permission bits of `msgflg` become the queue's mode.
// Returns the id stored in msq->q_id.
static int newque (struct ipc_namespace *ns, key_t key, int msgflg)
{
struct msg_queue *msq;// kernel-side descriptor of the queue being created
int id, retval;
msq = ipc_rcu_alloc(sizeof(*msq));
// Fill in the permission block of the new msg_queue.
msq->q_perm.mode = msgflg & S_IRWXUGO;
msq->q_perm.key = key;// the key is recorded inside the msg_queue itself
msq->q_perm.security = NULL;
retval = security_msg_queue_alloc(msq); // security hook; retval is not checked in this abridged excerpt
// Register the queue's permission block in the namespace's message-queue
// id set and get back the id assigned to it.
id = ipc_addid(&msg_ids(ns), &msq->q_perm, ns->msg_ctlmni);
// Initialise the rest of the msg_queue: the queue id (built from the id
// above and q_perm.seq), timestamps, counters, byte limit and pids.
msq->q_id = msg_buildid(ns, id, msq->q_perm.seq);
msq->q_stime = msq->q_rtime = 0;
msq->q_ctime = get_seconds();
msq->q_cbytes = msq->q_qnum = 0;
msq->q_qbytes = ns->msg_ctlmnb;
msq->q_lspid = msq->q_lrpid = 0;
/* q_messages:  list of queued messages
 * q_receivers: list of receivers blocked on this queue
 * q_senders:   list of senders blocked on this queue */
INIT_LIST_HEAD(&msq->q_messages);
INIT_LIST_HEAD(&msq->q_receivers);
INIT_LIST_HEAD(&msq->q_senders);
msg_unlock(msq);
return msq->q_id;
}
消息的发送:
// Abridged excerpt: only the key statements are kept (id/permission checks
// and several error paths are omitted); see msg.c for the complete code.
//
// do_msgsnd - send a message of type `mtype` with `msgsz` bytes taken from
// the user buffer `mtext` to queue `msqid`.
// Returns 0 on success, -EAGAIN if the queue is full and IPC_NOWAIT is set,
// or the error encoded by load_msg().
long do_msgsnd(int msqid, long mtype, void __user *mtext,
size_t msgsz, int msgflg)
{
struct msg_queue *msq;
struct msg_msg *msg;
int err;
struct ipc_namespace *ns;
ns = current->nsproxy->ipc_ns;
msg = load_msg(mtext, msgsz);// per the author's note: copies the user-space message data into kernel memory
if (IS_ERR(msg))
return PTR_ERR(msg);
msg->m_type = mtype;
msg->m_ts = msgsz;
msq = msg_lock(ns, msqid);
for (;;) {
struct msg_sender s; // wait-list entry for this sender, queued by ss_add() below
if (msgsz + msq->q_cbytes <= msq->q_qbytes && // the queue still has room
1 + msq->q_qnum <= msq->q_qbytes) {
break;
}
/* queue full, wait: */
if (msgflg & IPC_NOWAIT) { // queue full and IPC_NOWAIT set: do not block, fail with -EAGAIN
err = -EAGAIN;
goto out_unlock_free;
}
// IPC_NOWAIT not set: register as a blocked sender, drop the queue lock,
// sleep, then re-take the lock and re-check the space condition.
ss_add(msq, &s);
ipc_rcu_getref(msq);
msg_unlock(msq);
schedule();
ipc_lock_by_ptr(&msq->q_perm);
ipc_rcu_putref(msq);
}
msq->q_lspid = current->tgid;
msq->q_stime = get_seconds();
/* If a blocked receiver is waiting and the message satisfies its request,
 * hand the message to that receiver directly; otherwise append the
 * message to the tail of the queue. */
if (!pipelined_send(msq, msg)) {
/* noone is waiting for this message, enqueue it, */
list_add_tail(&msg->m_list, &msq->q_messages);
msq->q_cbytes += msgsz;
msq->q_qnum++;
atomic_add(msgsz, &msg_bytes);
atomic_inc(&msg_hdrs);
}
err = 0;
// The message is no longer owned by this function: clear the pointer so
// the cleanup below does not free it.
msg = NULL;
out_unlock_free:
msg_unlock(msq);
out_free:
if (msg != NULL)
free_msg(msg);
return err;
}
消息的接收:
讲道理,我没看懂这一块,从英文翻译来看,大概知道干了这些事
1.参数检查及权限检查
2.如果有满足接收要求的消息(消息队列中有消息,且类型、长度都满足要求)
a.将消息从消息队列中取出,并复制到用户地址空间
b.释放消息所占用的内核内存
c.尝试唤醒被阻塞的第一个消息发送进程
3.如果没有满足接收要求的消息
A.如果IPC_NOWAIT置位,返回ENOMSG通知用户进程没有消息
B.如果IPC_NOWAIT未置位,阻塞消息接收进程
C.阻塞进程被唤醒
a.如果因有满足接收要求的消息发送,同2的a和b的处理;
b.如果是因为收到信号而被唤醒,则先做信号处理;再自动重新调用msgrcv
c.否则接收进程继续被阻塞
/*
 * do_msgrcv - take one message off queue msqid and copy it to user space.
 *
 * msgtyp/msgflg are turned into a search mode by convert_mode(); the queue
 * is scanned for a matching message, and if none is found the caller either
 * fails with -ENOMSG (IPC_NOWAIT) or sleeps as a registered receiver.
 *
 * On success the message type is stored in *pmtype, at most msgsz bytes are
 * copied to mtext and the number of bytes copied is returned. On failure a
 * negative errno is returned (-EFAULT if the copy to user space fails).
 */
long do_msgrcv(int msqid, long *pmtype, void __user *mtext,
size_t msgsz, long msgtyp, int msgflg)
{
struct msg_queue *msq;
struct msg_msg *msg;
int mode;
struct ipc_namespace *ns;
/* Reject negative ids and sizes that are negative when viewed as a long. */
if (msqid < 0 || (long) msgsz < 0)
return -EINVAL;
mode = convert_mode(&msgtyp, msgflg);
ns = current->nsproxy->ipc_ns;
msq = msg_lock(ns, msqid);
if (msq == NULL)
return -EINVAL;
/* msg doubles as the result: an ERR_PTR until a real message is found. */
msg = ERR_PTR(-EIDRM);
if (msg_checkid(ns, msq, msqid))
goto out_unlock;
for (;;) {
struct msg_receiver msr_d;
struct list_head *tmp;
msg = ERR_PTR(-EACCES);
if (ipcperms(&msq->q_perm, S_IRUGO))
goto out_unlock;
msg = ERR_PTR(-EAGAIN);
/* Walk the queued messages looking for one that matches the request. */
tmp = msq->q_messages.next;
while (tmp != &msq->q_messages) {
struct msg_msg *walk_msg;
walk_msg = list_entry(tmp, struct msg_msg, m_list);
if (testmsg(walk_msg, msgtyp, mode) &&
!security_msg_queue_msgrcv(msq, walk_msg, current,
msgtyp, mode)) {
msg = walk_msg;
/* In SEARCH_LESSEQUAL mode keep scanning with a tighter bound
 * (m_type - 1) unless the candidate's type is already 1;
 * in every other case the first match is taken. */
if (mode == SEARCH_LESSEQUAL &&
walk_msg->m_type != 1) {
msg = walk_msg;
msgtyp = walk_msg->m_type - 1;
} else {
msg = walk_msg;
break;
}
}
tmp = tmp->next;
}
if (!IS_ERR(msg)) {
/*
 * Found a suitable message.
 * Unlink it from the queue.
 */
/* A message larger than the buffer is an error unless MSG_NOERROR is set. */
if ((msgsz < msg->m_ts) && !(msgflg & MSG_NOERROR)) {
msg = ERR_PTR(-E2BIG);
goto out_unlock;
}
list_del(&msg->m_list);
msq->q_qnum--;
msq->q_rtime = get_seconds();
msq->q_lrpid = current->tgid;
msq->q_cbytes -= msg->m_ts;
atomic_sub(msg->m_ts, &msg_bytes);
atomic_dec(&msg_hdrs);
/* Space was freed: wake senders waiting on this queue. */
ss_wakeup(&msq->q_senders, 0);
msg_unlock(msq);
break;
}
/* No message waiting. Wait for a message */
if (msgflg & IPC_NOWAIT) {
msg = ERR_PTR(-ENOMSG);
goto out_unlock;
}
/* Register this task as a blocked receiver, recording what it waits for. */
list_add_tail(&msr_d.r_list, &msq->q_receivers);
msr_d.r_tsk = current;
msr_d.r_msgtype = msgtyp;
msr_d.r_mode = mode;
if (msgflg & MSG_NOERROR)
msr_d.r_maxsize = INT_MAX;
else
msr_d.r_maxsize = msgsz;
/* -EAGAIN in r_msg means "nothing delivered yet"; it is re-read after waking. */
msr_d.r_msg = ERR_PTR(-EAGAIN);
current->state = TASK_INTERRUPTIBLE;
msg_unlock(msq);
schedule();
/* Lockless receive, part 1:
 * Disable preemption. We don't hold a reference to the queue
 * and getting a reference would defeat the idea of a lockless
 * operation, thus the code relies on rcu to guarantee the
 * existence of msq:
 * Prior to destruction, expunge_all(-EIDRM) changes r_msg.
 * Thus if r_msg is -EAGAIN, then the queue not yet destroyed.
 * rcu_read_lock() prevents preemption between reading r_msg
 * and the spin_lock() inside ipc_lock_by_ptr().
 */
rcu_read_lock();
/* Lockless receive, part 2:
 * Wait until pipelined_send or expunge_all are outside of
 * wake_up_process(). There is a race with exit(), see
 * ipc/mqueue.c for the details.
 */
msg = (struct msg_msg*)msr_d.r_msg;
while (msg == NULL) {
cpu_relax();
msg = (struct msg_msg *)msr_d.r_msg;
}
/* Lockless receive, part 3:
 * If there is a message or an error then accept it without
 * locking.
 */
if (msg != ERR_PTR(-EAGAIN)) {
rcu_read_unlock();
break;
}
/* Lockless receive, part 3:
 * Acquire the queue spinlock.
 */
ipc_lock_by_ptr(&msq->q_perm);
rcu_read_unlock();
/* Lockless receive, part 4:
 * Repeat test after acquiring the spinlock.
 */
msg = (struct msg_msg*)msr_d.r_msg;
if (msg != ERR_PTR(-EAGAIN))
goto out_unlock;
/* Still nothing delivered: leave the receiver list, then either bail out
 * on a pending signal or go round the loop and search again. */
list_del(&msr_d.r_list);
if (signal_pending(current)) {
msg = ERR_PTR(-ERESTARTNOHAND);
/* out_unlock sits inside this branch: every "goto out_unlock" above
 * lands here with the queue lock held, releases it and leaves the loop. */
out_unlock:
msg_unlock(msq);
break;
}
}
if (IS_ERR(msg))
return PTR_ERR(msg);
/* Copy out no more than the message actually holds. */
msgsz = (msgsz > msg->m_ts) ? msg->m_ts : msgsz;
*pmtype = msg->m_type;
if (store_msg(mtext, msg, msgsz))
msgsz = -EFAULT;
free_msg(msg);
return msgsz;
}
最后放一张内核层的消息队列的流程图,仅供参考,因为看到最后我也有点懵了,哈哈
同样是进程间的通信,那么消息队列相较而言有哪些优势和劣势呢
优点:1)消息队列收发消息自动保证了同步,不需要由进程自己来提供同步方法,而命名管道需要自行处理同步问题;
2)消息队列接收数据可以根据消息类型有选择的接收特定类型的数据,不需要像命名管道一样默认接收数据。
想了解管道的盆友,可以看看我的另一篇博客:管道
缺点: 发送和接收的每个数据都有最大的长度限制
笔者对于消息队列的内核源码的阅读看到最后,有点懵逼了,暂时就先记录
到这,以后有了新的领悟再修正修正。
(20条消息) Linux系统编程——进程间的通信(三)消息队列原理以及用法_30-CSDN博客_linux消息队列原理
Linux进程间通信——消息队列 - 知乎 (zhihu.com)
(20条消息) Liunx系统编程篇—进程通信(三)消息队列(原理、创建、实战)_阿——波罗的博客-CSDN博客_linux消息队列原理
(20条消息) Linux消息队列之原理实现篇(转)_weixin_34009794的博客-CSDN博客
(20条消息) Linux消息队列之原理实现篇(转)_weixin_34009794的博客-CSDN博客