Linux内核中poll源码分析

函数原型:

int poll (struct pollfd *fds, nfds_t nfds, int timeout);

其中pollfd类型的结构体包含三个元素。

  • fd:感兴趣的文件描述符
  • events:感兴趣的事件,包括可读、可写和异常
  • revents:实际发生的事件,由内核填充

几个重要的结构体:

/* One element of the array handed to poll(); describes a single descriptor. */
struct pollfd {
	int fd;			/* file descriptor of interest */
	short events;		/* events of interest: readable, writable, exceptional */
	short revents;		/* events that actually occurred; filled in by the kernel */
};

/*
 * A poll_wqueues structure is initialised for every select() call and
 * belongs to that call. Its polling_task field points to the task_struct
 * of the calling process (i.e. current).
 *
 * The private field of every wait_queue_t node placed on a resource's
 * wait queue points to the owning poll_wqueues object. When the resource
 * becomes ready, the poll_wqueues object is reached through that pointer
 * and its polling_task field yields the caller's task_struct, which is
 * then woken.
 */

struct poll_wqueues {
	poll_table pt;		/* holds the queueing callback (a function pointer) */
	struct poll_table_page *table;	/* most recently allocated entry page, NULL until poll_get_entry() allocates one */
	struct task_struct *polling_task;	/* task_struct of the calling process */
	int triggered;		/* set to 1 by __pollwake() */
	int error;		/* set to -ENOMEM by poll_get_entry() when a page cannot be allocated */
	int inline_index;	/* number of inline_entries already handed out */
	struct poll_table_entry inline_entries[N_INLINE_POLL_ENTRIES];	/* entries used before any page is allocated */
};

/* Forward declaration so the callback type below can name the struct. */
struct poll_table_struct;

/*
 * Queueing callback invoked through poll_wait(). It receives the file, the
 * wait queue head of the resource, and the poll_table the call came through.
 */
typedef void (*poll_queue_proc)(struct file *, wait_queue_head_t *,
				struct poll_table_struct *);

/*
 * Poll table: the queueing callback plus the event mask. The callback type
 * is declared above because it must be known before this struct uses it.
 */
typedef struct poll_table_struct {
	poll_queue_proc _qproc;	/* callback used to queue a waiter */
	unsigned long _key;	/* event mask */
} poll_table;

/* The structure that is actually hooked onto a resource's wait queue. */
struct poll_table_entry {
	struct file *filp;		/* file this entry was queued for (a struct file pointer, not a descriptor number) */
	unsigned long key;		/* mask of the awaited events, e.g. POLLIN, POLLOUT, POLLERR */
	wait_queue_t wait;		/* wait queue node; its private member refers to the poll_wqueues owning this entry */
	wait_queue_head_t *wait_address;		/* points to the head of the resource's wait queue */
};

static inline void poll_wait(struct file * filp,ait_queue_head_t * wait_address,
	poll_table *p)
{	
	if (p && p->_qproc && wait_address)
		p->_qproc(filp, wait_address, p);
}

/*
 * A page of poll_table_entry slots, chained into a singly linked list.
 * The entries live directly behind the header in the same allocation.
 */
struct poll_table_page {
	struct poll_table_page *next;		/* next page in the list */
	struct poll_table_entry *entry;		/* next unused slot in entries[] */
	struct poll_table_entry entries[];	/* C99 flexible array member (was the GNU zero-length array entries[0]) */
};

一些重要函数

/*
 * poll_initwait - prepare a poll_wqueues for a new polling call
 * @pwq: structure to initialise
 *
 * Installs __pollwait as the queueing callback of the embedded poll_table,
 * records the current task as the poller, and resets the entry bookkeeping
 * and the triggered/error state.
 */
void poll_initwait(struct poll_wqueues *pwq)
{
    /* Queueing callback of the embedded poll_table. */
    init_poll_funcptr(&pwq->pt, __pollwait);

    /* No page allocated and no inline entry handed out yet. */
    pwq->table = NULL;
    pwq->inline_index = 0;

    /* Nothing has fired and nothing has failed so far. */
    pwq->triggered = 0;
    pwq->error = 0;

    /* The task to wake is the one making this call. */
    pwq->polling_task = current;
}

/*
 * init_poll_funcptr - set up a poll_table
 * @pt: poll table to initialise
 * @qproc: queueing callback to install
 *
 * Stores @qproc in the table and enables every event in its mask. The
 * members are named _qproc and _key in struct poll_table_struct.
 */
static inline void init_poll_funcptr(poll_table *pt, poll_queue_proc qproc)
{
    pt->_qproc = qproc;
    pt->_key   = ~0UL; /* all events enabled */
}

/* Add a new entry */
/**
 * __pollwait - hook a poll_table_entry onto a resource's wait queue
 * @filp: file being monitored
 * @wait_address: head of the wait queue of the monitored file
 * @p: poll_table embedded in a poll_wqueues (set up by poll_initwait())
 *
 * Returns without queueing anything when no entry can be obtained.
 */
static void __pollwait(struct file *filp, wait_queue_head_t *wait_address,
                poll_table *p)
{
    /* Recover the enclosing poll_wqueues from its embedded poll_table. */
    struct poll_wqueues *pwq = container_of(p, struct poll_wqueues, pt);
    /* Take one poll_table_entry out of the poll_wqueues. */
    struct poll_table_entry *entry = poll_get_entry(pwq);
    if (!entry)
        return;
    /* Take a reference on the file. */
    get_file(filp);
    /* Associate the entry with the file. */
    entry->filp = filp;
    /* Remember the head of the file's wait queue. */
    entry->wait_address = wait_address;
    /* Record the events of interest; the poll_table member is named _key. */
    entry->key = p->_key;
    /*
     * Initialise the wait queue node.
     *
     * Important: its wake-up callback is pollwake.
     */
    init_waitqueue_func_entry(&entry->wait, pollwake);
    /*
     * Why the node's private field is set up this way:
     * 1. Actual kernel design:
     *    the private field of every wait_queue_t points to the same
     *    poll_wqueues, and that shared poll_wqueues holds the pointer to
     *    the calling task's task_struct: n + 1 pointers in total.
     * 2. Hypothetical alternative:
     *    the private field of every wait_queue_t points to the calling
     *    task's task_struct, and every poll_table_entry additionally holds
     *    a pointer to the shared poll_wqueues: n + n pointers in total.
     */
    entry->wait.private = pwq;
    /* Hook the entry's wait node onto the file's wait queue. */
    add_wait_queue(wait_address, &entry->wait);
}

/*
 * poll_get_entry - hand out the next unused poll_table_entry
 * @p: poll_wqueues to take the entry from
 *
 * Entries come from the inline array first. Once that is used up they come
 * from the most recently allocated page, and a new page is allocated and
 * pushed onto the front of the page list when there is no page or the
 * current one is full. Returns NULL and sets p->error to -ENOMEM when the
 * page allocation fails.
 */
static struct poll_table_entry *poll_get_entry(struct poll_wqueues *p)
{
    struct poll_table_page *page = p->table;
    struct poll_table_page *fresh;

    /* The inline array still has room: use its next slot. */
    if (p->inline_index < N_INLINE_POLL_ENTRIES) {
        struct poll_table_entry *slot = &p->inline_entries[p->inline_index];

        p->inline_index++;
        return slot;
    }

    /* A page exists and is not full: take its next slot. */
    if (page && !POLL_TABLE_FULL(page))
        return page->entry++;

    /* No page yet, or the current one is full: allocate one more page. */
    fresh = (struct poll_table_page *) __get_free_page(GFP_KERNEL);
    if (!fresh) {
        p->error = -ENOMEM;
        return NULL;
    }

    /* The new page starts empty and becomes the head of the list. */
    fresh->entry = fresh->entries;
    fresh->next = page;
    p->table = fresh;

    return fresh->entry++;
}

/*
 * poll_freewait - release every entry handed out from a poll_wqueues
 * @pwq: poll_wqueues to tear down
 *
 * Releases the used inline entries, then walks the page list, releasing
 * the entries of each page from last to first and freeing the page.
 */
void poll_freewait(struct poll_wqueues *pwq)
{
    struct poll_table_page *page = pwq->table;
    int idx = 0;

    /* Entries that were handed out from the inline array. */
    while (idx < pwq->inline_index) {
        free_poll_entry(&pwq->inline_entries[idx]);
        idx++;
    }

    /* Entries that live in dynamically allocated pages. */
    while (page) {
        struct poll_table_page *following = page->next;
        struct poll_table_entry *cursor = page->entry;

        /*
         * A page holds at least one handed-out entry, so stepping the
         * cursor back before the first test is safe.
         */
        do {
            free_poll_entry(--cursor);
        } while (cursor > page->entries);

        free_page((unsigned long) page);
        page = following;
    }
}

/*
 * free_poll_entry - undo the registration of one poll_table_entry
 * @entry: entry to release
 *
 * Takes the entry's wait node off the wait queue recorded in wait_address,
 * then calls fput() on the entry's file (presumably the counterpart of the
 * get_file() done when the entry was queued).
 */
static void free_poll_entry(struct poll_table_entry *entry)
{
    remove_wait_queue(entry->wait_address, &entry->wait);
    fput(entry->filp);
}

/**
 * pollwake - wake-up callback that filters on the events of interest
 * @wait: the wait member of a poll_table_entry
 * @mode: passed through unchanged to __pollwake()
 * @sync: passed through unchanged to __pollwake()
 * @key: carries the current state of the resource
 *
 * Returns 0 without waking anyone when @key is non-NULL and shares no bit
 * with the entry's event mask; otherwise returns the result of __pollwake().
 */
static int pollwake(wait_queue_t *wait, unsigned mode, int sync, void *key)
{
    /* The wait node is embedded in its poll_table_entry. */
    struct poll_table_entry *owner =
        container_of(wait, struct poll_table_entry, wait);
    unsigned long ready = (unsigned long)key;

    /* No key at all, or at least one awaited event is ready: wake up. */
    if (!key || (ready & owner->key))
        return __pollwake(wait, mode, sync, key);

    /* None of the ready events is one we asked for: ignore this wake-up. */
    return 0;
}

/*
 * The wake-up callback that does the real work once a resource is ready;
 * pollwake() calls it after checking the event mask.
 */
static int __pollwake(wait_queue_t *wait, unsigned mode, int sync, void *key)
{
    /* The node's private field holds the poll_wqueues of the polling call. */
    struct poll_wqueues *pwq = wait->private;
    /* Build a wait queue node whose private field is the polling task. */
    DECLARE_WAITQUEUE(dummy_wait, pwq->polling_task);

    /*
     * Although this function is called under waitqueue lock, LOCK
     * doesn't imply write barrier and the users expect write
     * barrier semantics on wakeup functions.  The following
     * smp_wmb() is equivalent to smp_wmb() in try_to_wake_up()
     * and is paired with set_mb() in poll_schedule_timeout.
     */
    smp_wmb();
    /* Mark the poll_wqueues as triggered. */
    pwq->triggered = 1;

    /*
     * Perform the default wake up operation using a dummy
     * waitqueue.
     *
     * TODO: This is hacky but there currently is no interface to
     * pass in @sync.  @sync is scheduled to be removed and once
     * that happens, wake_up_process() can be used directly.
     */
    /* Wake the polling task through the default wake function. */
    return default_wake_function(&dummy_wait, mode, sync, key);
}

nfds指定被监听事件集合fds的大小

/* Type of the nfds argument in the poll() prototype. */
typedef unsigned long int nfds_t;

timeout参数指定poll的超时值,单位是毫秒

当timeout为负值(通常写作-1)时,poll调用将一直阻塞,直到某个事件发生;当timeout为0时,poll调用立即返回。

poll系统调用先把以毫秒为单位的超时值转换为timespec形式的结束时间(end_time),然后调用do_sys_poll函数:

/*
 * poll system call entry point.
 *
 * @ufds: user-space array of pollfd structures
 * @nfds: number of entries in @ufds
 * @timeout_msecs: timeout in milliseconds; a negative value means no timeout
 *
 * A non-negative timeout is converted with poll_select_set_timeout() into
 * end_time, which is passed to do_sys_poll(); otherwise a NULL timeout is
 * passed. When do_sys_poll() returns -EINTR, the restart block is filled
 * in with the arguments and the end time, and the call returns
 * -ERESTART_RESTARTBLOCK. Any other result of do_sys_poll() is returned
 * unchanged.
 */
SYSCALL_DEFINE3(poll, struct pollfd __user *, ufds, unsigned int, nfds,
		int, timeout_msecs)
{
	struct timespec end_time, *to = NULL;
	int ret;

	if (timeout_msecs >= 0) {
		to = &end_time;
		/* Split the milliseconds into whole seconds and nanoseconds. */
		poll_select_set_timeout(to, timeout_msecs / MSEC_PER_SEC,
			NSEC_PER_MSEC * (timeout_msecs % MSEC_PER_SEC));
	}

	ret = do_sys_poll(ufds, nfds, to);

	if (ret == -EINTR) {
		struct restart_block *restart_block;

		/* Record what is needed to restart the call through do_restart_poll. */
		restart_block = &current_thread_info()->restart_block;
		restart_block->fn = do_restart_poll;
		restart_block->poll.ufds = ufds;
		restart_block->poll.nfds = nfds;

		if (timeout_msecs >= 0) {
			/* Keep the end time computed above for the restarted call. */
			restart_block->poll.tv_sec = end_time.tv_sec;
			restart_block->poll.tv_nsec = end_time.tv_nsec;
			restart_block->poll.has_timeout = 1;
		} else {
			restart_block->poll.has_timeout = 0;
		}

		ret = -ERESTART_RESTARTBLOCK;
	}
	return ret;
}

Linux内核中poll源码分析_第1张图片

你可能感兴趣的:(Linux内核中poll源码分析)