epoll源码探秘(epoll_create)

epoll源码探秘(epoll_create)

epoll系列的系统调用接口很简单,但功能很强大:epoll_create()、epoll_ctl()、epoll_wait(),三个函数就够了。

一些重要的结构:

/*
 * Each file descriptor added to the eventpoll interface will
 * have an entry of this type linked to the "rbr" RB tree.
 * Avoid increasing the size of this struct, there can be many thousands
 * of these on a server and we do not want this to take another cache line.
 * (Author's note: one instance is a single node of the red-black tree.)
 */
struct epitem {
	union {
		/* RB tree node links this structure to the eventpoll RB tree */
		struct rb_node rbn;
		/* Used to free the struct epitem */
		struct rcu_head rcu;
	};

	/* List header used to link this structure to the eventpoll ready list */
	struct list_head rdllink;

	/*
	 * Works together "struct eventpoll"->ovflist in keeping the
	 * single linked chain of items.
	 */
	struct epitem *next;

	/* The file descriptor information this item refers to (the associated fd) */
	struct epoll_filefd ffd;

	/* Number of active wait queue attached to poll operations */
	int nwait;

	/* List containing poll wait queues */
	struct list_head pwqlist;

	/* The "container" of this item */
	struct eventpoll *ep;

	/* List header used to link this item to the "struct file" items list */
	struct list_head fllink;

	/* wakeup_source used when EPOLLWAKEUP is set */
	struct wakeup_source __rcu *ws;

	/*
	 * The structure that describe the interested events and the source fd
	 * (author's note: the events of interest for the monitored descriptor)
	 */
	struct epoll_event event;
};
/*
 * This structure is stored inside the "private_data" member of the file
 * structure and represents the main data structure for the eventpoll
 * interface.
 */
struct eventpoll {
	/* Protect the access to this structure */
	spinlock_t lock;

	/*
	 * This mutex is used to ensure that files are not removed
	 * while epoll is using them. This is held during the event
	 * collection loop, the file cleanup path, the epoll file exit
	 * code and the ctl operations.
	 */
	struct mutex mtx;

	/*
	 * Wait queue used by sys_epoll_wait().
	 * Author's note: a doubly linked list acting as the wait queue of the
	 * epoll file. A process calling epoll_wait may sleep on this queue
	 * until it is woken by ep_poll_callback() or its timeout expires.
	 */
	wait_queue_head_t wq;

	/*
	 * Wait queue used by file->poll().
	 * Author's note: a doubly linked list; poll_wait is the wake-up queue
	 * of the eventpoll file itself. Processes sleeping on it are waiting
	 * for events on the eventpoll file itself.
	 */
	wait_queue_head_t poll_wait;

	/* List of ready file descriptors (the ready list) */
	struct list_head rdllist;

	/* RB tree root used to store monitored fd structs */
	struct rb_root rbr;

	/*
	 * This is a single linked list that chains all the "struct epitem" that
	 * happened while transferring ready events to userspace w/out
	 * holding ->lock.
	 * Author's note: while events are being transferred to userspace, items
	 * that become ready are temporarily parked on this list; otherwise they
	 * are added directly to the ready list rdllist.
	 */
	struct epitem *ovflist;

	/* wakeup_source used when ep_scan_ready_list is running */
	struct wakeup_source *ws;

	/* The user that created the eventpoll descriptor */
	struct user_struct *user;

	/* The file this eventpoll is attached to (assigned in epoll_create1) */
	struct file *file;

	/* used to optimize loop detection check */
	int visited;
	/* NOTE(review): presumably links this eventpoll into the visited list
	 * used by that same loop check -- confirm against the kernel source. */
	struct list_head visited_list_link;
};


epoll_create函数:

         创建一个epoll的句柄。需要注意的是,epoll句柄创建好之后,它本身就会占用一个fd值。在linux下查看/proc/进程id/fd/,可以看到这个fd。所以在使用完epoll后,必须调用close()将其关闭,否则可能导致fd被耗尽。

int epoll_create(int size);
int epoll_create1(int flags);

第一级:epoll_create()(注意在Linux 2.6.8之后,size参数的具体数值被忽略,但仍然必须大于0,否则返回-EINVAL,见下文源码)

第二级: epoll_create1()

第三级:ep_alloc()创建内部数据(eventpoll)

                 在ep_alloc()中

               1.初始化epoll文件等待队列(双向链表)

               2.初始化eventpoll文件唤醒队列(双向链表)

               3.初始化就绪队列(双向链表)

/*
 * Initialise a list head in place: after the call both the forward and the
 * backward link of @list point back at @list itself.
 */
static inline void INIT_LIST_HEAD(struct list_head *list)
{
	list->next = list->prev = list;
}

               4.初始化红黑树根节点

/* Initializer for a struct rb_root whose first member is NULL. */
#define RB_ROOT	(struct rb_root) { NULL, }
/* Inside ep_alloc(): give the new eventpoll's RB tree root that initial value. */
ep->rbr = RB_ROOT;

               5.初始化ovflist(单链表):该链表用于在向用户空间传递事件期间,暂存新就绪的epitem节点;初始值为EP_UNACTIVE_PTR

/* Sentinel pointer value: -1L cast to void *. */
#define EP_UNACTIVE_PTR ((void *) -1L)
/* Inside ep_alloc(): the overflow list starts out holding the sentinel. */
ep->ovflist = EP_UNACTIVE_PTR;

第三级:get_unused_fd_flags()获取一个空闲的文件描述符

第三级:anon_inode_getfile()创建一个匿名文件

第三级:fd_install()将文件与fd建立联系

/*
 * Open an eventpoll file descriptor.
 *
 * @flags: may only contain EPOLL_CLOEXEC; any other bit yields -EINVAL.
 *
 * Returns the new file descriptor on success, or a negative error code
 * propagated from ep_alloc(), get_unused_fd_flags() or anon_inode_getfile().
 */
SYSCALL_DEFINE1(epoll_create1, int, flags)
{
	int error, fd;
	struct eventpoll *ep = NULL;
	struct file *file;

	/*
	 * Check the EPOLL_* constant for consistency.
	 * (Author's note: the build fails if EPOLL_CLOEXEC != O_CLOEXEC.)
	 */
	BUILD_BUG_ON(EPOLL_CLOEXEC != O_CLOEXEC);

	/* Reject any flag bit other than EPOLL_CLOEXEC. */
	if (flags & ~EPOLL_CLOEXEC)
		return -EINVAL;
	/*
	 * Create the internal data structure ("struct eventpoll").
	 */
	error = ep_alloc(&ep);
	if (error < 0)
		return error;
	/*
	 * Creates all the items needed to setup an eventpoll file. That is,
	 * a file structure and a free file descriptor.
	 *
	 * Author's note: the eventpoll instance allocated above is stored in
	 * the private_data member of the file structure. private_data holds
	 * the object a file descriptor really stands for; for example, if the
	 * descriptor is a socket, the private_data of its file instance holds
	 * the socket instance.
	 */
	fd = get_unused_fd_flags(O_RDWR | (flags & O_CLOEXEC));
	if (fd < 0) {
		error = fd;
		goto out_free_ep;
	}
	/* ep is handed to the new "[eventpoll]" file together with eventpoll_fops. */
	file = anon_inode_getfile("[eventpoll]", &eventpoll_fops, ep,
				 O_RDWR | (flags & O_CLOEXEC));
	if (IS_ERR(file)) {
		error = PTR_ERR(file);
		goto out_free_fd;
	}
	/* Record the back-pointer before the file is bound to the descriptor. */
	ep->file = file;
	fd_install(fd, file);
	return fd;

	/* Error unwinding: release in the reverse order of acquisition. */
out_free_fd:
	put_unused_fd(fd);
out_free_ep:
	ep_free(ep);
	return error;
}

/*
 * Legacy entry point. @size is only validated: a strictly positive value
 * forwards to epoll_create1 with no flags, anything else yields -EINVAL.
 */
SYSCALL_DEFINE1(epoll_create, int, size)
{
	/* The value of size is not passed on; only its sign matters here. */
	if (size > 0)
		return sys_epoll_create1(0);

	return -EINVAL;
}




























你可能感兴趣的:(linux,源码探秘,epoll)