epoll_create函数实现源码分析

昨晚分析了poll,通过阅读可以发现poll操作有很多可以优化的地方。epoll是eventpoll的简称,他的效率是非常高的,我们今天就来看看他的实现。他的实现在fs/eventpoll.c里,代码有1500多行,呵呵,怕了吧。

大家都知道,epoll有三个系统调用,C库把它们封装成以下三个函数:

1.  int epoll_create(int size);

2.  int epoll_ctl(int epfd, int op, int fd, struct epoll_event *event);

3.  int epoll_wait(int epfd, struct epoll_event *events,int maxevents, int timeout);

epoll的源码这么多,我就干脆跟着他们三个走着瞧。今天先搞定第一个——epoll_create。

第一个是sys_epoll_create,源码如下:

/*
 * sys_epoll_create - kernel side of the userspace epoll_create(2) call.
 *
 * @size: the caller's hint about how many file descriptors it intends to
 *        register. In this function it is only checked to be positive
 *        and printed in the debug messages.
 *
 * Returns the descriptor produced by ep_getfd() on success. On failure
 * returns -EINVAL for a non-positive @size, or otherwise the error code
 * reported by ep_getfd() or ep_file_init().
 */
asmlinkage long sys_epoll_create(int size)
{
	struct file *file;
	struct inode *inode;
	int fd;
	int error;

	DNPRINTK(3, (KERN_INFO "[%p] eventpoll: sys_epoll_create(%d)\n",
		     current, size));

	/* A non-positive size hint is rejected before anything is allocated. */
	if (size <= 0) {
		error = -EINVAL;
		goto out_fail;
	}

	/* Obtain the descriptor, the inode and the file for the new instance. */
	error = ep_getfd(&fd, &inode, &file); //(1)
	if (error)
		goto out_fail;

	/* Attach a freshly initialised "struct eventpoll" to the file. */
	error = ep_file_init(file); //(2)
	if (error) {
		/* The descriptor already exists at this point, so close it. */
		sys_close(fd);
		goto out_fail;
	}

	DNPRINTK(3, (KERN_INFO "[%p] eventpoll: sys_epoll_create(%d) = %d\n",
		     current, size, fd));

	return fd;

out_fail:
	DNPRINTK(3, (KERN_INFO "[%p] eventpoll: sys_epoll_create(%d) = %d\n",
		     current, size, error));
	return error;
}



(1)这里用到了一个ep_getfd函数,从注释可以知道,这个函数用来建立与eventpoll相关的file,当然,一个file要包括文件描述符、inode还有文件对象,这也就是我们传进去的三个参数。废话少说,看源码:


/*
* Creates the file descriptor to be used by the epoll interface.
*/
static int ep_getfd(int *efd, struct inode **einode, struct file **efile)
{
struct qstr this;
char name[32];
struct dentry *dentry;
struct inode *inode;
struct file *file;
int error, fd;

/* Get an ready to use file */
error = -ENFILE;
file = get_empty_filp();
if (!file)
goto eexit_1;

/* Allocates an inode from the eventpoll file system */
inode = ep_eventpoll_inode();
error = PTR_ERR(inode);
if (IS_ERR(inode))
goto eexit_2;

/* Allocates a free descriptor to plug the file onto */
error = get_unused_fd();
if (error < 0)
goto eexit_3;
fd = error;

/*
* Link the inode to a directory entry by creating a unique name
* using the inode number.
*/
error = -ENOMEM;
sprintf(name, "[%lu]", inode->i_ino);
this.name = name;
this.len = strlen(name);
this.hash = inode->i_ino;
dentry = d_alloc(eventpoll_mnt->mnt_sb->s_root, &this);
if (!dentry)
goto eexit_4;
dentry->d_op = &eventpollfs_dentry_operations;
d_add(dentry, inode);
file->f_vfsmnt = mntget(eventpoll_mnt);
file->f_dentry = dentry;
file->f_mapping = inode->i_mapping;

file->f_pos = 0;
file->f_flags = O_RDONLY;
file->f_op = &eventpoll_fops;
file->f_mode = FMODE_READ;
file->f_version = 0;
file->private_data = NULL;

/* Install the new setup file into the allocated fd. */
fd_install(fd, file);

*efd = fd;
*einode = inode;
*efile = file;
return 0;

eexit_4:
put_unused_fd(fd);
eexit_3:
iput(inode);
eexit_2:
put_filp(file);
eexit_1:
return error;
}


这个函数的注释都比较全,这里只简单提一下,况且因为涉及到的函数太多,要深究起来涉及的知识也太多,不可能逐一去列代码。不过这个函数个人觉得比较经典,整个函数就是创建一个文件的流程。

首先,我们得拿到一个file结构体,通过内核分配(get_empty_filp());然后我们要拿到inode,调用ep_eventpoll_inode()就可以了;接着是get_unused_fd()拿到文件描述符;接着用d_alloc()函数拿到一个dentry,并用d_add(dentry, inode)函数把dentry加入到hash表里面并且与inode关联起来;后面是继续填充文件对象file;最后调用fd_install(fd, file)向进程注册文件,并通过这样的方式把文件描述符和文件对象关联起来。

(2)在跟踪ep_file_init函数之前,我们先来看一下eventpoll结构体:

/*
 * Main data structure of the eventpoll interface. An instance of this
 * structure is stored in the "private_data" member of the epoll file
 * structure.
 */
struct eventpoll {
/* Protects access to this structure */
rwlock_t lock;

/*
 * This semaphore is used to ensure that files are not removed
 * while epoll is using them. It is read-held during the event
 * collection loop, and write-held during the file cleanup path,
 * the epoll file exit code and the ctl operations.
 */
struct rw_semaphore sem;

/* Wait queue used by sys_epoll_wait() */
wait_queue_head_t wq;

/* Wait queue used by file->poll() */
wait_queue_head_t poll_wait;

/* List of ready file descriptors */
struct list_head rdllist;

/* RB-tree root used to store the monitored fd structures */
struct rb_root rbr;
};


注释也是相当清楚。可以看得出来,eventpoll是epoll的核心,它将会存储你想要监听的文件描述符,这也是为什么epoll高效之所在。

好,我们回到sys_epoll_create函数,开始跟踪ep_file_init函数:


static int ep_file_init(struct file *file)
{
struct eventpoll *ep;

if (!(ep = kmalloc(sizeof(struct eventpoll), GFP_KERNEL)))
return -ENOMEM;

memset(ep, 0, sizeof(*ep));
rwlock_init(&ep->lock);
init_rwsem(&ep->sem);
init_waitqueue_head(&ep->wq);
init_waitqueue_head(&ep->poll_wait);
INIT_LIST_HEAD(&ep->rdllist);
ep->rbr = RB_ROOT;

file->private_data = ep;

DNPRINTK(3, (KERN_INFO "[%p] eventpoll: ep_file_init() ep=%p\n",
current, ep));
return 0;
}



也就是eventpoll结构体的初始化。

sys_epoll_create函数大概就这样了,明天接着看sys_epoll_ctl

 

你可能感兴趣的:(epoll_create函数实现源码分析)