当系统启动时,epoll 会进行如下初始化操作:
/*
 * State used to implement "safe" poll wakeups, i.e. to avoid re-entering
 * the poll callback from inside wake_up().
 */
struct poll_safewake {
/* List head; what gets linked here is not shown in this excerpt */
struct list_head wake_task_list;
/* NOTE(review): presumably protects wake_task_list - confirm */
spinlock_t lock;
};
/* Mutex used to serialize ep_free() and eventpoll_release_file() */
static struct mutex epmutex;
/* Safe wake up implementation */
static struct poll_safewake psw;
/*
 * One-time epoll setup, registered through fs_initcall(). Prepares the
 * global mutex, the safe-wakeup state and the two slab caches that back
 * "struct epitem" and "struct eppoll_entry" allocations. Always returns 0.
 */
static int __init eventpoll_init(void)
{
	/* Slab flags for each cache, spelled out once up front. */
	const unsigned long epi_flags =
		SLAB_HWCACHE_ALIGN|EPI_SLAB_DEBUG|SLAB_PANIC;
	const unsigned long pwq_flags = EPI_SLAB_DEBUG|SLAB_PANIC;

	/* Global mutex (serializes ep_free() and eventpoll_release_file()). */
	mutex_init(&epmutex);

	/* Structure used to perform safe poll wait head wake ups. */
	ep_poll_safewake_init(&psw);

	/* Cache from which "struct epitem" objects are allocated. */
	epi_cache = kmem_cache_create("eventpoll_epi", sizeof(struct epitem),
				      0, epi_flags, NULL);

	/* Cache from which "struct eppoll_entry" objects are allocated. */
	pwq_cache = kmem_cache_create("eventpoll_pwq",
				      sizeof(struct eppoll_entry),
				      0, pwq_flags, NULL);

	return 0;
}
fs_initcall(eventpoll_init);
epoll 需要三个级别的锁:
1. epmutex(mutex);
2. ep->mtx(mutex);
3. ep->lock(spinlock)。
对于自旋锁 ep->lock(spinlock):我们会在 poll 回调内部操作对象,而该回调可能由 wake_up() 触发,wake_up() 又有可能在中断请求(IRQ)上下文中被调用。因此我们不能在 poll 回调中休眠,必须使用 spinlock。
在事件传输循环(从内核到用户空间)期间,copy_to_user() 可能导致睡眠,所以我们需要一个允许睡眠的锁。这个锁是互斥锁 ep->mtx,它在事件传输循环、epoll_ctl(EPOLL_CTL_DEL) 和 eventpoll_release_file() 期间被获取。此外,我们还需要一个全局互斥锁(epmutex)来序列化 eventpoll_release_file() 和 ep_free()。
这个全局互斥锁在 epoll 文件清理路径中由 ep_free() 获取;如果某个文件已被加入(push)到 epoll set 中,随后在没有先调用 epoll_ctl(EPOLL_CTL_DEL) 的情况下就被关闭,那么 eventpoll_release_file() 也会获取它。
/*
 * Pairs a file pointer with its descriptor number. Embedded in
 * "struct epitem" as the file descriptor information, and built by
 * ep_find() as the key it compares against tree entries.
 */
struct epoll_filefd {
struct file *file;
int fd;
};
/* Doubly linked list link: pointers to the next and previous entries. */
struct list_head {
struct list_head *next, *prev;
};
/*
 * Event description exchanged with user space; sys_epoll_ctl() copies one
 * in from the caller and it is stored in "struct epitem"->event.
 */
struct epoll_event {
/* Bit mask of events (POLLERR/POLLHUP and the EPOLL* bits are OR-ed into it) */
__u32 events;
/* NOTE(review): presumably opaque caller data handed back with the event - confirm */
__u64 data;
} EPOLL_PACKED;
/* Root of a red-black tree: ep_find() starts its descent from rb_node. */
struct rb_root
{
struct rb_node *rb_node;
};
/* Node of a red-black tree; embedded in the objects stored in the tree. */
struct rb_node
{
/* NOTE(review): the name suggests the parent link and the node color share this word - confirm against rbtree.h */
unsigned long rb_parent_color;
#define RB_RED 0
#define RB_BLACK 1
struct rb_node *rb_right; /* right subtree */
struct rb_node *rb_left; /* left subtree */
} __attribute__((aligned(sizeof(long))));
/*
 * Each file descriptor added to an eventpoll has one epitem entry, which is
 * linked into the eventpoll red-black tree.
 */
struct epitem {
/* Red-black tree node used to link this epitem into the eventpoll tree */
struct rb_node rbn;
/* List head used to link this epitem into the eventpoll ready list */
struct list_head rdllink;
/*
 * Works together "struct eventpoll"->ovflist in keeping the
 * single linked chain of items.
 */
struct epitem *next;
/* Information about the file descriptor this item refers to */
struct epoll_filefd ffd;
/*
 * Number of active wait queues attached to poll operations;
 * ep_ptable_queue_proc() sets it to -1 to signal an error
 */
int nwait;
/* List containing the poll wait queues (struct eppoll_entry) */
struct list_head pwqlist;
/* The eventpoll container this item belongs to */
struct eventpoll *ep;
/* List head used to link this item into the "struct file" items list */
struct list_head fllink;
/* Describes the events of interest and the source file descriptor */
struct epoll_event event;
};
/*
 * Main data structure of the eventpoll interface. It is stored in the
 * "private_data" member of the file structure.
 */
struct eventpoll {
/* Protects this structure */
spinlock_t lock;
/*
 * Ensures that files are not removed while epoll is using them. Held
 * during the event collection loop, the file cleanup path, the epoll
 * file exit code and the ctl operations.
 */
struct mutex mtx;
/* Wait queue used by sys_epoll_wait() */
wait_queue_head_t wq;
/* Wait queue used by file->poll() */
wait_queue_head_t poll_wait;
/* List of ready file descriptors */
struct list_head rdllist;
/* Red-black tree root used to store the monitored fd structures */
struct rb_root rbr;
/*
 * Singly linked list chaining every epitem whose event fired while ready
 * events were being transferred to user space, a phase during which
 * ep->lock is not held. ep_poll_callback() itself links items here with
 * ep->lock taken. EP_UNACTIVE_PTR means the list is not in use.
 */
struct epitem *ovflist;
};
/* Wait structure used by the poll hooks */
struct eppoll_entry {
/* List head linking this entry into "struct epitem"->pwqlist */
struct list_head llink;
/* Points back to the "struct epitem" that owns this entry */
void *base;
/*
 * Wait queue item that gets linked to the target file's wait queue head
 */
wait_queue_t wait;
/* The wait queue head the wait queue item above is linked to */
wait_queue_head_t *whead;
};
/*
* 打开一个eventepoll文件描述符。size参数现在已经被抛弃
*/
asmlinkage long sys_epoll_create(int size)
{
int error, fd = -1;
struct eventpoll *ep;
DNPRINTK(3, (KERN_INFO "[%p] eventpoll: sys_epoll_create(%d)\n",
current, size));
/*
* 对size参数进行完整性检查
*/
error = -EINVAL; //EINVAL表示无效的参数
if (size <= 0 || (error = ep_alloc(&ep)) < 0) { //ep_alloc初始化eventpoll结构
fd = error;
goto error_return;
}
/*
* 创建设置eventpoll文件所需的所有项目。即,文件结构和空闲文件描述符
* 将创建的eventpoll文件和文件描述符关联起来
* static const struct file_operations eventpoll_fops = {
* .release = ep_eventpoll_release,
* .poll = ep_eventpoll_poll
* };
*/
fd = anon_inode_getfd("[eventpoll]", &eventpoll_fops, ep);
if (fd < 0)
ep_free(ep);
error_return:
DNPRINTK(3, (KERN_INFO "[%p] eventpoll: sys_epoll_create(%d) = %d\n",
current, size, fd));
return fd;
}
static int ep_alloc(struct eventpoll **pep)
{
struct eventpoll *ep = kzalloc(sizeof(*ep), GFP_KERNEL);
if (!ep)
return -ENOMEM;
spin_lock_init(&ep->lock);
mutex_init(&ep->mtx);
init_waitqueue_head(&ep->wq);
init_waitqueue_head(&ep->poll_wait);
INIT_LIST_HEAD(&ep->rdllist);
ep->rbr = RB_ROOT;
ep->ovflist = EP_UNACTIVE_PTR;
*pep = ep;
DNPRINTK(3, (KERN_INFO "[%p] eventpoll: ep_alloc() ep=%p\n",
current, ep));
return 0;
}
/*
 * File callbacks that implement the eventpoll file behaviour. This table is
 * what sys_epoll_create() hands to anon_inode_getfd(); only the release and
 * poll handlers are provided.
 */
static const struct file_operations eventpoll_fops = {
.release = ep_eventpoll_release,
.poll = ep_eventpoll_poll
};
static int ep_eventpoll_release(struct inode *inode, struct file *file)
{
struct eventpoll *ep = file->private_data; //file->private_data指向eventpoll结构
if (ep)
ep_free(ep); //释放eventpoll
DNPRINTK(3, (KERN_INFO "[%p] eventpoll: close() ep=%p\n", current, ep));
return 0;
}
/*
 * ->poll handler of the eventpoll file itself.
 *
 * Registers the caller on ep->poll_wait and reports POLLIN | POLLRDNORM
 * when the ready list is not empty, 0 otherwise.
 */
static unsigned int ep_eventpoll_poll(struct file *file, poll_table *wait)
{
	struct eventpoll *ep = file->private_data;
	unsigned long flags;
	unsigned int pollflags;

	/*
	 * Insert "wait" into our poll wait queue. poll_wait() simply forwards
	 * to wait->qproc(file, &ep->poll_wait, wait) when both the poll table
	 * and the wait queue head are non-NULL; qproc is whatever callback was
	 * installed in the table with init_poll_funcptr().
	 */
	poll_wait(file, &ep->poll_wait, wait);

	/* Check our condition: readable as soon as something is ready. */
	spin_lock_irqsave(&ep->lock, flags);
	pollflags = list_empty(&ep->rdllist) ? 0 : (POLLIN | POLLRDNORM);
	spin_unlock_irqrestore(&ep->lock, flags);

	return pollflags;
}
/*
 * Called from sys_epoll_create(int size) as:
 *   fd = anon_inode_getfd("[eventpoll]", &eventpoll_fops, ep);
 */
/**
 * anon_inode_getfd - creates a new file instance by hooking it up to an
 *                    anonymous inode and a dentry that describes the
 *                    "class" of the file
 *
 * @name: [in] name of the "class" of the new file
 * @fops [in] file operations for the new file
 * @priv [in] private data for the new file (will be file's private_data)
 *
 * All the files created with anon_inode_getfd() will share a single inode,
 * hence saving memory and avoiding code duplication for the file/inode/dentry
 * setup.
 *
 * Returns the new file descriptor on success. On failure returns a negative
 * errno: -ENODEV when anon_inode_inode is an error pointer, the error from
 * get_unused_fd(), -ENOMEM when d_alloc() fails, or -ENFILE when
 * alloc_file() fails.
 */
int anon_inode_getfd(const char *name, const struct file_operations *fops,
void *priv)
{
/*
 * For reference:
 * struct qstr {
 * unsigned int hash;
 * unsigned int len;
 * const unsigned char *name;
 * };
 */
struct qstr this;
struct dentry *dentry;
struct file *file;
int error, fd;
if (IS_ERR(anon_inode_inode))
return -ENODEV;
error = get_unused_fd(); /* obtain an unused file descriptor */
if (error < 0)
return error;
fd = error;
/*
 * Link the inode to a directory entry by creating a unique name
 * using the inode sequence number.
 */
error = -ENOMEM; /* reported if d_alloc() below fails (out of memory) */
this.name = name;
this.len = strlen(name);
this.hash = 0;
dentry = d_alloc(anon_inode_mnt->mnt_sb->s_root, &this);
if (!dentry)
goto err_put_unused_fd;
/*
 * We know the anon_inode inode count is always greater than zero,
 * so we can avoid doing an igrab() and we can use an open-coded
 * atomic_inc().
 */
atomic_inc(&anon_inode_inode->i_count);
dentry->d_op = &anon_inodefs_dentry_operations;
/*
 * Do not publish this dentry inside the global dentry hash table.
 * NOTE(review): the statement clears DCACHE_UNHASHED while nothing here
 * hashes the dentry - confirm the intent against the upstream source.
 */
dentry->d_flags &= ~DCACHE_UNHASHED;
d_instantiate(dentry, anon_inode_inode);
error = -ENFILE; /* reported if alloc_file() below fails */
file = alloc_file(anon_inode_mnt, dentry,
FMODE_READ | FMODE_WRITE, fops);
if (!file)
goto err_dput;
file->f_mapping = anon_inode_inode->i_mapping;
file->f_pos = 0;
file->f_flags = O_RDWR;
file->f_version = 0;
file->private_data = priv;
/* Install the file pointer in the fd array */
fd_install(fd, file);
return fd;
/* Error unwinding: drop the dentry first, then give the descriptor back */
err_dput:
dput(dentry);
err_put_unused_fd:
put_unused_fd(fd);
return error;
}
/*
 * Implements the controller of the eventpoll file, i.e. the
 * insertion/removal/change operations on the interest set.
 *
 * Returns the result of ep_insert()/ep_remove()/ep_modify(), or a negative
 * errno: -EFAULT if the event cannot be copied from user space, -EBADF if
 * either descriptor has no file, -EPERM if the target file has no poll
 * operation, -EINVAL if epfd is not an eventpoll file, refers to the same
 * file as fd, or op is not one of the three handled operations, -EEXIST
 * when adding an fd that is already present, and -ENOENT when deleting or
 * modifying an fd that is not present.
 */
asmlinkage long sys_epoll_ctl(int epfd, int op, int fd,
struct epoll_event __user *event)
{
int error;
struct file *file, *tfile;
struct eventpoll *ep;
struct epitem *epi;
struct epoll_event epds;
DNPRINTK(3, (KERN_INFO "[%p] eventpoll: sys_epoll_ctl(%d, %d, %d, %p)\n",
current, epfd, op, fd, event));
/*
 * ep_op_has_event(op) decides whether this operation carries an event;
 * per the original annotation it is false for EPOLL_CTL_DEL, so the copy
 * is skipped in that case. For the other operations, a failure to copy
 * the event into epds jumps to error_return with -EFAULT.
 */
error = -EFAULT;
if (ep_op_has_event(op) &&
copy_from_user(&epds, event, sizeof(struct epoll_event)))
goto error_return;
/* Get the "struct file *" for the eventpoll file */
error = -EBADF;
file = fget(epfd);
if (!file)
goto error_return;
/* Get the "struct file *" for the target file */
tfile = fget(fd);
if (!tfile)
goto error_fput;
/* The target file descriptor must support poll */
error = -EPERM;
if (!tfile->f_op || !tfile->f_op->poll)
goto error_tgt_fput;
/*
 * The file behind epfd must be an eventpoll file, and it must not be the
 * same file as the one behind fd.
 */
error = -EINVAL;
if (file == tfile || !is_file_epoll(file))
goto error_tgt_fput;
/*
 * file->private_data holds the eventpoll structure that
 * sys_epoll_create() bound to the file through anon_inode_getfd().
 */
ep = file->private_data;
mutex_lock(&ep->mtx);
/*
 * Look up the node for (tfile, fd) in the eventpoll tree; yields the
 * matching epitem or NULL.
 */
epi = ep_find(ep, tfile, fd);
error = -EINVAL;
switch (op) { /* dispatch on the operation; any other op keeps -EINVAL */
case EPOLL_CTL_ADD:
if (!epi) {
epds.events |= POLLERR | POLLHUP; /* always report error and hang-up conditions */
error = ep_insert(ep, &epds, tfile, fd); /* insert a new node into the ep tree */
} else
error = -EEXIST; /* already present: report an error */
break;
case EPOLL_CTL_DEL:
if (epi)
error = ep_remove(ep, epi); /* remove epi from the ep tree */
else
error = -ENOENT;
break;
case EPOLL_CTL_MOD: /* modify an existing entry */
if (epi) {
epds.events |= POLLERR | POLLHUP;
error = ep_modify(ep, epi, &epds);
} else
error = -ENOENT;
break;
}
mutex_unlock(&ep->mtx);
/* Unwinding: drop the file references in reverse order of acquisition */
error_tgt_fput:
fput(tfile);
error_fput:
fput(file);
error_return:
DNPRINTK(3, (KERN_INFO "[%p] eventpoll: sys_epoll_ctl(%d, %d, %d, %p) = %d\n",
current, epfd, op, fd, event, error));
return error;
}
static struct epitem *ep_find(struct eventpoll *ep, struct file *file, int fd)
{
int kcmp;
struct rb_node *rbp;
struct epitem *epi, *epir = NULL;
struct epoll_filefd ffd;
ep_set_ffd(&ffd, file, fd); //初始化ffd
for (rbp = ep->rbr.rb_node; rbp; ) {
epi = rb_entry(rbp, struct epitem, rbn);
kcmp = ep_cmp_ffd(&ffd, &epi->ffd);
if (kcmp > 0)
rbp = rbp->rb_right;
else if (kcmp < 0)
rbp = rbp->rb_left;
else {
epir = epi;
break;
}
}
DNPRINTK(3, (KERN_INFO "[%p] eventpoll: ep_find(%p) -> %p\n",
current, file, epir));
return epir;
}
/*
 * This is the callback that is passed to the wait queue wakeup mechanism.
 * It is called when a stored file descriptor has events to report.
 * It marks the item ready (or parks it on ep->ovflist while an event
 * transfer is in progress) and wakes up the waiters. Always returns 1.
 */
static int ep_poll_callback(wait_queue_t *wait, unsigned mode, int sync, void *key)
{
int pwake = 0;
unsigned long flags;
/* Recover the epitem from the wait queue entry */
struct epitem *epi = ep_item_from_wait(wait);
struct eventpoll *ep = epi->ep;
DNPRINTK(3, (KERN_INFO "[%p] eventpoll: poll_callback(%p) epi=%p ep=%p\n",
current, epi->ffd.file, epi, ep));
spin_lock_irqsave(&ep->lock, flags);
/*
 * If the event mask does not contain any poll event, the descriptor is
 * considered disabled. This condition is most likely the effect of the
 * EPOLLONESHOT bit, which disables the descriptor once an event has been
 * received, until the next EPOLL_CTL_MOD is issued.
 *
 * #define EP_PRIVATE_BITS (EPOLLONESHOT | EPOLLET)
 * #define EPOLLONESHOT (1 << 30)
 * #define EPOLLET (1 << 31)
 */
if (!(epi->event.events & ~EP_PRIVATE_BITS))
goto out_unlock;
/*
 * While events are being transferred to user space the lock cannot be
 * held (user memory is being accessed, and because of the linux
 * f_op->poll() semantics). All events that happen during that period are
 * chained in ep->ovflist and requeued later.
 * #define EP_UNACTIVE_PTR ((void *) -1L)
 */
if (unlikely(ep->ovflist != EP_UNACTIVE_PTR)) {
/* Chain the item only once: ->next is EP_UNACTIVE_PTR while unchained */
if (epi->next == EP_UNACTIVE_PTR) {
epi->next = ep->ovflist;
ep->ovflist = epi;
}
goto out_unlock;
}
/* If this file is already in the ready list we exit soon */
if (ep_is_linked(&epi->rdllink))
goto is_linked;
/* Append epi->rdllink to the tail of ep->rdllist: the item is now ready */
list_add_tail(&epi->rdllink, &ep->rdllist);
is_linked:
/*
 * Wake up (if active) both the eventpoll wait list and the ->poll()
 * wait list.
 * static inline int waitqueue_active(wait_queue_head_t *q)
 * {
 * return !list_empty(&q->task_list);
 * }
 */
if (waitqueue_active(&ep->wq))
wake_up_locked(&ep->wq);
if (waitqueue_active(&ep->poll_wait))
pwake++;
out_unlock:
spin_unlock_irqrestore(&ep->lock, flags);
/* We have to call this outside the lock */
/*
 * struct poll_safewake {
 * struct list_head wake_task_list;
 * spinlock_t lock;
 * };
 * static struct poll_safewake psw;
 */
if (pwake)
ep_poll_safewake(&psw, &ep->poll_wait); /* perform a safe wakeup of the ->poll() waiters */
return 1;
}
/*
 * Poll table callback, reached through
 *   revents = tfile->f_op->poll(tfile, &epq.pt);
 * It adds our wait queue entry to the wakeup list of the target file.
 *
 * On any failure (an earlier error already recorded in epi->nwait, or an
 * allocation failure here) epi->nwait is set to -1 so that the caller can
 * detect it.
 */
static void ep_ptable_queue_proc(struct file *file, wait_queue_head_t *whead,
				 poll_table *pt)
{
	/* The epitem this poll table belongs to. */
	struct epitem *epi = ep_item_from_epqueue(pt);
	struct eppoll_entry *pwq = NULL;

	/* Only try to allocate while no error has been recorded so far. */
	if (epi->nwait >= 0)
		pwq = kmem_cache_alloc(pwq_cache, GFP_KERNEL);

	if (!pwq) {
		/* We have to signal that an error occurred */
		epi->nwait = -1;
		return;
	}

	/* Wakeups on this entry are delivered to ep_poll_callback(). */
	init_waitqueue_func_entry(&pwq->wait, ep_poll_callback);
	pwq->whead = whead;
	pwq->base = epi;
	add_wait_queue(whead, &pwq->wait);

	/* Append the entry to the tail of the item's poll wait queue list. */
	list_add_tail(&pwq->llink, &epi->pwqlist);
	epi->nwait++;
}
/*
 * Hand the wait queue head over to the poll table's queueing callback.
 * Does nothing when either the poll table or the wait queue head is NULL.
 */
static inline void poll_wait(struct file * filp, wait_queue_head_t * wait_address, poll_table *p)
{
	if (!p || !wait_address)
		return;

	p->qproc(filp, wait_address, p);
}
/*
 * Head of a wait queue: a spinlock plus the list of queued entries
 * (waitqueue_active() tests whether task_list is non-empty).
 */
struct __wait_queue_head {
spinlock_t lock;
struct list_head task_list;
};
typedef struct __wait_queue_head wait_queue_head_t;
/*
 * Add a new file to the eventpoll interest set.
 *
 * Must be called with "mtx" held. Called from sys_epoll_ctl() as:
 *   error = ep_insert(ep, &epds, tfile, fd);
 *
 * Returns 0 on success, or -ENOMEM when the epitem cannot be allocated or
 * when setting up the poll wait queues signaled an error.
 */
static int ep_insert(struct eventpoll *ep, struct epoll_event *event,
struct file *tfile, int fd)
{
int error, revents, pwake = 0;
unsigned long flags;
struct epitem *epi;
struct ep_pqueue epq;
/*
 * For reference:
 * struct ep_pqueue {
 * poll_table pt;
 * struct epitem *epi;
 * };
 */
error = -ENOMEM;
if (!(epi = kmem_cache_alloc(epi_cache, GFP_KERNEL))) /* allocate an epitem */
goto error_return;
/* Item initialization follow here ... */
INIT_LIST_HEAD(&epi->rdllink);
INIT_LIST_HEAD(&epi->fllink);
INIT_LIST_HEAD(&epi->pwqlist);
epi->ep = ep;
ep_set_ffd(&epi->ffd, tfile, fd); /* initialize the epoll_filefd */
epi->event = *event;
epi->nwait = 0;
epi->next = EP_UNACTIVE_PTR;
/*
 * For reference:
 * typedef struct poll_table_struct {
 * poll_queue_proc qproc;
 * } poll_table;
 * typedef void (*poll_queue_proc)(struct file *, wait_queue_head_t *, struct poll_table_struct *);
 * static inline void init_poll_funcptr(poll_table *pt, poll_queue_proc qproc)
 * {
 * pt->qproc = qproc;
 * }
 */
/* Initialize the poll table using the queue callback */
epq.epi = epi;
init_poll_funcptr(&epq.pt, ep_ptable_queue_proc);
/*
 * Running tfile->f_op->poll(tfile, &epq.pt) ends up calling poll_wait():
 * static inline void poll_wait(struct file * filp, wait_queue_head_t *
 * wait_address, poll_table* p)
 * {
 * if (p && wait_address)
 * p->qproc(filp, wait_address, p);
 * }
 * Here qproc is ep_ptable_queue_proc(), which attaches the item to the
 * target file's wait queue. The call returns the bit mask of operations
 * that could be performed immediately without blocking.
 */
revents = tfile->f_op->poll(tfile, &epq.pt);
/*
 * Check whether an error occurred while the poll wait queues were being
 * set up (ep_ptable_queue_proc() sets nwait to -1 in that case).
 */
if (epi->nwait < 0)
goto error_unregister;
/* Add the current item to the list of active epoll hook for this file */
spin_lock(&tfile->f_ep_lock);
/* Append epi->fllink to the tail of tfile->f_ep_links */
list_add_tail(&epi->fllink, &tfile->f_ep_links);
spin_unlock(&tfile->f_ep_lock);
/*
 * Add the current item to the RB tree. All RB tree operations are
 * protected by "mtx", and ep_insert() is called with "mtx" held.
 */
ep_rbtree_insert(ep, epi);
/* We have to drop the new item inside our item list to keep track of it */
spin_lock_irqsave(&ep->lock, flags);
/* If the file is already ready, put it on the ready list */
if ((revents & event->events) && !ep_is_linked(&epi->rdllink)) {
list_add_tail(&epi->rdllink, &ep->rdllist);
/* Notify waiting tasks that events are available */
if (waitqueue_active(&ep->wq))
wake_up_locked(&ep->wq);
if (waitqueue_active(&ep->poll_wait))
pwake++;
}
spin_unlock_irqrestore(&ep->lock, flags);
/* We have to call this outside the lock */
if (pwake)
ep_poll_safewake(&psw, &ep->poll_wait); /* perform a safe wakeup of the ->poll() waiters */
DNPRINTK(3, (KERN_INFO "[%p] eventpoll: ep_insert(%p, %p, %d)\n",
current, ep, tfile, fd));
return 0;
error_unregister:
ep_unregister_pollwait(ep, epi);
/*
 * We need to do this because an event could have been arrived on some
 * allocated wait queue. Note that we don't care about the ep->ovflist
 * list, since that is used/cleaned only inside a section bound by "mtx".
 * And ep_insert() is called with "mtx" held.
 */
spin_lock_irqsave(&ep->lock, flags);
if (ep_is_linked(&epi->rdllink))
list_del_init(&epi->rdllink);
spin_unlock_irqrestore(&ep->lock, flags);
kmem_cache_free(epi_cache, epi);
error_return:
return error;
}
/*
 * Implements the event wait interface for the eventpoll file. It is the
 * kernel part of the user space epoll_wait().
 *
 * Returns the result of ep_poll(), or a negative errno: -EINVAL when
 * maxevents is out of range or epfd is not an eventpoll file, -EFAULT when
 * the user buffer fails the write-access check, -EBADF when epfd has no
 * file.
 */
asmlinkage long sys_epoll_wait(int epfd, struct epoll_event __user *events,
			       int maxevents, int timeout)
{
	int error;
	struct file *file;
	struct eventpoll *ep;

	DNPRINTK(3, (KERN_INFO "[%p] eventpoll: sys_epoll_wait(%d, %p, %d, %d)\n",
		     current, epfd, events, maxevents, timeout));

	/* The maximum number of event must be greater than zero */
	if (maxevents <= 0 || maxevents > EP_MAX_EVENTS)
		return -EINVAL;

	/* Verify that the area passed by the user is writeable */
	error = -EFAULT;
	if (!access_ok(VERIFY_WRITE, events, maxevents * sizeof(struct epoll_event)))
		goto out;

	/* Get the "struct file *" for the eventpoll file */
	error = -EBADF;
	file = fget(epfd);
	if (!file)
		goto out;

	if (is_file_epoll(file)) {
		/* The eventpoll structure hangs off the file's private data. */
		ep = file->private_data;

		/* Time to fish for events ... */
		error = ep_poll(ep, events, maxevents, timeout);
	} else {
		/* The descriptor does not refer to an eventpoll file. */
		error = -EINVAL;
	}

	fput(file);
out:
	DNPRINTK(3, (KERN_INFO "[%p] eventpoll: sys_epoll_wait(%d, %p, %d, %d) = %d\n",
		     current, epfd, events, maxevents, timeout, error));

	return error;
}
/*
 * Wait for events on the eventpoll and transfer them to user space.
 *
 * Sleeps on ep->wq while the ready list is empty, until events arrive, the
 * timeout (in milliseconds) expires or a signal is pending. Returns
 * -EINTR when interrupted by a signal, otherwise the result of
 * ep_send_events(), or 0 when nothing was ready.
 */
static int ep_poll(struct eventpoll *ep, struct epoll_event __user *events,
int maxevents, long timeout)
{
int res, eavail;
unsigned long flags;
long jtimeout;
wait_queue_t wait;
/*
 * Calculate the timeout by checking for the "infinite" value ( -1 )
 * and the overflow condition. The passed timeout is in milliseconds,
 * so it is converted to jiffies with (t * HZ + 999) / 1000, which
 * rounds up.
 */
jtimeout = (timeout < 0 || timeout >= EP_MAX_MSTIMEO) ?
MAX_SCHEDULE_TIMEOUT : (timeout * HZ + 999) / 1000;
retry:
spin_lock_irqsave(&ep->lock, flags);
res = 0;
if (list_empty(&ep->rdllist)) {
/*
 * We don't have any available event to return to the caller, so we
 * need to sleep here. ep_poll_callback() will wake us up when events
 * become available.
 */
/* "current" is the calling task; queue it on the eventpoll wait queue */
init_waitqueue_entry(&wait, current);
wait.flags |= WQ_FLAG_EXCLUSIVE;
__add_wait_queue(&ep->wq, &wait); /* add our entry to the eventpoll wait queue */
for (;;) {
/*
 * Mark the task interruptible before testing the conditions below
 */
set_current_state(TASK_INTERRUPTIBLE);
if (!list_empty(&ep->rdllist) || !jtimeout)
break;
/* Check for a pending signal */
if (signal_pending(current)) {
res = -EINTR;
break;
}
spin_unlock_irqrestore(&ep->lock, flags);
/*
 * Give up the processor until ep_poll_callback() wakes us up or the
 * timeout expires; the return value is the time left.
 */
jtimeout = schedule_timeout(jtimeout);
spin_lock_irqsave(&ep->lock, flags);
}
__remove_wait_queue(&ep->wq, &wait);
set_current_state(TASK_RUNNING);
}
/* Is it worth to try to dig for events ? */
eavail = !list_empty(&ep->rdllist);
spin_unlock_irqrestore(&ep->lock, flags);
/*
 * Try to transfer events to user space. In case we get 0 events and
 * there's still timeout left over, we go trying again in search of
 * more luck.
 */
if (!res && eavail &&
!(res = ep_send_events(ep, events, maxevents)) && jtimeout)
goto retry;
return res;
}