- 在使用epoll的过程中,如果使用epoll_ctl添加普通文件描述符,会返回EPERM。manual中描述如下:
The target file fd does not support epoll. This error can occur if fd refers to, for example, a regular file or a directory.
- 基于Linux 2.6.0,探究一下内核中如何限制的。
- eventpoll.c:epoll_ctl时代码如下。可以看到,获取到的struct file需要支持poll才能添加到epoll监听列表中。
asmlinkage long
sys_epoll_ctl(int epfd, int op, int fd, struct epoll_event __user *event)
{
...
/* The target file descriptor must support poll */
error = -EPERM;
if (!tfile->f_op || !tfile->f_op->poll)
goto eexit_3;
...
}
struct file {
struct list_head f_list;
struct dentry *f_dentry;
struct vfsmount *f_vfsmnt;
struct file_operations *f_op;
atomic_t f_count;
unsigned int f_flags;
mode_t f_mode;
loff_t f_pos;
struct fown_struct f_owner;
unsigned int f_uid, f_gid;
int f_error;
struct file_ra_state f_ra;
unsigned long f_version;
void *f_security;
/* needed for tty driver, and maybe others */
void *private_data;
/* Used by fs/eventpoll.c to link all the hooks to this file */
struct list_head f_ep_links;
spinlock_t f_ep_lock;
};
struct file_operations {
struct module *owner;
loff_t (*llseek) (struct file *, loff_t, int);
ssize_t (*read) (struct file *, char __user *, size_t, loff_t *);
ssize_t (*aio_read) (struct kiocb *, char __user *, size_t, loff_t);
ssize_t (*write) (struct file *, const char __user *, size_t, loff_t *);
ssize_t (*aio_write) (struct kiocb *, const char __user *, size_t, loff_t);
int (*readdir) (struct file *, void *, filldir_t);
unsigned int (*poll) (struct file *, struct poll_table_struct *);
int (*ioctl) (struct inode *, struct file *, unsigned int, unsigned long);
int (*mmap) (struct file *, struct vm_area_struct *);
int (*open) (struct inode *, struct file *);
int (*flush) (struct file *);
int (*release) (struct inode *, struct file *);
int (*fsync) (struct file *, struct dentry *, int datasync);
int (*aio_fsync) (struct kiocb *, int datasync);
int (*fasync) (int, struct file *, int);
int (*lock) (struct file *, int, struct file_lock *);
ssize_t (*readv) (struct file *, const struct iovec *, unsigned long, loff_t *);
ssize_t (*writev) (struct file *, const struct iovec *, unsigned long, loff_t *);
ssize_t (*sendfile) (struct file *, loff_t *, size_t, read_actor_t, void __user *);
ssize_t (*sendpage) (struct file *, struct page *, int, size_t, loff_t *, int);
unsigned long (*get_unmapped_area)(struct file *, unsigned long, unsigned long, unsigned long, unsigned long);
};
- struct file通过open系统调用得到,open系统调用中给f_op赋值过程如下
// open.c
asmlinkage long sys_open(const char __user * filename, int flags, int mode)
{
char * tmp;
int fd, error;
#if BITS_PER_LONG != 32
flags |= O_LARGEFILE;
#endif
tmp = getname(filename);
fd = PTR_ERR(tmp);
if (!IS_ERR(tmp)) {
fd = get_unused_fd();
if (fd >= 0) {
struct file *f = filp_open(tmp, flags, mode);
error = PTR_ERR(f);
if (IS_ERR(f))
goto out_error;
fd_install(fd, f);
}
out:
putname(tmp);
}
return fd;
out_error:
put_unused_fd(fd);
fd = error;
goto out;
}
struct file *filp_open(const char * filename, int flags, int mode)
{
int namei_flags, error;
struct nameidata nd;
namei_flags = flags;
if ((namei_flags+1) & O_ACCMODE)
namei_flags++;
if (namei_flags & O_TRUNC)
namei_flags |= 2;
error = open_namei(filename, namei_flags, mode, &nd);
if (!error)
return dentry_open(nd.dentry, nd.mnt, flags);
return ERR_PTR(error);
}
struct file *dentry_open(struct dentry *dentry, struct vfsmount *mnt, int flags)
{
struct file * f;
struct inode *inode;
int error;
error = -ENFILE;
f = get_empty_filp();
if (!f)
goto cleanup_dentry;
f->f_flags = flags;
f->f_mode = (flags+1) & O_ACCMODE;
inode = dentry->d_inode;
if (f->f_mode & FMODE_WRITE) {
error = get_write_access(inode);
if (error)
goto cleanup_file;
}
file_ra_state_init(&f->f_ra, inode->i_mapping);
f->f_dentry = dentry;
f->f_vfsmnt = mnt;
f->f_pos = 0;
f->f_op = fops_get(inode->i_fop); //这里就是判断module是否装载了,没装载返回NULL,装载了返回的就是inode->i_fop了
file_move(f, &inode->i_sb->s_files);
...
}
- 而strcut inode中的struct file_operations i_fop都是通过赋值静态数据结构的,比如常见的如下所示。(通过全局搜索static struct file_operations,搜索 ".poll.?="更全面)可以认为给poll结构体成员赋值的都支持添加到epoll中。
// socket.c
/*
* Socket files have a set of 'special' operations as well as the generic file ones. These don't appear
* in the operation structures but are done directly via the socketcall() multiplexor.
*/
static struct file_operations socket_file_ops = {
.owner = THIS_MODULE,
.llseek = no_llseek,
.aio_read = sock_aio_read,
.aio_write = sock_aio_write,
.poll = sock_poll,
.ioctl = sock_ioctl,
.mmap = sock_mmap,
.open = sock_no_open, /* special open code to disallow open via /proc */
.release = sock_close,
.fasync = sock_fasync,
.readv = sock_readv,
.writev = sock_writev,
.sendpage = sock_sendpage
};
// eventpoll.c
/* File callbacks that implement the eventpoll file behaviour */
static struct file_operations eventpoll_fops = {
.release = ep_eventpoll_close,
.poll = ep_eventpoll_poll
};
// inode.c
static struct file_operations default_file_operations = {
.read = default_read_file,
.write = default_write_file,
.open = default_open,
.llseek = default_file_lseek,
};
- 目前认知中的支持的有: pipe、fifo、socket、timer_fd
// pipe.c
struct file_operations rdwr_fifo_fops = {
.llseek = no_llseek,
.read = pipe_read,
.write = pipe_write,
.poll = fifo_poll,
.ioctl = pipe_ioctl,
.open = pipe_rdwr_open,
.release = pipe_rdwr_release,
.fasync = pipe_rdwr_fasync,
};
struct file_operations rdwr_pipe_fops = {
.llseek = no_llseek,
.read = pipe_read,
.write = pipe_write,
.poll = pipe_poll,
.ioctl = pipe_ioctl,
.open = pipe_rdwr_open,
.release = pipe_rdwr_release,
.fasync = pipe_rdwr_fasync,
};
// timerfd.c,linux 2.6.25开始才有
static const struct file_operations timerfd_fops = {
.release = timerfd_release,
.poll = timerfd_poll,
.read = timerfd_read,
};
// tun.c
static struct file_operations tun_fops = {
.owner = THIS_MODULE,
.llseek = no_llseek,
.read = tun_chr_read,
.readv = tun_chr_readv,
.write = tun_chr_write,
.writev = tun_chr_writev,
.poll = tun_chr_poll,
.ioctl = tun_chr_ioctl,
.open = tun_chr_open,
.release = tun_chr_close,
.fasync = tun_chr_fasync
};