open系统调用在内核中的流程分析

真是蛮复杂的,我分三步走,力求讲得比较清楚。
以字符设备为例,相对于块设备要简单些。
基于2.6.26的内核

一)驱动注册open函数都干了些什么?


register_chrdev -> cdev_add  ->  kobj_map

file: fs/char_dev.c
/*
 * Excerpt of register_chrdev(): the first half of registering a char driver.
 *
 * It calls __register_chrdev_region(major, 0, 256, name) and returns the
 * encoded error if that fails, then allocates a struct cdev and stores the
 * driver's @fops (and its owner) in it.  The rest of the function, including
 * the out2 label and the use of 's', is elided ("...") in this excerpt.
 */
int register_chrdev(unsigned int major, const char *name,
                    const struct file_operations *fops)
{
        struct char_device_struct *cd;
        struct cdev *cdev;
        char *s;
        int err = -ENOMEM;

        cd = __register_chrdev_region(major, 0, 256, name);
        if (IS_ERR(cd))
                return PTR_ERR(cd);

        cdev = cdev_alloc();
        if (!cdev)
                goto out2;

        cdev->owner = fops->owner;
        cdev->ops = fops;        // Note: chrdev_open() later reads the driver's fops back from cdev->ops

...
}

file: fs/char_dev.c
/*
 * cdev_add() - enter a char device into cdev_map.
 * @p:     the cdev to register
 * @dev:   first device number covered by @p
 * @count: number of consecutive device numbers covered
 *
 * Records @dev and @count in the cdev, then hands the range to kobj_map()
 * with exact_match as the probe callback, exact_lock as the lock callback
 * and the cdev itself as the callback data (no owning module is passed).
 *
 * Returns whatever kobj_map() returns.
 */
int cdev_add(struct cdev *p, dev_t dev, unsigned count)
{
        int rc;

        p->count = count;
        p->dev = dev;

        rc = kobj_map(cdev_map, dev, count, NULL, exact_match, exact_lock, p);
        return rc;
}

file: fs/char_dev.c
/*
 * Probe callback that cdev_add() passes to kobj_map().
 *
 * @data is the struct cdev given as callback data at registration time;
 * the function returns the address of the kobject embedded in it.
 * @dev and @part are not used.
 */
static struct kobject *exact_match(dev_t dev, int *part, void *data)
{
        struct cdev *cdev = data;

        return &cdev->kobj;
}

file: drivers/base/map.c
/*
 * kobj_map() - register a device-number range in a kobj_map.
 * @domain: the map to insert into
 * @dev:    first device number of the range
 * @range:  number of device numbers in the range
 * @module: owner recorded in each probe entry
 * @probe:  callback stored as ->get; kobj_lookup() calls it to obtain the kobject
 * @lock:   callback stored as ->lock
 * @data:   opaque pointer passed back to @probe and @lock
 *
 * One struct probe is created for every major number touched by
 * [dev, dev + range - 1] (at most 255) and linked into the bucket
 * domain->probes[major % 255].
 *
 * Returns 0 on success, -ENOMEM if the probe array cannot be allocated.
 */
int kobj_map(struct kobj_map *domain, dev_t dev, unsigned long range,
             struct module *module, kobj_probe_t *probe,
             int (*lock)(dev_t, void *), void *data)
{
        /* Number of distinct majors spanned by the range. */
        unsigned n = MAJOR(dev + range - 1) - MAJOR(dev) + 1;
        unsigned index = MAJOR(dev);
        unsigned i;
        struct probe *p;

        if (n > 255)
                n = 255;

        /* A single allocation holds all n entries. */
        p = kmalloc(sizeof(struct probe) * n, GFP_KERNEL);

        if (p == NULL)
                return -ENOMEM;

        /* Fill every entry identically; p ends up one past the last entry. */
        for (i = 0; i < n; i++, p++) {
                p->owner = module;
                p->get = probe;            // when called from cdev_add() this is exact_match
                p->lock = lock;
                p->dev = dev;
                p->range = range;
                p->data = data;
        }
        mutex_lock(domain->lock);
        /* "p -= n" rewinds to the first entry; then link one entry per major. */
        for (i = 0, p -= n; i < n; i++, p++, index++) {
                struct probe **s = &domain->probes[index % 255];
                /*
                 * Skip entries with a smaller range so each bucket list stays
                 * ordered by ascending range (narrowest first).
                 */
                while (*s && (*s)->range < range)
                        s = &(*s)->next;
                p->next = *s;
                *s = p;
        }
        mutex_unlock(domain->lock);
        return 0;
}

二)从系统调用往内核走,看当初驱动里注册的file_operations里的open函数怎么被调用的


sys_open -> do_sys_open -> do_filp_open -> nameidata_to_filp -> __dentry_open

问题是:1)__dentry_open 如何找到 chrdev_open?
       2)chrdev_open 最终又是如何调用驱动注册的 file_operations 里的 open 函数的?

file: fs/open.c
/*
 * Excerpt of __dentry_open(): fills in the struct file @f for @dentry/@mnt
 * and selects the open routine to use.
 *
 * If the caller passes no @open callback, the ->open member of the inode's
 * default file_operations (inode->i_fop) is used.  The code that invokes
 * 'open' and the cleanup_file/cleanup_all labels are elided ("...") here.
 */
static struct file *__dentry_open(struct dentry *dentry, struct vfsmount *mnt,
                                        int flags, struct file *f,
                                        int (*open)(struct inode *, struct file *))
{
        struct inode *inode;
        int error;

        f->f_flags = flags;
        f->f_mode = ((flags+1) & O_ACCMODE) | FMODE_LSEEK |
                                FMODE_PREAD | FMODE_PWRITE;
        inode = dentry->d_inode;
        if (f->f_mode & FMODE_WRITE) {
                error = __get_file_write_access(inode, mnt);
                if (error)
                        goto cleanup_file;
                if (!special_file(inode->i_mode))
                        file_take_write(f);
        }

        f->f_mapping = inode->i_mapping;
        f->f_path.dentry = dentry;
        f->f_path.mnt = mnt;
        f->f_pos = 0;
        f->f_op = fops_get(inode->i_fop);    // for a char-device inode i_fop is def_chr_fops (set by init_special_inode)
        file_move(f, &inode->i_sb->s_files);

        error = security_dentry_open(f);
        if (error)
                goto cleanup_all;

        if (!open && f->f_op)
                open = f->f_op->open;        // selects def_chr_fops.open, i.e. chrdev_open; the call itself is in the elided part

...
}

file: fs/char_dev.c
/*
 * Excerpt of chrdev_open() (signature abbreviated by the article; the body
 * uses 'inode' and 'filp').
 *
 * Finds the struct cdev for the inode, caching it in inode->i_cdev on first
 * open, then replaces filp->f_op with the driver's file_operations taken
 * from cdev->ops and calls the driver's own ->open.  The tail of the
 * function is elided ("...").
 */
chrdev_open() {
        struct cdev *p;
        struct cdev *new = NULL;
        int ret = 0;

        spin_lock(&cdev_lock);
        p = inode->i_cdev;
        if (!p) {
                struct kobject *kobj;
                int idx;
                /* The spinlock is dropped around the lookup. */
                spin_unlock(&cdev_lock);
                kobj = kobj_lookup(cdev_map, inode->i_rdev, &idx);  // find the kobject of the cdev; the inverse of kobj_map()
                if (!kobj)
                        return -ENXIO;
                new = container_of(kobj, struct cdev, kobj);    // recover the enclosing cdev
                spin_lock(&cdev_lock);
                /* Re-read i_cdev: it may have been set while the lock was released. */
                p = inode->i_cdev;
                if (!p) {
                        inode->i_cdev = p = new;
                        inode->i_cindex = idx;
                        list_add(&inode->i_devices, &p->list);
                        new = NULL;
                } else if (!cdev_get(p))
                        ret = -ENXIO;
        } else if (!cdev_get(p))
                ret = -ENXIO;
        spin_unlock(&cdev_lock);
        /*
         * 'new' is NULL here unless the lookup result was not installed in
         * the inode (presumably cdev_put() tolerates NULL - not shown here).
         */
        cdev_put(new);
        if (ret)
                return ret;
        filp->f_op = fops_get(p->ops);            // back to the file_operations the driver registered
        if (!filp->f_op) {
                cdev_put(p);
                return -ENXIO;
        }
        if (filp->f_op->open) {
                lock_kernel();
                ret = filp->f_op->open(inode,filp);    // the driver's own open() is finally invoked here
                unlock_kernel();
        }

    ...
}

file: drivers/base/map.c
/*
 * kobj_lookup() - find the kobject registered for a device number.
 * @domain: the map to search
 * @dev:    device number to look up
 * @index:  out: offset of @dev from the start of the matching range
 *
 * Walks the bucket for MAJOR(dev) and, for each entry whose range covers
 * @dev, calls its ->lock and then its ->get (probe) callback.  If the probe
 * returns NULL the search restarts, considering only entries with a strictly
 * narrower range than the one just tried.
 *
 * Returns the kobject from the probe callback, or NULL if nothing matches.
 */
struct kobject *kobj_lookup(struct kobj_map *domain, dev_t dev, int *index)
{
        struct kobject *kobj;
        struct probe *p;
        unsigned long best = ~0UL;

retry:
        mutex_lock(domain->lock);
        for (p = domain->probes[MAJOR(dev) % 255]; p; p = p->next) {
                struct kobject *(*probe)(dev_t, int *, void *);
                struct module *owner;
                void *data;

                /* Skip entries whose [dev, dev + range - 1] does not contain dev. */
                if (p->dev > dev || p->dev + p->range - 1 < dev)
                        continue;
                /* Stop once an entry is no narrower than one already tried. */
                if (p->range - 1 >= best)
                      break;
                if (!try_module_get(p->owner))
                        continue;
                /* Copy what is needed; the mutex is released before probing. */
                owner = p->owner;
                data = p->data;
                probe = p->get;                // for char devices this is exact_match
                best = p->range - 1;
                *index = dev - p->dev;
                if (p->lock && p->lock(dev, data) < 0) {
                        module_put(owner);
                        continue;
                }
                mutex_unlock(domain->lock);
                kobj = probe(dev, index, data);        // exact_match is called here
                /* Currently ->owner protects _only_ ->probe() itself. */
                module_put(owner);
                if (kobj)
                        return kobj;
                goto retry;
        }
        mutex_unlock(domain->lock);
        return NULL;
}

三)什么时候为字符设备设置的def_chr_fops ?

这个跟具体的文件系统有关系的。

现在/dev/下的设备节点都是通过udev动态创建的,udev会去调用mknod(假定文件系统是ext2,内核会调用ext2_mknod)。
ext2_mknod会调用init_special_inode();如果是char设备,init_special_inode()会把def_chr_fops的地址赋给inode->i_fop。
这两个函数的部分实现如下:

file: fs/ext2/namei.c
/*
 * Excerpt of ext2_mknod(): creates a special-file inode in directory @dir.
 *
 * Returns -EINVAL if @rdev fails new_valid_dev().  Otherwise a new inode is
 * obtained from ext2_new_inode() and, when that succeeded, initialised
 * through init_special_inode().  The remainder of the function is truncated
 * in this excerpt.
 */
static int ext2_mknod (struct inode * dir, struct dentry *dentry, int mode, dev_t rdev)
{
        struct inode * inode;
        int err;

        if (!new_valid_dev(rdev))
                return -EINVAL;

        inode = ext2_new_inode (dir, mode);
        /* err is computed up front; it is only meaningful if inode is an error pointer. */
        err = PTR_ERR(inode);
        if (!IS_ERR(inode)) {
                init_special_inode(inode, inode->i_mode, rdev);        // hand over to init_special_inode()


file: fs/inode.c
/*
 * Excerpt of init_special_inode(): stores @mode in the inode and, for a
 * character device (S_ISCHR), installs def_chr_fops as the inode's default
 * file_operations and records the device number @rdev.  Handling of other
 * file types is elided ("...").
 */
void init_special_inode(struct inode *inode, umode_t mode, dev_t rdev)
{
        inode->i_mode = mode;
        if (S_ISCHR(mode)) {
                inode->i_fop = &def_chr_fops;            // default operations for a char device
                inode->i_rdev = rdev;
    }
...

}

file: fs/char_dev.c
/*
 * Default file_operations that init_special_inode() installs on char-device
 * inodes.  Only ->open is set here: chrdev_open() then swaps filp->f_op for
 * the file_operations registered by the driver.
 */
const struct file_operations def_chr_fops = {
        .open = chrdev_open,
};


你可能感兴趣的:(struct,Module,ext,File,null,domain)