我们知道linux系统中文件有很多种,包括普通文件,目录文件,设备文件,管道文件,套接字文件等.文件在内核中用file结构体表示,file对象中有个重要成员 f_op指针,它指向file_operations,该结构体定义了一系列文件操作的函数指针集合,例如open,read,write,ioctl,mmap等.当对文件进行操作时,最终是调用对应的函数指针进行操作.
Linux支持很多种文件系统,每种文件系统在使用之前都必须先注册.文件系统类型用结构体file_system_type表示,注册时调用register_filesystem(struct file_system_type * fs),它的作用是将文件系统类型添加到全局链表file_systems中.下面以Linux主流的磁盘文件系统ext4的注册流程为例进行分析.
/*
 * File system type descriptor for ext4.  This is the object that gets
 * registered with the VFS; its ->mount callback (ext4_mount) is what a
 * mount request for an "ext4" file system ends up calling.
 */
static struct file_system_type ext4_fs_type = {
	/* identity */
	.name		= "ext4",
	.owner		= THIS_MODULE,
	/* NOTE(review): flag name suggests a backing block device is required */
	.fs_flags	= FS_REQUIRES_DEV,
	/* super block life cycle callbacks */
	.mount		= ext4_mount,
	.kill_sb	= kill_block_super,
};
在用户空间执行mount系统调用挂载文件系统时,内核最终会调用ext4_mount(),基本流程为:do_mount()->do_new_mount()->do_kern_mount()->vfs_kern_mount()->mount_fs()->ext4_fs_type.mount(),而ext4_fs_type.mount指向的就是ext4_mount(),感兴趣的读者请自行分析.
/*
 * Mount callback of ext4_fs_type.
 *
 * Forwards all of its arguments to mount_bdev() and additionally passes
 * ext4_fill_super as the callback used to fill in the super block.
 * Returns whatever mount_bdev() returns.
 */
static struct dentry *ext4_mount(struct file_system_type *fs_type, int flags,
const char *dev_name, void *data)
{
return mount_bdev(fs_type, flags, dev_name, data, ext4_fill_super);
}
mount_bdev函数原型声明如下:
/*
 * Prototype of mount_bdev().
 *
 * @fs_type:    file system type being mounted
 * @flags:      mount flags
 * @dev_name:   name of the device to mount
 * @data:       opaque mount data, handed through to @fill_super
 * @fill_super: file-system specific callback that fills in the super block;
 *              it receives the super block, @data and a "silent" flag and
 *              returns non-zero on error
 *
 * This is a declaration only; the function body is shown separately.
 */
struct dentry *mount_bdev(struct file_system_type *fs_type,
	int flags, const char *dev_name, void *data,
	int (*fill_super)(struct super_block *, void *, int));
注意上面ext4_mount()调用mount_bdev()时传入的最后一个实参ext4_fill_super,它对应mount_bdev的最后一个形参fill_super,这是一个函数指针,此处指向函数ext4_fill_super.
mount_bdev中会通过fill_super指针调用ext4_fill_super()函数,代码片段如下:
/*
 * Excerpt of mount_bdev(); the row of dots stands for code that is not
 * shown here (s, mode, bdev, b and error are set up in the elided part).
 *
 * The visible part finishes initialising the super block s and then calls
 * the file-system specific fill_super callback.
 */
struct dentry *mount_bdev(struct file_system_type *fs_type,
int flags, const char *dev_name, void *data,
int (*fill_super)(struct super_block *, void *, int))
{
.........
s->s_flags = flags | MS_NOSEC;
s->s_mode = mode;
/* s_id receives the string produced by bdevname() for this device */
strlcpy(s->s_id, bdevname(bdev, b), sizeof(s->s_id));
sb_set_blocksize(s, block_size(bdev));
/*
 * Invoke the callback supplied by the caller (ext4_fill_super when the
 * caller is ext4_mount).  The third argument is 1 if MS_SILENT is set
 * in flags and 0 otherwise.
 */
error = fill_super(s, data, flags & MS_SILENT ? 1 : 0);
if (error) {
/* the callback failed: release the super block and bail out */
deactivate_locked_super(s);
goto error;
}
/* success: mark the super block active and link it to the block device */
s->s_flags |= MS_ACTIVE;
bdev->bd_super = s;
}
下面看ext4_fill_super的执行流程代码片段:
/*
 * Excerpt of ext4_fill_super(), the fill_super callback that ext4_mount
 * passes to mount_bdev().  The rows of dots stand for code that is not
 * shown; the visible part loads the root inode of the file system.
 */
static int ext4_fill_super(struct super_block *sb, void *data, int silent) {
.....
/*
* The jbd2_journal_load will have done any necessary log recovery,
* so we can safely mount the rest of the filesystem now.
*/
/* look up the inode numbered EXT4_ROOT_INO on this super block */
root = ext4_iget(sb, EXT4_ROOT_INO);
if (IS_ERR(root)) {
ext4_msg(sb, KERN_ERR, "get root inode failed");
/* keep the error code, then clear root before jumping to cleanup */
ret = PTR_ERR(root);
root = NULL;
goto failed_mount4;
}
..........
}
可以看到其中调用了ext4_iget()函数,该函数的作用是根据超级块super_block和索引节点号(此处为根目录的EXT4_ROOT_INO),获取对应的索引节点inode对象,并对其进行初始化.
我们重点关注inode的文件操作指针i_fop成员的初始化过程.
/*
 * Excerpt of ext4_iget(); the rows of dots stand for code that is not
 * shown (inode, ei, raw_inode and ret are set up in the elided part).
 *
 * The visible part selects the inode operations (i_op) and file
 * operations (i_fop) according to the file type encoded in inode->i_mode.
 */
struct inode *ext4_iget(struct super_block *sb, unsigned long ino) // defined in fs/ext4/inode.c
{
.......
// branch on the file type stored in the inode's mode
if (S_ISREG(inode->i_mode)) { // regular file
inode->i_op = &ext4_file_inode_operations;
inode->i_fop = &ext4_file_operations;
ext4_set_aops(inode);
} else if (S_ISDIR(inode->i_mode)) { // directory
inode->i_op = &ext4_dir_inode_operations;
inode->i_fop = &ext4_dir_operations;
} else if (S_ISLNK(inode->i_mode)) { // symbolic link
// no i_fop is assigned in either symlink branch
if (ext4_inode_is_fast_symlink(inode)) {
inode->i_op = &ext4_fast_symlink_inode_operations;
nd_terminate_link(ei->i_data, inode->i_size,
sizeof(ei->i_data) - 1);
} else {
inode->i_op = &ext4_symlink_inode_operations;
ext4_set_aops(inode);
}
} else if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode) ||
S_ISFIFO(inode->i_mode) || S_ISSOCK(inode->i_mode)) { // character device, block device, FIFO, socket
inode->i_op = &ext4_special_inode_operations;
// i_fop for special files is chosen inside init_special_inode();
// the device number comes from i_block[0] (old encoding) when that
// field is non-zero, otherwise from i_block[1] (new encoding)
if (raw_inode->i_block[0])
init_special_inode(inode, inode->i_mode,
old_decode_dev(le32_to_cpu(raw_inode->i_block[0])));
else
init_special_inode(inode, inode->i_mode,
new_decode_dev(le32_to_cpu(raw_inode->i_block[1])));
} else {
// none of the known file types: report the inode and fail with -EIO
ret = -EIO;
EXT4_ERROR_INODE(inode, "bogus i_mode (%o)", inode->i_mode);
goto bad_inode;
}
....
}
对于普通文件操作,inode->i_fop指向ext4_file_operations.
ext4_file_operations的定义如下:
/*
 * File operations installed on regular ext4 files (ext4_iget() assigns
 * this table to inode->i_fop for S_ISREG inodes).
 *
 * Entries whose handler name starts with "ext4" are ext4-specific; the
 * remaining handlers are the ones shared with other file systems.
 */
const struct file_operations ext4_file_operations = {
	/* open / close */
	.open		= ext4_file_open,
	.release	= ext4_release_file,

	/* positioning and data transfer */
	.llseek		= ext4_llseek,
	.read		= do_sync_read,
	.write		= do_sync_write,
	.aio_read	= generic_file_aio_read,
	.aio_write	= ext4_file_write,
	.splice_read	= generic_file_splice_read,
	.splice_write	= generic_file_splice_write,

	/* mapping, syncing and space allocation */
	.mmap		= ext4_file_mmap,
	.fsync		= ext4_sync_file,
	.fallocate	= ext4_fallocate,

	/* ioctl entry points; the compat one exists only with CONFIG_COMPAT */
	.unlocked_ioctl	= ext4_ioctl,
#ifdef CONFIG_COMPAT
	.compat_ioctl	= ext4_compat_ioctl,
#endif
};
从上面ext4_file_operations的赋值可以看出,有的方法是通用文件系统公共的方法,有的是ext4自定义方法,例如以ext4开头的是针对ext4类型的文件自定义方法.其它都是常规方法,就是被多个文件系统公用.对于目录和链接文件的分析省略,感兴趣的读者自行分析.
下面分析针对字符设备,块设备,管道文件,套接字文件等特殊文件的文件操作指针赋值过程.如上代码所示,会调用到init_special_inode()方法.
实际上,不同类型的文件系统都会调用init_special_inode()方法.
/*
 * init_special_inode - pick the file operations of a special-file inode
 * @inode: inode being initialised
 * @mode:  file mode; stored in inode->i_mode and used to select the type
 * @rdev:  device number; recorded in inode->i_rdev for character and
 *         block devices, unused for the other types
 *
 * Character devices get def_chr_fops, block devices def_blk_fops, FIFOs
 * def_fifo_fops and sockets bad_sock_fops.  Any other mode leaves i_fop
 * untouched and only logs a debug message.
 *
 * The device-number messages are tracing aids.  They are logged at
 * KERN_DEBUG, like the "bogus i_mode" message, and print the major and
 * minor numbers as unsigned values.
 */
void init_special_inode(struct inode *inode, umode_t mode, dev_t rdev)
{
	/*
	 * Block device whose initialisation is traced with a stack dump.
	 * NOTE(review): major 179 is the number the kernel's device list
	 * assigns to MMC block devices, so this presumably targets the
	 * first MMC disk - confirm.
	 */
	enum { TRACED_BLK_MAJOR = 179, TRACED_BLK_MINOR = 0 };

	inode->i_mode = mode;
	if (S_ISCHR(mode)) {			/* character device */
		inode->i_fop = &def_chr_fops;
		inode->i_rdev = rdev;
		printk(KERN_DEBUG "cdev:major=%u,minor=%u\n",
		       MAJOR(rdev), MINOR(rdev));
	} else if (S_ISBLK(mode)) {		/* block device */
		inode->i_fop = &def_blk_fops;
		inode->i_rdev = rdev;
		printk(KERN_DEBUG "block_device:major=%u,minor=%u\n",
		       MAJOR(rdev), MINOR(rdev));
		/* show who initialises the traced device's inode */
		if (MAJOR(rdev) == TRACED_BLK_MAJOR &&
		    MINOR(rdev) == TRACED_BLK_MINOR)
			dump_stack();
	} else if (S_ISFIFO(mode)) {		/* FIFO (named pipe) */
		inode->i_fop = &def_fifo_fops;
	} else if (S_ISSOCK(mode)) {		/* socket */
		inode->i_fop = &bad_sock_fops;
	} else {
		printk(KERN_DEBUG "init_special_inode: bogus i_mode (%o) for"
		       " inode %s:%lu\n", mode, inode->i_sb->s_id,
		       inode->i_ino);
	}
}
以字符设备文件为例分析:
/*
 * Default file operations for character-device inodes (defined in
 * char_dev.c).  init_special_inode() installs this table as i_fop for
 * S_ISCHR inodes, so opening such a file enters chrdev_open().
 */
const struct file_operations def_chr_fops = {
	.llseek	= noop_llseek,
	.open	= chrdev_open,
};
/*
 * Open handler shared by all character-device files (the .open member of
 * def_chr_fops).
 *
 * Finds the struct cdev that belongs to the inode's device number,
 * replaces filp->f_op with that cdev's own file operations and then calls
 * the driver's open method, if it has one.
 *
 * Returns 0 on success, -ENXIO when no cdev can be found or referenced or
 * when it provides no file operations, or the error returned by the
 * driver's open method.
 */
static int chrdev_open(struct inode *inode, struct file *filp) // defined in char_dev.c
{
struct cdev *p;
struct cdev *new = NULL;
int ret = 0;
spin_lock(&cdev_lock);
p = inode->i_cdev;
if (!p) { // NULL on the first access: the inode has no cdev cached yet
struct kobject *kobj;
int idx;
/* the lookup below is done with cdev_lock released */
spin_unlock(&cdev_lock);
kobj = kobj_lookup(cdev_map, inode->i_rdev, &idx); // search cdev_map by device number for the matching kobject; per the accompanying text it is non-NULL once a driver has registered that number
if (!kobj)
return -ENXIO;
/* the kobject is embedded in a struct cdev; recover the container */
new = container_of(kobj, struct cdev, kobj);
spin_lock(&cdev_lock);
/* Check i_cdev again in case somebody beat us to it while
we dropped the lock. */
p = inode->i_cdev;
if (!p) {
inode->i_cdev = p = new; // cache the cdev in the inode
/* link the inode into the cdev's list of inodes */
list_add(&inode->i_devices, &p->list);
/* new is now owned by the inode; do not drop it below */
new = NULL;
} else if (!cdev_get(p))
ret = -ENXIO;
} else if (!cdev_get(p))
ret = -ENXIO;
spin_unlock(&cdev_lock);
/* new is NULL here unless a cdev was looked up but not installed */
cdev_put(new);
if (ret)
return ret;
ret = -ENXIO;
filp->f_op = fops_get(p->ops); // from here on the file uses the cdev's ops, i.e. the table supplied when the driver was registered
if (!filp->f_op)
goto out_cdev_put;
if (filp->f_op->open) {
ret = filp->f_op->open(inode, filp); // call the driver's own open method
if (ret)
goto out_cdev_put;
}
return 0;
out_cdev_put:
/* error path: drop the cdev reference again */
cdev_put(p);
return ret;
}
我们在注册一个字符设备驱动程序时,是调用register_chrdev方法,它的原型为register_chrdev(unsigned int major, const char *name,
const struct file_operations *fops),三个参数分别是设备的主设备号(为0就是由内核动态分配),设备名称(也就是我们在/proc/devices中看到的名称;/dev/下设备文件的名称是在创建设备节点时另行指定的),文件操作指针集合fops.
/*
 * Convenience wrapper around __register_chrdev() that registers a
 * character device for @major with base minor 0 and a count of 256
 * minors.  @name and @fops are passed through unchanged.
 * Returns whatever __register_chrdev() returns.
 */
static inline int register_chrdev(unsigned int major, const char *name,
const struct file_operations *fops)
{
return __register_chrdev(major, 0, 256, name, fops);
}
int __register_chrdev(unsigned int major, unsigned int baseminor,
unsigned int count, const char *name,
const struct file_operations *fops)
{
struct char_device_struct *cd;
struct cdev *cdev;
int err = -ENOMEM;
/*__register_chrdev_region()用于分配设备号,内核定义一个散列表chrdevs,它管理所有分配的字符设备号,实际上它是一个结构体char_device_struct的指针数组,数组元素个数为255,每个主设备号对应一个表项,每个结构体char_device_struct指针指向一个链表,链表的元素类型为char_device_struct,同一个链表中每个元素对应一个相同的主设备号,不同的从设备号范围.
static struct char_device_struct {
struct char_device_struct *next;
unsigned int major;
unsigned int baseminor;
int minorct;
char name[64];
struct cdev *cdev;/* will die */
} *chrdevs[CHRDEV_MAJOR_HASH_SIZE]; //CHRDEV_MAJOR_HASH_SIZE为255
*/
cd = __register_chrdev_region(major, baseminor, count, name);
if (IS_ERR(cd))
return PTR_ERR(cd);
cdev = cdev_alloc(); //创建一个新的字符设备驱动程序对象cdev.并初始化其中的kobject引用计数.
if (!cdev)
goto out2;
cdev->owner = fops->owner;
cdev->ops = fops; //此处将fops赋值给字符设备对象的ops成员.
kobject_set_name(&cdev->kobj, "%s", name);
/*cdev_add()是将cdev注册到设备驱动程序模型中,设备驱动程序模型为所有字符设备建立了一个kobject的映射域,由全局变量cdev_map指向,cdev_map的类型为kobj_map,
struct kobj_map {
struct probe {
struct probe *next;
dev_t dev;
unsigned long range;
struct module *owner;
kobj_probe_t *get;
int (*lock)(dev_t, void *);
void *data;
} *probes[255];
struct mutex *lock;
};probes为probe类型的结构体数组,数组元素个数为255,就是每个主设备号对应一个数组元素 ,每个元素又是由probe构成的链表,链表管理主设备号相同,从设备号范围不同的所有元素,这与前面的chrdevs管理类似.cdev_add()调用kojb_map()将cdev其相关元素插入到cdev_map的相应链表中.cdev对象实际保存在probe的data成员中.内核这样管理的目的就是为了便于根据kobject对象地址能够迅速找到对应的cdev描述符(调用kobj_lookup()检索).
与字符设备管理类似,对应有一个bdev_map管理块设备的kojbect映射.major_names管理所有块设备的设备号.我们在使用cat /proc/devices中打印出来的内容就是遍历chrdevs和major_names中的所有元素的输出.
*/
err = cdev_add(cdev, MKDEV(cd->major, baseminor), count);
if (err)
goto out;
cd->cdev = cdev;
return major ? 0 : cd->major;
out:
kobject_put(&cdev->kobj);
out2:
kfree(__unregister_chrdev_region(cd->major, baseminor, count));
return err;
}
/*
 * Character device descriptor.  __register_chrdev() fills in owner and
 * ops, and chrdev_open() recovers it from its embedded kobject.
 */
struct cdev {
struct kobject kobj;
struct module *owner;
const struct file_operations *ops; // file operations implemented by the driver for its device files
struct list_head list; // one driver can serve several device files (same major number, minors within the same range); list links the inodes of those device files
dev_t dev; // first device number (the major number plus the starting minor number)
unsigned int count; // size of the minor number range
};