mount系统调用(sys_mount)分析

sys_mount分析

  • sys_mount代码分析

   Linux下任何块设备在使用之前,首先要对其进行格式化成特定文件系统,如mkfs.ext4等,
   又如ramdisk设备,内核初始化时会把指定的一段内存虚拟成ext2文件系统格式的块设备,挂载时再以ext2文件系统格式挂载。
   然后再将其挂载到系统中特定目录下,于是通过该挂载点即可访问该块设备文件系统下的所有文件和目录等。如 mount -t jffs2 /dev/mtdblock0 /home/test,便将设备/dev/mtdblock0 挂载到了/home/test下。

sys_mount代码分析

fs/namespace.c

/*
 * mount system call entry point.
 *
 * Makes kernel copies of the filesystem type string, the device name string
 * and the option data, then calls do_mount() with those copies plus the
 * unmodified dir_name user pointer and the raw flags. The copies are freed
 * before returning.
 *
 * Returns the result of do_mount(), or the error encoded in the pointer
 * returned by whichever copy step failed.
 */
SYSCALL_DEFINE5(mount, char __user *, dev_name, char __user *, dir_name,
		char __user *, type, unsigned long, flags, void __user *, data)
{
	int ret;
	char *kernel_type;
	char *kernel_dev;
	void *options;

	/* ret is loaded with PTR_ERR() up front; it only matters if IS_ERR() holds */
	kernel_type = copy_mount_string(type);
	ret = PTR_ERR(kernel_type);
	if (IS_ERR(kernel_type))
		goto out_type;

	kernel_dev = copy_mount_string(dev_name);
	ret = PTR_ERR(kernel_dev);
	if (IS_ERR(kernel_dev))
		goto out_dev;

	options = copy_mount_options(data);
	ret = PTR_ERR(options);
	if (IS_ERR(options))
		goto out_data;

	/* dir_name is handed over as a __user pointer; it is not copied here */
	ret = do_mount(kernel_dev, dir_name, kernel_type, flags, options);

	/* unwind in reverse order of allocation */
	kfree(options);
out_data:
	kfree(kernel_dev);
out_dev:
	kfree(kernel_type);
out_type:
	return ret;
}

   内核mount函数入口为sys_mount(),实现在fs/namespace.c中,代码如上。其中kernel_type为挂载文件系统的类型,dir_name为挂载点的路径(用户态路径),kernel_dev为块设备名称,options为选项信息。
   该函数主要做的工作是把这些参数从用户态拷贝到内核态(dir_name除外,它仍以用户态指针的形式传给do_mount),随后执行do_mount进行接下来的工作。
fs/namespace.c->do_mount

/*
 * Resolve the mount point, validate the request, translate the MS_* flags
 * into per-mount (MNT_*) and per-superblock (SB_*) flags, and dispatch to
 * the handler for the requested operation.
 *
 * Parameters:
 * dev_name: name of the block device to mount
 * dir_name: mount point path (user-space pointer)
 * type_page: filesystem type to mount, e.g. "ext4", "jffs2"
 * flags: mount flags
 * data_page: extra mount data; normally NULL
 *
 * Returns 0 on success or a negative errno.
 */
long do_mount(const char *dev_name, const char __user *dir_name,
		const char *type_page, unsigned long flags, void *data_page)
{
	struct path path;
	unsigned int mnt_flags = 0, sb_flags;
	int retval = 0;
	......
	/* ... and get the mountpoint */
	retval = user_path(dir_name, &path);							(1)
	if (retval)
		return retval;

	/* Security hook first, then the may_mount()/may_mandlock() checks */
	retval = security_sb_mount(dev_name, &path,						(2)
				   type_page, flags, data_page);
	if (!retval && !may_mount())
		retval = -EPERM;
	if (!retval && (flags & SB_MANDLOCK) && !may_mandlock())
		retval = -EPERM;
	if (retval)
		goto dput_out;

	/* Default to relatime unless overridden */
	if (!(flags & MS_NOATIME))
		mnt_flags |= MNT_RELATIME;

	/* Separate the per-mountpoint flags */
	if (flags & MS_NOSUID)
		mnt_flags |= MNT_NOSUID;
	if (flags & MS_NODEV)
		mnt_flags |= MNT_NODEV;
	if (flags & MS_NOEXEC)
		mnt_flags |= MNT_NOEXEC;
	if (flags & MS_NOATIME)
		mnt_flags |= MNT_NOATIME;
	if (flags & MS_NODIRATIME)
		mnt_flags |= MNT_NODIRATIME;
	if (flags & MS_STRICTATIME)
		mnt_flags &= ~(MNT_RELATIME | MNT_NOATIME);
	if (flags & MS_RDONLY)
		mnt_flags |= MNT_READONLY;

	/* The default atime for remount is preservation */
	if ((flags & MS_REMOUNT) &&
	    ((flags & (MS_NOATIME | MS_NODIRATIME | MS_RELATIME |
		       MS_STRICTATIME)) == 0)) {
		mnt_flags &= ~MNT_ATIME_MASK;
		mnt_flags |= path.mnt->mnt_flags & MNT_ATIME_MASK;
	}

	/* Keep only the superblock-level bits of the caller's flags */
	sb_flags = flags & (SB_RDONLY |
			    SB_SYNCHRONOUS |
			    SB_MANDLOCK |
			    SB_DIRSYNC |
			    SB_SILENT |
			    SB_POSIXACL |
			    SB_LAZYTIME |
			    SB_I_VERSION);

	/*
	 * Dispatch on the requested operation. The tests are ordered, so
	 * remount wins over bind, bind over propagation-type changes, and a
	 * plain new mount is the fallback.
	 */
	if (flags & MS_REMOUNT)
		retval = do_remount(&path, flags, sb_flags, mnt_flags,
				    data_page);
	else if (flags & MS_BIND)
		retval = do_loopback(&path, dev_name, flags & MS_REC);
	else if (flags & (MS_SHARED | MS_PRIVATE | MS_SLAVE | MS_UNBINDABLE))
		retval = do_change_type(&path, flags);
	else if (flags & MS_MOVE)
		retval = do_move_mount(&path, dev_name);
	else
		retval = do_new_mount(&path, type_page, sb_flags, mnt_flags,	(3)
				      dev_name, data_page);
dput_out:
	/* release the path filled in by user_path() */
	path_put(&path);
	return retval;
}

(1)user_path:该函数的作用是根据传进来的路径,获取路径对应的path数据结构,path数据结构如下:

/*
 * A resolved location: a dentry together with the mount it was reached
 * through. do_mount() fills one in via user_path() for the mount point.
 */
struct path {
	struct vfsmount *mnt;	/* mount that the dentry belongs to */
	struct dentry *dentry;	/* directory entry for the path itself */
} __randomize_layout;

   struct path中包含了vfsmount数据结构指针,以及当前路径的struct dentry数据结构。dentry对应当前的路径,此处获取到的vfsmount,为该路径当前所在文件系统的vfsmount。如根文件系统是基于内存的ramdisk,则文件系统启动后,未挂载其他块设备时,所有路径的vfsmount都指向ramdisk对应的vfsmount。
   在挂载完成后,这个指针会指向目标块设备的vfsmount。
(注意:函数名为user_path,传递的也是用户态的路径信息。)
(2)安全相关,忽略
(3)do_new_mount接手接下来的挂载工作
fs/namespace.c->do_new_mount

/*
 * Create a new mount of the given filesystem type and attach it at path.
 *
 * Parameters:
 * path: mount point information (dentry and vfsmount)
 * fstype: filesystem type name
 * sb_flags: superblock flags
 * mnt_flags: mount flags
 * name: device name
 * data: private data passed through to the filesystem; usually NULL
 *
 * Returns 0 on success or a negative errno (-EINVAL for a NULL fstype,
 * -ENODEV for an unknown type, -EPERM if mount_too_revealing() objects,
 * otherwise whatever vfs_kern_mount()/do_add_mount() report).
 */
static int do_new_mount(struct path *path, const char *fstype, int sb_flags,
			int mnt_flags, const char *name, void *data)
{
	struct file_system_type *type;
	struct vfsmount *mnt;
	int err;

	if (!fstype)
		return -EINVAL;

	/* look up the file_system_type by its name string */
	type = get_fs_type(fstype);									(1)
	if (!type)
		return -ENODEV;

	mnt = vfs_kern_mount(type, sb_flags, name, data);			(2)
	/* fill in the subtype only for types that ask for it and left it unset */
	if (!IS_ERR(mnt) && (type->fs_flags & FS_HAS_SUBTYPE) &&
	    !mnt->mnt_sb->s_subtype)
		mnt = fs_set_subtype(mnt, fstype);

	/* the type reference from get_fs_type() is dropped on success and on failure */
	put_filesystem(type);
	if (IS_ERR(mnt))
		return PTR_ERR(mnt);

	if (mount_too_revealing(mnt, &mnt_flags)) {
		mntput(mnt);
		return -EPERM;
	}

	/* on failure the new mount was not attached, so drop it here */
	err = do_add_mount(real_mount(mnt), path, mnt_flags);		(3)
	if (err)
		mntput(mnt);
	return err;
}

(1)get_fs_type:通过文件系统类型(字符串),获取对应的struct file_system_type数据结构,每种注册到内核的文件系统类型,都有对应的该数据结构。数据结构如下:

/*
 * Describes one filesystem type. get_fs_type() returns one of these for a
 * type name, and mount_fs() calls its ->mount callback.
 */
struct file_system_type {
	const char *name;	/* type name, e.g. "jffs2" */
	int fs_flags;		/* bitmask of the FS_* values below */
#define FS_REQUIRES_DEV		1 
#define FS_BINARY_MOUNTDATA	2
#define FS_HAS_SUBTYPE		4
#define FS_USERNS_MOUNT		8	/* Can be mounted by userns root */
#define FS_RENAME_DOES_D_MOVE	32768	/* FS will handle d_move() during rename() internally. */
	/* invoked by mount_fs() as type->mount(type, flags, name, data); returns the root dentry */
	struct dentry *(*mount) (struct file_system_type *, int,
		       const char *, void *);
	void (*kill_sb) (struct super_block *);
	struct module *owner;
	struct file_system_type * next;
	struct hlist_head fs_supers;

	/* lock class keys (presumably for lockdep annotation of the locks they are named after) */
	struct lock_class_key s_lock_key;
	struct lock_class_key s_umount_key;
	struct lock_class_key s_vfs_rename_key;
	struct lock_class_key s_writers_key[SB_FREEZE_LEVELS];

	struct lock_class_key i_lock_key;
	struct lock_class_key i_mutex_key;
	struct lock_class_key i_mutex_dir_key;
};

/*
 * file_system_type instance for jffs2. Only owner, name and the mount and
 * kill_sb callbacks are set explicitly; every other member is left
 * zero-initialized by the designated initializer.
 */
static struct file_system_type jffs2_fs_type = {
	.owner =	THIS_MODULE,
	.name =		"jffs2",
	.mount =	jffs2_mount,
	.kill_sb =	jffs2_kill_sb,
};
MODULE_ALIAS_FS("jffs2");

   这里以“jffs2”为例给出了该数据结构在具体文件系统中的实现,基本上只需要实现上面这几个成员,其余成员会在注册时填充。
   显而易见,.mount回调会在mount过程中用到。
(2)vfs_kern_mount():该函数主要是为新的文件系统准备一个挂载结构vfsmount,初始化,在内存中构建文件系统的轮廓,会在后面具体描述这一过程;
(3)do_add_mount():将(2)中创建的vfsmount结构添加到全局结构中,以便在内存中形成一棵树结构。

   下面逐个解析一下这两个函数。
fs/namespace.c->vfs_kern_mount

/*
 * Allocate a struct mount, have the filesystem produce its root dentry via
 * mount_fs(), and link the two together.
 *
 * Parameters:
 * type: pointer to the file_system_type of the filesystem being mounted
 * flags: flags
 * name: device name, e.g. "/dev/mtdblock0"
 * data: private data passed through to the filesystem; usually NULL
 *
 * Returns the vfsmount embedded in the new struct mount, or an ERR_PTR()
 * (-ENODEV for a NULL type, -ENOMEM if allocation fails, or the error
 * reported by mount_fs()).
 */
struct vfsmount *
vfs_kern_mount(struct file_system_type *type, int flags, const char *name, void *data)
{
	struct mount *mnt;
	struct dentry *root;

	if (!type)
		return ERR_PTR(-ENODEV);

	mnt = alloc_vfsmnt(name);					/* allocate the struct mount */
	if (!mnt)
		return ERR_PTR(-ENOMEM);

	if (flags & SB_KERNMOUNT)
		mnt->mnt.mnt_flags = MNT_INTERNAL;

	root = mount_fs(type, flags, name, data);	/* get the root dentry of the target block device */
	if (IS_ERR(root)) {
		/* undo the allocation above and propagate mount_fs()'s error */
		mnt_free_id(mnt);
		free_vfsmnt(mnt);
		return ERR_CAST(root);
	}

	mnt->mnt.mnt_root = root;
	mnt->mnt.mnt_sb = root->d_sb;
	/*
	 * Not attached anywhere yet: the mountpoint is the mount's own root
	 * and the mount is its own parent.
	 */
	mnt->mnt_mountpoint = mnt->mnt.mnt_root;
	mnt->mnt_parent = mnt;
	/* add this mount to the superblock's s_mounts list */
	lock_mount_hash();
	list_add_tail(&mnt->mnt_instance, &root->d_sb->s_mounts);
	unlock_mount_hash();
	return &mnt->mnt;
}

  这里首先申请了struct mount数据结构的内存。struct mount数据结构如下:

/* Excerpt of struct mount; the remaining members are elided in this listing. */
struct mount {
	struct hlist_node mnt_hash;
	struct mount *mnt_parent;	/* parent mount */
	struct dentry *mnt_mountpoint;	/* dentry this mount is attached on */
	struct vfsmount mnt;		/* embedded vfsmount returned to callers */
	......
} __randomize_layout;
// The key members are the embedded vfsmount, mnt_mountpoint and the parent mount pointer.

  随后调用mount_fs获取目标块设备的root dentry数据结构,其中还包含了super block数据结构。
  下面一起看一下mount_fs函数实现
fs/super.c->mount_fs

/*
 * Call the filesystem's ->mount callback and finish setting up the
 * resulting superblock.
 *
 * Parameters:
 * type: pointer to the file_system_type of the filesystem being mounted
 * flags: flags
 * name: device name, e.g. "/dev/mtdblock0"
 * data: private data passed through to the filesystem; usually NULL
 *
 * Returns the root dentry produced by type->mount(), or an ERR_PTR() on
 * failure.
 */
struct dentry *
mount_fs(struct file_system_type *type, int flags, const char *name, void *data)
{
	struct dentry *root;
	struct super_block *sb;
	char *secdata = NULL;
	int error = -ENOMEM;

	/* only non-binary mount data is handed to the security copy hook */
	if (data && !(type->fs_flags & FS_BINARY_MOUNTDATA)) {
		secdata = alloc_secdata();
		if (!secdata)
			goto out;

		error = security_sb_copy_data(data, secdata);
		if (error)
			goto out_free_secdata;
	}

	/* filesystem-specific callback, e.g. jffs2_mount for jffs2 */
	root = type->mount(type, flags, name, data);
	if (IS_ERR(root)) {
		error = PTR_ERR(root);
		goto out_free_secdata;
	}
	sb = root->d_sb;
	BUG_ON(!sb);
	WARN_ON(!sb->s_bdi);

	/*
	 * Write barrier is for super_cache_count(). We place it before setting
	 * SB_BORN as the data dependency between the two functions is the
	 * superblock structure contents that we just set up, not the SB_BORN
	 * flag.
	 */
	smp_wmb();
	sb->s_flags |= SB_BORN;

	error = security_sb_kern_mount(sb, flags, secdata);
	if (error)
		goto out_sb;

	/*
	 * filesystems should never set s_maxbytes larger than MAX_LFS_FILESIZE
	 * but s_maxbytes was an unsigned long long for many releases. Throw
	 * this warning for a little while to try and catch filesystems that
	 * violate this rule.
	 */
	WARN((sb->s_maxbytes < 0), "%s set sb->s_maxbytes to "
		"negative value (%lld)\n", type->name, sb->s_maxbytes);

	/* NOTE(review): s_umount is presumably held for write on return from ->mount; not visible here */
	up_write(&sb->s_umount);
	free_secdata(secdata);
	return root;
out_sb:
	/* drop the root dentry and the still-locked superblock */
	dput(root);
	deactivate_locked_super(sb);
out_free_secdata:
	free_secdata(secdata);
out:
	return ERR_PTR(error);
}

  代码也比较简单,重点来看调用了type->mount回调。这里以jffs2文件系统为例,回到上面的jffs2_fs_type中,可见jffs2的mount回调为“jffs2_mount”,简单的看下代码,可以看到,不只是返回root dentry,回调该函数时,也会进行super block的填充,并将super block的指针赋值给root dentry的d_sb。
fs/jffs2/super.c->jffs2_mount

/*
 * ->mount callback for jffs2: forwards all of its arguments to mount_mtd(),
 * supplying jffs2_fill_super as the fill-super routine, and returns
 * mount_mtd()'s result.
 */
static struct dentry *jffs2_mount(struct file_system_type *fs_type,
			int flags, const char *dev_name,
			void *data)
{
	return mount_mtd(fs_type, flags, dev_name, data, jffs2_fill_super);
}

  再回到vfs_kern_mount函数,在执行完mount_fs,获取到root dentry后,

	/* Excerpt from vfs_kern_mount(): root is the dentry returned by mount_fs() */
	mnt->mnt.mnt_root = root;
	mnt->mnt.mnt_sb = root->d_sb;
	mnt->mnt_mountpoint = mnt->mnt.mnt_root;
	mnt->mnt_parent = mnt;

  设置刚申请的struct mount及其内嵌的struct vfsmount的参数:mnt_root dentry节点、mnt_sb super block数据结构,以及mnt_mountpoint dentry节点。
  这里需要特殊说明的是,vfsmount的mnt_root和mount的mnt_mountpoint是不一样的,这里暂时设置成一样,随后会单独设置mnt_mountpoint。
  可以这样理解:要挂载的磁盘里面有自己的目录结构,mnt_root指未挂载前相对自己的根目录(/),如磁盘里有test1.c,则未挂载前的路径为/test1.c。当挂载到/home/test下时,/home/test就是mnt_mountpoint,此时挂载磁盘的文件路径为/home/test/test1.c,由/home/test和/test1.c两部分组成。

  到这里,vfs_kern_mount工作结束了,返回的指针为struct vfsmount类型的。
  下面来看do_add_mount实现:
fs/namespace.c->do_add_mount

/*
 * Attach a freshly created mount at the given path.
 *
 * Parameters:
 * newmnt: the struct mount obtained from vfs_kern_mount()
 * path: mount point information (dentry and vfsmount)
 * mnt_flags: mount flags
 *
 * Returns 0 on success or a negative errno (-EINVAL, -EBUSY, or whatever
 * lock_mount()/graft_tree() report).
 */
static int do_add_mount(struct mount *newmnt, struct path *path, int mnt_flags)
{
	struct mountpoint *mp;
	struct mount *parent;
	int err;

	mnt_flags &= ~MNT_INTERNAL_FLAGS;

	mp = lock_mount(path);								// get the mountpoint structure for the path's dentry
	if (IS_ERR(mp))
		return PTR_ERR(mp);

	parent = real_mount(path->mnt);						// the parent mount, i.e. the mount path currently lives on
	err = -EINVAL;
	if (unlikely(!check_mnt(parent))) {
		/* that's acceptable only for automounts done in private ns */
		if (!(mnt_flags & MNT_SHRINKABLE))
			goto unlock;
		/* ... and for those we'd better have mountpoint still alive */
		if (!parent->mnt_ns)
			goto unlock;
	}

	/* Refuse the same filesystem on the same mount point */
	err = -EBUSY;
	if (path->mnt->mnt_sb == newmnt->mnt.mnt_sb &&		// mounting the same thing again is pointless
	    path->mnt->mnt_root == path->dentry)
		goto unlock;

	/* the root of the new mount must not be a symlink */
	err = -EINVAL;
	if (d_is_symlink(newmnt->mnt.mnt_root))
		goto unlock;

	newmnt->mnt.mnt_flags = mnt_flags;
	err = graft_tree(newmnt, parent, mp);

unlock:
	/* pairs with lock_mount() above */
	unlock_mount(mp);
	return err;
}

  do_add_mount()主要调用了函数graft_tree()来实现linux文件系统的目录树结构。
fs/namespace.c->graft_tree

/*
 * Validate mnt against the mountpoint mp, then attach it under p.
 *
 * Returns -EINVAL if the superblock has SB_NOUSER set, -ENOTDIR if exactly
 * one of the mountpoint dentry and the new mount's root is a directory,
 * otherwise the result of attach_recursive_mnt() (called with a NULL
 * parent_path).
 */
static int graft_tree(struct mount *mnt, struct mount *p, struct mountpoint *mp)
{
	if (mnt->mnt.mnt_sb->s_flags & SB_NOUSER)
		return -EINVAL;

	/* directory onto directory, or non-directory onto non-directory, only */
	if (d_is_dir(mp->m_dentry) !=
	      d_is_dir(mnt->mnt.mnt_root))
		return -ENOTDIR;

	return attach_recursive_mnt(mnt, p, mp, NULL);
}

graft_tree主要调用attach_recursive_mnt:
fs/namespace.c->attach_recursive_mnt

/*
 * Attach source_mnt under dest_mnt at dest_mp, handling propagation when
 * the destination is a shared mount.
 *
 * Parameters:
 * source_mnt: the mount being attached (source)
 * dest_mnt: the mount it is attached under (parent)
 * dest_mp: mountpoint structure for the attach location
 * parent_path: usually NULL; when set, source_mnt is first detached from it
 *
 * Returns 0 on success or a negative errno.
 */
static int attach_recursive_mnt(struct mount *source_mnt,
			struct mount *dest_mnt,
			struct mountpoint *dest_mp,
			struct path *parent_path)
{
	HLIST_HEAD(tree_list);
	struct mnt_namespace *ns = dest_mnt->mnt_ns;
	struct mountpoint *smp;
	struct mount *child, *p;
	struct hlist_node *n;
	int err;

	/* Preallocate a mountpoint in case the new mounts need
	 * to be tucked under other mounts.
	 */
	smp = get_mountpoint(source_mnt->mnt.mnt_root);				/* mountpoint structure for the source mount's own root dentry (source_mnt->mnt.mnt_root) */
	if (IS_ERR(smp))
		return PTR_ERR(smp);

	/* Is there space to add these mounts to the mount namespace? */
	if (!parent_path) {
		err = count_mounts(ns, source_mnt);
		if (err)
			goto out;
	}

	if (IS_MNT_SHARED(dest_mnt)) {
		err = invent_group_ids(source_mnt, true);
		if (err)
			goto out;
		err = propagate_mnt(dest_mnt, dest_mp, source_mnt, &tree_list);
		/* taken before the error check: out_cleanup_ids expects the lock held */
		lock_mount_hash();
		if (err)
			goto out_cleanup_ids;
		for (p = source_mnt; p; p = next_mnt(p, source_mnt))
			set_mnt_shared(p);
	} else {
		lock_mount_hash();
	}
	if (parent_path) {
		/* source_mnt is detached from parent_path and re-attached at the destination */
		detach_mnt(source_mnt, parent_path);
		attach_mnt(source_mnt, dest_mnt, dest_mp);
		touch_mnt_namespace(source_mnt->mnt_ns);
	} else {
		mnt_set_mountpoint(dest_mnt, dest_mp, source_mnt);				(1)
		commit_tree(source_mnt);
	}

	hlist_for_each_entry_safe(child, n, &tree_list, mnt_hash) {			// process the mounts left on tree_list by propagate_mnt() (presumably the propagated copies)
		struct mount *q;
		hlist_del_init(&child->mnt_hash);
		/* if a mount already sits at child's mountpoint, move it onto smp under child */
		q = __lookup_mnt(&child->mnt_parent->mnt,
				 child->mnt_mountpoint);
		if (q)
			mnt_change_mountpoint(child, smp, q);
		commit_tree(child);
	}
	put_mountpoint(smp);
	unlock_mount_hash();

	return 0;

 out_cleanup_ids:
	/* tear down whatever is still queued on tree_list */
	while (!hlist_empty(&tree_list)) {
		child = hlist_entry(tree_list.first, struct mount, mnt_hash);
		child->mnt_parent->mnt_ns->pending_mounts = 0;
		umount_tree(child, UMOUNT_SYNC);
	}
	unlock_mount_hash();
	cleanup_group_ids(source_mnt, NULL);
 out:
	ns->pending_mounts = 0;

	/* drop the preallocated mountpoint under mount_lock */
	read_seqlock_excl(&mount_lock);
	put_mountpoint(smp);
	read_sequnlock_excl(&mount_lock);

	return err;
}

(1)主要调用mnt_set_mountpoint函数来设置参数。

/*
 * Record that child_mnt is mounted on mountpoint mp of parent mount mnt.
 *
 * mnt: the parent mount
 * mp: the mountpoint structure being mounted on
 * child_mnt: the mount being attached
 *
 * vfsmount lock must be held for write
 */
void mnt_set_mountpoint(struct mount *mnt,
			struct mountpoint *mp,
			struct mount *child_mnt)
{
	mp->m_count++;		/* one more user of this mountpoint */
	mnt_add_count(mnt, 1);	/* essentially, that's mntget */
	child_mnt->mnt_mountpoint = dget(mp->m_dentry);
	child_mnt->mnt_parent = mnt;
	child_mnt->mnt_mp = mp;
	/* add the child to the mountpoint's m_list */
	hlist_add_head(&child_mnt->mnt_mp_list, &mp->m_list);
}

  首先将挂载点mp的计数m_count加1,并通过mnt_add_count将父mount的引用计数加1(相当于mntget),随后设置子mount数据结构的mnt_mountpoint为挂载路径的dentry。这里也验证了上文提到的vfsmount的mnt_root和mount的mnt_mountpoint的区别。
  接着设置mnt_parent和mnt_mp,并将child_mnt->mnt_mp_list加入到mp的挂载链表m_list中。
  mnt_set_mountpoint和commit_tree,两个函数干的事情都比较简单,主要是将当前新建的vfsmount结构与挂载点挂钩,并和挂载点所在的vfsmount形成一种父子关系结构。形成的结构图如下所示:
mount系统调用(sys_mount)分析_第1张图片

你可能感兴趣的:(Linux,文件系统,linux)