Linux虚拟文件系统(安装根文件系统)

  安装根文件系统是系统初始化的关键部分。Linux内核允许根文件系统放在很多不同的地方,比如硬盘分区、软盘、通过NFS共享的远程文件系统以及保存在ramdisk中。内核要在变量ROOT_DEV中寻找包含根文件系统的磁盘主设备号。当编译内核时,或者向最初的启动装入程序传递一个合适的“root”选项时,根文件系统可以被指定为/dev目录下的一个设备文件。

安装根文件系统分为两个阶段:

1,内核安装特殊rootfs文件系统,该文件系统仅提供一个作为初始安装点的空目录

start_kernel()->vfs_caches_init()->mnt_init()->init_rootfs()

/*
 * Set up the rootfs filesystem type: initialize the ramfs backing-device
 * info and register rootfs_fs_type. Returns 0 on success or the error
 * reported by bdi_init() / register_filesystem().
 */
int __init init_rootfs(void)
{
	int ret = bdi_init(&ramfs_backing_dev_info);

	if (ret)
		return ret;

	ret = register_filesystem(&rootfs_fs_type);
	if (!ret)
		return 0;

	/* registration failed: undo the bdi_init() done above */
	bdi_destroy(&ramfs_backing_dev_info);
	return ret;
}
/*
 * Backing-device description named "ramfs"; init_rootfs() passes it to
 * bdi_init() before registering the rootfs filesystem type.
 */
static struct backing_dev_info ramfs_backing_dev_info = {
	.name		= "ramfs",
	.ra_pages	= 0,	/* No readahead */
	/*
	 * Capability flags. Going by their names: no dirty accounting or
	 * writeback, direct and copy mappings allowed, for read, write and
	 * exec. NOTE(review): the flag definitions are not part of this
	 * excerpt - confirm the exact semantics.
	 */
	.capabilities	= BDI_CAP_NO_ACCT_AND_WRITEBACK |
			  BDI_CAP_MAP_DIRECT | BDI_CAP_MAP_COPY |
			  BDI_CAP_READ_MAP | BDI_CAP_WRITE_MAP | BDI_CAP_EXEC_MAP,
};
/**
 *	register_filesystem - register a new filesystem
 *	@fs: the file system structure
 *
 *	Adds the file system passed to the list of file systems the kernel
 *	is aware of for mount and other syscalls. Returns 0 on success,
 *	or a negative errno code on an error (-EBUSY when @fs is already
 *	linked or another type with the same name is registered).
 *
 *	The &struct file_system_type that is passed is linked into the kernel
 *	structures and must not be freed until the file system has been
 *	unregistered.
 */
int register_filesystem(struct file_system_type * fs)
{
	struct file_system_type **slot;
	int ret;

	/* a '.' in the type name is treated as a programming error */
	BUG_ON(strchr(fs->name, '.'));

	/* a non-empty ->next means this structure is already on a list */
	if (fs->next)
		return -EBUSY;

	INIT_LIST_HEAD(&fs->fs_supers);

	write_lock(&file_systems_lock);
	/*
	 * Look the name up in the list of registered types. The returned
	 * link is non-empty when the name is already taken; otherwise @fs
	 * is stored through it.
	 */
	slot = find_filesystem(fs->name, strlen(fs->name));
	if (!*slot) {
		*slot = fs;
		ret = 0;
	} else {
		ret = -EBUSY;
	}
	write_unlock(&file_systems_lock);

	return ret;
}

根文件系统定义如下

/*
 * Filesystem type descriptor for "rootfs"; init_rootfs() registers it
 * with register_filesystem().
 */
static struct file_system_type rootfs_fs_type = {
	.name		= "rootfs",
	.get_sb		= rootfs_get_sb,	/* builds the superblock, see rootfs_get_sb() */
	.kill_sb	= kill_litter_super,	/* teardown hook, defined elsewhere */
};

下面看看他的两个函数

/*
 * get_sb hook of rootfs: obtain a device-less superblock via
 * get_sb_nodev(), filled in by ramfs_fill_super, with MS_NOUSER added to
 * the caller's flags. @dev_name is not used.
 */
static int rootfs_get_sb(struct file_system_type *fs_type,
	int flags, const char *dev_name, void *data, struct vfsmount *mnt)
{
	int sb_flags = flags | MS_NOUSER;

	return get_sb_nodev(fs_type, sb_flags, data, ramfs_fill_super, mnt);
}
/*
 * Create a superblock that is not backed by a device and attach it to
 * @mnt.
 *
 * A superblock is obtained from sget() with no comparison callback and
 * set_anon_super as the setup callback, then @fill_super is called to
 * populate it. The third argument to @fill_super is 1 when MS_SILENT is
 * set in @flags and 0 otherwise.
 *
 * Returns 0 on success, the error encoded in sget()'s result, or the
 * error returned by @fill_super (the superblock is then released with
 * deactivate_locked_super()).
 */
int get_sb_nodev(struct file_system_type *fs_type,
	int flags, void *data,
	int (*fill_super)(struct super_block *, void *, int),
	struct vfsmount *mnt)
{
	struct super_block *sb;
	int silent;
	int ret;

	sb = sget(fs_type, NULL, set_anon_super, NULL);
	if (IS_ERR(sb))
		return PTR_ERR(sb);

	sb->s_flags = flags;

	/* for rootfs the callback passed in is ramfs_fill_super */
	silent = (flags & MS_SILENT) ? 1 : 0;
	ret = fill_super(sb, data, silent);
	if (ret) {
		deactivate_locked_super(sb);
		return ret;
	}

	sb->s_flags |= MS_ACTIVE;
	/* associate the mount with its superblock */
	simple_set_mnt(mnt, sb);
	return 0;
}
/**
 *	sget	-	find or create a superblock
 *	@type:	filesystem type superblock should belong to
 *	@test:	comparison callback
 *	@set:	setup callback
 *	@data:	argument to each of them
 *
 *	If @test is given, @type's list of superblocks is searched for one
 *	that @test accepts, and that superblock is returned. Otherwise a new
 *	superblock is allocated with alloc_super(), passed to @set, linked
 *	into the global and per-type lists and returned. On failure an
 *	ERR_PTR() value is returned: -ENOMEM if allocation fails, or the
 *	error reported by @set.
 */
 /* Find an existing superblock or create a new one. */
struct super_block *sget(struct file_system_type *type,
			int (*test)(struct super_block *,void *),
			int (*set)(struct super_block *,void *),
			void *data)
{
	struct super_block *s = NULL;
	struct super_block *old;
	int err;

retry:
	/* every pass over the list starts by taking sb_lock */
	spin_lock(&sb_lock);
	if (test) {
		list_for_each_entry(old, &type->fs_supers, s_instances) {
			if (!test(old, data))
				continue;
			/*
			 * A matching superblock could not be grabbed: rescan.
			 * NOTE(review): grab_super() is not shown here; since
			 * retry takes sb_lock again and the return below does
			 * not unlock it, grab_super() presumably drops sb_lock
			 * itself - confirm.
			 */
			if (!grab_super(old))
				goto retry;
			/*
			 * A spare superblock allocated on an earlier pass is
			 * not needed after all. alloc_super() returns it with
			 * s_umount held for write, so release that first.
			 */
			if (s) {
				up_write(&s->s_umount);
				destroy_super(s);
			}
			return old;
		}
	}
	if (!s) {
		/*
		 * No match and nothing allocated yet: drop sb_lock, allocate
		 * a superblock, then go back and search again before using it.
		 */
		spin_unlock(&sb_lock);
		s = alloc_super(type);
		if (!s)
			return ERR_PTR(-ENOMEM);
		goto retry;
	}

	/* let the caller's setup callback prepare the new superblock */
	err = set(s, data);
	if (err) {
		spin_unlock(&sb_lock);
		up_write(&s->s_umount);
		destroy_super(s);
		return ERR_PTR(err);
	}
	/* finish initializing the new superblock */
	s->s_type = type;
	strlcpy(s->s_id, type->name, sizeof(s->s_id));
	/* append to the global super_blocks list and link into the type's list */
	list_add_tail(&s->s_list, &super_blocks);
	list_add(&s->s_instances, &type->fs_supers);
	spin_unlock(&sb_lock);
	get_filesystem(type);
	return s;
}
/*
 * All superblock objects are linked together in a circular doubly linked
 * list. super_blocks is the head of that list, and each superblock's
 * s_list field holds the pointers to its neighbours in the list.
 */
LIST_HEAD(super_blocks);
/**
 *	alloc_super	-	create new superblock
 *	@type:	filesystem type superblock should belong to
 *
 *	Allocates and initializes a new &struct super_block.  alloc_super()
 *	returns a pointer new superblock or %NULL if allocation had failed.
 */
static struct super_block *alloc_super(struct file_system_type *type)
{	
	/*从内存中申请sb*/
	struct super_block *s = kzalloc(sizeof(struct super_block),  GFP_USER);
	static const struct super_operations default_op;

	if (s) {
		if (security_sb_alloc(s)) {
			kfree(s);
			s = NULL;
			goto out;
		}
		/*初始化*/
		INIT_LIST_HEAD(&s->s_files);
		INIT_LIST_HEAD(&s->s_instances);
		INIT_HLIST_HEAD(&s->s_anon);
		INIT_LIST_HEAD(&s->s_inodes);
		INIT_LIST_HEAD(&s->s_dentry_lru);
		init_rwsem(&s->s_umount);
		mutex_init(&s->s_lock);
		lockdep_set_class(&s->s_umount, &type->s_umount_key);
		/*
		 * The locking rules for s_lock are up to the
		 * filesystem. For example ext3fs has different
		 * lock ordering than usbfs:
		 */
		lockdep_set_class(&s->s_lock, &type->s_lock_key);
		/*
		 * sget() can have s_umount recursion.
		 *
		 * When it cannot find a suitable sb, it allocates a new
		 * one (this one), and tries again to find a suitable old
		 * one.
		 *
		 * In case that succeeds, it will acquire the s_umount
		 * lock of the old one. Since these are clearly distrinct
		 * locks, and this object isn't exposed yet, there's no
		 * risk of deadlocks.
		 *
		 * Annotate this by putting this lock in a different
		 * subclass.
		 */
		down_write_nested(&s->s_umount, SINGLE_DEPTH_NESTING);
		s->s_count = S_BIAS;
		atomic_set(&s->s_active, 1);
		mutex_init(&s->s_vfs_rename_mutex);
		mutex_init(&s->s_dquot.dqio_mutex);
		mutex_init(&s->s_dquot.dqonoff_mutex);
		init_rwsem(&s->s_dquot.dqptr_sem);
		init_waitqueue_head(&s->s_wait_unfrozen);
		s->s_maxbytes = MAX_NON_LFS;
		s->dq_op = sb_dquot_ops;
		s->s_qcop = sb_quotactl_ops;
		s->s_op = &default_op;
		s->s_time_gran = 1000000000;
	}
out:
	return s;
}

kill_litter_super的过程相反,这里不再写了。

构造根目录是由init_mount_tree()函数实现的,该函数在前面已经介绍过了。

2,安装实际根文件系统

关于__setup宏

__setup宏来注册关键字及相关联的处理函数,__setup宏在include/linux/init.h中定义,其原型如下:
__setup(string, _handler);
其中:string是关键字,_handler是关联处理函数。__setup只是告诉内核在启动时输入串中含有string时,内核要去
执行_handler。String必须以“=”符结束以使parse_args更方便解析。紧随“=”后的任何文本都会作为输入传给
_handler。下面的例子来自于init/do_mounts.c,其中root_dev_setup作为处理程序被注册给“root=”关键字:
 __setup("root=", root_dev_setup);

比如我们在启动项参数中有

  noinitrd root=/dev/mtdblock2 console=/linuxrc

 setup_arch解释时会发现root=/dev/mtdblock2,然后它就会调用root_dev_setup

/*
 * Handler for the "root=" boot option. @line is the text that follows
 * "root=" on the command line; it is copied into saved_root_name, bounded
 * by the size of that buffer, for prepare_namespace() to use later.
 *
 * Always returns 1. NOTE(review): by __setup convention a non-zero return
 * presumably marks the option as handled - confirm.
 */
static int __init root_dev_setup(char *line)
{
	strlcpy(saved_root_name, line, sizeof(saved_root_name));
	return 1;
}

/* Register root_dev_setup as the handler for the "root=" keyword. */
__setup("root=", root_dev_setup);


start_kernel->rest_init->init->prepare_namespace->

/*
 * Prepare the namespace - decide what/where to mount, load ramdisks, etc.
 *
 * Resolves the root device from the saved "root=" option, lets
 * initrd_load() take over if it reports success, otherwise mounts the
 * real root through mount_root(). Finally the mount at the current
 * directory is moved onto "/" and the process chroots into it.
 */
void __init prepare_namespace(void)
{
	int is_floppy;

	/* if root_delay is non-zero, sleep that many seconds first */
	if (root_delay) {
		printk(KERN_INFO "Waiting %dsec before mounting root device...\n",
		       root_delay);
		ssleep(root_delay);
	}

	/*
	 * wait for the known devices to complete their probing
	 *
	 * Note: this is a potential source of long boot delays.
	 * For example, it is not atypical to wait 5 seconds here
	 * for the touchpad of a laptop to initialize.
	 */
	wait_for_device_probe();
	/*
	 * NOTE(review): the original note here said this call creates
	 * /dev/ram0 for the initrd. md_run_setup() is not shown in this
	 * excerpt; by its name it presumably sets up MD (software RAID)
	 * devices instead - confirm.
	 */
	md_run_setup();

	/* saved_root_name holds the device name given by the "root=" boot option */
	if (saved_root_name[0]) {
		root_device_name = saved_root_name;
		/*
		 * Names beginning with "mtd" or "ubi" are handed to
		 * mount_block_root() by name, skipping the device-number
		 * lookup and the initrd path below.
		 */
		if (!strncmp(root_device_name, "mtd", 3) ||
		    !strncmp(root_device_name, "ubi", 3)) {
			mount_block_root(root_device_name, root_mountflags);
			goto out;
		}
		/* translate the name into a device number (major and minor) */
		ROOT_DEV = name_to_dev_t(root_device_name);
		if (strncmp(root_device_name, "/dev/", 5) == 0)
			root_device_name += 5;	/* strip the leading "/dev/" */
	}

	/* a non-zero result from initrd_load() skips the regular root mount */
	if (initrd_load())
		goto out;

	/* wait for any asynchronous scanning to complete */
	if ((ROOT_DEV == 0) && root_wait) {
		printk(KERN_INFO "Waiting for root device %s...\n",
			saved_root_name);
		/*
		 * Poll every 100 ms until driver_probe_done() returns 0 and
		 * the saved name resolves to a non-zero device number.
		 */
		while (driver_probe_done() != 0 ||
			(ROOT_DEV = name_to_dev_t(saved_root_name)) == 0)
			msleep(100);
		async_synchronize_full();
	}

	is_floppy = MAJOR(ROOT_DEV) == FLOPPY_MAJOR;

	/*
	 * Root is on a floppy and rd_doload is set: if rd_load_disk(0)
	 * returns non-zero, switch the root device to Root_RAM0.
	 */
	if (is_floppy && rd_doload && rd_load_disk(0))
		ROOT_DEV = Root_RAM0;
	/* the actual mount of the root device */
	mount_root();
out:
	/*
	 * NOTE(review): the original note described this as unmounting devfs
	 * from /dev of the virtual root. devtmpfs_mount() is not shown here;
	 * by its name it presumably mounts devtmpfs on the "dev" directory
	 * relative to the current directory - confirm.
	 */
	devtmpfs_mount("dev");
	/*
	 * Move the mount at the current directory onto "/". The current
	 * directory is /root when do_mount_root() ran, since it does
	 * sys_chdir("/root") after mounting the root filesystem there.
	 */
	sys_mount(".", "/", NULL, MS_MOVE, NULL);
	/* make the current directory the root directory of the system */
	sys_chroot(".");
}

mount_root操作

/*
 * Mount the root filesystem selected by ROOT_DEV.
 *
 * Depending on the kernel configuration this tries an NFS root first
 * (falling back to the floppy device on failure), handles a root floppy,
 * and in the common case creates the /dev/root node for ROOT_DEV and
 * mounts it through mount_block_root().
 */
void __init mount_root(void)
{
#ifdef CONFIG_ROOT_NFS
	if (MAJOR(ROOT_DEV) == UNNAMED_MAJOR) {
		int nfs_ok = mount_nfs_root();

		if (nfs_ok)
			return;

		printk(KERN_ERR "VFS: Unable to mount root fs via NFS, trying floppy.\n");
		ROOT_DEV = Root_FD0;
	}
#endif
#ifdef CONFIG_BLK_DEV_FD
	if (MAJOR(ROOT_DEV) == FLOPPY_MAJOR) {
		/* rd_doload is 2 for a dual initrd/ramload setup */
		if (rd_doload != 2) {
			change_floppy("root floppy");
		} else if (rd_load_disk(1)) {
			/* ramdisk loaded from the floppy: root moves to RAM disk 1 */
			ROOT_DEV = Root_RAM1;
			root_device_name = NULL;
		}
	}
#endif
#ifdef CONFIG_BLOCK
	/* common path: create the "/dev/root" node for ROOT_DEV and mount it */
	create_dev("/dev/root", ROOT_DEV);
	mount_block_root("/dev/root", root_mountflags);
#endif
}
/*
 * Mount the root filesystem from device @name with mount flags @flags,
 * trying each candidate filesystem type in turn.
 *
 * Returns only on success. If a mount attempt fails with an error other
 * than -EACCES or -EINVAL, or if every candidate type was rejected, the
 * function prints diagnostics and panics.
 */
void __init mount_block_root(char *name, int flags)
{
	/* buffer that will hold the candidate filesystem type names */
	char *fs_names = __getname_gfp(GFP_KERNEL
		| __GFP_NOTRACK_FALSE_POSITIVE);
	char *p;
#ifdef CONFIG_BLOCK
	char b[BDEVNAME_SIZE];
#else
	const char *b = name;
#endif
	/*
	 * Fill fs_names with the filesystem types to try. The loops below
	 * read it as a sequence of NUL-terminated names ended by an empty
	 * string. NOTE(review): get_fs_names() is not shown here; the
	 * original note says it yields the type given in the boot options
	 * if there is one, otherwise every registered filesystem type -
	 * confirm.
	 */
	get_fs_names(fs_names);
retry:
	for (p = fs_names; *p; p += strlen(p)+1) {
		/*
		 * The actual mount: do_mount_root() uses the mount system
		 * call to attach the filesystem at /root, with p as the
		 * filesystem type name.
		 */
		int err = do_mount_root(name, p, flags, root_mount_data);
		switch (err) {
			case 0:
				goto out;
			case -EACCES:
				/*
				 * Restart from the first type with MS_RDONLY
				 * added. NOTE(review): nothing here checks
				 * whether MS_RDONLY was already set, so a
				 * persistent -EACCES would loop forever -
				 * confirm this cannot happen.
				 */
				flags |= MS_RDONLY;
				goto retry;
			case -EINVAL:
				/* this type did not fit: try the next one */
				continue;
		}
		/*
		 * Any other error ends up here.
		 *
		 * Allow the user to distinguish between failed sys_open
		 * and bad superblock on root device.
		 * and give them a list of the available devices
		 */
#ifdef CONFIG_BLOCK
		__bdevname(ROOT_DEV, b);
#endif
		printk("VFS: Cannot open root device \"%s\" or %s\n",
				root_device_name, b);
		printk("Please append a correct \"root=\" boot option; here are the available partitions:\n");

		printk_all_partitions();
#ifdef CONFIG_DEBUG_BLOCK_EXT_DEVT
		printk("DEBUG_BLOCK_EXT_DEVT is enabled, you need to specify "
		       "explicit textual name for \"root=\" boot option.\n");
#endif
		panic("VFS: Unable to mount root fs on %s", b);
	}

	/* every candidate type returned -EINVAL: list what was tried and panic */
	printk("List of all partitions:\n");
	printk_all_partitions();
	printk("No filesystem could mount root, tried: ");
	for (p = fs_names; *p; p += strlen(p)+1)
		printk(" %s", p);
	printk("\n");
#ifdef CONFIG_BLOCK
	__bdevname(ROOT_DEV, b);
#endif
	panic("VFS: Unable to mount root fs on %s", b);
out:
	/* success: release the name buffer */
	putname(fs_names);
}
 
/*
 * Mount device @name as filesystem type @fs on /root with the given
 * @flags and @data, then make /root the current directory.
 *
 * On success ROOT_DEV is updated from the mounted superblock, a summary
 * line is printed and 0 is returned. If sys_mount() fails, its error
 * code is returned and nothing else is done.
 */
static int __init do_mount_root(char *name, char *fs, int flags, void *data)
{
	struct super_block *sb;
	const char *ro_note;
	int ret;

	/* the mount system call does the real work */
	ret = sys_mount(name, "/root", fs, flags, data);
	if (ret)
		return ret;

	/* enter the freshly mounted filesystem */
	sys_chdir("/root");

	sb = current->fs->pwd.mnt->mnt_sb;
	ROOT_DEV = sb->s_dev;
	ro_note = (sb->s_flags & MS_RDONLY) ? " readonly" : "";
	printk("VFS: Mounted root (%s filesystem)%s on device %u:%u.\n",
	       sb->s_type->name, ro_note,
	       MAJOR(ROOT_DEV), MINOR(ROOT_DEV));
	return 0;
}

到此,根文件系统的安装过程算是完成了,中间关于mount等系统调用将在后面分析。可以看出总的步骤主要有:

1,创建一个rootfs,这个是虚拟的rootfs,是内存文件系统(和ramfs类似,其超级块由ramfs_fill_super填充),后面还会在其上安装具体的根文件系统;

2,从系统启动参数中获取设备文件名以及设备号;

3,调用系统调用创建设备文件/dev/root,并调用mount系统调用进行实际的安装操作;

4,改变进程当前目录;

5,移动rootfs文件系统根目录上的已经安装文件系统的安装点;
rootfs特殊文件系统没有被卸载,他只是隐藏在基于磁盘的根文件系统下了。

你可能感兴趣的:(Linux虚拟文件系统(安装根文件系统))