Linux虚拟文件系统(内核初始化<一>)

Linux虚拟文件系统的初始化,在内核初始化函数start_kernel()中主要通过调用以下两个函数来完成。
/*
 * Excerpt of start_kernel(): only the two VFS-related calls are shown.
 * The "……" lines stand for code the article leaves out.
 */
asmlinkage void __init start_kernel(void)
{
	……
	/* Early VFS setup. */
	vfs_caches_init_early();
	……
	/* Main VFS setup; receives totalram_pages as its page count. */
	vfs_caches_init(totalram_pages);
	……
}

一、早期初始化

虚拟文件系统的早期初始化由函数vfs_caches_init_early()实现,主要负责dentry和inode两个hashtable的初始化工作。

/*
 * Early VFS initialization, called from start_kernel().
 * Runs the early dentry setup followed by the early inode setup.
 */
void __init vfs_caches_init_early(void)
{
	/* Set up the two hash tables: dentry first, then inode. */
	dcache_init_early();
	inode_init_early();
}

1.1 dcache

/*
 * Early setup of the dentry hash table.
 *
 * Does nothing when hashdist is set; dcache_init() performs the same
 * allocation later in that case.
 */
static void __init dcache_init_early(void)
{
	int nr_buckets;
	int i;

	/*
	 * If hashes are distributed across NUMA nodes, hash allocation is
	 * deferred until vmalloc space is available, so only the
	 * non-distributed case is handled here.
	 */
	if (!hashdist) {
		/*
		 * Allocate the bucket array. d_hash_shift and d_hash_mask are
		 * passed by address (presumably filled in by the allocator).
		 */
		dentry_hashtable = alloc_large_system_hash("Dentry cache",
							   sizeof(struct hlist_head),
							   dhash_entries,
							   13,
							   HASH_EARLY,
							   &d_hash_shift,
							   &d_hash_mask,
							   0);

		/* Every bucket starts out as an empty hlist. */
		nr_buckets = 1 << d_hash_shift;
		for (i = 0; i < nr_buckets; i++)
			INIT_HLIST_HEAD(&dentry_hashtable[i]);
	}
}

1.2  inode

/*
 * Early setup of the inode hash table.
 *
 * Does nothing when hashdist is set; inode_init() performs the same
 * allocation later in that case.
 */
void __init inode_init_early(void)
{
	int bucket;
	int bucket_count;

	/*
	 * If hashes are distributed across NUMA nodes, hash allocation is
	 * deferred until vmalloc space is available, so only the
	 * non-distributed case is handled here.
	 */
	if (!hashdist) {
		/* Allocate the inode hash bucket array (HASH_EARLY request). */
		inode_hashtable = alloc_large_system_hash("Inode-cache",
							  sizeof(struct hlist_head),
							  ihash_entries,
							  14,
							  HASH_EARLY,
							  &i_hash_shift,
							  &i_hash_mask,
							  0);

		/* Reset every bucket to an empty hlist head. */
		bucket_count = 1 << i_hash_shift;
		for (bucket = 0; bucket < bucket_count; bucket++)
			INIT_HLIST_HEAD(&inode_hashtable[bucket]);
	}
}

二、后期初始化

这一阶段对inode、dentry、mount、字符设备驱动模型以及块设备驱动模型做了相应的初始化。

/*
 * Main VFS initialization, called from start_kernel().
 *
 * @mempages: page count supplied by the caller (start_kernel() passes
 *            totalram_pages); it is reduced by a reserve before being
 *            handed on to files_init().
 */
void __init vfs_caches_init(unsigned long mempages)
{
	unsigned long in_use;
	unsigned long reserve;

	/*
	 * Base hash sizes on available memory, with a reserve equal to
	 * 150% of current kernel size (pages that are not free right now),
	 * capped at mempages - 1.
	 */
	in_use = mempages - nr_free_pages();
	reserve = min(in_use * 3/2, mempages - 1);
	mempages -= reserve;

	/* Slab cache for path name buffers of PATH_MAX bytes. */
	names_cachep = kmem_cache_create("names_cache", PATH_MAX, 0,
					 SLAB_HWCACHE_ALIGN | SLAB_PANIC, NULL);

	/* Per-subsystem setup; the call order is kept as in the original. */
	dcache_init();		/* dentry cache */
	inode_init();		/* inode cache */
	files_init(mempages);	/* struct file cache and file limits */
	mnt_init();		/* mount infrastructure */
	bdev_cache_init();	/* block device pseudo filesystem */
	chrdev_init();		/* character device map */
}

2.1 dentry初始化

/*
 * Late dentry cache setup: creates the dentry slab cache, registers the
 * dcache shrinker and, when hashdist is set, allocates the dentry hash
 * table that dcache_init_early() skipped.
 */
static void __init dcache_init(void)
{
	int nr_buckets;
	int i;

	/*
	 * A constructor could be added for stable state like the lists,
	 * but it is probably not worth it because of the cache nature
	 * of the dcache.
	 */
	dentry_cache = KMEM_CACHE(dentry,
				  SLAB_RECLAIM_ACCOUNT | SLAB_PANIC | SLAB_MEM_SPREAD);

	register_shrinker(&dcache_shrinker);

	/*
	 * Without hashdist the hash table was already set up in
	 * dcache_init_early(); only the deferred case is handled here.
	 * Note the flags argument is 0 rather than HASH_EARLY.
	 */
	if (hashdist) {
		dentry_hashtable = alloc_large_system_hash("Dentry cache",
							   sizeof(struct hlist_head),
							   dhash_entries,
							   13,
							   0,
							   &d_hash_shift,
							   &d_hash_mask,
							   0);

		/* Every bucket starts out as an empty hlist. */
		nr_buckets = 1 << d_hash_shift;
		for (i = 0; i < nr_buckets; i++)
			INIT_HLIST_HEAD(&dentry_hashtable[i]);
	}
}

2.2 inode初始化

/*
 * Late inode cache setup: creates the inode slab cache, registers the
 * icache shrinker and, when hashdist is set, allocates the inode hash
 * table that inode_init_early() skipped.
 */
void __init inode_init(void)
{
	int bucket;
	int bucket_count;

	/* Slab cache for struct inode; objects are constructed by init_once. */
	inode_cachep = kmem_cache_create("inode_cache",
					 sizeof(struct inode),
					 0,
					 SLAB_RECLAIM_ACCOUNT | SLAB_PANIC |
					 SLAB_MEM_SPREAD,
					 init_once);

	register_shrinker(&icache_shrinker);

	/*
	 * Without hashdist the hash table was already set up in
	 * inode_init_early(); only the deferred case is handled here.
	 */
	if (hashdist) {
		inode_hashtable = alloc_large_system_hash("Inode-cache",
							  sizeof(struct hlist_head),
							  ihash_entries,
							  14,
							  0,
							  &i_hash_shift,
							  &i_hash_mask,
							  0);

		/* Reset every bucket to an empty hlist head. */
		bucket_count = 1 << i_hash_shift;
		for (bucket = 0; bucket < bucket_count; bucket++)
			INIT_HLIST_HEAD(&inode_hashtable[bucket]);
	}
}

2.3 files初始化

/*
 * Set up the struct file slab cache and the system-wide open file limit.
 *
 * @mempages: number of pages the limit is derived from.
 */
void __init files_init(unsigned long mempages)
{
	int max_by_mem;

	/* Slab cache for struct file objects. */
	filp_cachep = kmem_cache_create("filp", sizeof(struct file), 0,
					SLAB_HWCACHE_ALIGN | SLAB_PANIC, NULL);

	/*
	 * One file with associated inode and dcache is very roughly 1K.
	 * Per default don't use more than 10% of our memory for files.
	 * The result is never allowed to drop below NR_FILE.
	 */
	max_by_mem = (mempages * (PAGE_SIZE / 1024)) / 10;
	files_stat.max_files = max_by_mem;
	if (files_stat.max_files < NR_FILE)
		files_stat.max_files = NR_FILE;

	/*
	 * NOTE(review): presumably prepares deferred handling of file
	 * descriptor tables; the helper is not visible here, confirm.
	 */
	files_defer_init();

	/* The nr_files per-cpu counter starts at 0. */
	percpu_counter_init(&nr_files, 0);
}

2.4 mount初始化

/*
 * Mount infrastructure setup: the vfsmount slab cache, the mount hash
 * table, sysfs, the "fs" kobject, rootfs and the initial mount tree.
 */
void __init mnt_init(void)
{
	unsigned slot;
	int ret;

	init_rwsem(&namespace_sem);

	/* Slab cache backing struct vfsmount allocations. */
	mnt_cache = kmem_cache_create("mnt_cache", sizeof(struct vfsmount),
				      0, SLAB_HWCACHE_ALIGN | SLAB_PANIC, NULL);

	/* The mount hash table lives in one freshly allocated page. */
	mount_hashtable = (struct list_head *)__get_free_page(GFP_ATOMIC);
	if (mount_hashtable == NULL)
		panic("Failed to allocate mount hash table\n");

	printk("Mount-cache hash table entries: %lu\n", HASH_SIZE);

	/* Each of the HASH_SIZE slots starts as an empty list. */
	for (slot = 0; slot < HASH_SIZE; slot++)
		INIT_LIST_HEAD(&mount_hashtable[slot]);

	/* sysfs and kobject failures are only warned about, not fatal. */
	ret = sysfs_init();
	if (ret != 0)
		printk(KERN_WARNING "%s: sysfs_init error: %d\n",
			__func__, ret);

	fs_kobj = kobject_create_and_add("fs", NULL);
	if (fs_kobj == NULL)
		printk(KERN_WARNING "%s: kobj create error\n", __func__);

	/* Per the original note, this sets up ramfs and rootfs. */
	init_rootfs();
	/* Mount rootfs and make it the root of the initial namespace. */
	init_mount_tree();
}
/*
 * Build the initial mount tree: mount "rootfs", wrap it in a new mount
 * namespace, attach that namespace to init_task and make the rootfs root
 * both the working directory and the root of the current task.
 */
static void __init init_mount_tree(void)
{
	struct vfsmount *rootfs_mnt;
	struct mnt_namespace *init_ns;
	struct path root_path;

	rootfs_mnt = do_kern_mount("rootfs", 0, "rootfs", NULL);
	if (IS_ERR(rootfs_mnt))
		panic("Can't create rootfs");

	/* New namespace whose root mount is rootfs. */
	init_ns = create_mnt_ns(rootfs_mnt);
	if (IS_ERR(init_ns))
		panic("Can't allocate initial namespace");

	/* Hand the namespace to init_task. */
	init_task.nsproxy->mnt_ns = init_ns;
	/* Presumably takes an extra reference on the namespace; confirm. */
	get_mnt_ns(init_ns);

	/* Path describing the namespace root: its mount and root dentry. */
	root_path.mnt = init_ns->root;
	root_path.dentry = init_ns->root->mnt_root;

	/* Use that path as both cwd and root of the current task's fs. */
	set_fs_pwd(current->fs, &root_path);
	set_fs_root(current->fs, &root_path);
}

2.4.1 创建命名空间

/**
 * create_mnt_ns - creates a private namespace and adds a root filesystem
 * @mnt: pointer to the new root filesystem mountpoint
 *
 * Returns the new namespace, or the error pointer produced by
 * alloc_mnt_ns() unchanged when the allocation fails.
 */
struct mnt_namespace *create_mnt_ns(struct vfsmount *mnt)
{
	struct mnt_namespace *ns = alloc_mnt_ns();

	/* Allocation failed: pass the error pointer straight through. */
	if (IS_ERR(ns))
		return ns;

	/* Cross-link the namespace and its root mount. */
	mnt->mnt_ns = ns;
	ns->root = mnt;
	/* ns->root is mnt at this point, so this is the root's mnt_list. */
	list_add(&ns->list, &mnt->mnt_list);

	return ns;
}
/*
 * Allocate a mount namespace with kmalloc() and put it into its initial
 * state: count 1, no root, empty list, initialized poll waitqueue and
 * event 0.
 *
 * Returns the namespace, or ERR_PTR(-ENOMEM) if the allocation fails.
 */
static struct mnt_namespace *alloc_mnt_ns(void)
{
	struct mnt_namespace *ns = kmalloc(sizeof(*ns), GFP_KERNEL);

	if (ns == NULL)
		return ERR_PTR(-ENOMEM);

	/* kmalloc() memory is not zeroed, so every field is set explicitly. */
	atomic_set(&ns->count, 1);
	ns->root = NULL;
	ns->event = 0;
	INIT_LIST_HEAD(&ns->list);
	init_waitqueue_head(&ns->poll);

	return ns;
}

2.4.2 创建mount

struct vfsmount *
do_kern_mount(const char *fstype, int flags, const char *name, void *data)
{
	struct file_system_type *type = get_fs_type(fstype);
	struct vfsmount *mnt;
	if (!type)
		return ERR_PTR(-ENODEV);
	mnt = vfs_kern_mount(type, flags, name, data);
	if (!IS_ERR(mnt) && (type->fs_flags & FS_HAS_SUBTYPE) &&
	    !mnt->mnt_sb->s_subtype)
		mnt = fs_set_subtype(mnt, fstype);
	put_filesystem(type);
	return mnt;
}
/*
 * Create a mount of filesystem type @type.
 *
 * @type:  filesystem type; NULL yields ERR_PTR(-ENODEV)
 * @flags: passed to the type's get_sb() and to security_sb_kern_mount()
 * @name:  passed to alloc_vfsmnt() and get_sb()
 * @data:  mount data; copied into security data unless the type sets
 *         FS_BINARY_MOUNTDATA
 *
 * Returns the new vfsmount on success, otherwise an ERR_PTR() carrying
 * the error. Failure paths unwind through the labels at the bottom in
 * reverse order of acquisition.
 */
struct vfsmount *
vfs_kern_mount(struct file_system_type *type, int flags, const char *name, void *data)
{
	struct vfsmount *mnt;
	char *secdata = NULL;
	int error;

	if (!type)
		return ERR_PTR(-ENODEV);

	/* Default error for the allocation failures below. */
	error = -ENOMEM;
	/* Allocate the vfsmount (alloc_vfsmnt() takes it from mnt_cache). */
	mnt = alloc_vfsmnt(name);
	if (!mnt)
		goto out;

	/* Non-binary mount data gets a security copy. */
	if (data && !(type->fs_flags & FS_BINARY_MOUNTDATA)) {
		secdata = alloc_secdata();
		if (!secdata)
			goto out_mnt;

		error = security_sb_copy_data(data, secdata);
		if (error)
			goto out_free_secdata;
	}
	/*
	 * Call the filesystem type's get_sb(). On success mnt->mnt_sb must
	 * be set, which the BUG_ON() below enforces.
	 */
	error = type->get_sb(type, flags, name, data, mnt);
	if (error < 0)
		goto out_free_secdata;
	BUG_ON(!mnt->mnt_sb);

 	error = security_sb_kern_mount(mnt->mnt_sb, flags, secdata);
 	if (error)
 		goto out_sb;

	/*
	 * filesystems should never set s_maxbytes larger than MAX_LFS_FILESIZE
	 * but s_maxbytes was an unsigned long long for many releases. Throw
	 * this warning for a little while to try and catch filesystems that
	 * violate this rule. This warning should be either removed or
	 * converted to a BUG() in 2.6.34.
	 */
	WARN((mnt->mnt_sb->s_maxbytes < 0), "%s set sb->s_maxbytes to "
		"negative value (%lld)\n", type->name, mnt->mnt_sb->s_maxbytes);
	/* The mount is its own mountpoint and its own parent. */
	mnt->mnt_mountpoint = mnt->mnt_root;
	mnt->mnt_parent = mnt;
	/* NOTE(review): s_umount is presumably write-held since get_sb(); confirm. */
	up_write(&mnt->mnt_sb->s_umount);
	free_secdata(secdata);
	return mnt; /* success */
out_sb:
	/* Drop the root dentry and the still-locked superblock. */
	dput(mnt->mnt_root);
	deactivate_locked_super(mnt->mnt_sb);
out_free_secdata:
	free_secdata(secdata);
out_mnt:
	free_vfsmnt(mnt);
out:
	return ERR_PTR(error);
}

/*
 * Allocate and initialize a vfsmount.
 *
 * @name: optional device name; when non-NULL it is duplicated into
 *        mnt->mnt_devname
 *
 * Returns the new vfsmount, or NULL if the slab allocation, the id
 * allocation, the name duplication or (on SMP) the per-cpu writer
 * counter allocation fails. Partially acquired resources are released
 * through the labels at the bottom.
 */
struct vfsmount *alloc_vfsmnt(const char *name)
{
	/* Zeroed object from the mnt_cache slab. */
	struct vfsmount *mnt = kmem_cache_zalloc(mnt_cache, GFP_KERNEL);
	/* Everything below runs only if the allocation succeeded. */
	if (mnt) {
		int err;

		err = mnt_alloc_id(mnt);
		if (err)
			goto out_free_cache;

		if (name) {
			mnt->mnt_devname = kstrdup(name, GFP_KERNEL);
			if (!mnt->mnt_devname)
				goto out_free_id;
		}

		/* Count starts at 1; all list heads start empty. */
		atomic_set(&mnt->mnt_count, 1);
		INIT_LIST_HEAD(&mnt->mnt_hash);
		INIT_LIST_HEAD(&mnt->mnt_child);
		INIT_LIST_HEAD(&mnt->mnt_mounts);
		INIT_LIST_HEAD(&mnt->mnt_list);
		INIT_LIST_HEAD(&mnt->mnt_expire);
		INIT_LIST_HEAD(&mnt->mnt_share);
		INIT_LIST_HEAD(&mnt->mnt_slave_list);
		INIT_LIST_HEAD(&mnt->mnt_slave);
#ifdef CONFIG_SMP
		/* SMP: mnt_writers is a per-cpu int. */
		mnt->mnt_writers = alloc_percpu(int);
		if (!mnt->mnt_writers)
			goto out_free_devname;
#else
		/* UP: mnt_writers is simply set to 0. */
		mnt->mnt_writers = 0;
#endif
	}
	/* Also reached with mnt == NULL when the slab allocation failed. */
	return mnt;

#ifdef CONFIG_SMP
out_free_devname:
	/* Label exists only on SMP, where alloc_percpu() can fail. */
	kfree(mnt->mnt_devname);
#endif
out_free_id:
	mnt_free_id(mnt);
out_free_cache:
	kmem_cache_free(mnt_cache, mnt);
	return NULL;
}

2.5 块设备驱动模型初始化

/*
 * Block device setup: creates the bdev_inode slab cache, registers the
 * bd_type pseudo filesystem, mounts it and records its superblock in
 * blockdev_superblock. Registration or mount failure panics.
 */
void __init bdev_cache_init(void)
{
	struct vfsmount *bd_mnt;
	int ret;

	/* Slab cache for struct bdev_inode, constructed by init_once. */
	bdev_cachep = kmem_cache_create("bdev_cache", sizeof(struct bdev_inode),
					0,
					SLAB_HWCACHE_ALIGN | SLAB_RECLAIM_ACCOUNT |
					SLAB_MEM_SPREAD | SLAB_PANIC,
					init_once);

	/* Register the bd_type filesystem. */
	ret = register_filesystem(&bd_type);
	if (ret != 0)
		panic("Cannot register bdev pseudo-fs");

	/* Mount it internally to obtain its superblock. */
	bd_mnt = kern_mount(&bd_type);
	if (IS_ERR(bd_mnt))
		panic("Cannot create bdev pseudo-fs");

	/*
	 * This vfsmount structure is only used to obtain the
	 * blockdev_superblock, so tell kmemleak not to report it.
	 */
	kmemleak_not_leak(bd_mnt);
	blockdev_superblock = bd_mnt->mnt_sb;	/* For writeback */
}

2.6 字符设备驱动模型初始化

/*
 * Character device setup: creates cdev_map and initializes the
 * directly_mappable_cdev_bdi object.
 */
void __init chrdev_init(void)
{
	/* Build the map from base_probe and chrdevs_lock. */
	cdev_map = kobj_map_init(base_probe, &chrdevs_lock);
	/* Initialize directly_mappable_cdev_bdi; the return value is not checked. */
	bdi_init(&directly_mappable_cdev_bdi);
}

这里对Linux虚拟文件系统的初始化工作做了整体的梳理,后面将对涉及到的细节做补充,包括inode、dentry cache shrinker的注册、sysfs的初始化等。


   

你可能感兴趣的:(linux,list,cache,struct,Allocation,filesystems)