/*
 * Excerpt of start_kernel(): only the two VFS initialization calls are
 * shown.  Each "……" stands for code the article elided.  The early VFS
 * initialization is invoked before the later, full VFS initialization.
 */
asmlinkage void __init start_kernel(void)
{
	……
	/* Early pass: see vfs_caches_init_early(). */
	vfs_caches_init_early();
	……
	/* Late pass: sized from the total number of RAM pages. */
	vfs_caches_init(totalram_pages);
	……
}
一、早期初始化
虚拟文件系统的早期初始化由函数vfs_caches_init_early()实现,主要负责dentry和inode的hashtable的初始化工作。
/*
 * vfs_caches_init_early - early VFS setup, called from start_kernel().
 *
 * Runs the early initializers for the two VFS hash tables: the dentry
 * hash first, then the inode hash.
 */
void __init vfs_caches_init_early(void)
{
	/* Dentry hash table. */
	dcache_init_early();

	/* Inode hash table. */
	inode_init_early();
}
1.1 dcache
/*
 * dcache_init_early - allocate and initialize the dentry hash table early.
 *
 * Does nothing when hashdist is set; in that case the table is set up
 * later by dcache_init().  Otherwise the table is obtained from
 * alloc_large_system_hash() with the HASH_EARLY flag, which also fills in
 * d_hash_shift and d_hash_mask, and every bucket head is initialized.
 */
static void __init dcache_init_early(void)
{
	/*
	 * Unsigned on purpose: with a signed counter and a signed
	 * "1 << d_hash_shift", a shift of 31 overflows int and the loop
	 * bound becomes negative, so no bucket would be initialized.
	 */
	unsigned int loop;

	/* If hashes are distributed across NUMA nodes, defer
	 * hash allocation until vmalloc space is available.
	 */
	if (hashdist)
		return;

	/* Allocate the bucket array for the dentry hash. */
	dentry_hashtable =
		alloc_large_system_hash("Dentry cache",
					sizeof(struct hlist_head),
					dhash_entries,
					13,
					HASH_EARLY,
					&d_hash_shift,
					&d_hash_mask,
					0);

	/* Make every bucket an empty hlist. */
	for (loop = 0; loop < (1U << d_hash_shift); loop++)
		INIT_HLIST_HEAD(&dentry_hashtable[loop]);
}
1.2 inode
/*
 * inode_init_early - allocate and initialize the inode hash table early.
 *
 * Does nothing when hashdist is set; in that case the table is set up
 * later by inode_init().  Otherwise the table is obtained from
 * alloc_large_system_hash() with the HASH_EARLY flag, which also fills in
 * i_hash_shift and i_hash_mask, and every bucket head is initialized.
 */
void __init inode_init_early(void)
{
	/*
	 * Unsigned on purpose: with a signed counter and a signed
	 * "1 << i_hash_shift", a shift of 31 overflows int and the loop
	 * bound becomes negative, so no bucket would be initialized.
	 */
	unsigned int loop;

	/* If hashes are distributed across NUMA nodes, defer
	 * hash allocation until vmalloc space is available.
	 */
	if (hashdist)
		return;

	/* Allocate the bucket array for the inode hash. */
	inode_hashtable =
		alloc_large_system_hash("Inode-cache",
					sizeof(struct hlist_head),
					ihash_entries,
					14,
					HASH_EARLY,
					&i_hash_shift,
					&i_hash_mask,
					0);

	/* Make every bucket an empty hlist. */
	for (loop = 0; loop < (1U << i_hash_shift); loop++)
		INIT_HLIST_HEAD(&inode_hashtable[loop]);
}
二、后期初始化
这一阶段对dentry、inode、files(文件表)、mount、块设备驱动模型以及字符设备驱动模型做了相应的初始化。
/*
 * vfs_caches_init - main VFS initialization, called from start_kernel().
 * @mempages: number of memory pages used as the basis for sizing.
 *
 * Subtracts a reserve from @mempages, creates the path-name slab cache,
 * and then runs the dentry, inode, files, mount, block-device and
 * character-device initializers in that order.
 */
void __init vfs_caches_init(unsigned long mempages)
{
	unsigned long reserve;

	/*
	 * Base hash sizes on available memory, with a reserve equal to
	 * 150% of current kernel size.  The reserve is capped at
	 * mempages - 1 so that at least one page is left after subtraction.
	 */
	reserve = min((mempages - nr_free_pages()) * 3/2, mempages - 1);
	mempages -= reserve;

	/* Slab cache for path-name buffers, PATH_MAX bytes per object. */
	names_cachep = kmem_cache_create("names_cache", PATH_MAX, 0,
					 SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL);

	dcache_init();		/* dentry cache */
	inode_init();		/* inode cache */
	files_init(mempages);	/* struct file cache and file limits */
	mnt_init();		/* mount cache, sysfs, rootfs, mount tree */
	bdev_cache_init();	/* block-device pseudo filesystem */
	chrdev_init();		/* character-device map */
}
2.1 dentry初始化
/*
 * dcache_init - late dentry cache initialization.
 *
 * Creates the dentry slab cache and registers dcache_shrinker.  When
 * hashdist is set, it also allocates and initializes the dentry hash
 * table here, because dcache_init_early() returns without allocating in
 * that case.  When hashdist is clear the table already exists and this
 * function returns after registering the shrinker.
 */
static void __init dcache_init(void)
{
	/*
	 * Unsigned on purpose: with a signed counter and a signed
	 * "1 << d_hash_shift", a shift of 31 overflows int and the loop
	 * bound becomes negative, so no bucket would be initialized.
	 */
	unsigned int loop;

	/*
	 * A constructor could be added for stable state like the lists,
	 * but it is probably not worth it because of the cache nature
	 * of the dcache.
	 */
	dentry_cache = KMEM_CACHE(dentry,
		SLAB_RECLAIM_ACCOUNT|SLAB_PANIC|SLAB_MEM_SPREAD);

	/* Register the dentry cache shrinker. */
	register_shrinker(&dcache_shrinker);

	/* Hash may have been set up in dcache_init_early */
	if (!hashdist)
		return;

	/*
	 * Only reached when the early pass was skipped: same allocation as
	 * in dcache_init_early(), but with flags 0 instead of HASH_EARLY.
	 */
	dentry_hashtable =
		alloc_large_system_hash("Dentry cache",
					sizeof(struct hlist_head),
					dhash_entries,
					13,
					0,
					&d_hash_shift,
					&d_hash_mask,
					0);

	/* Make every bucket an empty hlist. */
	for (loop = 0; loop < (1U << d_hash_shift); loop++)
		INIT_HLIST_HEAD(&dentry_hashtable[loop]);
}
2.2 inode初始化
/*
 * inode_init - late inode cache initialization.
 *
 * Creates the inode slab cache (with init_once as constructor) and
 * registers icache_shrinker.  When hashdist is set, it also allocates and
 * initializes the inode hash table here, because inode_init_early()
 * returns without allocating in that case.
 */
void __init inode_init(void)
{
	/*
	 * Unsigned on purpose: with a signed counter and a signed
	 * "1 << i_hash_shift", a shift of 31 overflows int and the loop
	 * bound becomes negative, so no bucket would be initialized.
	 */
	unsigned int loop;

	/* inode slab cache */
	inode_cachep = kmem_cache_create("inode_cache",
					 sizeof(struct inode),
					 0,
					 (SLAB_RECLAIM_ACCOUNT|SLAB_PANIC|
					 SLAB_MEM_SPREAD),
					 init_once);

	/* Register the inode cache shrinker. */
	register_shrinker(&icache_shrinker);

	/* Hash may have been set up in inode_init_early */
	if (!hashdist)
		return;

	/*
	 * Only reached when the early pass was skipped: same allocation as
	 * in inode_init_early(), but with flags 0 instead of HASH_EARLY.
	 */
	inode_hashtable =
		alloc_large_system_hash("Inode-cache",
					sizeof(struct hlist_head),
					ihash_entries,
					14,
					0,
					&i_hash_shift,
					&i_hash_mask,
					0);

	/* Make every bucket an empty hlist. */
	for (loop = 0; loop < (1U << i_hash_shift); loop++)
		INIT_HLIST_HEAD(&inode_hashtable[loop]);
}
2.3 files初始化
/*
 * files_init - set up the struct file slab cache and the open-file limit.
 * @mempages: number of memory pages the limit is derived from.
 *
 * files_stat.max_files is set to roughly 10% of memory expressed in 1K
 * units, but never below NR_FILE.
 */
void __init files_init(unsigned long mempages)
{
	/*
	 * One file with associated inode and dcache is very roughly 1K.
	 * Per default don't use more than 10% of our memory for files.
	 */
	int n = (mempages * (PAGE_SIZE / 1024)) / 10;

	/* Slab cache for struct file objects. */
	filp_cachep = kmem_cache_create("filp", sizeof(struct file), 0,
			SLAB_HWCACHE_ALIGN | SLAB_PANIC, NULL);

	/* Apply the computed limit, then enforce the NR_FILE floor. */
	files_stat.max_files = n;
	if (files_stat.max_files < NR_FILE)
		files_stat.max_files = NR_FILE;

	/*
	 * NOTE(review): the original note described this call as related to
	 * releasing file descriptor tables; its body is not visible here,
	 * so confirm against its definition.
	 */
	files_defer_init();

	/* Start the nr_files per-cpu counter at zero. */
	percpu_counter_init(&nr_files, 0);
}
2.4 mount初始化
/*
 * mnt_init - initialize mount infrastructure.
 *
 * Sets up namespace_sem, the vfsmount slab cache and the one-page mount
 * hash table, then initializes sysfs, creates the "fs" kobject, and
 * finally brings up rootfs and the initial mount tree.  Failure to get
 * the hash table page panics; sysfs and kobject failures only warn.
 */
void __init mnt_init(void)
{
	unsigned idx;
	int rc;

	init_rwsem(&namespace_sem);

	/* Slab cache for struct vfsmount. */
	mnt_cache = kmem_cache_create("mnt_cache", sizeof(struct vfsmount),
			0, SLAB_HWCACHE_ALIGN | SLAB_PANIC, NULL);

	/* The mount hash table lives in a single free page. */
	mount_hashtable = (struct list_head *)__get_free_page(GFP_ATOMIC);
	if (mount_hashtable == NULL)
		panic("Failed to allocate mount hash table\n");

	printk("Mount-cache hash table entries: %lu\n", HASH_SIZE);

	/* Make every bucket an empty list. */
	idx = 0;
	while (idx < HASH_SIZE) {
		INIT_LIST_HEAD(&mount_hashtable[idx]);
		idx++;
	}

	/* sysfs initialization; an error is reported but not fatal. */
	rc = sysfs_init();
	if (rc != 0)
		printk(KERN_WARNING "%s: sysfs_init error: %d\n",
			__func__, rc);

	/* Create the "fs" kobject; failure is reported but not fatal. */
	fs_kobj = kobject_create_and_add("fs", NULL);
	if (fs_kobj == NULL)
		printk(KERN_WARNING "%s: kobj create error\n", __func__);

	init_rootfs();
	init_mount_tree();
}
/*
 * init_mount_tree - mount rootfs and install it as the initial root.
 *
 * Mounts "rootfs", wraps it in a new mount namespace, attaches that
 * namespace to init_task, and points the current task's working
 * directory and root at the namespace's root mount.  Any failure panics.
 */
static void __init init_mount_tree(void)
{
	struct vfsmount *rootfs_mnt;
	struct mnt_namespace *init_ns;
	struct path root_path;

	rootfs_mnt = do_kern_mount("rootfs", 0, "rootfs", NULL);
	if (IS_ERR(rootfs_mnt))
		panic("Can't create rootfs");

	/* Create the namespace whose root is the rootfs mount. */
	init_ns = create_mnt_ns(rootfs_mnt);
	if (IS_ERR(init_ns))
		panic("Can't allocate initial namespace");

	/* Hand the namespace to init_task and take a reference via get_mnt_ns(). */
	init_task.nsproxy->mnt_ns = init_ns;
	get_mnt_ns(init_ns);

	/* Path describing the root of the new namespace. */
	root_path.mnt = init_ns->root;
	root_path.dentry = init_ns->root->mnt_root;

	/* Both cwd and root of the current task start at that path. */
	set_fs_pwd(current->fs, &root_path);
	set_fs_root(current->fs, &root_path);
}
2.4.1 创建命名空间
/**
 * create_mnt_ns - creates a private namespace and adds a root filesystem
 * @mnt: pointer to the new root filesystem mountpoint
 *
 * Returns the new namespace, or the ERR_PTR value produced by
 * alloc_mnt_ns() when allocation fails (in which case @mnt is untouched).
 */
struct mnt_namespace *create_mnt_ns(struct vfsmount *mnt)
{
	struct mnt_namespace *ns = alloc_mnt_ns();

	/* Allocation failed: pass the error pointer straight through. */
	if (IS_ERR(ns))
		return ns;

	/* Link the mount and the namespace to each other. */
	mnt->mnt_ns = ns;
	ns->root = mnt;
	list_add(&ns->list, &ns->root->mnt_list);

	return ns;
}
/*
 * alloc_mnt_ns - allocate and initialize an empty mount namespace.
 *
 * Returns the new namespace with count 1, no root, an empty list, an
 * initialized poll waitqueue and event 0, or ERR_PTR(-ENOMEM) when
 * kmalloc() fails.
 */
static struct mnt_namespace *alloc_mnt_ns(void)
{
	struct mnt_namespace *ns;

	ns = kmalloc(sizeof(*ns), GFP_KERNEL);
	if (ns == NULL)
		return ERR_PTR(-ENOMEM);

	/* kmalloc() memory is not zeroed, so set every field used here. */
	atomic_set(&ns->count, 1);
	ns->root = NULL;
	INIT_LIST_HEAD(&ns->list);
	init_waitqueue_head(&ns->poll);
	ns->event = 0;

	return ns;
}
2.4.2 创建mount
struct vfsmount * do_kern_mount(const char *fstype, int flags, const char *name, void *data) { struct file_system_type *type = get_fs_type(fstype); struct vfsmount *mnt; if (!type) return ERR_PTR(-ENODEV); mnt = vfs_kern_mount(type, flags, name, data); if (!IS_ERR(mnt) && (type->fs_flags & FS_HAS_SUBTYPE) && !mnt->mnt_sb->s_subtype) mnt = fs_set_subtype(mnt, fstype); put_filesystem(type); return mnt; }
/*
 * vfs_kern_mount - create a vfsmount for the given filesystem type.
 * @type:  filesystem type; NULL yields ERR_PTR(-ENODEV)
 * @flags: mount flags passed to ->get_sb() and the security hook
 * @name:  device name, stored in the mount by alloc_vfsmnt()
 * @data:  mount data; copied into secdata unless the type has
 *         FS_BINARY_MOUNTDATA set
 *
 * Returns the new mount, with mnt_mountpoint set to its own root and
 * mnt_parent set to itself, or an ERR_PTR on failure.  The error labels
 * at the end undo the steps in reverse order of acquisition.
 */
struct vfsmount *
vfs_kern_mount(struct file_system_type *type, int flags, const char *name, void *data)
{
	struct vfsmount *mnt;
	char *secdata = NULL;
	int error;

	if (!type)
		return ERR_PTR(-ENODEV);

	/* Default error for the allocation failures below. */
	error = -ENOMEM;
	/* Allocate a vfsmount (alloc_vfsmnt() takes it from mnt_cache). */
	mnt = alloc_vfsmnt(name);
	if (!mnt)
		goto out;

	/* Non-binary mount data is copied for the security hooks. */
	if (data && !(type->fs_flags & FS_BINARY_MOUNTDATA)) {
		secdata = alloc_secdata();
		if (!secdata)
			goto out_mnt;

		error = security_sb_copy_data(data, secdata);
		if (error)
			goto out_free_secdata;
	}

	/*
	 * Call the filesystem type's get_sb(); on success it must have set
	 * mnt->mnt_sb, which the BUG_ON below enforces.
	 */
	error = type->get_sb(type, flags, name, data, mnt);
	if (error < 0)
		goto out_free_secdata;
	BUG_ON(!mnt->mnt_sb);

	error = security_sb_kern_mount(mnt->mnt_sb, flags, secdata);
	if (error)
		goto out_sb;

	/*
	 * filesystems should never set s_maxbytes larger than MAX_LFS_FILESIZE
	 * but s_maxbytes was an unsigned long long for many releases. Throw
	 * this warning for a little while to try and catch filesystems that
	 * violate this rule. This warning should be either removed or
	 * converted to a BUG() in 2.6.34.
	 */
	WARN((mnt->mnt_sb->s_maxbytes < 0), "%s set sb->s_maxbytes to "
		"negative value (%lld)\n", type->name, mnt->mnt_sb->s_maxbytes);

	/* A freshly created mount is its own mountpoint and its own parent. */
	mnt->mnt_mountpoint = mnt->mnt_root;
	mnt->mnt_parent = mnt;
	/* NOTE(review): presumably ->get_sb() returns with s_umount held for write — confirm. */
	up_write(&mnt->mnt_sb->s_umount);
	free_secdata(secdata);
	return mnt; /* success */
out_sb:
	dput(mnt->mnt_root);
	deactivate_locked_super(mnt->mnt_sb);
out_free_secdata:
	free_secdata(secdata);
out_mnt:
	free_vfsmnt(mnt);
out:
	return ERR_PTR(error);
}

/*
 * alloc_vfsmnt - allocate and initialize a struct vfsmount.
 * @name: optional device name; duplicated into mnt_devname when non-NULL
 *
 * Returns a zeroed mount with an id assigned, mnt_count set to 1 and all
 * list heads initialized, or NULL if any allocation step fails.  On SMP
 * builds mnt_writers is a per-cpu int; otherwise it is set to 0.
 */
struct vfsmount *alloc_vfsmnt(const char *name)
{
	/* Zeroed object from the mnt_cache slab. */
	struct vfsmount *mnt = kmem_cache_zalloc(mnt_cache, GFP_KERNEL);
	/* Initialize the new mount; skipped entirely when allocation failed. */
	if (mnt) {
		int err;

		err = mnt_alloc_id(mnt);
		if (err)
			goto out_free_cache;

		if (name) {
			mnt->mnt_devname = kstrdup(name, GFP_KERNEL);
			if (!mnt->mnt_devname)
				goto out_free_id;
		}

		atomic_set(&mnt->mnt_count, 1);
		INIT_LIST_HEAD(&mnt->mnt_hash);
		INIT_LIST_HEAD(&mnt->mnt_child);
		INIT_LIST_HEAD(&mnt->mnt_mounts);
		INIT_LIST_HEAD(&mnt->mnt_list);
		INIT_LIST_HEAD(&mnt->mnt_expire);
		INIT_LIST_HEAD(&mnt->mnt_share);
		INIT_LIST_HEAD(&mnt->mnt_slave_list);
		INIT_LIST_HEAD(&mnt->mnt_slave);
#ifdef CONFIG_SMP
		mnt->mnt_writers = alloc_percpu(int);
		if (!mnt->mnt_writers)
			goto out_free_devname;
#else
		mnt->mnt_writers = 0;
#endif
	}
	return mnt;

	/* Error unwinding; the first label exists only where it can be reached. */
#ifdef CONFIG_SMP
out_free_devname:
	kfree(mnt->mnt_devname);
#endif
out_free_id:
	mnt_free_id(mnt);
out_free_cache:
	kmem_cache_free(mnt_cache, mnt);
	return NULL;
}
2.5 块设备驱动模型初始化
/*
 * bdev_cache_init - set up the block-device pseudo filesystem.
 *
 * Creates the bdev_inode slab cache, registers bd_type, mounts it
 * internally and records its superblock in blockdev_superblock.
 * Registration or mount failure panics.
 */
void __init bdev_cache_init(void)
{
	struct vfsmount *bd_mnt;
	int rc;

	/* Slab cache for struct bdev_inode, constructed by init_once. */
	bdev_cachep = kmem_cache_create("bdev_cache", sizeof(struct bdev_inode),
			0, (SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT|
				SLAB_MEM_SPREAD|SLAB_PANIC),
			init_once);

	/* Register the bdev filesystem type. */
	rc = register_filesystem(&bd_type);
	if (rc != 0)
		panic("Cannot register bdev pseudo-fs");

	bd_mnt = kern_mount(&bd_type);
	if (IS_ERR(bd_mnt))
		panic("Cannot create bdev pseudo-fs");

	/*
	 * This vfsmount structure is only used to obtain the
	 * blockdev_superblock, so tell kmemleak not to report it.
	 */
	kmemleak_not_leak(bd_mnt);

	/* For writeback */
	blockdev_superblock = bd_mnt->mnt_sb;
}
2.6 字符设备驱动模型初始化
/*
 * chrdev_init - initialize character-device infrastructure.
 *
 * Creates cdev_map through kobj_map_init(), passing base_probe and
 * chrdevs_lock, and then initializes directly_mappable_cdev_bdi.
 */
void __init chrdev_init(void)
{
	/* Character-device map. */
	cdev_map = kobj_map_init(base_probe, &chrdevs_lock);

	/* Backing-dev info object named directly_mappable_cdev_bdi. */
	bdi_init(&directly_mappable_cdev_bdi);
}
这里对linux虚拟文件系统的初始化工作做了整体的梳理,后面将对涉及到的细节做补充,包括inode和dentry cache shrinker的注册、sysfs的初始化等。