快乐虾
http://blog.csdn.net/lights_joy/
This article applies to:
bfin-uclinux-2009r1.1
code ported to VS2008
Reposting is welcome, but please keep the author information.
After init_rootfs registers rootfs_fs_type, the kernel calls init_mount_tree to create the system's root directory. This function lives in fs/namespace.c:
static void __init init_mount_tree(void)
{
	struct vfsmount *mnt;
	struct mnt_namespace *ns;
	struct path root;

	mnt = do_kern_mount("rootfs", 0, "rootfs", NULL);
	if (IS_ERR(mnt))
		panic("Can't create rootfs");
	ns = (mnt_namespace *)kmalloc(sizeof(*ns), GFP_KERNEL);
	if (!ns)
		panic("Can't allocate initial namespace");
	atomic_set(&ns->count, 1);
	INIT_LIST_HEAD(&ns->list);
	init_waitqueue_head(&ns->poll);
	ns->event = 0;
	list_add(&mnt->mnt_list, &ns->list);
	ns->root = mnt;
	mnt->mnt_ns = ns;

	init_task.nsproxy->mnt_ns = ns;
	get_mnt_ns(ns);

	root.mnt = ns->root;
	root.dentry = ns->root->mnt_root;

	set_fs_pwd(current->fs, &root);
	set_fs_root(current->fs, &root);
}
First, init_mount_tree() calls do_kern_mount("rootfs", 0, "rootfs", NULL) to mount the rootfs filesystem that was registered earlier. Inside this call, the root directory that we care most about is created.
do_kern_mount is in fs/super.c:
struct vfsmount *
do_kern_mount(const char *fstype, int flags, const char *name, void *data)
{
	struct file_system_type *type = get_fs_type(fstype);
	struct vfsmount *mnt;

	if (!type)
		return (vfsmount *)ERR_PTR(-ENODEV);
	mnt = vfs_kern_mount(type, flags, name, data);
	if (!IS_ERR(mnt) && (type->fs_flags & FS_HAS_SUBTYPE) &&
	    !mnt->mnt_sb->s_subtype)
		mnt = fs_set_subtype(mnt, fstype);
	put_filesystem(type);
	return mnt;
}
Its call stack:
> bfin-uclinux-kernel-2009r1.1.dll!do_kern_mount(const char * fstype=0x100b04f0, int flags=0x00000000, const char * name=0x100b04f0, void * data=0x00000000) line 968 C++
bfin-uclinux-kernel-2009r1.1.dll!init_mount_tree() line 2288 + 0x13 bytes C++
bfin-uclinux-kernel-2009r1.1.dll!mnt_init() line 2341 C++
bfin-uclinux-kernel-2009r1.1.dll!vfs_caches_init(unsigned long mempages=0x000037b6) line 2353 C++
bfin-uclinux-kernel-2009r1.1.dll!start_kernel(int size=0x00000040) line 687 + 0xb bytes C++
This function first looks up the structure describing the filesystem by name. For rootfs, type obviously ends up pointing to:
static struct file_system_type rootfs_fs_type = {
	.name		= "rootfs",
	.get_sb		= rootfs_get_sb,
	.kill_sb	= kill_litter_super,
};
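For reference, here is a minimal sketch of how such a by-name lookup works. This is a deliberate simplification: `file_systems` is the kernel's global list of registered types in fs/filesystems.c, but the real get_fs_type() also takes file_systems_lock and may try to load a module, both omitted here, and get_fs_type_sketch is a made-up name.

/* Hedged sketch of a filesystem-type lookup; not the kernel's exact code.
 * Registered file_system_type objects are chained through their ->next
 * field off the global file_systems list head. */
struct file_system_type *get_fs_type_sketch(const char *name)
{
	struct file_system_type *p;

	for (p = file_systems; p; p = p->next)
		if (strcmp(p->name, name) == 0)
			return p;	/* e.g. &rootfs_fs_type for "rootfs" */
	return NULL;			/* do_kern_mount maps this to -ENODEV */
}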
It then hands this type to vfs_kern_mount; let's see what that function does.
vfs_kern_mount is also in fs/super.c:
struct vfsmount *
vfs_kern_mount(struct file_system_type *type, int flags, const char *name, void *data)
{
	struct vfsmount *mnt;
	char *secdata = NULL;
	int error;

	if (!type)
		return (vfsmount *)ERR_PTR(-ENODEV);

	error = -ENOMEM;
	mnt = alloc_vfsmnt(name);
	if (!mnt)
		goto out;

	if (data && !(type->fs_flags & FS_BINARY_MOUNTDATA)) {
		secdata = alloc_secdata();
		if (!secdata)
			goto out_mnt;

		error = security_sb_copy_data((char *)data, secdata);
		if (error)
			goto out_free_secdata;
	}

	error = type->get_sb(type, flags, name, data, mnt);
	if (error < 0)
		goto out_free_secdata;
	BUG_ON(!mnt->mnt_sb);

	error = security_sb_kern_mount(mnt->mnt_sb, secdata);
	if (error)
		goto out_sb;

	mnt->mnt_mountpoint = mnt->mnt_root;
	mnt->mnt_parent = mnt;
	up_write(&mnt->mnt_sb->s_umount);
	free_secdata(secdata);
	return mnt;
out_sb:
	dput(mnt->mnt_root);
	up_write(&mnt->mnt_sb->s_umount);
	deactivate_super(mnt->mnt_sb);
out_free_secdata:
	free_secdata(secdata);
out_mnt:
	free_vfsmnt(mnt);
out:
	return (vfsmount *)ERR_PTR(error);
}
In this scenario, the main work of do_kern_mount() actually happens inside vfs_kern_mount(). At the beginning of that function:
mnt = alloc_vfsmnt(name);
This call allocates and initializes a struct vfsmount. Once initialization is done, its members hold the following values:
mnt_hash | {next=0x00ed7820 prev=0x00ed7820 } | list_head
mnt_parent | 0x00000000 {mnt_hash={...} mnt_parent=??? mnt_mountpoint=??? ...} | vfsmount *
mnt_mountpoint | 0x00000000 {d_count={...} d_flags=??? d_lock={...} ...} | dentry *
mnt_root | 0x00000000 {d_count={...} d_flags=??? d_lock={...} ...} | dentry *
mnt_sb | 0x00000000 {s_list={...} s_dev=??? s_blocksize=??? ...} | super_block *
mnt_mounts | {next=0x00ed7838 prev=0x00ed7838 } | list_head
mnt_child | {next=0x00ed7840 prev=0x00ed7840 } | list_head
mnt_flags | 0x00000000 | int
mnt_devname | 0x00ecf460 "rootfs" | const char *
mnt_list | {next=0x00ed7850 prev=0x00ed7850 } | list_head
mnt_expire | {next=0x00ed7858 prev=0x00ed7858 } | list_head
mnt_share | {next=0x00ed7860 prev=0x00ed7860 } | list_head
mnt_slave_list | {next=0x00ed7868 prev=0x00ed7868 } | list_head
mnt_slave | {next=0x00ed7870 prev=0x00ed7870 } | list_head
mnt_master | 0x00000000 {mnt_hash={...} mnt_parent=??? mnt_mountpoint=??? ...} | vfsmount *
mnt_ns | 0x00000000 {count={...} root=??? list={...} ...} | mnt_namespace *
mnt_id | 0x00000000 | int
mnt_group_id | 0x00000000 | int
mnt_count | {counter=0x00000001 } | atomic_t
mnt_expiry_mark | 0x00000000 | int
mnt_pinned | 0x00000000 | int
mnt_ghosts | 0x00000000 | int
__mnt_writers | {counter=0x00000000 } | atomic_t
As you can see, most of them are zero.
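That is no accident: alloc_vfsmnt() draws the structure from a zeroed slab and only fills in a handful of fields. Below is a hedged sketch loosely modeled on fs/namespace.c; mnt_id allocation and error handling are trimmed, and alloc_vfsmnt_sketch is a made-up name.

/* Sketch of alloc_vfsmnt(): a zeroed allocation from the mnt_cache slab,
 * plus the few fields that show up as non-zero in the dump above. */
static struct vfsmount *alloc_vfsmnt_sketch(const char *name)
{
	struct vfsmount *mnt = (struct vfsmount *)kmem_cache_zalloc(mnt_cache, GFP_KERNEL);

	if (!mnt)
		return NULL;
	mnt->mnt_devname = kstrdup(name, GFP_KERNEL);	/* "rootfs" in the dump */
	atomic_set(&mnt->mnt_count, 1);			/* mnt_count.counter == 1 */
	INIT_LIST_HEAD(&mnt->mnt_hash);			/* empty lists point to themselves */
	INIT_LIST_HEAD(&mnt->mnt_child);
	INIT_LIST_HEAD(&mnt->mnt_mounts);
	INIT_LIST_HEAD(&mnt->mnt_list);
	INIT_LIST_HEAD(&mnt->mnt_expire);
	INIT_LIST_HEAD(&mnt->mnt_share);
	INIT_LIST_HEAD(&mnt->mnt_slave_list);
	INIT_LIST_HEAD(&mnt->mnt_slave);
	return mnt;
}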
After creating the vfsmount structure, vfs_kern_mount moves on to creating the superblock:
error = type->get_sb(type, flags, name, data, mnt);
This function pointer points to rootfs_get_sb (fs/ramfs/inode.c):
static int rootfs_get_sb(struct file_system_type *fs_type,
	int flags, const char *dev_name, void *data, struct vfsmount *mnt)
{
	return get_sb_nodev(fs_type, flags|MS_NOUSER, data, ramfs_fill_super,
			    mnt);
}
Next we follow get_sb_nodev (fs/super.c):
int get_sb_nodev(struct file_system_type *fs_type,
	int flags, void *data,
	int (*fill_super)(struct super_block *, void *, int),
	struct vfsmount *mnt)
{
	int error;
	struct super_block *s = sget(fs_type, NULL, set_anon_super, NULL);

	if (IS_ERR(s))
		return PTR_ERR(s);

	s->s_flags = flags;

	error = fill_super(s, data, flags & MS_SILENT ? 1 : 0);
	if (error) {
		up_write(&s->s_umount);
		deactivate_super(s);
		return error;
	}
	s->s_flags |= MS_ACTIVE;
	return simple_set_mnt(mnt, s);
}
This function does several things; let's go through them one by one.
The first function get_sb_nodev calls is sget:
/**
 * sget - find or create a superblock
 * @type: filesystem type superblock should belong to
 * @test: comparison callback
 * @set: setup callback
 * @data: argument to each of them
 */
struct super_block *sget(struct file_system_type *type,
			int (*test)(struct super_block *,void *),
			int (*set)(struct super_block *,void *),
			void *data)
{
	struct super_block *s = NULL;
	struct super_block *old;
	int err;

retry:
	spin_lock(&sb_lock);
	if (test) {
		list_for_each_entry(super_block, old, &type->fs_supers, s_instances) {
			if (!test(old, data))
				continue;
			if (!grab_super(old))
				goto retry;
			if (s)
				destroy_super(s);
			return old;
		}
	}
	if (!s) {
		spin_unlock(&sb_lock);
		s = alloc_super(type);
		if (!s)
			return (super_block *)ERR_PTR(-ENOMEM);
		goto retry;
	}

	err = set(s, data);
	if (err) {
		spin_unlock(&sb_lock);
		destroy_super(s);
		return (super_block *)ERR_PTR(err);
	}

	s->s_type = type;
	strlcpy(s->s_id, type->name, sizeof(s->s_id));
	list_add_tail(&s->s_list, &super_blocks);
	list_add(&s->s_instances, &type->fs_supers);
	spin_unlock(&sb_lock);
	get_filesystem(type);
	return s;
}
Its call stack looks like this:
> bfin-uclinux-kernel-2009r1.1.dll!sget(file_system_type * type=0x100b6828, int (super_block *, void *)* test=0x00000000, int (super_block *, void *)* set=0x1003a955, void * data=0x00000000) line 339 C++
bfin-uclinux-kernel-2009r1.1.dll!get_sb_nodev(file_system_type * fs_type=0x100b6828, int flags=0x80000000, void * data=0x00000000, int (super_block *, void *, int)* fill_super=0x100a6dd0, vfsmount * mnt=0x00ed7820) line 838 + 0x12 bytes C++
bfin-uclinux-kernel-2009r1.1.dll!rootfs_get_sb(file_system_type * fs_type=0x100b6828, int flags=0x00000000, const char * dev_name=0x100b04f0, void * data=0x00000000, vfsmount * mnt=0x00ed7820) line 211 + 0x20 bytes C++
bfin-uclinux-kernel-2009r1.1.dll!vfs_kern_mount(file_system_type * type=0x100b6828, int flags=0x00000000, const char * name=0x100b04f0, void * data=0x00000000) line 914 + 0x1e bytes C++
bfin-uclinux-kernel-2009r1.1.dll!do_kern_mount(const char * fstype=0x100b04f0, int flags=0x00000000, const char * name=0x100b04f0, void * data=0x00000000) line 972 + 0x15 bytes C++
bfin-uclinux-kernel-2009r1.1.dll!init_mount_tree() line 2288 + 0x13 bytes C++
bfin-uclinux-kernel-2009r1.1.dll!mnt_init() line 2341 C++
bfin-uclinux-kernel-2009r1.1.dll!vfs_caches_init(unsigned long mempages=0x000037b6) line 2353 C++
bfin-uclinux-kernel-2009r1.1.dll!start_kernel(int size=0x00000040) line 687 + 0xb bytes C++
The test callback passed in is NULL and so is data; type points to the global rootfs_fs_type, and the set callback points to set_anon_super (fs/super.c).
Since test is NULL, sget first calls alloc_super to allocate a struct super_block. alloc_super lives in fs/super.c; it allocates a super_block and initializes it, after which the structure holds the following values:
s_list | {next=0x00000000 prev=0x00000000 } | list_head
s_dev | 0x00000000 | unsigned int
s_blocksize | 0x00000000 | unsigned long
s_blocksize_bits | 0x00 | unsigned char
s_dirt | 0x00 | unsigned char
s_maxbytes | 0x000000007fffffff | unsigned __int64
s_type | 0x00000000 {name=??? fs_flags=??? get_sb=??? ...} | file_system_type *
s_op | 0x100be278 default_op {alloc_inode=0x00000000 destroy_inode=0x00000000 dirty_inode=0x00000000 ...} | const super_operations *
dq_op | 0x00000000 {initialize=??? drop=??? alloc_space=??? ...} | dquot_operations *
s_qcop | 0x00000000 {quota_on=??? quota_off=??? quota_sync=??? ...} | quotactl_ops *
s_export_op | 0x00000000 {encode_fh=??? fh_to_dentry=??? fh_to_parent=??? ...} | const export_operations *
s_flags | 0x00000000 | unsigned long
s_magic | 0x00000000 | unsigned long
s_root | 0x00000000 {d_count={...} d_flags=??? d_lock={...} ...} | dentry *
s_umount | {activity=0x00000000 wait_lock={...} wait_list={...} } | rw_semaphore
s_lock | {count={...} wait_lock={...} wait_list={...} } | mutex
s_count | 0x40000000 | int
s_need_sync_fs | 0x00000000 | int
s_active | {counter=0x00000001 } | atomic_t
s_xattr | 0x00000000 | xattr_handler * *
s_inodes | {next=0x00ed5670 prev=0x00ed5670 } | list_head
s_dirty | {next=0x00ed5678 prev=0x00ed5678 } | list_head
s_io | {next=0x00ed5680 prev=0x00ed5680 } | list_head
s_more_io | {next=0x00ed5688 prev=0x00ed5688 } | list_head
s_anon | {first=0x00000000 } | hlist_head
s_files | {next=0x00ed5694 prev=0x00ed5694 } | list_head
s_dentry_lru | {next=0x00ed569c prev=0x00ed569c } | list_head
s_nr_dentry_unused | 0x00000000 | int
s_bdev | 0x00000000 {bd_dev=??? bd_inode=??? bd_openers=??? ...} | block_device *
s_mtd | 0x00000000 | mtd_info *
s_instances | {next=0x00ed56b0 prev=0x00ed56b0 } | list_head
s_dquot | {flags=0x00000000 dqio_mutex={...} dqonoff_mutex={...} ...} | quota_info
s_frozen | 0x00000000 | int
s_wait_unfrozen | {lock={...} task_list={...} } | __wait_queue_head
s_id | 0x00ed5790 "" | char [32]
s_fs_info | 0x00000000 | void *
s_mode | 0x00000000 | unsigned int
s_vfs_rename_mutex | {count={...} wait_lock={...} wait_list={...} } | mutex
s_time_gran | 0x3b9aca00 | unsigned int
s_subtype | 0x00000000 <bad pointer> | char *
s_options | 0x00000000 <bad pointer> | char *
At this point the relationship between file_system_type and super_block is not yet visible.
After alloc_super has finished allocating the super_block, sget invokes the set callback:
err = set(s, data);
For rootfs, this callback points to set_anon_super (fs/super.c):
/*
 * Unnamed block devices are dummy devices used by virtual
 * filesystems which don't use real block-devices.  -- jrs
 */
static DEFINE_IDA(unnamed_dev_ida);
static DEFINE_SPINLOCK(unnamed_dev_lock);/* protects the above */

int set_anon_super(struct super_block *s, void *data)
{
	int dev;
	int error;

retry:
	if (ida_pre_get(&unnamed_dev_ida, GFP_ATOMIC) == 0)
		return -ENOMEM;
	spin_lock(&unnamed_dev_lock);
	error = ida_get_new(&unnamed_dev_ida, &dev);
	spin_unlock(&unnamed_dev_lock);
	if (error == -EAGAIN)
		/* We raced and lost with another CPU. */
		goto retry;
	else if (error)
		return -EAGAIN;

	if ((dev & MAX_ID_MASK) == (1 << MINORBITS)) {
		spin_lock(&unnamed_dev_lock);
		ida_remove(&unnamed_dev_ida, dev);
		spin_unlock(&unnamed_dev_lock);
		return -EMFILE;
	}
	s->s_dev = MKDEV(0, dev & MINORMASK);
	return 0;
}
The most important thing this function does is set the value of s_dev. For rootfs, that value is 0.
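To see why, recall the device-number macros. The definitions below mirror include/linux/kdev_t.h on 2.6 kernels (MINORBITS is 20); the comment works through the rootfs case.

#define MINORBITS	20
#define MINORMASK	((1U << MINORBITS) - 1)
#define MKDEV(ma, mi)	(((ma) << MINORBITS) | (mi))
#define MAJOR(dev)	((unsigned int)((dev) >> MINORBITS))
#define MINOR(dev)	((unsigned int)((dev) & MINORMASK))

/* For the very first anonymous device, ida_get_new() hands out dev == 0,
 * so rootfs ends up with s_dev = MKDEV(0, 0) = 0: major 0, minor 0. */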
Once the super_block has been initialized, sget links it to its file_system_type:
s->s_type = type;
strlcpy(s->s_id, type->name, sizeof(s->s_id));
list_add_tail(&s->s_list, &super_blocks);
list_add(&s->s_instances, &type->fs_supers);
Here s_type in the super_block points to the file_system_type that owns this superblock, and s_id receives a copy of the filesystem name. super_blocks is a global list used to chain together every superblock in the system.
This also shows that a super_block belongs to exactly one file_system_type, while one file_system_type can own several super_blocks.
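To make the two lists concrete, here is a hedged sketch that walks both of them; iterate_supers_sketch is a made-up helper, locking via sb_lock is omitted, and the 4-argument list_for_each_entry form (type name first) matches this VS2008 port, as seen in sget above.

/* Walk one type's superblocks, then every superblock in the system. */
static void iterate_supers_sketch(struct file_system_type *type)
{
	struct super_block *sb;

	/* all superblocks belonging to one file_system_type, via s_instances */
	list_for_each_entry(super_block, sb, &type->fs_supers, s_instances)
		printk("%s: dev 0x%x\n", sb->s_id, sb->s_dev);

	/* every superblock in the system, via the global super_blocks list */
	list_for_each_entry(super_block, sb, &super_blocks, s_list)
		printk("%s -> type %s\n", sb->s_id, sb->s_type->name);
}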
Finally, sget calls a function named get_filesystem, but since MODULE support is not enabled, it is effectively a no-op:
/* WARNING: This can be used only if we _already_ own a reference */
void get_filesystem(struct file_system_type *fs)
{
	__module_get(fs->owner);
}
After sget returns, get_sb_nodev calls the fill_super callback that rootfs_get_sb passed in, namely ramfs_fill_super:
static int ramfs_fill_super(struct super_block * sb, void * data, int silent)
{
	struct inode * inode;
	struct dentry * root;

	sb->s_maxbytes = MAX_LFS_FILESIZE;
	sb->s_blocksize = PAGE_CACHE_SIZE;
	sb->s_blocksize_bits = PAGE_CACHE_SHIFT;
	sb->s_magic = RAMFS_MAGIC;
	sb->s_op = &ramfs_ops;
	sb->s_time_gran = 1;
	inode = ramfs_get_inode(sb, S_IFDIR | 0755, 0);
	if (!inode)
		return -ENOMEM;

	root = d_alloc_root(inode);
	if (!root) {
		iput(inode);
		return -ENOMEM;
	}
	sb->s_root = root;
	return 0;
}
alloc_super had pointed the super_block's s_op at default_op, which provides no functionality at all, so the first thing done here is to replace s_op so that it points to ramfs_ops:
const struct super_operations ramfs_ops = {
	NULL,			// struct inode *(*alloc_inode)(struct super_block *sb);
	NULL,			// void (*destroy_inode)(struct inode *);
	NULL,			// void (*dirty_inode) (struct inode *);
	NULL,			// int (*write_inode) (struct inode *, int);
	/*.drop_inode =*/	generic_delete_inode,
	NULL,			// void (*delete_inode) (struct inode *);
	NULL,			// void (*put_super) (struct super_block *);
	NULL,			// void (*write_super) (struct super_block *);
	NULL,			// int (*sync_fs)(struct super_block *sb, int wait);
	NULL,			// void (*write_super_lockfs) (struct super_block *);
	NULL,			// void (*unlockfs) (struct super_block *);
	/*.statfs =*/		simple_statfs,
};
The first function ramfs_fill_super calls is ramfs_get_inode (fs/ramfs/inode.c):
struct inode *ramfs_get_inode(struct super_block *sb, int mode, dev_t dev)
{
	struct inode * inode = new_inode(sb);

	if (inode) {
		inode->i_mode = mode;
		inode->i_uid = current->fsuid;
		inode->i_gid = current->fsgid;
		inode->i_blocks = 0;
		inode->i_mapping->a_ops = &ramfs_aops;
		inode->i_mapping->backing_dev_info = &ramfs_backing_dev_info;
		mapping_set_gfp_mask(inode->i_mapping, GFP_HIGHUSER);
		mapping_set_unevictable(inode->i_mapping);
		inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
		switch (mode & S_IFMT) {
		default:
			init_special_inode(inode, mode, dev);
			break;
		case S_IFREG:
			inode->i_op = &ramfs_file_inode_operations;
			inode->i_fop = &ramfs_file_operations;
			break;
		case S_IFDIR:
			inode->i_op = &ramfs_dir_inode_operations;
			inode->i_fop = &simple_dir_operations;
			/* directory inodes start off with i_nlink == 2 (for "." entry) */
			inc_nlink(inode);
			break;
		case S_IFLNK:
			inode->i_op = &page_symlink_inode_operations;
			break;
		}
	}
	return inode;
}
After this function runs, we end up with an inode like this:
i_hash | {next=0x00000000 pprev=0x00000000 } | hlist_node
i_list | {next=0x100b6348 prev=0x100b6348 } | list_head
i_sb_list | {next=0x00ed5670 prev=0x00ed5670 } | list_head
i_dentry | {next=0x04800de8 prev=0x04800de8 } | list_head
i_ino | 0x00000001 | unsigned long
i_count | {counter=0x00000001 } | atomic_t
i_nlink | 0x00000002 | unsigned int
i_uid | 0x00000000 | unsigned int
i_gid | 0x00000000 | unsigned int
i_rdev | 0x00000000 | unsigned int
i_version | 0x0000000000000000 | unsigned __int64
i_size | 0x0000000000000000 | __int64
i_atime | {tv_sec=0x00000000 tv_nsec=0x00000000 } | timespec
i_mtime | {tv_sec=0x00000000 tv_nsec=0x00000000 } | timespec
i_ctime | {tv_sec=0x00000000 tv_nsec=0x00000000 } | timespec
i_blkbits | 0x0000000c | unsigned int
i_blocks | 0x00000000 | unsigned long
i_bytes | 0x0000 | unsigned short
i_mode | 0x41ed | unsigned short
i_lock | {raw_lock={...} } | spinlock_t
i_mutex | {count={...} wait_lock={...} wait_list={...} } | mutex
i_alloc_sem | {activity=0x00000000 wait_lock={...} wait_list={...} } | rw_semaphore
i_op | 0x100b09b8 struct inode_operations const ramfs_dir_inode_operations {create=0x100a6a40 lookup=0x1003a400 link=0x100394dd ...} | const inode_operations *
i_fop | 0x100b0188 struct file_operations const simple_dir_operations {owner=0x00000000 llseek=0x1003a450 read=0x10039c71 ...} | const file_operations *
i_sb | 0x00ed5600 {s_list={...} s_dev=0x00000000 s_blocksize=0x00001000 ...} | super_block *
i_flock | 0x00000000 {fl_next=??? fl_link={...} fl_block={...} ...} | file_lock *
i_mapping | 0x04800e74 {host=0x04800dd0 page_tree={...} tree_lock={...} ...} | address_space *
i_data | {host=0x04800dd0 page_tree={...} tree_lock={...} ...} | address_space
i_devices | {next=0x04800ec8 prev=0x04800ec8 } | list_head
i_pipe | 0x00000000 {wait={...} nrbufs=??? curbuf=??? ...} | pipe_inode_info *
i_bdev | 0x00000000 {bd_dev=??? bd_inode=??? bd_openers=??? ...} | block_device *
i_cdev | 0x00000000 {kobj={...} owner=??? ops=??? ...} | cdev *
i_cindex | 0x00000000 | int
i_generation | 0x00000000 | unsigned int
i_state | 0x00000000 | unsigned long
dirtied_when | 0x00000000 | unsigned long
i_flags | 0x00000000 | unsigned int
i_writecount | {counter=0x00000000 } | atomic_t
i_private | 0x00000000 | void *
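A quick sanity check on the i_mode value in this dump: 0x41ed is exactly the S_IFDIR | 0755 that ramfs_fill_super asked for. The small standalone C program below verifies the arithmetic.

#include <stdio.h>

int main(void)
{
	unsigned int mode = 0x41ed;

	printf("%#o\n", mode);	/* prints 040755: S_IFDIR (040000) | rwxr-xr-x (0755) */
	printf("is dir: %d\n", (mode & 0170000) == 0040000);	/* prints 1 */
	return 0;
}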
The first function ramfs_get_inode calls is new_inode (fs/inode.c):
/**
 * new_inode - obtain an inode
 * @sb: superblock
 *
 * Allocates a new inode for given superblock. The default gfp_mask
 * for allocations related to inode->i_mapping is GFP_HIGHUSER_PAGECACHE.
 * If HIGHMEM pages are unsuitable or it is known that pages allocated
 * for the page cache are not reclaimable or migratable,
 * mapping_set_gfp_mask() must be called with suitable flags on the
 * newly created inode's mapping
 *
 */
struct inode *new_inode(struct super_block *sb)
{
	/*
	 * On a 32bit, non LFS stat() call, glibc will generate an EOVERFLOW
	 * error if st_ino won't fit in target struct field. Use 32bit counter
	 * here to attempt to avoid that.
	 */
	static unsigned int last_ino;
	struct inode * inode;

	spin_lock_prefetch(&inode_lock);

	inode = alloc_inode(sb);
	if (inode) {
		spin_lock(&inode_lock);
		inodes_stat.nr_inodes++;
		list_add(&inode->i_list, &inode_in_use);
		list_add(&inode->i_sb_list, &sb->s_inodes);
		inode->i_ino = ++last_ino;
		inode->i_state = 0;
		spin_unlock(&inode_lock);
	}
	return inode;
}
1.2.2.2.2.1.1 alloc_inode
The first function new_inode calls is alloc_inode:
static struct inode *alloc_inode(struct super_block *sb)
{
	static const struct address_space_operations empty_aops;
	static struct inode_operations empty_iops;
	static const struct file_operations empty_fops;
	struct inode *inode;

	if (sb->s_op->alloc_inode)
		inode = sb->s_op->alloc_inode(sb);
	else
		inode = (struct inode *) kmem_cache_alloc(inode_cachep, GFP_KERNEL);

	if (inode) {
		struct address_space * const mapping = &inode->i_data;

		inode->i_sb = sb;
		inode->i_blkbits = sb->s_blocksize_bits;
		inode->i_flags = 0;
		atomic_set(&inode->i_count, 1);
		inode->i_op = &empty_iops;
		inode->i_fop = &empty_fops;
		inode->i_nlink = 1;
		atomic_set(&inode->i_writecount, 0);
		inode->i_size = 0;
		inode->i_blocks = 0;
		inode->i_bytes = 0;
		inode->i_generation = 0;
#ifdef CONFIG_QUOTA
		memset(&inode->i_dquot, 0, sizeof(inode->i_dquot));
#endif
		inode->i_pipe = NULL;
		inode->i_bdev = NULL;
		inode->i_cdev = NULL;
		inode->i_rdev = 0;
		inode->dirtied_when = 0;
		if (security_inode_alloc(inode)) {
			if (inode->i_sb->s_op->destroy_inode)
				inode->i_sb->s_op->destroy_inode(inode);
			else
				kmem_cache_free(inode_cachep, (inode));
			return NULL;
		}

		spin_lock_init(&inode->i_lock);
		lockdep_set_class(&inode->i_lock, &sb->s_type->i_lock_key);

		mutex_init(&inode->i_mutex);
		lockdep_set_class(&inode->i_mutex, &sb->s_type->i_mutex_key);

		init_rwsem(&inode->i_alloc_sem);
		lockdep_set_class(&inode->i_alloc_sem, &sb->s_type->i_alloc_sem_key);

		mapping->a_ops = &empty_aops;
		mapping->host = inode;
		mapping->flags = 0;
		mapping_set_gfp_mask(mapping, GFP_HIGHUSER_PAGECACHE);
		mapping->assoc_mapping = NULL;
		mapping->backing_dev_info = &default_backing_dev_info;
		mapping->writeback_index = 0;

		/*
		 * If the block_device provides a backing_dev_info for client
		 * inodes then use that.  Otherwise the inode share the bdev's
		 * backing_dev_info.
		 */
		if (sb->s_bdev) {
			struct backing_dev_info *bdi;

			bdi = sb->s_bdev->bd_inode_backing_dev_info;
			if (!bdi)
				bdi = sb->s_bdev->bd_inode->i_mapping->backing_dev_info;
			mapping->backing_dev_info = bdi;
		}
		inode->i_private = NULL;
		inode->i_mapping = mapping;
	}
	return inode;
}
This function first allocates an inode structure, then ties the inode to its super_block:
inode->i_sb = sb;
This also shows that, at this stage, the link between inode and super_block is one-way: the inode points at its superblock!
Note as well that the inode's i_mapping is initialized to point at its own embedded i_data!
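A hedged one-liner to make that embedded-mapping idiom explicit; uses_own_mapping is a made-up helper, not a kernel API.

/* For an ordinary in-memory inode the address_space is embedded in the
 * inode itself, so right after alloc_inode() both of these hold:
 *   inode->i_mapping == &inode->i_data  and  inode->i_data.host == inode.
 * Block devices later redirect i_mapping elsewhere, which is why the
 * pointer indirection exists at all. */
static int uses_own_mapping(const struct inode *inode)
{
	return inode->i_mapping == &inode->i_data;
}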
After initialization the inode holds the following values:
i_hash | {next=0x00000000 pprev=0x00000000 } | hlist_node
i_list | {next=0x00000000 prev=0x00000000 } | list_head
i_sb_list | {next=0x00000000 prev=0x00000000 } | list_head
i_dentry | {next=0x04800de8 prev=0x04800de8 } | list_head
i_ino | 0x00000000 | unsigned long
i_count | {counter=0x00000001 } | atomic_t
i_nlink | 0x00000001 | unsigned int
i_uid | 0x00000000 | unsigned int
i_gid | 0x00000000 | unsigned int
i_rdev | 0x00000000 | unsigned int
i_version | 0x0000000000000000 | unsigned __int64
i_size | 0x0000000000000000 | __int64
i_atime | {tv_sec=0x00000000 tv_nsec=0x00000000 } | timespec
i_mtime | {tv_sec=0x00000000 tv_nsec=0x00000000 } | timespec
i_ctime | {tv_sec=0x00000000 tv_nsec=0x00000000 } | timespec
i_blkbits | 0x0000000c | unsigned int
i_blocks | 0x00000000 | unsigned long
i_bytes | 0x0000 | unsigned short
i_mode | 0x0000 | unsigned short
i_lock | {raw_lock={...} } | spinlock_t
i_mutex | {count={...} wait_lock={...} wait_list={...} } | mutex
i_alloc_sem | {activity=0x00000000 wait_lock={...} wait_list={...} } | rw_semaphore
i_op | 0x100be158 empty_iops {create=0x00000000 lookup=0x00000000 link=0x00000000 ...} | const inode_operations *
i_fop | 0x100b0078 empty_fops {owner=0x00000000 llseek=0x00000000 read=0x00000000 ...} | const file_operations *
i_sb | 0x00ed5600 {s_list={...} s_dev=0x00000000 s_blocksize=0x00001000 ...} | super_block *
i_flock | 0x00000000 {fl_next=??? fl_link={...} fl_block={...} ...} | file_lock *
i_mapping | 0x04800e74 {host=0x04800dd0 page_tree={...} tree_lock={...} ...} | address_space *
i_data | {host=0x04800dd0 page_tree={...} tree_lock={...} ...} | address_space
i_devices | {next=0x04800ec8 prev=0x04800ec8 } | list_head
i_pipe | 0x00000000 {wait={...} nrbufs=??? curbuf=??? ...} | pipe_inode_info *
i_bdev | 0x00000000 {bd_dev=??? bd_inode=??? bd_openers=??? ...} | block_device *
i_cdev | 0x00000000 {kobj={...} owner=??? ops=??? ...} | cdev *
i_cindex | 0x00000000 | int
i_generation | 0x00000000 | unsigned int
i_state | 0x00000000 | unsigned long
dirtied_when | 0x00000000 | unsigned long
i_flags | 0x00000000 | unsigned int
i_writecount | {counter=0x00000000 } | atomic_t
i_private | 0x00000000 | void *
1.2.2.2.2.1.2 Linking the inode and the super_block
After alloc_inode has allocated and initialized the inode, new_inode links it to the super_block:
list_add(&inode->i_list, &inode_in_use);
list_add(&inode->i_sb_list, &sb->s_inodes);
inode->i_ino = ++last_ino;
These few lines show that each super_block can own many inodes, while every inode belongs to exactly one super_block.
They also show that i_ino values are handed out in increasing order, so no two inodes get the same number.
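As a hedged illustration, walking one superblock's inodes only needs the s_inodes list built above; dump_sb_inodes is a made-up helper, inode_lock is omitted, and the 4-argument list_for_each_entry form again matches this port.

/* List every inode of one superblock through its s_inodes chain. */
static void dump_sb_inodes(struct super_block *sb)
{
	struct inode *ino;

	list_for_each_entry(inode, ino, &sb->s_inodes, i_sb_list)
		printk("ino %lu, nlink %u\n", ino->i_ino, ino->i_nlink);
}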
Once new_inode has set up the generic inode attributes, ramfs_get_inode configures the filesystem-specific ones:
inode->i_mapping->a_ops = &ramfs_aops;
inode->i_mapping->backing_dev_info = &ramfs_backing_dev_info;
inode->i_op = &ramfs_dir_inode_operations;
inode->i_fop = &simple_dir_operations;
/* directory inodes start off with i_nlink == 2 (for "." entry) */
inc_nlink(inode);
With the inode created, the next step is to build a dentry on top of it:
	root = d_alloc_root(inode);
	if (!root) {
		iput(inode);
		return -ENOMEM;
	}
	sb->s_root = root;
d_alloc_root is located in fs/dcache.c:
/**
 * d_alloc_root - allocate root dentry
 * @root_inode: inode to allocate the root for
 *
 * Allocate a root ("/") dentry for the inode given. The inode is
 * instantiated and returned. %NULL is returned if there is insufficient
 * memory or the inode passed is %NULL.
 */
struct dentry * d_alloc_root(struct inode * root_inode)
{
	struct dentry *res = NULL;

	if (root_inode) {
		static const struct qstr name = { 0, /*.len =*/ 1, /*.name =*/ (unsigned char*)"/" };

		res = d_alloc(NULL, &name);
		if (res) {
			res->d_sb = root_inode->i_sb;
			res->d_parent = res;
			d_instantiate(res, root_inode);
		}
	}
	return res;
}
The d_alloc it calls is in the same file, fs/dcache.c:
/**
 * d_alloc - allocate a dcache entry
 * @parent: parent of entry to allocate
 * @name: qstr of the name
 *
 * Allocates a dentry. It returns %NULL if there is insufficient memory
 * available. On a success the dentry is returned. The name passed in is
 * copied and the copy passed in may be reused after this call.
 */
struct dentry *d_alloc(struct dentry * parent, const struct qstr *name)
{
	struct dentry *dentry;
	char *dname;

	dentry = (struct dentry *)kmem_cache_alloc(dentry_cache, GFP_KERNEL);
	if (!dentry)
		return NULL;

	if (name->len > DNAME_INLINE_LEN-1) {
		dname = (char*)kmalloc(name->len + 1, GFP_KERNEL);
		if (!dname) {
			kmem_cache_free(dentry_cache, dentry);
			return NULL;
		}
	} else {
		dname = (char*)dentry->d_iname;
	}
	dentry->d_name.name = (unsigned char*)dname;
	dentry->d_name.len = name->len;
	dentry->d_name.hash = name->hash;
	memcpy(dname, name->name, name->len);
	dname[name->len] = 0;

	atomic_set(&dentry->d_count, 1);
	dentry->d_flags = DCACHE_UNHASHED;
	spin_lock_init(&dentry->d_lock);
	dentry->d_inode = NULL;
	dentry->d_parent = NULL;
	dentry->d_sb = NULL;
	dentry->d_op = NULL;
	dentry->d_fsdata = NULL;
	dentry->d_mounted = 0;
#ifdef CONFIG_PROFILING
	dentry->d_cookie = NULL;
#endif
	INIT_HLIST_NODE(&dentry->d_hash);
	INIT_LIST_HEAD(&dentry->d_lru);
	INIT_LIST_HEAD(&dentry->d_subdirs);
	INIT_LIST_HEAD(&dentry->d_alias);

	if (parent) {
		dentry->d_parent = dget(parent);
		dentry->d_sb = parent->d_sb;
	} else {
		INIT_LIST_HEAD(&dentry->d_u.d_child);
	}

	spin_lock(&dcache_lock);
	if (parent)
		list_add(&dentry->d_u.d_child, &parent->d_subdirs);
	dentry_stat.nr_dentry++;
	spin_unlock(&dcache_lock);

	return dentry;
}
From this function we can see that every dentry may have a parent, except the topmost one, which has none.
Every dentry keeps a linked list of its child dentries.
And every dentry stores a pointer to its super_block.
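Those observations are already enough to walk the whole tree. Here is a hedged sketch; dump_dentry_tree is a made-up helper, real code must hold dcache_lock just as d_alloc does above, and the 4-argument list_for_each_entry form matches this port.

/* Depth-first dump of a dentry subtree via d_subdirs / d_u.d_child. */
static void dump_dentry_tree(struct dentry *parent, int depth)
{
	struct dentry *child;

	printk("%*s%s\n", depth * 2, "", (const char *)parent->d_name.name);
	list_for_each_entry(dentry, child, &parent->d_subdirs, d_u.d_child)
		dump_dentry_tree(child, depth + 1);
}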
Running this function gives us a root dentry that looks like this:
d_count | {counter=0x00000001 } | atomic_t
d_flags | 0x00000010 | unsigned int
d_lock | {raw_lock={...} } | spinlock_t
d_inode | 0x00000000 {i_hash={...} i_list={...} i_sb_list={...} ...} | inode *
d_hash | {next=0x00000000 pprev=0x00000000 } | hlist_node
d_parent | 0x00000000 {d_count={...} d_flags=??? d_lock={...} ...} | dentry *
d_name | {hash=0x00000000 len=0x00000001 name=0x04801874 "/" } | qstr
d_lru | {next=0x04801840 prev=0x04801840 } | list_head
d_u | {d_child={...} d_rcu={...} } | dentry::<unnamed-type-d_u>
d_subdirs | {next=0x04801850 prev=0x04801850 } | list_head
d_alias | {next=0x04801858 prev=0x04801858 } | list_head
d_time | 0xcdcdcdcd | unsigned long
d_op | 0x00000000 {d_revalidate=??? d_hash=??? d_compare=??? ...} | dentry_operations *
d_sb | 0x00000000 {s_list={...} s_dev=??? s_blocksize=??? ...} | super_block *
d_fsdata | 0x00000000 | void *
d_mounted | 0x00000000 | int
d_iname | 0x04801874 "/" | unsigned char [36]
Once the dentry has been allocated, d_alloc_root fills in its superblock pointer and its parent:
res->d_sb = root_inode->i_sb;
res->d_parent = res;
As we can see, the topmost dentry's parent pointer points to itself.
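This self-parent convention is exactly what the kernel's IS_ROOT macro in include/linux/dcache.h tests for, and it guarantees that climbing toward the root terminates. fs_root_of below is a made-up helper showing the idiom:

#define IS_ROOT(x) ((x) == (x)->d_parent)

static struct dentry *fs_root_of(struct dentry *d)
{
	while (!IS_ROOT(d))		/* the root dentry is its own parent */
		d = d->d_parent;
	return d;
}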
Finally, d_alloc_root calls d_instantiate to bind the inode and the dentry together:
/**
 * d_instantiate - fill in inode information for a dentry
 * @entry: dentry to complete
 * @inode: inode to attach to this dentry
 *
 * Fill in inode information in the entry.
 *
 * This turns negative dentries into productive full members
 * of society.
 *
 * NOTE! This assumes that the inode count has been incremented
 * (or otherwise set) by the caller to indicate that it is now
 * in use by the dcache.
 */
void d_instantiate(struct dentry *entry, struct inode * inode)
{
	BUG_ON(!list_empty(&entry->d_alias));
	spin_lock(&dcache_lock);
	__d_instantiate(entry, inode);
	spin_unlock(&dcache_lock);
	security_d_instantiate(entry, inode);
}
The actual work is done by __d_instantiate:
/* the caller must hold dcache_lock */
static void __d_instantiate(struct dentry *dentry, struct inode *inode)
{
	if (inode)
		list_add(&dentry->d_alias, &inode->i_dentry);
	dentry->d_inode = inode;
	fsnotify_d_instantiate(dentry, inode);
}
A very simple linkage: the dentry is added to the inode's i_dentry list, and d_inode points back at the inode.
Right at the end, get_sb_nodev calls simple_set_mnt:
int simple_set_mnt(struct vfsmount *mnt, struct super_block *sb)
{
	mnt->mnt_sb = sb;
	mnt->mnt_root = dget(sb->s_root);
	return 0;
}
This function ties the vfsmount to the super_block and, at the same time, to the root dentry.
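Pulling the threads together, the links built so far can be summarized as a set of invariants. check_root_links is a made-up helper that merely asserts them; each comment names the function that established the link.

static void check_root_links(struct vfsmount *mnt)
{
	struct super_block *sb = mnt->mnt_sb;

	BUG_ON(sb->s_root != mnt->mnt_root);			/* simple_set_mnt */
	BUG_ON(mnt->mnt_root->d_sb != sb);			/* d_alloc_root */
	BUG_ON(mnt->mnt_root->d_inode->i_sb != sb);		/* alloc_inode */
	BUG_ON(mnt->mnt_root->d_parent != mnt->mnt_root);	/* root is its own parent */
}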
After the superblock has been created, vfs_kern_mount completes the dentry-related wiring:
mnt->mnt_mountpoint = mnt->mnt_root;
mnt->mnt_parent = mnt;
Note how mnt_parent is set here: the root mount is its own parent, just like the root dentry.
The last step in creating the root directory is to create the mnt_namespace:
	ns = (mnt_namespace *)kmalloc(sizeof(*ns), GFP_KERNEL);
	if (!ns)
		panic("Can't allocate initial namespace");
	atomic_set(&ns->count, 1);
	INIT_LIST_HEAD(&ns->list);
	init_waitqueue_head(&ns->poll);
	ns->event = 0;
	list_add(&mnt->mnt_list, &ns->list);
	ns->root = mnt;
	mnt->mnt_ns = ns;

	init_task.nsproxy->mnt_ns = ns;
	get_mnt_ns(ns);

	root.mnt = ns->root;
	root.dentry = ns->root->mnt_root;

	set_fs_pwd(current->fs, &root);
	set_fs_root(current->fs, &root);
The key point here is that the newly created ns is stored in init_task, so a traversal of the whole mount tree can simply start from init_task.nsproxy->mnt_ns.
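For instance, listing every mount in the initial namespace only requires following the mnt_list chain that init_mount_tree seeded above; dump_namespace is a made-up helper, namespace locking is omitted, and the 4-argument list_for_each_entry form matches this port.

static void dump_namespace(void)
{
	struct mnt_namespace *ns = init_task.nsproxy->mnt_ns;
	struct vfsmount *m;

	list_for_each_entry(vfsmount, m, &ns->list, mnt_list)
		printk("%s on %s\n", m->mnt_devname,
		       (const char *)m->mnt_mountpoint->d_name.name);
}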