get_inode(dev,numb)
/*===========================================================================* * get_inode * *===========================================================================*/ PUBLIC struct inode *get_inode(dev, numb) dev_t dev; /* device on which inode resides */ int numb; /* inode number (ANSI: may not be unshort) */ { /* Find a slot in the inode table, load the specified inode into it, and * return a pointer to the slot. If 'dev' == NO_DEV, just return a free slot. */ register struct inode *rip, *xp; /* Search the inode table both for (dev, numb) and a free slot. */ xp = NIL_INODE; for (rip = &inode[0]; rip < &inode[NR_INODES]; rip++) { if (rip->i_count > 0) { /* only check used slots for (dev, numb) */ if (rip->i_dev == dev && rip->i_num == numb) { /* This is the inode that we are looking for. */ rip->i_count++; return(rip); /* (dev, numb) found */ } } else { xp = rip; /* remember this free slot for later */ } } /* Inode we want is not currently in use. Did we find a free slot? */ if (xp == NIL_INODE) { /* inode table completely full */ err_code = ENFILE; return(NIL_INODE); } /* A free inode slot has been located. Load the inode into it. */ xp->i_dev = dev; xp->i_num = numb; xp->i_count = 1; if (dev != NO_DEV) rw_inode(xp, READING); /* get inode from disk */ xp->i_update = 0; /* all the times are initially up-to-date */ return(xp); }
-----------------------------------------------------------------------------------------------------------------------------
/*===========================================================================* * rw_inode * *===========================================================================*/ PUBLIC void rw_inode(rip, rw_flag) register struct inode *rip; /* pointer to inode to be read/written */ int rw_flag; /* READING or WRITING */ { /* An entry in the inode table is to be copied to or from the disk. */ register struct buf *bp; register struct super_block *sp; d1_inode *dip; d2_inode *dip2; block_t b, offset; /* Get the block where the inode resides. */ sp = get_super(rip->i_dev); /* get pointer to super block */ rip->i_sp = sp; /* inode must contain super block pointer */ offset = sp->s_imap_blocks + sp->s_zmap_blocks + 2; b = (block_t) (rip->i_num - 1)/sp->s_inodes_per_block + offset; bp = get_block(rip->i_dev, b, NORMAL); dip = bp->b_v1_ino + (rip->i_num - 1) % V1_INODES_PER_BLOCK; dip2 = bp->b_v2_ino + (rip->i_num - 1) % V2_INODES_PER_BLOCK; /* Do the read or write. */ if (rw_flag == WRITING) { if (rip->i_update) update_times(rip); /* times need updating */ if (sp->s_rd_only == FALSE) bp->b_dirt = DIRTY; } /* Copy the inode from the disk block to the in-core table or vice versa. * If the fourth parameter below is FALSE, the bytes are swapped. 
*/ if (sp->s_version == V1) old_icopy(rip, dip, rw_flag, sp->s_native); else new_icopy(rip, dip2, rw_flag, sp->s_native); put_block(bp, INODE_BLOCK); rip->i_dirt = CLEAN; } /*===========================================================================* * old_icopy * *===========================================================================*/ PRIVATE void old_icopy(rip, dip, direction, norm) register struct inode *rip; /* pointer to the in-core inode struct */ register d1_inode *dip; /* pointer to the d1_inode inode struct */ int direction; /* READING (from disk) or WRITING (to disk) */ int norm; /* TRUE = do not swap bytes; FALSE = swap */ { /* The V1.x IBM disk, the V1.x 68000 disk, and the V2 disk (same for IBM and * 68000) all have different inode layouts. When an inode is read or written * this routine handles the conversions so that the information in the inode * table is independent of the disk structure from which the inode came. * The old_icopy routine copies to and from V1 disks. */ int i; if (direction == READING) { /* Copy V1.x inode to the in-core table, swapping bytes if need be. */ rip->i_mode = conv2(norm, (int) dip->d1_mode); rip->i_uid = conv2(norm, (int) dip->d1_uid ); rip->i_size = conv4(norm, dip->d1_size); rip->i_mtime = conv4(norm, dip->d1_mtime); rip->i_atime = rip->i_mtime; rip->i_ctime = rip->i_mtime; rip->i_nlinks = (nlink_t) dip->d1_nlinks; /* 1 char */ rip->i_gid = (gid_t) dip->d1_gid; /* 1 char */ rip->i_ndzones = V1_NR_DZONES; rip->i_nindirs = V1_INDIRECTS; for (i = 0; i < V1_NR_TZONES; i++) rip->i_zone[i] = conv2(norm, (int) dip->d1_zone[i]); } else { /* Copying V1.x inode to disk from the in-core table. 
*/ dip->d1_mode = conv2(norm, (int) rip->i_mode); dip->d1_uid = conv2(norm, (int) rip->i_uid ); dip->d1_size = conv4(norm, rip->i_size); dip->d1_mtime = conv4(norm, rip->i_mtime); dip->d1_nlinks = (nlink_t) rip->i_nlinks; /* 1 char */ dip->d1_gid = (gid_t) rip->i_gid; /* 1 char */ for (i = 0; i < V1_NR_TZONES; i++) dip->d1_zone[i] = conv2(norm, (int) rip->i_zone[i]); } } /*===========================================================================* * new_icopy * *===========================================================================*/ PRIVATE void new_icopy(rip, dip, direction, norm) register struct inode *rip; /* pointer to the in-core inode struct */ register d2_inode *dip; /* pointer to the d2_inode struct */ int direction; /* READING (from disk) or WRITING (to disk) */ int norm; /* TRUE = do not swap bytes; FALSE = swap */ { /* Same as old_icopy, but to/from V2 disk layout. */ int i; if (direction == READING) { /* Copy V2.x inode to the in-core table, swapping bytes if need be. */ rip->i_mode = conv2(norm,dip->d2_mode); rip->i_uid = conv2(norm,dip->d2_uid ); rip->i_nlinks = conv2(norm,(int) dip->d2_nlinks); rip->i_gid = conv2(norm,(int) dip->d2_gid ); rip->i_size = conv4(norm,dip->d2_size); rip->i_atime = conv4(norm,dip->d2_atime); rip->i_ctime = conv4(norm,dip->d2_ctime); rip->i_mtime = conv4(norm,dip->d2_mtime); rip->i_ndzones = V2_NR_DZONES; rip->i_nindirs = V2_INDIRECTS; for (i = 0; i < V2_NR_TZONES; i++) rip->i_zone[i] = conv4(norm, (long) dip->d2_zone[i]); } else { /* Copying V2.x inode to disk from the in-core table. */ dip->d2_mode = conv2(norm,rip->i_mode); dip->d2_uid = conv2(norm,rip->i_uid ); dip->d2_nlinks = conv2(norm,rip->i_nlinks); dip->d2_gid = conv2(norm,rip->i_gid ); dip->d2_size = conv4(norm,rip->i_size); dip->d2_atime = conv4(norm,rip->i_atime); dip->d2_ctime = conv4(norm,rip->i_ctime); dip->d2_mtime = conv4(norm,rip->i_mtime); for (i = 0; i < V2_NR_TZONES; i++) dip->d2_zone[i] = conv4(norm, (long) rip->i_zone[i]); } }
get_inode先查询内存中的inode_table,选择i_count大于0的(即正在被使用的)inode,看dev和numb是否匹配。若匹配则找到,inode的count域加一,返回指针。若遍历完都未找到匹配,则numb节点不在内存。使用刚才遍历过程记录的空slot,调用rw_inode读取的inode放入此slot中。刚调入内存的inode在rw_inode函数最后设置为CLEAN。其他访问inode的操作可能修改为DIRTY。
注:dirty标志仅代表inode存储在磁盘上部分是dirty的还是clean的。inode仅在内存中的部分与dirty标志并不相互影响。
疑问:若给定的参数numb所代表的inode并未通过alloc_inode分配,结果会怎样?
put_inode(rip)
/*===========================================================================* * put_inode * *===========================================================================*/ PUBLIC void put_inode(rip) register struct inode *rip; /* pointer to inode to be released */ { /* The caller is no longer using this inode. If no one else is using it either * write it back to the disk immediately. If it has no links, truncate it and * return it to the pool of available inodes. */ if (rip == NIL_INODE) return; /* checking here is easier than in caller */ if (--rip->i_count == 0) { /* i_count == 0 means no one is using it now */ if ((rip->i_nlinks & BYTE) == 0) { /* i_nlinks == 0 means free the inode. */ truncate(rip); /* return all the disk blocks */ rip->i_mode = I_NOT_ALLOC; /* clear I_TYPE field */ rip->i_dirt = DIRTY; free_inode(rip->i_dev, rip->i_num); } else { if (rip->i_pipe == I_PIPE) truncate(rip); } rip->i_pipe = NO_PIPE; /* should always be cleared */ if (rip->i_dirt == DIRTY) rw_inode(rip, WRITING); } }
参数为内存inodetable中的项的指针。put函数对inode的icount域减一。若为0,则现在没有程序在使用此内存inode,可以从内存inode中删除。进一步判断ilink是否为0,若是,则说明现在此inode可以free掉,且调用truncate函数将此inode中占用的数据块全部设为空闲。将内存inodetable中此项设为未分配,调用free_inode在位示图中标记此inode可用。
这里要注意对pipe文件的处理,当一个进程释放管道文件的时候应该将其inode删除。因为为一个进程保留管道文件是没有意义的。
最后,将inode的pipe属性修改为NO_PIPE 。如果inode被修改了,调用rw_inode写回。(当icount减一等于0时,不管ilink是否为0,都调用rw_inode写回数据)
单独看这一个过程的话,可能会觉得只有在ilinks为0的时候才设置dirty标志,但是这时候设置好像又没什么意义了,反正这个inode已经没人使用了。但是当icount为0,ilinks不等于0时,并没有设置dirty标志。接着执行下面写回脏inode的操作,会不会导致刚才对于icount的修改没有写回到磁盘?(以上分析错误:icount只存在于内存中,根本就不在磁盘中存储。icount代表当前使用这个inode的进程的数目。)
alloc_inode & free_inode调用位示图操作,修改位示图相关位的值。
/*===========================================================================* * alloc_inode * *===========================================================================*/ PUBLIC struct inode *alloc_inode(dev, bits) dev_t dev; /* device on which to allocate the inode */ mode_t bits; /* mode of the inode */ { /* Allocate a free inode on 'dev', and return a pointer to it. */ register struct inode *rip; register struct super_block *sp; int major, minor, inumb; bit_t b; sp = get_super(dev); /* get pointer to super_block */ if (sp->s_rd_only) { /* can't allocate an inode on a read only device. */ err_code = EROFS; return(NIL_INODE); } /* Acquire an inode from the bit map. */ b = alloc_bit(sp, IMAP, sp->s_isearch); if (b == NO_BIT) { err_code = ENFILE; major = (int) (sp->s_dev >> MAJOR) & BYTE; minor = (int) (sp->s_dev >> MINOR) & BYTE; printf("Out of i-nodes on %sdevice %d/%d\n", sp->s_dev == ROOT_DEV ? "root " : "", major, minor); return(NIL_INODE); } sp->s_isearch = b; /* next time start here */ inumb = (int) b; /* be careful not to pass unshort as param */ /* Try to acquire a slot in the inode table. */ if ((rip = get_inode(NO_DEV, inumb)) == NIL_INODE) { /* No inode table slots available. Free the inode just allocated. */ free_bit(sp, IMAP, b); } else { /* An inode slot is available. Put the inode just allocated into it. */ rip->i_mode = bits; /* set up RWX bits */ rip->i_nlinks = (nlink_t) 0; /* initial no links */ rip->i_uid = fp->fp_effuid; /* file's uid is owner's */ rip->i_gid = fp->fp_effgid; /* ditto group id */ rip->i_dev = dev; /* mark which device it is on */ rip->i_ndzones = sp->s_ndzones; /* number of direct zones */ rip->i_nindirs = sp->s_nindirs; /* number of indirect zones per blk*/ rip->i_sp = sp; /* pointer to super block */ /* Fields not cleared already are cleared in wipe_inode(). They have * been put there because truncate() needs to clear the same fields if * the file happens to be open while being truncated. 
It saves space * not to repeat the code twice. */ wipe_inode(rip); } return(rip); } /*===========================================================================* * wipe_inode * *===========================================================================*/ PUBLIC void wipe_inode(rip) register struct inode *rip; /* the inode to be erased */ { /* Erase some fields in the inode. This function is called from alloc_inode() * when a new inode is to be allocated, and from truncate(), when an existing * inode is to be truncated. */ register int i; rip->i_size = 0; rip->i_update = ATIME | CTIME | MTIME; /* update all times later */ rip->i_dirt = DIRTY; for (i = 0; i < V2_NR_TZONES; i++) rip->i_zone[i] = NO_ZONE; }
alloc_inode分配一个inode,mode为参数中给出的bits。与数据块的分配不同,并不需要指定在哪个块号附近分配。这里都是从超级块中的sp->s_isearch(第一个可用inode号)开始。先调用alloc_bit从inode位示图中分配一个bit。然后还要在inodetable中为此inode找到位置。调用get_inode(NO_DEV, inumb),若没有空闲位置,则将刚才分配到的bit也free掉。
若get_inode成功从inode_table中获得一个位置rip,则对这个rip指向的inode进行初始化,比如inode_number,mode,uid,gid,dev等。初始的link数目为0 。还有部分初始化放在wipe_inode中进行。因为别处也要用到这部分初始化,所以放在一个函数里,减少代码量。
注意,这里分配的inode仅仅是在位示图中分配了一个位,然后在内存inode_table初始化了这个inode。真正磁盘inode存储区域内并没有这个inode具体信息(当然属于这个inode的存储区域是有的)。所以在分配了inode时在wipe_inode函数中将内存inode_table中对应项设置为dirty。具体写到磁盘上要put_inode依据dirty标志调用rw_inode来完成。
/*===========================================================================* * free_inode * *===========================================================================*/ PUBLIC void free_inode(dev, inumb) dev_t dev; /* on which device is the inode */ ino_t inumb; /* number of inode to be freed */ { /* Return an inode to the pool of unallocated inodes. */ register struct super_block *sp; bit_t b; /* Locate the appropriate super_block. */ sp = get_super(dev); if (inumb <= 0 || inumb > sp->s_ninodes) return; b = inumb; free_bit(sp, IMAP, b); if (b < sp->s_isearch) sp->s_isearch = b; } /*===========================================================================* * update_times * *===========================================================================*/ PUBLIC void update_times(rip) register struct inode *rip; /* pointer to inode to be read/written */ { /* Various system calls are required by the standard to update atime, ctime, * or mtime. Since updating a time requires sending a message to the clock * task--an expensive business--the times are marked for update by setting * bits in i_update. When a stat, fstat, or sync is done, or an inode is * released, update_times() may be called to actually fill in the times. */ time_t cur_time; struct super_block *sp; sp = rip->i_sp; /* get pointer to super block. */ if (sp->s_rd_only) return; /* no updates for read-only file systems */ cur_time = clock_time(); if (rip->i_update & ATIME) rip->i_atime = cur_time; if (rip->i_update & CTIME) rip->i_ctime = cur_time; if (rip->i_update & MTIME) rip->i_mtime = cur_time; rip->i_update = 0; /* they are all up-to-date now */ }
注意:在alloc_inode和free_inode中调用alloc_bit和free_bit的时候,参数直接给的是inode号。
在inode位示图中直接根据inode号定位所在的位。这里的inode从0开始计数。但是在rw_inode中,给出了inode_number,定位此inode具体所在的磁盘位置(磁盘块号,块内偏移地址)。这时候并不能直接使用给出的inode号,因为具体存储inode的时候是从1号inode开始的。0号为保留,作为错误返回值。因此,计算具体存储位置的时候,并不能直接使用numb,这里是要减一,其他实现可能还会有不同的处理。
下面是一些解释:
Usually, the inode 0 is reserved because a return value of 0 usually signals an error. Multiple methods in the Linux kernel -- especially in the VFS layer shared by all file systems -- return an ino_t, e.g. find_inode_number.
There are more reserved inode numbers. For example in ext2 :
/* Reserved inode numbers in ext2.  Each directive sits on its own line: a
 * preprocessor directive runs to the end of the line, so several #defines
 * written on one line would all become part of the first macro's replacement
 * text.
 */
#define EXT2_BAD_INO		 1	/* Bad blocks inode */
#define EXT2_ROOT_INO		 2	/* Root inode */
#define EXT2_BOOT_LOADER_INO	 5	/* Boot loader inode */
#define EXT2_UNDEL_DIR_INO	 6	/* Undelete directory inode */
and ext3 has:
/* Reserved inode numbers in ext3.  Each directive sits on its own line: a
 * preprocessor directive runs to the end of the line, so several #defines
 * written on one line would all become part of the first macro's replacement
 * text.
 */
#define EXT3_BAD_INO		 1	/* Bad blocks inode */
#define EXT3_ROOT_INO		 2	/* Root inode */
#define EXT3_BOOT_LOADER_INO	 5	/* Boot loader inode */
#define EXT3_UNDEL_DIR_INO	 6	/* Undelete directory inode */
#define EXT3_RESIZE_INO		 7	/* Reserved group descriptors inode */
#define EXT3_JOURNAL_INO	 8	/* Journal inode */
Other filesystems use the ino 1 as root inode number. In general, a file system is free to choose its inode numbers and its reserved ino values (with the exception of 0).
最后关于update_times:
inode中i_update定义为char类型。主要使用到一个char8位数据中的三位,用来标识三个时间是否已经是最新的。为0表示是最新的,否则根据具体位确定更新哪个时间域。具体比较还需要三个宏ATIME CTIME MTIME的值。
/* Flag bits (octal constants) kept in an inode's i_update field; each bit
 * marks one time stamp as needing an update.  One directive per line: a
 * preprocessor directive runs to the end of the line, so several #defines
 * written on one line would leave CTIME and MTIME undefined.
 */
#define ATIME            002	/* set if atime field needs updating */
#define CTIME            004	/* set if ctime field needs updating */
#define MTIME            010	/* set if mtime field needs updating */
转换成二进制则是10 100 1000 分别在一个字节的第2 3 4位为1.
/*===========================================================================*
 *				dup_inode				     *
 *===========================================================================*/
PUBLIC void dup_inode(ip)
struct inode *ip;		/* The inode to be duplicated. */
{
/* Register one more user of an inode whose table slot is already known.
 * This is the cheap counterpart of get_inode(): no table search is needed,
 * only the use count is raised.
 */

  ip->i_count += 1;
}