/*
* buffer.c manages the high-speed buffer cache (buffer pool). The buffer
* cache sits between the kernel code and the main memory area.
*
* |---|---|------------------|---------------------|-------------------|
* | | | * * * | buffer | |
* |---|---|------------------|---------------------|-------------------|
* | /|\
* |------------------------------------|
* \|/
* buffer_head (list)
*/
/*
* linux/fs/buffer.c
*
* (C) 1991 Linus Torvalds
*/
/*
* 'buffer.c' implements the buffer-cache functions. Race-conditions have
* been avoided by NEVER letting an interrupt change a buffer (except for the
* data, of course), but instead letting the caller do it. NOTE! As interrupts
* can wake up a caller, some cli-sti sequences are needed to check for
* sleep-on-calls. These should be extremely quick, though (I hope).
*/
/*
* NOTE! There is one discordant note here: checking floppies for
* disk change. This is where it fits best, I think, as it should
* invalidate changed floppy-disk-caches.
*/
#include <stdarg.h>
#include <linux/config.h>
#include <linux/sched.h>
#include <linux/kernel.h>
#include <asm/system.h>
#include <asm/io.h>
/*
 * `end` is a symbol generated by the linker (ld) that marks the end of the
 * program image; only its address is meaningful.
 */
extern int end;
/* The array of buffer heads begins at the address of `end`. */
struct buffer_head * start_buffer = (struct buffer_head *) &end;
/* Hash chain heads, indexed through the hash(dev,block) macro. */
struct buffer_head * hash_table[NR_HASH];
/* Head of the circular, doubly linked free list built by buffer_init(). */
static struct buffer_head * free_list;
/* Tasks sleeping in getblk() for a free buffer; woken by brelse(). */
static struct task_struct * buffer_wait = NULL;
/* Number of buffers set up by buffer_init(). */
int NR_BUFFERS = 0;
/*
 * Sleep until the given buffer is no longer locked.
 *
 * Interrupts are disabled (cli) around the test-and-sleep loop and enabled
 * again (sti) once b_lock has been observed clear.
 */
static inline void wait_on_buffer(struct buffer_head * bh)
{
	cli();
	for (;;) {
		if (!bh->b_lock)
			break;
		sleep_on(&bh->b_wait);
	}
	sti();
}
/*
 * sync system call: bring the storage devices in line with the buffer cache.
 *
 * Synchronisation happens in two stages:
 *   1. in-memory structures (inodes) -> buffer cache, done by sync_inodes();
 *   2. buffer cache -> device, done here by queueing a write request for
 *      every dirty buffer.
 *
 *      inode  ----->  buffer cache  ----->  disk
 *
 * Always returns 0.
 */
int sys_sync(void)
{
	struct buffer_head * bh;
	int idx;

	/* Stage 1: write out inodes into buffers. */
	sync_inodes();

	/* Stage 2: walk every buffer head and queue writes for dirty ones. */
	idx = 0;
	bh = start_buffer;
	while (idx < NR_BUFFERS) {
		wait_on_buffer(bh);		/* wait for the buffer to be unlocked */
		if (bh->b_dirt)
			ll_rw_block(WRITE,bh);	/* issue a block write request */
		idx++;
		bh++;
	}
	return 0;
}
/*
 * Synchronise the buffer cache with one device.
 *
 * Dirty buffers belonging to `dev` are queued for writing, then the inodes
 * are written into the buffer cache with sync_inodes(), then the buffers of
 * `dev` are scanned a second time so that anything dirtied by the inode
 * sync is queued as well.
 *
 * Always returns 0.
 */
int sync_dev(int dev)
{
	struct buffer_head * bh;
	int idx;

	/* Pass 1: buffer cache -> device. */
	for (idx = 0, bh = start_buffer; idx < NR_BUFFERS; idx++, bh++) {
		if (bh->b_dev == dev) {
			wait_on_buffer(bh);
			/* b_dev is tested again after the (possible) sleep */
			if (bh->b_dev == dev && bh->b_dirt)
				ll_rw_block(WRITE,bh);
		}
	}

	/* inodes -> buffer cache. */
	sync_inodes();

	/* Pass 2: buffer cache -> device, again. */
	for (idx = 0, bh = start_buffer; idx < NR_BUFFERS; idx++, bh++) {
		if (bh->b_dev == dev) {
			wait_on_buffer(bh);
			if (bh->b_dev == dev && bh->b_dirt)
				ll_rw_block(WRITE,bh);
		}
	}
	return 0;
}
/*
 * Invalidate every cached buffer that belongs to `dev`: both the
 * up-to-date flag and the dirty flag are cleared.
 */
void inline invalidate_buffers(int dev)
{
	struct buffer_head * bh = start_buffer;
	int idx;

	for (idx = 0; idx < NR_BUFFERS; idx++, bh++) {
		if (bh->b_dev == dev) {
			wait_on_buffer(bh);
			/* b_dev is tested again after the (possible) sleep */
			if (bh->b_dev == dev) {
				bh->b_dirt = 0;
				bh->b_uptodate = 0;
			}
		}
	}
}
/*
 * This routine checks whether a floppy has been changed, and
 * invalidates all buffer-cache-entries in that case. This
 * is a relatively slow routine, so we have to try to minimize using
 * it. Thus it is called only upon a 'mount' or 'open'. This
 * is the best way of combining speed and utility, I think.
 * People changing diskettes in the middle of an operation deserve
 * to lose :-)
 *
 * NOTE! Although currently this is only for floppies, the idea is
 * that any additional removable block-device will use this routine,
 * and that mount/open needn't know that floppies/whatever are
 * special.
 */
/*
 * Check whether the medium in `dev` was changed.
 *
 * Nothing happens unless the major number of `dev` is 2 (the floppy,
 * according to the original annotation) and floppy_change() reports a
 * change for drive (dev & 0x03).  On a change, every superblock whose
 * s_dev matches is handed to put_super(), then the inodes and the cached
 * buffers of the device are invalidated.
 */
void check_disk_change(int dev)
{
	int idx;

	/* floppy_change() is only consulted for major number 2 */
	if (MAJOR(dev) != 2 || !floppy_change(dev & 0x03))
		return;

	/* The disk has been changed. */
	for (idx = 0; idx < NR_SUPER; idx++) {
		if (super_block[idx].s_dev != dev)
			continue;
		put_super(super_block[idx].s_dev);
	}
	invalidate_inodes(dev);		/* inodes are no longer valid */
	invalidate_buffers(dev);	/* cached blocks are no longer valid */
}
/*
 * Hash function of the buffer cache: XOR the device number with the block
 * number and reduce the result modulo NR_HASH.  Both arguments are
 * parenthesized so that expression arguments (for example `a | b`) are
 * combined as the caller wrote them instead of being re-associated by
 * operator precedence.
 */
#define _hashfn(dev,block) (((unsigned)((dev)^(block)))%NR_HASH)
/* The hash_table slot (chain head) for a (dev, block) pair; usable as an lvalue. */
#define hash(dev,block) hash_table[_hashfn(dev,block)]
/* 从hash 队列和空闲缓冲队列中移走指定的缓冲块 */
/* 在数据结构中,包含两个数组,使用两个数组来实现两个链表 */
/* 对于每一块的话,她同时存在于hash_table可寻的范围,同时也可能在free list链表上 */
static inline void remove_from_queues(struct buffer_head * bh)
{
/* remove from hash-queue */
/* 从hash 队列中移除缓冲块,只是对应的指针操作,原因是数据结构 */
/* 采用的是数组的形式来存储的信息 */
if (bh->b_next)
bh->b_next->b_prev = bh->b_prev;
if (bh->b_prev)
bh->b_prev->b_next = bh->b_next;
/* 如果该缓冲区是该队列的头一个块,则让hash 表的对应项指向本队列中的下一个缓冲区 */
/*
* |------|--------|------|-------|-------|
* | | | | | | hash_table
* |------|--------|------|-------|-------|
* |
* \|/
* a link list(if it is the first block)
*/
if (hash(bh->b_dev,bh->b_blocknr) == bh)
hash(bh->b_dev,bh->b_blocknr) = bh->b_next;
/* remove from free list */
/* 从空闲缓冲区表中移除缓冲块 */
if (!(bh->b_prev_free) || !(bh->b_next_free))
panic("Free block list corrupted");
bh->b_prev_free->b_next_free = bh->b_next_free;
bh->b_next_free->b_prev_free = bh->b_prev_free;
// 如果空闲链表头指向本缓冲区,则让其指向下一缓冲区
if (free_list == bh)
free_list = bh->b_next_free;
}
/*
 * Append a buffer head to the tail of the free list and, if it has a
 * device, push it onto the front of the matching hash chain.
 *
 * The back link of the previous chain head is only updated when such a
 * head exists: an empty hash slot holds NULL, and writing through it would
 * dereference a null pointer.
 */
static inline void insert_into_queues(struct buffer_head * bh)
{
	/* put at end of free list */
	bh->b_next_free = free_list;
	bh->b_prev_free = free_list->b_prev_free;
	free_list->b_prev_free->b_next_free = bh;
	free_list->b_prev_free = bh;
	/* put the buffer in new hash-queue if it has a device */
	bh->b_prev = NULL;
	bh->b_next = NULL;
	if (!bh->b_dev)
		return;
	bh->b_next = hash(bh->b_dev,bh->b_blocknr);
	hash(bh->b_dev,bh->b_blocknr) = bh;
	/* the chain may have been empty, in which case there is no old head */
	if (bh->b_next)
		bh->b_next->b_prev = bh;
}
/*
 * Look up the buffer head for (dev, block) in the hash table.
 *
 * The hash chain selected by hash(dev,block) is walked linearly, since
 * several (dev, block) pairs may collide in one slot.  Returns the matching
 * buffer head, or NULL when the chain holds no match.
 */
static struct buffer_head * find_buffer(int dev, int block)
{
	struct buffer_head * cur = hash(dev,block);

	while (cur) {
		if (cur->b_dev == dev && cur->b_blocknr == block)
			break;
		cur = cur->b_next;
	}
	return cur;
}
/*
 * Why like this, I hear you say... The reason is race-conditions.
 * As we don't lock buffers (unless we are reading them, that is),
 * something might happen to it while we sleep (ie a read-error
 * will force it bad). This shouldn't really happen currently, but
 * the code is ready.
 */
/*
 * Find the cached buffer for (dev, block) and take a reference on it.
 *
 * Returns NULL when the block is not in the hash table.  Otherwise the
 * reference count is raised, the buffer is waited on until unlocked, and
 * the identity of the buffer is checked again; if it no longer matches,
 * the reference is dropped and the lookup starts over.
 */
struct buffer_head * get_hash_table(int dev, int block)
{
	struct buffer_head * bh;

	while ((bh = find_buffer(dev,block)) != NULL) {
		bh->b_count++;			/* take a reference */
		wait_on_buffer(bh);		/* may sleep until unlocked */
		if (bh->b_dev != dev || bh->b_blocknr != block) {
			/* device or block number no longer match: undo and retry */
			bh->b_count--;
			continue;
		}
		return bh;
	}
	return NULL;
}
/*
 * Ok, this is getblk, and it isn't very clear, again to hinder
 * race-conditions. Most of the code is seldom used, (ie repeating),
 * so it should be much more efficient than it looks.
 *
 * The algorithm is changed: hopefully better, and an elusive bug removed.
 */
/*
 * Weight used to rank candidate buffers: a dirty buffer counts 2, a locked
 * buffer counts 1.  getblk() prefers the candidate with the lowest value.
 */
#define BADNESS(bh) (((bh)->b_dirt<<1)+(bh)->b_lock)
/*
 * Return a buffer head for (dev, block) with a reference held.
 *
 * If the block is already cached, get_hash_table() supplies it.  Otherwise
 * an unused buffer (b_count == 0) with the lowest BADNESS is picked from
 * the free list, waited on, written out if dirty, and then re-keyed to
 * (dev, block) with b_count = 1 and b_uptodate = 0.  Whenever the state
 * seen after a sleep invalidates the choice, the whole procedure restarts
 * at `repeat`.
 */
struct buffer_head * getblk(int dev,int block)
{
struct buffer_head * tmp, * bh;
repeat:
/* Already in the cache?  Then bh comes back with its count raised. */
if (bh = get_hash_table(dev,block))
return bh;
/* Not cached (bh is NULL here): scan the free list for a victim. */
tmp = free_list;
do {
if (tmp->b_count) /* buffer is in use */
continue; /* skips to the loop condition, i.e. the next entry */
/* first candidate, or a better (lower BADNESS) one than the current */
if (!bh || BADNESS(tmp)<BADNESS(bh))
{
bh = tmp;
/* neither dirty nor locked: cannot do better, stop scanning */
if (!BADNESS(tmp))
break;
}
/* and repeat until we find something good */
} while ((tmp = tmp->b_next_free) != free_list);
/* Every buffer has a non-zero count: sleep until brelse() wakes us. */
if (!bh)
{
sleep_on(&buffer_wait);
goto repeat;
}
/* Wait for the chosen buffer to be unlocked. */
wait_on_buffer(bh);
if (bh->b_count) /* somebody took it in the meantime */
goto repeat;
while (bh->b_dirt) /* buffer holds modified data */
{
sync_dev(bh->b_dev); /* queue writes for its device */
wait_on_buffer(bh); /* wait for the buffer to be unlocked again */
if (bh->b_count) /* taken by somebody else in the meantime */
goto repeat; /* start over */
}
/* NOTE!! While we slept waiting for this block, somebody else might */
/* already have added "this" block to the cache. check it */
if (find_buffer(dev,block))
goto repeat;
/* OK, FINALLY we know that this buffer is the only one of it's kind, */
/* and that it's unused (b_count=0), unlocked (b_lock=0), and clean */
/* Claim it: one reference, not dirty, contents not valid yet. */
bh->b_count=1;
bh->b_dirt=0;
bh->b_uptodate=0;
/* Unlink under the old key, re-key, and link in again under the new key. */
remove_from_queues(bh);
bh->b_dev=dev;
bh->b_blocknr=block;
insert_into_queues(bh);
return bh;
}
/*
 * Release a reference on a buffer.
 *
 * A NULL argument is ignored.  Otherwise the buffer is waited on until it
 * is unlocked, its reference count is decremented, and the tasks sleeping
 * on buffer_wait are woken.  Panics with "Trying to free free buffer" when
 * the count was already zero.
 */
void brelse(struct buffer_head * buf)
{
	if (buf) {
		wait_on_buffer(buf);
		if (buf->b_count-- == 0)
			panic("Trying to free free buffer");
		wake_up(&buffer_wait);
	}
}
/*
 * bread() reads a specified block and returns the buffer that contains
 * it. It returns NULL if the block was unreadable.
 */
/*
 * dev   - device number
 * block - block number on that device
 *
 * The buffer comes from getblk().  If its contents are not up to date, a
 * read request is issued and the buffer is waited on; if it is still not
 * up to date afterwards, it is released and NULL is returned.
 */
struct buffer_head * bread(int dev,int block)
{
	struct buffer_head * bh = getblk(dev,block);

	if (bh == NULL)
		panic("bread: getblk returned NULL\n");
	if (!bh->b_uptodate) {
		/* contents not valid: ask the block layer to read them */
		ll_rw_block(READ,bh);
		wait_on_buffer(bh);
		if (!bh->b_uptodate) {
			/* the read did not make the buffer valid */
			brelse(bh);
			return NULL;
		}
	}
	return bh;
}
/*
 * Copy one block from address `from` to address `to`.
 *
 * The direction flag is cleared (cld) and `rep movsl` is run with the count
 * register loaded with BLOCK_SIZE/4, the source index with `from` and the
 * destination index with `to`, i.e. BLOCK_SIZE/4 long words are copied.
 *
 * NOTE(review): the clobber list names the same registers that are used as
 * input operands; current GCC documentation says clobbers must not overlap
 * operands, so newer compilers may reject this - confirm against the
 * toolchain in use.
 */
#define COPYBLK(from,to) \
__asm__("cld\n\t" \
"rep\n\t" \
"movsl\n\t" \
::"c" (BLOCK_SIZE/4),"S" (from),"D" (to) \
:"cx","di","si")
/*
 * bread_page reads four buffers into memory at the desired address. It's
 * a function of its own, as there is some speed to be got by reading them
 * all at the same time, not waiting for one to be read, and then another
 * etc.
 */
/*
 * address - destination; advanced by BLOCK_SIZE for each of the four slots
 * dev     - device number
 * b       - four block numbers; a zero entry means "no block for this slot"
 *
 * First all read requests are issued, then each buffer is waited on,
 * copied to its place if it is up to date, and released.
 */
void bread_page(unsigned long address,int dev,int b[4])
{
	struct buffer_head * bh[4];
	int slot;

	/* Phase 1: get the four buffer heads and start the reads. */
	for (slot = 0; slot < 4; slot++) {
		if (!b[slot]) {
			bh[slot] = NULL;
			continue;
		}
		bh[slot] = getblk(dev,b[slot]);
		if (bh[slot] && !bh[slot]->b_uptodate)
			ll_rw_block(READ,bh[slot]);
	}

	/* Phase 2: copy each valid buffer to its part of the page. */
	for (slot = 0; slot < 4; slot++, address += BLOCK_SIZE) {
		if (!bh[slot])
			continue;
		wait_on_buffer(bh[slot]);
		if (bh[slot]->b_uptodate)
			COPYBLK((unsigned long) bh[slot]->b_data,address);
		brelse(bh[slot]);
	}
}
/*
 * Ok, breada can be used as bread, but additionally to mark other
 * blocks for reading as well. End the argument list with a negative
 * number.
 */
/*
 * dev   - device number
 * first - block to read and return
 * ...   - further block numbers (int) to read ahead, ended by a negative
 *         number
 *
 * Returns the buffer of `first` when it is up to date after waiting on it;
 * otherwise the buffer is released and NULL is returned.  The read-ahead
 * buffers are not returned; only a read-ahead request is queued for them.
 */
struct buffer_head * breada(int dev,int first, ...)
{
	va_list args;
	struct buffer_head * bh, *tmp;

	va_start(args,first);
	if (!(bh=getblk(dev,first)))
		panic("bread: getblk returned NULL\n");
	/* contents not valid: issue the read for the block we will return */
	if (!bh->b_uptodate)
		ll_rw_block(READ,bh);
	/* handle the remaining block numbers as read-ahead */
	while ((first=va_arg(args,int))>=0) {
		tmp=getblk(dev,first);
		if (tmp) {
			/* the request must go to the read-ahead buffer itself, not bh */
			if (!tmp->b_uptodate)
				ll_rw_block(READA,tmp);
			/* drop the reference getblk() took */
			tmp->b_count--;
		}
	}
	va_end(args);
	wait_on_buffer(bh);		/* wait for the first buffer to be unlocked */
	if (bh->b_uptodate)
		return bh;
	brelse(bh);
	return (NULL);
}
/*
 * Set up the buffer cache.
 *
 * buffer_end - end address of the memory given to the buffer cache.
 *
 * Buffer heads are laid out upwards from start_buffer while the data blocks
 * they describe are carved downwards from the high end, BLOCK_SIZE at a
 * time, until the two would meet:
 *
 *   h (start_buffer)                              b (buffer_end)
 *   \|/                                           \|/
 *   |-buffer_head--|----| * * * |-----buffer------|
 *
 * A buffer_end of exactly 1<<20 makes the data area start at 640*1024
 * instead, and when the data pointer reaches 0x100000 it continues from
 * 0xA0000 (the range in between is skipped).
 *
 * Afterwards all heads form one circular, doubly linked free list headed by
 * free_list, and every hash_table slot is NULL.  The first getblk() call
 * therefore finds nothing in the hash table, takes a head from the free
 * list and links it into its hash slot.
 */
void buffer_init(long buffer_end)
{
	struct buffer_head * h = start_buffer;
	void * b;
	int i;

	/* top of the data area */
	b = (buffer_end == 1<<20) ? (void *) (640*1024) : (void *) buffer_end;

	for (;;) {
		b -= BLOCK_SIZE;
		/* stop once the next data block would run into the heads */
		if (b < (void *) (h+1))
			break;
		h->b_data = (char *) b;
		h->b_dev = 0;
		h->b_count = 0;
		h->b_dirt = 0;
		h->b_uptodate = 0;
		h->b_lock = 0;
		h->b_wait = NULL;
		/* not on any hash chain yet */
		h->b_prev = NULL;
		h->b_next = NULL;
		/* chain the heads in array order */
		h->b_prev_free = h-1;
		h->b_next_free = h+1;
		h++;
		NR_BUFFERS++;
		if (b == (void *) 0x100000)
			b = (void *) 0xA0000;
	}

	/* close the ring: first <-> last */
	h--;
	free_list = start_buffer;
	free_list->b_prev_free = h;
	h->b_next_free = free_list;

	/* start with an empty hash table */
	i = 0;
	while (i < NR_HASH)
		hash_table[i++] = NULL;
}