(1,未使用,该对象可用,b_data为NULL,
2,空闲,b_data指向一个空闲的缓存区,即该缓存区尚未与块设备中的数据块对应,
3,正在使用状态,b_data指向一个正在使用中的缓存区,
4,异步状态,b_data指向一个用来实现page I/O的临时缓存区)
sector_t b_blocknr; //本缓存区对应的块号
应用层系统调用:read/write
SYSCALL_DEFINE3(write, unsigned int, fd, const char __user *, buf[数据],size_t, count[大小]) { struct file *file; ssize_t ret = -EBADF; int fput_needed; file = fget_light(fd, &fput_needed);//从文件描述符fd找到对应的file if (file) { loff_t pos = file_pos_read(file);//获得当前的位置指针 ret = vfs_write(file, buf[数据], count[大小], &pos); file_pos_write(file, pos);//更新位置指针 fput_light(file, fput_needed); } return ret; } ssize_t vfs_write(struct file *file, const char __user *buf[数据], size_t count[大小], loff_t *pos) { ssize_t ret; ...//各种检查 if (file->f_op->write) ret = file->f_op->write(file, buf, count, pos); //具体文件系统(ext2/3/4,xfs)均实例化为do_sync_write/read else ret = do_sync_write(file, buf, count, pos); ... return ret; } ssize_t do_sync_write(struct file *filp, const char __user *buf[数据], size_t len[大小], loff_t *ppos) { struct iovec iov = { .iov_base = (void __user *)buf, .iov_len = len };//数据,大小 struct kiocb kiocb; ssize_t ret; init_sync_kiocb(&kiocb, filp);// kiocb.ki_filp=filp; //文件 kiocb.ki_pos = *ppos; kiocb.ki_left = len; kiocb.ki_nbytes = len; ... ret = filp->f_op->aio_write(&kiocb, &iov, 1, kiocb.ki_pos); //具体文件系统异步调用接口,ext2/3/4实例化为generic_file_aio_read/write。 xfs实例化为xfs_file_aio_read/write. ... } //ext2/3/4等文件系统实例化为generic_file_aio_write/read(xfs使用自己的xfs_file_aio_write/read) ssize_t generic_file_aio_write(struct kiocb *iocb[文件], const struct iovec *iov[数据],unsigned long nr_segs, loff_t pos) { struct file *file = iocb->ki_filp; struct inode *inode = file->f_mapping->host; ssize_t ret; ...... mutex_lock(&inode->i_mutex); ret = __generic_file_aio_write(iocb[文件], iov[数据块], nr_segs[数据块个数], &iocb->ki_pos); mutex_unlock(&inode->i_mutex); if (ret > 0 || ret == -EIOCBQUEUED) { ssize_t err; err = generic_write_sync(file, pos, ret);//执行同步命令,vfs_fsync_range函数的封装 if (err < 0 && ret > 0) ret = err; } return ret; } //具体的数据写入函数:iocb[IO状态结构,file,offset等], iov[数据向量数组],nr_segs[数组个数],ppos[偏移] ssize_t __generic_file_aio_write(struct kiocb *iocb, const struct iovec *iov,unsigned long nr_segs, loff_t *ppos) { ...... pos = *ppos; ...... 
/* 带O_DIRECT标志时,直接把数据写入磁盘,绕过文件系统buffer */ if (unlikely(file->f_flags & O_DIRECT)) { loff_t endbyte; ssize_t written_buffered; //直接写入磁盘 written = generic_file_direct_write(iocb, iov, &nr_segs, pos,ppos, count, ocount); ...... } else { //写入buffer缓存区 written = generic_file_buffered_write(iocb, iov, nr_segs,pos, ppos, count, written); } ...... } //正常buffer写 ssize_t generic_file_buffered_write(struct kiocb *iocb, const struct iovec *iov, unsigned long nr_segs, loff_t pos, loff_t *ppos,size_t count[要写入的字节数], ssize_t written[已写入的字节数]) { struct file *file = iocb->ki_filp;//文件 ssize_t status; struct iov_iter i;//IO数据向量 iov_iter_init(&i, iov, nr_segs, count, written); // i->iov=iov;i->nr_segs=nr_segs;i->count=count+written; status = generic_perform_write(file, &i, pos); ...... } static ssize_t generic_perform_write(struct file *file[文件], struct iov_iter *i[IO数据], loff_t pos) { struct address_space *mapping = file->f_mapping; //缓存区组织管理结构:地址空间 const struct address_space_operations *a_ops = mapping->a_ops; long status = 0; ssize_t written = 0; unsigned int flags = 0; ...... do { struct page *page; pgoff_t index; /* Pagecache index for current page 缓存区中页的索引*/ unsigned long offset; /* Offset into pagecache page 页内偏移*/ unsigned long bytes; /* Bytes to write to page */ size_t copied; /* Bytes copied from user */ void *fsdata; offset = (pos & (PAGE_CACHE_SIZE - 1)); // (页大小:4K) index = pos >> PAGE_CACHE_SHIFT; //(页偏移:12位) bytes = min_t(unsigned long, PAGE_CACHE_SIZE - offset, iov_iter_count(i)); again: ...... 
status = a_ops->write_begin(file, mapping, pos, bytes, flags, &page, &fsdata); //由具体的文件系统 索引或分配缓存页面 if (unlikely(status)) break; if (mapping_writably_mapped(mapping)) //mapping->i_mmap_writable!=0 该文件是否被用户态以可写方式共享映射 flush_dcache_page(page); //刷新page,将与此page相关的缓存(cache,buffer)刷回page pagefault_disable(); //禁止缺页处理(拷贝期间处于原子上下文) copied = iov_iter_copy_from_user_atomic(page, i, offset, bytes); //数据拷贝到缓存页面 pagefault_enable(); //重新允许缺页处理 flush_dcache_page(page); mark_page_accessed(page);//标记页面最近被访问 status = a_ops->write_end(file, mapping, pos, bytes, copied, page, fsdata); //通知具体文件系统写入完成,将缓存页面标记为脏(并不立即写入磁盘) if (unlikely(status < 0)) break; copied = status; cond_resched();//调度 iov_iter_advance(i, copied); //更新IO完成字节数 ...... balance_dirty_pages_ratelimited(mapping);//脏页处理 } while (iov_iter_count(i));//i->count return written ? written : status; } write_begin()-->ext2_write_begin()/xfs_vm_write_begin()[都是block_write_begin的封装] write_end()-->ext2_write_end()/xfs_vm_write_end()[都是generic_write_end()-->block_write_end()的封装] int block_write_begin(struct file *file, struct address_space *mapping, loff_t pos, unsigned len, unsigned flags, struct page **pagep, void **fsdata, get_block_t *get_block) { struct inode *inode = mapping->host; int status = 0; struct page *page; pgoff_t index; unsigned start, end; int ownpage = 0; index = pos >> PAGE_CACHE_SHIFT; //12 start = pos & (PAGE_CACHE_SIZE - 1); // 4k end = start + len; page = *pagep; if (page == NULL) { ownpage = 1; //在地址空间查找或分配一个page page = grab_cache_page_write_begin(mapping, index, flags); if (!page) { status = -ENOMEM; goto out; } *pagep = page; } else BUG_ON(!PageLocked(page)); //为page分配一组 缓存头buffer_head,并初始化 status = __block_prepare_write(inode, page, start, end, get_block); ...... }
generic_write_end()是通用的写入完成处理函数,它首先调用block_write_end(),把页面中刚刚写入的缓冲区标记为脏(BH_Dirty)。