bl_write_pagelist 中构建bio结构,注册回调函数
bio->bi_end_io = bl_end_io_write;
在bl_end_io_write()中,调用这个函数
put_parallel(data);
/* Drop one reference on the parallel I/O tracker; when the refcount
 * reaches zero, kref_put() invokes destroy_parallel() to tear it down.
 */
static inline void put_parallel(struct parallel_io *p)
{
kref_put(&p->refcnt, destroy_parallel);
}
kref_put()检查引用计数,若减为0,调用这个函数
par->pnfs_callback = bl_end_par_io_write;
在bl_end_par_io_write()中
/* pnfs_callback run when the last bio of a parallel write completes
 * (the parallel_io refcount dropped to zero via put_parallel()).
 *
 * STUB - error handling is ignored: tk_status is forced to 0 and the
 * write reported as NFS_FILE_SYNC (stable), so later page-flag updates
 * see success regardless of how the bios actually ended.
 */
static void
bl_end_par_io_write(void *data)
{
struct nfs_write_data *wdata = data;
struct pnfs_layout_segment *lseg = wdata->pdata.lseg;
/* STUB - ignoring error handling */
wdata->task.tk_status = 0;
wdata->verf.committed = NFS_FILE_SYNC;
put_lseg(lseg);
/* Defer the remaining completion work to process context. */
INIT_WORK(&wdata->task.u.tk_work, bl_write_cleanup);
schedule_work(&wdata->task.u.tk_work);
}
在bl_write_cleanup()中
/* Deferred completion work, queued by bl_end_par_io_write().
 * Recovers the nfs_write_data that embeds the work item (the work_struct
 * lives inside the rpc_task, which lives inside the nfs_write_data) and
 * hands it to the generic pNFS write-completion callback.
 */
static void bl_write_cleanup(struct work_struct *work)
{
	struct rpc_task *tsk = container_of(work, struct rpc_task, u.tk_work);
	struct nfs_write_data *data = container_of(tsk, struct nfs_write_data, task);

	dprintk("%s enter\n", __func__);
	pnfs_callback_ops->nfs_writelist_complete(data);
}
pnfs_callback_ops->nfs_writelist_complete(wdata);
初始化有这样一组钩子函数
/* Client-side pNFS hook table; nfs_writelist_complete is the entry the
 * block layout's bl_write_cleanup() reaches through pnfs_callback_ops
 * (presumably pointing at this table — verify at registration site).
 */
struct pnfs_client_operations pnfs_ops = {
.nfs_getdevicelist = nfs4_pnfs_getdevicelist,
.nfs_getdeviceinfo = nfs4_pnfs_getdeviceinfo,
.nfs_readlist_complete = pnfs_read_done,
.nfs_writelist_complete = pnfs_writeback_done,
.nfs_commit_complete = pnfs_commit_done,
};
/* Post-write completion function
 * Invoked by all layout drivers when write_pagelist is done.
 *
 * NOTE: callers set data->pnfsflags PNFS_NO_RPC
 * so that the NFS cleanup routines perform only the page cache
 * cleanup.
 */
static void
pnfs_writeback_done(struct nfs_write_data *data)
{
struct pnfs_call_data *pdata = &data->pdata;
dprintk("%s: Begin (status %d)\n", __func__, data->task.tk_status);
/* pnfs_call_done() runs the saved NFS-layer rpc_call_done handler and
 * returns -EAGAIN when either the layout driver or the RPC task asked
 * for a retry. */
if (pnfs_call_done(pdata, &data->task, data) == -EAGAIN) {
/* Return the layout segment covering the failed range, then
 * re-initiate the write. */
struct nfs4_pnfs_layout_segment range = {
.iomode = IOMODE_RW,
.offset = data->args.offset,
.length = data->args.count,
};
dprintk("%s: retrying\n", __func__);
_pnfs_return_layout(data->inode, &range, NULL, RETURN_FILE);
pnfs_initiate_write(data, NFS_CLIENT(data->inode),
pdata->call_ops, pdata->how);
}
}
上面 pnfs_writeback_done() 中调用 pnfs_call_done() 并检查其返回值的那段代码很重要(原文以黄色高亮标出):若写完成的状态是 -EAGAIN,则先调用 _pnfs_return_layout() 归还对应范围的 layout,再调用 pnfs_initiate_write() 重新写一次。
进去看看:
/* Run the saved NFS-layer completion callback for a finished pNFS I/O.
 *
 * Returns -EAGAIN when either the layout driver (pdata->pnfs_error) or
 * the RPC task status requests a retry — the caller then re-drives the
 * I/O. Returns 0 otherwise.
 */
static int
pnfs_call_done(struct pnfs_call_data *pdata, struct rpc_task *task, void *data)
{
pdata->lseg = NULL;
pdata->call_ops->rpc_call_done(task, data);
if (pdata->pnfs_error == -EAGAIN || task->tk_status == -EAGAIN)
return -EAGAIN;
if (pdata->pnfsflags & PNFS_NO_RPC) {
/* No real RPC ran, so release the request ourselves. */
pdata->call_ops->rpc_release(data);
} else {
/*
 * just restore original rpc call ops
 * rpc_release will be called later by the rpc scheduling layer.
 */
task->tk_ops = pdata->call_ops;
}
return 0;
}
pdata->call_ops 指向的这组回调(原文以黄色高亮标出,即 nfs_write_full_ops)是在 nfs_flush_one()(NFS 层的函数)中通过 nfs_write_rpcsetup() 填充的:
/* Coalesce all requests on @head into a single write RPC.
 *
 * Moves every nfs_page from @head onto the private list of a freshly
 * allocated nfs_write_data, records each backing page in its pagevec,
 * and hands the batch to nfs_write_rpcsetup() with nfs_write_full_ops
 * as the completion callbacks.
 *
 * On allocation failure every queued request is redirtied and -ENOMEM
 * is returned.
 */
static int nfs_flush_one(struct inode *inode, struct list_head *head, unsigned int npages, size_t count, int how)
{
struct nfs_page *req;
struct page **pages;
struct nfs_write_data *data;
data = nfs_writedata_alloc(npages);
if (!data)
goto out_bad;
pages = data->pagevec;
while (!list_empty(head)) {
req = nfs_list_entry(head->next);
nfs_list_remove_request(req);
nfs_list_add_request(req, &data->pages);
ClearPageError(req->wb_page);
*pages++ = req->wb_page;
}
/* First request of the batch supplies the open context etc. */
req = nfs_list_entry(data->pages.next);
/* Set up the argument struct */
return nfs_write_rpcsetup(req, data, &nfs_write_full_ops, count, 0, how);
out_bad:
while (!list_empty(head)) {
req = nfs_list_entry(head->next);
nfs_list_remove_request(req);
nfs_redirty_request(req);
}
return -ENOMEM;
}
××××××××××××××××××××××××××××××××××××××××××××××××××××××××
/* Completion callbacks installed by nfs_flush_one() via
 * nfs_write_rpcsetup(); pnfs_call_done() later invokes
 * rpc_call_done / rpc_release through this table.
 */
static const struct rpc_call_ops nfs_write_full_ops = {
#if defined(CONFIG_NFS_V4_1)
.rpc_call_prepare = nfs_write_prepare,
#endif /* CONFIG_NFS_V4_1 */
.rpc_call_done = nfs_writeback_done_full,
.rpc_release = nfs_writeback_release_full,
};
×××××××××××××××××××××××××××××××××××××××××××××
/*
 * rpc_call_done callback for a write that flushes a whole page:
 * forward the completed request to the common nfs_writeback_done()
 * handler.
 *
 * FIXME: There is an inherent race with invalidate_inode_pages and
 * writebacks since the page->count is kept > 1 for as long
 * as the page has a write request pending.
 */
static void nfs_writeback_done_full(struct rpc_task *task, void *calldata)
{
	struct nfs_write_data *wdata = calldata;

	nfs_writeback_done(task, wdata);
}
nfs_writeback_done(task, data)中会调用这个函数
status = NFS_PROTO(data->inode)->write_done(task, data);
上述函数在此处填充:
/* Initialize the pNFS v4 client-ops table.
 *
 * NOTE(review): this excerpt drops the function's return type line —
 * presumably void in the original source; confirm against the tree.
 */
pnfs_v4_clientops_init(void)
{
struct nfs_rpc_ops *p = (struct nfs_rpc_ops *)&pnfs_v4_clientops;
/* Copy the generic nfs_v4_clientops table, then override the
 * pNFS-specific operations below. */
memcpy(p, &nfs_v4_clientops, sizeof(*p));
p->file_ops = &pnfs_file_operations;
p->setattr = pnfs4_proc_setattr;
p->read_done = pnfs4_read_done;
p->write_setup = pnfs4_proc_write_setup;
p->write_done = pnfs4_write_done;
p->commit_setup = pnfs4_proc_commit_setup;
p->commit_done = pnfs4_commit_done;
}
在pnfs4_write_done()中有
pnfs_need_layoutcommit(NFS_I(data->inode),
data->args.context);
上述函数的注释
/* Set context to indicate we require a layoutcommit
* If we don't even have a layout, we don't need to commit it.
*/ 只是设置了context,并未执行。
##########################################
回退到 nfs_writeback_done(task, data)中,
回退到pnfs_call_done中,调用 nfs_writeback_release_full
######################################################
/* rpc_release callback for whole-page writes: walk every nfs_page
 * attached to this write and finish it according to the RPC result.
 *
 *   status < 0              -> flag page/context error, drop the request
 *   nfs_write_need_commit() -> save the verifier, queue for COMMIT
 *   otherwise               -> write is stable, remove the request
 *
 * Finally releases the nfs_write_data itself.
 */
static void nfs_writeback_release_full(void *calldata)
{
struct nfs_write_data *data = calldata;
/* tk_status was set by the completion path (the block layout stub
 * bl_end_par_io_write() forces it to 0 unconditionally). */
int status = data->task.tk_status;
/* Update attributes as result of writeback. */
while (!list_empty(&data->pages)) {
struct nfs_page *req = nfs_list_entry(data->pages.next);
struct page *page = req->wb_page;
nfs_list_remove_request(req);
dprintk("NFS: %5u write (%s/%lld %d@%lld)",
data->task.tk_pid,
req->wb_context->path.dentry->d_inode->i_sb->s_id,
(long long)NFS_FILEID(req->wb_context->path.dentry->d_inode),
req->wb_bytes,
(long long)req_offset(req));
if (status < 0) {
nfs_set_pageerror(page);
nfs_context_set_write_error(req->wb_context, status);
dprintk(", error = %d\n", status);
goto remove_request;
}
if (nfs_write_need_commit(data)) {
/* Unstable write: remember the server's verifier and keep the
 * request around until COMMIT confirms it. */
memcpy(&req->wb_verf, &data->verf, sizeof(req->wb_verf));
nfs_mark_request_commit(req);
nfs_end_page_writeback(page);
dprintk(" marked for commit\n");
goto next;
}
dprintk(" OK\n");
remove_request:
nfs_end_page_writeback(page);
nfs_inode_remove_request(req);
next:
nfs_clear_page_tag_locked(req);
}
nfs_writedata_release(calldata);
}
在上述函数中,真正根据状态修改了对应的page,这是我们应该重点关注的。
它是根据 int status = data->task.tk_status 来修改 page 标志的,因此需要找到 task.tk_status 这个变量是在哪里被赋值的。
/* (Excerpt repeated from above for emphasis; note the leading
 * "static void" line is missing in this copy.)
 *
 * The key line is wdata->task.tk_status = 0: completion status is
 * forced to success regardless of the bio outcome.
 */
bl_end_par_io_write(void *data)
{
struct nfs_write_data *wdata = data;
struct pnfs_layout_segment *lseg = wdata->pdata.lseg;
/* STUB - ignoring error handling */
wdata->task.tk_status = 0;
wdata->verf.committed = NFS_FILE_SYNC;
put_lseg(lseg);
INIT_WORK(&wdata->task.u.tk_work, bl_write_cleanup);
schedule_work(&wdata->task.u.tk_work);
}
终于找到最重要的地方了:wdata->task.tk_status = 0; 这一行(原文以红色高亮标出)对修改 page 标志所依据的状态赋了值。可以确定这个值与 bio 操作成功与否没有关系,这就论证了此处不必根据 bio 的结果修改 page 标志。