PNFS中block layout write的变态回调函数备忘


bl_write_pagelist 中构建bio结构,注册回调函数

bio->bi_end_io = bl_end_io_write;

在bl_end_io_write()中,调用这个函数

 put_parallel(data);

/* Drop one reference on the parallel_io tracker shared by all bios of
 * this write; when the refcount reaches zero, kref_put() invokes
 * destroy_parallel to finish the whole parallel I/O.
 */
static inline void put_parallel(struct parallel_io *p)
{
    kref_put(&p->refcnt, destroy_parallel);
}

kref_put()检查引用计数,若减为0,调用这个函数

par->pnfs_callback = bl_end_par_io_write;

在bl_end_par_io_write()中

/* Per-write completion callback, run once the parallel_io refcount has
 * dropped to zero (i.e. every bio of this write has ended).
 * NOTE(review): tk_status is forced to 0 here regardless of how the
 * bios actually completed — see the STUB comment below.
 */
static void
bl_end_par_io_write(void *data)
{
    struct nfs_write_data *wdata = data;
    struct pnfs_layout_segment *lseg = wdata->pdata.lseg;

    /* STUB - ignoring error handling */
    wdata->task.tk_status = 0;             /* unconditionally report success */
    wdata->verf.committed = NFS_FILE_SYNC; /* claim stable storage: no COMMIT needed */
    put_lseg(lseg);                        /* release the layout segment reference */
    /* Defer the remaining cleanup to process context via the workqueue */
    INIT_WORK(&wdata->task.u.tk_work, bl_write_cleanup);
    schedule_work(&wdata->task.u.tk_work);
}


在bl_write_cleanup()中


/* Function scheduled for call during bl_end_par_io_write,
 * it marks sectors as written and extends the commitlist.
 */
static void bl_write_cleanup(struct work_struct *work)
{
    struct rpc_task *task;
    struct nfs_write_data *wdata;
    dprintk("%s enter\n", __func__);
    /* Recover the nfs_write_data from the work item: 'work' is embedded
     * in task->u.tk_work, and 'task' is embedded in nfs_write_data. */
    task = container_of(work, struct rpc_task, u.tk_work);
    wdata = container_of(task, struct nfs_write_data, task);
    /* Hand write completion back to the generic pNFS layer
     * (pnfs_writeback_done via the pnfs_client_operations table). */
    pnfs_callback_ops->nfs_writelist_complete(wdata);
}


  pnfs_callback_ops->nfs_writelist_complete(wdata);

初始化有这样一组钩子函数

/* Callback table the generic pNFS client layer hands to layout drivers;
 * layout drivers invoke these to report device queries and I/O completion. */
struct pnfs_client_operations pnfs_ops = {
    .nfs_getdevicelist = nfs4_pnfs_getdevicelist,
    .nfs_getdeviceinfo = nfs4_pnfs_getdeviceinfo,
    .nfs_readlist_complete = pnfs_read_done,
    .nfs_writelist_complete = pnfs_writeback_done,   /* write-completion entry */
    .nfs_commit_complete = pnfs_commit_done,
};


/* Post-write completion function
 * Invoked by all layout drivers when write_pagelist is done.
 *
 * NOTE: callers set data->pnfsflags PNFS_NO_RPC
 * so that the NFS cleanup routines perform only the page cache
 * cleanup.
 */
static void
pnfs_writeback_done(struct nfs_write_data *data)
{
    struct pnfs_call_data *pdata = &data->pdata;

    dprintk("%s: Begin (status %d)\n", __func__, data->task.tk_status);

    /* pnfs_call_done() runs the original rpc_call_done and reports
     * -EAGAIN when either the layout driver or the RPC task asked for
     * a retry; in that case return the layout for this byte range and
     * re-issue the whole write. */
    if (pnfs_call_done(pdata, &data->task, data) == -EAGAIN) {
        struct nfs4_pnfs_layout_segment range = {
            .iomode = IOMODE_RW,
            .offset = data->args.offset,
            .length = data->args.count,
        };
        dprintk("%s: retrying\n", __func__);
        /* Give the layout back to the MDS before retrying the I/O */
        _pnfs_return_layout(data->inode, &range, NULL, RETURN_FILE);
        pnfs_initiate_write(data, NFS_CLIENT(data->inode),
                    pdata->call_ops, pdata->how);
    }
}

pnfs_call_done() 返回 -EAGAIN 的那个分支很重要(原文中用黄色高亮,纯文本中已丢失):若写完成的状态是 EAGAIN,则先通过 _pnfs_return_layout() 归还 layout,再调用 pnfs_initiate_write() 重写一次。

进去看看:

/* Run the saved NFS-layer completion callback and decide whether the
 * I/O must be retried.  Returns -EAGAIN when either the layout driver
 * (pdata->pnfs_error) or the RPC task status requests a retry, else 0.
 */
static int
pnfs_call_done(struct pnfs_call_data *pdata, struct rpc_task *task, void *data)
{
    pdata->lseg = NULL;
    /* Invoke the original rpc_call_done saved by the NFS layer
     * (e.g. nfs_writeback_done_full for a full-page write). */
    pdata->call_ops->rpc_call_done(task, data);
    if (pdata->pnfs_error == -EAGAIN || task->tk_status == -EAGAIN)
        return -EAGAIN;
    if (pdata->pnfsflags & PNFS_NO_RPC) {
        /* No RPC in flight: release immediately ourselves. */
        pdata->call_ops->rpc_release(data);
    } else {
        /*
         * just restore original rpc call ops
         * rpc_release will be called later by the rpc scheduling layer.
         */
        task->tk_ops = pdata->call_ops;
    }
    return 0;
}


上面 pdata->call_ops(即 nfs_write_full_ops,原文黄色高亮处)是在 NFS 层的 nfs_flush_one() 中通过 nfs_write_rpcsetup() 填充的:

/* Coalesce the requests on 'head' into a single nfs_write_data and
 * start the write RPC with the full-page completion ops.
 * Returns the result of nfs_write_rpcsetup(), or -ENOMEM if the
 * write data could not be allocated (all requests are redirtied).
 */
static int nfs_flush_one(struct inode *inode, struct list_head *head, unsigned int npages, size_t count, int how)
{
    struct nfs_page        *req;
    struct page        **pages;
    struct nfs_write_data    *data;

    data = nfs_writedata_alloc(npages);
    if (!data)
        goto out_bad;

    /* Move every request from 'head' onto data->pages and collect its
     * page into data->pagevec for the RPC. */
    pages = data->pagevec;
    while (!list_empty(head)) {
        req = nfs_list_entry(head->next);
        nfs_list_remove_request(req);
        nfs_list_add_request(req, &data->pages);
        ClearPageError(req->wb_page);
        *pages++ = req->wb_page;
    }
    /* First request gives the starting offset for the RPC setup. */
    req = nfs_list_entry(data->pages.next);

    /* Set up the argument struct */
    return nfs_write_rpcsetup(req, data, &nfs_write_full_ops, count, 0, how);
 out_bad:
    /* Allocation failed: put every request back on the dirty list so
     * the write is retried later. */
    while (!list_empty(head)) {
        req = nfs_list_entry(head->next);
        nfs_list_remove_request(req);
        nfs_redirty_request(req);
    }
    return -ENOMEM;
}


××××××××××××××××××××××××××××××××××××××××××××××××××××××××

/* RPC callbacks for a write that flushes whole pages; these are the
 * call_ops installed by nfs_flush_one() via nfs_write_rpcsetup(). */
static const struct rpc_call_ops nfs_write_full_ops = {
#if defined(CONFIG_NFS_V4_1)
    .rpc_call_prepare = nfs_write_prepare,
#endif /* CONFIG_NFS_V4_1 */
    .rpc_call_done = nfs_writeback_done_full,
    .rpc_release = nfs_writeback_release_full,
};

×××××××××××××××××××××××××××××××××××××××××××××

/*
 * Handle a write reply that flushes a whole page.
 *
 * FIXME: There is an inherent race with invalidate_inode_pages and
 *      writebacks since the page->count is kept > 1 for as long
 *      as the page has a write request pending.
 */
static void nfs_writeback_done_full(struct rpc_task *task, void *calldata)
{
    struct nfs_write_data    *data = calldata;

    /* Delegate to the common write-completion handler, which among
     * other things calls NFS_PROTO(inode)->write_done(). */
    nfs_writeback_done(task, data);

}


 nfs_writeback_done(task, data)中会调用这个函数

status = NFS_PROTO(data->inode)->write_done(task, data);

上述函数在此处填充:

/* Build the pNFS rpc_ops table: start from the plain NFSv4 table and
 * override only the pNFS-specific entry points. */
pnfs_v4_clientops_init(void)
{
    struct nfs_rpc_ops *p = (struct nfs_rpc_ops *)&pnfs_v4_clientops;
// copy the generic nfs_v4_clientops, then override the pNFS-specific operations below
    memcpy(p, &nfs_v4_clientops, sizeof(*p));
    p->file_ops        = &pnfs_file_operations;
    p->setattr        = pnfs4_proc_setattr;
    p->read_done        = pnfs4_read_done;
    p->write_done        = pnfs4_write_done;   /* called from nfs_writeback_done() */
    p->write_setup        = pnfs4_proc_write_setup;
    p->commit_setup        = pnfs4_proc_commit_setup;
    p->commit_done        = pnfs4_commit_done;
}


在pnfs4_write_done()中有

pnfs_need_layoutcommit(NFS_I(data->inode),
                           data->args.context);

上述函数的注释


/* Set context to indicate we require a layoutcommit
 * If we don't even have a layout, we don't need to commit it.
 */  只是设置了context,并未执行。


##########################################

回退到   nfs_writeback_done(task, data)中,


回退到pnfs_call_done中,调用 nfs_writeback_release_full

######################################################


/* rpc_release for full-page writes: walks every request of this write
 * and updates its page according to data->task.tk_status — error pages
 * are flagged and removed, committable pages are queued for COMMIT,
 * successful pages are removed.  Finally frees the write data.
 */
static void nfs_writeback_release_full(void *calldata)
{
    struct nfs_write_data    *data = calldata;
    int status = data->task.tk_status;   /* set upstream; 0 forced in bl_end_par_io_write */

    /* Update attributes as result of writeback. */
    while (!list_empty(&data->pages)) {
        struct nfs_page *req = nfs_list_entry(data->pages.next);
        struct page *page = req->wb_page;

        nfs_list_remove_request(req);

        dprintk("NFS: %5u write (%s/%lld %d@%lld)",
            data->task.tk_pid,
            req->wb_context->path.dentry->d_inode->i_sb->s_id,
            (long long)NFS_FILEID(req->wb_context->path.dentry->d_inode),
            req->wb_bytes,
            (long long)req_offset(req));

        if (status < 0) {
            /* Write failed: flag the page and record the error on the
             * open context so the application sees it. */
            nfs_set_pageerror(page);
            nfs_context_set_write_error(req->wb_context, status);
            dprintk(", error = %d\n", status);
            goto remove_request;
        }

        if (nfs_write_need_commit(data)) {
            /* Unstable write: remember the verifier and queue the
             * request for a later COMMIT instead of removing it. */
            memcpy(&req->wb_verf, &data->verf, sizeof(req->wb_verf));
            nfs_mark_request_commit(req);
            nfs_end_page_writeback(page);
            dprintk(" marked for commit\n");
            goto next;
        }
        dprintk(" OK\n");
remove_request:
        nfs_end_page_writeback(page);
        nfs_inode_remove_request(req);
    next:
        nfs_clear_page_tag_locked(req);
    }
    nfs_writedata_release(calldata);
}

在上述函数中,真正根据状态修改了对应的page,这是我们应该重点关注的。


是根据 int status = data->task.tk_status 来修改 page 标志的,因此需要找到这个变量是在哪里被赋值的。



/* (Quoted again for emphasis.)  The key point: tk_status — the value
 * nfs_writeback_release_full() later uses to decide how to mark each
 * page — is assigned the constant 0 here, independent of the bio
 * results. */
bl_end_par_io_write(void *data)
{
    struct nfs_write_data *wdata = data;
    struct pnfs_layout_segment *lseg = wdata->pdata.lseg;

    /* STUB - ignoring error handling */
    wdata->task.tk_status = 0;             /* <-- the assignment in question */
    wdata->verf.committed = NFS_FILE_SYNC;
    put_lseg(lseg);
    INIT_WORK(&wdata->task.u.tk_work, bl_write_cleanup);
    schedule_work(&wdata->task.u.tk_work);
}



终于找到最重要的地方了:wdata->task.tk_status = 0; 这一行(原文红色高亮处)就是对修改 page 标志所依据的状态变量赋值的地方。可以确定这个值与 bio 操作成功与否没有关系,这就论证了此路径下不会按错误状态去修改 page 标志。



你可能感兴趣的:(PNFS中block layout write的变态回调函数备忘)