From 4b8ee2b82e8b0b6e17ee33feb74fcdb5c6d8dbdd Mon Sep 17 00:00:00 2001 From: Vitaliy Gusev Date: Fri, 20 May 2011 01:34:46 +0400 Subject: nfs41: Correct offset for LAYOUTCOMMIT A client sends offset to MDS as it was seen by DS. As result, file size after copy is only half of original file size in case of 2 DS. Signed-off-by: Vitaliy Gusev Cc: stable@kernel.org [2.6.39] Signed-off-by: Trond Myklebust --- fs/nfs/pnfs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/nfs/pnfs.c') diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index f57f5281a520..101c85a3644e 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -1009,7 +1009,7 @@ void pnfs_set_layoutcommit(struct nfs_write_data *wdata) { struct nfs_inode *nfsi = NFS_I(wdata->inode); - loff_t end_pos = wdata->args.offset + wdata->res.count; + loff_t end_pos = wdata->mds_offset + wdata->res.count; bool mark_as_dirty = false; spin_lock(&nfsi->vfs_inode.i_lock); -- cgit v1.2.3 From fb3296eb4636763918edef2d22e45b85b15d4518 Mon Sep 17 00:00:00 2001 From: Benny Halevy Date: Sun, 22 May 2011 19:47:26 +0300 Subject: pnfs: Use byte-range for layoutget Add offset and count parameters to pnfs_update_layout and use them to get the layout in the pageio path. Order cache layout segments in the following order: * offset (ascending) * length (descending) * iomode (RW before READ) Test byte range against the layout segment in use in pnfs_{read,write}_pg_test so not to coalesce pages not using the same layout segment. [fix lseg ordering] [clean up pnfs_find_lseg lseg arg] [remove unnecessary FIXME] [fix ordering in pnfs_insert_layout] [clean up pnfs_insert_layout] Signed-off-by: Benny Halevy --- fs/nfs/pnfs.c | 165 ++++++++++++++++++++++++++++++++++++++++++++-------------- 1 file changed, 126 insertions(+), 39 deletions(-) (limited to 'fs/nfs/pnfs.c') diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index f57f5281a520..c2f09e9b670e 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -261,6 +261,65 @@ put_lseg(struct pnfs_layout_segment *lseg) } EXPORT_SYMBOL_GPL(put_lseg); +static inline u64 +end_offset(u64 start, u64 len) +{ + u64 end; + + end = start + len; + return end >= start ? end : NFS4_MAX_UINT64; +} + +/* last octet in a range */ +static inline u64 +last_byte_offset(u64 start, u64 len) +{ + u64 end; + + BUG_ON(!len); + end = start + len; + return end > start ? end - 1 : NFS4_MAX_UINT64; +} + +/* + * is l2 fully contained in l1? + * start1 end1 + * [----------------------------------) + * start2 end2 + * [----------------) + */ +static inline int +lo_seg_contained(struct pnfs_layout_range *l1, + struct pnfs_layout_range *l2) +{ + u64 start1 = l1->offset; + u64 end1 = end_offset(start1, l1->length); + u64 start2 = l2->offset; + u64 end2 = end_offset(start2, l2->length); + + return (start1 <= start2) && (end1 >= end2); +} + +/* + * is l1 and l2 intersecting? + * start1 end1 + * [----------------------------------) + * start2 end2 + * [----------------) + */ +static inline int +lo_seg_intersecting(struct pnfs_layout_range *l1, + struct pnfs_layout_range *l2) +{ + u64 start1 = l1->offset; + u64 end1 = end_offset(start1, l1->length); + u64 start2 = l2->offset; + u64 end2 = end_offset(start2, l2->length); + + return (end1 == NFS4_MAX_UINT64 || end1 > start2) && + (end2 == NFS4_MAX_UINT64 || end2 > start1); +} + static bool should_free_lseg(u32 lseg_iomode, u32 recall_iomode) { @@ -467,7 +526,7 @@ pnfs_choose_layoutget_stateid(nfs4_stateid *dst, struct pnfs_layout_hdr *lo, static struct pnfs_layout_segment * send_layoutget(struct pnfs_layout_hdr *lo, struct nfs_open_context *ctx, - u32 iomode, + struct pnfs_layout_range *range, gfp_t gfp_flags) { struct inode *ino = lo->plh_inode; @@ -499,11 +558,11 @@ send_layoutget(struct pnfs_layout_hdr *lo, goto out_err_free; } - lgp->args.minlength = NFS4_MAX_UINT64; + lgp->args.minlength = PAGE_CACHE_SIZE; + if (lgp->args.minlength > range->length) + lgp->args.minlength = range->length; lgp->args.maxcount = PNFS_LAYOUT_MAXSIZE; - lgp->args.range.iomode = iomode; - lgp->args.range.offset = 0; - lgp->args.range.length = NFS4_MAX_UINT64; + lgp->args.range = *range; lgp->args.type = server->pnfs_curr_ld->id; lgp->args.inode = ino; lgp->args.ctx = get_nfs_open_context(ctx); @@ -518,7 +577,7 @@ send_layoutget(struct pnfs_layout_hdr *lo, nfs4_proc_layoutget(lgp); if (!lseg) { /* remember that LAYOUTGET failed and suspend trying */ - set_bit(lo_fail_bit(iomode), &lo->plh_flags); + set_bit(lo_fail_bit(range->iomode), &lo->plh_flags); } /* free xdr pages */ @@ -625,10 +684,23 @@ bool pnfs_roc_drain(struct inode *ino, u32 *barrier) * are seen first. */ static s64 -cmp_layout(u32 iomode1, u32 iomode2) +cmp_layout(struct pnfs_layout_range *l1, + struct pnfs_layout_range *l2) { + s64 d; + + /* high offset > low offset */ + d = l1->offset - l2->offset; + if (d) + return d; + + /* short length > long length */ + d = l2->length - l1->length; + if (d) + return d; + /* read > read/write */ - return (int)(iomode2 == IOMODE_READ) - (int)(iomode1 == IOMODE_READ); + return (int)(l1->iomode == IOMODE_READ) - (int)(l2->iomode == IOMODE_READ); } static void @@ -636,13 +708,12 @@ pnfs_insert_layout(struct pnfs_layout_hdr *lo, struct pnfs_layout_segment *lseg) { struct pnfs_layout_segment *lp; - int found = 0; dprintk("%s:Begin\n", __func__); assert_spin_locked(&lo->plh_inode->i_lock); list_for_each_entry(lp, &lo->plh_segs, pls_list) { - if (cmp_layout(lp->pls_range.iomode, lseg->pls_range.iomode) > 0) + if (cmp_layout(&lseg->pls_range, &lp->pls_range) > 0) continue; list_add_tail(&lseg->pls_list, &lp->pls_list); dprintk("%s: inserted lseg %p " @@ -652,16 +723,14 @@ pnfs_insert_layout(struct pnfs_layout_hdr *lo, lseg->pls_range.offset, lseg->pls_range.length, lp, lp->pls_range.iomode, lp->pls_range.offset, lp->pls_range.length); - found = 1; - break; - } - if (!found) { - list_add_tail(&lseg->pls_list, &lo->plh_segs); - dprintk("%s: inserted lseg %p " - "iomode %d offset %llu length %llu at tail\n", - __func__, lseg, lseg->pls_range.iomode, - lseg->pls_range.offset, lseg->pls_range.length); + goto out; } + list_add_tail(&lseg->pls_list, &lo->plh_segs); + dprintk("%s: inserted lseg %p " + "iomode %d offset %llu length %llu at tail\n", + __func__, lseg, lseg->pls_range.iomode, + lseg->pls_range.offset, lseg->pls_range.length); +out: get_layout_hdr(lo); dprintk("%s:Return\n", __func__); @@ -721,16 +790,28 @@ pnfs_find_alloc_layout(struct inode *ino, gfp_t gfp_flags) * READ RW true */ static int -is_matching_lseg(struct pnfs_layout_segment *lseg, u32 iomode) +is_matching_lseg(struct pnfs_layout_range *ls_range, + struct pnfs_layout_range *range) { - return (iomode != IOMODE_RW || lseg->pls_range.iomode == IOMODE_RW); + struct pnfs_layout_range range1; + + if ((range->iomode == IOMODE_RW && + ls_range->iomode != IOMODE_RW) || + !lo_seg_intersecting(ls_range, range)) + return 0; + + /* range1 covers only the first byte in the range */ + range1 = *range; + range1.length = 1; + return lo_seg_contained(ls_range, &range1); } /* * lookup range in layout */ static struct pnfs_layout_segment * -pnfs_find_lseg(struct pnfs_layout_hdr *lo, u32 iomode) +pnfs_find_lseg(struct pnfs_layout_hdr *lo, + struct pnfs_layout_range *range) { struct pnfs_layout_segment *lseg, *ret = NULL; @@ -739,11 +820,11 @@ pnfs_find_lseg(struct pnfs_layout_hdr *lo, u32 iomode) assert_spin_locked(&lo->plh_inode->i_lock); list_for_each_entry(lseg, &lo->plh_segs, pls_list) { if (test_bit(NFS_LSEG_VALID, &lseg->pls_flags) && - is_matching_lseg(lseg, iomode)) { + is_matching_lseg(&lseg->pls_range, range)) { ret = get_lseg(lseg); break; } - if (cmp_layout(iomode, lseg->pls_range.iomode) > 0) + if (cmp_layout(range, &lseg->pls_range) > 0) break; } @@ -759,9 +840,16 @@ pnfs_find_lseg(struct pnfs_layout_hdr *lo, u32 iomode) struct pnfs_layout_segment * pnfs_update_layout(struct inode *ino, struct nfs_open_context *ctx, + loff_t pos, + u64 count, enum pnfs_iomode iomode, gfp_t gfp_flags) { + struct pnfs_layout_range arg = { + .iomode = iomode, + .offset = pos, + .length = count, + }; struct nfs_inode *nfsi = NFS_I(ino); struct nfs_client *clp = NFS_SERVER(ino)->nfs_client; struct pnfs_layout_hdr *lo; @@ -789,7 +877,7 @@ pnfs_update_layout(struct inode *ino, goto out_unlock; /* Check to see if the layout for the given range already exists */ - lseg = pnfs_find_lseg(lo, iomode); + lseg = pnfs_find_lseg(lo, &arg); if (lseg) goto out_unlock; @@ -811,7 +899,7 @@ pnfs_update_layout(struct inode *ino, spin_unlock(&clp->cl_lock); } - lseg = send_layoutget(lo, ctx, iomode, gfp_flags); + lseg = send_layoutget(lo, ctx, &arg, gfp_flags); if (!lseg && first) { spin_lock(&clp->cl_lock); list_del_init(&lo->plh_layouts); @@ -838,17 +926,6 @@ pnfs_layout_process(struct nfs4_layoutget *lgp) struct nfs_client *clp = NFS_SERVER(ino)->nfs_client; int status = 0; - /* Verify we got what we asked for. - * Note that because the xdr parsing only accepts a single - * element array, this can fail even if the server is behaving - * correctly. - */ - if (lgp->args.range.iomode > res->range.iomode || - res->range.offset != 0 || - res->range.length != NFS4_MAX_UINT64) { - status = -EINVAL; - goto out; - } /* Inject layout blob into I/O device driver */ lseg = NFS_SERVER(ino)->pnfs_curr_ld->alloc_lseg(lo, res, lgp->gfp_flags); if (!lseg || IS_ERR(lseg)) { @@ -903,9 +980,14 @@ static int pnfs_read_pg_test(struct nfs_pageio_descriptor *pgio, /* This is first coelesce call for a series of nfs_pages */ pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode, prev->wb_context, + req_offset(req), + pgio->pg_count, IOMODE_READ, GFP_KERNEL); - } + } else if (pgio->pg_lseg && + req_offset(req) > end_offset(pgio->pg_lseg->pls_range.offset, + pgio->pg_lseg->pls_range.length)) + return 0; return NFS_SERVER(pgio->pg_inode)->pnfs_curr_ld->pg_test(pgio, prev, req); } @@ -926,9 +1008,14 @@ static int pnfs_write_pg_test(struct nfs_pageio_descriptor *pgio, /* This is first coelesce call for a series of nfs_pages */ pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode, prev->wb_context, + req_offset(req), + pgio->pg_count, IOMODE_RW, GFP_NOFS); - } + } else if (pgio->pg_lseg && + req_offset(req) > end_offset(pgio->pg_lseg->pls_range.offset, + pgio->pg_lseg->pls_range.length)) + return 0; return NFS_SERVER(pgio->pg_inode)->pnfs_curr_ld->pg_test(pgio, prev, req); } -- cgit v1.2.3 From 707ed5fdb587c71fdb7ad224ba1d80231f33c974 Mon Sep 17 00:00:00 2001 From: Benny Halevy Date: Sun, 22 May 2011 19:47:46 +0300 Subject: pnfs: align layoutget requests on page boundaries Signed-off-by: Benny Halevy --- fs/nfs/pnfs.c | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'fs/nfs/pnfs.c') diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index c2f09e9b670e..2357ee343f4a 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -850,6 +850,7 @@ pnfs_update_layout(struct inode *ino, .offset = pos, .length = count, }; + unsigned pg_offset; struct nfs_inode *nfsi = NFS_I(ino); struct nfs_client *clp = NFS_SERVER(ino)->nfs_client; struct pnfs_layout_hdr *lo; @@ -899,6 +900,13 @@ pnfs_update_layout(struct inode *ino, spin_unlock(&clp->cl_lock); } + pg_offset = arg.offset & ~PAGE_CACHE_MASK; + if (pg_offset) { + arg.offset -= pg_offset; + arg.length += pg_offset; + } + arg.length = PAGE_CACHE_ALIGN(arg.length); + lseg = send_layoutget(lo, ctx, &arg, gfp_flags); if (!lseg && first) { spin_lock(&clp->cl_lock); -- cgit v1.2.3 From 778b5502fdba5b183553f3f2ef1672ba78ac58b6 Mon Sep 17 00:00:00 2001 From: Benny Halevy Date: Sun, 22 May 2011 19:48:02 +0300 Subject: pnfs: Use byte-range for cb_layoutrecall Use recalled range to invalidate particular layout segments in the layout cache. Signed-off-by: Benny Halevy --- fs/nfs/pnfs.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) (limited to 'fs/nfs/pnfs.c') diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 2357ee343f4a..20436a5e76cd 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -321,10 +321,12 @@ lo_seg_intersecting(struct pnfs_layout_range *l1, } static bool -should_free_lseg(u32 lseg_iomode, u32 recall_iomode) +should_free_lseg(struct pnfs_layout_range *lseg_range, + struct pnfs_layout_range *recall_range) { - return (recall_iomode == IOMODE_ANY || - lseg_iomode == recall_iomode); + return (recall_range->iomode == IOMODE_ANY || + lseg_range->iomode == recall_range->iomode) && + lo_seg_intersecting(lseg_range, recall_range); } /* Returns 1 if lseg is removed from list, 0 otherwise */ @@ -355,7 +357,7 @@ static int mark_lseg_invalid(struct pnfs_layout_segment *lseg, int mark_matching_lsegs_invalid(struct pnfs_layout_hdr *lo, struct list_head *tmp_list, - u32 iomode) + struct pnfs_layout_range *recall_range) { struct pnfs_layout_segment *lseg, *next; int invalid = 0, removed = 0; @@ -368,7 +370,8 @@ mark_matching_lsegs_invalid(struct pnfs_layout_hdr *lo, return 0; } list_for_each_entry_safe(lseg, next, &lo->plh_segs, pls_list) - if (should_free_lseg(lseg->pls_range.iomode, iomode)) { + if (!recall_range || + should_free_lseg(&lseg->pls_range, recall_range)) { dprintk("%s: freeing lseg %p iomode %d " "offset %llu length %llu\n", __func__, lseg, lseg->pls_range.iomode, lseg->pls_range.offset, @@ -417,7 +420,7 @@ pnfs_destroy_layout(struct nfs_inode *nfsi) lo = nfsi->layout; if (lo) { lo->plh_block_lgets++; /* permanently block new LAYOUTGETs */ - mark_matching_lsegs_invalid(lo, &tmp_list, IOMODE_ANY); + mark_matching_lsegs_invalid(lo, &tmp_list, NULL); } spin_unlock(&nfsi->vfs_inode.i_lock); pnfs_free_lseg_list(&tmp_list); -- cgit v1.2.3 From 636fb9c89d7e216aac3d406e458864420057e981 Mon Sep 17 00:00:00 2001 From: Benny Halevy Date: Sun, 22 May 2011 19:51:33 +0300 Subject: pnfs: alloc and free layout_hdr layoutdriver methods [gfp_flags] Signed-off-by: Benny Halevy --- fs/nfs/pnfs.c | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) (limited to 'fs/nfs/pnfs.c') diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 20436a5e76cd..ef535f2a2c74 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -177,13 +177,28 @@ get_layout_hdr(struct pnfs_layout_hdr *lo) atomic_inc(&lo->plh_refcount); } +static struct pnfs_layout_hdr * +pnfs_alloc_layout_hdr(struct inode *ino, gfp_t gfp_flags) +{ + struct pnfs_layoutdriver_type *ld = NFS_SERVER(ino)->pnfs_curr_ld; + return ld->alloc_layout_hdr ? ld->alloc_layout_hdr(ino, gfp_flags) : + kzalloc(sizeof(struct pnfs_layout_hdr), gfp_flags); +} + +static void +pnfs_free_layout_hdr(struct pnfs_layout_hdr *lo) +{ + struct pnfs_layoutdriver_type *ld = NFS_SERVER(lo->plh_inode)->pnfs_curr_ld; + return ld->alloc_layout_hdr ? ld->free_layout_hdr(lo) : kfree(lo); +} + static void destroy_layout_hdr(struct pnfs_layout_hdr *lo) { dprintk("%s: freeing layout cache %p\n", __func__, lo); BUG_ON(!list_empty(&lo->plh_layouts)); NFS_I(lo->plh_inode)->layout = NULL; - kfree(lo); + pnfs_free_layout_hdr(lo); } static void @@ -744,7 +759,7 @@ alloc_init_layout_hdr(struct inode *ino, gfp_t gfp_flags) { struct pnfs_layout_hdr *lo; - lo = kzalloc(sizeof(struct pnfs_layout_hdr), gfp_flags); + lo = pnfs_alloc_layout_hdr(ino, gfp_flags); if (!lo) return NULL; atomic_set(&lo->plh_refcount, 1); @@ -777,7 +792,7 @@ pnfs_find_alloc_layout(struct inode *ino, gfp_t gfp_flags) if (likely(nfsi->layout == NULL)) /* Won the race? */ nfsi->layout = new; else - kfree(new); + pnfs_free_layout_hdr(new); return nfsi->layout; } -- cgit v1.2.3 From d20581aa4be11407c9eeeb75992df5ef176bba0f Mon Sep 17 00:00:00 2001 From: Benny Halevy Date: Sun, 22 May 2011 19:52:03 +0300 Subject: pnfs: support for non-rpc layout drivers Non-rpc layout driver such as for objects and blocks implement their own I/O path and error handling logic. Therefore bypass NFS-based error handling for these layout drivers. [fix lseg ref-count bugs, and null de-refs] [Fall out from: non-rpc layout drivers] Signed-off-by: Boaz Harrosh [get rid of PNFS_USE_RPC_CODE] [get rid of __nfs4_write_done_cb] [revert useless change in nfs4_write_done_cb] Signed-off-by: Benny Halevy --- fs/nfs/pnfs.c | 48 +++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 47 insertions(+), 1 deletion(-) (limited to 'fs/nfs/pnfs.c') diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index ef535f2a2c74..171662114fdd 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -243,7 +243,7 @@ put_lseg_common(struct pnfs_layout_segment *lseg) { struct inode *inode = lseg->pls_layout->plh_inode; - BUG_ON(test_bit(NFS_LSEG_VALID, &lseg->pls_flags)); + WARN_ON(test_bit(NFS_LSEG_VALID, &lseg->pls_flags)); list_del_init(&lseg->pls_list); if (list_empty(&lseg->pls_layout->plh_segs)) { set_bit(NFS_LAYOUT_DESTROYED, &lseg->pls_layout->plh_flags); @@ -1054,6 +1054,29 @@ pnfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, struct inode *inode) pgio->pg_test = (ld && ld->pg_test) ? pnfs_write_pg_test : NULL; } +/* + * Called by non rpc-based layout drivers + */ +int +pnfs_ld_write_done(struct nfs_write_data *data) +{ + int status; + + if (!data->pnfs_error) { + pnfs_set_layoutcommit(data); + data->mds_ops->rpc_call_done(&data->task, data); + data->mds_ops->rpc_release(data); + return 0; + } + + dprintk("%s: pnfs_error=%d, retry via MDS\n", __func__, + data->pnfs_error); + status = nfs_initiate_write(data, NFS_CLIENT(data->inode), + data->mds_ops, NFS_FILE_SYNC); + return status ? : -EAGAIN; +} +EXPORT_SYMBOL_GPL(pnfs_ld_write_done); + enum pnfs_try_status pnfs_try_to_write_data(struct nfs_write_data *wdata, const struct rpc_call_ops *call_ops, int how) @@ -1078,6 +1101,29 @@ pnfs_try_to_write_data(struct nfs_write_data *wdata, return trypnfs; } +/* + * Called by non rpc-based layout drivers + */ +int +pnfs_ld_read_done(struct nfs_read_data *data) +{ + int status; + + if (!data->pnfs_error) { + __nfs4_read_done_cb(data); + data->mds_ops->rpc_call_done(&data->task, data); + data->mds_ops->rpc_release(data); + return 0; + } + + dprintk("%s: pnfs_error=%d, retry via MDS\n", __func__, + data->pnfs_error); + status = nfs_initiate_read(data, NFS_CLIENT(data->inode), + data->mds_ops); + return status ? : -EAGAIN; +} +EXPORT_SYMBOL_GPL(pnfs_ld_read_done); + /* * Call the appropriate parallel I/O subsystem read function. */ -- cgit v1.2.3 From cbe8260369c9f88eafa035cd327dc3e02fad528c Mon Sep 17 00:00:00 2001 From: Benny Halevy Date: Sun, 22 May 2011 19:52:37 +0300 Subject: pnfs: layoutreturn NFSv4.1 LAYOUTRETURN implementation Currently, does not support layout-type payload encoding. Signed-off-by: Alexandros Batsakis Signed-off-by: Andy Adamson Signed-off-by: Andy Adamson Signed-off-by: Dean Hildebrand Signed-off-by: Fred Isaman Signed-off-by: Fred Isaman Signed-off-by: Marc Eshel Signed-off-by: Zhang Jingwang [call pnfs_return_layout right before pnfs_destroy_layout] [remove assert_spin_locked from pnfs_clear_lseg_list] [remove wait parameter from the layoutreturn path.] [remove return_type field from nfs4_layoutreturn_args] [remove range from nfs4_layoutreturn_args] [no need to send layoutcommit from _pnfs_return_layout] [don't wait on sync layoutreturn] [fix layout stateid in layoutreturn args] [fixed NULL deref in _pnfs_return_layout] [removed recaim member of nfs4_layoutreturn_args] Signed-off-by: Benny Halevy --- fs/nfs/pnfs.c | 45 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 45 insertions(+) (limited to 'fs/nfs/pnfs.c') diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 171662114fdd..00b128241746 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -619,6 +619,51 @@ out_err_free: return NULL; } +/* Initiates a LAYOUTRETURN(FILE) */ +int +_pnfs_return_layout(struct inode *ino) +{ + struct pnfs_layout_hdr *lo = NULL; + struct nfs_inode *nfsi = NFS_I(ino); + LIST_HEAD(tmp_list); + struct nfs4_layoutreturn *lrp; + nfs4_stateid stateid; + int status = 0; + + dprintk("--> %s\n", __func__); + + spin_lock(&ino->i_lock); + lo = nfsi->layout; + if (!lo || !mark_matching_lsegs_invalid(lo, &tmp_list, NULL)) { + spin_unlock(&ino->i_lock); + dprintk("%s: no layout segments to return\n", __func__); + goto out; + } + stateid = nfsi->layout->plh_stateid; + /* Reference matched in nfs4_layoutreturn_release */ + get_layout_hdr(lo); + spin_unlock(&ino->i_lock); + pnfs_free_lseg_list(&tmp_list); + + WARN_ON(test_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->flags)); + + lrp = kzalloc(sizeof(*lrp), GFP_KERNEL); + if (unlikely(lrp == NULL)) { + status = -ENOMEM; + goto out; + } + + lrp->args.stateid = stateid; + lrp->args.layout_type = NFS_SERVER(ino)->pnfs_curr_ld->id; + lrp->args.inode = ino; + lrp->clp = NFS_SERVER(ino)->nfs_client; + + status = nfs4_proc_layoutreturn(lrp); +out: + dprintk("<-- %s status: %d\n", __func__, status); + return status; +} + bool pnfs_roc(struct inode *ino) { struct pnfs_layout_hdr *lo; -- cgit v1.2.3 From dfed206b8857d41a91ebba030f99e30017a44dda Mon Sep 17 00:00:00 2001 From: Benny Halevy Date: Wed, 25 May 2011 20:25:22 +0300 Subject: NFSv4.1: unify pnfs_pageio_init functions Use common code for pnfs_pageio_init_{read,write} and use a common generic pg_test function. Note that this function always assumes the the layout driver's pg_test method is implemented. [Fix BUG] Signed-off-by: Boaz Harrosh Signed-off-by: Benny Halevy --- fs/nfs/pnfs.c | 55 +++++++++++++++---------------------------------------- 1 file changed, 15 insertions(+), 40 deletions(-) (limited to 'fs/nfs/pnfs.c') diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 00b128241746..568ab0eef677 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -1043,46 +1043,30 @@ out_forget_reply: goto out; } -static int pnfs_read_pg_test(struct nfs_pageio_descriptor *pgio, - struct nfs_page *prev, - struct nfs_page *req) -{ - if (pgio->pg_count == prev->wb_bytes) { - /* This is first coelesce call for a series of nfs_pages */ - pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode, - prev->wb_context, - req_offset(req), - pgio->pg_count, - IOMODE_READ, - GFP_KERNEL); - } else if (pgio->pg_lseg && - req_offset(req) > end_offset(pgio->pg_lseg->pls_range.offset, - pgio->pg_lseg->pls_range.length)) - return 0; - return NFS_SERVER(pgio->pg_inode)->pnfs_curr_ld->pg_test(pgio, prev, req); -} - -void -pnfs_pageio_init_read(struct nfs_pageio_descriptor *pgio, struct inode *inode) +int +pnfs_generic_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev, + struct nfs_page *req) { - struct pnfs_layoutdriver_type *ld; + enum pnfs_iomode access_type; + gfp_t gfp_flags; - ld = NFS_SERVER(inode)->pnfs_curr_ld; - pgio->pg_test = (ld && ld->pg_test) ? pnfs_read_pg_test : NULL; -} + /* We assume that pg_ioflags == 0 iff we're reading a page */ + if (pgio->pg_ioflags == 0) { + access_type = IOMODE_READ; + gfp_flags = GFP_KERNEL; + } else { + access_type = IOMODE_RW; + gfp_flags = GFP_NOFS; + } -static int pnfs_write_pg_test(struct nfs_pageio_descriptor *pgio, - struct nfs_page *prev, - struct nfs_page *req) -{ if (pgio->pg_count == prev->wb_bytes) { /* This is first coelesce call for a series of nfs_pages */ pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode, prev->wb_context, req_offset(req), pgio->pg_count, - IOMODE_RW, - GFP_NOFS); + access_type, + gfp_flags); } else if (pgio->pg_lseg && req_offset(req) > end_offset(pgio->pg_lseg->pls_range.offset, pgio->pg_lseg->pls_range.length)) @@ -1090,15 +1074,6 @@ static int pnfs_write_pg_test(struct nfs_pageio_descriptor *pgio, return NFS_SERVER(pgio->pg_inode)->pnfs_curr_ld->pg_test(pgio, prev, req); } -void -pnfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, struct inode *inode) -{ - struct pnfs_layoutdriver_type *ld; - - ld = NFS_SERVER(inode)->pnfs_curr_ld; - pgio->pg_test = (ld && ld->pg_test) ? pnfs_write_pg_test : NULL; -} - /* * Called by non rpc-based layout drivers */ -- cgit v1.2.3 From 18ad0a9f2ccd260d37dd6bc5fa04c7819def4c84 Mon Sep 17 00:00:00 2001 From: Benny Halevy Date: Wed, 25 May 2011 21:03:56 +0300 Subject: NFSv4.1: change pg_test return type to bool Signed-off-by: Benny Halevy --- fs/nfs/pnfs.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs/nfs/pnfs.c') diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 568ab0eef677..212fc292761a 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -1043,7 +1043,7 @@ out_forget_reply: goto out; } -int +bool pnfs_generic_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev, struct nfs_page *req) { @@ -1070,7 +1070,7 @@ pnfs_generic_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev, } else if (pgio->pg_lseg && req_offset(req) > end_offset(pgio->pg_lseg->pls_range.offset, pgio->pg_lseg->pls_range.length)) - return 0; + return false; return NFS_SERVER(pgio->pg_inode)->pnfs_curr_ld->pg_test(pgio, prev, req); } -- cgit v1.2.3 From 89a58e32d9105c01022a757fb32ddc3b51bf0025 Mon Sep 17 00:00:00 2001 From: Benny Halevy Date: Wed, 25 May 2011 20:54:40 +0300 Subject: NFSv4.1: use pnfs_generic_pg_test directly by layout driver Signed-off-by: Benny Halevy --- fs/nfs/pnfs.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) (limited to 'fs/nfs/pnfs.c') diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 212fc292761a..d79f2df33a46 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -1067,12 +1067,17 @@ pnfs_generic_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev, pgio->pg_count, access_type, gfp_flags); - } else if (pgio->pg_lseg && - req_offset(req) > end_offset(pgio->pg_lseg->pls_range.offset, - pgio->pg_lseg->pls_range.length)) + return true; + } + + if (pgio->pg_lseg && + req_offset(req) > end_offset(pgio->pg_lseg->pls_range.offset, + pgio->pg_lseg->pls_range.length)) return false; - return NFS_SERVER(pgio->pg_inode)->pnfs_curr_ld->pg_test(pgio, prev, req); + + return true; } +EXPORT_SYMBOL_GPL(pnfs_generic_pg_test); /* * Called by non rpc-based layout drivers -- cgit v1.2.3 From a2e1d4f2e5ed83850de92a491ef225824cb457bd Mon Sep 17 00:00:00 2001 From: Fred Isaman Date: Mon, 13 Jun 2011 18:54:53 -0400 Subject: nfs4.1: fix several problems with _pnfs_return_layout _pnfs_return_layout had the following problems: - it did not call pnfs_free_lseg_list on all paths - it unintentionally did a forgetful return when there was no outstanding io - it raced with concurrent LAYOUTGETS Signed-off-by: Fred Isaman Signed-off-by: Trond Myklebust --- fs/nfs/pnfs.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'fs/nfs/pnfs.c') diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 8c1309d852a6..25de6b27bdf4 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -634,12 +634,14 @@ _pnfs_return_layout(struct inode *ino) spin_lock(&ino->i_lock); lo = nfsi->layout; - if (!lo || !mark_matching_lsegs_invalid(lo, &tmp_list, NULL)) { + if (!lo) { spin_unlock(&ino->i_lock); - dprintk("%s: no layout segments to return\n", __func__); - goto out; + dprintk("%s: no layout to return\n", __func__); + return status; } stateid = nfsi->layout->plh_stateid; + mark_matching_lsegs_invalid(lo, &tmp_list, NULL); + lo->plh_block_lgets++; /* Reference matched in nfs4_layoutreturn_release */ get_layout_hdr(lo); spin_unlock(&ino->i_lock); -- cgit v1.2.3 From d771e3a43e23a37398b7e05a9d1b1036d698263c Mon Sep 17 00:00:00 2001 From: Benny Halevy Date: Tue, 14 Jun 2011 16:30:16 -0400 Subject: NFSv4.1: fix break condition in pnfs_find_lseg The break condition to skip out of the loop got broken when cmp_layout was change. Essentially, we want to stop looking once we know no layout on the remainder of the list can match the first byte of the looked-up range. Reported-by: Peng Tao Signed-off-by: Benny Halevy Signed-off-by: Trond Myklebust --- fs/nfs/pnfs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/nfs/pnfs.c') diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 25de6b27bdf4..d066aad608ad 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -889,7 +889,7 @@ pnfs_find_lseg(struct pnfs_layout_hdr *lo, ret = get_lseg(lseg); break; } - if (cmp_layout(range, &lseg->pls_range) > 0) + if (lseg->pls_range.offset > range->offset) break; } -- cgit v1.2.3 From 1ed3a8539af7b36aa5c977f304e80f7fc8d27bfc Mon Sep 17 00:00:00 2001 From: Benny Halevy Date: Wed, 15 Jun 2011 11:39:57 -0400 Subject: NFSv4.1: need to put_layout_hdr on _pnfs_return_layout error path We always get a reference on the layout header and we rely on nfs4_layoutreturn_release to put it. If we hit an allocation error before starting the rpc proc we bail out early without dereferncing the layout header properly. Signed-off-by: Benny Halevy Signed-off-by: Trond Myklebust --- fs/nfs/pnfs.c | 1 + 1 file changed, 1 insertion(+) (limited to 'fs/nfs/pnfs.c') diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index d066aad608ad..8f9582281252 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -652,6 +652,7 @@ _pnfs_return_layout(struct inode *ino) lrp = kzalloc(sizeof(*lrp), GFP_KERNEL); if (unlikely(lrp == NULL)) { status = -ENOMEM; + put_layout_hdr(lo); goto out; } -- cgit v1.2.3 From ea0ded748bdea78f9e2fefb571f7d6ce9edb4f89 Mon Sep 17 00:00:00 2001 From: Fred Isaman Date: Wed, 15 Jun 2011 12:31:02 -0400 Subject: nfs4.1: prevent race that allowed use of freed layout in _pnfs_return_layout mark_matching_lsegs_invalid could put the last ref to the layout, so the get_layout_hdr needs to be called first. Signed-off-by: Fred Isaman Signed-off-by: Trond Myklebust --- fs/nfs/pnfs.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs/nfs/pnfs.c') diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 8f9582281252..730d4dbbaf68 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -640,10 +640,10 @@ _pnfs_return_layout(struct inode *ino) return status; } stateid = nfsi->layout->plh_stateid; - mark_matching_lsegs_invalid(lo, &tmp_list, NULL); - lo->plh_block_lgets++; /* Reference matched in nfs4_layoutreturn_release */ get_layout_hdr(lo); + mark_matching_lsegs_invalid(lo, &tmp_list, NULL); + lo->plh_block_lgets++; spin_unlock(&ino->i_lock); pnfs_free_lseg_list(&tmp_list); -- cgit v1.2.3 From 9e2dfdb3081edfae66a49013517e80dd8a0469fa Mon Sep 17 00:00:00 2001 From: Fred Isaman Date: Wed, 15 Jun 2011 14:32:02 -0400 Subject: nfs4.1: mark layout as bad on error path in _pnfs_return_layout Signed-off-by: Fred Isaman Signed-off-by: Trond Myklebust --- fs/nfs/pnfs.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'fs/nfs/pnfs.c') diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 730d4dbbaf68..539b94cb6c0f 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -652,6 +652,8 @@ _pnfs_return_layout(struct inode *ino) lrp = kzalloc(sizeof(*lrp), GFP_KERNEL); if (unlikely(lrp == NULL)) { status = -ENOMEM; + set_bit(NFS_LAYOUT_RW_FAILED, &lo->plh_flags); + set_bit(NFS_LAYOUT_RO_FAILED, &lo->plh_flags); put_layout_hdr(lo); goto out; } -- cgit v1.2.3 From 8f7d5efbef8718a774ac5e347b4ec069f17fd9b4 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 10 Jun 2011 13:30:22 -0400 Subject: NFSv4.1: Fix some issues with pnfs_generic_pg_test 1. If the intention is to coalesce requests 'prev' and 'req' then we have to ensure at least that we have a layout starting at req_offset(prev). 2. If we're only requesting a minimal layout of length desc->pg_count, we need to test the length actually returned by the server before we allow the coalescing to occur. 3. We need to deal correctly with (pgio->lseg == NULL) 4. Fixup the test guarding the pnfs_update_layout. Signed-off-by: Trond Myklebust --- fs/nfs/pnfs.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) (limited to 'fs/nfs/pnfs.c') diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 539b94cb6c0f..0f42e02436ee 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -1064,19 +1064,21 @@ pnfs_generic_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev, gfp_flags = GFP_NOFS; } - if (pgio->pg_count == prev->wb_bytes) { + if (pgio->pg_lseg == NULL) { + if (pgio->pg_count != prev->wb_bytes) + return true; /* This is first coelesce call for a series of nfs_pages */ pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode, prev->wb_context, - req_offset(req), + req_offset(prev), pgio->pg_count, access_type, gfp_flags); - return true; + if (pgio->pg_lseg == NULL) + return true; } - if (pgio->pg_lseg && - req_offset(req) > end_offset(pgio->pg_lseg->pls_range.offset, + if (req_offset(req) > end_offset(pgio->pg_lseg->pls_range.offset, pgio->pg_lseg->pls_range.length)) return false; -- cgit v1.2.3 From 19982ba8562e33083cb5bbb59a74855d8a9624ea Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 10 Jun 2011 13:30:23 -0400 Subject: NFSv4.1: Fix an off-by-one error in pnfs_generic_pg_test And document what is going on there... Signed-off-by: Trond Myklebust --- fs/nfs/pnfs.c | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) (limited to 'fs/nfs/pnfs.c') diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 0f42e02436ee..29c0ca7fc347 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -1078,11 +1078,22 @@ pnfs_generic_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev, return true; } - if (req_offset(req) > end_offset(pgio->pg_lseg->pls_range.offset, - pgio->pg_lseg->pls_range.length)) - return false; - - return true; + /* + * Test if a nfs_page is fully contained in the pnfs_layout_range. + * Note that this test makes several assumptions: + * - that the previous nfs_page in the struct nfs_pageio_descriptor + * is known to lie within the range. + * - that the nfs_page being tested is known to be contiguous with the + * previous nfs_page. + * - Layout ranges are page aligned, so we only have to test the + * start offset of the request. + * + * Please also note that 'end_offset' is actually the offset of the + * first byte that lies outside the pnfs_layout_range. FIXME? + * + */ + return req_offset(req) < end_offset(pgio->pg_lseg->pls_range.offset, + pgio->pg_lseg->pls_range.length); } EXPORT_SYMBOL_GPL(pnfs_generic_pg_test); -- cgit v1.2.3