From 1dd594b21b2d98e56f2b1fe92bb222276b28de41 Mon Sep 17 00:00:00 2001 From: Neil Brown Date: Mon, 20 Mar 2006 13:44:04 -0500 Subject: NFS: Fix buglet in fs/nfs/write.c I've been reading through fs/nfs/write.c trying to track down a bug that seems to be related to pages loosing a refcount and getting freed too early (you interested in detail??) and I spotted a little bug which the following patch should fix. Signed-off-by: Neil Brown Signed-off-by: Trond Myklebust --- fs/nfs/write.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'fs/nfs/write.c') diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 9449b6835509..d6ad449041eb 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -653,8 +653,11 @@ static struct nfs_page * nfs_update_request(struct nfs_open_context* ctx, spin_unlock(&nfsi->req_lock); error = nfs_wait_on_request(req); nfs_release_request(req); - if (error < 0) + if (error < 0) { + if (new) + nfs_release_request(new); return ERR_PTR(error); + } continue; } spin_unlock(&nfsi->req_lock); -- cgit v1.2.3 From bd6475454c774bd9dbe6078d94bbf72b1d3b65f4 Mon Sep 17 00:00:00 2001 From: Eric Sesterhenn Date: Mon, 20 Mar 2006 13:44:10 -0500 Subject: NFS: kzalloc conversion in fs/nfs this converts fs/nfs to kzalloc() usage. compile tested with make allyesconfig Signed-off-by: Eric Sesterhenn Signed-off-by: Trond Myklebust --- fs/nfs/write.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'fs/nfs/write.c') diff --git a/fs/nfs/write.c b/fs/nfs/write.c index d6ad449041eb..92ecf24455c3 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -100,10 +100,8 @@ static inline struct nfs_write_data *nfs_commit_alloc(unsigned int pagecount) p->pagevec = &p->page_array[0]; else { size_t size = ++pagecount * sizeof(struct page *); - p->pagevec = kmalloc(size, GFP_NOFS); - if (p->pagevec) { - memset(p->pagevec, 0, size); - } else { + p->pagevec = kzalloc(size, GFP_NOFS); + if (!p->pagevec) { mempool_free(p, nfs_commit_mempool); p = NULL; } -- cgit v1.2.3 From 91d5b47023b608227d605d1e916b29dd0215bff7 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 20 Mar 2006 13:44:14 -0500 Subject: NFS: add I/O performance counters Invoke the byte and event counter macros where we want to count bytes and events. Clean-up: fix a possible NULL dereference in nfs_lock, and simplify nfs_file_open. Test-plan: fsx and iozone on UP and SMP systems, with and without pre-emption. Watch for memory overwrite bugs, and performance loss (significantly more CPU required per op). Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- fs/nfs/write.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'fs/nfs/write.c') diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 92ecf24455c3..e7c8361cf201 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -63,6 +63,7 @@ #include #include "delegation.h" +#include "iostat.h" #define NFSDBG_FACILITY NFSDBG_PAGECACHE @@ -134,6 +135,7 @@ static void nfs_grow_file(struct page *page, unsigned int offset, unsigned int c end = ((loff_t)page->index << PAGE_CACHE_SHIFT) + ((loff_t)offset+count); if (i_size >= end) return; + nfs_inc_stats(inode, NFSIOS_EXTENDWRITE); i_size_write(inode, end); } @@ -223,6 +225,7 @@ static int nfs_writepage_sync(struct nfs_open_context *ctx, struct inode *inode, wdata->args.pgbase += result; written += result; count -= result; + nfs_add_stats(inode, NFSIOS_SERVERWRITTENBYTES, result); } while (count); /* Update file length */ nfs_grow_file(page, offset, written); @@ -279,6 +282,9 @@ int nfs_writepage(struct page *page, struct writeback_control *wbc) int priority = wb_priority(wbc); int err; + nfs_inc_stats(inode, NFSIOS_VFSWRITEPAGE); + nfs_add_stats(inode, NFSIOS_WRITEPAGES, 1); + /* * Note: We need to ensure that we have a reference to the inode * if we are to do asynchronous writes. If not, waiting @@ -343,6 +349,8 @@ int nfs_writepages(struct address_space *mapping, struct writeback_control *wbc) struct inode *inode = mapping->host; int err; + nfs_inc_stats(inode, NFSIOS_VFSWRITEPAGES); + err = generic_writepages(mapping, wbc); if (err) return err; @@ -354,6 +362,7 @@ int nfs_writepages(struct address_space *mapping, struct writeback_control *wbc) err = nfs_flush_inode(inode, 0, 0, wb_priority(wbc)); if (err < 0) goto out; + nfs_add_stats(inode, NFSIOS_WRITEPAGES, err); wbc->nr_to_write -= err; if (!wbc->nonblocking && wbc->sync_mode == WB_SYNC_ALL) { err = nfs_wait_on_requests(inode, 0, 0); @@ -596,6 +605,9 @@ static int nfs_wait_on_write_congestion(struct address_space *mapping, int intr) if (!bdi_write_congested(bdi)) return 0; + + nfs_inc_stats(mapping->host, NFSIOS_CONGESTIONWAIT); + if (intr) { struct rpc_clnt *clnt = NFS_CLIENT(mapping->host); sigset_t oldset; @@ -749,6 +761,8 @@ int nfs_updatepage(struct file *file, struct page *page, struct nfs_page *req; int status = 0; + nfs_inc_stats(inode, NFSIOS_VFSUPDATEPAGE); + dprintk("NFS: nfs_updatepage(%s/%s %d@%Ld)\n", file->f_dentry->d_parent->d_name.name, file->f_dentry->d_name.name, count, @@ -1152,6 +1166,8 @@ void nfs_writeback_done(struct rpc_task *task, void *calldata) dprintk("NFS: %4d nfs_writeback_done (status %d)\n", task->tk_pid, task->tk_status); + nfs_add_stats(data->inode, NFSIOS_SERVERWRITTENBYTES, resp->count); + #if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4) if (resp->verf->committed < argp->stable && task->tk_status >= 0) { /* We tried a write call, but the server did not @@ -1177,6 +1193,8 @@ void nfs_writeback_done(struct rpc_task *task, void *calldata) if (task->tk_status >= 0 && resp->count < argp->count) { static unsigned long complain; + nfs_inc_stats(data->inode, NFSIOS_SHORTWRITE); + /* Has the server at least made some progress? */ if (resp->count != 0) { /* Was this an NFSv2 write or an NFSv3 stable write? */ -- cgit v1.2.3 From 788e7a89a03e364855583c0ab4649b94925efbb9 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 20 Mar 2006 13:44:27 -0500 Subject: NFS: Cleanup of NFS write code in preparation for asynchronous o_direct This patch inverts the callback hierarchy for NFS write calls. Instead of having the NFSv2/v3/v4-specific code set up the RPC callback ops, we allow the original caller to do so. This allows for more flexibility w.r.t. how to set up and tear down the nfs_write_data structure while still allowing the NFSv3/v4 code to perform error handling. The greater flexibility is needed by the asynchronous O_DIRECT code, which wants to be able to hold on to the original nfs_write_data structures after the WRITE RPC call has completed in order to be able to replay them if the COMMIT call determines that the server has rebooted. Signed-off-by: Trond Myklebust --- fs/nfs/write.c | 92 +++++++++++++++++++++++++++++++++++++++++----------------- 1 file changed, 66 insertions(+), 26 deletions(-) (limited to 'fs/nfs/write.c') diff --git a/fs/nfs/write.c b/fs/nfs/write.c index e7c8361cf201..5912274ff1a1 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -77,12 +77,14 @@ static struct nfs_page * nfs_update_request(struct nfs_open_context*, struct inode *, struct page *, unsigned int, unsigned int); -static void nfs_writeback_done_partial(struct nfs_write_data *, int); -static void nfs_writeback_done_full(struct nfs_write_data *, int); +static int nfs_writeback_done(struct rpc_task *, struct nfs_write_data *); static int nfs_wait_on_write_congestion(struct address_space *, int); static int nfs_wait_on_requests(struct inode *, unsigned long, unsigned int); static int nfs_flush_inode(struct inode *inode, unsigned long idx_start, unsigned int npages, int how); +static const struct rpc_call_ops nfs_write_partial_ops; +static const struct rpc_call_ops nfs_write_full_ops; +static const struct rpc_call_ops nfs_commit_ops; static kmem_cache_t *nfs_wdata_cachep; mempool_t *nfs_wdata_mempool; @@ -872,10 +874,12 @@ static inline int flush_task_priority(int how) */ static void nfs_write_rpcsetup(struct nfs_page *req, struct nfs_write_data *data, + const struct rpc_call_ops *call_ops, unsigned int count, unsigned int offset, int how) { struct inode *inode; + int flags; /* Set up the RPC argument and reply structs * NB: take care not to mess about with data->commit et al. */ @@ -896,6 +900,9 @@ static void nfs_write_rpcsetup(struct nfs_page *req, data->res.verf = &data->verf; nfs_fattr_init(&data->fattr); + /* Set up the initial task struct. */ + flags = (how & FLUSH_SYNC) ? 0 : RPC_TASK_ASYNC; + rpc_init_task(&data->task, NFS_CLIENT(inode), flags, call_ops, data); NFS_PROTO(inode)->write_setup(data, how); data->task.tk_priority = flush_task_priority(how); @@ -959,14 +966,15 @@ static int nfs_flush_multi(struct list_head *head, struct inode *inode, int how) list_del_init(&data->pages); data->pagevec[0] = page; - data->complete = nfs_writeback_done_partial; if (nbytes > wsize) { - nfs_write_rpcsetup(req, data, wsize, offset, how); + nfs_write_rpcsetup(req, data, &nfs_write_partial_ops, + wsize, offset, how); offset += wsize; nbytes -= wsize; } else { - nfs_write_rpcsetup(req, data, nbytes, offset, how); + nfs_write_rpcsetup(req, data, &nfs_write_partial_ops, + nbytes, offset, how); nbytes = 0; } nfs_execute_write(data); @@ -1020,9 +1028,8 @@ static int nfs_flush_one(struct list_head *head, struct inode *inode, int how) } req = nfs_list_entry(data->pages.next); - data->complete = nfs_writeback_done_full; /* Set up the argument struct */ - nfs_write_rpcsetup(req, data, count, 0, how); + nfs_write_rpcsetup(req, data, &nfs_write_full_ops, count, 0, how); nfs_execute_write(data); return 0; @@ -1066,8 +1073,9 @@ nfs_flush_list(struct list_head *head, int wpages, int how) /* * Handle a write reply that flushed part of a page. */ -static void nfs_writeback_done_partial(struct nfs_write_data *data, int status) +static void nfs_writeback_done_partial(struct rpc_task *task, void *calldata) { + struct nfs_write_data *data = calldata; struct nfs_page *req = data->req; struct page *page = req->wb_page; @@ -1077,11 +1085,14 @@ static void nfs_writeback_done_partial(struct nfs_write_data *data, int status) req->wb_bytes, (long long)req_offset(req)); - if (status < 0) { + if (nfs_writeback_done(task, data) != 0) + return; + + if (task->tk_status < 0) { ClearPageUptodate(page); SetPageError(page); - req->wb_context->error = status; - dprintk(", error = %d\n", status); + req->wb_context->error = task->tk_status; + dprintk(", error = %d\n", task->tk_status); } else { #if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4) if (data->verf.committed < NFS_FILE_SYNC) { @@ -1102,6 +1113,11 @@ static void nfs_writeback_done_partial(struct nfs_write_data *data, int status) nfs_writepage_release(req); } +static const struct rpc_call_ops nfs_write_partial_ops = { + .rpc_call_done = nfs_writeback_done_partial, + .rpc_release = nfs_writedata_release, +}; + /* * Handle a write reply that flushes a whole page. * @@ -1109,11 +1125,15 @@ static void nfs_writeback_done_partial(struct nfs_write_data *data, int status) * writebacks since the page->count is kept > 1 for as long * as the page has a write request pending. */ -static void nfs_writeback_done_full(struct nfs_write_data *data, int status) +static void nfs_writeback_done_full(struct rpc_task *task, void *calldata) { + struct nfs_write_data *data = calldata; struct nfs_page *req; struct page *page; + if (nfs_writeback_done(task, data) != 0) + return; + /* Update attributes as result of writeback. */ while (!list_empty(&data->pages)) { req = nfs_list_entry(data->pages.next); @@ -1126,13 +1146,13 @@ static void nfs_writeback_done_full(struct nfs_write_data *data, int status) req->wb_bytes, (long long)req_offset(req)); - if (status < 0) { + if (task->tk_status < 0) { ClearPageUptodate(page); SetPageError(page); - req->wb_context->error = status; + req->wb_context->error = task->tk_status; end_page_writeback(page); nfs_inode_remove_request(req); - dprintk(", error = %d\n", status); + dprintk(", error = %d\n", task->tk_status); goto next; } end_page_writeback(page); @@ -1154,18 +1174,28 @@ static void nfs_writeback_done_full(struct nfs_write_data *data, int status) } } +static const struct rpc_call_ops nfs_write_full_ops = { + .rpc_call_done = nfs_writeback_done_full, + .rpc_release = nfs_writedata_release, +}; + + /* * This function is called when the WRITE call is complete. */ -void nfs_writeback_done(struct rpc_task *task, void *calldata) +static int nfs_writeback_done(struct rpc_task *task, struct nfs_write_data *data) { - struct nfs_write_data *data = calldata; struct nfs_writeargs *argp = &data->args; struct nfs_writeres *resp = &data->res; + int status; dprintk("NFS: %4d nfs_writeback_done (status %d)\n", task->tk_pid, task->tk_status); + /* Call the NFS version-specific code */ + status = NFS_PROTO(data->inode)->write_done(task, data); + if (status != 0) + return status; nfs_add_stats(data->inode, NFSIOS_SERVERWRITTENBYTES, resp->count); #if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4) @@ -1210,7 +1240,7 @@ void nfs_writeback_done(struct rpc_task *task, void *calldata) argp->stable = NFS_FILE_SYNC; } rpc_restart_call(task); - return; + return -EAGAIN; } if (time_before(complain, jiffies)) { printk(KERN_WARNING @@ -1221,11 +1251,7 @@ void nfs_writeback_done(struct rpc_task *task, void *calldata) /* Can't do anything about it except throw an error. */ task->tk_status = -EIO; } - - /* - * Process the nfs_page list - */ - data->complete(data, task->tk_status); + return 0; } @@ -1239,10 +1265,12 @@ void nfs_commit_release(void *wdata) * Set up the argument/result storage required for the RPC call. */ static void nfs_commit_rpcsetup(struct list_head *head, - struct nfs_write_data *data, int how) + struct nfs_write_data *data, + int how) { struct nfs_page *first; struct inode *inode; + int flags; /* Set up the RPC argument and reply structs * NB: take care not to mess about with data->commit et al. */ @@ -1262,7 +1290,10 @@ static void nfs_commit_rpcsetup(struct list_head *head, data->res.fattr = &data->fattr; data->res.verf = &data->verf; nfs_fattr_init(&data->fattr); - + + /* Set up the initial task struct. */ + flags = (how & FLUSH_SYNC) ? 0 : RPC_TASK_ASYNC; + rpc_init_task(&data->task, NFS_CLIENT(inode), flags, &nfs_commit_ops, data); NFS_PROTO(inode)->commit_setup(data, how); data->task.tk_priority = flush_task_priority(how); @@ -1303,7 +1334,7 @@ nfs_commit_list(struct inode *inode, struct list_head *head, int how) /* * COMMIT call returned */ -void nfs_commit_done(struct rpc_task *task, void *calldata) +static void nfs_commit_done(struct rpc_task *task, void *calldata) { struct nfs_write_data *data = calldata; struct nfs_page *req; @@ -1312,6 +1343,10 @@ void nfs_commit_done(struct rpc_task *task, void *calldata) dprintk("NFS: %4d nfs_commit_done (status %d)\n", task->tk_pid, task->tk_status); + /* Call the NFS version-specific code */ + if (NFS_PROTO(data->inode)->commit_done(task, data) != 0) + return; + while (!list_empty(&data->pages)) { req = nfs_list_entry(data->pages.next); nfs_list_remove_request(req); @@ -1345,6 +1380,11 @@ void nfs_commit_done(struct rpc_task *task, void *calldata) } sub_page_state(nr_unstable,res); } + +static const struct rpc_call_ops nfs_commit_ops = { + .rpc_call_done = nfs_commit_done, + .rpc_release = nfs_commit_release, +}; #endif static int nfs_flush_inode(struct inode *inode, unsigned long idx_start, -- cgit v1.2.3 From 462d5b3296b56289efec426499a83faad4c08d9e Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 20 Mar 2006 13:44:32 -0500 Subject: NFS: make direct write path generate write requests concurrently Duplicate infrastructure from direct read path that will allow write path to generate multiple write requests concurrently. This will enable us to add support for aio in this path. Temporarily we will lose the ability to do UNSTABLE writes followed by a COMMIT in the direct write path. However, all applications I am aware of that use NFS O_DIRECT currently write in relatively small chunks, so this should not be inconvenient in any way. Test plan: Millions of fsx-odirect ops. OraSim. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- fs/nfs/write.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'fs/nfs/write.c') diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 5912274ff1a1..875f5b060533 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -77,7 +77,6 @@ static struct nfs_page * nfs_update_request(struct nfs_open_context*, struct inode *, struct page *, unsigned int, unsigned int); -static int nfs_writeback_done(struct rpc_task *, struct nfs_write_data *); static int nfs_wait_on_write_congestion(struct address_space *, int); static int nfs_wait_on_requests(struct inode *, unsigned long, unsigned int); static int nfs_flush_inode(struct inode *inode, unsigned long idx_start, @@ -1183,7 +1182,7 @@ static const struct rpc_call_ops nfs_write_full_ops = { /* * This function is called when the WRITE call is complete. */ -static int nfs_writeback_done(struct rpc_task *task, struct nfs_write_data *data) +int nfs_writeback_done(struct rpc_task *task, struct nfs_write_data *data) { struct nfs_writeargs *argp = &data->args; struct nfs_writeres *resp = &data->res; -- cgit v1.2.3 From e17b1fc4b35399935f00a635206e183d9292fe4f Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 20 Mar 2006 13:44:35 -0500 Subject: NFS: Make nfs_commit_alloc() extern We need to use nfs_commit_alloc() in fs/nfs/direct.c. Signed-off-by: Trond Myklebust --- fs/nfs/write.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs/nfs/write.c') diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 875f5b060533..f7c8be0beccf 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -91,7 +91,7 @@ static mempool_t *nfs_commit_mempool; static DECLARE_WAIT_QUEUE_HEAD(nfs_write_congestion); -static inline struct nfs_write_data *nfs_commit_alloc(unsigned int pagecount) +struct nfs_write_data *nfs_commit_alloc(unsigned int pagecount) { struct nfs_write_data *p = mempool_alloc(nfs_commit_mempool, SLAB_NOFS); @@ -112,7 +112,7 @@ static inline struct nfs_write_data *nfs_commit_alloc(unsigned int pagecount) return p; } -static inline void nfs_commit_free(struct nfs_write_data *p) +void nfs_commit_free(struct nfs_write_data *p) { if (p && (p->pagevec != &p->page_array[0])) kfree(p->pagevec); -- cgit v1.2.3 From 3feb2d49394b7874348a6e43c076b780c1d222c5 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 20 Mar 2006 13:44:37 -0500 Subject: NFS: Uninline nfs_writedata_(alloc|free) and nfs_readdata_(alloc|free) Signed-off-by: Trond Myklebust --- fs/nfs/write.c | 32 +++++++++++++++++++++++++++++++- 1 file changed, 31 insertions(+), 1 deletion(-) (limited to 'fs/nfs/write.c') diff --git a/fs/nfs/write.c b/fs/nfs/write.c index f7c8be0beccf..647e3217c2e1 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -86,7 +86,7 @@ static const struct rpc_call_ops nfs_write_full_ops; static const struct rpc_call_ops nfs_commit_ops; static kmem_cache_t *nfs_wdata_cachep; -mempool_t *nfs_wdata_mempool; +static mempool_t *nfs_wdata_mempool; static mempool_t *nfs_commit_mempool; static DECLARE_WAIT_QUEUE_HEAD(nfs_write_congestion); @@ -119,6 +119,36 @@ void nfs_commit_free(struct nfs_write_data *p) mempool_free(p, nfs_commit_mempool); } +struct nfs_write_data *nfs_writedata_alloc(unsigned int pagecount) +{ + struct nfs_write_data *p = mempool_alloc(nfs_wdata_mempool, SLAB_NOFS); + + if (p) { + memset(p, 0, sizeof(*p)); + INIT_LIST_HEAD(&p->pages); + if (pagecount < NFS_PAGEVEC_SIZE) + p->pagevec = &p->page_array[0]; + else { + size_t size = ++pagecount * sizeof(struct page *); + p->pagevec = kmalloc(size, GFP_NOFS); + if (p->pagevec) { + memset(p->pagevec, 0, size); + } else { + mempool_free(p, nfs_wdata_mempool); + p = NULL; + } + } + } + return p; +} + +void nfs_writedata_free(struct nfs_write_data *p) +{ + if (p && (p->pagevec != &p->page_array[0])) + kfree(p->pagevec); + mempool_free(p, nfs_wdata_mempool); +} + void nfs_writedata_release(void *wdata) { nfs_writedata_free(wdata); -- cgit v1.2.3 From deb7d638262019cbac5d15ab74ffd1c29242c7cb Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 20 Mar 2006 13:44:50 -0500 Subject: NFS: Fix a race with PG_private and nfs_release_page() We don't need to set PG_private for readahead pages, since they never get unlocked while I/O is in progress. However there is a small race in nfs_readpage_release() whereby the page may be unlocked, and have PG_private set. Fix is to have PG_private set only for the case of writes... Also fix a bug in nfs_clear_page_writeback(): Don't attempt to clear the radix_tree tag if we've already deleted the radix tree entry. Signed-off-by: Trond Myklebust --- fs/nfs/write.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'fs/nfs/write.c') diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 647e3217c2e1..d9e5ee594572 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -429,6 +429,7 @@ static int nfs_inode_add_request(struct inode *inode, struct nfs_page *req) if (nfs_have_delegation(inode, FMODE_WRITE)) nfsi->change_attr++; } + SetPagePrivate(req->wb_page); nfsi->npages++; atomic_inc(&req->wb_count); return 0; @@ -445,6 +446,7 @@ static void nfs_inode_remove_request(struct nfs_page *req) BUG_ON (!NFS_WBACK_BUSY(req)); spin_lock(&nfsi->req_lock); + ClearPagePrivate(req->wb_page); radix_tree_delete(&nfsi->nfs_page_tree, req->wb_index); nfsi->npages--; if (!nfsi->npages) { -- cgit v1.2.3 From 7d46a49f512e8d10b23353781a8ba85edd4fa640 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 20 Mar 2006 13:44:50 -0500 Subject: NFS: Clean up nfs_flush_list() Signed-off-by: Trond Myklebust --- fs/nfs/write.c | 62 +++++++++++++++++++++++++++------------------------------- 1 file changed, 29 insertions(+), 33 deletions(-) (limited to 'fs/nfs/write.c') diff --git a/fs/nfs/write.c b/fs/nfs/write.c index d9e5ee594572..2beb1268bb1b 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -963,7 +963,7 @@ static void nfs_execute_write(struct nfs_write_data *data) * Generate multiple small requests to write out a single * contiguous dirty area on one page. */ -static int nfs_flush_multi(struct list_head *head, struct inode *inode, int how) +static int nfs_flush_multi(struct inode *inode, struct list_head *head, int how) { struct nfs_page *req = nfs_list_entry(head->next); struct page *page = req->wb_page; @@ -1032,16 +1032,13 @@ out_bad: * This is the case if nfs_updatepage detects a conflicting request * that has been written but not committed. */ -static int nfs_flush_one(struct list_head *head, struct inode *inode, int how) +static int nfs_flush_one(struct inode *inode, struct list_head *head, int how) { struct nfs_page *req; struct page **pages; struct nfs_write_data *data; unsigned int count; - if (NFS_SERVER(inode)->wsize < PAGE_CACHE_SIZE) - return nfs_flush_multi(head, inode, how); - data = nfs_writedata_alloc(NFS_SERVER(inode)->wpages); if (!data) goto out_bad; @@ -1074,24 +1071,32 @@ static int nfs_flush_one(struct list_head *head, struct inode *inode, int how) return -ENOMEM; } -static int -nfs_flush_list(struct list_head *head, int wpages, int how) +static int nfs_flush_list(struct inode *inode, struct list_head *head, int npages, int how) { LIST_HEAD(one_request); - struct nfs_page *req; - int error = 0; - unsigned int pages = 0; + int (*flush_one)(struct inode *, struct list_head *, int); + struct nfs_page *req; + int wpages = NFS_SERVER(inode)->wpages; + int wsize = NFS_SERVER(inode)->wsize; + int error; - while (!list_empty(head)) { - pages += nfs_coalesce_requests(head, &one_request, wpages); + flush_one = nfs_flush_one; + if (wsize < PAGE_CACHE_SIZE) + flush_one = nfs_flush_multi; + /* For single writes, FLUSH_STABLE is more efficient */ + if (npages <= wpages && npages == NFS_I(inode)->npages + && nfs_list_entry(head->next)->wb_bytes <= wsize) + how |= FLUSH_STABLE; + + do { + nfs_coalesce_requests(head, &one_request, wpages); req = nfs_list_entry(one_request.next); - error = nfs_flush_one(&one_request, req->wb_context->dentry->d_inode, how); + error = flush_one(inode, &one_request, how); if (error < 0) - break; - } - if (error >= 0) - return pages; - + goto out_err; + } while (!list_empty(head)); + return 0; +out_err: while (!list_empty(head)) { req = nfs_list_entry(head->next); nfs_list_remove_request(req); @@ -1423,24 +1428,16 @@ static int nfs_flush_inode(struct inode *inode, unsigned long idx_start, { struct nfs_inode *nfsi = NFS_I(inode); LIST_HEAD(head); - int res, - error = 0; + int res; spin_lock(&nfsi->req_lock); res = nfs_scan_dirty(inode, &head, idx_start, npages); spin_unlock(&nfsi->req_lock); if (res) { - struct nfs_server *server = NFS_SERVER(inode); - - /* For single writes, FLUSH_STABLE is more efficient */ - if (res == nfsi->npages && nfsi->npages <= server->wpages) { - if (res > 1 || nfs_list_entry(head.next)->wb_bytes <= server->wsize) - how |= FLUSH_STABLE; - } - error = nfs_flush_list(&head, server->wpages, how); + int error = nfs_flush_list(inode, &head, res, how); + if (error < 0) + return error; } - if (error < 0) - return error; return res; } @@ -1449,14 +1446,13 @@ int nfs_commit_inode(struct inode *inode, int how) { struct nfs_inode *nfsi = NFS_I(inode); LIST_HEAD(head); - int res, - error = 0; + int res; spin_lock(&nfsi->req_lock); res = nfs_scan_commit(inode, &head, 0, 0); spin_unlock(&nfsi->req_lock); if (res) { - error = nfs_commit_list(inode, &head, how); + int error = nfs_commit_list(inode, &head, how); if (error < 0) return error; } -- cgit v1.2.3 From c42de9dd67250fe984e0e31c9b542d721af6454b Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 20 Mar 2006 13:44:51 -0500 Subject: NFS: Fix a race in nfs_sync_inode() Kudos to Neil Brown for spotting the problem: "in nfs_sync_inode, there is effectively the sequence: nfs_wait_on_requests nfs_flush_inode nfs_commit_inode This seems a bit racy to me as if the only requests are on the ->commit list, and nfs_commit_inode is called separately after nfs_wait_on_requests completes, and before nfs_commit_inode start (say: by nfs_write_inode) then none of these function will return >0, yet there will be some pending request that aren't waited for." The solution is to search for requests to wait upon, search for dirty requests, and search for uncommitted requests while holding the nfsi->req_lock The patch also cleans up nfs_sync_inode(), getting rid of the redundant FLUSH_WAIT flag. It turns out that we were always setting it. Signed-off-by: Trond Myklebust --- fs/nfs/write.c | 72 ++++++++++++++++++++++++++++++++++++++++------------------ 1 file changed, 50 insertions(+), 22 deletions(-) (limited to 'fs/nfs/write.c') diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 2beb1268bb1b..3f5225404c97 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -539,8 +539,7 @@ nfs_mark_request_commit(struct nfs_page *req) * * Interruptible by signals only if mounted with intr flag. */ -static int -nfs_wait_on_requests(struct inode *inode, unsigned long idx_start, unsigned int npages) +static int nfs_wait_on_requests_locked(struct inode *inode, unsigned long idx_start, unsigned int npages) { struct nfs_inode *nfsi = NFS_I(inode); struct nfs_page *req; @@ -553,7 +552,6 @@ nfs_wait_on_requests(struct inode *inode, unsigned long idx_start, unsigned int else idx_end = idx_start + npages - 1; - spin_lock(&nfsi->req_lock); next = idx_start; while (radix_tree_gang_lookup_tag(&nfsi->nfs_page_tree, (void **)&req, next, 1, NFS_PAGE_TAG_WRITEBACK)) { if (req->wb_index > idx_end) @@ -566,15 +564,25 @@ nfs_wait_on_requests(struct inode *inode, unsigned long idx_start, unsigned int spin_unlock(&nfsi->req_lock); error = nfs_wait_on_request(req); nfs_release_request(req); + spin_lock(&nfsi->req_lock); if (error < 0) return error; - spin_lock(&nfsi->req_lock); res++; } - spin_unlock(&nfsi->req_lock); return res; } +static int nfs_wait_on_requests(struct inode *inode, unsigned long idx_start, unsigned int npages) +{ + struct nfs_inode *nfsi = NFS_I(inode); + int ret; + + spin_lock(&nfsi->req_lock); + ret = nfs_wait_on_requests_locked(inode, idx_start, npages); + spin_unlock(&nfsi->req_lock); + return ret; +} + /* * nfs_scan_dirty - Scan an inode for dirty requests * @inode: NFS inode to scan @@ -626,6 +634,11 @@ nfs_scan_commit(struct inode *inode, struct list_head *dst, unsigned long idx_st } return res; } +#else +static inline int nfs_scan_commit(struct inode *inode, struct list_head *dst, unsigned long idx_start, unsigned int npages) +{ + return 0; +} #endif static int nfs_wait_on_write_congestion(struct address_space *mapping, int intr) @@ -1421,6 +1434,11 @@ static const struct rpc_call_ops nfs_commit_ops = { .rpc_call_done = nfs_commit_done, .rpc_release = nfs_commit_release, }; +#else +static inline int nfs_commit_list(struct inode *inode, struct list_head *head, int how) +{ + return 0; +} #endif static int nfs_flush_inode(struct inode *inode, unsigned long idx_start, @@ -1460,28 +1478,38 @@ int nfs_commit_inode(struct inode *inode, int how) } #endif -int nfs_sync_inode(struct inode *inode, unsigned long idx_start, - unsigned int npages, int how) +int nfs_sync_inode_wait(struct inode *inode, unsigned long idx_start, + unsigned int npages, int how) { + struct nfs_inode *nfsi = NFS_I(inode); + LIST_HEAD(head); int nocommit = how & FLUSH_NOCOMMIT; - int wait = how & FLUSH_WAIT; - int error; - - how &= ~(FLUSH_WAIT|FLUSH_NOCOMMIT); + int pages, ret; + how &= ~FLUSH_NOCOMMIT; + spin_lock(&nfsi->req_lock); do { - if (wait) { - error = nfs_wait_on_requests(inode, idx_start, npages); - if (error != 0) - continue; - } - error = nfs_flush_inode(inode, idx_start, npages, how); - if (error != 0) + ret = nfs_wait_on_requests_locked(inode, idx_start, npages); + if (ret != 0) continue; - if (!nocommit) - error = nfs_commit_inode(inode, how); - } while (error > 0); - return error; + pages = nfs_scan_dirty(inode, &head, idx_start, npages); + if (pages != 0) { + spin_unlock(&nfsi->req_lock); + ret = nfs_flush_list(inode, &head, pages, how); + spin_lock(&nfsi->req_lock); + continue; + } + if (nocommit) + break; + pages = nfs_scan_commit(inode, &head, 0, 0); + if (pages == 0) + break; + spin_unlock(&nfsi->req_lock); + ret = nfs_commit_list(inode, &head, how); + spin_lock(&nfsi->req_lock); + } while (ret >= 0); + spin_unlock(&nfsi->req_lock); + return ret; } int nfs_init_writepagecache(void) -- cgit v1.2.3 From 93d2341c750cda0df48a6cc67b35fe25f1ec47df Mon Sep 17 00:00:00 2001 From: Matthew Dobson Date: Sun, 26 Mar 2006 01:37:50 -0800 Subject: [PATCH] mempool: use mempool_create_slab_pool() Modify well over a dozen mempool users to call mempool_create_slab_pool() rather than calling mempool_create() with extra arguments, saving about 30 lines of code and increasing readability. Signed-off-by: Matthew Dobson Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/nfs/write.c | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) (limited to 'fs/nfs/write.c') diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 3f5225404c97..4cfada2cc09f 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -1521,17 +1521,13 @@ int nfs_init_writepagecache(void) if (nfs_wdata_cachep == NULL) return -ENOMEM; - nfs_wdata_mempool = mempool_create(MIN_POOL_WRITE, - mempool_alloc_slab, - mempool_free_slab, - nfs_wdata_cachep); + nfs_wdata_mempool = mempool_create_slab_pool(MIN_POOL_WRITE, + nfs_wdata_cachep); if (nfs_wdata_mempool == NULL) return -ENOMEM; - nfs_commit_mempool = mempool_create(MIN_POOL_COMMIT, - mempool_alloc_slab, - mempool_free_slab, - nfs_wdata_cachep); + nfs_commit_mempool = mempool_create_slab_pool(MIN_POOL_COMMIT, + nfs_wdata_cachep); if (nfs_commit_mempool == NULL) return -ENOMEM; -- cgit v1.2.3