From 0e57f6a36f9be03e5abb755f524ee91c4aebe854 Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Mon, 20 Dec 2010 12:02:19 +1100 Subject: xfs: bulk AIL insertion during transaction commit When inserting items into the AIL from the transaction committed callbacks, we take the AIL lock for every single item that is to be inserted. For a CIL checkpoint commit, this can be tens of thousands of individual inserts, yet almost all of the items will be inserted at the same point in the AIL because they have the same index. To reduce the overhead and contention on the AIL lock for such operations, introduce a "bulk insert" operation which allows a list of log items with the same LSN to be inserted in a single operation via a list splice. To do this, we need to pre-sort the log items being committed into a temporary list for insertion. The complexity is that not every log item will end up with the same LSN, and not every item is actually inserted into the AIL. Items that don't match the commit LSN will be inserted and unpinned as per the current one-at-a-time method (relatively rare), while items that are not to be inserted will be unpinned and freed immediately. Items that are to be inserted at the given commit lsn are placed in a temporary array and inserted into the AIL in bulk each time the array fills up. As a result of this, we trade off AIL hold time for a significant reduction in traffic. lock_stat output shows that the worst case hold time is unchanged, but contention from AIL inserts drops by an order of magnitude and the number of lock traversal decreases significantly. Signed-off-by: Dave Chinner Reviewed-by: Christoph Hellwig --- fs/xfs/xfs_trans.c | 79 +++++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 78 insertions(+), 1 deletion(-) (limited to 'fs/xfs/xfs_trans.c') diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c index f6d956b7711e..f80a067a4658 100644 --- a/fs/xfs/xfs_trans.c +++ b/fs/xfs/xfs_trans.c @@ -1350,7 +1350,7 @@ xfs_trans_fill_vecs( * they could be immediately flushed and we'd have to race with the flusher * trying to pull the item from the AIL as we add it. */ -void +static void xfs_trans_item_committed( struct xfs_log_item *lip, xfs_lsn_t commit_lsn, @@ -1425,6 +1425,83 @@ xfs_trans_committed( xfs_trans_free(tp); } +static inline void +xfs_log_item_batch_insert( + struct xfs_ail *ailp, + struct xfs_log_item **log_items, + int nr_items, + xfs_lsn_t commit_lsn) +{ + int i; + + spin_lock(&ailp->xa_lock); + /* xfs_trans_ail_update_bulk drops ailp->xa_lock */ + xfs_trans_ail_update_bulk(ailp, log_items, nr_items, commit_lsn); + + for (i = 0; i < nr_items; i++) + IOP_UNPIN(log_items[i], 0); +} + +/* + * Bulk operation version of xfs_trans_committed that takes a log vector of + * items to insert into the AIL. This uses bulk AIL insertion techniques to + * minimise lock traffic. + */ +void +xfs_trans_committed_bulk( + struct xfs_ail *ailp, + struct xfs_log_vec *log_vector, + xfs_lsn_t commit_lsn, + int aborted) +{ +#define LOG_ITEM_BATCH_SIZE 32 + struct xfs_log_item *log_items[LOG_ITEM_BATCH_SIZE]; + struct xfs_log_vec *lv; + int i = 0; + + /* unpin all the log items */ + for (lv = log_vector; lv; lv = lv->lv_next ) { + struct xfs_log_item *lip = lv->lv_item; + xfs_lsn_t item_lsn; + + if (aborted) + lip->li_flags |= XFS_LI_ABORTED; + item_lsn = IOP_COMMITTED(lip, commit_lsn); + + /* item_lsn of -1 means the item was freed */ + if (XFS_LSN_CMP(item_lsn, (xfs_lsn_t)-1) == 0) + continue; + + if (item_lsn != commit_lsn) { + + /* + * Not a bulk update option due to unusual item_lsn. + * Push into AIL immediately, rechecking the lsn once + * we have the ail lock. Then unpin the item. + */ + spin_lock(&ailp->xa_lock); + if (XFS_LSN_CMP(item_lsn, lip->li_lsn) > 0) + xfs_trans_ail_update(ailp, lip, item_lsn); + else + spin_unlock(&ailp->xa_lock); + IOP_UNPIN(lip, 0); + continue; + } + + /* Item is a candidate for bulk AIL insert. */ + log_items[i++] = lv->lv_item; + if (i >= LOG_ITEM_BATCH_SIZE) { + xfs_log_item_batch_insert(ailp, log_items, + LOG_ITEM_BATCH_SIZE, commit_lsn); + i = 0; + } + } + + /* make sure we insert the remainder! */ + if (i) + xfs_log_item_batch_insert(ailp, log_items, i, commit_lsn); +} + /* * Called from the trans_commit code when we notice that * the filesystem is in the middle of a forced shutdown. -- cgit v1.2.3 From 1884bd8354c9aec4ca501dc4773c13ad2a09af7b Mon Sep 17 00:00:00 2001 From: Jesper Juhl Date: Sat, 25 Dec 2010 20:14:53 +0000 Subject: xfs: fix an assignment within an ASSERT() In fs/xfs/xfs_trans.c::xfs_trans_unreserve_and_mod_sb() at the out: label we have this: ASSERT(error = 0); I believe a comparison was intended, not an assignment. If I'm right, the patch below fixes that up. Signed-off-by: Jesper Juhl Signed-off-by: Alex Elder --- fs/xfs/xfs_trans.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/xfs/xfs_trans.c') diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c index f80a067a4658..33dbc4e0ad62 100644 --- a/fs/xfs/xfs_trans.c +++ b/fs/xfs/xfs_trans.c @@ -1137,7 +1137,7 @@ out_undo_fdblocks: if (blkdelta) xfs_icsb_modify_counters(mp, XFS_SBS_FDBLOCKS, -blkdelta, rsvd); out: - ASSERT(error = 0); + ASSERT(error == 0); return; } -- cgit v1.2.3 From e34a314c5e49fe6b763568f6576b19f1299c33c2 Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Thu, 27 Jan 2011 12:13:35 +1100 Subject: xfs: fix efi item leak on forced shutdown After test 139, kmemleak shows: unreferenced object 0xffff880078b405d8 (size 400): comm "xfs_io", pid 4904, jiffies 4294909383 (age 1186.728s) hex dump (first 32 bytes): 60 c1 17 79 00 88 ff ff 60 c1 17 79 00 88 ff ff `..y....`..y.... 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ backtrace: [] kmemleak_alloc+0x2d/0x60 [] kmem_cache_alloc+0x13f/0x2b0 [] kmem_zone_alloc+0x77/0xf0 [] kmem_zone_zalloc+0x1e/0x50 [] xfs_efi_init+0x4b/0xb0 [] xfs_trans_get_efi+0x58/0x90 [] xfs_bmap_finish+0x8b/0x1d0 [] xfs_itruncate_finish+0x2c4/0x5d0 [] xfs_setattr+0x8df/0xa70 [] xfs_vn_setattr+0x1b/0x20 [] notify_change+0x170/0x2e0 [] do_truncate+0x66/0xa0 [] sys_ftruncate+0xdb/0xe0 [] system_call_fastpath+0x16/0x1b [] 0xffffffffffffffff The cause of the leak is that the "remove" parameter of IOP_UNPIN() is never set when a CIL push is aborted. This means that the EFI item is never freed if it was in the push being cancelled. The problem is specific to delayed logging, but has uncovered a couple of problems with the handling of IOP_UNPIN(remove). Firstly, we cannot safely call xfs_trans_del_item() from IOP_UNPIN() in the CIL commit failure path or the iclog write failure path because for delayed loging we have no transaction context. Hence we must only call xfs_trans_del_item() if the log item being unpinned has an active log item descriptor. Secondly, xfs_trans_uncommit() does not handle log item descriptor freeing during the traversal of log items on a transaction. It can reference a freed log item descriptor when unpinning an EFI item. Hence it needs to use a safe list traversal method to allow items to be removed from the transaction during IOP_UNPIN(). Signed-off-by: Dave Chinner Reviewed-by: Alex Elder --- fs/xfs/xfs_trans.c | 36 +++++++++++++++++++++++++++++------- 1 file changed, 29 insertions(+), 7 deletions(-) (limited to 'fs/xfs/xfs_trans.c') diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c index 33dbc4e0ad62..29f5e5424897 100644 --- a/fs/xfs/xfs_trans.c +++ b/fs/xfs/xfs_trans.c @@ -1446,6 +1446,14 @@ xfs_log_item_batch_insert( * Bulk operation version of xfs_trans_committed that takes a log vector of * items to insert into the AIL. This uses bulk AIL insertion techniques to * minimise lock traffic. + * + * If we are called with the aborted flag set, it is because a log write during + * a CIL checkpoint commit has failed. In this case, all the items in the + * checkpoint have already gone through IOP_COMMITED and IOP_UNLOCK, which + * means that checkpoint commit abort handling is treated exactly the same + * as an iclog write error even though we haven't started any IO yet. Hence in + * this case all we need to do is IOP_COMMITTED processing, followed by an + * IOP_UNPIN(aborted) call. */ void xfs_trans_committed_bulk( @@ -1472,6 +1480,16 @@ xfs_trans_committed_bulk( if (XFS_LSN_CMP(item_lsn, (xfs_lsn_t)-1) == 0) continue; + /* + * if we are aborting the operation, no point in inserting the + * object into the AIL as we are in a shutdown situation. + */ + if (aborted) { + ASSERT(XFS_FORCED_SHUTDOWN(ailp->xa_mount)); + IOP_UNPIN(lip, 1); + continue; + } + if (item_lsn != commit_lsn) { /* @@ -1503,20 +1521,24 @@ xfs_trans_committed_bulk( } /* - * Called from the trans_commit code when we notice that - * the filesystem is in the middle of a forced shutdown. + * Called from the trans_commit code when we notice that the filesystem is in + * the middle of a forced shutdown. + * + * When we are called here, we have already pinned all the items in the + * transaction. However, neither IOP_COMMITTING or IOP_UNLOCK has been called + * so we can simply walk the items in the transaction, unpin them with an abort + * flag and then free the items. Note that unpinning the items can result in + * them being freed immediately, so we need to use a safe list traversal method + * here. */ STATIC void xfs_trans_uncommit( struct xfs_trans *tp, uint flags) { - struct xfs_log_item_desc *lidp; + struct xfs_log_item_desc *lidp, *n; - list_for_each_entry(lidp, &tp->t_items, lid_trans) { - /* - * Unpin all but those that aren't dirty. - */ + list_for_each_entry_safe(lidp, n, &tp->t_items, lid_trans) { if (lidp->lid_flags & XFS_LID_DIRTY) IOP_UNPIN(lidp->lid_item, 1); } -- cgit v1.2.3 From c6f990d1ff8e4e53b12f4175eb7d7ea710c3ca73 Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Thu, 27 Jan 2011 13:23:28 +1100 Subject: xfs: handle CIl transaction commit failures correctly Failure to commit a transaction into the CIL is not handled correctly. This currently can only happen when racing with a shutdown and requires an explicit shutdown check, so it rare and can be avoided. Remove the shutdown check and make the CIL commit a void function to indicate it will always succeed, thereby removing the incorrectly handled failure case. Signed-off-by: Dave Chinner Reviewed-by: Christoph Hellwig Reviewed-by: Alex Elder --- fs/xfs/xfs_trans.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'fs/xfs/xfs_trans.c') diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c index 29f5e5424897..76922793f64f 100644 --- a/fs/xfs/xfs_trans.c +++ b/fs/xfs/xfs_trans.c @@ -1755,7 +1755,6 @@ xfs_trans_commit_cil( int flags) { struct xfs_log_vec *log_vector; - int error; /* * Get each log item to allocate a vector structure for @@ -1766,9 +1765,7 @@ xfs_trans_commit_cil( if (!log_vector) return ENOMEM; - error = xfs_log_commit_cil(mp, tp, log_vector, commit_lsn, flags); - if (error) - return error; + xfs_log_commit_cil(mp, tp, log_vector, commit_lsn, flags); current_restore_flags_nested(&tp->t_pflags, PF_FSTRANS); xfs_trans_free(tp); -- cgit v1.2.3