From 7a249cf83da1813cfa71cfe1e265b40045eceb47 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig
Date: Fri, 8 Jul 2011 14:34:42 +0200
Subject: xfs: fix filesystem freeze race in xfs_trans_alloc

As pointed out by Jan, xfs_trans_alloc can race with a concurrent
filesystem freeze when it sleeps during the memory allocation.  Fix
this by moving the wait_for_freeze call after the memory allocation.
This means moving the freeze into the low-level _xfs_trans_alloc
helper, which thus grows a new argument.  Also fix up some comments
in that area while at it.

Signed-off-by: Christoph Hellwig
Reviewed-by: Alex Elder
Reviewed-by: Dave Chinner
---
 fs/xfs/xfs_trans.c | 27 ++++++++++-----------------
 1 file changed, 10 insertions(+), 17 deletions(-)

(limited to 'fs/xfs/xfs_trans.c')

diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c
index c83f63b33aae..2837220ea5cf 100644
--- a/fs/xfs/xfs_trans.c
+++ b/fs/xfs/xfs_trans.c
@@ -566,31 +566,24 @@ xfs_trans_init(
 
 /*
  * This routine is called to allocate a transaction structure.
+ *
  * The type parameter indicates the type of the transaction.  These
  * are enumerated in xfs_trans.h.
- *
- * Dynamically allocate the transaction structure from the transaction
- * zone, initialize it, and return it to the caller.
  */
-xfs_trans_t *
-xfs_trans_alloc(
-	xfs_mount_t	*mp,
-	uint		type)
-{
-	xfs_wait_for_freeze(mp, SB_FREEZE_TRANS);
-	return _xfs_trans_alloc(mp, type, KM_SLEEP);
-}
-
-xfs_trans_t *
+struct xfs_trans *
 _xfs_trans_alloc(
-	xfs_mount_t	*mp,
-	uint		type,
-	uint		memflags)
+	struct xfs_mount	*mp,
+	uint			type,
+	uint			memflags,
+	bool			wait_for_freeze)
 {
-	xfs_trans_t	*tp;
+	struct xfs_trans	*tp;
 
 	atomic_inc(&mp->m_active_trans);
 
+	if (wait_for_freeze)
+		xfs_wait_for_freeze(mp, SB_FREEZE_TRANS);
+
 	tp = kmem_zone_zalloc(xfs_trans_zone, memflags);
 	tp->t_magic = XFS_TRANS_MAGIC;
 	tp->t_type = type;
--
cgit v1.2.3
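Because the diff shown here is limited to fs/xfs/xfs_trans.c, it omits how the old public entry point is rerouted through the extended helper. A minimal sketch of what that wrapper plausibly looks like after this patch follows; it is an assumption for illustration, not a hunk from the commit, and its exact location in the tree is hypothetical:

	/*
	 * Hedged sketch, not a hunk from the commit: with the freeze wait
	 * folded into _xfs_trans_alloc(), the public allocation routine can
	 * become a thin wrapper that opts in to the freeze gate.  The point
	 * of the new bool argument is that callers which must run while the
	 * filesystem is frozen would pass false instead.
	 */
	struct xfs_trans *
	xfs_trans_alloc(
		struct xfs_mount	*mp,
		uint			type)
	{
		return _xfs_trans_alloc(mp, type, KM_SLEEP, true);
	}
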
From b2ce39740066604288876c752d8170b3b17a21aa Mon Sep 17 00:00:00 2001
From: Alex Elder
Date: Mon, 11 Jul 2011 09:51:44 -0500
Subject: Revert "xfs: fix filesystem freeze race in xfs_trans_alloc"

This reverts commit 7a249cf83da1813cfa71cfe1e265b40045eceb47.

That commit created a situation that could lead to a filesystem
hang.  As Dave Chinner pointed out, xfs_trans_alloc() could hold a
reference to m_active_trans (i.e., keep it non-zero) and then wait
for SB_FREEZE_TRANS to complete.  Meanwhile a filesystem freeze
request could set SB_FREEZE_TRANS and then wait for m_active_trans
to drop to zero.  Nobody benefits from this sequence of events...

Signed-off-by: Christoph Hellwig
Signed-off-by: Alex Elder
---
 fs/xfs/xfs_trans.c | 27 +++++++++++++++++----------
 1 file changed, 17 insertions(+), 10 deletions(-)

(limited to 'fs/xfs/xfs_trans.c')

diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c
index 2837220ea5cf..c83f63b33aae 100644
--- a/fs/xfs/xfs_trans.c
+++ b/fs/xfs/xfs_trans.c
@@ -566,24 +566,31 @@ xfs_trans_init(
 
 /*
  * This routine is called to allocate a transaction structure.
- *
  * The type parameter indicates the type of the transaction.  These
  * are enumerated in xfs_trans.h.
+ *
+ * Dynamically allocate the transaction structure from the transaction
+ * zone, initialize it, and return it to the caller.
  */
-struct xfs_trans *
+xfs_trans_t *
+xfs_trans_alloc(
+	xfs_mount_t	*mp,
+	uint		type)
+{
+	xfs_wait_for_freeze(mp, SB_FREEZE_TRANS);
+	return _xfs_trans_alloc(mp, type, KM_SLEEP);
+}
+
+xfs_trans_t *
 _xfs_trans_alloc(
-	struct xfs_mount	*mp,
-	uint			type,
-	uint			memflags,
-	bool			wait_for_freeze)
+	xfs_mount_t	*mp,
+	uint		type,
+	uint		memflags)
 {
-	struct xfs_trans	*tp;
+	xfs_trans_t	*tp;
 
 	atomic_inc(&mp->m_active_trans);
 
-	if (wait_for_freeze)
-		xfs_wait_for_freeze(mp, SB_FREEZE_TRANS);
-
 	tp = kmem_zone_zalloc(xfs_trans_zone, memflags);
 	tp->t_magic = XFS_TRANS_MAGIC;
 	tp->t_type = type;
--
cgit v1.2.3
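The hang the revert describes is easier to see laid out side by side. The interleaving below is an illustrative sketch built only from the names in the message above; it is not code from either commit:

	/*
	 * Illustrative interleaving of the hang (editorial sketch):
	 *
	 *   transaction thread                    freeze thread
	 *   ------------------                    -------------
	 *   atomic_inc(&mp->m_active_trans);
	 *                                         set SB_FREEZE_TRANS
	 *                                         wait for m_active_trans == 0
	 *                                           ... blocks: count is non-zero
	 *   xfs_wait_for_freeze(mp, SB_FREEZE_TRANS);
	 *     ... blocks: freeze level is set
	 *
	 * Each thread waits on a condition only the other can clear: the
	 * transaction thread pins m_active_trans while waiting for the
	 * freeze to finish, and the freezer holds SB_FREEZE_TRANS while
	 * waiting for m_active_trans to drain.  Reverting restores the old
	 * order, in which the freeze wait completes before the
	 * active-transaction count is taken.
	 */
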
From 1d8c95a363bf8cd4d4182dd19c01693b635311c2 Mon Sep 17 00:00:00 2001
From: Dave Chinner
Date: Mon, 18 Jul 2011 03:40:16 +0000
Subject: xfs: use a cursor for bulk AIL insertion

Delayed logging can insert tens of thousands of log items into the
AIL at the same LSN. When the committing of log commit records
occurs, we can get insertions occurring at an LSN that is not at
the end of the AIL. If there are thousands of items in the AIL on
the tail LSN, each insertion has to walk the AIL to find the
correct place to insert the new item into the AIL. This can consume
large amounts of CPU time and block other operations from occurring
while the traversals are in progress.

To avoid this repeated walk, use an AIL cursor to record where we
should be inserting the new items into the AIL without having to
repeat the walk. The cursor infrastructure already provides this
functionality for push walks, so this is a simple extension of the
existing code. While this will not avoid the initial walk, it will
avoid repeating it tens of thousands of times during a single
checkpoint commit.

This version includes logic improvements from Christoph Hellwig.

Signed-off-by: Dave Chinner
Reviewed-by: Christoph Hellwig
Signed-off-by: Alex Elder
---
 fs/xfs/xfs_trans.c | 27 +++++++++++++++++++++++----
 1 file changed, 23 insertions(+), 4 deletions(-)

(limited to 'fs/xfs/xfs_trans.c')

diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c
index c83f63b33aae..efc147f0e9b6 100644
--- a/fs/xfs/xfs_trans.c
+++ b/fs/xfs/xfs_trans.c
@@ -1426,6 +1426,7 @@ xfs_trans_committed(
 static inline void
 xfs_log_item_batch_insert(
 	struct xfs_ail		*ailp,
+	struct xfs_ail_cursor	*cur,
 	struct xfs_log_item	**log_items,
 	int			nr_items,
 	xfs_lsn_t		commit_lsn)
@@ -1434,7 +1435,7 @@ xfs_log_item_batch_insert(
 	spin_lock(&ailp->xa_lock);
 
 	/* xfs_trans_ail_update_bulk drops ailp->xa_lock */
-	xfs_trans_ail_update_bulk(ailp, log_items, nr_items, commit_lsn);
+	xfs_trans_ail_update_bulk(ailp, cur, log_items, nr_items, commit_lsn);
 
 	for (i = 0; i < nr_items; i++)
 		IOP_UNPIN(log_items[i], 0);
@@ -1452,6 +1453,13 @@ xfs_log_item_batch_insert(
  * as an iclog write error even though we haven't started any IO yet. Hence in
  * this case all we need to do is IOP_COMMITTED processing, followed by an
  * IOP_UNPIN(aborted) call.
+ *
+ * The AIL cursor is used to optimise the insert process. If commit_lsn is not
+ * at the end of the AIL, the insert cursor avoids the need to walk
+ * the AIL to find the insertion point on every xfs_log_item_batch_insert()
+ * call. This saves a lot of needless list walking and is a net win, even
+ * though it slightly increases the amount of AIL lock traffic to set it up
+ * and tear it down.
  */
 void
 xfs_trans_committed_bulk(
@@ -1463,8 +1471,13 @@ xfs_trans_committed_bulk(
 #define LOG_ITEM_BATCH_SIZE	32
 	struct xfs_log_item	*log_items[LOG_ITEM_BATCH_SIZE];
 	struct xfs_log_vec	*lv;
+	struct xfs_ail_cursor	cur;
 	int			i = 0;
 
+	spin_lock(&ailp->xa_lock);
+	xfs_trans_ail_cursor_last(ailp, &cur, commit_lsn);
+	spin_unlock(&ailp->xa_lock);
+
 	/* unpin all the log items */
 	for (lv = log_vector; lv; lv = lv->lv_next ) {
 		struct xfs_log_item	*lip = lv->lv_item;
@@ -1493,7 +1506,9 @@
 			/*
 			 * Not a bulk update option due to unusual item_lsn.
 			 * Push into AIL immediately, rechecking the lsn once
-			 * we have the ail lock. Then unpin the item.
+			 * we have the ail lock. Then unpin the item. This does
+			 * not affect the AIL cursor the bulk insert path is
+			 * using.
 			 */
 			spin_lock(&ailp->xa_lock);
 			if (XFS_LSN_CMP(item_lsn, lip->li_lsn) > 0)
@@ -1507,7 +1522,7 @@
 		/* Item is a candidate for bulk AIL insert.  */
 		log_items[i++] = lv->lv_item;
 		if (i >= LOG_ITEM_BATCH_SIZE) {
-			xfs_log_item_batch_insert(ailp, log_items,
+			xfs_log_item_batch_insert(ailp, &cur, log_items,
 					LOG_ITEM_BATCH_SIZE, commit_lsn);
 			i = 0;
 		}
@@ -1515,7 +1530,11 @@
 
 	/* make sure we insert the remainder! */
 	if (i)
-		xfs_log_item_batch_insert(ailp, log_items, i, commit_lsn);
+		xfs_log_item_batch_insert(ailp, &cur, log_items, i, commit_lsn);
+
+	spin_lock(&ailp->xa_lock);
+	xfs_trans_ail_cursor_done(ailp, &cur);
+	spin_unlock(&ailp->xa_lock);
 }
 
 /*
--
cgit v1.2.3
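Stripped of the log-vector walk and the unusual-LSN special case, the cursor usage in xfs_trans_committed_bulk() reduces to the pattern below. This is a simplified sketch reusing the names from the diff: the bulk_insert_all() helper and its flat item array are hypothetical, and LOG_ITEM_BATCH_SIZE is borrowed from the function above for illustration.

	/*
	 * Simplified sketch of the cursor-based bulk insert pattern from
	 * the patch above.  Set the cursor up once under the AIL lock,
	 * feed every batch through it, and tear it down at the end.
	 */
	static void
	bulk_insert_all(
		struct xfs_ail		*ailp,
		struct xfs_log_item	**items,
		int			nr_items,
		xfs_lsn_t		commit_lsn)
	{
		struct xfs_ail_cursor	cur;
		int			i;

		/* One AIL walk total: position the cursor at commit_lsn. */
		spin_lock(&ailp->xa_lock);
		xfs_trans_ail_cursor_last(ailp, &cur, commit_lsn);
		spin_unlock(&ailp->xa_lock);

		/* Each batch inserts at the cursor, not via a fresh walk. */
		for (i = 0; i < nr_items; i += LOG_ITEM_BATCH_SIZE) {
			int	batch = min(nr_items - i, LOG_ITEM_BATCH_SIZE);

			xfs_log_item_batch_insert(ailp, &cur, &items[i],
						  batch, commit_lsn);
		}

		spin_lock(&ailp->xa_lock);
		xfs_trans_ail_cursor_done(ailp, &cur);
		spin_unlock(&ailp->xa_lock);
	}

The net effect is one positioning walk per checkpoint instead of one per batch, at the cost of two extra lock round trips for setup and teardown, which is the trade-off the comment in the diff calls out.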