From 28da9fb4467f7a650cd31af6dfad3a4e4a3abf6e Mon Sep 17 00:00:00 2001 From: Jan Schmidt Date: Thu, 21 Jun 2012 10:59:13 +0200 Subject: Btrfs: fix tree mod log for root replacements at leaf level For the tree mod log, we don't log any operations at leaf level. If the root is at the leaf level (i.e. the tree consists only of the root), then __tree_mod_log_oldest_root will find a ROOT_REPLACE operation in the log (because we always log that one no matter which level), but no other operations. With this patch __tree_mod_log_oldest_root exits cleanly instead of BUGging in this situation. get_old_root checks if its really a root at leaf level in case we don't have any operations and WARNs if this assumption breaks. Signed-off-by: Jan Schmidt --- fs/btrfs/ctree.c | 28 +++++++++++++++------------- 1 file changed, 15 insertions(+), 13 deletions(-) (limited to 'fs/btrfs/ctree.c') diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 15cbc2bf4ff0..7d1e4fc5fb6a 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -1024,11 +1024,18 @@ __tree_mod_log_oldest_root(struct btrfs_fs_info *fs_info, if (!looped && !tm) return 0; /* - * we must have key remove operations in the log before the - * replace operation. + * if there are no tree operation for the oldest root, we simply + * return it. this should only happen if that (old) root is at + * level 0. */ - BUG_ON(!tm); + if (!tm) + break; + /* + * if there's an operation that's not a root replacement, we + * found the oldest version of our root. normally, we'll find a + * MOD_LOG_KEY_REMOVE_WHILE_FREEING operation here. + */ if (tm->op != MOD_LOG_ROOT_REPLACE) break; @@ -1192,16 +1199,8 @@ get_old_root(struct btrfs_root *root, u64 time_seq) } tm = tree_mod_log_search(root->fs_info, logical, time_seq); - /* - * there was an item in the log when __tree_mod_log_oldest_root - * returned. this one must not go away, because the time_seq passed to - * us must be blocking its removal. - */ - BUG_ON(!tm); - if (old_root) - eb = alloc_dummy_extent_buffer(tm->index << PAGE_CACHE_SHIFT, - root->nodesize); + eb = alloc_dummy_extent_buffer(logical, root->nodesize); else eb = btrfs_clone_extent_buffer(root->node); btrfs_tree_read_unlock(root->node); @@ -1216,7 +1215,10 @@ get_old_root(struct btrfs_root *root, u64 time_seq) btrfs_set_header_level(eb, old_root->level); btrfs_set_header_generation(eb, old_generation); } - __tree_mod_log_rewind(eb, time_seq, tm); + if (tm) + __tree_mod_log_rewind(eb, time_seq, tm); + else + WARN_ON(btrfs_header_level(eb) != 0); extent_buffer_get(eb); return eb; -- cgit v1.2.3 From c3e0696523862c48b4d8c73ffb2867e9db478338 Mon Sep 17 00:00:00 2001 From: Jan Schmidt Date: Thu, 21 Jun 2012 11:01:06 +0200 Subject: Btrfs: always put insert_ptr modifications into the tree mod log Several callers of insert_ptr set the tree_mod_log parameter to 0 to avoid addition to the tree mod log. In fact, we need all of those operations. This commit simply removes the additional parameter and makes addition to the tree mod log unconditional. Signed-off-by: Jan Schmidt --- fs/btrfs/ctree.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'fs/btrfs/ctree.c') diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 7d1e4fc5fb6a..e005d9b04616 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -2997,7 +2997,7 @@ static noinline int insert_new_root(struct btrfs_trans_handle *trans, static void insert_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, struct btrfs_disk_key *key, u64 bytenr, - int slot, int level, int tree_mod_log) + int slot, int level) { struct extent_buffer *lower; int nritems; @@ -3010,7 +3010,7 @@ static void insert_ptr(struct btrfs_trans_handle *trans, BUG_ON(slot > nritems); BUG_ON(nritems == BTRFS_NODEPTRS_PER_BLOCK(root)); if (slot != nritems) { - if (tree_mod_log && level) + if (level) tree_mod_log_eb_move(root->fs_info, lower, slot + 1, slot, nritems - slot); memmove_extent_buffer(lower, @@ -3018,7 +3018,7 @@ static void insert_ptr(struct btrfs_trans_handle *trans, btrfs_node_key_ptr_offset(slot), (nritems - slot) * sizeof(struct btrfs_key_ptr)); } - if (tree_mod_log && level) { + if (level) { ret = tree_mod_log_insert_key(root->fs_info, lower, slot, MOD_LOG_KEY_ADD); BUG_ON(ret < 0); @@ -3106,7 +3106,7 @@ static noinline int split_node(struct btrfs_trans_handle *trans, btrfs_mark_buffer_dirty(split); insert_ptr(trans, root, path, &disk_key, split->start, - path->slots[level + 1] + 1, level + 1, 1); + path->slots[level + 1] + 1, level + 1); if (path->slots[level] >= mid) { path->slots[level] -= mid; @@ -3643,7 +3643,7 @@ static noinline void copy_for_split(struct btrfs_trans_handle *trans, btrfs_set_header_nritems(l, mid); btrfs_item_key(right, &disk_key, 0); insert_ptr(trans, root, path, &disk_key, right->start, - path->slots[1] + 1, 1, 0); + path->slots[1] + 1, 1); btrfs_mark_buffer_dirty(right); btrfs_mark_buffer_dirty(l); @@ -3850,7 +3850,7 @@ again: if (mid <= slot) { btrfs_set_header_nritems(right, 0); insert_ptr(trans, root, path, &disk_key, right->start, - path->slots[1] + 1, 1, 0); + path->slots[1] + 1, 1); btrfs_tree_unlock(path->nodes[0]); free_extent_buffer(path->nodes[0]); path->nodes[0] = right; @@ -3859,7 +3859,7 @@ again: } else { btrfs_set_header_nritems(right, 0); insert_ptr(trans, root, path, &disk_key, right->start, - path->slots[1], 1, 0); + path->slots[1], 1); btrfs_tree_unlock(path->nodes[0]); free_extent_buffer(path->nodes[0]); path->nodes[0] = right; -- cgit v1.2.3 From 19956c7e94a7a58d6df8c4db5ae62f9109a7c663 Mon Sep 17 00:00:00 2001 From: Jan Schmidt Date: Fri, 22 Jun 2012 14:52:13 +0200 Subject: Btrfs: fix tree mod log rewind of ADD operations When a MOD_LOG_KEY_ADD operation is rewinded, we remove the key from the tree block. If its not the last key, removal involves a move operation. This move operation was explicitly done before this commit. However, at insertion time, there's a move operation before the actual addition to make room for the new key, which is recorded in the tree mod log as well. This means, we must drop the move operation when rewinding the add operation, because the next operation we'll be rewinding will be the corresponding MOD_LOG_MOVE_KEYS operation. Signed-off-by: Jan Schmidt --- fs/btrfs/ctree.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) (limited to 'fs/btrfs/ctree.c') diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index e005d9b04616..b98f8604f4f6 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -1094,11 +1094,7 @@ __tree_mod_log_rewind(struct extent_buffer *eb, u64 time_seq, tm->generation); break; case MOD_LOG_KEY_ADD: - if (tm->slot != n - 1) { - o_dst = btrfs_node_key_ptr_offset(tm->slot); - o_src = btrfs_node_key_ptr_offset(tm->slot + 1); - memmove_extent_buffer(eb, o_dst, o_src, p_size); - } + /* if a move operation is needed it's in the log */ n--; break; case MOD_LOG_MOVE_KEYS: -- cgit v1.2.3 From d42244a0d36ad0939c5f173ebf15841a0678899c Mon Sep 17 00:00:00 2001 From: Jan Schmidt Date: Fri, 22 Jun 2012 14:51:15 +0200 Subject: Btrfs: resolve tree mod log locking issue in btrfs_next_leaf With the tree mod log, we may end up with two roots (the current root and a rewinded version of it) both pointing to two leaves, l1 and l2, of which l2 had already been cow-ed in the current transaction. If we don't rewind any tree blocks, we cannot have two roots both pointing to an already cowed tree block. Now there is btrfs_next_leaf, which has a leaf locked and wants a lock on the next (right) leaf. And there is push_leaf_left, which has a (cowed!) leaf locked and wants a lock on the previous (left) leaf. In order to solve this dead lock situation, we use try_lock in btrfs_next_leaf (only in case it's called with a tree mod log time_seq paramter) and if we fail to get a lock on the next leaf, we give up our lock on the current leaf and retry from the very beginning. Signed-off-by: Jan Schmidt --- fs/btrfs/ctree.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'fs/btrfs/ctree.c') diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index b98f8604f4f6..8206b3900587 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -5119,6 +5119,18 @@ again: if (!path->skip_locking) { ret = btrfs_try_tree_read_lock(next); + if (!ret && time_seq) { + /* + * If we don't get the lock, we may be racing + * with push_leaf_left, holding that lock while + * itself waiting for the leaf we've currently + * locked. To solve this situation, we give up + * on our lock and cycle. + */ + btrfs_release_path(path); + cond_resched(); + goto again; + } if (!ret) { btrfs_set_path_blocking(path); btrfs_tree_read_lock(next); -- cgit v1.2.3