diff options
Diffstat (limited to 'drivers/md/persistent-data')
-rw-r--r-- | drivers/md/persistent-data/Kconfig | 2 | ||||
-rw-r--r-- | drivers/md/persistent-data/Makefile | 2 | ||||
-rw-r--r-- | drivers/md/persistent-data/dm-array.c | 808 | ||||
-rw-r--r-- | drivers/md/persistent-data/dm-array.h | 166 | ||||
-rw-r--r-- | drivers/md/persistent-data/dm-bitset.c | 163 | ||||
-rw-r--r-- | drivers/md/persistent-data/dm-bitset.h | 165 | ||||
-rw-r--r-- | drivers/md/persistent-data/dm-block-manager.c | 17 | ||||
-rw-r--r-- | drivers/md/persistent-data/dm-btree-internal.h | 17 | ||||
-rw-r--r-- | drivers/md/persistent-data/dm-btree-remove.c | 50 | ||||
-rw-r--r-- | drivers/md/persistent-data/dm-btree-spine.c | 27 | ||||
-rw-r--r-- | drivers/md/persistent-data/dm-btree.c | 83 | ||||
-rw-r--r-- | drivers/md/persistent-data/dm-btree.h | 17 | ||||
-rw-r--r-- | drivers/md/persistent-data/dm-space-map-common.c | 16 | ||||
-rw-r--r-- | drivers/md/persistent-data/dm-space-map-metadata.c | 2 | ||||
-rw-r--r-- | drivers/md/persistent-data/dm-transaction-manager.c | 21 |
15 files changed, 1468 insertions, 88 deletions
diff --git a/drivers/md/persistent-data/Kconfig b/drivers/md/persistent-data/Kconfig index ceb359050a59..19b268795415 100644 --- a/drivers/md/persistent-data/Kconfig +++ b/drivers/md/persistent-data/Kconfig @@ -1,6 +1,6 @@ config DM_PERSISTENT_DATA tristate - depends on BLK_DEV_DM && EXPERIMENTAL + depends on BLK_DEV_DM select LIBCRC32C select DM_BUFIO ---help--- diff --git a/drivers/md/persistent-data/Makefile b/drivers/md/persistent-data/Makefile index d8e7cb767c1e..ff528792c358 100644 --- a/drivers/md/persistent-data/Makefile +++ b/drivers/md/persistent-data/Makefile @@ -1,5 +1,7 @@ obj-$(CONFIG_DM_PERSISTENT_DATA) += dm-persistent-data.o dm-persistent-data-objs := \ + dm-array.o \ + dm-bitset.o \ dm-block-manager.o \ dm-space-map-common.o \ dm-space-map-disk.o \ diff --git a/drivers/md/persistent-data/dm-array.c b/drivers/md/persistent-data/dm-array.c new file mode 100644 index 000000000000..172147eb1d40 --- /dev/null +++ b/drivers/md/persistent-data/dm-array.c @@ -0,0 +1,808 @@ +/* + * Copyright (C) 2012 Red Hat, Inc. + * + * This file is released under the GPL. + */ + +#include "dm-array.h" +#include "dm-space-map.h" +#include "dm-transaction-manager.h" + +#include <linux/export.h> +#include <linux/device-mapper.h> + +#define DM_MSG_PREFIX "array" + +/*----------------------------------------------------------------*/ + +/* + * The array is implemented as a fully populated btree, which points to + * blocks that contain the packed values. This is more space efficient + * than just using a btree since we don't store 1 key per value. + */ +struct array_block { + __le32 csum; + __le32 max_entries; + __le32 nr_entries; + __le32 value_size; + __le64 blocknr; /* Block this node is supposed to live in. */ +} __packed; + +/*----------------------------------------------------------------*/ + +/* + * Validator methods. As usual we calculate a checksum, and also write the + * block location into the header (paranoia about ssds remapping areas by + * mistake). + */ +#define CSUM_XOR 595846735 + +static void array_block_prepare_for_write(struct dm_block_validator *v, + struct dm_block *b, + size_t size_of_block) +{ + struct array_block *bh_le = dm_block_data(b); + + bh_le->blocknr = cpu_to_le64(dm_block_location(b)); + bh_le->csum = cpu_to_le32(dm_bm_checksum(&bh_le->max_entries, + size_of_block - sizeof(__le32), + CSUM_XOR)); +} + +static int array_block_check(struct dm_block_validator *v, + struct dm_block *b, + size_t size_of_block) +{ + struct array_block *bh_le = dm_block_data(b); + __le32 csum_disk; + + if (dm_block_location(b) != le64_to_cpu(bh_le->blocknr)) { + DMERR_LIMIT("array_block_check failed: blocknr %llu != wanted %llu", + (unsigned long long) le64_to_cpu(bh_le->blocknr), + (unsigned long long) dm_block_location(b)); + return -ENOTBLK; + } + + csum_disk = cpu_to_le32(dm_bm_checksum(&bh_le->max_entries, + size_of_block - sizeof(__le32), + CSUM_XOR)); + if (csum_disk != bh_le->csum) { + DMERR_LIMIT("array_block_check failed: csum %u != wanted %u", + (unsigned) le32_to_cpu(csum_disk), + (unsigned) le32_to_cpu(bh_le->csum)); + return -EILSEQ; + } + + return 0; +} + +static struct dm_block_validator array_validator = { + .name = "array", + .prepare_for_write = array_block_prepare_for_write, + .check = array_block_check +}; + +/*----------------------------------------------------------------*/ + +/* + * Functions for manipulating the array blocks. + */ + +/* + * Returns a pointer to a value within an array block. + * + * index - The index into _this_ specific block. + */ +static void *element_at(struct dm_array_info *info, struct array_block *ab, + unsigned index) +{ + unsigned char *entry = (unsigned char *) (ab + 1); + + entry += index * info->value_type.size; + + return entry; +} + +/* + * Utility function that calls one of the value_type methods on every value + * in an array block. + */ +static void on_entries(struct dm_array_info *info, struct array_block *ab, + void (*fn)(void *, const void *)) +{ + unsigned i, nr_entries = le32_to_cpu(ab->nr_entries); + + for (i = 0; i < nr_entries; i++) + fn(info->value_type.context, element_at(info, ab, i)); +} + +/* + * Increment every value in an array block. + */ +static void inc_ablock_entries(struct dm_array_info *info, struct array_block *ab) +{ + struct dm_btree_value_type *vt = &info->value_type; + + if (vt->inc) + on_entries(info, ab, vt->inc); +} + +/* + * Decrement every value in an array block. + */ +static void dec_ablock_entries(struct dm_array_info *info, struct array_block *ab) +{ + struct dm_btree_value_type *vt = &info->value_type; + + if (vt->dec) + on_entries(info, ab, vt->dec); +} + +/* + * Each array block can hold this many values. + */ +static uint32_t calc_max_entries(size_t value_size, size_t size_of_block) +{ + return (size_of_block - sizeof(struct array_block)) / value_size; +} + +/* + * Allocate a new array block. The caller will need to unlock block. + */ +static int alloc_ablock(struct dm_array_info *info, size_t size_of_block, + uint32_t max_entries, + struct dm_block **block, struct array_block **ab) +{ + int r; + + r = dm_tm_new_block(info->btree_info.tm, &array_validator, block); + if (r) + return r; + + (*ab) = dm_block_data(*block); + (*ab)->max_entries = cpu_to_le32(max_entries); + (*ab)->nr_entries = cpu_to_le32(0); + (*ab)->value_size = cpu_to_le32(info->value_type.size); + + return 0; +} + +/* + * Pad an array block out with a particular value. Every instance will + * cause an increment of the value_type. new_nr must always be more than + * the current number of entries. + */ +static void fill_ablock(struct dm_array_info *info, struct array_block *ab, + const void *value, unsigned new_nr) +{ + unsigned i; + uint32_t nr_entries; + struct dm_btree_value_type *vt = &info->value_type; + + BUG_ON(new_nr > le32_to_cpu(ab->max_entries)); + BUG_ON(new_nr < le32_to_cpu(ab->nr_entries)); + + nr_entries = le32_to_cpu(ab->nr_entries); + for (i = nr_entries; i < new_nr; i++) { + if (vt->inc) + vt->inc(vt->context, value); + memcpy(element_at(info, ab, i), value, vt->size); + } + ab->nr_entries = cpu_to_le32(new_nr); +} + +/* + * Remove some entries from the back of an array block. Every value + * removed will be decremented. new_nr must be <= the current number of + * entries. + */ +static void trim_ablock(struct dm_array_info *info, struct array_block *ab, + unsigned new_nr) +{ + unsigned i; + uint32_t nr_entries; + struct dm_btree_value_type *vt = &info->value_type; + + BUG_ON(new_nr > le32_to_cpu(ab->max_entries)); + BUG_ON(new_nr > le32_to_cpu(ab->nr_entries)); + + nr_entries = le32_to_cpu(ab->nr_entries); + for (i = nr_entries; i > new_nr; i--) + if (vt->dec) + vt->dec(vt->context, element_at(info, ab, i - 1)); + ab->nr_entries = cpu_to_le32(new_nr); +} + +/* + * Read locks a block, and coerces it to an array block. The caller must + * unlock 'block' when finished. + */ +static int get_ablock(struct dm_array_info *info, dm_block_t b, + struct dm_block **block, struct array_block **ab) +{ + int r; + + r = dm_tm_read_lock(info->btree_info.tm, b, &array_validator, block); + if (r) + return r; + + *ab = dm_block_data(*block); + return 0; +} + +/* + * Unlocks an array block. + */ +static int unlock_ablock(struct dm_array_info *info, struct dm_block *block) +{ + return dm_tm_unlock(info->btree_info.tm, block); +} + +/*----------------------------------------------------------------*/ + +/* + * Btree manipulation. + */ + +/* + * Looks up an array block in the btree, and then read locks it. + * + * index is the index of the index of the array_block, (ie. the array index + * / max_entries). + */ +static int lookup_ablock(struct dm_array_info *info, dm_block_t root, + unsigned index, struct dm_block **block, + struct array_block **ab) +{ + int r; + uint64_t key = index; + __le64 block_le; + + r = dm_btree_lookup(&info->btree_info, root, &key, &block_le); + if (r) + return r; + + return get_ablock(info, le64_to_cpu(block_le), block, ab); +} + +/* + * Insert an array block into the btree. The block is _not_ unlocked. + */ +static int insert_ablock(struct dm_array_info *info, uint64_t index, + struct dm_block *block, dm_block_t *root) +{ + __le64 block_le = cpu_to_le64(dm_block_location(block)); + + __dm_bless_for_disk(block_le); + return dm_btree_insert(&info->btree_info, *root, &index, &block_le, root); +} + +/* + * Looks up an array block in the btree. Then shadows it, and updates the + * btree to point to this new shadow. 'root' is an input/output parameter + * for both the current root block, and the new one. + */ +static int shadow_ablock(struct dm_array_info *info, dm_block_t *root, + unsigned index, struct dm_block **block, + struct array_block **ab) +{ + int r, inc; + uint64_t key = index; + dm_block_t b; + __le64 block_le; + + /* + * lookup + */ + r = dm_btree_lookup(&info->btree_info, *root, &key, &block_le); + if (r) + return r; + b = le64_to_cpu(block_le); + + /* + * shadow + */ + r = dm_tm_shadow_block(info->btree_info.tm, b, + &array_validator, block, &inc); + if (r) + return r; + + *ab = dm_block_data(*block); + if (inc) + inc_ablock_entries(info, *ab); + + /* + * Reinsert. + * + * The shadow op will often be a noop. Only insert if it really + * copied data. + */ + if (dm_block_location(*block) != b) + r = insert_ablock(info, index, *block, root); + + return r; +} + +/* + * Allocate an new array block, and fill it with some values. + */ +static int insert_new_ablock(struct dm_array_info *info, size_t size_of_block, + uint32_t max_entries, + unsigned block_index, uint32_t nr, + const void *value, dm_block_t *root) +{ + int r; + struct dm_block *block; + struct array_block *ab; + + r = alloc_ablock(info, size_of_block, max_entries, &block, &ab); + if (r) + return r; + + fill_ablock(info, ab, value, nr); + r = insert_ablock(info, block_index, block, root); + unlock_ablock(info, block); + + return r; +} + +static int insert_full_ablocks(struct dm_array_info *info, size_t size_of_block, + unsigned begin_block, unsigned end_block, + unsigned max_entries, const void *value, + dm_block_t *root) +{ + int r = 0; + + for (; !r && begin_block != end_block; begin_block++) + r = insert_new_ablock(info, size_of_block, max_entries, begin_block, max_entries, value, root); + + return r; +} + +/* + * There are a bunch of functions involved with resizing an array. This + * structure holds information that commonly needed by them. Purely here + * to reduce parameter count. + */ +struct resize { + /* + * Describes the array. + */ + struct dm_array_info *info; + + /* + * The current root of the array. This gets updated. + */ + dm_block_t root; + + /* + * Metadata block size. Used to calculate the nr entries in an + * array block. + */ + size_t size_of_block; + + /* + * Maximum nr entries in an array block. + */ + unsigned max_entries; + + /* + * nr of completely full blocks in the array. + * + * 'old' refers to before the resize, 'new' after. + */ + unsigned old_nr_full_blocks, new_nr_full_blocks; + + /* + * Number of entries in the final block. 0 iff only full blocks in + * the array. + */ + unsigned old_nr_entries_in_last_block, new_nr_entries_in_last_block; + + /* + * The default value used when growing the array. + */ + const void *value; +}; + +/* + * Removes a consecutive set of array blocks from the btree. The values + * in block are decremented as a side effect of the btree remove. + * + * begin_index - the index of the first array block to remove. + * end_index - the one-past-the-end value. ie. this block is not removed. + */ +static int drop_blocks(struct resize *resize, unsigned begin_index, + unsigned end_index) +{ + int r; + + while (begin_index != end_index) { + uint64_t key = begin_index++; + r = dm_btree_remove(&resize->info->btree_info, resize->root, + &key, &resize->root); + if (r) + return r; + } + + return 0; +} + +/* + * Calculates how many blocks are needed for the array. + */ +static unsigned total_nr_blocks_needed(unsigned nr_full_blocks, + unsigned nr_entries_in_last_block) +{ + return nr_full_blocks + (nr_entries_in_last_block ? 1 : 0); +} + +/* + * Shrink an array. + */ +static int shrink(struct resize *resize) +{ + int r; + unsigned begin, end; + struct dm_block *block; + struct array_block *ab; + + /* + * Lose some blocks from the back? + */ + if (resize->new_nr_full_blocks < resize->old_nr_full_blocks) { + begin = total_nr_blocks_needed(resize->new_nr_full_blocks, + resize->new_nr_entries_in_last_block); + end = total_nr_blocks_needed(resize->old_nr_full_blocks, + resize->old_nr_entries_in_last_block); + + r = drop_blocks(resize, begin, end); + if (r) + return r; + } + + /* + * Trim the new tail block + */ + if (resize->new_nr_entries_in_last_block) { + r = shadow_ablock(resize->info, &resize->root, + resize->new_nr_full_blocks, &block, &ab); + if (r) + return r; + + trim_ablock(resize->info, ab, resize->new_nr_entries_in_last_block); + unlock_ablock(resize->info, block); + } + + return 0; +} + +/* + * Grow an array. + */ +static int grow_extend_tail_block(struct resize *resize, uint32_t new_nr_entries) +{ + int r; + struct dm_block *block; + struct array_block *ab; + + r = shadow_ablock(resize->info, &resize->root, + resize->old_nr_full_blocks, &block, &ab); + if (r) + return r; + + fill_ablock(resize->info, ab, resize->value, new_nr_entries); + unlock_ablock(resize->info, block); + + return r; +} + +static int grow_add_tail_block(struct resize *resize) +{ + return insert_new_ablock(resize->info, resize->size_of_block, + resize->max_entries, + resize->new_nr_full_blocks, + resize->new_nr_entries_in_last_block, + resize->value, &resize->root); +} + +static int grow_needs_more_blocks(struct resize *resize) +{ + int r; + + if (resize->old_nr_entries_in_last_block > 0) { + r = grow_extend_tail_block(resize, resize->max_entries); + if (r) + return r; + } + + r = insert_full_ablocks(resize->info, resize->size_of_block, + resize->old_nr_full_blocks, + resize->new_nr_full_blocks, + resize->max_entries, resize->value, + &resize->root); + if (r) + return r; + + if (resize->new_nr_entries_in_last_block) + r = grow_add_tail_block(resize); + + return r; +} + +static int grow(struct resize *resize) +{ + if (resize->new_nr_full_blocks > resize->old_nr_full_blocks) + return grow_needs_more_blocks(resize); + + else if (resize->old_nr_entries_in_last_block) + return grow_extend_tail_block(resize, resize->new_nr_entries_in_last_block); + + else + return grow_add_tail_block(resize); +} + +/*----------------------------------------------------------------*/ + +/* + * These are the value_type functions for the btree elements, which point + * to array blocks. + */ +static void block_inc(void *context, const void *value) +{ + __le64 block_le; + struct dm_array_info *info = context; + + memcpy(&block_le, value, sizeof(block_le)); + dm_tm_inc(info->btree_info.tm, le64_to_cpu(block_le)); +} + +static void block_dec(void *context, const void *value) +{ + int r; + uint64_t b; + __le64 block_le; + uint32_t ref_count; + struct dm_block *block; + struct array_block *ab; + struct dm_array_info *info = context; + + memcpy(&block_le, value, sizeof(block_le)); + b = le64_to_cpu(block_le); + + r = dm_tm_ref(info->btree_info.tm, b, &ref_count); + if (r) { + DMERR_LIMIT("couldn't get reference count for block %llu", + (unsigned long long) b); + return; + } + + if (ref_count == 1) { + /* + * We're about to drop the last reference to this ablock. + * So we need to decrement the ref count of the contents. + */ + r = get_ablock(info, b, &block, &ab); + if (r) { + DMERR_LIMIT("couldn't get array block %llu", + (unsigned long long) b); + return; + } + + dec_ablock_entries(info, ab); + unlock_ablock(info, block); + } + + dm_tm_dec(info->btree_info.tm, b); +} + +static int block_equal(void *context, const void *value1, const void *value2) +{ + return !memcmp(value1, value2, sizeof(__le64)); +} + +/*----------------------------------------------------------------*/ + +void dm_array_info_init(struct dm_array_info *info, + struct dm_transaction_manager *tm, + struct dm_btree_value_type *vt) +{ + struct dm_btree_value_type *bvt = &info->btree_info.value_type; + + memcpy(&info->value_type, vt, sizeof(info->value_type)); + info->btree_info.tm = tm; + info->btree_info.levels = 1; + + bvt->context = info; + bvt->size = sizeof(__le64); + bvt->inc = block_inc; + bvt->dec = block_dec; + bvt->equal = block_equal; +} +EXPORT_SYMBOL_GPL(dm_array_info_init); + +int dm_array_empty(struct dm_array_info *info, dm_block_t *root) +{ + return dm_btree_empty(&info->btree_info, root); +} +EXPORT_SYMBOL_GPL(dm_array_empty); + +static int array_resize(struct dm_array_info *info, dm_block_t root, + uint32_t old_size, uint32_t new_size, + const void *value, dm_block_t *new_root) +{ + int r; + struct resize resize; + + if (old_size == new_size) + return 0; + + resize.info = info; + resize.root = root; + resize.size_of_block = dm_bm_block_size(dm_tm_get_bm(info->btree_info.tm)); + resize.max_entries = calc_max_entries(info->value_type.size, + resize.size_of_block); + + resize.old_nr_full_blocks = old_size / resize.max_entries; + resize.old_nr_entries_in_last_block = old_size % resize.max_entries; + resize.new_nr_full_blocks = new_size / resize.max_entries; + resize.new_nr_entries_in_last_block = new_size % resize.max_entries; + resize.value = value; + + r = ((new_size > old_size) ? grow : shrink)(&resize); + if (r) + return r; + + *new_root = resize.root; + return 0; +} + +int dm_array_resize(struct dm_array_info *info, dm_block_t root, + uint32_t old_size, uint32_t new_size, + const void *value, dm_block_t *new_root) + __dm_written_to_disk(value) +{ + int r = array_resize(info, root, old_size, new_size, value, new_root); + __dm_unbless_for_disk(value); + return r; +} +EXPORT_SYMBOL_GPL(dm_array_resize); + +int dm_array_del(struct dm_array_info *info, dm_block_t root) +{ + return dm_btree_del(&info->btree_info, root); +} +EXPORT_SYMBOL_GPL(dm_array_del); + +int dm_array_get_value(struct dm_array_info *info, dm_block_t root, + uint32_t index, void *value_le) +{ + int r; + struct dm_block *block; + struct array_block *ab; + size_t size_of_block; + unsigned entry, max_entries; + + size_of_block = dm_bm_block_size(dm_tm_get_bm(info->btree_info.tm)); + max_entries = calc_max_entries(info->value_type.size, size_of_block); + + r = lookup_ablock(info, root, index / max_entries, &block, &ab); + if (r) + return r; + + entry = index % max_entries; + if (entry >= le32_to_cpu(ab->nr_entries)) + r = -ENODATA; + else + memcpy(value_le, element_at(info, ab, entry), + info->value_type.size); + + unlock_ablock(info, block); + return r; +} +EXPORT_SYMBOL_GPL(dm_array_get_value); + +static int array_set_value(struct dm_array_info *info, dm_block_t root, + uint32_t index, const void *value, dm_block_t *new_root) +{ + int r; + struct dm_block *block; + struct array_block *ab; + size_t size_of_block; + unsigned max_entries; + unsigned entry; + void *old_value; + struct dm_btree_value_type *vt = &info->value_type; + + size_of_block = dm_bm_block_size(dm_tm_get_bm(info->btree_info.tm)); + max_entries = calc_max_entries(info->value_type.size, size_of_block); + + r = shadow_ablock(info, &root, index / max_entries, &block, &ab); + if (r) + return r; + *new_root = root; + + entry = index % max_entries; + if (entry >= le32_to_cpu(ab->nr_entries)) { + r = -ENODATA; + goto out; + } + + old_value = element_at(info, ab, entry); + if (vt->dec && + (!vt->equal || !vt->equal(vt->context, old_value, value))) { + vt->dec(vt->context, old_value); + if (vt->inc) + vt->inc(vt->context, value); + } + + memcpy(old_value, value, info->value_type.size); + +out: + unlock_ablock(info, block); + return r; +} + +int dm_array_set_value(struct dm_array_info *info, dm_block_t root, + uint32_t index, const void *value, dm_block_t *new_root) + __dm_written_to_disk(value) +{ + int r; + + r = array_set_value(info, root, index, value, new_root); + __dm_unbless_for_disk(value); + return r; +} +EXPORT_SYMBOL_GPL(dm_array_set_value); + +struct walk_info { + struct dm_array_info *info; + int (*fn)(void *context, uint64_t key, void *leaf); + void *context; +}; + +static int walk_ablock(void *context, uint64_t *keys, void *leaf) +{ + struct walk_info *wi = context; + + int r; + unsigned i; + __le64 block_le; + unsigned nr_entries, max_entries; + struct dm_block *block; + struct array_block *ab; + + memcpy(&block_le, leaf, sizeof(block_le)); + r = get_ablock(wi->info, le64_to_cpu(block_le), &block, &ab); + if (r) + return r; + + max_entries = le32_to_cpu(ab->max_entries); + nr_entries = le32_to_cpu(ab->nr_entries); + for (i = 0; i < nr_entries; i++) { + r = wi->fn(wi->context, keys[0] * max_entries + i, + element_at(wi->info, ab, i)); + + if (r) + break; + } + + unlock_ablock(wi->info, block); + return r; +} + +int dm_array_walk(struct dm_array_info *info, dm_block_t root, + int (*fn)(void *, uint64_t key, void *leaf), + void *context) +{ + struct walk_info wi; + + wi.info = info; + wi.fn = fn; + wi.context = context; + + return dm_btree_walk(&info->btree_info, root, walk_ablock, &wi); +} +EXPORT_SYMBOL_GPL(dm_array_walk); + +/*----------------------------------------------------------------*/ diff --git a/drivers/md/persistent-data/dm-array.h b/drivers/md/persistent-data/dm-array.h new file mode 100644 index 000000000000..ea177d6fa58f --- /dev/null +++ b/drivers/md/persistent-data/dm-array.h @@ -0,0 +1,166 @@ +/* + * Copyright (C) 2012 Red Hat, Inc. + * + * This file is released under the GPL. + */ +#ifndef _LINUX_DM_ARRAY_H +#define _LINUX_DM_ARRAY_H + +#include "dm-btree.h" + +/*----------------------------------------------------------------*/ + +/* + * The dm-array is a persistent version of an array. It packs the data + * more efficiently than a btree which will result in less disk space use, + * and a performance boost. The element get and set operations are still + * O(ln(n)), but with a much smaller constant. + * + * The value type structure is reused from the btree type to support proper + * reference counting of values. + * + * The arrays implicitly know their length, and bounds are checked for + * lookups and updated. It doesn't store this in an accessible place + * because it would waste a whole metadata block. Make sure you store the + * size along with the array root in your encompassing data. + * + * Array entries are indexed via an unsigned integer starting from zero. + * Arrays are not sparse; if you resize an array to have 'n' entries then + * 'n - 1' will be the last valid index. + * + * Typical use: + * + * a) initialise a dm_array_info structure. This describes the array + * values and ties it into a specific transaction manager. It holds no + * instance data; the same info can be used for many similar arrays if + * you wish. + * + * b) Get yourself a root. The root is the index of a block of data on the + * disk that holds a particular instance of an array. You may have a + * pre existing root in your metadata that you wish to use, or you may + * want to create a brand new, empty array with dm_array_empty(). + * + * Like the other data structures in this library, dm_array objects are + * immutable between transactions. Update functions will return you the + * root for a _new_ array. If you've incremented the old root, via + * dm_tm_inc(), before calling the update function you may continue to use + * it in parallel with the new root. + * + * c) resize an array with dm_array_resize(). + * + * d) Get a value from the array with dm_array_get_value(). + * + * e) Set a value in the array with dm_array_set_value(). + * + * f) Walk an array of values in index order with dm_array_walk(). More + * efficient than making many calls to dm_array_get_value(). + * + * g) Destroy the array with dm_array_del(). This tells the transaction + * manager that you're no longer using this data structure so it can + * recycle it's blocks. (dm_array_dec() would be a better name for it, + * but del is in keeping with dm_btree_del()). + */ + +/* + * Describes an array. Don't initialise this structure yourself, use the + * init function below. + */ +struct dm_array_info { + struct dm_transaction_manager *tm; + struct dm_btree_value_type value_type; + struct dm_btree_info btree_info; +}; + +/* + * Sets up a dm_array_info structure. You don't need to do anything with + * this structure when you finish using it. + * + * info - the structure being filled in. + * tm - the transaction manager that should supervise this structure. + * vt - describes the leaf values. + */ +void dm_array_info_init(struct dm_array_info *info, + struct dm_transaction_manager *tm, + struct dm_btree_value_type *vt); + +/* + * Create an empty, zero length array. + * + * info - describes the array + * root - on success this will be filled out with the root block + */ +int dm_array_empty(struct dm_array_info *info, dm_block_t *root); + +/* + * Resizes the array. + * + * info - describes the array + * root - the root block of the array on disk + * old_size - the caller is responsible for remembering the size of + * the array + * new_size - can be bigger or smaller than old_size + * value - if we're growing the array the new entries will have this value + * new_root - on success, points to the new root block + * + * If growing the inc function for 'value' will be called the appropriate + * number of times. So if the caller is holding a reference they may want + * to drop it. + */ +int dm_array_resize(struct dm_array_info *info, dm_block_t root, + uint32_t old_size, uint32_t new_size, + const void *value, dm_block_t *new_root) + __dm_written_to_disk(value); + +/* + * Frees a whole array. The value_type's decrement operation will be called + * for all values in the array + */ +int dm_array_del(struct dm_array_info *info, dm_block_t root); + +/* + * Lookup a value in the array + * + * info - describes the array + * root - root block of the array + * index - array index + * value - the value to be read. Will be in on-disk format of course. + * + * -ENODATA will be returned if the index is out of bounds. + */ +int dm_array_get_value(struct dm_array_info *info, dm_block_t root, + uint32_t index, void *value); + +/* + * Set an entry in the array. + * + * info - describes the array + * root - root block of the array + * index - array index + * value - value to be written to disk. Make sure you confirm the value is + * in on-disk format with__dm_bless_for_disk() before calling. + * new_root - the new root block + * + * The old value being overwritten will be decremented, the new value + * incremented. + * + * -ENODATA will be returned if the index is out of bounds. + */ +int dm_array_set_value(struct dm_array_info *info, dm_block_t root, + uint32_t index, const void *value, dm_block_t *new_root) + __dm_written_to_disk(value); + +/* + * Walk through all the entries in an array. + * + * info - describes the array + * root - root block of the array + * fn - called back for every element + * context - passed to the callback + */ +int dm_array_walk(struct dm_array_info *info, dm_block_t root, + int (*fn)(void *context, uint64_t key, void *leaf), + void *context); + +/*----------------------------------------------------------------*/ + +#endif /* _LINUX_DM_ARRAY_H */ diff --git a/drivers/md/persistent-data/dm-bitset.c b/drivers/md/persistent-data/dm-bitset.c new file mode 100644 index 000000000000..cd9a86d4cdf0 --- /dev/null +++ b/drivers/md/persistent-data/dm-bitset.c @@ -0,0 +1,163 @@ +/* + * Copyright (C) 2012 Red Hat, Inc. + * + * This file is released under the GPL. + */ + +#include "dm-bitset.h" +#include "dm-transaction-manager.h" + +#include <linux/export.h> +#include <linux/device-mapper.h> + +#define DM_MSG_PREFIX "bitset" +#define BITS_PER_ARRAY_ENTRY 64 + +/*----------------------------------------------------------------*/ + +static struct dm_btree_value_type bitset_bvt = { + .context = NULL, + .size = sizeof(__le64), + .inc = NULL, + .dec = NULL, + .equal = NULL, +}; + +/*----------------------------------------------------------------*/ + +void dm_disk_bitset_init(struct dm_transaction_manager *tm, + struct dm_disk_bitset *info) +{ + dm_array_info_init(&info->array_info, tm, &bitset_bvt); + info->current_index_set = false; +} +EXPORT_SYMBOL_GPL(dm_disk_bitset_init); + +int dm_bitset_empty(struct dm_disk_bitset *info, dm_block_t *root) +{ + return dm_array_empty(&info->array_info, root); +} +EXPORT_SYMBOL_GPL(dm_bitset_empty); + +int dm_bitset_resize(struct dm_disk_bitset *info, dm_block_t root, + uint32_t old_nr_entries, uint32_t new_nr_entries, + bool default_value, dm_block_t *new_root) +{ + uint32_t old_blocks = dm_div_up(old_nr_entries, BITS_PER_ARRAY_ENTRY); + uint32_t new_blocks = dm_div_up(new_nr_entries, BITS_PER_ARRAY_ENTRY); + __le64 value = default_value ? cpu_to_le64(~0) : cpu_to_le64(0); + + __dm_bless_for_disk(&value); + return dm_array_resize(&info->array_info, root, old_blocks, new_blocks, + &value, new_root); +} +EXPORT_SYMBOL_GPL(dm_bitset_resize); + +int dm_bitset_del(struct dm_disk_bitset *info, dm_block_t root) +{ + return dm_array_del(&info->array_info, root); +} +EXPORT_SYMBOL_GPL(dm_bitset_del); + +int dm_bitset_flush(struct dm_disk_bitset *info, dm_block_t root, + dm_block_t *new_root) +{ + int r; + __le64 value; + + if (!info->current_index_set) + return 0; + + value = cpu_to_le64(info->current_bits); + + __dm_bless_for_disk(&value); + r = dm_array_set_value(&info->array_info, root, info->current_index, + &value, new_root); + if (r) + return r; + + info->current_index_set = false; + return 0; +} +EXPORT_SYMBOL_GPL(dm_bitset_flush); + +static int read_bits(struct dm_disk_bitset *info, dm_block_t root, + uint32_t array_index) +{ + int r; + __le64 value; + + r = dm_array_get_value(&info->array_info, root, array_index, &value); + if (r) + return r; + + info->current_bits = le64_to_cpu(value); + info->current_index_set = true; + info->current_index = array_index; + return 0; +} + +static int get_array_entry(struct dm_disk_bitset *info, dm_block_t root, + uint32_t index, dm_block_t *new_root) +{ + int r; + unsigned array_index = index / BITS_PER_ARRAY_ENTRY; + + if (info->current_index_set) { + if (info->current_index == array_index) + return 0; + + r = dm_bitset_flush(info, root, new_root); + if (r) + return r; + } + + return read_bits(info, root, array_index); +} + +int dm_bitset_set_bit(struct dm_disk_bitset *info, dm_block_t root, + uint32_t index, dm_block_t *new_root) +{ + int r; + unsigned b = index % BITS_PER_ARRAY_ENTRY; + + r = get_array_entry(info, root, index, new_root); + if (r) + return r; + + set_bit(b, (unsigned long *) &info->current_bits); + return 0; +} +EXPORT_SYMBOL_GPL(dm_bitset_set_bit); + +int dm_bitset_clear_bit(struct dm_disk_bitset *info, dm_block_t root, + uint32_t index, dm_block_t *new_root) +{ + int r; + unsigned b = index % BITS_PER_ARRAY_ENTRY; + + r = get_array_entry(info, root, index, new_root); + if (r) + return r; + + clear_bit(b, (unsigned long *) &info->current_bits); + return 0; +} +EXPORT_SYMBOL_GPL(dm_bitset_clear_bit); + +int dm_bitset_test_bit(struct dm_disk_bitset *info, dm_block_t root, + uint32_t index, dm_block_t *new_root, bool *result) +{ + int r; + unsigned b = index % BITS_PER_ARRAY_ENTRY; + + r = get_array_entry(info, root, index, new_root); + if (r) + return r; + + *result = test_bit(b, (unsigned long *) &info->current_bits); + return 0; +} +EXPORT_SYMBOL_GPL(dm_bitset_test_bit); + +/*----------------------------------------------------------------*/ diff --git a/drivers/md/persistent-data/dm-bitset.h b/drivers/md/persistent-data/dm-bitset.h new file mode 100644 index 000000000000..e1b9bea14aa1 --- /dev/null +++ b/drivers/md/persistent-data/dm-bitset.h @@ -0,0 +1,165 @@ +/* + * Copyright (C) 2012 Red Hat, Inc. + * + * This file is released under the GPL. + */ +#ifndef _LINUX_DM_BITSET_H +#define _LINUX_DM_BITSET_H + +#include "dm-array.h" + +/*----------------------------------------------------------------*/ + +/* + * This bitset type is a thin wrapper round a dm_array of 64bit words. It + * uses a tiny, one word cache to reduce the number of array lookups and so + * increase performance. + * + * Like the dm-array that it's based on, the caller needs to keep track of + * the size of the bitset separately. The underlying dm-array implicitly + * knows how many words it's storing and will return -ENODATA if you try + * and access an out of bounds word. However, an out of bounds bit in the + * final word will _not_ be detected, you have been warned. + * + * Bits are indexed from zero. + + * Typical use: + * + * a) Initialise a dm_disk_bitset structure with dm_disk_bitset_init(). + * This describes the bitset and includes the cache. It's not called it + * dm_bitset_info in line with other data structures because it does + * include instance data. + * + * b) Get yourself a root. The root is the index of a block of data on the + * disk that holds a particular instance of an bitset. You may have a + * pre existing root in your metadata that you wish to use, or you may + * want to create a brand new, empty bitset with dm_bitset_empty(). + * + * Like the other data structures in this library, dm_bitset objects are + * immutable between transactions. Update functions will return you the + * root for a _new_ array. If you've incremented the old root, via + * dm_tm_inc(), before calling the update function you may continue to use + * it in parallel with the new root. + * + * Even read operations may trigger the cache to be flushed and as such + * return a root for a new, updated bitset. + * + * c) resize a bitset with dm_bitset_resize(). + * + * d) Set a bit with dm_bitset_set_bit(). + * + * e) Clear a bit with dm_bitset_clear_bit(). + * + * f) Test a bit with dm_bitset_test_bit(). + * + * g) Flush all updates from the cache with dm_bitset_flush(). + * + * h) Destroy the bitset with dm_bitset_del(). This tells the transaction + * manager that you're no longer using this data structure so it can + * recycle it's blocks. (dm_bitset_dec() would be a better name for it, + * but del is in keeping with dm_btree_del()). + */ + +/* + * Opaque object. Unlike dm_array_info, you should have one of these per + * bitset. Initialise with dm_disk_bitset_init(). + */ +struct dm_disk_bitset { + struct dm_array_info array_info; + + uint32_t current_index; + uint64_t current_bits; + + bool current_index_set:1; +}; + +/* + * Sets up a dm_disk_bitset structure. You don't need to do anything with + * this structure when you finish using it. + * + * tm - the transaction manager that should supervise this structure + * info - the structure being initialised + */ +void dm_disk_bitset_init(struct dm_transaction_manager *tm, + struct dm_disk_bitset *info); + +/* + * Create an empty, zero length bitset. + * + * info - describes the bitset + * new_root - on success, points to the new root block + */ +int dm_bitset_empty(struct dm_disk_bitset *info, dm_block_t *new_root); + +/* + * Resize the bitset. + * + * info - describes the bitset + * old_root - the root block of the array on disk + * old_nr_entries - the number of bits in the old bitset + * new_nr_entries - the number of bits you want in the new bitset + * default_value - the value for any new bits + * new_root - on success, points to the new root block + */ +int dm_bitset_resize(struct dm_disk_bitset *info, dm_block_t old_root, + uint32_t old_nr_entries, uint32_t new_nr_entries, + bool default_value, dm_block_t *new_root); + +/* + * Frees the bitset. + */ +int dm_bitset_del(struct dm_disk_bitset *info, dm_block_t root); + +/* + * Set a bit. + * + * info - describes the bitset + * root - the root block of the bitset + * index - the bit index + * new_root - on success, points to the new root block + * + * -ENODATA will be returned if the index is out of bounds. + */ +int dm_bitset_set_bit(struct dm_disk_bitset *info, dm_block_t root, + uint32_t index, dm_block_t *new_root); + +/* + * Clears a bit. + * + * info - describes the bitset + * root - the root block of the bitset + * index - the bit index + * new_root - on success, points to the new root block + * + * -ENODATA will be returned if the index is out of bounds. + */ +int dm_bitset_clear_bit(struct dm_disk_bitset *info, dm_block_t root, + uint32_t index, dm_block_t *new_root); + +/* + * Tests a bit. + * + * info - describes the bitset + * root - the root block of the bitset + * index - the bit index + * new_root - on success, points to the new root block (cached values may have been written) + * result - the bit value you're after + * + * -ENODATA will be returned if the index is out of bounds. + */ +int dm_bitset_test_bit(struct dm_disk_bitset *info, dm_block_t root, + uint32_t index, dm_block_t *new_root, bool *result); + +/* + * Flush any cached changes to disk. + * + * info - describes the bitset + * root - the root block of the bitset + * new_root - on success, points to the new root block + */ +int dm_bitset_flush(struct dm_disk_bitset *info, dm_block_t root, + dm_block_t *new_root); + +/*----------------------------------------------------------------*/ + +#endif /* _LINUX_DM_BITSET_H */ diff --git a/drivers/md/persistent-data/dm-block-manager.c b/drivers/md/persistent-data/dm-block-manager.c index 5ba277768d99..81b513890e2b 100644 --- a/drivers/md/persistent-data/dm-block-manager.c +++ b/drivers/md/persistent-data/dm-block-manager.c @@ -25,7 +25,7 @@ * may be held at once. This is just an implementation detail. * * ii) Recursive locking attempts are detected and return EINVAL. A stack - * trace is also emitted for the previous lock aquisition. + * trace is also emitted for the previous lock acquisition. * * iii) Priority is given to write locks. */ @@ -109,7 +109,7 @@ static int __check_holder(struct block_lock *lock) DMERR("previously held here:"); print_stack_trace(lock->traces + i, 4); - DMERR("subsequent aquisition attempted here:"); + DMERR("subsequent acquisition attempted here:"); t.nr_entries = 0; t.max_entries = MAX_STACK; t.entries = entries; @@ -428,15 +428,17 @@ static int dm_bm_validate_buffer(struct dm_block_manager *bm, if (!v) return 0; r = v->check(v, (struct dm_block *) buf, dm_bufio_get_block_size(bm->bufio)); - if (unlikely(r)) + if (unlikely(r)) { + DMERR_LIMIT("%s validator check failed for block %llu", v->name, + (unsigned long long) dm_bufio_get_block_number(buf)); return r; + } aux->validator = v; } else { if (unlikely(aux->validator != v)) { - DMERR("validator mismatch (old=%s vs new=%s) for block %llu", - aux->validator->name, v ? v->name : "NULL", - (unsigned long long) - dm_bufio_get_block_number(buf)); + DMERR_LIMIT("validator mismatch (old=%s vs new=%s) for block %llu", + aux->validator->name, v ? v->name : "NULL", + (unsigned long long) dm_bufio_get_block_number(buf)); return -EINVAL; } } @@ -611,6 +613,7 @@ int dm_bm_flush_and_unlock(struct dm_block_manager *bm, return dm_bufio_write_dirty_buffers(bm->bufio); } +EXPORT_SYMBOL_GPL(dm_bm_flush_and_unlock); void dm_bm_set_read_only(struct dm_block_manager *bm) { diff --git a/drivers/md/persistent-data/dm-btree-internal.h b/drivers/md/persistent-data/dm-btree-internal.h index 5709bfeab1e8..37d367bb9aa8 100644 --- a/drivers/md/persistent-data/dm-btree-internal.h +++ b/drivers/md/persistent-data/dm-btree-internal.h @@ -36,13 +36,13 @@ struct node_header { __le32 padding; } __packed; -struct node { +struct btree_node { struct node_header header; __le64 keys[0]; } __packed; -void inc_children(struct dm_transaction_manager *tm, struct node *n, +void inc_children(struct dm_transaction_manager *tm, struct btree_node *n, struct dm_btree_value_type *vt); int new_block(struct dm_btree_info *info, struct dm_block **result); @@ -64,7 +64,8 @@ struct ro_spine { void init_ro_spine(struct ro_spine *s, struct dm_btree_info *info); int exit_ro_spine(struct ro_spine *s); int ro_step(struct ro_spine *s, dm_block_t new_child); -struct node *ro_node(struct ro_spine *s); +void ro_pop(struct ro_spine *s); +struct btree_node *ro_node(struct ro_spine *s); struct shadow_spine { struct dm_btree_info *info; @@ -98,17 +99,17 @@ int shadow_root(struct shadow_spine *s); /* * Some inlines. */ -static inline __le64 *key_ptr(struct node *n, uint32_t index) +static inline __le64 *key_ptr(struct btree_node *n, uint32_t index) { return n->keys + index; } -static inline void *value_base(struct node *n) +static inline void *value_base(struct btree_node *n) { return &n->keys[le32_to_cpu(n->header.max_entries)]; } -static inline void *value_ptr(struct node *n, uint32_t index) +static inline void *value_ptr(struct btree_node *n, uint32_t index) { uint32_t value_size = le32_to_cpu(n->header.value_size); return value_base(n) + (value_size * index); @@ -117,7 +118,7 @@ static inline void *value_ptr(struct node *n, uint32_t index) /* * Assumes the values are suitably-aligned and converts to core format. */ -static inline uint64_t value64(struct node *n, uint32_t index) +static inline uint64_t value64(struct btree_node *n, uint32_t index) { __le64 *values_le = value_base(n); @@ -127,7 +128,7 @@ static inline uint64_t value64(struct node *n, uint32_t index) /* * Searching for a key within a single node. */ -int lower_bound(struct node *n, uint64_t key); +int lower_bound(struct btree_node *n, uint64_t key); extern struct dm_block_validator btree_node_validator; diff --git a/drivers/md/persistent-data/dm-btree-remove.c b/drivers/md/persistent-data/dm-btree-remove.c index aa71e2359a07..c4f28133ef82 100644 --- a/drivers/md/persistent-data/dm-btree-remove.c +++ b/drivers/md/persistent-data/dm-btree-remove.c @@ -53,7 +53,7 @@ /* * Some little utilities for moving node data around. */ -static void node_shift(struct node *n, int shift) +static void node_shift(struct btree_node *n, int shift) { uint32_t nr_entries = le32_to_cpu(n->header.nr_entries); uint32_t value_size = le32_to_cpu(n->header.value_size); @@ -79,7 +79,7 @@ static void node_shift(struct node *n, int shift) } } -static void node_copy(struct node *left, struct node *right, int shift) +static void node_copy(struct btree_node *left, struct btree_node *right, int shift) { uint32_t nr_left = le32_to_cpu(left->header.nr_entries); uint32_t value_size = le32_to_cpu(left->header.value_size); @@ -108,7 +108,7 @@ static void node_copy(struct node *left, struct node *right, int shift) /* * Delete a specific entry from a leaf node. */ -static void delete_at(struct node *n, unsigned index) +static void delete_at(struct btree_node *n, unsigned index) { unsigned nr_entries = le32_to_cpu(n->header.nr_entries); unsigned nr_to_copy = nr_entries - (index + 1); @@ -128,7 +128,7 @@ static void delete_at(struct node *n, unsigned index) n->header.nr_entries = cpu_to_le32(nr_entries - 1); } -static unsigned merge_threshold(struct node *n) +static unsigned merge_threshold(struct btree_node *n) { return le32_to_cpu(n->header.max_entries) / 3; } @@ -136,7 +136,7 @@ static unsigned merge_threshold(struct node *n) struct child { unsigned index; struct dm_block *block; - struct node *n; + struct btree_node *n; }; static struct dm_btree_value_type le64_type = { @@ -147,7 +147,7 @@ static struct dm_btree_value_type le64_type = { .equal = NULL }; -static int init_child(struct dm_btree_info *info, struct node *parent, +static int init_child(struct dm_btree_info *info, struct btree_node *parent, unsigned index, struct child *result) { int r, inc; @@ -177,7 +177,7 @@ static int exit_child(struct dm_btree_info *info, struct child *c) return dm_tm_unlock(info->tm, c->block); } -static void shift(struct node *left, struct node *right, int count) +static void shift(struct btree_node *left, struct btree_node *right, int count) { uint32_t nr_left = le32_to_cpu(left->header.nr_entries); uint32_t nr_right = le32_to_cpu(right->header.nr_entries); @@ -203,11 +203,11 @@ static void shift(struct node *left, struct node *right, int count) right->header.nr_entries = cpu_to_le32(nr_right + count); } -static void __rebalance2(struct dm_btree_info *info, struct node *parent, +static void __rebalance2(struct dm_btree_info *info, struct btree_node *parent, struct child *l, struct child *r) { - struct node *left = l->n; - struct node *right = r->n; + struct btree_node *left = l->n; + struct btree_node *right = r->n; uint32_t nr_left = le32_to_cpu(left->header.nr_entries); uint32_t nr_right = le32_to_cpu(right->header.nr_entries); unsigned threshold = 2 * merge_threshold(left) + 1; @@ -239,7 +239,7 @@ static int rebalance2(struct shadow_spine *s, struct dm_btree_info *info, unsigned left_index) { int r; - struct node *parent; + struct btree_node *parent; struct child left, right; parent = dm_block_data(shadow_current(s)); @@ -270,9 +270,9 @@ static int rebalance2(struct shadow_spine *s, struct dm_btree_info *info, * in right, then rebalance2. This wastes some cpu, but I want something * simple atm. */ -static void delete_center_node(struct dm_btree_info *info, struct node *parent, +static void delete_center_node(struct dm_btree_info *info, struct btree_node *parent, struct child *l, struct child *c, struct child *r, - struct node *left, struct node *center, struct node *right, + struct btree_node *left, struct btree_node *center, struct btree_node *right, uint32_t nr_left, uint32_t nr_center, uint32_t nr_right) { uint32_t max_entries = le32_to_cpu(left->header.max_entries); @@ -301,9 +301,9 @@ static void delete_center_node(struct dm_btree_info *info, struct node *parent, /* * Redistributes entries among 3 sibling nodes. */ -static void redistribute3(struct dm_btree_info *info, struct node *parent, +static void redistribute3(struct dm_btree_info *info, struct btree_node *parent, struct child *l, struct child *c, struct child *r, - struct node *left, struct node *center, struct node *right, + struct btree_node *left, struct btree_node *center, struct btree_node *right, uint32_t nr_left, uint32_t nr_center, uint32_t nr_right) { int s; @@ -343,12 +343,12 @@ static void redistribute3(struct dm_btree_info *info, struct node *parent, *key_ptr(parent, r->index) = right->keys[0]; } -static void __rebalance3(struct dm_btree_info *info, struct node *parent, +static void __rebalance3(struct dm_btree_info *info, struct btree_node *parent, struct child *l, struct child *c, struct child *r) { - struct node *left = l->n; - struct node *center = c->n; - struct node *right = r->n; + struct btree_node *left = l->n; + struct btree_node *center = c->n; + struct btree_node *right = r->n; uint32_t nr_left = le32_to_cpu(left->header.nr_entries); uint32_t nr_center = le32_to_cpu(center->header.nr_entries); @@ -371,7 +371,7 @@ static int rebalance3(struct shadow_spine *s, struct dm_btree_info *info, unsigned left_index) { int r; - struct node *parent = dm_block_data(shadow_current(s)); + struct btree_node *parent = dm_block_data(shadow_current(s)); struct child left, center, right; /* @@ -421,7 +421,7 @@ static int get_nr_entries(struct dm_transaction_manager *tm, { int r; struct dm_block *block; - struct node *n; + struct btree_node *n; r = dm_tm_read_lock(tm, b, &btree_node_validator, &block); if (r) @@ -438,7 +438,7 @@ static int rebalance_children(struct shadow_spine *s, { int i, r, has_left_sibling, has_right_sibling; uint32_t child_entries; - struct node *n; + struct btree_node *n; n = dm_block_data(shadow_current(s)); @@ -483,7 +483,7 @@ static int rebalance_children(struct shadow_spine *s, return r; } -static int do_leaf(struct node *n, uint64_t key, unsigned *index) +static int do_leaf(struct btree_node *n, uint64_t key, unsigned *index) { int i = lower_bound(n, key); @@ -506,7 +506,7 @@ static int remove_raw(struct shadow_spine *s, struct dm_btree_info *info, uint64_t key, unsigned *index) { int i = *index, r; - struct node *n; + struct btree_node *n; for (;;) { r = shadow_step(s, root, vt); @@ -556,7 +556,7 @@ int dm_btree_remove(struct dm_btree_info *info, dm_block_t root, unsigned level, last_level = info->levels - 1; int index = 0, r = 0; struct shadow_spine spine; - struct node *n; + struct btree_node *n; init_shadow_spine(&spine, info); for (level = 0; level < info->levels; level++) { diff --git a/drivers/md/persistent-data/dm-btree-spine.c b/drivers/md/persistent-data/dm-btree-spine.c index d9a7912ee8ee..cf9fd676ae44 100644 --- a/drivers/md/persistent-data/dm-btree-spine.c +++ b/drivers/md/persistent-data/dm-btree-spine.c @@ -23,7 +23,7 @@ static void node_prepare_for_write(struct dm_block_validator *v, struct dm_block *b, size_t block_size) { - struct node *n = dm_block_data(b); + struct btree_node *n = dm_block_data(b); struct node_header *h = &n->header; h->blocknr = cpu_to_le64(dm_block_location(b)); @@ -38,15 +38,15 @@ static int node_check(struct dm_block_validator *v, struct dm_block *b, size_t block_size) { - struct node *n = dm_block_data(b); + struct btree_node *n = dm_block_data(b); struct node_header *h = &n->header; size_t value_size; __le32 csum_disk; uint32_t flags; if (dm_block_location(b) != le64_to_cpu(h->blocknr)) { - DMERR("node_check failed blocknr %llu wanted %llu", - le64_to_cpu(h->blocknr), dm_block_location(b)); + DMERR_LIMIT("node_check failed: blocknr %llu != wanted %llu", + le64_to_cpu(h->blocknr), dm_block_location(b)); return -ENOTBLK; } @@ -54,8 +54,8 @@ static int node_check(struct dm_block_validator *v, block_size - sizeof(__le32), BTREE_CSUM_XOR)); if (csum_disk != h->csum) { - DMERR("node_check failed csum %u wanted %u", - le32_to_cpu(csum_disk), le32_to_cpu(h->csum)); + DMERR_LIMIT("node_check failed: csum %u != wanted %u", + le32_to_cpu(csum_disk), le32_to_cpu(h->csum)); return -EILSEQ; } @@ -63,12 +63,12 @@ static int node_check(struct dm_block_validator *v, if (sizeof(struct node_header) + (sizeof(__le64) + value_size) * le32_to_cpu(h->max_entries) > block_size) { - DMERR("node_check failed: max_entries too large"); + DMERR_LIMIT("node_check failed: max_entries too large"); return -EILSEQ; } if (le32_to_cpu(h->nr_entries) > le32_to_cpu(h->max_entries)) { - DMERR("node_check failed, too many entries"); + DMERR_LIMIT("node_check failed: too many entries"); return -EILSEQ; } @@ -77,7 +77,7 @@ static int node_check(struct dm_block_validator *v, */ flags = le32_to_cpu(h->flags); if (!(flags & INTERNAL_NODE) && !(flags & LEAF_NODE)) { - DMERR("node_check failed, node is neither INTERNAL or LEAF"); + DMERR_LIMIT("node_check failed: node is neither INTERNAL or LEAF"); return -EILSEQ; } @@ -164,7 +164,14 @@ int ro_step(struct ro_spine *s, dm_block_t new_child) return r; } -struct node *ro_node(struct ro_spine *s) +void ro_pop(struct ro_spine *s) +{ + BUG_ON(!s->count); + --s->count; + unlock_block(s->info, s->nodes[s->count]); +} + +struct btree_node *ro_node(struct ro_spine *s) { struct dm_block *block; diff --git a/drivers/md/persistent-data/dm-btree.c b/drivers/md/persistent-data/dm-btree.c index d12b2cc51f1a..35865425e4b4 100644 --- a/drivers/md/persistent-data/dm-btree.c +++ b/drivers/md/persistent-data/dm-btree.c @@ -38,7 +38,7 @@ static void array_insert(void *base, size_t elt_size, unsigned nr_elts, /*----------------------------------------------------------------*/ /* makes the assumption that no two keys are the same. */ -static int bsearch(struct node *n, uint64_t key, int want_hi) +static int bsearch(struct btree_node *n, uint64_t key, int want_hi) { int lo = -1, hi = le32_to_cpu(n->header.nr_entries); @@ -58,12 +58,12 @@ static int bsearch(struct node *n, uint64_t key, int want_hi) return want_hi ? hi : lo; } -int lower_bound(struct node *n, uint64_t key) +int lower_bound(struct btree_node *n, uint64_t key) { return bsearch(n, key, 0); } -void inc_children(struct dm_transaction_manager *tm, struct node *n, +void inc_children(struct dm_transaction_manager *tm, struct btree_node *n, struct dm_btree_value_type *vt) { unsigned i; @@ -77,7 +77,7 @@ void inc_children(struct dm_transaction_manager *tm, struct node *n, vt->inc(vt->context, value_ptr(n, i)); } -static int insert_at(size_t value_size, struct node *node, unsigned index, +static int insert_at(size_t value_size, struct btree_node *node, unsigned index, uint64_t key, void *value) __dm_written_to_disk(value) { @@ -122,7 +122,7 @@ int dm_btree_empty(struct dm_btree_info *info, dm_block_t *root) { int r; struct dm_block *b; - struct node *n; + struct btree_node *n; size_t block_size; uint32_t max_entries; @@ -154,7 +154,7 @@ EXPORT_SYMBOL_GPL(dm_btree_empty); #define MAX_SPINE_DEPTH 64 struct frame { struct dm_block *b; - struct node *n; + struct btree_node *n; unsigned level; unsigned nr_children; unsigned current_child; @@ -230,6 +230,11 @@ static void pop_frame(struct del_stack *s) dm_tm_unlock(s->tm, f->b); } +static bool is_internal_level(struct dm_btree_info *info, struct frame *f) +{ + return f->level < (info->levels - 1); +} + int dm_btree_del(struct dm_btree_info *info, dm_block_t root) { int r; @@ -241,7 +246,7 @@ int dm_btree_del(struct dm_btree_info *info, dm_block_t root) s->tm = info->tm; s->top = -1; - r = push_frame(s, root, 1); + r = push_frame(s, root, 0); if (r) goto out; @@ -267,7 +272,7 @@ int dm_btree_del(struct dm_btree_info *info, dm_block_t root) if (r) goto out; - } else if (f->level != (info->levels - 1)) { + } else if (is_internal_level(info, f)) { b = value64(f->n, f->current_child); f->current_child++; r = push_frame(s, b, f->level + 1); @@ -295,7 +300,7 @@ EXPORT_SYMBOL_GPL(dm_btree_del); /*----------------------------------------------------------------*/ static int btree_lookup_raw(struct ro_spine *s, dm_block_t block, uint64_t key, - int (*search_fn)(struct node *, uint64_t), + int (*search_fn)(struct btree_node *, uint64_t), uint64_t *result_key, void *v, size_t value_size) { int i, r; @@ -406,7 +411,7 @@ static int btree_split_sibling(struct shadow_spine *s, dm_block_t root, size_t size; unsigned nr_left, nr_right; struct dm_block *left, *right, *parent; - struct node *ln, *rn, *pn; + struct btree_node *ln, *rn, *pn; __le64 location; left = shadow_current(s); @@ -491,7 +496,7 @@ static int btree_split_beneath(struct shadow_spine *s, uint64_t key) size_t size; unsigned nr_left, nr_right; struct dm_block *left, *right, *new_parent; - struct node *pn, *ln, *rn; + struct btree_node *pn, *ln, *rn; __le64 val; new_parent = shadow_current(s); @@ -576,7 +581,7 @@ static int btree_insert_raw(struct shadow_spine *s, dm_block_t root, uint64_t key, unsigned *index) { int r, i = *index, top = 1; - struct node *node; + struct btree_node *node; for (;;) { r = shadow_step(s, root, vt); @@ -643,7 +648,7 @@ static int insert(struct dm_btree_info *info, dm_block_t root, unsigned level, index = -1, last_level = info->levels - 1; dm_block_t block = root; struct shadow_spine spine; - struct node *n; + struct btree_node *n; struct dm_btree_value_type le64_type; le64_type.context = NULL; @@ -802,3 +807,55 @@ int dm_btree_find_highest_key(struct dm_btree_info *info, dm_block_t root, return r ? r : count; } EXPORT_SYMBOL_GPL(dm_btree_find_highest_key); + +/* + * FIXME: We shouldn't use a recursive algorithm when we have limited stack + * space. Also this only works for single level trees. + */ +static int walk_node(struct ro_spine *s, dm_block_t block, + int (*fn)(void *context, uint64_t *keys, void *leaf), + void *context) +{ + int r; + unsigned i, nr; + struct btree_node *n; + uint64_t keys; + + r = ro_step(s, block); + n = ro_node(s); + + nr = le32_to_cpu(n->header.nr_entries); + for (i = 0; i < nr; i++) { + if (le32_to_cpu(n->header.flags) & INTERNAL_NODE) { + r = walk_node(s, value64(n, i), fn, context); + if (r) + goto out; + } else { + keys = le64_to_cpu(*key_ptr(n, i)); + r = fn(context, &keys, value_ptr(n, i)); + if (r) + goto out; + } + } + +out: + ro_pop(s); + return r; +} + +int dm_btree_walk(struct dm_btree_info *info, dm_block_t root, + int (*fn)(void *context, uint64_t *keys, void *leaf), + void *context) +{ + int r; + struct ro_spine spine; + + BUG_ON(info->levels > 1); + + init_ro_spine(&spine, info); + r = walk_node(&spine, root, fn, context); + exit_ro_spine(&spine); + + return r; +} +EXPORT_SYMBOL_GPL(dm_btree_walk); diff --git a/drivers/md/persistent-data/dm-btree.h b/drivers/md/persistent-data/dm-btree.h index ae02c84410ff..8672d159e0b5 100644 --- a/drivers/md/persistent-data/dm-btree.h +++ b/drivers/md/persistent-data/dm-btree.h @@ -35,7 +35,7 @@ struct dm_transaction_manager; */ /* - * Infomation about the values stored within the btree. + * Information about the values stored within the btree. */ struct dm_btree_value_type { void *context; @@ -58,21 +58,21 @@ struct dm_btree_value_type { * somewhere.) This method is _not_ called for insertion of a new * value: It is assumed the ref count is already 1. */ - void (*inc)(void *context, void *value); + void (*inc)(void *context, const void *value); /* * This value is being deleted. The btree takes care of freeing * the memory pointed to by @value. Often the del function just * needs to decrement a reference count somewhere. */ - void (*dec)(void *context, void *value); + void (*dec)(void *context, const void *value); /* * A test for equality between two values. When a value is * overwritten with a new one, the old one has the dec method * called _unless_ the new and old value are deemed equal. */ - int (*equal)(void *context, void *value1, void *value2); + int (*equal)(void *context, const void *value1, const void *value2); }; /* @@ -142,4 +142,13 @@ int dm_btree_remove(struct dm_btree_info *info, dm_block_t root, int dm_btree_find_highest_key(struct dm_btree_info *info, dm_block_t root, uint64_t *result_keys); +/* + * Iterate through the a btree, calling fn() on each entry. + * It only works for single level trees and is internally recursive, so + * monitor stack usage carefully. + */ +int dm_btree_walk(struct dm_btree_info *info, dm_block_t root, + int (*fn)(void *context, uint64_t *keys, void *leaf), + void *context); + #endif /* _LINUX_DM_BTREE_H */ diff --git a/drivers/md/persistent-data/dm-space-map-common.c b/drivers/md/persistent-data/dm-space-map-common.c index f3a9af8cdec3..3e7a88d99eb0 100644 --- a/drivers/md/persistent-data/dm-space-map-common.c +++ b/drivers/md/persistent-data/dm-space-map-common.c @@ -39,8 +39,8 @@ static int index_check(struct dm_block_validator *v, __le32 csum_disk; if (dm_block_location(b) != le64_to_cpu(mi_le->blocknr)) { - DMERR("index_check failed blocknr %llu wanted %llu", - le64_to_cpu(mi_le->blocknr), dm_block_location(b)); + DMERR_LIMIT("index_check failed: blocknr %llu != wanted %llu", + le64_to_cpu(mi_le->blocknr), dm_block_location(b)); return -ENOTBLK; } @@ -48,8 +48,8 @@ static int index_check(struct dm_block_validator *v, block_size - sizeof(__le32), INDEX_CSUM_XOR)); if (csum_disk != mi_le->csum) { - DMERR("index_check failed csum %u wanted %u", - le32_to_cpu(csum_disk), le32_to_cpu(mi_le->csum)); + DMERR_LIMIT("index_check failed: csum %u != wanted %u", + le32_to_cpu(csum_disk), le32_to_cpu(mi_le->csum)); return -EILSEQ; } @@ -89,8 +89,8 @@ static int bitmap_check(struct dm_block_validator *v, __le32 csum_disk; if (dm_block_location(b) != le64_to_cpu(disk_header->blocknr)) { - DMERR("bitmap check failed blocknr %llu wanted %llu", - le64_to_cpu(disk_header->blocknr), dm_block_location(b)); + DMERR_LIMIT("bitmap check failed: blocknr %llu != wanted %llu", + le64_to_cpu(disk_header->blocknr), dm_block_location(b)); return -ENOTBLK; } @@ -98,8 +98,8 @@ static int bitmap_check(struct dm_block_validator *v, block_size - sizeof(__le32), BITMAP_CSUM_XOR)); if (csum_disk != disk_header->csum) { - DMERR("bitmap check failed csum %u wanted %u", - le32_to_cpu(csum_disk), le32_to_cpu(disk_header->csum)); + DMERR_LIMIT("bitmap check failed: csum %u != wanted %u", + le32_to_cpu(csum_disk), le32_to_cpu(disk_header->csum)); return -EILSEQ; } diff --git a/drivers/md/persistent-data/dm-space-map-metadata.c b/drivers/md/persistent-data/dm-space-map-metadata.c index e89ae5e7a519..906cf3df71af 100644 --- a/drivers/md/persistent-data/dm-space-map-metadata.c +++ b/drivers/md/persistent-data/dm-space-map-metadata.c @@ -337,7 +337,7 @@ static int sm_metadata_new_block(struct dm_space_map *sm, dm_block_t *b) { int r = sm_metadata_new_block_(sm, b); if (r) - DMERR("out of metadata space"); + DMERR("unable to allocate new metadata block"); return r; } diff --git a/drivers/md/persistent-data/dm-transaction-manager.c b/drivers/md/persistent-data/dm-transaction-manager.c index d247a35da3c6..81da1a26042e 100644 --- a/drivers/md/persistent-data/dm-transaction-manager.c +++ b/drivers/md/persistent-data/dm-transaction-manager.c @@ -25,8 +25,8 @@ struct shadow_info { /* * It would be nice if we scaled with the size of transaction. */ -#define HASH_SIZE 256 -#define HASH_MASK (HASH_SIZE - 1) +#define DM_HASH_SIZE 256 +#define DM_HASH_MASK (DM_HASH_SIZE - 1) struct dm_transaction_manager { int is_clone; @@ -36,7 +36,7 @@ struct dm_transaction_manager { struct dm_space_map *sm; spinlock_t lock; - struct hlist_head buckets[HASH_SIZE]; + struct hlist_head buckets[DM_HASH_SIZE]; }; /*----------------------------------------------------------------*/ @@ -44,12 +44,11 @@ struct dm_transaction_manager { static int is_shadow(struct dm_transaction_manager *tm, dm_block_t b) { int r = 0; - unsigned bucket = dm_hash_block(b, HASH_MASK); + unsigned bucket = dm_hash_block(b, DM_HASH_MASK); struct shadow_info *si; - struct hlist_node *n; spin_lock(&tm->lock); - hlist_for_each_entry(si, n, tm->buckets + bucket, hlist) + hlist_for_each_entry(si, tm->buckets + bucket, hlist) if (si->where == b) { r = 1; break; @@ -71,7 +70,7 @@ static void insert_shadow(struct dm_transaction_manager *tm, dm_block_t b) si = kmalloc(sizeof(*si), GFP_NOIO); if (si) { si->where = b; - bucket = dm_hash_block(b, HASH_MASK); + bucket = dm_hash_block(b, DM_HASH_MASK); spin_lock(&tm->lock); hlist_add_head(&si->hlist, tm->buckets + bucket); spin_unlock(&tm->lock); @@ -81,14 +80,14 @@ static void insert_shadow(struct dm_transaction_manager *tm, dm_block_t b) static void wipe_shadow_table(struct dm_transaction_manager *tm) { struct shadow_info *si; - struct hlist_node *n, *tmp; + struct hlist_node *tmp; struct hlist_head *bucket; int i; spin_lock(&tm->lock); - for (i = 0; i < HASH_SIZE; i++) { + for (i = 0; i < DM_HASH_SIZE; i++) { bucket = tm->buckets + i; - hlist_for_each_entry_safe(si, n, tmp, bucket, hlist) + hlist_for_each_entry_safe(si, tmp, bucket, hlist) kfree(si); INIT_HLIST_HEAD(bucket); @@ -115,7 +114,7 @@ static struct dm_transaction_manager *dm_tm_create(struct dm_block_manager *bm, tm->sm = sm; spin_lock_init(&tm->lock); - for (i = 0; i < HASH_SIZE; i++) + for (i = 0; i < DM_HASH_SIZE; i++) INIT_HLIST_HEAD(tm->buckets + i); return tm; |