From df2cb6daa4cbc34406bc4b1ac9b9335df1083a72 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Mon, 10 Sep 2012 14:33:46 -0700 Subject: block: Avoid deadlocks with bio allocation by stacking drivers Previously, if we ever try to allocate more than once from the same bio set while running under generic_make_request() (i.e. a stacking block driver), we risk deadlock. This is because of the code in generic_make_request() that converts recursion to iteration; any bios we submit won't actually be submitted (so they can complete and eventually be freed) until after we return - this means if we allocate a second bio, we're blocking the first one from ever being freed. Thus if enough threads call into a stacking block driver at the same time with bios that need multiple splits, and the bio_set's reserve gets used up, we deadlock. This can be worked around in the driver code - we could check if we're running under generic_make_request(), then mask out __GFP_WAIT when we go to allocate a bio, and if the allocation fails punt to workqueue and retry the allocation. But this is tricky and not a generic solution. This patch solves it for all users by inverting the previously described technique. We allocate a rescuer workqueue for each bio_set, and then in the allocation code if there are bios on current->bio_list we would be blocking, we punt them to the rescuer workqueue to be submitted. This guarantees forward progress for bio allocations under generic_make_request() provided each bio is submitted before allocating the next, and provided the bios are freed after they complete. Note that this doesn't do anything for allocation from other mempools. Instead of allocating per bio data structures from a mempool, code should use bio_set's front_pad. Tested it by forcing the rescue codepath to be taken (by disabling the first GFP_NOWAIT) attempt, and then ran it with bcache (which does a lot of arbitrary bio splitting) and verified that the rescuer was being invoked. Signed-off-by: Kent Overstreet CC: Jens Axboe Acked-by: Tejun Heo Reviewed-by: Muthukumar Ratty --- fs/bio.c | 116 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 114 insertions(+), 2 deletions(-) (limited to 'fs/bio.c') diff --git a/fs/bio.c b/fs/bio.c index bb5768f59b32..73b544709945 100644 --- a/fs/bio.c +++ b/fs/bio.c @@ -297,6 +297,54 @@ void bio_reset(struct bio *bio) } EXPORT_SYMBOL(bio_reset); +static void bio_alloc_rescue(struct work_struct *work) +{ + struct bio_set *bs = container_of(work, struct bio_set, rescue_work); + struct bio *bio; + + while (1) { + spin_lock(&bs->rescue_lock); + bio = bio_list_pop(&bs->rescue_list); + spin_unlock(&bs->rescue_lock); + + if (!bio) + break; + + generic_make_request(bio); + } +} + +static void punt_bios_to_rescuer(struct bio_set *bs) +{ + struct bio_list punt, nopunt; + struct bio *bio; + + /* + * In order to guarantee forward progress we must punt only bios that + * were allocated from this bio_set; otherwise, if there was a bio on + * there for a stacking driver higher up in the stack, processing it + * could require allocating bios from this bio_set, and doing that from + * our own rescuer would be bad. + * + * Since bio lists are singly linked, pop them all instead of trying to + * remove from the middle of the list: + */ + + bio_list_init(&punt); + bio_list_init(&nopunt); + + while ((bio = bio_list_pop(current->bio_list))) + bio_list_add(bio->bi_pool == bs ? &punt : &nopunt, bio); + + *current->bio_list = nopunt; + + spin_lock(&bs->rescue_lock); + bio_list_merge(&bs->rescue_list, &punt); + spin_unlock(&bs->rescue_lock); + + queue_work(bs->rescue_workqueue, &bs->rescue_work); +} + /** * bio_alloc_bioset - allocate a bio for I/O * @gfp_mask: the GFP_ mask given to the slab allocator @@ -314,11 +362,27 @@ EXPORT_SYMBOL(bio_reset); * previously allocated bio for IO before attempting to allocate a new one. * Failure to do so can cause deadlocks under memory pressure. * + * Note that when running under generic_make_request() (i.e. any block + * driver), bios are not submitted until after you return - see the code in + * generic_make_request() that converts recursion into iteration, to prevent + * stack overflows. + * + * This would normally mean allocating multiple bios under + * generic_make_request() would be susceptible to deadlocks, but we have + * deadlock avoidance code that resubmits any blocked bios from a rescuer + * thread. + * + * However, we do not guarantee forward progress for allocations from other + * mempools. Doing multiple allocations from the same mempool under + * generic_make_request() should be avoided - instead, use bio_set's front_pad + * for per bio allocations. + * * RETURNS: * Pointer to new bio on success, NULL on failure. */ struct bio *bio_alloc_bioset(gfp_t gfp_mask, int nr_iovecs, struct bio_set *bs) { + gfp_t saved_gfp = gfp_mask; unsigned front_pad; unsigned inline_vecs; unsigned long idx = BIO_POOL_NONE; @@ -336,7 +400,37 @@ struct bio *bio_alloc_bioset(gfp_t gfp_mask, int nr_iovecs, struct bio_set *bs) front_pad = 0; inline_vecs = nr_iovecs; } else { + /* + * generic_make_request() converts recursion to iteration; this + * means if we're running beneath it, any bios we allocate and + * submit will not be submitted (and thus freed) until after we + * return. + * + * This exposes us to a potential deadlock if we allocate + * multiple bios from the same bio_set() while running + * underneath generic_make_request(). If we were to allocate + * multiple bios (say a stacking block driver that was splitting + * bios), we would deadlock if we exhausted the mempool's + * reserve. + * + * We solve this, and guarantee forward progress, with a rescuer + * workqueue per bio_set. If we go to allocate and there are + * bios on current->bio_list, we first try the allocation + * without __GFP_WAIT; if that fails, we punt those bios we + * would be blocking to the rescuer workqueue before we retry + * with the original gfp_flags. + */ + + if (current->bio_list && !bio_list_empty(current->bio_list)) + gfp_mask &= ~__GFP_WAIT; + p = mempool_alloc(bs->bio_pool, gfp_mask); + if (!p && gfp_mask != saved_gfp) { + punt_bios_to_rescuer(bs); + gfp_mask = saved_gfp; + p = mempool_alloc(bs->bio_pool, gfp_mask); + } + front_pad = bs->front_pad; inline_vecs = BIO_INLINE_VECS; } @@ -349,6 +443,12 @@ struct bio *bio_alloc_bioset(gfp_t gfp_mask, int nr_iovecs, struct bio_set *bs) if (nr_iovecs > inline_vecs) { bvl = bvec_alloc_bs(gfp_mask, nr_iovecs, &idx, bs); + if (!bvl && gfp_mask != saved_gfp) { + punt_bios_to_rescuer(bs); + gfp_mask = saved_gfp; + bvl = bvec_alloc_bs(gfp_mask, nr_iovecs, &idx, bs); + } + if (unlikely(!bvl)) goto err_free; } else if (nr_iovecs) { @@ -1579,6 +1679,9 @@ static void biovec_free_pools(struct bio_set *bs) void bioset_free(struct bio_set *bs) { + if (bs->rescue_workqueue) + destroy_workqueue(bs->rescue_workqueue); + if (bs->bio_pool) mempool_destroy(bs->bio_pool); @@ -1614,6 +1717,10 @@ struct bio_set *bioset_create(unsigned int pool_size, unsigned int front_pad) bs->front_pad = front_pad; + spin_lock_init(&bs->rescue_lock); + bio_list_init(&bs->rescue_list); + INIT_WORK(&bs->rescue_work, bio_alloc_rescue); + bs->bio_slab = bio_find_or_create_slab(front_pad + back_pad); if (!bs->bio_slab) { kfree(bs); @@ -1624,9 +1731,14 @@ struct bio_set *bioset_create(unsigned int pool_size, unsigned int front_pad) if (!bs->bio_pool) goto bad; - if (!biovec_create_pools(bs, pool_size)) - return bs; + if (biovec_create_pools(bs, pool_size)) + goto bad; + + bs->rescue_workqueue = alloc_workqueue("bioset", WQ_MEM_RECLAIM, 0); + if (!bs->rescue_workqueue) + goto bad; + return bs; bad: bioset_free(bs); return NULL; -- cgit v1.2.3 From 9f060e2231ca96ca94f2ffcff730acd72606b280 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Fri, 12 Oct 2012 15:29:33 -0700 Subject: block: Convert integrity to bvec_alloc_bs() This adds a pointer to the bvec array to struct bio_integrity_payload, instead of the bvecs always being inline; then the bvecs are allocated with bvec_alloc_bs(). Changed bvec_alloc_bs() and bvec_free_bs() to take a pointer to a mempool instead of the bioset, so that bio integrity can use a different mempool for its bvecs, and thus avoid a potential deadlock. This is eventually for immutable bio vecs - immutable bvecs aren't useful if we still have to copy them, hence the need for the pointer. Less code is always nice too, though. Also, bio_integrity_alloc() was using fs_bio_set if no bio_set was specified. This was wrong - using the bio_set doesn't protect us from memory allocation failures, because we just used kmalloc for the bio_integrity_payload. But it does introduce the possibility of deadlock, if for some reason we weren't supposed to be using fs_bio_set. Signed-off-by: Kent Overstreet CC: Jens Axboe CC: Martin K. Petersen --- fs/bio.c | 36 +++++++++++++++--------------------- 1 file changed, 15 insertions(+), 21 deletions(-) (limited to 'fs/bio.c') diff --git a/fs/bio.c b/fs/bio.c index 73b544709945..40aa96eae99f 100644 --- a/fs/bio.c +++ b/fs/bio.c @@ -160,12 +160,12 @@ unsigned int bvec_nr_vecs(unsigned short idx) return bvec_slabs[idx].nr_vecs; } -void bvec_free_bs(struct bio_set *bs, struct bio_vec *bv, unsigned int idx) +void bvec_free(mempool_t *pool, struct bio_vec *bv, unsigned int idx) { BIO_BUG_ON(idx >= BIOVEC_NR_POOLS); if (idx == BIOVEC_MAX_IDX) - mempool_free(bv, bs->bvec_pool); + mempool_free(bv, pool); else { struct biovec_slab *bvs = bvec_slabs + idx; @@ -173,8 +173,8 @@ void bvec_free_bs(struct bio_set *bs, struct bio_vec *bv, unsigned int idx) } } -struct bio_vec *bvec_alloc_bs(gfp_t gfp_mask, int nr, unsigned long *idx, - struct bio_set *bs) +struct bio_vec *bvec_alloc(gfp_t gfp_mask, int nr, unsigned long *idx, + mempool_t *pool) { struct bio_vec *bvl; @@ -210,7 +210,7 @@ struct bio_vec *bvec_alloc_bs(gfp_t gfp_mask, int nr, unsigned long *idx, */ if (*idx == BIOVEC_MAX_IDX) { fallback: - bvl = mempool_alloc(bs->bvec_pool, gfp_mask); + bvl = mempool_alloc(pool, gfp_mask); } else { struct biovec_slab *bvs = bvec_slabs + *idx; gfp_t __gfp_mask = gfp_mask & ~(__GFP_WAIT | __GFP_IO); @@ -253,7 +253,7 @@ static void bio_free(struct bio *bio) if (bs) { if (bio_has_allocated_vec(bio)) - bvec_free_bs(bs, bio->bi_io_vec, BIO_POOL_IDX(bio)); + bvec_free(bs->bvec_pool, bio->bi_io_vec, BIO_POOL_IDX(bio)); /* * If we have front padding, adjust the bio pointer before freeing @@ -442,11 +442,11 @@ struct bio *bio_alloc_bioset(gfp_t gfp_mask, int nr_iovecs, struct bio_set *bs) bio_init(bio); if (nr_iovecs > inline_vecs) { - bvl = bvec_alloc_bs(gfp_mask, nr_iovecs, &idx, bs); + bvl = bvec_alloc(gfp_mask, nr_iovecs, &idx, bs->bvec_pool); if (!bvl && gfp_mask != saved_gfp) { punt_bios_to_rescuer(bs); gfp_mask = saved_gfp; - bvl = bvec_alloc_bs(gfp_mask, nr_iovecs, &idx, bs); + bvl = bvec_alloc(gfp_mask, nr_iovecs, &idx, bs->bvec_pool); } if (unlikely(!bvl)) @@ -1661,20 +1661,11 @@ EXPORT_SYMBOL(bio_sector_offset); * create memory pools for biovec's in a bio_set. * use the global biovec slabs created for general use. */ -static int biovec_create_pools(struct bio_set *bs, int pool_entries) +mempool_t *biovec_create_pool(struct bio_set *bs, int pool_entries) { struct biovec_slab *bp = bvec_slabs + BIOVEC_MAX_IDX; - bs->bvec_pool = mempool_create_slab_pool(pool_entries, bp->slab); - if (!bs->bvec_pool) - return -ENOMEM; - - return 0; -} - -static void biovec_free_pools(struct bio_set *bs) -{ - mempool_destroy(bs->bvec_pool); + return mempool_create_slab_pool(pool_entries, bp->slab); } void bioset_free(struct bio_set *bs) @@ -1685,8 +1676,10 @@ void bioset_free(struct bio_set *bs) if (bs->bio_pool) mempool_destroy(bs->bio_pool); + if (bs->bvec_pool) + mempool_destroy(bs->bvec_pool); + bioset_integrity_free(bs); - biovec_free_pools(bs); bio_put_slab(bs); kfree(bs); @@ -1731,7 +1724,8 @@ struct bio_set *bioset_create(unsigned int pool_size, unsigned int front_pad) if (!bs->bio_pool) goto bad; - if (biovec_create_pools(bs, pool_size)) + bs->bvec_pool = biovec_create_pool(bs, pool_size); + if (!bs->bvec_pool) goto bad; bs->rescue_workqueue = alloc_workqueue("bioset", WQ_MEM_RECLAIM, 0); -- cgit v1.2.3 From 054bdf646e36c2f7dc1bf6bc6209dbbb5909164b Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Fri, 28 Sep 2012 13:17:55 -0700 Subject: block: Add bio_advance() This is prep work for immutable bio vecs; we first want to centralize where bvecs are modified. Next two patches convert some existing code to use this function. Signed-off-by: Kent Overstreet CC: Jens Axboe --- fs/bio.c | 41 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) (limited to 'fs/bio.c') diff --git a/fs/bio.c b/fs/bio.c index 40aa96eae99f..7edc08d2246c 100644 --- a/fs/bio.c +++ b/fs/bio.c @@ -752,6 +752,47 @@ int bio_add_page(struct bio *bio, struct page *page, unsigned int len, } EXPORT_SYMBOL(bio_add_page); +/** + * bio_advance - increment/complete a bio by some number of bytes + * @bio: bio to advance + * @bytes: number of bytes to complete + * + * This updates bi_sector, bi_size and bi_idx; if the number of bytes to + * complete doesn't align with a bvec boundary, then bv_len and bv_offset will + * be updated on the last bvec as well. + * + * @bio will then represent the remaining, uncompleted portion of the io. + */ +void bio_advance(struct bio *bio, unsigned bytes) +{ + if (bio_integrity(bio)) + bio_integrity_advance(bio, bytes); + + bio->bi_sector += bytes >> 9; + bio->bi_size -= bytes; + + if (bio->bi_rw & BIO_NO_ADVANCE_ITER_MASK) + return; + + while (bytes) { + if (unlikely(bio->bi_idx >= bio->bi_vcnt)) { + WARN_ONCE(1, "bio idx %d >= vcnt %d\n", + bio->bi_idx, bio->bi_vcnt); + break; + } + + if (bytes >= bio_iovec(bio)->bv_len) { + bytes -= bio_iovec(bio)->bv_len; + bio->bi_idx++; + } else { + bio_iovec(bio)->bv_len -= bytes; + bio_iovec(bio)->bv_offset += bytes; + bytes = 0; + } + } +} +EXPORT_SYMBOL(bio_advance); + struct bio_map_data { struct bio_vec *iovecs; struct sg_iovec *sgvecs; -- cgit v1.2.3 From 5b83636ae3c3b4f87d02a5929ad4dee831534db0 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Tue, 4 Sep 2012 15:20:38 -0700 Subject: block: Change bio_split() to respect the current value of bi_idx In the current code bio_split() won't be seeing partially completed bios so this doesn't change any behaviour, but this makes the code a bit clearer as to what bio_split() actually requires. The immediate purpose of the patch is removing unnecessary bi_idx references, but the end goal is to allow partial completed bios to be submitted, which along with immutable biovecs enables effecient bio splitting. Some of the callers were (double) checking that bios could be split, so update their checks too. Signed-off-by: Kent Overstreet CC: Jens Axboe CC: Lars Ellenberg CC: Neil Brown CC: Martin K. Petersen --- fs/bio.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'fs/bio.c') diff --git a/fs/bio.c b/fs/bio.c index 7edc08d2246c..f1b4c1651089 100644 --- a/fs/bio.c +++ b/fs/bio.c @@ -1620,8 +1620,7 @@ struct bio_pair *bio_split(struct bio *bi, int first_sectors) trace_block_split(bdev_get_queue(bi->bi_bdev), bi, bi->bi_sector + first_sectors); - BUG_ON(bi->bi_vcnt != 1 && bi->bi_vcnt != 0); - BUG_ON(bi->bi_idx != 0); + BUG_ON(bio_segments(bi) > 1); atomic_set(&bp->cnt, 3); bp->error = 0; bp->bio1 = *bi; @@ -1631,8 +1630,8 @@ struct bio_pair *bio_split(struct bio *bi, int first_sectors) bp->bio1.bi_size = first_sectors << 9; if (bi->bi_vcnt != 0) { - bp->bv1 = bi->bi_io_vec[0]; - bp->bv2 = bi->bi_io_vec[0]; + bp->bv1 = *bio_iovec(bi); + bp->bv2 = *bio_iovec(bi); if (bio_is_rw(bi)) { bp->bv2.bv_offset += first_sectors << 9; -- cgit v1.2.3 From 9e882242c6193ae6f416f2d8d8db0d9126bd996b Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Mon, 10 Sep 2012 14:41:12 -0700 Subject: block: Add submit_bio_wait(), remove from md Random cleanup - this code was duplicated and it's not really specific to md. Also added the ability to return the actual error code. Signed-off-by: Kent Overstreet CC: Jens Axboe CC: NeilBrown Acked-by: Tejun Heo --- fs/bio.c | 36 ++++++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) (limited to 'fs/bio.c') diff --git a/fs/bio.c b/fs/bio.c index f1b4c1651089..4ce24ee5dcd0 100644 --- a/fs/bio.c +++ b/fs/bio.c @@ -752,6 +752,42 @@ int bio_add_page(struct bio *bio, struct page *page, unsigned int len, } EXPORT_SYMBOL(bio_add_page); +struct submit_bio_ret { + struct completion event; + int error; +}; + +static void submit_bio_wait_endio(struct bio *bio, int error) +{ + struct submit_bio_ret *ret = bio->bi_private; + + ret->error = error; + complete(&ret->event); +} + +/** + * submit_bio_wait - submit a bio, and wait until it completes + * @rw: whether to %READ or %WRITE, or maybe to %READA (read ahead) + * @bio: The &struct bio which describes the I/O + * + * Simple wrapper around submit_bio(). Returns 0 on success, or the error from + * bio_endio() on failure. + */ +int submit_bio_wait(int rw, struct bio *bio) +{ + struct submit_bio_ret ret; + + rw |= REQ_SYNC; + init_completion(&ret.event); + bio->bi_private = &ret; + bio->bi_end_io = submit_bio_wait_endio; + submit_bio(rw, bio); + wait_for_completion(&ret.event); + + return ret.error; +} +EXPORT_SYMBOL(submit_bio_wait); + /** * bio_advance - increment/complete a bio by some number of bytes * @bio: bio to advance -- cgit v1.2.3 From 16ac3d63e74f3d6e34e42d6e523b6a61de0020f0 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Mon, 10 Sep 2012 13:57:51 -0700 Subject: block: Add bio_copy_data() This gets open coded quite a bit and it's tricky to get right, so make a generic version and convert some existing users over to it instead. Signed-off-by: Kent Overstreet CC: Jens Axboe --- fs/bio.c | 70 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 70 insertions(+) (limited to 'fs/bio.c') diff --git a/fs/bio.c b/fs/bio.c index 4ce24ee5dcd0..e437f9aae67d 100644 --- a/fs/bio.c +++ b/fs/bio.c @@ -829,6 +829,76 @@ void bio_advance(struct bio *bio, unsigned bytes) } EXPORT_SYMBOL(bio_advance); +/** + * bio_copy_data - copy contents of data buffers from one chain of bios to + * another + * @src: source bio list + * @dst: destination bio list + * + * If @src and @dst are single bios, bi_next must be NULL - otherwise, treats + * @src and @dst as linked lists of bios. + * + * Stops when it reaches the end of either @src or @dst - that is, copies + * min(src->bi_size, dst->bi_size) bytes (or the equivalent for lists of bios). + */ +void bio_copy_data(struct bio *dst, struct bio *src) +{ + struct bio_vec *src_bv, *dst_bv; + unsigned src_offset, dst_offset, bytes; + void *src_p, *dst_p; + + src_bv = bio_iovec(src); + dst_bv = bio_iovec(dst); + + src_offset = src_bv->bv_offset; + dst_offset = dst_bv->bv_offset; + + while (1) { + if (src_offset == src_bv->bv_offset + src_bv->bv_len) { + src_bv++; + if (src_bv == bio_iovec_idx(src, src->bi_vcnt)) { + src = src->bi_next; + if (!src) + break; + + src_bv = bio_iovec(src); + } + + src_offset = src_bv->bv_offset; + } + + if (dst_offset == dst_bv->bv_offset + dst_bv->bv_len) { + dst_bv++; + if (dst_bv == bio_iovec_idx(dst, dst->bi_vcnt)) { + dst = dst->bi_next; + if (!dst) + break; + + dst_bv = bio_iovec(dst); + } + + dst_offset = dst_bv->bv_offset; + } + + bytes = min(dst_bv->bv_offset + dst_bv->bv_len - dst_offset, + src_bv->bv_offset + src_bv->bv_len - src_offset); + + src_p = kmap_atomic(src_bv->bv_page); + dst_p = kmap_atomic(dst_bv->bv_page); + + memcpy(dst_p + dst_bv->bv_offset, + src_p + src_bv->bv_offset, + bytes); + + kunmap_atomic(dst_p); + kunmap_atomic(src_p); + + src_offset += bytes; + dst_offset += bytes; + } +} +EXPORT_SYMBOL(bio_copy_data); + struct bio_map_data { struct bio_vec *iovecs; struct sg_iovec *sgvecs; -- cgit v1.2.3 From d74c6d514fe314b8bdab58b487b25992291577ec Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Wed, 6 Feb 2013 12:23:11 -0800 Subject: block: Add bio_for_each_segment_all() __bio_for_each_segment() iterates bvecs from the specified index instead of bio->bv_idx. Currently, the only usage is to walk all the bvecs after the bio has been advanced by specifying 0 index. For immutable bvecs, we need to split these apart; bio_for_each_segment() is going to have a different implementation. This will also help document the intent of code that's using it - bio_for_each_segment_all() is only legal to use for code that owns the bio. Signed-off-by: Kent Overstreet CC: Jens Axboe CC: Neil Brown CC: Boaz Harrosh --- fs/bio.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'fs/bio.c') diff --git a/fs/bio.c b/fs/bio.c index e437f9aae67d..618f9044c414 100644 --- a/fs/bio.c +++ b/fs/bio.c @@ -961,7 +961,7 @@ static int __bio_copy_iov(struct bio *bio, struct bio_vec *iovecs, int iov_idx = 0; unsigned int iov_off = 0; - __bio_for_each_segment(bvec, bio, i, 0) { + bio_for_each_segment_all(bvec, bio, i) { char *bv_addr = page_address(bvec->bv_page); unsigned int bv_len = iovecs[i].bv_len; @@ -1143,7 +1143,7 @@ struct bio *bio_copy_user_iov(struct request_queue *q, return bio; cleanup: if (!map_data) - bio_for_each_segment(bvec, bio, i) + bio_for_each_segment_all(bvec, bio, i) __free_page(bvec->bv_page); bio_put(bio); @@ -1357,7 +1357,7 @@ static void __bio_unmap_user(struct bio *bio) /* * make sure we dirty pages we wrote to */ - __bio_for_each_segment(bvec, bio, i, 0) { + bio_for_each_segment_all(bvec, bio, i) { if (bio_data_dir(bio) == READ) set_page_dirty_lock(bvec->bv_page); @@ -1463,7 +1463,7 @@ static void bio_copy_kern_endio(struct bio *bio, int err) int i; char *p = bmd->sgvecs[0].iov_base; - __bio_for_each_segment(bvec, bio, i, 0) { + bio_for_each_segment_all(bvec, bio, i) { char *addr = page_address(bvec->bv_page); int len = bmd->iovecs[i].bv_len; @@ -1503,7 +1503,7 @@ struct bio *bio_copy_kern(struct request_queue *q, void *data, unsigned int len, if (!reading) { void *p = data; - bio_for_each_segment(bvec, bio, i) { + bio_for_each_segment_all(bvec, bio, i) { char *addr = page_address(bvec->bv_page); memcpy(addr, p, bvec->bv_len); @@ -1789,7 +1789,7 @@ sector_t bio_sector_offset(struct bio *bio, unsigned short index, if (index >= bio->bi_idx) index = bio->bi_vcnt - 1; - __bio_for_each_segment(bv, bio, i, 0) { + bio_for_each_segment_all(bv, bio, i) { if (i == index) { if (offset > bv->bv_offset) sectors += (offset - bv->bv_offset) / sector_sz; -- cgit v1.2.3 From cb34e057ad22a1c2c6f2cb6cd1cbd05cc2f28f28 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Wed, 5 Sep 2012 15:22:02 -0700 Subject: block: Convert some code to bio_for_each_segment_all() More prep work for immutable bvecs: A few places in the code were either open coding or using the wrong version - fix. After we introduce the bvec iter, it'll no longer be possible to modify the biovec through bio_for_each_segment_all() - it doesn't increment a pointer to the current bvec, you pass in a struct bio_vec (not a pointer) which is updated with what the current biovec would be (taking into account bi_bvec_done and bi_size). So because of that it's more worthwhile to be consistent about bio_for_each_segment()/bio_for_each_segment_all() usage. Signed-off-by: Kent Overstreet CC: Jens Axboe CC: NeilBrown CC: Alasdair Kergon CC: dm-devel@redhat.com CC: Alexander Viro --- fs/bio.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) (limited to 'fs/bio.c') diff --git a/fs/bio.c b/fs/bio.c index 618f9044c414..fe3aee90c988 100644 --- a/fs/bio.c +++ b/fs/bio.c @@ -1548,11 +1548,11 @@ EXPORT_SYMBOL(bio_copy_kern); */ void bio_set_pages_dirty(struct bio *bio) { - struct bio_vec *bvec = bio->bi_io_vec; + struct bio_vec *bvec; int i; - for (i = 0; i < bio->bi_vcnt; i++) { - struct page *page = bvec[i].bv_page; + bio_for_each_segment_all(bvec, bio, i) { + struct page *page = bvec->bv_page; if (page && !PageCompound(page)) set_page_dirty_lock(page); @@ -1561,11 +1561,11 @@ void bio_set_pages_dirty(struct bio *bio) static void bio_release_pages(struct bio *bio) { - struct bio_vec *bvec = bio->bi_io_vec; + struct bio_vec *bvec; int i; - for (i = 0; i < bio->bi_vcnt; i++) { - struct page *page = bvec[i].bv_page; + bio_for_each_segment_all(bvec, bio, i) { + struct page *page = bvec->bv_page; if (page) put_page(page); @@ -1614,16 +1614,16 @@ static void bio_dirty_fn(struct work_struct *work) void bio_check_pages_dirty(struct bio *bio) { - struct bio_vec *bvec = bio->bi_io_vec; + struct bio_vec *bvec; int nr_clean_pages = 0; int i; - for (i = 0; i < bio->bi_vcnt; i++) { - struct page *page = bvec[i].bv_page; + bio_for_each_segment_all(bvec, bio, i) { + struct page *page = bvec->bv_page; if (PageDirty(page) || PageCompound(page)) { page_cache_release(page); - bvec[i].bv_page = NULL; + bvec->bv_page = NULL; } else { nr_clean_pages++; } -- cgit v1.2.3 From a07876064a0b73ab5ef1ebcf14b1cf0231c07858 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Mon, 10 Sep 2012 14:03:28 -0700 Subject: block: Add bio_alloc_pages() More utility code to replace stuff that's getting open coded. Signed-off-by: Kent Overstreet CC: Jens Axboe CC: NeilBrown --- fs/bio.c | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) (limited to 'fs/bio.c') diff --git a/fs/bio.c b/fs/bio.c index fe3aee90c988..e545a440d376 100644 --- a/fs/bio.c +++ b/fs/bio.c @@ -829,6 +829,34 @@ void bio_advance(struct bio *bio, unsigned bytes) } EXPORT_SYMBOL(bio_advance); +/** + * bio_alloc_pages - allocates a single page for each bvec in a bio + * @bio: bio to allocate pages for + * @gfp_mask: flags for allocation + * + * Allocates pages up to @bio->bi_vcnt. + * + * Returns 0 on success, -ENOMEM on failure. On failure, any allocated pages are + * freed. + */ +int bio_alloc_pages(struct bio *bio, gfp_t gfp_mask) +{ + int i; + struct bio_vec *bv; + + bio_for_each_segment_all(bv, bio, i) { + bv->bv_page = alloc_page(gfp_mask); + if (!bv->bv_page) { + while (--bv >= bio->bi_io_vec) + __free_page(bv->bv_page); + return -ENOMEM; + } + } + + return 0; +} +EXPORT_SYMBOL(bio_alloc_pages); + /** * bio_copy_data - copy contents of data buffers from one chain of bios to * another -- cgit v1.2.3 From a38352e0ac02dbbd4fa464dc22d1352b5fbd06fd Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Fri, 25 May 2012 13:03:11 -0700 Subject: block: Add an explicit bio flag for bios that own their bvec This is for the new bio splitting code. When we split a bio, if the split occured on a bvec boundry we reuse the bvec for the new bio. But that means bio_free() can't free it, hence the explicit flag. Signed-off-by: Kent Overstreet CC: Jens Axboe Acked-by: Tejun Heo --- fs/bio.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'fs/bio.c') diff --git a/fs/bio.c b/fs/bio.c index e545a440d376..9238a54b562c 100644 --- a/fs/bio.c +++ b/fs/bio.c @@ -252,7 +252,7 @@ static void bio_free(struct bio *bio) __bio_free(bio); if (bs) { - if (bio_has_allocated_vec(bio)) + if (bio_flagged(bio, BIO_OWNS_VEC)) bvec_free(bs->bvec_pool, bio->bi_io_vec, BIO_POOL_IDX(bio)); /* @@ -451,6 +451,8 @@ struct bio *bio_alloc_bioset(gfp_t gfp_mask, int nr_iovecs, struct bio_set *bs) if (unlikely(!bvl)) goto err_free; + + bio->bi_flags |= 1 << BIO_OWNS_VEC; } else if (nr_iovecs) { bvl = bio->bi_inline_vecs; } -- cgit v1.2.3