From b7c335713ea130d707c22d7f7c57a8eca75ded7e Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Wed, 26 May 2010 15:41:14 +0200 Subject: brd: support discard Support discard requests in brd by zeroing or deleting the underlying backing pages. This is simply to help with testing and documentation nature of brd code. Signed-off-by: Nick Piggin Signed-off-by: Jens Axboe --- drivers/block/brd.c | 53 ++++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 52 insertions(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/block/brd.c b/drivers/block/brd.c index 6081e81d5738..f1bf79d9bc0a 100644 --- a/drivers/block/brd.c +++ b/drivers/block/brd.c @@ -133,6 +133,28 @@ static struct page *brd_insert_page(struct brd_device *brd, sector_t sector) return page; } +static void brd_free_page(struct brd_device *brd, sector_t sector) +{ + struct page *page; + pgoff_t idx; + + spin_lock(&brd->brd_lock); + idx = sector >> PAGE_SECTORS_SHIFT; + page = radix_tree_delete(&brd->brd_pages, idx); + spin_unlock(&brd->brd_lock); + if (page) + __free_page(page); +} + +static void brd_zero_page(struct brd_device *brd, sector_t sector) +{ + struct page *page; + + page = brd_lookup_page(brd, sector); + if (page) + clear_highpage(page); +} + /* * Free all backing store pages and radix tree. This must only be called when * there are no other users of the device. @@ -189,6 +211,24 @@ static int copy_to_brd_setup(struct brd_device *brd, sector_t sector, size_t n) return 0; } +static void discard_from_brd(struct brd_device *brd, + sector_t sector, size_t n) +{ + while (n >= PAGE_SIZE) { + /* + * Don't want to actually discard pages here because + * re-allocating the pages can result in writeback + * deadlocks under heavy load. + */ + if (0) + brd_free_page(brd, sector); + else + brd_zero_page(brd, sector); + sector += PAGE_SIZE >> SECTOR_SHIFT; + n -= PAGE_SIZE; + } +} + /* * Copy n bytes from src to the brd starting at sector. Does not sleep. */ @@ -300,6 +340,12 @@ static int brd_make_request(struct request_queue *q, struct bio *bio) get_capacity(bdev->bd_disk)) goto out; + if (unlikely(bio_rw_flagged(bio, BIO_RW_DISCARD))) { + err = 0; + discard_from_brd(brd, sector, bio->bi_size); + goto out; + } + rw = bio_rw(bio); if (rw == READA) rw = READ; @@ -320,7 +366,7 @@ out: } #ifdef CONFIG_BLK_DEV_XIP -static int brd_direct_access (struct block_device *bdev, sector_t sector, +static int brd_direct_access(struct block_device *bdev, sector_t sector, void **kaddr, unsigned long *pfn) { struct brd_device *brd = bdev->bd_disk->private_data; @@ -437,6 +483,11 @@ static struct brd_device *brd_alloc(int i) blk_queue_max_hw_sectors(brd->brd_queue, 1024); blk_queue_bounce_limit(brd->brd_queue, BLK_BOUNCE_ANY); + brd->brd_queue->limits.discard_granularity = PAGE_SIZE; + brd->brd_queue->limits.max_discard_sectors = UINT_MAX; + brd->brd_queue->limits.discard_zeroes_data = 1; + queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, brd->brd_queue); + disk = brd->brd_disk = alloc_disk(1 << part_shift); if (!disk) goto out_free_queue; -- cgit v1.2.3 From 2c8d196759054b632788633b20e39167df36041d Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Tue, 25 May 2010 14:32:03 +0200 Subject: drbd: Revert "drbd: Create new current UUID as late as possible" The late-UUID writing is delayed until the next release. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg Signed-off-by: Jens Axboe --- drivers/block/drbd/drbd_int.h | 9 +-------- drivers/block/drbd/drbd_main.c | 34 ++++++---------------------------- drivers/block/drbd/drbd_receiver.c | 11 ----------- 3 files changed, 7 insertions(+), 47 deletions(-) (limited to 'drivers') diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index e9654c8d5b62..c194348a46ed 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -943,8 +943,7 @@ struct drbd_conf { struct drbd_work resync_work, unplug_work, md_sync_work, - delay_probe_work, - uuid_work; + delay_probe_work; struct timer_list resync_timer; struct timer_list md_sync_timer; struct timer_list delay_probe_timer; @@ -1069,7 +1068,6 @@ struct drbd_conf { struct timeval dps_time; /* delay-probes-start-time */ unsigned int dp_volume_last; /* send_cnt of last delay probe */ int c_sync_rate; /* current resync rate after delay_probe magic */ - atomic_t new_c_uuid; }; static inline struct drbd_conf *minor_to_mdev(unsigned int minor) @@ -2219,8 +2217,6 @@ static inline int __inc_ap_bio_cond(struct drbd_conf *mdev) return 0; if (test_bit(BITMAP_IO, &mdev->flags)) return 0; - if (atomic_read(&mdev->new_c_uuid)) - return 0; return 1; } @@ -2241,9 +2237,6 @@ static inline void inc_ap_bio(struct drbd_conf *mdev, int count) * to avoid races with the reconnect code, * we need to atomic_inc within the spinlock. */ - if (atomic_read(&mdev->new_c_uuid) && atomic_add_unless(&mdev->new_c_uuid, -1, 1)) - drbd_queue_work_front(&mdev->data.work, &mdev->uuid_work); - spin_lock_irq(&mdev->req_lock); while (!__inc_ap_bio_cond(mdev)) { prepare_to_wait(&mdev->misc_wait, &wait, TASK_UNINTERRUPTIBLE); diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index be2d2da9cdba..1fcf2d1bcc39 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -1215,18 +1215,17 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os, ns.pdsk == D_OUTDATED)) { if (get_ldev(mdev)) { if ((ns.role == R_PRIMARY || ns.peer == R_PRIMARY) && - mdev->ldev->md.uuid[UI_BITMAP] == 0 && ns.disk >= D_UP_TO_DATE && - !atomic_read(&mdev->new_c_uuid)) - atomic_set(&mdev->new_c_uuid, 2); + mdev->ldev->md.uuid[UI_BITMAP] == 0 && ns.disk >= D_UP_TO_DATE) { + drbd_uuid_new_current(mdev); + drbd_send_uuids(mdev); + } put_ldev(mdev); } } if (ns.pdsk < D_INCONSISTENT && get_ldev(mdev)) { - /* Diskless peer becomes primary or got connected do diskless, primary peer. */ - if (ns.peer == R_PRIMARY && mdev->ldev->md.uuid[UI_BITMAP] == 0 && - !atomic_read(&mdev->new_c_uuid)) - atomic_set(&mdev->new_c_uuid, 2); + if (ns.peer == R_PRIMARY && mdev->ldev->md.uuid[UI_BITMAP] == 0) + drbd_uuid_new_current(mdev); /* D_DISKLESS Peer becomes secondary */ if (os.peer == R_PRIMARY && ns.peer == R_SECONDARY) @@ -1350,24 +1349,6 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os, drbd_md_sync(mdev); } -static int w_new_current_uuid(struct drbd_conf *mdev, struct drbd_work *w, int cancel) -{ - if (get_ldev(mdev)) { - if (mdev->ldev->md.uuid[UI_BITMAP] == 0) { - drbd_uuid_new_current(mdev); - if (get_net_conf(mdev)) { - drbd_send_uuids(mdev); - put_net_conf(mdev); - } - drbd_md_sync(mdev); - } - put_ldev(mdev); - } - atomic_dec(&mdev->new_c_uuid); - wake_up(&mdev->misc_wait); - - return 1; -} static int drbd_thread_setup(void *arg) { @@ -2708,7 +2689,6 @@ void drbd_init_set_defaults(struct drbd_conf *mdev) atomic_set(&mdev->net_cnt, 0); atomic_set(&mdev->packet_seq, 0); atomic_set(&mdev->pp_in_use, 0); - atomic_set(&mdev->new_c_uuid, 0); mutex_init(&mdev->md_io_mutex); mutex_init(&mdev->data.mutex); @@ -2739,14 +2719,12 @@ void drbd_init_set_defaults(struct drbd_conf *mdev) INIT_LIST_HEAD(&mdev->bm_io_work.w.list); INIT_LIST_HEAD(&mdev->delay_probes); INIT_LIST_HEAD(&mdev->delay_probe_work.list); - INIT_LIST_HEAD(&mdev->uuid_work.list); mdev->resync_work.cb = w_resync_inactive; mdev->unplug_work.cb = w_send_write_hint; mdev->md_sync_work.cb = w_md_sync; mdev->bm_io_work.w.cb = w_bitmap_io; mdev->delay_probe_work.cb = w_delay_probes; - mdev->uuid_work.cb = w_new_current_uuid; init_timer(&mdev->resync_timer); init_timer(&mdev->md_sync_timer); init_timer(&mdev->delay_probe_timer); diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index bc9ab7fb2cc7..a56340dd5710 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -1154,17 +1154,6 @@ int drbd_submit_ee(struct drbd_conf *mdev, struct drbd_epoch_entry *e, unsigned n_bios = 0; unsigned nr_pages = (ds + PAGE_SIZE -1) >> PAGE_SHIFT; - if (atomic_read(&mdev->new_c_uuid)) { - if (atomic_add_unless(&mdev->new_c_uuid, -1, 1)) { - drbd_uuid_new_current(mdev); - drbd_md_sync(mdev); - - atomic_dec(&mdev->new_c_uuid); - wake_up(&mdev->misc_wait); - } - wait_event(mdev->misc_wait, !atomic_read(&mdev->new_c_uuid)); - } - /* In most cases, we will only need one bio. But in case the lower * level restrictions happen to be different at this offset on this * side than those of the sending peer, we may need to submit the -- cgit v1.2.3 From 344fa462e3246bd102059ccc3c59deef416676dd Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Tue, 25 May 2010 14:23:57 +0200 Subject: drbd: improve network latency, TCP_QUICKACK On Thu, Apr 29, 2010 at 04:00:50PM -0400, Eduard.Guzovsky@stratus.com wrote on drbd-dev@lists.linbit.com Subject: [Drbd-dev] DRBD small synchronous writes performance improvements > 1. TCP_QUICKACK option is set incorrectly. The goal was force TCP to > send and ACK as a "one time" event. Instead the code permanently sets > connection in the QUICKACK mode. He is right, we actually want to use an even val with TCP_QUICKACK. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg Signed-off-by: Jens Axboe --- drivers/block/drbd/drbd_int.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index c194348a46ed..6d79a76ba597 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -1540,7 +1540,7 @@ static inline void drbd_tcp_nodelay(struct socket *sock) static inline void drbd_tcp_quickack(struct socket *sock) { - int __user val = 1; + int __user val = 2; (void) drbd_setsockopt(sock, SOL_TCP, TCP_QUICKACK, (char __user *)&val, sizeof(val)); } -- cgit v1.2.3 From 5dbf1673383f2f1554f0634fdfc390d59dc2c7d6 Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Tue, 25 May 2010 16:18:01 +0200 Subject: drbd: need to set socket bufsize early to take effect quoting tcp(7): On individual connections, the socket buffer size must be set prior to the listen(2) or connect(2) calls in order to have it take effect. This adds a wrapper to do so, and uses it appropriately. Improves performance in certain situations. Note that because we cannot easily determine which socket will be "meta" and wich "data" (bulk) socket, we adjust both sockets. Previously, DRBD only adjusted the bufsizes of the "data" socket. Thanks again to Eduard.Guzovsky@stratus.com. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg Signed-off-by: Jens Axboe --- drivers/block/drbd/drbd_receiver.c | 33 +++++++++++++++++++++++---------- 1 file changed, 23 insertions(+), 10 deletions(-) (limited to 'drivers') diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index a56340dd5710..f9a3e199e715 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -571,6 +571,25 @@ static int drbd_recv(struct drbd_conf *mdev, void *buf, size_t size) return rv; } +/* quoting tcp(7): + * On individual connections, the socket buffer size must be set prior to the + * listen(2) or connect(2) calls in order to have it take effect. + * This is our wrapper to do so. + */ +static void drbd_setbufsize(struct socket *sock, unsigned int snd, + unsigned int rcv) +{ + /* open coded SO_SNDBUF, SO_RCVBUF */ + if (snd) { + sock->sk->sk_sndbuf = snd; + sock->sk->sk_userlocks |= SOCK_SNDBUF_LOCK; + } + if (rcv) { + sock->sk->sk_rcvbuf = rcv; + sock->sk->sk_userlocks |= SOCK_RCVBUF_LOCK; + } +} + static struct socket *drbd_try_connect(struct drbd_conf *mdev) { const char *what; @@ -592,6 +611,8 @@ static struct socket *drbd_try_connect(struct drbd_conf *mdev) sock->sk->sk_rcvtimeo = sock->sk->sk_sndtimeo = mdev->net_conf->try_connect_int*HZ; + drbd_setbufsize(sock, mdev->net_conf->sndbuf_size, + mdev->net_conf->rcvbuf_size); /* explicitly bind to the configured IP as source IP * for the outgoing connections. @@ -670,6 +691,8 @@ static struct socket *drbd_wait_for_connect(struct drbd_conf *mdev) s_listen->sk->sk_reuse = 1; /* SO_REUSEADDR */ s_listen->sk->sk_rcvtimeo = timeo; s_listen->sk->sk_sndtimeo = timeo; + drbd_setbufsize(s_listen, mdev->net_conf->sndbuf_size, + mdev->net_conf->rcvbuf_size); what = "bind before listen"; err = s_listen->ops->bind(s_listen, @@ -856,16 +879,6 @@ retry: sock->sk->sk_priority = TC_PRIO_INTERACTIVE_BULK; msock->sk->sk_priority = TC_PRIO_INTERACTIVE; - if (mdev->net_conf->sndbuf_size) { - sock->sk->sk_sndbuf = mdev->net_conf->sndbuf_size; - sock->sk->sk_userlocks |= SOCK_SNDBUF_LOCK; - } - - if (mdev->net_conf->rcvbuf_size) { - sock->sk->sk_rcvbuf = mdev->net_conf->rcvbuf_size; - sock->sk->sk_userlocks |= SOCK_RCVBUF_LOCK; - } - /* NOT YET ... * sock->sk->sk_sndtimeo = mdev->net_conf->timeout*HZ/10; * sock->sk->sk_rcvtimeo = MAX_SCHEDULE_TIMEOUT; -- cgit v1.2.3 From ba11ad9a3b9dd2dbb9c6686ea9d41a9a77d94327 Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Tue, 25 May 2010 16:26:16 +0200 Subject: drbd: improve usage of MSG_MORE It seems to improve performance if we allow the "p_data" header in its own frame (no MSG_MORE), but sendpage all but the last page with MSG_MORE. This is also in preparation of a later zero copy receive implementation. Suggested by Eduard.Guzovsky@stratus.com on drbd-dev. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg Signed-off-by: Jens Axboe --- drivers/block/drbd/drbd_main.c | 32 +++++++++++++++++++------------- 1 file changed, 19 insertions(+), 13 deletions(-) (limited to 'drivers') diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 1fcf2d1bcc39..c978557b4b80 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -2272,9 +2272,9 @@ static int we_should_drop_the_connection(struct drbd_conf *mdev, struct socket * * with page_count == 0 or PageSlab. */ static int _drbd_no_send_page(struct drbd_conf *mdev, struct page *page, - int offset, size_t size) + int offset, size_t size, unsigned msg_flags) { - int sent = drbd_send(mdev, mdev->data.socket, kmap(page) + offset, size, 0); + int sent = drbd_send(mdev, mdev->data.socket, kmap(page) + offset, size, msg_flags); kunmap(page); if (sent == size) mdev->send_cnt += size>>9; @@ -2282,7 +2282,7 @@ static int _drbd_no_send_page(struct drbd_conf *mdev, struct page *page, } static int _drbd_send_page(struct drbd_conf *mdev, struct page *page, - int offset, size_t size) + int offset, size_t size, unsigned msg_flags) { mm_segment_t oldfs = get_fs(); int sent, ok; @@ -2295,14 +2295,15 @@ static int _drbd_send_page(struct drbd_conf *mdev, struct page *page, * __page_cache_release a page that would actually still be referenced * by someone, leading to some obscure delayed Oops somewhere else. */ if (disable_sendpage || (page_count(page) < 1) || PageSlab(page)) - return _drbd_no_send_page(mdev, page, offset, size); + return _drbd_no_send_page(mdev, page, offset, size, msg_flags); + msg_flags |= MSG_NOSIGNAL; drbd_update_congested(mdev); set_fs(KERNEL_DS); do { sent = mdev->data.socket->ops->sendpage(mdev->data.socket, page, offset, len, - MSG_NOSIGNAL); + msg_flags); if (sent == -EAGAIN) { if (we_should_drop_the_connection(mdev, mdev->data.socket)) @@ -2331,9 +2332,11 @@ static int _drbd_send_bio(struct drbd_conf *mdev, struct bio *bio) { struct bio_vec *bvec; int i; + /* hint all but last page with MSG_MORE */ __bio_for_each_segment(bvec, bio, i, 0) { if (!_drbd_no_send_page(mdev, bvec->bv_page, - bvec->bv_offset, bvec->bv_len)) + bvec->bv_offset, bvec->bv_len, + i == bio->bi_vcnt -1 ? 0 : MSG_MORE)) return 0; } return 1; @@ -2343,12 +2346,13 @@ static int _drbd_send_zc_bio(struct drbd_conf *mdev, struct bio *bio) { struct bio_vec *bvec; int i; + /* hint all but last page with MSG_MORE */ __bio_for_each_segment(bvec, bio, i, 0) { if (!_drbd_send_page(mdev, bvec->bv_page, - bvec->bv_offset, bvec->bv_len)) + bvec->bv_offset, bvec->bv_len, + i == bio->bi_vcnt -1 ? 0 : MSG_MORE)) return 0; } - return 1; } @@ -2356,9 +2360,11 @@ static int _drbd_send_zc_ee(struct drbd_conf *mdev, struct drbd_epoch_entry *e) { struct page *page = e->pages; unsigned len = e->size; + /* hint all but last page with MSG_MORE */ page_chain_for_each(page) { unsigned l = min_t(unsigned, len, PAGE_SIZE); - if (!_drbd_send_page(mdev, page, 0, l)) + if (!_drbd_send_page(mdev, page, 0, l, + page_chain_next(page) ? MSG_MORE : 0)) return 0; len -= l; } @@ -2438,11 +2444,11 @@ int drbd_send_dblock(struct drbd_conf *mdev, struct drbd_request *req) p.dp_flags = cpu_to_be32(dp_flags); set_bit(UNPLUG_REMOTE, &mdev->flags); ok = (sizeof(p) == - drbd_send(mdev, mdev->data.socket, &p, sizeof(p), MSG_MORE)); + drbd_send(mdev, mdev->data.socket, &p, sizeof(p), dgs ? MSG_MORE : 0)); if (ok && dgs) { dgb = mdev->int_dig_out; drbd_csum_bio(mdev, mdev->integrity_w_tfm, req->master_bio, dgb); - ok = drbd_send(mdev, mdev->data.socket, dgb, dgs, MSG_MORE); + ok = drbd_send(mdev, mdev->data.socket, dgb, dgs, 0); } if (ok) { if (mdev->net_conf->wire_protocol == DRBD_PROT_A) @@ -2491,11 +2497,11 @@ int drbd_send_block(struct drbd_conf *mdev, enum drbd_packets cmd, return 0; ok = sizeof(p) == drbd_send(mdev, mdev->data.socket, &p, - sizeof(p), MSG_MORE); + sizeof(p), dgs ? MSG_MORE : 0); if (ok && dgs) { dgb = mdev->int_dig_out; drbd_csum_ee(mdev, mdev->integrity_w_tfm, e, dgb); - ok = drbd_send(mdev, mdev->data.socket, dgb, dgs, MSG_MORE); + ok = drbd_send(mdev, mdev->data.socket, dgb, dgs, 0); } if (ok) ok = _drbd_send_zc_ee(mdev, e); -- cgit v1.2.3 From 039e1fb65496636778e24c881a5e58ed7c39fbb3 Mon Sep 17 00:00:00 2001 From: Andrea Gelmini Date: Sun, 23 May 2010 21:48:13 +0200 Subject: drbd: removed duplicated #includes drbd/drbd_receiver.c: linux/mm.h is included more than once. Signed-off-by: Andrea Gelmini Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg Signed-off-by: Jens Axboe --- drivers/block/drbd/drbd_receiver.c | 1 - 1 file changed, 1 deletion(-) (limited to 'drivers') diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index f9a3e199e715..dff48701b84d 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -42,7 +42,6 @@ #include #include #include -#include #include #include #include "drbd_int.h" -- cgit v1.2.3 From 32fa7e91f923d8b2578c42016ff3a94efc9968a2 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Wed, 26 May 2010 17:13:18 +0200 Subject: drbd: Removed the now empty w_io_error() function Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg Signed-off-by: Jens Axboe --- drivers/block/drbd/drbd_int.h | 1 - drivers/block/drbd/drbd_req.c | 27 +-------------------------- drivers/block/drbd/drbd_worker.c | 14 -------------- 3 files changed, 1 insertion(+), 41 deletions(-) (limited to 'drivers') diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index 6d79a76ba597..86605f6d0854 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -1474,7 +1474,6 @@ extern int w_e_end_ov_req(struct drbd_conf *, struct drbd_work *, int); extern int w_ov_finished(struct drbd_conf *, struct drbd_work *, int); extern int w_resync_inactive(struct drbd_conf *, struct drbd_work *, int); extern int w_resume_next_sg(struct drbd_conf *, struct drbd_work *, int); -extern int w_io_error(struct drbd_conf *, struct drbd_work *, int); extern int w_send_write_hint(struct drbd_conf *, struct drbd_work *, int); extern int w_make_resync_request(struct drbd_conf *, struct drbd_work *, int); extern int w_send_dblock(struct drbd_conf *, struct drbd_work *, int); diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c index 3397f11d0ba9..e6c4d579eaba 100644 --- a/drivers/block/drbd/drbd_req.c +++ b/drivers/block/drbd/drbd_req.c @@ -102,32 +102,7 @@ static void _req_is_done(struct drbd_conf *mdev, struct drbd_request *req, const } } - /* if it was a local io error, we want to notify our - * peer about that, and see if we need to - * detach the disk and stuff. - * to avoid allocating some special work - * struct, reuse the request. */ - - /* THINK - * why do we do this not when we detect the error, - * but delay it until it is "done", i.e. possibly - * until the next barrier ack? */ - - if (rw == WRITE && - ((s & RQ_LOCAL_MASK) && !(s & RQ_LOCAL_OK))) { - if (!(req->w.list.next == LIST_POISON1 || - list_empty(&req->w.list))) { - /* DEBUG ASSERT only; if this triggers, we - * probably corrupt the worker list here */ - dev_err(DEV, "req->w.list.next = %p\n", req->w.list.next); - dev_err(DEV, "req->w.list.prev = %p\n", req->w.list.prev); - } - req->w.cb = w_io_error; - drbd_queue_work(&mdev->data.work, &req->w); - /* drbd_req_free() is done in w_io_error */ - } else { - drbd_req_free(req); - } + drbd_req_free(req); } static void queue_barrier(struct drbd_conf *mdev) diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c index 727ff6339754..a12b447bafbd 100644 --- a/drivers/block/drbd/drbd_worker.c +++ b/drivers/block/drbd/drbd_worker.c @@ -257,20 +257,6 @@ void drbd_endio_pri(struct bio *bio, int error) complete_master_bio(mdev, &m); } -int w_io_error(struct drbd_conf *mdev, struct drbd_work *w, int cancel) -{ - struct drbd_request *req = container_of(w, struct drbd_request, w); - - /* NOTE: mdev->ldev can be NULL by the time we get here! */ - /* D_ASSERT(mdev->ldev->dc.on_io_error != EP_PASS_ON); */ - - /* the only way this callback is scheduled is from _req_may_be_done, - * when it is done and had a local write error, see comments there */ - drbd_req_free(req); - - return TRUE; -} - int w_read_retry_remote(struct drbd_conf *mdev, struct drbd_work *w, int cancel) { struct drbd_request *req = container_of(w, struct drbd_request, w); -- cgit v1.2.3 From d255e5ff5fc6cc6c60dd014d1261448a7bbc8134 Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Thu, 27 May 2010 09:45:45 +0200 Subject: drbd: fix hang on local read errors while disconnected "canceled" w_read_retry_remote never completed, if they have been canceled after drbd_disconnect connection teardown cleanup has already run (or we are currently not connected anyways). Fixed by not queueing a remote retry if we already know it won't work (pdsk not uptodate), and cleanup ourselves on "cancel", in case we hit a race with drbd_disconnect. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg Signed-off-by: Jens Axboe --- drivers/block/drbd/drbd_req.c | 22 +++++++++++++--------- drivers/block/drbd/drbd_req.h | 1 + drivers/block/drbd/drbd_worker.c | 6 ++---- 3 files changed, 16 insertions(+), 13 deletions(-) (limited to 'drivers') diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c index e6c4d579eaba..8915644af722 100644 --- a/drivers/block/drbd/drbd_req.c +++ b/drivers/block/drbd/drbd_req.c @@ -452,20 +452,21 @@ void __req_mod(struct drbd_request *req, enum drbd_req_event what, dev_alert(DEV, "Local READ failed sec=%llus size=%u\n", (unsigned long long)req->sector, req->size); - /* _req_mod(req,to_be_send); oops, recursion... */ D_ASSERT(!(req->rq_state & RQ_NET_MASK)); - req->rq_state |= RQ_NET_PENDING; - inc_ap_pending(mdev); __drbd_chk_io_error(mdev, FALSE); put_ldev(mdev); - /* NOTE: if we have no connection, - * or know the peer has no good data either, - * then we don't actually need to "queue_for_net_read", - * but we do so anyways, since the drbd_io_error() - * and the potential state change to "Diskless" - * needs to be done from process context */ + /* no point in retrying if there is no good remote data, + * or we have no connection. */ + if (mdev->state.pdsk != D_UP_TO_DATE) { + _req_may_be_done(req, m); + break; + } + + /* _req_mod(req,to_be_send); oops, recursion... */ + req->rq_state |= RQ_NET_PENDING; + inc_ap_pending(mdev); /* fall through: _req_mod(req,queue_for_net_read); */ case queue_for_net_read: @@ -575,6 +576,9 @@ void __req_mod(struct drbd_request *req, enum drbd_req_event what, _req_may_be_done(req, m); break; + case read_retry_remote_canceled: + req->rq_state &= ~RQ_NET_QUEUED; + /* fall through, in case we raced with drbd_disconnect */ case connection_lost_while_pending: /* transfer log cleanup after connection loss */ /* assert something? */ diff --git a/drivers/block/drbd/drbd_req.h b/drivers/block/drbd/drbd_req.h index 16119d7056cc..02d575d24518 100644 --- a/drivers/block/drbd/drbd_req.h +++ b/drivers/block/drbd/drbd_req.h @@ -91,6 +91,7 @@ enum drbd_req_event { send_failed, handed_over_to_network, connection_lost_while_pending, + read_retry_remote_canceled, recv_acked_by_peer, write_acked_by_peer, write_acked_by_peer_and_sis, /* and set_in_sync */ diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c index a12b447bafbd..67371fcbc5aa 100644 --- a/drivers/block/drbd/drbd_worker.c +++ b/drivers/block/drbd/drbd_worker.c @@ -266,10 +266,8 @@ int w_read_retry_remote(struct drbd_conf *mdev, struct drbd_work *w, int cancel) * to give the disk the chance to relocate that block */ spin_lock_irq(&mdev->req_lock); - if (cancel || - mdev->state.conn < C_CONNECTED || - mdev->state.pdsk <= D_INCONSISTENT) { - _req_mod(req, send_canceled); + if (cancel || mdev->state.pdsk != D_UP_TO_DATE) { + _req_mod(req, read_retry_remote_canceled); spin_unlock_irq(&mdev->req_lock); dev_alert(DEV, "WE ARE LOST. Local IO failure, no peer.\n"); return 1; -- cgit v1.2.3 From 7383506c87237dbd627f0b8b72b50117f25c5ca2 Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Thu, 27 May 2010 11:51:56 +0200 Subject: drbd: use drbd specific ratelimit instead of global printk_ratelimit using the global printk_ratelimit() may mask other messages. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg Signed-off-by: Jens Axboe --- drivers/block/drbd/drbd_int.h | 2 +- drivers/block/drbd/drbd_main.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index 86605f6d0854..485ed8c7d623 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -1725,7 +1725,7 @@ static inline void __drbd_chk_io_error_(struct drbd_conf *mdev, int forcedetach, switch (mdev->ldev->dc.on_io_error) { case EP_PASS_ON: if (!forcedetach) { - if (printk_ratelimit()) + if (__ratelimit(&drbd_ratelimit_state)) dev_err(DEV, "Local IO failed in %s." "Passing error on...\n", where); break; diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index c978557b4b80..6b077f93acc6 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -3783,7 +3783,7 @@ _drbd_insert_fault(struct drbd_conf *mdev, unsigned int type) if (ret) { fault_count++; - if (printk_ratelimit()) + if (__ratelimit(&drbd_ratelimit_state)) dev_warn(DEV, "***Simulating %s failure\n", _drbd_fault_str(type)); } -- cgit v1.2.3 From 2a0ab2cd73c26835e635ed4e3868f983519048fb Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Wed, 26 May 2010 17:59:55 +0200 Subject: drbd: Reduce verbosity The "Local READ/WRITE failed" messages are too verbose. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg Signed-off-by: Jens Axboe --- drivers/block/drbd/drbd_req.c | 5 ----- drivers/block/drbd/drbd_worker.c | 4 ---- 2 files changed, 9 deletions(-) (limited to 'drivers') diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c index 8915644af722..654f1ef5cbb0 100644 --- a/drivers/block/drbd/drbd_req.c +++ b/drivers/block/drbd/drbd_req.c @@ -428,9 +428,6 @@ void __req_mod(struct drbd_request *req, enum drbd_req_event what, req->rq_state |= RQ_LOCAL_COMPLETED; req->rq_state &= ~RQ_LOCAL_PENDING; - dev_alert(DEV, "Local WRITE failed sec=%llus size=%u\n", - (unsigned long long)req->sector, req->size); - /* and now: check how to handle local io error. */ __drbd_chk_io_error(mdev, FALSE); _req_may_be_done(req, m); put_ldev(mdev); @@ -450,8 +447,6 @@ void __req_mod(struct drbd_request *req, enum drbd_req_event what, req->rq_state |= RQ_LOCAL_COMPLETED; req->rq_state &= ~RQ_LOCAL_PENDING; - dev_alert(DEV, "Local READ failed sec=%llus size=%u\n", - (unsigned long long)req->sector, req->size); D_ASSERT(!(req->rq_state & RQ_NET_MASK)); __drbd_chk_io_error(mdev, FALSE); diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c index 67371fcbc5aa..b623ceee2a4a 100644 --- a/drivers/block/drbd/drbd_worker.c +++ b/drivers/block/drbd/drbd_worker.c @@ -224,9 +224,6 @@ void drbd_endio_pri(struct bio *bio, int error) enum drbd_req_event what; int uptodate = bio_flagged(bio, BIO_UPTODATE); - if (error) - dev_warn(DEV, "p %s: error=%d\n", - bio_data_dir(bio) == WRITE ? "write" : "read", error); if (!error && !uptodate) { dev_warn(DEV, "p %s: setting error to -EIO\n", bio_data_dir(bio) == WRITE ? "write" : "read"); @@ -269,7 +266,6 @@ int w_read_retry_remote(struct drbd_conf *mdev, struct drbd_work *w, int cancel) if (cancel || mdev->state.pdsk != D_UP_TO_DATE) { _req_mod(req, read_retry_remote_canceled); spin_unlock_irq(&mdev->req_lock); - dev_alert(DEV, "WE ARE LOST. Local IO failure, no peer.\n"); return 1; } spin_unlock_irq(&mdev->req_lock); -- cgit v1.2.3 From 713b686494a577b3c4f4f9f585a4705fc30d51c2 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Tue, 1 Jun 2010 12:17:48 +0200 Subject: cciss: call BUG() earlier I moved the range check after the increment. The current code would write past the end of the array once before calling BUG(). Signed-off-by: Dan Carpenter Signed-off-by: Jens Axboe --- drivers/block/cciss_scsi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/block/cciss_scsi.c b/drivers/block/cciss_scsi.c index e1d0e2cfec72..3381505c8a6c 100644 --- a/drivers/block/cciss_scsi.c +++ b/drivers/block/cciss_scsi.c @@ -188,11 +188,11 @@ scsi_cmd_free(ctlr_info_t *h, CommandList_struct *cmd) sa = h->scsi_ctlr; stk = &sa->cmd_stack; + stk->top++; if (stk->top >= CMD_STACK_SIZE) { printk("cciss: scsi_cmd_free called too many times.\n"); BUG(); } - stk->top++; stk->elem[stk->top] = (struct cciss_scsi_cmd_stack_elem_t *) cmd; } -- cgit v1.2.3