diff options
Diffstat (limited to 'drivers/md/dm-kcopyd.c')
-rw-r--r-- | drivers/md/dm-kcopyd.c | 139 |
1 files changed, 134 insertions, 5 deletions
diff --git a/drivers/md/dm-kcopyd.c b/drivers/md/dm-kcopyd.c index bed444c93d8d..d581fe5d2faf 100644 --- a/drivers/md/dm-kcopyd.c +++ b/drivers/md/dm-kcopyd.c @@ -22,6 +22,7 @@ #include <linux/vmalloc.h> #include <linux/workqueue.h> #include <linux/mutex.h> +#include <linux/delay.h> #include <linux/device-mapper.h> #include <linux/dm-kcopyd.h> @@ -51,6 +52,8 @@ struct dm_kcopyd_client { struct workqueue_struct *kcopyd_wq; struct work_struct kcopyd_work; + struct dm_kcopyd_throttle *throttle; + /* * We maintain three lists of jobs: * @@ -68,6 +71,117 @@ struct dm_kcopyd_client { static struct page_list zero_page_list; +static DEFINE_SPINLOCK(throttle_spinlock); + +/* + * IO/IDLE accounting slowly decays after (1 << ACCOUNT_INTERVAL_SHIFT) period. + * When total_period >= (1 << ACCOUNT_INTERVAL_SHIFT) the counters are divided + * by 2. + */ +#define ACCOUNT_INTERVAL_SHIFT SHIFT_HZ + +/* + * Sleep this number of milliseconds. + * + * The value was decided experimentally. + * Smaller values seem to cause an increased copy rate above the limit. + * The reason for this is unknown but possibly due to jiffies rounding errors + * or read/write cache inside the disk. + */ +#define SLEEP_MSEC 100 + +/* + * Maximum number of sleep events. There is a theoretical livelock if more + * kcopyd clients do work simultaneously which this limit avoids. + */ +#define MAX_SLEEPS 10 + +static void io_job_start(struct dm_kcopyd_throttle *t) +{ + unsigned throttle, now, difference; + int slept = 0, skew; + + if (unlikely(!t)) + return; + +try_again: + spin_lock_irq(&throttle_spinlock); + + throttle = ACCESS_ONCE(t->throttle); + + if (likely(throttle >= 100)) + goto skip_limit; + + now = jiffies; + difference = now - t->last_jiffies; + t->last_jiffies = now; + if (t->num_io_jobs) + t->io_period += difference; + t->total_period += difference; + + /* + * Maintain sane values if we got a temporary overflow. + */ + if (unlikely(t->io_period > t->total_period)) + t->io_period = t->total_period; + + if (unlikely(t->total_period >= (1 << ACCOUNT_INTERVAL_SHIFT))) { + int shift = fls(t->total_period >> ACCOUNT_INTERVAL_SHIFT); + t->total_period >>= shift; + t->io_period >>= shift; + } + + skew = t->io_period - throttle * t->total_period / 100; + + if (unlikely(skew > 0) && slept < MAX_SLEEPS) { + slept++; + spin_unlock_irq(&throttle_spinlock); + msleep(SLEEP_MSEC); + goto try_again; + } + +skip_limit: + t->num_io_jobs++; + + spin_unlock_irq(&throttle_spinlock); +} + +static void io_job_finish(struct dm_kcopyd_throttle *t) +{ + unsigned long flags; + + if (unlikely(!t)) + return; + + spin_lock_irqsave(&throttle_spinlock, flags); + + t->num_io_jobs--; + + if (likely(ACCESS_ONCE(t->throttle) >= 100)) + goto skip_limit; + + if (!t->num_io_jobs) { + unsigned now, difference; + + now = jiffies; + difference = now - t->last_jiffies; + t->last_jiffies = now; + + t->io_period += difference; + t->total_period += difference; + + /* + * Maintain sane values if we got a temporary overflow. + */ + if (unlikely(t->io_period > t->total_period)) + t->io_period = t->total_period; + } + +skip_limit: + spin_unlock_irqrestore(&throttle_spinlock, flags); +} + + static void wake(struct dm_kcopyd_client *kc) { queue_work(kc->kcopyd_wq, &kc->kcopyd_work); @@ -348,8 +462,10 @@ static void complete_io(unsigned long error, void *context) struct kcopyd_job *job = (struct kcopyd_job *) context; struct dm_kcopyd_client *kc = job->kc; + io_job_finish(kc->throttle); + if (error) { - if (job->rw == WRITE) + if (job->rw & WRITE) job->write_err |= error; else job->read_err = 1; @@ -361,7 +477,7 @@ static void complete_io(unsigned long error, void *context) } } - if (job->rw == WRITE) + if (job->rw & WRITE) push(&kc->complete_jobs, job); else { @@ -389,6 +505,8 @@ static int run_io_job(struct kcopyd_job *job) .client = job->kc->io_client, }; + io_job_start(job->kc->throttle); + if (job->rw == READ) r = dm_io(&io_req, 1, &job->source, NULL); else @@ -432,7 +550,7 @@ static int process_jobs(struct list_head *jobs, struct dm_kcopyd_client *kc, if (r < 0) { /* error this rogue job */ - if (job->rw == WRITE) + if (job->rw & WRITE) job->write_err = (unsigned long) -1L; else job->read_err = 1; @@ -585,6 +703,7 @@ int dm_kcopyd_copy(struct dm_kcopyd_client *kc, struct dm_io_region *from, unsigned int flags, dm_kcopyd_notify_fn fn, void *context) { struct kcopyd_job *job; + int i; /* * Allocate an array of jobs consisting of one master job @@ -611,7 +730,16 @@ int dm_kcopyd_copy(struct dm_kcopyd_client *kc, struct dm_io_region *from, memset(&job->source, 0, sizeof job->source); job->source.count = job->dests[0].count; job->pages = &zero_page_list; - job->rw = WRITE; + + /* + * Use WRITE SAME to optimize zeroing if all dests support it. + */ + job->rw = WRITE | REQ_WRITE_SAME; + for (i = 0; i < job->num_dests; i++) + if (!bdev_write_same(job->dests[i].bdev)) { + job->rw = WRITE; + break; + } } job->fn = fn; @@ -685,7 +813,7 @@ int kcopyd_cancel(struct kcopyd_job *job, int block) /*----------------------------------------------------------------- * Client setup *---------------------------------------------------------------*/ -struct dm_kcopyd_client *dm_kcopyd_client_create(void) +struct dm_kcopyd_client *dm_kcopyd_client_create(struct dm_kcopyd_throttle *throttle) { int r = -ENOMEM; struct dm_kcopyd_client *kc; @@ -698,6 +826,7 @@ struct dm_kcopyd_client *dm_kcopyd_client_create(void) INIT_LIST_HEAD(&kc->complete_jobs); INIT_LIST_HEAD(&kc->io_jobs); INIT_LIST_HEAD(&kc->pages_jobs); + kc->throttle = throttle; kc->job_pool = mempool_create_slab_pool(MIN_JOBS, _job_cache); if (!kc->job_pool) |