From d221938c049f4845da13c8593132595a6b9222a8 Mon Sep 17 00:00:00 2001 From: Gautham R Shenoy Date: Fri, 25 Jan 2008 21:08:01 +0100 Subject: cpu-hotplug: refcount based cpu hotplug This patch implements a Refcount + Waitqueue based model for cpu-hotplug. Now, a thread which wants to prevent cpu-hotplug, will bump up a global refcount and the thread which wants to perform a cpu-hotplug operation will block till the global refcount goes to zero. The readers, if any, during an ongoing cpu-hotplug operation are blocked until the cpu-hotplug operation is over. Signed-off-by: Gautham R Shenoy Signed-off-by: Paul Jackson [For !CONFIG_HOTPLUG_CPU ] Signed-off-by: Ingo Molnar --- kernel/cpu.c | 152 +++++++++++++++++++++++++++++++++++++++++++---------------- 1 file changed, 111 insertions(+), 41 deletions(-) (limited to 'kernel/cpu.c') diff --git a/kernel/cpu.c b/kernel/cpu.c index 6b3a0c15144f..656dc3fcbbae 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -15,9 +15,8 @@ #include #include -/* This protects CPUs going up and down... */ +/* Serializes the updates to cpu_online_map, cpu_present_map */ static DEFINE_MUTEX(cpu_add_remove_lock); -static DEFINE_MUTEX(cpu_bitmask_lock); static __cpuinitdata RAW_NOTIFIER_HEAD(cpu_chain); @@ -26,52 +25,123 @@ static __cpuinitdata RAW_NOTIFIER_HEAD(cpu_chain); */ static int cpu_hotplug_disabled; -#ifdef CONFIG_HOTPLUG_CPU +static struct { + struct task_struct *active_writer; + struct mutex lock; /* Synchronizes accesses to refcount, */ + /* + * Also blocks the new readers during + * an ongoing cpu hotplug operation. + */ + int refcount; + wait_queue_head_t writer_queue; +} cpu_hotplug; -/* Crappy recursive lock-takers in cpufreq! Complain loudly about idiots */ -static struct task_struct *recursive; -static int recursive_depth; +#define writer_exists() (cpu_hotplug.active_writer != NULL) + +void __init cpu_hotplug_init(void) +{ + cpu_hotplug.active_writer = NULL; + mutex_init(&cpu_hotplug.lock); + cpu_hotplug.refcount = 0; + init_waitqueue_head(&cpu_hotplug.writer_queue); +} + +#ifdef CONFIG_HOTPLUG_CPU void lock_cpu_hotplug(void) { - struct task_struct *tsk = current; - - if (tsk == recursive) { - static int warnings = 10; - if (warnings) { - printk(KERN_ERR "Lukewarm IQ detected in hotplug locking\n"); - WARN_ON(1); - warnings--; - } - recursive_depth++; + might_sleep(); + if (cpu_hotplug.active_writer == current) return; - } - mutex_lock(&cpu_bitmask_lock); - recursive = tsk; + mutex_lock(&cpu_hotplug.lock); + cpu_hotplug.refcount++; + mutex_unlock(&cpu_hotplug.lock); + } EXPORT_SYMBOL_GPL(lock_cpu_hotplug); void unlock_cpu_hotplug(void) { - WARN_ON(recursive != current); - if (recursive_depth) { - recursive_depth--; + if (cpu_hotplug.active_writer == current) return; - } - recursive = NULL; - mutex_unlock(&cpu_bitmask_lock); + mutex_lock(&cpu_hotplug.lock); + cpu_hotplug.refcount--; + + if (unlikely(writer_exists()) && !cpu_hotplug.refcount) + wake_up(&cpu_hotplug.writer_queue); + + mutex_unlock(&cpu_hotplug.lock); + } EXPORT_SYMBOL_GPL(unlock_cpu_hotplug); #endif /* CONFIG_HOTPLUG_CPU */ +/* + * The following two API's must be used when attempting + * to serialize the updates to cpu_online_map, cpu_present_map. + */ +void cpu_maps_update_begin(void) +{ + mutex_lock(&cpu_add_remove_lock); +} + +void cpu_maps_update_done(void) +{ + mutex_unlock(&cpu_add_remove_lock); +} + +/* + * This ensures that the hotplug operation can begin only when the + * refcount goes to zero. + * + * Note that during a cpu-hotplug operation, the new readers, if any, + * will be blocked by the cpu_hotplug.lock + * + * Since cpu_maps_update_begin is always called after invoking + * cpu_maps_update_begin, we can be sure that only one writer is active. + * + * Note that theoretically, there is a possibility of a livelock: + * - Refcount goes to zero, last reader wakes up the sleeping + * writer. + * - Last reader unlocks the cpu_hotplug.lock. + * - A new reader arrives at this moment, bumps up the refcount. + * - The writer acquires the cpu_hotplug.lock finds the refcount + * non zero and goes to sleep again. + * + * However, this is very difficult to achieve in practice since + * lock_cpu_hotplug() not an api which is called all that often. + * + */ +static void cpu_hotplug_begin(void) +{ + DECLARE_WAITQUEUE(wait, current); + + mutex_lock(&cpu_hotplug.lock); + + cpu_hotplug.active_writer = current; + add_wait_queue_exclusive(&cpu_hotplug.writer_queue, &wait); + while (cpu_hotplug.refcount) { + set_current_state(TASK_UNINTERRUPTIBLE); + mutex_unlock(&cpu_hotplug.lock); + schedule(); + mutex_lock(&cpu_hotplug.lock); + } + remove_wait_queue_locked(&cpu_hotplug.writer_queue, &wait); +} + +static void cpu_hotplug_done(void) +{ + cpu_hotplug.active_writer = NULL; + mutex_unlock(&cpu_hotplug.lock); +} /* Need to know about CPUs going up/down? */ int __cpuinit register_cpu_notifier(struct notifier_block *nb) { int ret; - mutex_lock(&cpu_add_remove_lock); + cpu_maps_update_begin(); ret = raw_notifier_chain_register(&cpu_chain, nb); - mutex_unlock(&cpu_add_remove_lock); + cpu_maps_update_done(); return ret; } @@ -81,9 +151,9 @@ EXPORT_SYMBOL(register_cpu_notifier); void unregister_cpu_notifier(struct notifier_block *nb) { - mutex_lock(&cpu_add_remove_lock); + cpu_maps_update_begin(); raw_notifier_chain_unregister(&cpu_chain, nb); - mutex_unlock(&cpu_add_remove_lock); + cpu_maps_update_done(); } EXPORT_SYMBOL(unregister_cpu_notifier); @@ -147,6 +217,7 @@ static int _cpu_down(unsigned int cpu, int tasks_frozen) if (!cpu_online(cpu)) return -EINVAL; + cpu_hotplug_begin(); raw_notifier_call_chain(&cpu_chain, CPU_LOCK_ACQUIRE, hcpu); err = __raw_notifier_call_chain(&cpu_chain, CPU_DOWN_PREPARE | mod, hcpu, -1, &nr_calls); @@ -166,9 +237,7 @@ static int _cpu_down(unsigned int cpu, int tasks_frozen) cpu_clear(cpu, tmp); set_cpus_allowed(current, tmp); - mutex_lock(&cpu_bitmask_lock); p = __stop_machine_run(take_cpu_down, &tcd_param, cpu); - mutex_unlock(&cpu_bitmask_lock); if (IS_ERR(p) || cpu_online(cpu)) { /* CPU didn't die: tell everyone. Can't complain. */ @@ -203,6 +272,7 @@ out_allowed: set_cpus_allowed(current, old_allowed); out_release: raw_notifier_call_chain(&cpu_chain, CPU_LOCK_RELEASE, hcpu); + cpu_hotplug_done(); return err; } @@ -210,13 +280,13 @@ int cpu_down(unsigned int cpu) { int err = 0; - mutex_lock(&cpu_add_remove_lock); + cpu_maps_update_begin(); if (cpu_hotplug_disabled) err = -EBUSY; else err = _cpu_down(cpu, 0); - mutex_unlock(&cpu_add_remove_lock); + cpu_maps_update_done(); return err; } #endif /*CONFIG_HOTPLUG_CPU*/ @@ -231,6 +301,7 @@ static int __cpuinit _cpu_up(unsigned int cpu, int tasks_frozen) if (cpu_online(cpu) || !cpu_present(cpu)) return -EINVAL; + cpu_hotplug_begin(); raw_notifier_call_chain(&cpu_chain, CPU_LOCK_ACQUIRE, hcpu); ret = __raw_notifier_call_chain(&cpu_chain, CPU_UP_PREPARE | mod, hcpu, -1, &nr_calls); @@ -243,9 +314,7 @@ static int __cpuinit _cpu_up(unsigned int cpu, int tasks_frozen) } /* Arch-specific enabling code. */ - mutex_lock(&cpu_bitmask_lock); ret = __cpu_up(cpu); - mutex_unlock(&cpu_bitmask_lock); if (ret != 0) goto out_notify; BUG_ON(!cpu_online(cpu)); @@ -258,6 +327,7 @@ out_notify: __raw_notifier_call_chain(&cpu_chain, CPU_UP_CANCELED | mod, hcpu, nr_calls, NULL); raw_notifier_call_chain(&cpu_chain, CPU_LOCK_RELEASE, hcpu); + cpu_hotplug_done(); return ret; } @@ -275,13 +345,13 @@ int __cpuinit cpu_up(unsigned int cpu) return -EINVAL; } - mutex_lock(&cpu_add_remove_lock); + cpu_maps_update_begin(); if (cpu_hotplug_disabled) err = -EBUSY; else err = _cpu_up(cpu, 0); - mutex_unlock(&cpu_add_remove_lock); + cpu_maps_update_done(); return err; } @@ -292,7 +362,7 @@ int disable_nonboot_cpus(void) { int cpu, first_cpu, error = 0; - mutex_lock(&cpu_add_remove_lock); + cpu_maps_update_begin(); first_cpu = first_cpu(cpu_online_map); /* We take down all of the non-boot CPUs in one shot to avoid races * with the userspace trying to use the CPU hotplug at the same time @@ -319,7 +389,7 @@ int disable_nonboot_cpus(void) } else { printk(KERN_ERR "Non-boot CPUs are not disabled\n"); } - mutex_unlock(&cpu_add_remove_lock); + cpu_maps_update_done(); return error; } @@ -328,7 +398,7 @@ void enable_nonboot_cpus(void) int cpu, error; /* Allow everyone to use the CPU hotplug again */ - mutex_lock(&cpu_add_remove_lock); + cpu_maps_update_begin(); cpu_hotplug_disabled = 0; if (cpus_empty(frozen_cpus)) goto out; @@ -344,6 +414,6 @@ void enable_nonboot_cpus(void) } cpus_clear(frozen_cpus); out: - mutex_unlock(&cpu_add_remove_lock); + cpu_maps_update_done(); } #endif /* CONFIG_PM_SLEEP_SMP */ -- cgit v1.2.3 From 86ef5c9a8edd78e6bf92879f32329d89b2d55b5a Mon Sep 17 00:00:00 2001 From: Gautham R Shenoy Date: Fri, 25 Jan 2008 21:08:02 +0100 Subject: cpu-hotplug: replace lock_cpu_hotplug() with get_online_cpus() Replace all lock_cpu_hotplug/unlock_cpu_hotplug from the kernel and use get_online_cpus and put_online_cpus instead as it highlights the refcount semantics in these operations. The new API guarantees protection against the cpu-hotplug operation, but it doesn't guarantee serialized access to any of the local data structures. Hence the changes needs to be reviewed. In case of pseries_add_processor/pseries_remove_processor, use cpu_maps_update_begin()/cpu_maps_update_done() as we're modifying the cpu_present_map there. Signed-off-by: Gautham R Shenoy Signed-off-by: Ingo Molnar --- kernel/cpu.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'kernel/cpu.c') diff --git a/kernel/cpu.c b/kernel/cpu.c index 656dc3fcbbae..b0c4152995f8 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -48,7 +48,7 @@ void __init cpu_hotplug_init(void) #ifdef CONFIG_HOTPLUG_CPU -void lock_cpu_hotplug(void) +void get_online_cpus(void) { might_sleep(); if (cpu_hotplug.active_writer == current) @@ -58,9 +58,9 @@ void lock_cpu_hotplug(void) mutex_unlock(&cpu_hotplug.lock); } -EXPORT_SYMBOL_GPL(lock_cpu_hotplug); +EXPORT_SYMBOL_GPL(get_online_cpus); -void unlock_cpu_hotplug(void) +void put_online_cpus(void) { if (cpu_hotplug.active_writer == current) return; @@ -73,7 +73,7 @@ void unlock_cpu_hotplug(void) mutex_unlock(&cpu_hotplug.lock); } -EXPORT_SYMBOL_GPL(unlock_cpu_hotplug); +EXPORT_SYMBOL_GPL(put_online_cpus); #endif /* CONFIG_HOTPLUG_CPU */ @@ -110,7 +110,7 @@ void cpu_maps_update_done(void) * non zero and goes to sleep again. * * However, this is very difficult to achieve in practice since - * lock_cpu_hotplug() not an api which is called all that often. + * get_online_cpus() not an api which is called all that often. * */ static void cpu_hotplug_begin(void) -- cgit v1.2.3 From 95402b3829010fe1e208f44e4a158ccade88969a Mon Sep 17 00:00:00 2001 From: Gautham R Shenoy Date: Fri, 25 Jan 2008 21:08:02 +0100 Subject: cpu-hotplug: replace per-subsystem mutexes with get_online_cpus() This patch converts the known per-subsystem mutexes to get_online_cpus put_online_cpus. It also eliminates the CPU_LOCK_ACQUIRE and CPU_LOCK_RELEASE hotplug notification events. Signed-off-by: Gautham R Shenoy Signed-off-by: Ingo Molnar --- kernel/cpu.c | 4 ---- 1 file changed, 4 deletions(-) (limited to 'kernel/cpu.c') diff --git a/kernel/cpu.c b/kernel/cpu.c index b0c4152995f8..e0d3a4f56ecb 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -218,7 +218,6 @@ static int _cpu_down(unsigned int cpu, int tasks_frozen) return -EINVAL; cpu_hotplug_begin(); - raw_notifier_call_chain(&cpu_chain, CPU_LOCK_ACQUIRE, hcpu); err = __raw_notifier_call_chain(&cpu_chain, CPU_DOWN_PREPARE | mod, hcpu, -1, &nr_calls); if (err == NOTIFY_BAD) { @@ -271,7 +270,6 @@ out_thread: out_allowed: set_cpus_allowed(current, old_allowed); out_release: - raw_notifier_call_chain(&cpu_chain, CPU_LOCK_RELEASE, hcpu); cpu_hotplug_done(); return err; } @@ -302,7 +300,6 @@ static int __cpuinit _cpu_up(unsigned int cpu, int tasks_frozen) return -EINVAL; cpu_hotplug_begin(); - raw_notifier_call_chain(&cpu_chain, CPU_LOCK_ACQUIRE, hcpu); ret = __raw_notifier_call_chain(&cpu_chain, CPU_UP_PREPARE | mod, hcpu, -1, &nr_calls); if (ret == NOTIFY_BAD) { @@ -326,7 +323,6 @@ out_notify: if (ret != 0) __raw_notifier_call_chain(&cpu_chain, CPU_UP_CANCELED | mod, hcpu, nr_calls, NULL); - raw_notifier_call_chain(&cpu_chain, CPU_LOCK_RELEASE, hcpu); cpu_hotplug_done(); return ret; -- cgit v1.2.3