(Refcount + Waitqueue) implementation for cpu_hotplug "locking".
It is analogous to an unfair rwsem, but it is *extremely* lightweight in
the reader fast path.
Based on valuable inputs by Paul E McKenney, Srivatsa Vaddagiri, Dipankar Sarma
and Ingo Molnar.
Signed-off-by: Gautham R Shenoy <[email protected]>
---
include/linux/cpu.h | 2
init/main.c | 1
kernel/cpu.c | 231 ++++++++++++++++++++++++++++++++++++++++++----------
3 files changed, 194 insertions(+), 40 deletions(-)
Index: hotplug/init/main.c
===================================================================
--- hotplug.orig/init/main.c
+++ hotplug/init/main.c
@@ -573,6 +573,7 @@ asmlinkage void __init start_kernel(void
vfs_caches_init_early();
cpuset_init_early();
mem_init();
+ cpu_hotplug_init();
kmem_cache_init();
setup_per_cpu_pageset();
numa_policy_init();
Index: hotplug/include/linux/cpu.h
===================================================================
--- hotplug.orig/include/linux/cpu.h
+++ hotplug/include/linux/cpu.h
@@ -65,6 +65,7 @@ static inline void unregister_cpu_notifi
extern struct sysdev_class cpu_sysdev_class;
#ifdef CONFIG_HOTPLUG_CPU
+extern void cpu_hotplug_init(void);
/* Stop CPUs going up and down. */
extern void lock_cpu_hotplug(void);
extern void unlock_cpu_hotplug(void);
@@ -78,6 +79,7 @@ extern void unlock_cpu_hotplug(void);
int cpu_down(unsigned int cpu);
#define cpu_is_offline(cpu) unlikely(!cpu_online(cpu))
#else
+#define cpu_hotplug_init() do { } while (0)
#define lock_cpu_hotplug() do { } while (0)
#define unlock_cpu_hotplug() do { } while (0)
#define lock_cpu_hotplug_interruptible() 0
Index: hotplug/kernel/cpu.c
===================================================================
--- hotplug.orig/kernel/cpu.c
+++ hotplug/kernel/cpu.c
@@ -1,6 +1,10 @@
/* CPU control.
* (C) 2001, 2002, 2003, 2004 Rusty Russell
*
+ * Hotplug - Locking
+ * 2006: Implemented by Gautham R Shenoy with the aid of some valuable inputs
+ * from Paul E McKenney, Srivatsa Vaddagiri, Dipankar Sarma and Ingo Molnar.
+ *
* This code is licenced under the GPL.
*/
#include <linux/proc_fs.h>
@@ -14,10 +18,11 @@
#include <linux/kthread.h>
#include <linux/stop_machine.h>
#include <linux/mutex.h>
-
-/* This protects CPUs going up and down... */
-static DEFINE_MUTEX(cpu_add_remove_lock);
-static DEFINE_MUTEX(cpu_bitmask_lock);
+#include <asm/percpu.h>
+#include <linux/cpumask.h>
+#include <linux/types.h>
+#include <linux/wait.h>
+#include <linux/rcupdate.h>
static __cpuinitdata RAW_NOTIFIER_HEAD(cpu_chain);
@@ -26,52 +31,202 @@ static __cpuinitdata RAW_NOTIFIER_HEAD(c
*/
static int cpu_hotplug_disabled;
+/************************************************************************
+ * A FEW CONTEXT SPECIFIC DEFINITIONS *
+ * ---------------------------------------------------------------------*
+ * - reader : task which tries to *prevent* a cpu hotplug event. *
+ * - writer : task which tries to *perform* a cpu hotplug event. *
+ * - write-operation: cpu hotplug operation. *
+ * *
+ ************************************************************************/
+
+/************************************************************************
+ * THE PROTOCOL *
+ *----------------------------------------------------------------------*
+ *- Analogous to RWSEM, only not so fair *
+ *- Readers assume control iff: *
+ * a) No other reader has a reference and no writer is writing. *
+ * OR *
+ * b) At least one reader (on *any* cpu) has a reference. *
+ *- Writer assumes control iff: *
+ * there are no active readers and there are no active writers. *
+ *- Writer, on completion, would preferably wake up other waiting *
+ * writers over the waiting readers. *
+ *- The *last* writer wakes up all the waiting readers. *
+ ************************************************************************/
+
+/*
+ * Global state of the cpu hotplug "lock". Readers test 'status' on their
+ * fast path without taking 'lock'; the remaining fields are slow-path only.
+ */
+static struct {
+ unsigned int status; /* Read mostly global: NO_WRITERS, WRITER_WAITING or WRITER_ACTIVE */
+ /* The following variables are only for the slowpath */
+ spinlock_t lock; /* Serializes status changes and wait-queue manipulation */
+ wait_queue_head_t read_queue; /* Readers sleeping in slow_path_reader_lock() */
+ wait_queue_head_t write_queue; /* Writers sleeping in cpu_hotplug_begin() */
+} cpu_hotplug;
+
+/*
+ * Per-cpu reader reference counter. Incremented by lock_cpu_hotplug() and
+ * decremented by unlock_cpu_hotplug() on whichever cpu the caller happens
+ * to be running on; nr_readers() sums it over all possible cpus.
+ *
+ * NOTE(review): this has external linkage and a very generic name. If
+ * nothing outside this file needs it, consider making it static and giving
+ * it a cpu_hotplug-specific name -- confirm against the rest of the series.
+ */
+DEFINE_PER_CPU(int, refcount) = {0};
+
+/* cpu_hotplug.status can be one of the following */
+#define NO_WRITERS 0x00000000 /* No writer is waiting or active */
+#define WRITER_WAITING 0x00000001 /* Writer present, but is waiting */
+#define WRITER_ACTIVE 0x00000002 /* Writer is performing write */
+
+/*
+ * Non-zero iff a writer is waiting or active (status != NO_WRITERS).
+ * The expansion is parenthesized so that it stays a single operand
+ * wherever the macro is used inside a larger expression.
+ */
+#define writer_exists() (cpu_hotplug.status)
+
+/*
+ * Returns the number of readers in the system, computed as the sum of the
+ * per-cpu 'refcount' counters over every possible cpu.
+ *
+ * Only the sum is meaningful: the lock and unlock paths each use
+ * smp_processor_id() at the time of the call, so the increment and the
+ * matching decrement need not land on the same cpu's counter.
+ *
+ * The walk itself takes no lock. The callers in this file invoke it with
+ * cpu_hotplug.lock held, but fast-path readers update the counters without
+ * that lock.
+ * NOTE(review): the result is presumably only reliable once fast-path
+ * updates have been flushed out by the synchronize_sched() in
+ * cpu_hotplug_begin() -- confirm.
+ */
+static inline int nr_readers(void)
+{
+ int count=0, i;
+
+ for_each_possible_cpu(i)
+ count += per_cpu(refcount, i);
+
+ return count;
+}
+
#ifdef CONFIG_HOTPLUG_CPU
+/*
+ * Boot-time initialization of the hotplug lock state: no writers, an
+ * unlocked spinlock and two empty wait queues. Called from start_kernel().
+ *
+ * NOTE(review): this function is only built under CONFIG_HOTPLUG_CPU
+ * (linux/cpu.h supplies an empty macro otherwise), whereas
+ * cpu_hotplug_begin() and cpu_hotplug_done() are defined outside that
+ * #ifdef and use cpu_hotplug.lock and the wait queues. Confirm that those
+ * are never reached with this state uninitialized, or switch to static
+ * initializers.
+ */
+void __init cpu_hotplug_init(void)
+{
+ cpu_hotplug.status = NO_WRITERS;
+ spin_lock_init(&cpu_hotplug.lock);
+ init_waitqueue_head(&cpu_hotplug.read_queue);
+ init_waitqueue_head(&cpu_hotplug.write_queue);
+}
-/* Crappy recursive lock-takers in cpufreq! Complain loudly about idiots */
-static struct task_struct *recursive;
-static int recursive_depth;
+static void slow_path_reader_lock(void);
+static void slow_path_reader_unlock(void);
+/**********************************************************************
+ MAIN CPU_HOTPLUG (LOCK/ UNLOCK/ BEGIN/ DONE) CODE
+ *********************************************************************/
+
+/*
+ * Reader-side acquire: take a reference that holds off cpu hotplug.
+ *
+ * Fast path: when no writer exists, increment the current cpu's refcount
+ * with preemption disabled and return.
+ *
+ * Slow path (slow_path_reader_lock()): blocks iff a write operation is
+ * on-going
+ * OR
+ * a writer is waiting and there are no other readers.
+ */
void lock_cpu_hotplug(void)
{
- struct task_struct *tsk = current;
-
- if (tsk == recursive) {
- static int warnings = 10;
- if (warnings) {
- printk(KERN_ERR "Lukewarm IQ detected in hotplug locking\n");
- WARN_ON(1);
- warnings--;
- }
- recursive_depth++;
+ /*
+ * The status check and the refcount update form one preempt-disabled
+ * section; cpu_hotplug_begin() waits for such sections to finish
+ * with synchronize_sched() after changing the status.
+ */
+ preempt_disable();
+ if (likely(!writer_exists())) {
+ per_cpu(refcount, smp_processor_id())++;
+ preempt_enable();
return;
}
- mutex_lock(&cpu_bitmask_lock);
- recursive = tsk;
+ preempt_enable();
+ /* A writer is waiting or active: serialize on cpu_hotplug.lock. */
+ slow_path_reader_lock();
}
EXPORT_SYMBOL_GPL(lock_cpu_hotplug);
+/*
+ * Reader-side release: drop a reference taken with lock_cpu_hotplug().
+ *
+ * Fast path: when no writer exists, decrement the current cpu's refcount
+ * with preemption disabled. This may be a different cpu from the one whose
+ * counter was incremented; only the sum over all cpus is used.
+ *
+ * Slow path (slow_path_reader_unlock()): taken when a writer is waiting or
+ * active, so that the last reader can hand control over to the writer.
+ *
+ * NOTE(review): nothing here verifies that the caller holds a reference;
+ * an unbalanced call would silently skew the reader count.
+ */
void unlock_cpu_hotplug(void)
{
- WARN_ON(recursive != current);
- if (recursive_depth) {
- recursive_depth--;
+ preempt_disable();
+ if (likely(!writer_exists())) {
+ per_cpu(refcount, smp_processor_id())--;
+ preempt_enable();
return;
}
- mutex_unlock(&cpu_bitmask_lock);
- recursive = NULL;
+ preempt_enable();
+ slow_path_reader_unlock();
}
EXPORT_SYMBOL_GPL(unlock_cpu_hotplug);
+#endif /* CONFIG_HOTPLUG_CPU */
+
+/*
+ * Writer-side acquire: called before performing a cpu hotplug operation.
+ *
+ * If no other writer exists, announce this writer (WRITER_WAITING), wait
+ * for synchronize_sched() so that fast-path readers have either finished
+ * updating their refcount or will see the new status, and then become
+ * WRITER_ACTIVE if no reader holds a reference.
+ *
+ * Otherwise (another writer already exists, or readers still hold
+ * references) sleep uninterruptibly as an exclusive waiter on write_queue.
+ * The status is not re-checked after waking up: the waker is relied upon
+ * to have handed over ownership (slow_path_reader_unlock() sets
+ * WRITER_ACTIVE before waking; cpu_hotplug_done() leaves the status as is).
+ *
+ * NOTE(review): this function is defined outside CONFIG_HOTPLUG_CPU, but
+ * cpu_hotplug_init() is an empty macro in that configuration, so the
+ * spinlock and wait queues used here would not have been initialized.
+ * Confirm, or initialize cpu_hotplug statically.
+ */
+static void cpu_hotplug_begin(void)
+{
+ DECLARE_WAITQUEUE(wait, current);
+
+ spin_lock(&cpu_hotplug.lock);
+ if (likely(cpu_hotplug.status == NO_WRITERS)) {
+ cpu_hotplug.status = WRITER_WAITING;
+ spin_unlock(&cpu_hotplug.lock);
+
+ /* Allow new readers to see this change in status and
+ * notify them to take the slowpath.
+ *
+ * Also allow the older readers who have not seen the status
+ * change to bump up/down their percpu refcount.
+ */
+ synchronize_sched();
+
+ spin_lock(&cpu_hotplug.lock);
+ /* No reader holds a reference: take ownership right away. */
+ if (!nr_readers()) {
+ cpu_hotplug.status = WRITER_ACTIVE;
+ spin_unlock(&cpu_hotplug.lock);
+ return;
+ }
+ }
+
+ /*
+ * Queue up and mark ourselves sleeping while still holding the lock;
+ * the wakers in this file also hold it, so the wakeup cannot slip in
+ * between the enqueue and the state change.
+ */
+ add_wait_queue_exclusive(&cpu_hotplug.write_queue, &wait);
+ __set_current_state(TASK_UNINTERRUPTIBLE);
+ spin_unlock(&cpu_hotplug.lock);
+ schedule();
+ remove_wait_queue(&cpu_hotplug.write_queue, &wait);
+}
+
+/*
+ * Writer-side release: called after a cpu hotplug operation has finished.
+ *
+ * Waiting writers are preferred over waiting readers: if write_queue is
+ * not empty, one writer is woken and the status is left untouched.
+ * NOTE(review): this assumes the status is WRITER_ACTIVE at this point,
+ * i.e. that the caller is the active writer -- confirm.
+ *
+ * Only when no writer is queued does the status drop back to NO_WRITERS
+ * and every reader sleeping on read_queue get woken.
+ */
+static void cpu_hotplug_done(void)
+{
+ spin_lock(&cpu_hotplug.lock);
+
+ if (!list_empty(&cpu_hotplug.write_queue.task_list))
+ wake_up(&cpu_hotplug.write_queue);
+ else {
+ cpu_hotplug.status = NO_WRITERS;
+ if (!list_empty(&cpu_hotplug.read_queue.task_list))
+ wake_up_all(&cpu_hotplug.read_queue);
+ }
+
+ spin_unlock(&cpu_hotplug.lock);
+}
+
+/**********************************************************************
+ READER SLOWPATH CODE.
+ **********************************************************************/
+#ifdef CONFIG_HOTPLUG_CPU
+/*
+ * Reader acquire when a writer was seen on the fast path.
+ *
+ * Under cpu_hotplug.lock, loop for as long as a writer exists:
+ * - if the writer is merely waiting and some reader already holds a
+ * reference, join the existing readers without sleeping;
+ * - otherwise sleep uninterruptibly on read_queue and re-evaluate after
+ * being woken.
+ * On exit, the current cpu's refcount is incremented while the lock is
+ * still held.
+ */
+static void slow_path_reader_lock(void)
+{
+ DECLARE_WAITQUEUE(wait, current);
+
+ spin_lock(&cpu_hotplug.lock);
+
+ while (writer_exists()) {
+ /* This check makes the whole business unfair */
+ if (cpu_hotplug.status == WRITER_WAITING && nr_readers())
+ goto out;
+
+ /* Enqueue and change state under the lock, then sleep. */
+ add_wait_queue(&cpu_hotplug.read_queue, &wait);
+ __set_current_state(TASK_UNINTERRUPTIBLE);
+ spin_unlock(&cpu_hotplug.lock);
+ schedule();
+ remove_wait_queue(&cpu_hotplug.read_queue, &wait);
+ spin_lock(&cpu_hotplug.lock);
+ }
+out:
+ per_cpu(refcount, smp_processor_id())++;
+ spin_unlock(&cpu_hotplug.lock);
+
+}
+
+/*
+ * Reader release when a writer was seen on the fast path.
+ *
+ * Drops the reference under cpu_hotplug.lock. If that leaves no readers
+ * and a writer is queued on write_queue, the status is promoted to
+ * WRITER_ACTIVE on the writer's behalf before waking it, because
+ * cpu_hotplug_begin() does not re-check the status after its sleep.
+ */
+static void slow_path_reader_unlock(void)
+{
+ spin_lock(&cpu_hotplug.lock);
+ per_cpu(refcount, smp_processor_id())--;
+ if (!nr_readers() &&
+ !list_empty(&cpu_hotplug.write_queue.task_list)) {
+ cpu_hotplug.status = WRITER_ACTIVE;
+ wake_up(&cpu_hotplug.write_queue);
+ }
+ spin_unlock(&cpu_hotplug.lock);
+}
+
#endif /* CONFIG_HOTPLUG_CPU */
/* Need to know about CPUs going up/down? */
int __cpuinit register_cpu_notifier(struct notifier_block *nb)
{
int ret;
- mutex_lock(&cpu_add_remove_lock);
+ /*
+ * NOTE(review): lock_cpu_hotplug() is the shared (reader) side of
+ * the hotplug lock; its fast path only bumps a per-cpu refcount. It
+ * holds off hotplug operations, but two concurrent callers are no
+ * longer serialized against each other the way the old mutex did.
+ * Confirm that updates to cpu_chain are otherwise protected.
+ */
+ lock_cpu_hotplug();
ret = raw_notifier_chain_register(&cpu_chain, nb);
- mutex_unlock(&cpu_add_remove_lock);
+ unlock_cpu_hotplug();
return ret;
}
@@ -81,9 +236,9 @@ EXPORT_SYMBOL(register_cpu_notifier);
void unregister_cpu_notifier(struct notifier_block *nb)
{
- mutex_lock(&cpu_add_remove_lock);
+ /*
+ * NOTE(review): lock_cpu_hotplug() is the shared (reader) side of
+ * the hotplug lock, so this keeps hotplug operations out but does
+ * not exclude other callers that register or unregister notifiers
+ * at the same time, unlike the mutex it replaces. Confirm that
+ * updates to cpu_chain are otherwise protected.
+ */
+ lock_cpu_hotplug();
raw_notifier_chain_unregister(&cpu_chain, nb);
- mutex_unlock(&cpu_add_remove_lock);
+ unlock_cpu_hotplug();
}
EXPORT_SYMBOL(unregister_cpu_notifier);
@@ -146,9 +301,7 @@ static int _cpu_down(unsigned int cpu)
cpu_clear(cpu, tmp);
set_cpus_allowed(current, tmp);
- mutex_lock(&cpu_bitmask_lock);
p = __stop_machine_run(take_cpu_down, NULL, cpu);
- mutex_unlock(&cpu_bitmask_lock);
if (IS_ERR(p)) {
/* CPU didn't die: tell everyone. Can't complain. */
@@ -192,13 +345,13 @@ int cpu_down(unsigned int cpu)
{
int err = 0;
- mutex_lock(&cpu_add_remove_lock);
+ cpu_hotplug_begin();
if (cpu_hotplug_disabled)
err = -EBUSY;
else
err = _cpu_down(cpu);
- mutex_unlock(&cpu_add_remove_lock);
+ cpu_hotplug_done();
return err;
}
#endif /*CONFIG_HOTPLUG_CPU*/
@@ -221,9 +374,7 @@ static int __devinit _cpu_up(unsigned in
}
/* Arch-specific enabling code. */
- mutex_lock(&cpu_bitmask_lock);
ret = __cpu_up(cpu);
- mutex_unlock(&cpu_bitmask_lock);
if (ret != 0)
goto out_notify;
BUG_ON(!cpu_online(cpu));
@@ -243,13 +394,13 @@ int __devinit cpu_up(unsigned int cpu)
{
int err = 0;
- mutex_lock(&cpu_add_remove_lock);
+ cpu_hotplug_begin();
if (cpu_hotplug_disabled)
err = -EBUSY;
else
err = _cpu_up(cpu);
- mutex_unlock(&cpu_add_remove_lock);
+ cpu_hotplug_done();
return err;
}
@@ -260,7 +411,7 @@ int disable_nonboot_cpus(void)
{
int cpu, first_cpu, error;
- mutex_lock(&cpu_add_remove_lock);
+ cpu_hotplug_begin();
first_cpu = first_cpu(cpu_present_map);
if (!cpu_online(first_cpu)) {
error = _cpu_up(first_cpu);
@@ -301,7 +452,7 @@ int disable_nonboot_cpus(void)
printk(KERN_ERR "Non-boot CPUs are not disabled");
}
out:
- mutex_unlock(&cpu_add_remove_lock);
+ cpu_hotplug_done();
return error;
}
@@ -310,9 +461,9 @@ void enable_nonboot_cpus(void)
int cpu, error;
/* Allow everyone to use the CPU hotplug again */
- mutex_lock(&cpu_add_remove_lock);
+ lock_cpu_hotplug();
cpu_hotplug_disabled = 0;
- mutex_unlock(&cpu_add_remove_lock);
+ unlock_cpu_hotplug();
printk("Enabling non-boot CPUs ...\n");
for_each_cpu_mask(cpu, frozen_cpus) {
--
Gautham R Shenoy
Linux Technology Center
IBM India.
"Freedom comes with a price tag of responsibility, which is still a bargain,
because Freedom is priceless!"
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [email protected]
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/
[Index of Archives]
[Kernel Newbies]
[Netfilter]
[Bugtraq]
[Photo]
[Stuff]
[Gimp]
[Yosemite News]
[MIPS Linux]
[ARM Linux]
[Linux Security]
[Linux RAID]
[Video 4 Linux]
[Linux for the blind]
[Linux Resources]