Re: [PATCH linux-2.6 04/04] brsem: convert cpucontrol to brsem

04_brsem_convert-cpucontrol-to-brsem.patch

	cpucontrol synchronizes CPU hotplugging and used to be a
	semaphore.  This patch converts it to a brsem.

Signed-off-by: Tejun Heo <[email protected]>

 include/linux/brsem.h |    4 +-
 include/linux/cpu.h   |   16 ++++++---
 init/main.c           |    1 +
 kernel/brsem.c        |   38 +++++++++++++++--------
 kernel/cpu.c          |   81 +++++++++++++++++++++++++++++++++++++++++++-------
 5 files changed, 110 insertions(+), 30 deletions(-)
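
For reference, here is a minimal sketch (not part of the patch) of the
locking rules after this change, using only the interfaces touched
below.  Readers keep calling lock_cpu_hotplug()/unlock_cpu_hotplug(),
which now map to brsem read operations; the only writers are
cpu_up()/cpu_down(), which go through the new write_lock_cpucontrol()
helper instead of the removed lock_cpu_hotplug_interruptible().

	/* read side: any code that must keep CPUs from coming or going */
	lock_cpu_hotplug();		/* brsem_down_read(&cpucontrol) */
	/* ... e.g. iterate over online CPUs safely ... */
	unlock_cpu_hotplug();		/* brsem_up_read(&cpucontrol) */

	/* write side: cpu_up()/cpu_down() in kernel/cpu.c only */
	if ((err = write_lock_cpucontrol()) != 0)
		return err;
	/* ... perform the actual hotplug operation ... */
	write_unlock_cpucontrol();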

Index: linux-work/include/linux/cpu.h
===================================================================
--- linux-work.orig/include/linux/cpu.h	2005-09-25 15:26:32.000000000 +0900
+++ linux-work/include/linux/cpu.h	2005-09-25 15:42:04.000000000 +0900
@@ -23,7 +23,7 @@
 #include <linux/node.h>
 #include <linux/compiler.h>
 #include <linux/cpumask.h>
-#include <asm/semaphore.h>
+#include <linux/brsem.h>
 
 struct cpu {
 	int node_id;		/* The node which contains the CPU */
@@ -59,10 +59,11 @@ extern struct sysdev_class cpu_sysdev_cl
 
 #ifdef CONFIG_HOTPLUG_CPU
 /* Stop CPUs going up and down. */
-extern struct semaphore cpucontrol;
-#define lock_cpu_hotplug()	down(&cpucontrol)
-#define unlock_cpu_hotplug()	up(&cpucontrol)
-#define lock_cpu_hotplug_interruptible() down_interruptible(&cpucontrol)
+extern struct brsem cpucontrol;
+extern struct task_struct *cpucontrol_holder;
+#define lock_cpu_hotplug()	brsem_down_read(&cpucontrol)
+#define unlock_cpu_hotplug()	brsem_up_read(&cpucontrol)
+#define is_cpu_hotplug_holder()	(cpucontrol_holder == current)
 #define hotcpu_notifier(fn, pri) {				\
 	static struct notifier_block fn##_nb =			\
 		{ .notifier_call = fn, .priority = pri };	\
@@ -74,11 +75,14 @@ extern int __attribute__((weak)) smp_pre
 #else
 #define lock_cpu_hotplug()	do { } while (0)
 #define unlock_cpu_hotplug()	do { } while (0)
-#define lock_cpu_hotplug_interruptible() 0
+#define is_cpu_hotplug_holder()	0
 #define hotcpu_notifier(fn, pri)
 
 /* CPUs don't go offline once they're online w/o CONFIG_HOTPLUG_CPU */
 static inline int cpu_is_offline(int cpu) { return 0; }
 #endif
 
+/* cpucontrol initializer, called from init/main.c */
+void cpucontrol_init(void);
+
 #endif /* _LINUX_CPU_H_ */
Index: linux-work/init/main.c
===================================================================
--- linux-work.orig/init/main.c	2005-09-25 15:42:03.000000000 +0900
+++ linux-work/init/main.c	2005-09-25 15:42:04.000000000 +0900
@@ -513,6 +513,7 @@ asmlinkage void __init start_kernel(void
 	setup_per_cpu_pageset();
 	numa_policy_init();
 	brsem_init_early();
+	cpucontrol_init();
 	if (late_time_init)
 		late_time_init();
 	calibrate_delay();
Index: linux-work/kernel/cpu.c
===================================================================
--- linux-work.orig/kernel/cpu.c	2005-09-25 15:26:32.000000000 +0900
+++ linux-work/kernel/cpu.c	2005-09-25 15:42:04.000000000 +0900
@@ -15,8 +15,39 @@
 #include <linux/stop_machine.h>
 #include <asm/semaphore.h>
 
-/* This protects CPUs going up and down... */
-DECLARE_MUTEX(cpucontrol);
+/*
+ * cpucontrol is a brsem used to synchronize cpu hotplug events.
+ * Invoking lock_cpu_hotplug() read-locks cpucontrol and no
+ * hotplugging events will occur until it's released.
+ *
+ * Unfortunately, brsem itself makes use of lock_cpu_hotplug(), so
+ * performing brsem write-lock operations on cpucontrol would
+ * deadlock.  This is avoided by...
+ *
+ * a. guaranteeing that cpu hotplug events won't occur during the
+ *    write-lock operations, and
+ *
+ * b. skipping lock_cpu_hotplug() inside brsem.
+ *
+ * #a is achieved by acquiring and releasing cpucontrol_mutex outside
+ * cpucontrol write-lock.  #b is achieved by skipping
+ * lock_cpu_hotplug() inside brsem if the current task is
+ * cpucontrol_mutex holder (is_cpu_hotplug_holder() test).
+ *
+ * Also, note that cpucontrol is first initialized with
+ * BRSEM_BYPASS_INITIALIZER and then initialized again with
+ * __create_brsem() instead of simply using create_brsem().  This is
+ * necessary as the cpucontrol brsem is used long before the brsem
+ * subsystem is up and running.
+ *
+ * Until brsem is properly initialized, all brsem ops succeed
+ * unconditionally.  cpucontrol becomes operational only after
+ * cpucontrol_init() is finished, which should be called after
+ * brsem_init_early().
+ */
+struct brsem cpucontrol = BRSEM_BYPASS_INITIALIZER;
+static DECLARE_MUTEX(cpucontrol_mutex);
+struct task_struct *cpucontrol_holder;
 
 static struct notifier_block *cpu_chain;
 
@@ -25,22 +56,45 @@ int register_cpu_notifier(struct notifie
 {
 	int ret;
 
-	if ((ret = down_interruptible(&cpucontrol)) != 0)
+	if ((ret = brsem_down_read_interruptible(&cpucontrol)) != 0)
 		return ret;
 	ret = notifier_chain_register(&cpu_chain, nb);
-	up(&cpucontrol);
+	brsem_up_read(&cpucontrol);
 	return ret;
 }
 EXPORT_SYMBOL(register_cpu_notifier);
 
 void unregister_cpu_notifier(struct notifier_block *nb)
 {
-	down(&cpucontrol);
+	brsem_down_read(&cpucontrol);
 	notifier_chain_unregister(&cpu_chain, nb);
-	up(&cpucontrol);
+	brsem_up_read(&cpucontrol);
 }
 EXPORT_SYMBOL(unregister_cpu_notifier);
 
+static int write_lock_cpucontrol(void)
+{
+	int ret;
+
+	if ((ret = down_interruptible(&cpucontrol_mutex)) != 0)
+		return ret;
+	BUG_ON(cpucontrol_holder);
+	cpucontrol_holder = current;
+	if ((ret = brsem_down_write_interruptible(&cpucontrol)) != 0) {
+		cpucontrol_holder = NULL;
+		up(&cpucontrol_mutex);
+		return ret;
+	}
+	return 0;
+}
+
+static void write_unlock_cpucontrol(void)
+{
+	brsem_up_write(&cpucontrol);
+	cpucontrol_holder = NULL;
+	up(&cpucontrol_mutex);
+}
+
 #ifdef CONFIG_HOTPLUG_CPU
 static inline void check_for_tasks(int cpu)
 {
@@ -80,7 +134,7 @@ int cpu_down(unsigned int cpu)
 	struct task_struct *p;
 	cpumask_t old_allowed, tmp;
 
-	if ((err = lock_cpu_hotplug_interruptible()) != 0)
+	if ((err = write_lock_cpucontrol()) != 0)
 		return err;
 
 	if (num_online_cpus() == 1) {
@@ -145,7 +199,7 @@ out_thread:
 out_allowed:
 	set_cpus_allowed(current, old_allowed);
 out:
-	unlock_cpu_hotplug();
+	write_unlock_cpucontrol();
 	return err;
 }
 #endif /*CONFIG_HOTPLUG_CPU*/
@@ -155,7 +209,7 @@ int __devinit cpu_up(unsigned int cpu)
 	int ret;
 	void *hcpu = (void *)(long)cpu;
 
-	if ((ret = down_interruptible(&cpucontrol)) != 0)
+	if ((ret = write_lock_cpucontrol()) != 0)
 		return ret;
 
 	if (cpu_online(cpu) || !cpu_present(cpu)) {
@@ -184,6 +238,13 @@ out_notify:
 	if (ret != 0)
 		notifier_call_chain(&cpu_chain, CPU_UP_CANCELED, hcpu);
 out:
-	up(&cpucontrol);
+	write_unlock_cpucontrol();
 	return ret;
 }
+
+void cpucontrol_init(void)
+{
+	struct brsem *sem;
+	sem = __create_brsem(&cpucontrol);
+	BUG_ON(!sem);
+}
Index: linux-work/kernel/brsem.c
===================================================================
--- linux-work.orig/kernel/brsem.c	2005-09-25 15:42:03.000000000 +0900
+++ linux-work/kernel/brsem.c	2005-09-25 15:42:04.000000000 +0900
@@ -156,7 +156,7 @@ static void coac_schedule_work_per_cpu(v
 	schedule_work(works + smp_processor_id());
 }
 
-static void call_on_all_cpus(void (*func)(void *), void *data)
+static void call_on_all_cpus(void (*func)(void *), void *data, int skip_cpulock)
 {
 	static DECLARE_MUTEX(coac_mutex); /* serializes uses of coac_works */
 	static struct work_struct coac_works[NR_CPUS];
@@ -174,7 +174,8 @@ static void call_on_all_cpus(void (*func
 	}
 
 	down(&coac_mutex);
-	lock_cpu_hotplug();
+	if (!skip_cpulock)
+		lock_cpu_hotplug();
 
 	/*
 	 * If we're on keventd, scheduling work and waiting for it
@@ -202,7 +203,8 @@ static void call_on_all_cpus(void (*func
 		wait_for_completion(&coac_arg.completion);
 	}
 
-	unlock_cpu_hotplug();
+	if (!skip_cpulock)
+		unlock_cpu_hotplug();
 	up(&coac_mutex);
 }
 
@@ -305,7 +307,7 @@ static int expand_brsem(int target_idx)
 	ebarg.new_len = new_len;
 	atomic_set(&ebarg.failed, 0);
 
-	call_on_all_cpus(expand_brsem_cpucb, &ebarg);
+	call_on_all_cpus(expand_brsem_cpucb, &ebarg, 0);
 
 	res = -ENOMEM;
 	if (atomic_read(&ebarg.failed))
@@ -405,13 +407,13 @@ static void sync_brsem_cpucb(void *data)
 	__brsem_leave_crit();
 }
 
-static void sync_brsem(struct brsem *sem, int write_locking)
+static void sync_brsem(struct brsem *sem, int write_locking, int skip_cpulock)
 {
 	int cpu;
 
 	if (brsem_initialized) {
 		struct sync_brsem_arg sbarg = { sem, write_locking };
-		call_on_all_cpus(sync_brsem_cpucb, &sbarg);
+		call_on_all_cpus(sync_brsem_cpucb, &sbarg, skip_cpulock);
 		return;
 	}
 
@@ -420,7 +422,8 @@ static void sync_brsem(struct brsem *sem
 	 * single threaded.  Sync manually.  Note that lockings are
 	 * not necessary here.  They're done just for consistency.
 	 */
-	lock_cpu_hotplug();
+	if (!skip_cpulock)
+		lock_cpu_hotplug();
 	spin_lock_crit(&sem->lock);
 	for_each_online_cpu(cpu) {
 		int *p = per_cpu(brsem_rcnt_ar, cpu) + sem->idx;
@@ -429,14 +432,15 @@ static void sync_brsem(struct brsem *sem
 		*p = write_locking ? INT_MIN : 0;
 	}
 	spin_unlock_crit(&sem->lock);
-	unlock_cpu_hotplug();
+	if (!skip_cpulock)
+		unlock_cpu_hotplug();
 }
 
 static void do_destroy_brsem(struct brsem *sem)
 {
 	check_idx(sem);
 
-	sync_brsem(sem, 0);
+	sync_brsem(sem, 0, 0);
 	BUG_ON(sem->master_rcnt != 0);
 
 	spin_lock(&brsem_alloc_lock);
@@ -586,8 +590,14 @@ void __brsem_up_read_slow(struct brsem *
 	__brsem_leave_crit();
 }
 
+/*
+ * skip_cpulock is used to avoid deadlocks when write operations are
+ * performed on the cpu hotplug brsem.  See kernel/cpu.c for more
+ * information.
+ */
 static int __brsem_down_write(struct brsem *sem, int interruptible)
 {
+	int skip_cpulock = is_cpu_hotplug_holder();
 	int res;
 
 	if (is_bypass(sem))
@@ -601,7 +611,7 @@ static int __brsem_down_write(struct brs
 	} else
 		down(&sem->write_mutex);
 
-	sync_brsem(sem, 1);
+	sync_brsem(sem, 1, skip_cpulock);
 
 	spin_lock_crit(&sem->lock);
 
@@ -619,7 +629,7 @@ static int __brsem_down_write(struct brs
 		finish_wait(&sem->write_wait, &wait);
 
 		if (interruptible && signal_pending(current)) {
-			sync_brsem(sem, 0);
+			sync_brsem(sem, 0, skip_cpulock);
 			wake_up_all(&sem->read_wait);
 			up(&sem->write_mutex);
 			return -EINTR;
@@ -661,12 +671,14 @@ int brsem_down_write_interruptible(struc
  */
 void brsem_up_write(struct brsem *sem)
 {
+	int skip_cpulock = is_cpu_hotplug_holder();
+
 	if (is_bypass(sem))
 		return;
 	check_idx(sem);
 
 	BUG_ON(sem->master_rcnt);
-	sync_brsem(sem, 0);
+	sync_brsem(sem, 0, skip_cpulock);
 	wake_up_all(&sem->read_wait);
 	up(&sem->write_mutex);
 }
@@ -856,7 +868,7 @@ void __init brsem_init(void)
 	brsem_initialized = 1;
 
 	/* Make sure other cpus see above change */
-	call_on_all_cpus(dummy_cpucb, NULL);
+	call_on_all_cpus(dummy_cpucb, NULL, 0);
 }
 
 EXPORT_SYMBOL(__create_brsem);
Index: linux-work/include/linux/brsem.h
===================================================================
--- linux-work.orig/include/linux/brsem.h	2005-09-25 15:42:03.000000000 +0900
+++ linux-work/include/linux/brsem.h	2005-09-25 15:42:04.000000000 +0900
@@ -65,7 +65,9 @@ void brsem_init(void);
 
 /*
  * The following initializer and __create_brsem() are for cases where
- * brsem should be used before brsem_init_early() is finished.
+ * brsem should be used before brsem_init_early() is finished.  If
+ * you're trying to use brsem for such cases, refer to
+ * include/linux/cpu.h and kernel/cpu.c for an example.
  */
 #define BRSEM_BYPASS_INITIALIZER	{ .idx = 0, .flags = BRSEM_F_BYPASS }
 

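For reference (not part of the patch), here is roughly the recursion
the comment in kernel/cpu.c is describing, and how the
cpucontrol_mutex/is_cpu_hotplug_holder() pair breaks it.  The call
chain is abbreviated and only names functions from this series.

	/*
	 * Without the bypass, write-locking cpucontrol would recurse
	 * into a read lock on the very same brsem:
	 *
	 *   cpu_down()
	 *     write_lock_cpucontrol()
	 *       brsem_down_write_interruptible(&cpucontrol)
	 *         sync_brsem()
	 *           call_on_all_cpus()
	 *             lock_cpu_hotplug()  <- read-locks cpucontrol while
	 *                                    it is being write-locked:
	 *                                    deadlock
	 *
	 * With this patch, write_lock_cpucontrol() first takes
	 * cpucontrol_mutex and records current in cpucontrol_holder, so
	 * no other hotplug operation can run in the meantime.  The brsem
	 * write path then notices is_cpu_hotplug_holder() and passes
	 * skip_cpulock down through __brsem_down_write() -> sync_brsem()
	 * -> call_on_all_cpus(), which skips lock_cpu_hotplug() for this
	 * caller only.
	 */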