[PATCH 8/10] ia64 implementation of cpu bulk removal

ia64-specific implementation of CPU bulk removal: add the config option and
make __cpu_die()/__cpu_disable() take a cpumask_t of the CPUs being removed.

Signed-off-by: Ashok Raj <[email protected]> 
Signed-off-by: Shaohua Li <[email protected]> 
---
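
[Note, not part of the changelog: with this patch the two entry points take the
whole set of CPUs being removed at once.  Below is a minimal sketch of how a
caller could build such a mask for a full NUMA node, assuming the 2.6-era
cpumask helpers (node_to_cpumask(), cpus_and(), cpu_online_map);
node_remove_mask() is a hypothetical helper, the real invocation path comes
from the generic part of this series.  A condensed sketch of the rendezvous
inside the new __cpu_disable() follows the patch.]

#include <linux/cpumask.h>
#include <linux/topology.h>	/* node_to_cpumask() */

/*
 * Illustrative only -- not part of this patch.  Build a removal mask
 * covering every online CPU of a NUMA node; the generic hotplug path
 * (earlier patches in the series) then arranges for __cpu_disable(mask)
 * to run on the CPUs being removed and calls __cpu_die(mask) to wait
 * for them.
 */
static cpumask_t node_remove_mask(int node)
{
	cpumask_t mask;

	mask = node_to_cpumask(node);		/* all CPUs in the node */
	cpus_and(mask, mask, cpu_online_map);	/* keep the online ones */
	return mask;
}
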

 linux-2.6.17-rc3-root/arch/ia64/Kconfig          |   11 +
 linux-2.6.17-rc3-root/arch/ia64/kernel/irq.c     |   38 -----
 linux-2.6.17-rc3-root/arch/ia64/kernel/smpboot.c |  172 ++++++++++++++++++-----
 linux-2.6.17-rc3-root/arch/ia64/kernel/time.c    |    5 
 4 files changed, 158 insertions(+), 68 deletions(-)

diff -puN arch/ia64/Kconfig~ia64-bulk-cpu-hotplug arch/ia64/Kconfig
--- linux-2.6.17-rc3/arch/ia64/Kconfig~ia64-bulk-cpu-hotplug	2006-05-07 07:46:35.000000000 +0800
+++ linux-2.6.17-rc3-root/arch/ia64/Kconfig	2006-05-07 07:46:35.000000000 +0800
@@ -270,6 +270,17 @@ config HOTPLUG_CPU
 	  can be controlled through /sys/devices/system/cpu/cpu#.
 	  Say N if you want to disable CPU hotplug.
 
+config BULK_CPU_REMOVE
+	bool "Support for bulk removal of CPUs (EXPERIMENTAL)"
+	depends on HOTPLUG_CPU && EXPERIMENTAL
+	help
+	  Say Y if you need the ability to remove more than one CPU in a
+	  single operation. The current mechanism removes one CPU at a
+	  time, which can be inefficient when a whole NUMA node is being
+	  removed and can leave interrupts, timers and processes bound to
+	  a CPU that is itself removed right after the current CPU is
+	  offlined.
+
 config SCHED_SMT
 	bool "SMT scheduler support"
 	depends on SMP
diff -puN arch/ia64/kernel/irq.c~ia64-bulk-cpu-hotplug arch/ia64/kernel/irq.c
--- linux-2.6.17-rc3/arch/ia64/kernel/irq.c~ia64-bulk-cpu-hotplug	2006-05-07 07:46:35.000000000 +0800
+++ linux-2.6.17-rc3-root/arch/ia64/kernel/irq.c	2006-05-07 07:46:35.000000000 +0800
@@ -114,7 +114,7 @@ unsigned int vectors_in_migration[NR_IRQ
  * Since cpu_online_map is already updated, we just need to check for
  * affinity that has zeros
  */
-static void migrate_irqs(void)
+void ia64_migrate_irqs(void)
 {
 	cpumask_t	mask;
 	irq_desc_t *desc;
@@ -159,34 +159,9 @@ static void migrate_irqs(void)
 	}
 }
 
-void fixup_irqs(void)
+void ia64_fixup_irqs(void)
 {
 	unsigned int irq;
-	extern void ia64_process_pending_intr(void);
-	extern void ia64_disable_timer(void);
-	extern volatile int time_keeper_id;
-
-	ia64_disable_timer();
-
-	/*
-	 * Find a new timesync master
-	 */
-	if (smp_processor_id() == time_keeper_id) {
-		time_keeper_id = first_cpu(cpu_online_map);
-		printk ("CPU %d is now promoted to time-keeper master\n", time_keeper_id);
-	}
-
-	/*
-	 * Phase 1: Locate irq's bound to this cpu and
-	 * relocate them for cpu removal.
-	 */
-	migrate_irqs();
-
-	/*
-	 * Phase 2: Perform interrupt processing for all entries reported in
-	 * local APIC.
-	 */
-	ia64_process_pending_intr();
 
 	/*
 	 * Phase 3: Now handle any interrupts not captured in local APIC.
@@ -199,14 +174,5 @@ void fixup_irqs(void)
 			__do_IRQ(irq, NULL);
 		}
 	}
-
-	/*
-	 * Now let processor die. We do irq disable and max_xtp() to
-	 * ensure there is no more interrupts routed to this processor.
-	 * But the local timer interrupt can have 1 pending which we
-	 * take care in timer_interrupt().
-	 */
-	max_xtp();
-	local_irq_disable();
 }
 #endif
diff -puN arch/ia64/kernel/smpboot.c~ia64-bulk-cpu-hotplug arch/ia64/kernel/smpboot.c
--- linux-2.6.17-rc3/arch/ia64/kernel/smpboot.c~ia64-bulk-cpu-hotplug	2006-05-07 07:46:35.000000000 +0800
+++ linux-2.6.17-rc3-root/arch/ia64/kernel/smpboot.c	2006-05-07 07:46:35.000000000 +0800
@@ -639,24 +639,27 @@ clear_cpu_sibling_map(int cpu)
 }
 
 static void
-remove_siblinginfo(int cpu)
+remove_siblinginfo(cpumask_t remove_mask)
 {
 	int last = 0;
+	int cpu;
 
-	if (cpu_data(cpu)->threads_per_core == 1 &&
-	    cpu_data(cpu)->cores_per_socket == 1) {
-		cpu_clear(cpu, cpu_core_map[cpu]);
-		cpu_clear(cpu, cpu_sibling_map[cpu]);
-		return;
-	}
+	for_each_cpu_mask(cpu, remove_mask) {
+		if (cpu_data(cpu)->threads_per_core == 1 &&
+		    cpu_data(cpu)->cores_per_socket == 1) {
+			cpu_clear(cpu, cpu_core_map[cpu]);
+			cpu_clear(cpu, cpu_sibling_map[cpu]);
+			continue;
+		}
 
-	last = (cpus_weight(cpu_core_map[cpu]) == 1 ? 1 : 0);
+		last = (cpus_weight(cpu_core_map[cpu]) == 1 ? 1 : 0);
 
-	/* remove it from all sibling map's */
-	clear_cpu_sibling_map(cpu);
+		/* remove it from all sibling map's */
+		clear_cpu_sibling_map(cpu);
+	}
 }
 
-extern void fixup_irqs(void);
+extern void ia64_fixup_irqs(void);
 
 int migrate_platform_irqs(unsigned int cpu)
 {
@@ -699,49 +702,154 @@ int migrate_platform_irqs(unsigned int c
 }
 
 /* must be called with cpucontrol mutex held */
+static cpumask_t cpu_dead_mask = CPU_MASK_NONE;
+static cpumask_t cpu_dead_error_mask = CPU_MASK_NONE;
+static atomic_t disable_cpu_start = ATOMIC_INIT(0); /* 1:start, 2:error */
 int __cpu_disable(cpumask_t remove_mask)
 {
 	int cpu = smp_processor_id();
+	int master = 0;
+	extern void ia64_disable_timer(void);
+	extern void ia64_enable_timer(void);
+	extern void ia64_process_pending_intr(void);
+	extern void ia64_migrate_irqs(void);
+	extern volatile int time_keeper_id;
+
+	/* are we the master cpu? */
+	if (first_cpu(remove_mask) == cpu)
+		master = 1;
+
+	ia64_disable_timer();
+	if (master) {
+		int ret = 0, cpu_tmp;
+		cpumask_t temp_online_map, old_online_map;
+
+		/*
+		 * dont permit boot processor for now
+		 */
+		if (cpu_isset(0, remove_mask) && !bsp_remove_ok) {
+			printk ("Your platform does not support removal of BSP\n");
+			/* let slave report the error */
+			atomic_set(&disable_cpu_start, 2);
+			smp_wmb(); /* set error first */
+			cpu_set(cpu, cpu_dead_error_mask);
+			while (!cpus_equal(cpu_dead_error_mask, remove_mask))
+				cpu_relax();
+			atomic_set(&disable_cpu_start, 0);
+			cpus_clear(cpu_dead_error_mask);
+			ia64_enable_timer();
+			return -EBUSY;
+		}
 
-	BUG_ON(cpus_weight(remove_mask) != 1);
+		old_online_map = cpu_online_map;
+		cpus_andnot(temp_online_map, cpu_online_map, remove_mask);
+		cpu_online_map = temp_online_map;
+		/*
+		 * Find a new timesync master
+		 */
+		if (cpu_isset(time_keeper_id, remove_mask)) {
+			time_keeper_id = first_cpu(cpu_online_map);
+			printk ("CPU %d is now promoted to time-keeper master\n",
+								time_keeper_id);
+		}
+
+		/*
+		 * Check if platform irq's can be migrated
+		 */
+		for_each_cpu_mask(cpu_tmp, remove_mask) {
+			if ((ret = migrate_platform_irqs(cpu_tmp)))
+				break;
+		}
+		if (ret) {
+			cpu_online_map = old_online_map;
+			/* let slave report the error */
+			atomic_set(&disable_cpu_start, 2);
+			smp_wmb(); /* set error first */
+			cpu_set(cpu, cpu_dead_error_mask);
+			while (!cpus_equal(cpu_dead_error_mask, remove_mask))
+				cpu_relax();
+			atomic_set(&disable_cpu_start, 0);
+			cpus_clear(cpu_dead_error_mask);
+			ia64_enable_timer();
+			return -EBUSY;
+		}
+		/*
+		 * Phase I: IRQ migration: migrate irqs to eligible online cpus
+		 */
+		ia64_migrate_irqs();
+
+		smp_mb();
+		/* Let the party begin */
+		atomic_set(&disable_cpu_start, 1);
+	} else {
+		while (atomic_read(&disable_cpu_start) == 0)
+			cpu_relax();
+		if (atomic_read(&disable_cpu_start) == 2) {
+			ia64_enable_timer();
+			cpu_set(cpu, cpu_dead_error_mask);
+			return -EBUSY;
+		}
+	}
 	/*
-	 * dont permit boot processor for now
+	 * Phase II: IRQ Migration: process any local pending interrupts
+	 *           that were queued up.
 	 */
-	if (cpu == 0 && !bsp_remove_ok) {
-		printk ("Your platform does not support removal of BSP\n");
-		return (-EBUSY);
-	}
+	ia64_process_pending_intr();
 
-	cpu_clear(cpu, cpu_online_map);
+	/*
+	 * Now let processor die. We do irq disable and max_xtp() to
+	 * ensure there is no more interrupts routed to this processor.
+	 * But the local timer interrupt can have 1 pending which we
+	 * take care in timer_interrupt().
+	 */
+	max_xtp();
+	local_irq_disable();
 
-	if (migrate_platform_irqs(cpu)) {
-		cpu_set(cpu, cpu_online_map);
-		return (-EBUSY);
-	}
+	cpu_clear(cpu, cpu_callin_map);
+ 	cpu_clear(cpu, cpu_online_map);
+	smp_mb();
+	cpu_set(cpu, cpu_dead_mask);
+	if (!master)
+		return 0;
+	/* master does cleanup */
+	while (!cpus_equal(cpu_dead_mask, remove_mask))
+		cpu_relax();
+
+	remove_siblinginfo(remove_mask);
 
-	remove_siblinginfo(cpu);
-	cpu_clear(cpu, cpu_online_map);
-	fixup_irqs();
+	/*
+	 * Phase III: irq migration.
+	 */
+	ia64_fixup_irqs();
 	local_flush_tlb_all();
-	cpu_clear(cpu, cpu_callin_map);
+	cpus_clear(cpu_dead_mask);
+	atomic_set(&disable_cpu_start, 0);
 	return 0;
 }
 
 void __cpu_die(cpumask_t remove_mask)
 {
 	unsigned int i;
-	int cpu = first_cpu(remove_mask);
+	int cpu;
 
 	for (i = 0; i < 100; i++) {
 		/* They ack this in play_dead by setting CPU_DEAD */
-		if (per_cpu(cpu_state, cpu) == CPU_DEAD)
-		{
-			printk ("CPU %d is now offline\n", cpu);
-			return;
+		for_each_cpu_mask(cpu, remove_mask) {
+			if (per_cpu(cpu_state, cpu) == CPU_DEAD) {
+				printk ("CPU %d is now offline\n", cpu);
+				cpu_clear(cpu, remove_mask);
+			}
 		}
+		if (cpus_empty(remove_mask))
+			break;
+
 		msleep(100);
 	}
- 	printk(KERN_ERR "CPU %u didn't die...\n", cpu);
+
+	if (!cpus_empty(remove_mask)) {
+		for_each_cpu_mask(cpu, remove_mask)
+ 			printk(KERN_ERR "CPU %u didn't die...\n", cpu);
+	}
 }
 #else /* !CONFIG_HOTPLUG_CPU */
 int __cpu_disable(cpumask_t remove_mask)
diff -puN arch/ia64/kernel/time.c~ia64-bulk-cpu-hotplug arch/ia64/kernel/time.c
--- linux-2.6.17-rc3/arch/ia64/kernel/time.c~ia64-bulk-cpu-hotplug	2006-05-07 07:46:35.000000000 +0800
+++ linux-2.6.17-rc3-root/arch/ia64/kernel/time.c	2006-05-07 07:46:35.000000000 +0800
@@ -241,6 +241,11 @@ void __devinit ia64_disable_timer(void)
 	ia64_set_itv(1 << 16);
 }
 
+void __devinit ia64_enable_timer(void)
+{
+	ia64_set_itv(IA64_TIMER_VECTOR);
+}
+
 void __init
 time_init (void)
 {
_
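
[Note for reviewers, not part of the patch: a condensed sketch of the
master/slave rendezvous that the new __cpu_disable() above performs.  Names
mirror the patch; error paths and the ia64-specific irq/timer work are
reduced to comments.]

static cpumask_t dead_mask  = CPU_MASK_NONE;
static atomic_t  start_flag = ATOMIC_INIT(0);	/* 0:wait 1:go 2:error */

static int bulk_disable_rendezvous(cpumask_t remove_mask)
{
	int cpu = smp_processor_id();
	int master = (first_cpu(remove_mask) == cpu);

	if (master) {
		/* mask-wide prep: new timekeeper, migrate irqs, ... */
		atomic_set(&start_flag, 1);	/* release the slaves */
	} else {
		while (atomic_read(&start_flag) == 0)
			cpu_relax();		/* wait for the master */
		if (atomic_read(&start_flag) == 2)
			return -EBUSY;		/* master hit an error */
	}

	/* per-cpu teardown: drain pending irqs, local_irq_disable(), ... */
	cpu_clear(cpu, cpu_online_map);
	smp_mb();
	cpu_set(cpu, dead_mask);		/* check in as dead */
	if (!master)
		return 0;

	/* the master waits for every CPU in the mask, then cleans up */
	while (!cpus_equal(dead_mask, remove_mask))
		cpu_relax();
	/* remove sibling info, final irq fixup, flush TLB, ... */
	cpus_clear(dead_mask);
	atomic_set(&start_flag, 0);
	return 0;
}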