Re: swsusp smp problems... [was Re: swsusp: Remove arch-specific references from generic code]

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Hi,

Sorry for the delay, I had some urgent work to do yesterday ...

On Monday, 21 of March 2005 11:41, Pavel Machek wrote:
> Hi!
> 
> > > At least part of them is caused by CONFIG_MTRR. I had to disable it on
> > > i386 to make it work...
> > 
> > Later today I'll check if that helps on x86-64.

On vanilla 2.6.11-mm4 processes freeze successfully when CONFIG_MTRR
is disabled, but it doesn't help if the CPU hotplug code is used before freezing
the processes.


> > Anyway in the meantime I have played a bit with the CPU hotplug code.
> > It needs some work, but looks promising.  I've changed disable_nonboot_cpus()
> > to use the CPU hotplug code and it seems to work.  Well, almost, because some
> > traces of the second CPU remain in the kernel, as some things do not work
> > properly (eg flush_tlb_others() is called with a mask that triggers a BUG()
> > in it etc.).  This should not be difficult to get fixed, however.  Strangely enough,
> > the processes still fail to freeze after the second CPU has been disabled
> > (specifically one of them, which is "syslogd").  I'm going to investigate this
> > more thoroughly.
> > 
> > Turning the second CPU back on does not work for me, but in fact I haven't
> > looked at it so far.
> 
> Can you mail me (and probably l-k) the latest diffs? I started
> playing with it, too... (remember that scrap-metal machine?).

All right, the current diff between 2.6.11-mm4 and my development tree on the
SMP box is attached.  It's quite big, as it contains the CPU hotplug code that
I've dragged to the x86-64 tree.  There's a lot of debug stuff (probably
bugs too) in it and it doesn't let the box actually suspend.  Also, it doesn't
enable the non-boot CPUs after they've been disabled.

Greets,
Rafael


-- 
- Would you tell me, please, which way I ought to go from here?
- That depends a good deal on where you want to get to.
		-- Lewis Carroll "Alice's Adventures in Wonderland"
diff -Nrup linux-2.6.11-mm4/arch/x86_64/Kconfig linux-2.6.11-mm4-new/arch/x86_64/Kconfig
--- linux-2.6.11-mm4/arch/x86_64/Kconfig	2005-03-17 01:04:34.000000000 +0100
+++ linux-2.6.11-mm4-new/arch/x86_64/Kconfig	2005-03-18 00:13:17.000000000 +0100
@@ -451,6 +451,16 @@ config UNORDERED_IO
 
 source "drivers/pci/Kconfig"
 
+config HOTPLUG_CPU
+	bool "Support for hot-pluggable CPUs (EXPERIMENTAL)"
+	depends on SMP && HOTPLUG && EXPERIMENTAL
+	---help---
+	  Say Y here to experiment with turning CPUs off and on
+	  or if you want to use software suspend (swsusp) with SMP.  CPUs
+	  can be controlled through /sys/devices/system/cpu.
+
+	  Say N.
+
 source "drivers/pcmcia/Kconfig"
 
 source "drivers/pci/hotplug/Kconfig"
diff -Nrup linux-2.6.11-mm4/arch/x86_64/kernel/apic.c linux-2.6.11-mm4-new/arch/x86_64/kernel/apic.c
--- linux-2.6.11-mm4/arch/x86_64/kernel/apic.c	2005-03-17 01:04:34.000000000 +0100
+++ linux-2.6.11-mm4-new/arch/x86_64/kernel/apic.c	2005-03-20 21:22:50.000000000 +0100
@@ -816,7 +816,7 @@ void __init setup_secondary_APIC_clock(v
 	local_irq_enable();
 }
 
-void __init disable_APIC_timer(void)
+void disable_APIC_timer(void)
 {
 	if (using_apic_timer) {
 		unsigned long v;
diff -Nrup linux-2.6.11-mm4/arch/x86_64/kernel/irq.c linux-2.6.11-mm4-new/arch/x86_64/kernel/irq.c
--- linux-2.6.11-mm4/arch/x86_64/kernel/irq.c	2005-03-17 01:04:34.000000000 +0100
+++ linux-2.6.11-mm4-new/arch/x86_64/kernel/irq.c	2005-03-21 00:12:14.000000000 +0100
@@ -16,6 +16,7 @@
 #include <linux/module.h>
 #include <asm/uaccess.h>
 #include <asm/io_apic.h>
+#include <linux/delay.h>
 
 atomic_t irq_err_count;
 #ifdef CONFIG_X86_IO_APIC
@@ -106,3 +107,43 @@ asmlinkage unsigned int do_IRQ(struct pt
 
 	return 1;
 }
+
+#ifdef CONFIG_HOTPLUG_CPU
+
+void fixup_irqs(cpumask_t map)
+{
+	unsigned int irq;
+	static int warned;
+
+	for (irq = 0; irq < NR_IRQS; irq++) {
+		cpumask_t mask;
+
+		cpus_and(mask, irq_affinity[irq], map);
+		if (any_online_cpu(mask) == NR_CPUS) {
+			printk("Breaking affinity for irq %i\n", irq);
+			mask = map;
+		}
+		if (irq_desc[irq].handler->set_affinity) {
+			printk("Setting affinity for irq %d to 0x%x\n", irq, mask);
+			irq_desc[irq].handler->set_affinity(irq, mask);
+		}
+		else if (irq_desc[irq].action && !(warned++))
+			printk("Cannot set affinity for irq %i\n", irq);
+	}
+
+#if 0
+	barrier();
+	/* Ingo Molnar says: "after the IO-APIC masks have been redirected
+	   [note the nop - the interrupt-enable boundary on x86 is two
+	   instructions from sti] - to flush out pending hardirqs and
+	   IPIs. After this point nothing is supposed to reach this CPU." */
+	__asm__ __volatile__("sti; nop; cli");
+	barrier();
+#else
+	/* That doesn't seem sufficient.  Give it 1ms. */
+	local_irq_enable();
+	mdelay(1);
+	local_irq_disable();
+#endif
+}
+#endif
diff -Nrup linux-2.6.11-mm4/arch/x86_64/kernel/process.c linux-2.6.11-mm4-new/arch/x86_64/kernel/process.c
--- linux-2.6.11-mm4/arch/x86_64/kernel/process.c	2005-03-18 00:03:32.000000000 +0100
+++ linux-2.6.11-mm4-new/arch/x86_64/kernel/process.c	2005-03-18 00:56:10.000000000 +0100
@@ -49,6 +49,7 @@
 #include <asm/desc.h>
 #include <asm/proto.h>
 #include <asm/ia32.h>
+#include <asm/cpu.h>
 
 asmlinkage extern void ret_from_fork(void);
 
@@ -154,6 +155,34 @@ void cpu_idle_wait(void)
 }
 EXPORT_SYMBOL_GPL(cpu_idle_wait);
 
+#ifdef CONFIG_HOTPLUG_CPU
+#include <asm/nmi.h>
+/* We don't actually take CPU down, just spin without interrupts. */
+static inline void play_dead(void)
+{
+	/* Ack it */
+	__get_cpu_var(cpu_state) = CPU_DEAD;
+
+	/* We shouldn't have to disable interrupts while dead, but
+	* some interrupts just don't seem to go away, and this makes
+	* it "work" for testing purposes. */
+	/* Death loop */
+	while (__get_cpu_var(cpu_state) != CPU_UP_PREPARE)
+		cpu_relax();
+
+	local_irq_disable();
+	__flush_tlb_all();
+	cpu_set(smp_processor_id(), cpu_online_map);
+	enable_APIC_timer();
+	local_irq_enable();
+}
+#else
+static inline void play_dead(void)
+{
+	BUG();
+}
+#endif /* CONFIG_HOTPLUG_CPU */
+
 /*
  * The idle thread. There's no useful work to be
  * done, so just try to conserve power and have a
@@ -174,6 +203,10 @@ void cpu_idle (void)
 			idle = pm_idle;
 			if (!idle)
 				idle = default_idle;
+
+			if (cpu_is_offline(smp_processor_id()))
+				play_dead();
+
 			idle();
 		}
 
diff -Nrup linux-2.6.11-mm4/arch/x86_64/kernel/smpboot.c linux-2.6.11-mm4-new/arch/x86_64/kernel/smpboot.c
--- linux-2.6.11-mm4/arch/x86_64/kernel/smpboot.c	2005-03-17 01:04:34.000000000 +0100
+++ linux-2.6.11-mm4-new/arch/x86_64/kernel/smpboot.c	2005-03-20 22:02:55.000000000 +0100
@@ -76,6 +76,9 @@ struct cpuinfo_x86 cpu_data[NR_CPUS] __c
 cpumask_t cpu_sibling_map[NR_CPUS] __cacheline_aligned;
 cpumask_t cpu_core_map[NR_CPUS] __cacheline_aligned;
 
+/* State of each CPU. */
+DEFINE_PER_CPU(int, cpu_state) = { 0 };
+
 /*
  * Trampoline 80x86 program as an array.
  */
@@ -919,13 +922,97 @@ void __devinit smp_prepare_boot_cpu(void
 	cpu_set(smp_processor_id(), cpu_callout_map);
 }
 
+#ifdef CONFIG_HOTPLUG_CPU
+
+/* must be called with the cpucontrol mutex held */
+static int __devinit cpu_enable(unsigned int cpu)
+{
+	/* get the target out of its holding state */
+	per_cpu(cpu_state, cpu) = CPU_UP_PREPARE;
+	wmb();
+
+	/* wait for the processor to ack it. timeout? */
+	while (!cpu_online(cpu))
+		cpu_relax();
+
+	fixup_irqs(cpu_online_map);
+	/* counter the disable in fixup_irqs() */
+	local_irq_enable();
+	return 0;
+}
+
+int __cpu_disable(void)
+{
+	cpumask_t map = cpu_online_map;
+	int cpu = smp_processor_id();
+
+	printk(KERN_WARNING "__cpu_disable(): cpu = %d\n", cpu);
+	/*
+	 * Perhaps use cpufreq to drop frequency, but that could go
+	 * into generic code.
+ 	 *
+	 * We won't take down the boot processor on x86-64
+	 */
+	if (cpu == 0)
+		return -EBUSY;
+
+	/* We enable the timer again on the exit path of the death loop */
+	disable_APIC_timer();
+	/* Allow any queued timer interrupts to get serviced */
+	local_irq_enable();
+	mdelay(1);
+	local_irq_disable();
+
+	cpu_clear(cpu, map);
+	fixup_irqs(map);
+	/* It's now safe to remove this processor from the online map */
+	__flush_tlb_global();
+	cpu_clear(cpu, cpu_online_map);
+	return 0;
+}
+
+void __cpu_die(unsigned int cpu)
+{
+	/* We don't do anything here: idle task is faking death itself. */
+	unsigned int i;
+
+	for (i = 0; i < 10; i++) {
+		/* They ack this in play_dead by setting CPU_DEAD */
+		if (per_cpu(cpu_state, cpu) == CPU_DEAD)
+			return;
+		current->state = TASK_UNINTERRUPTIBLE;
+		schedule_timeout(HZ/10);
+	}
+ 	printk(KERN_ERR "CPU %u didn't die...\n", cpu);
+}
+#else /* ... !CONFIG_HOTPLUG_CPU */
+int __cpu_disable(void)
+{
+	return -ENOSYS;
+}
+
+void __cpu_die(unsigned int cpu)
+{
+	/* We said "no" in __cpu_disable */
+	BUG();
+}
+#endif /* CONFIG_HOTPLUG_CPU */
+
 int __devinit __cpu_up(unsigned int cpu)
 {
+#ifdef CONFIG_HOTPLUG_CPU
+	/* Already up, and in cpu_quiescent now? */
+	if (cpu_isset(cpu, smp_commenced_mask)) {
+		cpu_enable(cpu);
+		return 0;
+	}
+#else
 	/* This only works at boot for x86.  See "rewrite" above. */
 	if (cpu_isset(cpu, smp_commenced_mask)) {
 		local_irq_enable();
 		return -ENOSYS;
 	}
+#endif
 
 	/* In case one didn't come up */
 	if (!cpu_isset(cpu, cpu_callin_map)) {
diff -Nrup linux-2.6.11-mm4/arch/x86_64/kernel/traps.c linux-2.6.11-mm4-new/arch/x86_64/kernel/traps.c
--- linux-2.6.11-mm4/arch/x86_64/kernel/traps.c	2005-03-18 00:03:24.000000000 +0100
+++ linux-2.6.11-mm4-new/arch/x86_64/kernel/traps.c	2005-03-18 00:09:24.000000000 +0100
@@ -590,6 +590,12 @@ asmlinkage void default_do_nmi(struct pt
 {
 	unsigned char reason = 0;
 
+#ifdef CONFIG_HOTPLUG_CPU
+	/* Ignore offline CPUs */
+	if (!cpu_online(smp_processor_id()))
+		return;
+#endif
+
 	/* Only the BSP gets external NMIs from the system.  */
 	if (!smp_processor_id())
 		reason = get_nmi_reason();
diff -Nrup linux-2.6.11-mm4/drivers/serial/serial_core.c linux-2.6.11-mm4-new/drivers/serial/serial_core.c
--- linux-2.6.11-mm4/drivers/serial/serial_core.c	2005-03-17 01:04:37.000000000 +0100
+++ linux-2.6.11-mm4-new/drivers/serial/serial_core.c	2005-03-18 22:54:35.000000000 +0100
@@ -1831,6 +1831,9 @@ int uart_suspend_port(struct uart_driver
 {
 	struct uart_state *state = drv->state + port->line;
 
+	if (uart_console(port))
+		return 0;
+
 	down(&state->sem);
 
 	if (state->info && state->info->flags & UIF_INITIALIZED) {
@@ -1869,6 +1872,9 @@ int uart_resume_port(struct uart_driver 
 {
 	struct uart_state *state = drv->state + port->line;
 
+	if (uart_console(port))
+		return 0;
+
 	down(&state->sem);
 
 	uart_change_pm(state, 0);
diff -Nrup linux-2.6.11-mm4/include/asm-x86_64/irq.h linux-2.6.11-mm4-new/include/asm-x86_64/irq.h
--- linux-2.6.11-mm4/include/asm-x86_64/irq.h	2005-03-17 01:04:38.000000000 +0100
+++ linux-2.6.11-mm4-new/include/asm-x86_64/irq.h	2005-03-16 23:55:13.000000000 +0100
@@ -10,6 +10,9 @@
  *	<[email protected]>
  */
 
+#include <linux/config.h>
+#include <linux/sched.h>
+
 #define TIMER_IRQ 0
 
 /*
@@ -52,4 +55,8 @@ struct irqaction;
 struct pt_regs;
 int handle_IRQ_event(unsigned int, struct pt_regs *, struct irqaction *);
 
+#ifdef CONFIG_HOTPLUG_CPU
+extern void fixup_irqs(cpumask_t map);
+#endif
+
 #endif /* _ASM_IRQ_H */
diff -Nrup linux-2.6.11-mm4/include/asm-x86_64/smp.h linux-2.6.11-mm4-new/include/asm-x86_64/smp.h
--- linux-2.6.11-mm4/include/asm-x86_64/smp.h	2005-03-17 01:04:38.000000000 +0100
+++ linux-2.6.11-mm4-new/include/asm-x86_64/smp.h	2005-03-17 00:01:05.000000000 +0100
@@ -145,6 +145,9 @@ static __inline int logical_smp_processo
 	/* we don't want to mark this access volatile - bad code generation */
 	return GET_APIC_LOGICAL_ID(*(unsigned long *)(APIC_BASE+APIC_LDR));
 }
+
+extern int __cpu_disable(void);
+extern void __cpu_die(unsigned int cpu);
 #endif
 
 #endif
diff -Nrup linux-2.6.11-mm4/include/linux/suspend.h linux-2.6.11-mm4-new/include/linux/suspend.h
--- linux-2.6.11-mm4/include/linux/suspend.h	2005-03-18 01:03:14.000000000 +0100
+++ linux-2.6.11-mm4-new/include/linux/suspend.h	2005-03-18 01:04:07.000000000 +0100
@@ -61,10 +61,10 @@ static inline int software_suspend(void)
 #endif
 
 #ifdef CONFIG_SMP
-extern void disable_nonboot_cpus(void);
+extern int disable_nonboot_cpus(void);
 extern void enable_nonboot_cpus(void);
 #else
-static inline void disable_nonboot_cpus(void) {}
+static inline int disable_nonboot_cpus(void) { return 0; }
 static inline void enable_nonboot_cpus(void) {}
 #endif
 
diff -Nrup linux-2.6.11-mm4/kernel/power/disk.c linux-2.6.11-mm4-new/kernel/power/disk.c
--- linux-2.6.11-mm4/kernel/power/disk.c	2005-03-17 01:04:39.000000000 +0100
+++ linux-2.6.11-mm4-new/kernel/power/disk.c	2005-03-21 00:26:41.000000000 +0100
@@ -117,8 +117,8 @@ static void finish(void)
 {
 	device_resume();
 	platform_finish();
-	enable_nonboot_cpus();
 	thaw_processes();
+	enable_nonboot_cpus();
 	pm_restore_console();
 }
 
@@ -127,8 +127,6 @@ static int prepare_processes(void)
 {
 	int error;
 
-	pm_prepare_console();
-
 	sys_sync();
 
 	if (freeze_processes()) {
@@ -151,8 +149,8 @@ static int prepare_processes(void)
 
 static void unprepare_processes(void)
 {
-	enable_nonboot_cpus();
 	thaw_processes();
+	enable_nonboot_cpus();
 	pm_restore_console();
 }
 
@@ -160,11 +158,9 @@ static int prepare_devices(void)
 {
 	int error;
 
-	disable_nonboot_cpus();
 	if ((error = device_suspend(PMSG_FREEZE))) {
 		printk("Some devices failed to suspend\n");
 		platform_finish();
-		enable_nonboot_cpus();
 		return error;
 	}
 
@@ -184,13 +180,22 @@ int pm_suspend_disk(void)
 {
 	int error;
 
+	pm_prepare_console();
+
+	if ((error = disable_nonboot_cpus())) {
+		/*enable_nonboot_cpus();*/
+		pm_restore_console();
+		return error;
+	}
 	error = prepare_processes();
-	if (!error) {
+	error = -EFAULT;
+	ssleep(5);
+	if (!error)
 		error = prepare_devices();
-	}
-
 	if (error) {
-		unprepare_processes();
+		thaw_processes();
+		/*enable_nonboot_cpus();*/
+		pm_restore_console();
 		return error;
 	}
 
diff -Nrup linux-2.6.11-mm4/kernel/power/Kconfig linux-2.6.11-mm4-new/kernel/power/Kconfig
--- linux-2.6.11-mm4/kernel/power/Kconfig	2005-03-02 08:38:25.000000000 +0100
+++ linux-2.6.11-mm4-new/kernel/power/Kconfig	2005-03-18 00:15:26.000000000 +0100
@@ -28,7 +28,7 @@ config PM_DEBUG
 
 config SOFTWARE_SUSPEND
 	bool "Software Suspend (EXPERIMENTAL)"
-	depends on EXPERIMENTAL && PM && SWAP
+	depends on EXPERIMENTAL && PM && SWAP && (HOTPLUG_CPU || !SMP)
 	---help---
 	  Enable the possibility of suspending the machine.
 	  It doesn't need APM.
diff -Nrup linux-2.6.11-mm4/kernel/power/smp.c linux-2.6.11-mm4-new/kernel/power/smp.c
--- linux-2.6.11-mm4/kernel/power/smp.c	2005-03-17 01:04:39.000000000 +0100
+++ linux-2.6.11-mm4-new/kernel/power/smp.c	2005-03-21 00:12:39.000000000 +0100
@@ -7,79 +7,52 @@
  * This file is released under the GPLv2.
  */
 
-#undef DEBUG
-
 #include <linux/smp_lock.h>
 #include <linux/interrupt.h>
 #include <linux/suspend.h>
 #include <linux/module.h>
 #include <asm/atomic.h>
 #include <asm/tlbflush.h>
+#include <asm/cpu.h>
 
-static atomic_t cpu_counter, freeze;
-
-
-static void smp_pause(void * data)
-{
-	struct saved_context ctxt;
-	__save_processor_state(&ctxt);
-	printk("Sleeping in:\n");
-	dump_stack();
-	atomic_inc(&cpu_counter);
-	while (atomic_read(&freeze)) {
-		/* FIXME: restore takes place at random piece inside this.
-		   This should probably be written in assembly, and
-		   preserve general-purpose registers, too
-
-		   What about stack? We may need to move to new stack here.
-
-		   This should better be ran with interrupts disabled.
-		 */
-		cpu_relax();
-		barrier();
-	}
-	atomic_dec(&cpu_counter);
-	__restore_processor_state(&ctxt);
-}
-
-static cpumask_t oldmask;
+cpumask_t frozen_cpus;
 
-void disable_nonboot_cpus(void)
+int disable_nonboot_cpus(void)
 {
-	printk("Freezing CPUs (at %d)", smp_processor_id());
-	oldmask = current->cpus_allowed;
-	set_cpus_allowed(current, cpumask_of_cpu(0));
-	current->state = TASK_INTERRUPTIBLE;
-	schedule_timeout(HZ);
-	printk("...");
-	BUG_ON(smp_processor_id() != 0);
-
-	/* FIXME: for this to work, all the CPUs must be running
-	 * "idle" thread (or we deadlock). Is that guaranteed? */
+	int cpu, error;
 
-	atomic_set(&cpu_counter, 0);
-	atomic_set(&freeze, 1);
-	smp_call_function(smp_pause, NULL, 0, 0);
-	while (atomic_read(&cpu_counter) < (num_online_cpus() - 1)) {
-		cpu_relax();
-		barrier();
+	error = 0;
+	cpus_clear(frozen_cpus);
+	printk("Freezing cpus ...\n");
+	for_each_online_cpu(cpu) {
+		if (cpu == 0)
+			continue;
+		error = cpu_down(cpu);
+		if (!error) {
+			cpu_set(cpu, frozen_cpus);
+			printk("CPU%d is down\n", cpu);
+			continue;
+		}
+		printk("Error taking cpu %d down: %d\n", cpu, error);
 	}
-	printk("ok\n");
+	BUG_ON(smp_processor_id() != 0);
+	return error;
 }
 
 void enable_nonboot_cpus(void)
 {
-	printk("Restarting CPUs");
-	atomic_set(&freeze, 0);
-	while (atomic_read(&cpu_counter)) {
-		cpu_relax();
-		barrier();
-	}
-	printk("...");
-	set_cpus_allowed(current, oldmask);
-	schedule();
-	printk("ok\n");
+	int cpu, error;
 
+	printk("Thawing cpus ...\n");
+	for_each_cpu_mask(cpu, frozen_cpus) {
+		if (cpu == 0)
+			continue;
+		error = cpu_up(cpu);
+		if (!error) {
+			printk("CPU%d is up\n", cpu);
+			continue;
+		}
+		printk("Error taking cpu %d up: %d\n", cpu, error);
+		panic("Not enough cpus");
+	}
 }
-
-
diff -Nrup linux-2.6.11-mm4/kernel/stop_machine.c linux-2.6.11-mm4-new/kernel/stop_machine.c
--- linux-2.6.11-mm4/kernel/stop_machine.c	2005-03-02 08:37:47.000000000 +0100
+++ linux-2.6.11-mm4-new/kernel/stop_machine.c	2005-03-20 21:09:31.000000000 +0100
@@ -175,10 +175,13 @@ struct task_struct *__stop_machine_run(i
 
 	down(&stopmachine_mutex);
 
+	printk("__stop_machine_run(): cpu = %d\n", cpu);
 	/* If they don't care which CPU fn runs on, bind to any online one. */
 	if (cpu == NR_CPUS)
 		cpu = _smp_processor_id();
 
+	printk("__stop_machine_run(): Running on CPU %d\n", _smp_processor_id()); /*RJW*/
+	printk("__stop_machine_run(): Destination CPU %d\n", cpu); /*RJW*/
 	p = kthread_create(do_stop, &smdata, "kstopmachine");
 	if (!IS_ERR(p)) {
 		kthread_bind(p, cpu);

[Index of Archives]     [Kernel Newbies]     [Netfilter]     [Bugtraq]     [Photo]     [Stuff]     [Gimp]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Video 4 Linux]     [Linux for the blind]     [Linux Resources]
  Powered by Linux