Re: PATCH/RFC: [kdump] fix APIC shutdown sequence

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Vivek Goyal wrote:

> But the issue here seems to be that LAPIC state got clear but IRR bit
> at IOAPIC bit is not cleared because IOAPIC vector information was deleted
> in first kernel and now upon receiving EOI, it does not know this EOI belongs
> to which vector.

I am running another experiment right now. I check the LAPIC ISR flags in
machine_crash_shutdown() before disabling the LAPIC and try to send an EOI if
I find an ISR bit set. So far, I haven't seen a single case where an ISR bit
was set, but several (9) where the IO-APIC IRR bit was set. OTOH, I know that
if I re-enabled IRQs, IRQ 225 would be raised. The whole thing is done before
disable_IO_APIC().

Don't ask me to explain why I don't see the ISR bits.

The arch/x86_64/kernel/crash.c portion of the patch I am using is attached for
your reference. The rest is the same as in the original patch.

Martin

-- 
Martin Wilck
PRIMERGY System Software Engineer
FSC IP ESP DE6

Fujitsu Siemens Computers GmbH
Heinz-Nixdorf-Ring 1
33106 Paderborn
Germany

Tel:			++49 5251 8 15113
Fax:			++49 5251 8 20409
Email:			mailto:[email protected]
Internet:		http://www.fujitsu-siemens.com
Company Details:	http://www.fujitsu-siemens.com/imprint.html
--- 2.6.23-rc1/arch/x86_64/kernel/crash.c.orig	2007-06-05 11:19:07.000000000 +0200
+++ 2.6.23-rc1/arch/x86_64/kernel/crash.c	2007-08-08 18:19:15.000000000 +0200
@@ -18,18 +18,51 @@
 #include <linux/elf.h>
 #include <linux/elfcore.h>
 #include <linux/kdebug.h>
+#include <linux/interrupt.h>
 
 #include <asm/processor.h>
 #include <asm/hardirq.h>
 #include <asm/nmi.h>
 #include <asm/hw_irq.h>
 #include <asm/mach_apic.h>
+#include <asm/apic.h>
 
 /* This keeps a track of which one is crashing cpu. */
 static int crashing_cpu;
 
 #ifdef CONFIG_SMP
 static atomic_t waiting_for_crash_ipi;
+static atomic_t crash_ipi_stage2 = ATOMIC_INIT(0);
+
+extern void remove_siblinginfo(int);
+extern void remove_cpu_from_maps(void);
+extern void crash_mask_IO_APIC(int);
+
+static void ack_pending_irqs(int cpu)
+{
+	int i, j, k;
+	unsigned int value;
+	printk("APIC ISR %d:", cpu);
+	for (i = APIC_ISR_NR - 1; i >= 0; i--) {
+		for (k = 0; k < 100; k++) {
+			value = apic_read(APIC_ISR + i*0x10);
+			if (value == 0)
+				break;
+			for (j = 31; j >= 0; j--) {
+				if (value & (1<<j)) {
+					printk (" %x", (i<<5) + j);
+					ack_APIC_irq();
+					goto next_k;
+				}
+			}
+		next_k:
+			continue;
+		}
+		if (k >= 100)
+			printk("!!!!\n");
+	}
+	printk("\n");
+}
 
 static int crash_nmi_callback(struct notifier_block *self,
 				unsigned long val, void *data)
@@ -53,13 +86,27 @@ static int crash_nmi_callback(struct not
 	local_irq_disable();
 
 	crash_save_cpu(regs, cpu);
-	disable_local_APIC();
-	atomic_dec(&waiting_for_crash_ipi);
+	disable_APIC_timer();
+        atomic_dec(&waiting_for_crash_ipi);
+
+	while(atomic_read(&crash_ipi_stage2) == 0)
+		cpu_relax();
+ 
+ 	remove_siblinginfo(cpu);
+ 	cpu_clear(cpu, cpu_online_map);
+ 	remove_cpu_from_maps();
+ 
+	ack_pending_irqs(cpu);
+ 
+ 	disable_local_APIC();
+ 	atomic_dec(&crash_ipi_stage2);
+ 
+
 	/* Assume hlt works */
 	for(;;)
 		halt();
 
-	return 1;
+	return NOTIFY_OK;
 }
 
 static void smp_send_nmi_allbutself(void)
@@ -79,7 +126,7 @@ static struct notifier_block crash_nmi_n
 
 static void nmi_shootdown_cpus(void)
 {
-	unsigned long msecs;
+	unsigned long usecs;
 
 	atomic_set(&waiting_for_crash_ipi, num_online_cpus() - 1);
 	if (register_die_notifier(&crash_nmi_nb))
@@ -93,19 +140,33 @@ static void nmi_shootdown_cpus(void)
 
 	smp_send_nmi_allbutself();
 
+	usecs = 1000000; /* Wait at most a second for the other cpus to stop */
+	while ((atomic_read(&waiting_for_crash_ipi) > 0) && usecs) {
+		udelay(1);
+		usecs--;
+	}
+}
+
+static void nmi_shootdown_cpus_stage2(void)
+{
+	
+	unsigned long msecs;
+	atomic_set(&crash_ipi_stage2, num_online_cpus());
+
 	msecs = 1000; /* Wait at most a second for the other cpus to stop */
-	while ((atomic_read(&waiting_for_crash_ipi) > 0) && msecs) {
+	while ((atomic_read(&crash_ipi_stage2) > 1) && msecs) {
 		mdelay(1);
 		msecs--;
 	}
-	/* Leave the nmi callback set */
-	disable_local_APIC();
 }
 #else
 static void nmi_shootdown_cpus(void)
 {
 	/* There are no cpus to shootdown */
 }
+static void nmi_shootdown_cpus_stage2(void)
+{
+}
 #endif
 
 void machine_crash_shutdown(struct pt_regs *regs)
@@ -121,15 +182,29 @@ void machine_crash_shutdown(struct pt_re
 	 */
 	/* The kernel is broken so disable interrupts */
 	local_irq_disable();
-
 	/* Make a note of crashing cpu. Will be used in NMI callback.*/
 	crashing_cpu = smp_processor_id();
+	crash_save_cpu(regs, crashing_cpu);
+
+ 	/* disable timer interrupts */
+ 	disable_irq_nosync(0);
+ 	disable_APIC_timer();	
+ 
 	nmi_shootdown_cpus();
 
+ 	/* Mask all IRQs, and make sure they are delivered to this CPU */
+ 	crash_mask_IO_APIC(crashing_cpu);
+ 	nmi_shootdown_cpus_stage2();
+ 
+	ack_pending_irqs(crashing_cpu);
+
 	if(cpu_has_apic)
 		 disable_local_APIC();
 
+ 	/* Send EOI for pending IRQs */
+ 	/* local_irq_enable();
+	   udelay(10000);
+	   local_irq_disable(); */
+ 
 	disable_IO_APIC();
-
-	crash_save_cpu(regs, smp_processor_id());
 }

[Index of Archives]     [Kernel Newbies]     [Netfilter]     [Bugtraq]     [Photo]     [Stuff]     [Gimp]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Video 4 Linux]     [Linux for the blind]     [Linux Resources]
  Powered by Linux