[patch] x86_64, irq: check remote IRR bit before migrating level triggered irq

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On x86_64 kernels, level triggered irq migration gets initiated in the context
of that interrupt (after executing the irq handler), and the following steps
are performed to do the irq migration.

1. mask IOAPIC RTE entry;     // write to IOAPIC RTE
2. EOI;                       // processor EOI write
3. reprogram IOAPIC RTE entry // write to IOAPIC RTE with new destination and
                              // interrupt vector due to per cpu vector
                              // allocation.
4. unmask IOAPIC RTE entry;   // write to IOAPIC RTE

Because of the per cpu vector allocation in x86_64 kernels, when the irq
migrates to a different cpu, a new vector (corresponding to the new cpu) will
get allocated.

An EOI write to the local APIC has the side effect of generating an EOI write
for level triggered interrupts (normally this is a broadcast to all IOAPICs).
The EOI broadcast generated as a side effect of the EOI write to the processor
may be delayed while the other IOAPIC writes (steps 3 and 4) can go through.

Normally, the EOI generated by the local APIC for a level triggered interrupt
contains the vector number. The IOAPIC will take this vector number,
search the IOAPIC RTE entries for an entry with a matching vector number, and
clear the remote IRR bit (indicating EOI). However, if the vector number is
changed (as in step 3), the IOAPIC will not find the RTE entry when the EOI
is received later. This will cause the remote IRR to get stuck, causing the
interrupt to hang (no more interrupts from this RTE).

The current x86_64 kernel assumes that the remote IRR bit is cleared by the
time the IOAPIC RTE is reprogrammed. Fix this assumption by checking the remote
IRR bit and, if it is still set, delaying the irq migration to the next
interrupt arrival event (hopefully, next time the remote IRR bit will get
cleared before the IOAPIC RTE is reprogrammed).

Initial analysis and patch from Nanhai.

Clean up patch from Suresh.

Signed-off-by: Nanhai Zou <[email protected]>
Signed-off-by: Suresh Siddha <[email protected]>
Acked-by: Asit Mallick <[email protected]>
Cc: Keith Packard <[email protected]>
---

diff --git a/arch/x86_64/kernel/io_apic.c b/arch/x86_64/kernel/io_apic.c
index 2a2df14..632ec57 100644
--- a/arch/x86_64/kernel/io_apic.c
+++ b/arch/x86_64/kernel/io_apic.c
@@ -163,7 +163,7 @@ static inline void io_apic_sync(unsigned int apic)
 	readl(&io_apic->data);
 }
 
-#define __DO_ACTION(R, ACTION, FINAL)					\
+#define __DO_ACTION(R, ACTION, FINAL, CHECKREG, VAL)			\
 									\
 {									\
 	int pin;							\
@@ -177,12 +177,18 @@ static inline void io_apic_sync(unsigned int apic)
 			break;						\
 		reg = io_apic_read(entry->apic, 0x10 + R + pin*2);	\
 		reg ACTION;						\
-		io_apic_modify(entry->apic, reg);			\
-		FINAL;							\
+		if (CHECKREG) {						\
+			if (reg == VAL)					\
+				return 1;				\
+		} else {						\
+			io_apic_modify(entry->apic, reg);		\
+			FINAL;						\
+		}							\
 		if (!entry->next)					\
 			break;						\
 		entry = irq_2_pin + entry->next;			\
 	}								\
+	return 0;							\
 }
 
 union entry_union {
@@ -241,6 +247,13 @@ static void ioapic_mask_entry(int apic, int pin)
 }
 
 #ifdef CONFIG_SMP
+/*
+ * If its Level triggered and RemoteIRR is still set?
+ */
+static int pending_eoi(unsigned int irq)
+{
+	__DO_ACTION(0, &= PENDING_EOI, , 1, PENDING_EOI);
+}
 static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, u8 vector)
 {
 	int apic, pin;
@@ -271,6 +284,16 @@ static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t mask)
 	unsigned int dest;
 	cpumask_t tmp;
 
+	/*
+	 * If the EOI still didn't reach the RTE corresponding to the
+	 * level triggered irq, postpone the irq migration to the next
+	 * irq arrival event.
+	 */
+	if (pending_eoi(irq)) {
+		irq_desc[irq].status |= IRQ_MOVE_PENDING;
+		return;
+	}
+
 	cpus_and(tmp, mask, cpu_online_map);
 	if (cpus_empty(tmp))
 		return;
@@ -320,8 +343,8 @@ static void add_pin_to_irq(unsigned int irq, int apic, int pin)
 
 #define DO_ACTION(name,R,ACTION, FINAL)					\
 									\
-	static void name##_IO_APIC_irq (unsigned int irq)		\
-	__DO_ACTION(R, ACTION, FINAL)
+	static int name##_IO_APIC_irq (unsigned int irq)		\
+	__DO_ACTION(R, ACTION, FINAL, 0, 0)
 
 DO_ACTION( __mask,             0, |= 0x00010000, io_apic_sync(entry->apic) )
 						/* mask = 1 */
diff --git a/include/asm-x86_64/io_apic.h b/include/asm-x86_64/io_apic.h
index 969d225..94b9a74 100644
--- a/include/asm-x86_64/io_apic.h
+++ b/include/asm-x86_64/io_apic.h
@@ -90,6 +90,8 @@ struct IO_APIC_route_entry {
 		dest		:  8;
 } __attribute__ ((packed));
 
+#define PENDING_EOI	(1 << 14 | 1 << 15)
+
 /*
  * MP-BIOS irq configuration table structures:
  */
diff --git a/kernel/irq/migration.c b/kernel/irq/migration.c
index 77b7acc..acc56aa 100644
--- a/kernel/irq/migration.c
+++ b/kernel/irq/migration.c
@@ -55,7 +55,6 @@ void move_masked_irq(int irq)
 	if (likely(!cpus_empty(tmp))) {
 		desc->chip->set_affinity(irq,tmp);
 	}
-	cpus_clear(irq_desc[irq].pending_mask);
 }
 
 void move_native_irq(int irq)
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [email protected]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[Index of Archives]     [Kernel Newbies]     [Netfilter]     [Bugtraq]     [Photo]     [Stuff]     [Gimp]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Video 4 Linux]     [Linux for the blind]     [Linux Resources]
  Powered by Linux