[patch] spinlock-debug fix

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



* Andrew Morton <[email protected]> wrote:

> Reuben Farrelly <[email protected]> wrote:
> >
> >  > Anyway, scary trace.  It look like some spinlock is thought to be in the
> >  > wrong state in schedule().  Send the .config, please.
> > 
> >  Now online at  http://www.reub.net/kernel/.config
> 
> Me too.
> 
> BUG: spinlock recursion on CPU#0, swapper/0, c120d520             
>  [<c01039ed>] dump_stack+0x19/0x20                   
>  [<c01d9af2>] spin_bug+0x42/0x54  
>  [<c01d9bfa>] _raw_spin_lock+0x3e/0x84
>  [<c031d0ad>] _spin_lock+0x9/0x10     
>  [<c031b9e9>] schedule+0x479/0xbc8
>  [<c0100cb4>] cpu_idle+0x88/0x8c  
>  [<c01002c1>] rest_init+0x21/0x28
>  [<c0442899>] start_kernel+0x151/0x158
>  [<c010020f>] 0xc010020f              
> Kernel panic - not syncing: bad locking
> 
> The bug is in the new spinlock debugging code itself.  Ingo, can you 
> test that .config please?

couldnt reproduce it on an UP box, nor on an SMP/HT 2/4-way box, but it 
finally triggered on a 2-way SMP box.

the bug is that current->pid is not a unique identifier on SMP (doh!).  

The patch below fixes the bug - which also happens to be a speedup for 
the debugging code, as the ->pid dereferencing does not have to be done 
anymore. Also, i've disabled the panicing for now.

	Ingo

- change owner_pid to owner, to fix bad pid uniqueness assumption on SMP
- some more debug output printed
- dont panic for now

Signed-off-by: Ingo Molnar <[email protected]>

 include/linux/spinlock_types.h |   16 ++++++++++------
 kernel/sched.c                 |    2 +-
 lib/spinlock_debug.c           |   30 +++++++++++++++++++-----------
 3 files changed, 30 insertions(+), 18 deletions(-)

Index: linux/include/linux/spinlock_types.h
===================================================================
--- linux.orig/include/linux/spinlock_types.h
+++ linux/include/linux/spinlock_types.h
@@ -21,11 +21,12 @@ typedef struct {
 	unsigned int break_lock;
 #endif
 #ifdef CONFIG_DEBUG_SPINLOCK
-	unsigned int magic, owner_pid, owner_cpu;
+	unsigned int magic, owner_cpu;
+	void *owner;
 #endif
 } spinlock_t;
 
-#define SPINLOCK_MAGIC  0xdead4ead
+#define SPINLOCK_MAGIC		0xdead4ead
 
 typedef struct {
 	raw_rwlock_t raw_lock;
@@ -33,22 +34,25 @@ typedef struct {
 	unsigned int break_lock;
 #endif
 #ifdef CONFIG_DEBUG_SPINLOCK
-	unsigned int magic, owner_pid, owner_cpu;
+	unsigned int magic, owner_cpu;
+	void *owner;
 #endif
 } rwlock_t;
 
-#define RWLOCK_MAGIC	0xdeaf1eed
+#define RWLOCK_MAGIC		0xdeaf1eed
+
+#define SPINLOCK_OWNER_INIT	((void *)-1L)
 
 #ifdef CONFIG_DEBUG_SPINLOCK
 # define SPIN_LOCK_UNLOCKED						\
 	(spinlock_t)	{	.raw_lock = __RAW_SPIN_LOCK_UNLOCKED,	\
 				.magic = SPINLOCK_MAGIC,		\
-				.owner_pid = -1,			\
+				.owner = SPINLOCK_OWNER_INIT,		\
 				.owner_cpu = -1 }
 #define RW_LOCK_UNLOCKED						\
 	(rwlock_t)	{	.raw_lock = __RAW_RW_LOCK_UNLOCKED,	\
 				.magic = RWLOCK_MAGIC,			\
-				.owner_pid = -1,			\
+				.owner = SPINLOCK_OWNER_INIT,		\
 				.owner_cpu = -1 }
 #else
 # define SPIN_LOCK_UNLOCKED \
Index: linux/kernel/sched.c
===================================================================
--- linux.orig/kernel/sched.c
+++ linux/kernel/sched.c
@@ -1604,7 +1604,7 @@ static inline void finish_task_switch(ru
 	prev_task_flags = prev->flags;
 #ifdef CONFIG_DEBUG_SPINLOCK
 	/* this is a valid case when another task releases the spinlock */
-	rq->lock.owner_pid = current->pid;
+	rq->lock.owner = current;
 #endif
 	finish_arch_switch(prev);
 	finish_lock_switch(rq, prev);
Index: linux/lib/spinlock_debug.c
===================================================================
--- linux.orig/lib/spinlock_debug.c
+++ linux/lib/spinlock_debug.c
@@ -14,16 +14,24 @@
 static void spin_bug(spinlock_t *lock, const char *msg)
 {
 	static long print_once = 1;
+	struct task_struct *owner = NULL;
 
 	if (xchg(&print_once, 0)) {
-		printk("BUG: spinlock %s on CPU#%d, %s/%d, %p\n", msg,
-			smp_processor_id(), current->comm, current->pid, lock);
+		if (lock->owner && lock->owner != SPINLOCK_OWNER_INIT)
+			owner = lock->owner;
+		printk("BUG: spinlock %s on CPU#%d, %s/%d\n",
+			msg, smp_processor_id(), current->comm, current->pid);
+		printk(" lock: %p, .magic: %08x, .owner: %s/%d, .owner_cpu: %d\n",
+			lock, lock->magic,
+			owner ? owner->comm : "<none>",
+			owner ? owner->pid : -1,
+			lock->owner_cpu);
 		dump_stack();
 #ifdef CONFIG_SMP
 		/*
 		 * We cannot continue on SMP:
 		 */
-		panic("bad locking");
+//		panic("bad locking");
 #endif
 	}
 }
@@ -33,7 +41,7 @@ static void spin_bug(spinlock_t *lock, c
 static inline void debug_spin_lock_before(spinlock_t *lock)
 {
 	SPIN_BUG_ON(lock->magic != SPINLOCK_MAGIC, lock, "bad magic");
-	SPIN_BUG_ON(lock->owner_pid == current->pid, lock, "recursion");
+	SPIN_BUG_ON(lock->owner == current, lock, "recursion");
 	SPIN_BUG_ON(lock->owner_cpu == raw_smp_processor_id(),
 							lock, "cpu recursion");
 }
@@ -41,17 +49,17 @@ static inline void debug_spin_lock_befor
 static inline void debug_spin_lock_after(spinlock_t *lock)
 {
 	lock->owner_cpu = raw_smp_processor_id();
-	lock->owner_pid = current->pid;
+	lock->owner = current;
 }
 
 static inline void debug_spin_unlock(spinlock_t *lock)
 {
 	SPIN_BUG_ON(lock->magic != SPINLOCK_MAGIC, lock, "bad magic");
 	SPIN_BUG_ON(!spin_is_locked(lock), lock, "already unlocked");
-	SPIN_BUG_ON(lock->owner_pid != current->pid, lock, "wrong owner");
+	SPIN_BUG_ON(lock->owner != current, lock, "wrong owner");
 	SPIN_BUG_ON(lock->owner_cpu != raw_smp_processor_id(),
 							lock, "wrong CPU");
-	lock->owner_pid = -1;
+	lock->owner = SPINLOCK_OWNER_INIT;
 	lock->owner_cpu = -1;
 }
 
@@ -176,7 +184,7 @@ void _raw_read_unlock(rwlock_t *lock)
 static inline void debug_write_lock_before(rwlock_t *lock)
 {
 	RWLOCK_BUG_ON(lock->magic != RWLOCK_MAGIC, lock, "bad magic");
-	RWLOCK_BUG_ON(lock->owner_pid == current->pid, lock, "recursion");
+	RWLOCK_BUG_ON(lock->owner == current, lock, "recursion");
 	RWLOCK_BUG_ON(lock->owner_cpu == raw_smp_processor_id(),
 							lock, "cpu recursion");
 }
@@ -184,16 +192,16 @@ static inline void debug_write_lock_befo
 static inline void debug_write_lock_after(rwlock_t *lock)
 {
 	lock->owner_cpu = raw_smp_processor_id();
-	lock->owner_pid = current->pid;
+	lock->owner = current;
 }
 
 static inline void debug_write_unlock(rwlock_t *lock)
 {
 	RWLOCK_BUG_ON(lock->magic != RWLOCK_MAGIC, lock, "bad magic");
-	RWLOCK_BUG_ON(lock->owner_pid != current->pid, lock, "wrong owner");
+	RWLOCK_BUG_ON(lock->owner != current, lock, "wrong owner");
 	RWLOCK_BUG_ON(lock->owner_cpu != raw_smp_processor_id(),
 							lock, "wrong CPU");
-	lock->owner_pid = -1;
+	lock->owner = SPINLOCK_OWNER_INIT;
 	lock->owner_cpu = -1;
 }
 
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [email protected]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[Index of Archives]     [Kernel Newbies]     [Netfilter]     [Bugtraq]     [Photo]     [Stuff]     [Gimp]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Video 4 Linux]     [Linux for the blind]     [Linux Resources]
  Powered by Linux