[PATCH -mm] futex-pi: fix possible pi_lock deadlock

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



From: Ingo Molnar <[email protected]>

The lock validator detected a possible deadlock between tasklist lock and
task->pi_lock.
Prevent the deadlock by disabling interrupts across pi_lock operations.

Signed-off-by: Ingo Molnar <[email protected]>
Signed-off-by: Thomas Gleixner <[email protected]>

 kernel/futex.c   |   28 ++++++++++++------------
 kernel/rtmutex.c |   63 ++++++++++++++++++++++++++++++-------------------------
 kernel/sched.c   |   45 ++++++++++++++++++++++++++++++---------
 3 files changed, 84 insertions(+), 52 deletions(-)

Index: linux-2.6.17-rc3-mm1/kernel/sched.c
===================================================================
--- linux-2.6.17-rc3-mm1.orig/kernel/sched.c
+++ linux-2.6.17-rc3-mm1/kernel/sched.c
@@ -360,6 +360,25 @@ static inline void finish_lock_switch(ru
 #endif /* __ARCH_WANT_UNLOCKED_CTXSW */
 
 /*
+ * __task_rq_lock - lock the runqueue a given task resides on.
+ * Must be called interrupts disabled.
+ */
+static inline runqueue_t *__task_rq_lock(task_t *p)
+	__acquires(rq->lock)
+{
+	struct runqueue *rq;
+
+repeat_lock_task:
+	rq = task_rq(p);
+	spin_lock(&rq->lock);
+	if (unlikely(rq != task_rq(p))) {
+		spin_unlock(&rq->lock);
+		goto repeat_lock_task;
+	}
+	return rq;
+}
+
+/*
  * task_rq_lock - lock the runqueue a given task resides on and disable
  * interrupts.  Note the ordering: we can safely lookup the task_rq without
  * explicitly disabling preemption.
@@ -380,6 +399,12 @@ repeat_lock_task:
 	return rq;
 }
 
+static inline void __task_rq_unlock(runqueue_t *rq)
+	__releases(rq->lock)
+{
+	spin_unlock(&rq->lock);
+}
+
 static inline void task_rq_unlock(runqueue_t *rq, unsigned long *flags)
 	__releases(rq->lock)
 {
@@ -4016,17 +4041,17 @@ recheck:
 	 * make sure no PI-waiters arrive (or leave) while we are
 	 * changing the priority of the task:
 	 */
-	spin_lock(&p->pi_lock);
+	spin_lock_irqsave(&p->pi_lock, flags);
 	/*
 	 * To be able to change p->policy safely, the apropriate
 	 * runqueue lock must be held.
 	 */
-	rq = task_rq_lock(p, &flags);
+	rq = __task_rq_lock(p);
 	/* recheck policy now with rq lock held */
 	if (unlikely(oldpolicy != -1 && oldpolicy != p->policy)) {
 		policy = oldpolicy = -1;
-		task_rq_unlock(rq, &flags);
-		spin_unlock(&p->pi_lock);
+		__task_rq_unlock(rq);
+		spin_unlock_irqrestore(&p->pi_lock, flags);
 		goto recheck;
 	}
 	array = p->array;
@@ -4047,8 +4072,8 @@ recheck:
 		} else if (TASK_PREEMPTS_CURR(p, rq))
 			resched_task(rq->curr);
 	}
-	task_rq_unlock(rq, &flags);
-	spin_unlock(&p->pi_lock);
+	__task_rq_unlock(rq);
+	spin_unlock_irqrestore(&p->pi_lock, flags);
 
 	return 0;
 }
@@ -6680,8 +6705,8 @@ void normalize_rt_tasks(void)
 		if (!rt_task(p))
 			continue;
 
-		spin_lock(&p->pi_lock);
-		rq = task_rq_lock(p, &flags);
+		spin_lock_irqsave(&p->pi_lock, flags);
+		rq = __task_rq_lock(p);
 
 		array = p->array;
 		if (array)
@@ -6692,8 +6717,8 @@ void normalize_rt_tasks(void)
 			resched_task(rq->curr);
 		}
 
-		task_rq_unlock(rq, &flags);
-		spin_unlock(&p->pi_lock);
+		__task_rq_unlock(rq);
+		spin_unlock_irqrestore(&p->pi_lock, flags);
 	}
 	read_unlock_irq(&tasklist_lock);
 }
Index: linux-2.6.17-rc3-mm1/kernel/futex.c
===================================================================
--- linux-2.6.17-rc3-mm1.orig/kernel/futex.c
+++ linux-2.6.17-rc3-mm1/kernel/futex.c
@@ -358,9 +358,9 @@ static void free_pi_state(struct futex_p
 	WARN_ON(!pi_state->owner);
 	WARN_ON(!rt_mutex_is_locked(&pi_state->pi_mutex));
 
-	spin_lock(&pi_state->owner->pi_lock);
+	spin_lock_irq(&pi_state->owner->pi_lock);
 	list_del_init(&pi_state->list);
-	spin_unlock(&pi_state->owner->pi_lock);
+	spin_unlock_irq(&pi_state->owner->pi_lock);
 
 	rt_mutex_proxy_unlock(&pi_state->pi_mutex, pi_state->owner);
 
@@ -422,18 +422,18 @@ void exit_pi_state_list(struct task_stru
 	 * pi_state_list anymore, but we have to be careful
 	 * versus waiters unqueueing themselfs
 	 */
-	spin_lock(&curr->pi_lock);
+	spin_lock_irq(&curr->pi_lock);
 	while (!list_empty(head)) {
 
 		next = head->next;
 		pi_state = list_entry(next, struct futex_pi_state, list);
 		key = pi_state->key;
-		spin_unlock(&curr->pi_lock);
+		spin_unlock_irq(&curr->pi_lock);
 
 		hb = hash_futex(&key);
 		spin_lock(&hb->lock);
 
-		spin_lock(&curr->pi_lock);
+		spin_lock_irq(&curr->pi_lock);
 		if (head->next != next) {
 			spin_unlock(&hb->lock);
 			continue;
@@ -444,15 +444,15 @@ void exit_pi_state_list(struct task_stru
 		WARN_ON(pi_state->owner != curr);
 
 		pi_state->owner = NULL;
-		spin_unlock(&curr->pi_lock);
+		spin_unlock_irq(&curr->pi_lock);
 
 		rt_mutex_unlock(&pi_state->pi_mutex);
 
 		spin_unlock(&hb->lock);
 
-		spin_lock(&curr->pi_lock);
+		spin_lock_irq(&curr->pi_lock);
 	}
-	spin_unlock(&curr->pi_lock);
+	spin_unlock_irq(&curr->pi_lock);
 }
 
 static int
@@ -500,10 +500,10 @@ lookup_pi_state(u32 uval, struct futex_h
 	/* Store the key for possible exit cleanups: */
 	pi_state->key = me->key;
 
-	spin_lock(&p->pi_lock);
+	spin_lock_irq(&p->pi_lock);
 	list_add(&pi_state->list, &p->pi_state_list);
 	pi_state->owner = p;
-	spin_unlock(&p->pi_lock);
+	spin_unlock_irq(&p->pi_lock);
 
 	put_task_struct(p);
 
@@ -1213,17 +1213,17 @@ static int do_futex_lock_pi(u32 __user *
 
 		/* Owner died? */
 		if (q.pi_state->owner != NULL) {
-			spin_lock(&q.pi_state->owner->pi_lock);
+			spin_lock_irq(&q.pi_state->owner->pi_lock);
 			list_del_init(&q.pi_state->list);
-			spin_unlock(&q.pi_state->owner->pi_lock);
+			spin_unlock_irq(&q.pi_state->owner->pi_lock);
 		} else
 			newtid |= FUTEX_OWNER_DIED;
 
 		q.pi_state->owner = current;
 
-		spin_lock(&current->pi_lock);
+		spin_lock_irq(&current->pi_lock);
 		list_add(&q.pi_state->list, &current->pi_state_list);
-		spin_unlock(&current->pi_lock);
+		spin_unlock_irq(&current->pi_lock);
 
 		/* Unqueue and drop the lock */
 		unqueue_me_pi(&q, hb);
Index: linux-2.6.17-rc3-mm1/kernel/rtmutex.c
===================================================================
--- linux-2.6.17-rc3-mm1.orig/kernel/rtmutex.c
+++ linux-2.6.17-rc3-mm1/kernel/rtmutex.c
@@ -134,9 +134,11 @@ static void __rt_mutex_adjust_prio(struc
  */
 static void rt_mutex_adjust_prio(struct task_struct *task)
 {
-	spin_lock(&task->pi_lock);
+	unsigned long flags;
+
+	spin_lock_irqsave(&task->pi_lock, flags);
 	__rt_mutex_adjust_prio(task);
-	spin_unlock(&task->pi_lock);
+	spin_unlock_irqrestore(&task->pi_lock, flags);
 }
 
 /*
@@ -158,6 +160,7 @@ static int rt_mutex_adjust_prio_chain(ta
 	struct rt_mutex *lock;
 	struct rt_mutex_waiter *waiter, *top_waiter = orig_waiter;
 	int detect_deadlock, ret = 0, depth = 0;
+	unsigned long flags;
 
 	detect_deadlock = debug_rt_mutex_detect_deadlock(orig_waiter,
 							 deadlock_detect);
@@ -190,7 +193,7 @@ static int rt_mutex_adjust_prio_chain(ta
 	/*
 	 * Task can not go away as we did a get_task() before !
 	 */
-	spin_lock(&task->pi_lock);
+	spin_lock_irqsave(&task->pi_lock, flags);
 
 	waiter = task->pi_blocked_on;
 	/*
@@ -214,7 +217,7 @@ static int rt_mutex_adjust_prio_chain(ta
 
 	lock = waiter->lock;
 	if (!spin_trylock(&lock->wait_lock)) {
-		spin_unlock(&task->pi_lock);
+		spin_unlock_irqrestore(&task->pi_lock, flags);
 		cpu_relax();
 		goto retry;
 	}
@@ -235,12 +238,12 @@ static int rt_mutex_adjust_prio_chain(ta
 	plist_add(&waiter->list_entry, &lock->wait_list);
 
 	/* Release the task */
-	spin_unlock(&task->pi_lock);
+	spin_unlock_irqrestore(&task->pi_lock, flags);
 	put_task_struct(task);
 
 	/* Grab the next task */
 	task = rt_mutex_owner(lock);
-	spin_lock(&task->pi_lock);
+	spin_lock_irqsave(&task->pi_lock, flags);
 
 	if (waiter == rt_mutex_top_waiter(lock)) {
 		/* Boost the owner */
@@ -259,7 +262,7 @@ static int rt_mutex_adjust_prio_chain(ta
 	}
 
 	get_task_struct(task);
-	spin_unlock(&task->pi_lock);
+	spin_unlock_irqrestore(&task->pi_lock, flags);
 
 	top_waiter = rt_mutex_top_waiter(lock);
 	spin_unlock(&lock->wait_lock);
@@ -270,7 +273,7 @@ static int rt_mutex_adjust_prio_chain(ta
 	goto again;
 
  out_unlock_pi:
-	spin_unlock(&task->pi_lock);
+	spin_unlock_irqrestore(&task->pi_lock, flags);
  out_put_task:
 	put_task_struct(task);
 	return ret;
@@ -285,6 +288,7 @@ static inline int try_to_steal_lock(stru
 {
 	struct task_struct *pendowner = rt_mutex_owner(lock);
 	struct rt_mutex_waiter *next;
+	unsigned long flags;
 
 	if (!rt_mutex_owner_pending(lock))
 		return 0;
@@ -292,9 +296,9 @@ static inline int try_to_steal_lock(stru
 	if (pendowner == current)
 		return 1;
 
-	spin_lock(&pendowner->pi_lock);
+	spin_lock_irqsave(&pendowner->pi_lock, flags);
 	if (current->prio >= pendowner->prio) {
-		spin_unlock(&pendowner->pi_lock);
+		spin_unlock_irqrestore(&pendowner->pi_lock, flags);
 		return 0;
 	}
 
@@ -304,7 +308,7 @@ static inline int try_to_steal_lock(stru
 	 * priority.
 	 */
 	if (likely(!rt_mutex_has_waiters(lock))) {
-		spin_unlock(&pendowner->pi_lock);
+		spin_unlock_irqrestore(&pendowner->pi_lock, flags);
 		return 1;
 	}
 
@@ -312,7 +316,7 @@ static inline int try_to_steal_lock(stru
 	next = rt_mutex_top_waiter(lock);
 	plist_del(&next->pi_list_entry, &pendowner->pi_waiters);
 	__rt_mutex_adjust_prio(pendowner);
-	spin_unlock(&pendowner->pi_lock);
+	spin_unlock_irqrestore(&pendowner->pi_lock, flags);
 
 	/*
 	 * We are going to steal the lock and a waiter was
@@ -329,10 +333,10 @@ static inline int try_to_steal_lock(stru
 	 * might be current:
 	 */
 	if (likely(next->task != current)) {
-		spin_lock(&current->pi_lock);
+		spin_lock_irqsave(&current->pi_lock, flags);
 		plist_add(&next->pi_list_entry, &current->pi_waiters);
 		__rt_mutex_adjust_prio(current);
-		spin_unlock(&current->pi_lock);
+		spin_unlock_irqrestore(&current->pi_lock, flags);
 	}
 	return 1;
 }
@@ -396,8 +400,9 @@ static int task_blocks_on_rt_mutex(struc
 	struct rt_mutex_waiter *top_waiter = waiter;
 	task_t *owner = rt_mutex_owner(lock);
 	int boost = 0, res;
+	unsigned long flags;
 
-	spin_lock(&current->pi_lock);
+	spin_lock_irqsave(&current->pi_lock, flags);
 	__rt_mutex_adjust_prio(current);
 	waiter->task = current;
 	waiter->lock = lock;
@@ -411,10 +416,10 @@ static int task_blocks_on_rt_mutex(struc
 
 	current->pi_blocked_on = waiter;
 
-	spin_unlock(&current->pi_lock);
+	spin_unlock_irqrestore(&current->pi_lock, flags);
 
 	if (waiter == rt_mutex_top_waiter(lock)) {
-		spin_lock(&owner->pi_lock);
+		spin_lock_irqsave(&owner->pi_lock, flags);
 		plist_del(&top_waiter->pi_list_entry, &owner->pi_waiters);
 		plist_add(&waiter->pi_list_entry, &owner->pi_waiters);
 
@@ -423,15 +428,15 @@ static int task_blocks_on_rt_mutex(struc
 			boost = 1;
 			get_task_struct(owner);
 		}
-		spin_unlock(&owner->pi_lock);
+		spin_unlock_irqrestore(&owner->pi_lock, flags);
 	}
 	else if (debug_rt_mutex_detect_deadlock(waiter, detect_deadlock)) {
-		spin_lock(&owner->pi_lock);
+		spin_lock_irqsave(&owner->pi_lock, flags);
 		if (owner->pi_blocked_on) {
 			boost = 1;
 			get_task_struct(owner);
 		}
-		spin_unlock(&owner->pi_lock);
+		spin_unlock_irqrestore(&owner->pi_lock, flags);
 	}
 	if (!boost)
 		return 0;
@@ -458,8 +463,9 @@ static void wakeup_next_waiter(struct rt
 {
 	struct rt_mutex_waiter *waiter;
 	struct task_struct *pendowner;
+	unsigned long flags;
 
-	spin_lock(&current->pi_lock);
+	spin_lock_irqsave(&current->pi_lock, flags);
 
 	waiter = rt_mutex_top_waiter(lock);
 	plist_del(&waiter->list_entry, &lock->wait_list);
@@ -476,7 +482,7 @@ static void wakeup_next_waiter(struct rt
 
 	rt_mutex_set_owner(lock, pendowner, RT_MUTEX_OWNER_PENDING);
 
-	spin_unlock(&current->pi_lock);
+	spin_unlock_irqrestore(&current->pi_lock, flags);
 
 	/*
 	 * Clear the pi_blocked_on variable and enqueue a possible
@@ -485,7 +491,7 @@ static void wakeup_next_waiter(struct rt
 	 * waiter with higher priority than pending-owner->normal_prio
 	 * is blocked on the unboosted (pending) owner.
 	 */
-	spin_lock(&pendowner->pi_lock);
+	spin_lock_irqsave(&pendowner->pi_lock, flags);
 
 	WARN_ON(!pendowner->pi_blocked_on);
 	WARN_ON(pendowner->pi_blocked_on != waiter);
@@ -499,7 +505,7 @@ static void wakeup_next_waiter(struct rt
 		next = rt_mutex_top_waiter(lock);
 		plist_add(&next->pi_list_entry, &pendowner->pi_waiters);
 	}
-	spin_unlock(&pendowner->pi_lock);
+	spin_unlock_irqrestore(&pendowner->pi_lock, flags);
 
 	wake_up_process(pendowner);
 }
@@ -515,16 +521,17 @@ static void remove_waiter(struct rt_mute
 	int first = (waiter == rt_mutex_top_waiter(lock));
 	int boost = 0;
 	task_t *owner = rt_mutex_owner(lock);
+	unsigned long flags;
 
-	spin_lock(&current->pi_lock);
+	spin_lock_irqsave(&current->pi_lock, flags);
 	plist_del(&waiter->list_entry, &lock->wait_list);
 	waiter->task = NULL;
 	current->pi_blocked_on = NULL;
-	spin_unlock(&current->pi_lock);
+	spin_unlock_irqrestore(&current->pi_lock, flags);
 
 	if (first && owner != current) {
 
-		spin_lock(&owner->pi_lock);
+		spin_lock_irqsave(&owner->pi_lock, flags);
 
 		plist_del(&waiter->pi_list_entry, &owner->pi_waiters);
 
@@ -540,7 +547,7 @@ static void remove_waiter(struct rt_mute
 			boost = 1;
 			get_task_struct(owner);
 		}
-		spin_unlock(&owner->pi_lock);
+		spin_unlock_irqrestore(&owner->pi_lock, flags);
 	}
 
 	WARN_ON(!plist_node_empty(&waiter->pi_list_entry));


-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [email protected]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[Index of Archives]     [Kernel Newbies]     [Netfilter]     [Bugtraq]     [Photo]     [Stuff]     [Gimp]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Video 4 Linux]     [Linux for the blind]     [Linux Resources]
  Powered by Linux