Re: [PATCH 2/2] mm: fix mm_struct reference counting bugs in mm/oom_kill.c

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Friday 14 April 2006 12:45, Andrew Morton wrote:
> Dave Peterson <[email protected]> wrote:
> > On Friday 14 April 2006 00:26, Andrew Morton wrote:
> > > task_lock() can be used to pin a task's ->mm.  To use task_lock() in
> > > badness() we'd need to either
> > >
> > > a) nest task_lock()s.  I don't know if we're doing that anywhere else,
> > >    but the parent->child ordering is a natural one.  or
> > >
> > > b) take a ref on the parent's mm_struct, drop the parent's task_lock()
> > >    while we walk the children, then do mmput() on the parent's mm
> > > outside tasklist_lock.  This is probably better.
> >
> > Looking a bit more closely at the code, I see that
> > select_bad_process() iterates over all tasks, repeatedly calling
> > badness().  This would complicate option 'b' since the iteration is
> > done while holding tasklist_lock.  An alternative to option 'a' that
> > avoids nesting task_lock()s would be to define a couple of new
> > functions that might look something like this:
> >
> >     void mmput_atomic(struct mm_struct *mm)
> >     {
> >             if (atomic_dec_and_test(&mm->mm_users)) {
> >                     add mm to a global list of expired mm_structs
> >             }
> >     }
> >
> >     void mmput_atomic_cleanup(void)
> >     {
> >             empty the global list of expired mm_structs and do
> >             cleanup stuff for each one
> >     }
>
> I think that's way too elaborate.
>
> What's wrong with this?

Yes of course... no need to nest task_lock()s after all.  Looks good
to me.

Another thing I noticed: oom_kill_task() calls mmput() while holding
tasklist_lock.  Here the calls to get_task_mm() and mmput() appear to
be unnecessary.  We shouldn't need to use any kind of locking or
reference counting since oom_kill_task() doesn't dereference into the
mm_struct or require the value of p->mm to stay constant.  I believe
the following (untested) code changes should fix the problem (and
simplify some other parts of the code).  Does this look correct?


diff -urNp -X /home/dsp/dontdiff linux-2.6.17-rc1/mm/oom_kill.c linux-2.6.17-rc1-fix/mm/oom_kill.c
--- linux-2.6.17-rc1/mm/oom_kill.c	2006-03-19 21:53:29.000000000 -0800
+++ linux-2.6.17-rc1-fix/mm/oom_kill.c	2006-04-14 13:22:15.000000000 -0700
@@ -244,17 +244,15 @@ static void __oom_kill_task(task_t *p, c
 	force_sig(SIGKILL, p);
 }
 
-static struct mm_struct *oom_kill_task(task_t *p, const char *message)
+static int oom_kill_task(task_t *p, const char *message)
 {
-	struct mm_struct *mm = get_task_mm(p);
+	struct mm_struct *mm;
 	task_t * g, * q;
 
-	if (!mm)
-		return NULL;
-	if (mm == &init_mm) {
-		mmput(mm);
-		return NULL;
-	}
+	mm = p->mm;
+
+	if ((mm == NULL) || (mm == &init_mm))
+		return 1;
 
 	__oom_kill_task(p, message);
 	/*
@@ -266,13 +264,12 @@ static struct mm_struct *oom_kill_task(t
 			__oom_kill_task(q, message);
 	while_each_thread(g, q);
 
-	return mm;
+	return 0;
 }
 
-static struct mm_struct *oom_kill_process(struct task_struct *p,
-				unsigned long points, const char *message)
+static int oom_kill_process(struct task_struct *p, unsigned long points,
+		const char *message)
 {
- 	struct mm_struct *mm;
 	struct task_struct *c;
 	struct list_head *tsk;
 
@@ -283,9 +280,8 @@ static struct mm_struct *oom_kill_proces
 		c = list_entry(tsk, struct task_struct, sibling);
 		if (c->mm == p->mm)
 			continue;
-		mm = oom_kill_task(c, message);
-		if (mm)
-			return mm;
+		if (!oom_kill_task(c, message))
+			return 0;
 	}
 	return oom_kill_task(p, message);
 }
@@ -300,7 +296,6 @@ static struct mm_struct *oom_kill_proces
  */
 void out_of_memory(struct zonelist *zonelist, gfp_t gfp_mask, int order)
 {
-	struct mm_struct *mm = NULL;
 	task_t *p;
 	unsigned long points = 0;
 
@@ -320,12 +315,12 @@ void out_of_memory(struct zonelist *zone
 	 */
 	switch (constrained_alloc(zonelist, gfp_mask)) {
 	case CONSTRAINT_MEMORY_POLICY:
-		mm = oom_kill_process(current, points,
+		oom_kill_process(current, points,
 				"No available memory (MPOL_BIND)");
 		break;
 
 	case CONSTRAINT_CPUSET:
-		mm = oom_kill_process(current, points,
+		oom_kill_process(current, points,
 				"No available memory in cpuset");
 		break;
 
@@ -347,8 +342,7 @@ retry:
 			panic("Out of memory and no killable processes...\n");
 		}
 
-		mm = oom_kill_process(p, points, "Out of memory");
-		if (!mm)
+		if (oom_kill_process(p, points, "Out of memory"))
 			goto retry;
 
 		break;
@@ -357,8 +351,6 @@ retry:
 out:
 	read_unlock(&tasklist_lock);
 	cpuset_unlock();
-	if (mm)
-		mmput(mm);
 
 	/*
 	 * Give "p" a good chance of killing itself before we

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [email protected]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[Index of Archives]     [Kernel Newbies]     [Netfilter]     [Bugtraq]     [Photo]     [Stuff]     [Gimp]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Video 4 Linux]     [Linux for the blind]     [Linux Resources]
  Powered by Linux