Re: [PATCH] shrink_dcache_parent() races against shrink_dcache_memory()

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Jan,

1. this patch doesn't fix the whole problem. iput() after sb free is still possible. So busy inodes after umount too.
2. it has big problems with locking...

comments below inside.

Kirill

Kirill Korotaev <[email protected]> discovered a race between shrink_dcache_parent()
and shrink_dcache_memory(). That one is based on dput() is calling
dentry_iput() too early and therefore is giving up the dcache_lock. This leads
to the situation that the parent dentry might be still referenced although all
childs are already dead. This parent is ignore by a concurrent select_parent()
call which might be the reason for busy inode after umount failures.

This is from Kirill's original patch:

CPU 1                    CPU 2
~~~~~                    ~~~~~
umount /dev/sda1
generic_shutdown_super   shrink_dcache_memory
shrink_dcache_parent     prune_one_dentry
select_parent            dput     <<<< child is dead, locks are released,
                                       but parent is still referenced!!! >>>>
skip dentry->parent,
since it's d_count > 0

message: BUSY inodes after umount...
                                  <<< parent is left on dentry_unused list,
                                      referencing freed super block >>>

This patch is introducing dput_locked() which is doing all the dput work
except of freeing up the dentry's inode and memory itself. Therefore, when the
dcache_lock is given up, all the reference counts of the parents are correct.
prune_one_dentry() must also use the dput_locked version and free up the
inodes and the memory of the parents later. Otherwise we have an incorrect
reference count on the parents of the dentry to prune.

Signed-off-by: Jan Blunck <[email protected]>
---


------------------------------------------------------------------------

 fs/dcache.c |   76 ++++++++++++++++++++++++++++++++++++++++++------------------
 1 file changed, 54 insertions(+), 22 deletions(-)

Index: linux-2.6/fs/dcache.c
===================================================================
--- linux-2.6.orig/fs/dcache.c
+++ linux-2.6/fs/dcache.c
@@ -143,21 +143,18 @@ static void dentry_iput(struct dentry * * no dcache lock, please.
  */
-void dput(struct dentry *dentry)
+static void dput_locked(struct dentry *dentry, struct list_head *list)
 {
 	if (!dentry)
 		return;
-repeat:
-	if (atomic_read(&dentry->d_count) == 1)
-		might_sleep();
-	if (!atomic_dec_and_lock(&dentry->d_count, &dcache_lock))
+	if (!atomic_dec_and_test(&dentry->d_count))
 		return;
+repeat:
 	spin_lock(&dentry->d_lock);
 	if (atomic_read(&dentry->d_count)) {
 		spin_unlock(&dentry->d_lock);
-		spin_unlock(&dcache_lock);
 		return;
 	}
@@ -177,32 +174,54 @@ repeat:
   		dentry_stat.nr_unused++;
   	}
  	spin_unlock(&dentry->d_lock);
-	spin_unlock(&dcache_lock);
 	return;
unhash_it:
 	__d_drop(dentry);
kill_it: {
-		struct dentry *parent;
-
 		/* If dentry was on d_lru list
 		 * delete it from there
 		 */
   		if (!list_empty(&dentry->d_lru)) {
-  			list_del(&dentry->d_lru);
+  			list_del_init(&dentry->d_lru);
   			dentry_stat.nr_unused--;
   		}
   		list_del(&dentry->d_u.d_child);
 		dentry_stat.nr_dentry--;	/* For d_free, below */
-		/*drops the locks, at that point nobody can reach this dentry */
-		dentry_iput(dentry);
-		parent = dentry->d_parent;
-		d_free(dentry);
-		if (dentry == parent)
+		/* at this point nobody can reach this dentry */
+		list_add(&dentry->d_lru, list);
+		spin_unlock(&dentry->d_lock);
+		if (dentry == dentry->d_parent)
 			return;
-		dentry = parent;
-		goto repeat;
+		dentry = dentry->d_parent;
+		if (atomic_dec_and_test(&dentry->d_count))
+			goto repeat;
<<<< I would prefer to have "goto repeat" as it was before...
+		/* out */
+	}
+}
+
+void dput(struct dentry *dentry)
+{
+	LIST_HEAD(free_list);
+
+	if (!dentry)
+		return;
+
+	if (atomic_add_unless(&dentry->d_count, -1, 1))
+		return;
<<<< I would better introduce __dput_locked() w/o atomic_dec_and_test() instead of using this atomic_add_unless()... <<<< For me it looks like an obfuscation of a code, which must be clean and tidy.
+
+	spin_lock(&dcache_lock);
+	dput_locked(dentry, &free_list);
+	spin_unlock(&dcache_lock);
+
<<<< 1. locking here is totally broken... spin_unlock() in dentry_iput()
<<<< 2. it doesn't help the situation I wrote to you,
<<<<    since iput() can be done on inode _after_ sb freeing...
+	if (!list_empty(&free_list)) {
+		struct dentry *dentry, *p;
+		list_for_each_entry_safe(dentry, p, &free_list, d_lru) {
+			list_del(&dentry->d_lru);
+			dentry_iput(dentry);
+			d_free(dentry);
+		}
 	}
 }
@@ -364,16 +383,29 @@ restart:
  */
 static inline void prune_one_dentry(struct dentry * dentry)
 {
-	struct dentry * parent;
+	LIST_HEAD(free_list);
__d_drop(dentry);
 	list_del(&dentry->d_u.d_child);
 	dentry_stat.nr_dentry--;	/* For d_free, below */
-	dentry_iput(dentry);
-	parent = dentry->d_parent;
+
+	/* dput the parent here before we release dcache_lock */
+	if (dentry != dentry->d_parent)
+		dput_locked(dentry->d_parent, &free_list);
+
+	dentry_iput(dentry);		/* drop locks */
<<<< comment 2) from dput()
 	d_free(dentry);
-	if (parent != dentry)
-		dput(parent);
+
+	if (!list_empty(&free_list)) {
+		struct dentry *tmp, *p;
+
+		list_for_each_entry_safe(tmp, p, &free_list, d_lru) {
+			list_del(&tmp->d_lru);
+			dentry_iput(tmp);
<<<< comment 1) from dput()
+			d_free(tmp);
+		}
+	}
+
 	spin_lock(&dcache_lock);
 }


-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [email protected]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[Index of Archives]     [Kernel Newbies]     [Netfilter]     [Bugtraq]     [Photo]     [Stuff]     [Gimp]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Video 4 Linux]     [Linux for the blind]     [Linux Resources]
  Powered by Linux