Re: [PATCH] Fix shrink_dcache_parent() against shrink_dcache_memory() race (3rd updated patch)]

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Friday March 10, [email protected] wrote:
> On Fri, Mar 10, Kirill Korotaev wrote:
> 
> > >I really think that we need to stop prune_one_dentry from being called
> > >on dentries for a filesystem that is being unmounted.  With that code
> > >currently in -git, that means passing a 'struct super_block *' into
> > >prune_dcache so that it ignores any filesystem with ->s_root==NULL
> > >unless that filesystem is the filesystem that was passed.
> > Can try...
> > 
> 
> Can not ... because of down_read(s_umount) before checking s_root :(

Sorry, I don't follow your logic.  Could you elaborate a bit please?

> 
> So what do we do now?
> 
>  1. always get the reference counting right outside of dcache_lock

This is possible, but I think it is a very intrusive patch.

> 
>  2. hack around with different paths for prune_dcache() when called from
>     shrink_dcache_memory() and shrink_dcache_parent()

I don't think the paths are very different.

The following patch is against 2.6.16-rc5-git14, is based on yours,
and avoids calling prune_one_dentry at inconvenient times.

Differences:
  - use waitqueue_active to decide whether to call wake_up
  - pass a 'struct super_block *' to prune_dcache when appropriate,
    and do not try to prune dentries for any other filesystem
    which has ->s_root == NULL
  - pass sb to shink_dcache_anon instead of &sb->s_anon.
  - Don't return a value from  wait_on_prunes
  - Call wait_on_prunes *before* shrink_dcache_{anon,parent}

I am fairly sure that this will do the right thing.

NeilBrown



Signed-off-by: Neil Brown <[email protected]>

### Diffstat output
 ./fs/dcache.c            |   25 ++++++++++++++++++-------
 ./fs/super.c             |   47 ++++++++++++++++++++++++++++++++++++++++++++++-
 ./include/linux/dcache.h |    2 +-
 ./include/linux/fs.h     |    3 +++
 4 files changed, 68 insertions(+), 9 deletions(-)

diff ./fs/dcache.c~current~ ./fs/dcache.c
--- ./fs/dcache.c~current~	2006-03-10 22:24:03.000000000 +1100
+++ ./fs/dcache.c	2006-03-10 22:39:07.000000000 +1100
@@ -361,20 +361,28 @@ restart:
  * This requires that the LRU list has already been
  * removed.
  * Called with dcache_lock, drops it and then regains.
+ *
+ * Wakes up any pending waiters (see wait_on_prunes()) if the
+ * dentry's filesystem is being umounted.
  */
 static inline void prune_one_dentry(struct dentry * dentry)
 {
+	struct super_block *sb = dentry->d_sb;
 	struct dentry * parent;
 
 	__d_drop(dentry);
 	list_del(&dentry->d_u.d_child);
 	dentry_stat.nr_dentry--;	/* For d_free, below */
+	sb->s_prunes++;
 	dentry_iput(dentry);
 	parent = dentry->d_parent;
 	d_free(dentry);
 	if (parent != dentry)
 		dput(parent);
 	spin_lock(&dcache_lock);
+	sb->s_prunes--;
+	if (waitqueue_active(&sb->s_wait_prunes))
+		wake_up(&sb->s_wait_prunes);
 }
 
 /**
@@ -390,7 +398,7 @@ static inline void prune_one_dentry(stru
  * all the dentries are in use.
  */
  
-static void prune_dcache(int count)
+static void prune_dcache(int count, struct super_block *sb)
 {
 	spin_lock(&dcache_lock);
 	for (; count ; count--) {
@@ -417,8 +425,10 @@ static void prune_dcache(int count)
  			spin_unlock(&dentry->d_lock);
 			continue;
 		}
-		/* If the dentry was recently referenced, don't free it. */
-		if (dentry->d_flags & DCACHE_REFERENCED) {
+		/* If the dentry was recently referenced, or is for
+		 * a unmounting filesystem, don't free it. */
+		if ((dentry->d_flags & DCACHE_REFERENCED) ||
+		    (dentry->d_sb != sb && dentry->d_sb->s_root == NULL)) {
 			dentry->d_flags &= ~DCACHE_REFERENCED;
  			list_add(&dentry->d_lru, &dentry_unused);
  			dentry_stat.nr_unused++;
@@ -635,7 +645,7 @@ void shrink_dcache_parent(struct dentry 
 	int found;
 
 	while ((found = select_parent(parent)) != 0)
-		prune_dcache(found);
+		prune_dcache(found, parent->d_sb);
 }
 
 /**
@@ -648,8 +658,9 @@ void shrink_dcache_parent(struct dentry 
  * done under dcache_lock.
  *
  */
-void shrink_dcache_anon(struct hlist_head *head)
+void shrink_dcache_anon(struct super_block *sb)
 {
+	struct hlist_head = &sb->s_anon;
 	struct hlist_node *lp;
 	int found;
 	do {
@@ -673,7 +684,7 @@ void shrink_dcache_anon(struct hlist_hea
 			}
 		}
 		spin_unlock(&dcache_lock);
-		prune_dcache(found);
+		prune_dcache(found, sb);
 	} while(found);
 }
 
@@ -694,7 +705,7 @@ static int shrink_dcache_memory(int nr, 
 	if (nr) {
 		if (!(gfp_mask & __GFP_FS))
 			return -1;
-		prune_dcache(nr);
+		prune_dcache(nr, NULL);
 	}
 	return (dentry_stat.nr_unused / 100) * sysctl_vfs_cache_pressure;
 }

diff ./fs/super.c~current~ ./fs/super.c
--- ./fs/super.c~current~	2006-03-10 22:24:03.000000000 +1100
+++ ./fs/super.c	2006-03-10 22:37:14.000000000 +1100
@@ -80,6 +80,8 @@ static struct super_block *alloc_super(v
 		sema_init(&s->s_dquot.dqio_sem, 1);
 		sema_init(&s->s_dquot.dqonoff_sem, 1);
 		init_rwsem(&s->s_dquot.dqptr_sem);
+		s->s_prunes = 0;
+		init_waitqueue_head(&s->s_wait_prunes);
 		init_waitqueue_head(&s->s_wait_unfrozen);
 		s->s_maxbytes = MAX_NON_LFS;
 		s->dq_op = sb_dquot_ops;
@@ -213,6 +215,40 @@ static int grab_super(struct super_block
 	return 0;
 }
 
+/*
+ * A special version of wait_event(!sb->s_prunes) which takes the dcache_lock
+ * when checking the condition.
+ *
+ * While we are waiting for pruned dentries to iput() their inode, the
+ * sb->s_prunes count is non-zero. Since the s_prunes counter is modified
+ * by prune_one_dentry() under dcache_lock, either the reference count on the
+ * parent dentry is wrong (and therefore it isn't on the lru-list yet) and we
+ * are waiting because s_prunes != 0 or the reference count is correct (and the
+ * parent dentry might be found by select_parent()) and the s_prunes == 0.
+ */
+static void wait_on_prunes(struct super_block *sb)
+{
+	DEFINE_WAIT(wait);
+
+#ifdef DCACHE_DEBUG
+	printk(KERN_DEBUG "%s: waiting for %d prunes\n", __FUNCTION__,
+	       sb->s_prunes);
+#endif
+
+	spin_lock(&dcache_lock);
+	for (;;) {
+		prepare_to_wait(&sb->s_wait_prunes, &wait,
+				TASK_UNINTERRUPTIBLE);
+		if (!sb->s_prunes)
+			break;
+		spin_unlock(&dcache_lock);
+		schedule();
+		spin_lock(&dcache_lock);
+	}
+	spin_unlock(&dcache_lock);
+	finish_wait(&sb->s_wait_prunes, &wait);
+}
+
 /**
  *	generic_shutdown_super	-	common helper for ->kill_sb()
  *	@sb: superblock to kill
@@ -230,8 +266,17 @@ void generic_shutdown_super(struct super
 
 	if (root) {
 		sb->s_root = NULL;
+
+		wait_on_prunes(sb);
+		/* At this point no dentries in this filesystem will be
+		 * in the process of being pruned by shrink_dcache_memory
+		 * (or anyone else), so no dentries should have external
+		 * references, so shrink_dcache_anon and
+		 * shrink_dcache_parent should find and free them all
+		 */
+		shrink_dcache_anon(sb);
 		shrink_dcache_parent(root);
-		shrink_dcache_anon(&sb->s_anon);
+
 		dput(root);
 		fsync_super(sb);
 		lock_super(sb);

diff ./include/linux/dcache.h~current~ ./include/linux/dcache.h
--- ./include/linux/dcache.h~current~	2006-03-10 22:37:37.000000000 +1100
+++ ./include/linux/dcache.h	2006-03-10 22:37:55.000000000 +1100
@@ -215,7 +215,7 @@ extern struct dentry * d_alloc_anon(stru
 extern struct dentry * d_splice_alias(struct inode *, struct dentry *);
 extern void shrink_dcache_sb(struct super_block *);
 extern void shrink_dcache_parent(struct dentry *);
-extern void shrink_dcache_anon(struct hlist_head *);
+extern void shrink_dcache_anon(struct super_block *);
 extern int d_invalidate(struct dentry *);
 
 /* only used at mount-time */

diff ./include/linux/fs.h~current~ ./include/linux/fs.h
--- ./include/linux/fs.h~current~	2006-03-10 22:24:03.000000000 +1100
+++ ./include/linux/fs.h	2006-03-10 22:24:03.000000000 +1100
@@ -836,6 +836,9 @@ struct super_block {
 	struct list_head	s_instances;
 	struct quota_info	s_dquot;	/* Diskquota specific options */
 
+	unsigned int		s_prunes;	/* protected by dcache_lock */
+	wait_queue_head_t	s_wait_prunes;
+
 	int			s_frozen;
 	wait_queue_head_t	s_wait_unfrozen;
 
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [email protected]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[Index of Archives]     [Kernel Newbies]     [Netfilter]     [Bugtraq]     [Photo]     [Stuff]     [Gimp]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Video 4 Linux]     [Linux for the blind]     [Linux Resources]
  Powered by Linux