[PATCH] Per-superblock unused dentry LRU lists.

As originally described in:

http://marc.theaimsgroup.com/?l=linux-kernel&m=114491661527656&w=2

shrink_dcache_sb() becomes a severe bottleneck when the unused dentry
list grows long and the machine mounts and unmounts filesystems
frequently.
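
To make the cost concrete, here is a sketch of the pattern the current
code uses (the real thing is visible in the removed hunks below): every
unmount walks the single global dentry_unused list, so with N unused
dentries machine-wide and only K of them belonging to the superblock
being unmounted, the scan still touches all N entries.

	/*
	 * Illustration only, not part of the patch: the shape of the
	 * old global-list walk. Every dentry belonging to some other
	 * superblock is inspected and skipped, so the cost scales
	 * with the total size of the unused list, not with K.
	 */
	struct list_head *tmp, *next;
	struct dentry *dentry;

	list_for_each_safe(tmp, next, &dentry_unused) {
		dentry = list_entry(tmp, struct dentry, d_lru);
		if (dentry->d_sb != sb)
			continue;	/* paying for another sb's dentry */
		/* useful work for this sb happens only here */
	}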

My initial attempt to solve the problem using per-node LRUs
(http://marc.theaimsgroup.com/?l=linux-kernel&m=114537118504917&w=2)
fell apart under the complexity of the locking required. That approach
can probably be made to work in the long term, but we need a robust
fix for now.

The patch attached below is based on the suggestion made by Andrew
Morton here:

http://marc.theaimsgroup.com/?l=linux-fsdevel&m=114499224412427&w=2

This approach does not change any of the locking required, so it
avoids the locking problems of the per-node LRU implementation.
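
To illustrate (a sketch of the invariant, not new code from the
patch): d_lru manipulation still happens only under dcache_lock; the
patch merely changes which list head the dentry goes on, so no new
lock or lock ordering is introduced.

	/*
	 * Sketch: same locking as before, different list head.
	 * Pre-patch this would have been
	 * list_add(&dentry->d_lru, &dentry_unused).
	 */
	spin_lock(&dcache_lock);
	list_add(&dentry->d_lru, &dentry->d_sb->s_dentry_lru);
	dentry_stat.nr_unused++;
	spin_unlock(&dcache_lock);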

I've attempted to make reclaim fair by keeping track of the last
superblock we pruned and starting from the next one in the list each
time.
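
The hand logic in prune_dcache() below is easiest to read as a clock
hand sweeping over the super_blocks list. A minimal sketch of the
idea, leaving out the superblock reference counting and list-restart
handling the real code needs:

	/*
	 * Sketch only: a static hand remembers where the previous
	 * prune stopped, and each call resumes from the superblock
	 * after it, so repeated calls spread reclaim across all
	 * filesystems instead of always starting at the list head.
	 */
	static struct super_block *sb_hand;
	struct super_block *sb;

	spin_lock(&sb_lock);
	if (sb_hand == NULL)
		sb_hand = sb_entry(super_blocks.next);
	list_for_each_entry(sb, &super_blocks, s_list) {
		if (sb != sb_hand)
			continue;		/* scan up to the hand */
		sb_hand = sb_entry(sb->s_list.next);	/* advance it */
		/* prune some of this sb's LRU, then stop or go on */
	}
	spin_unlock(&sb_lock);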

Signed-off-by: Dave Chinner <[email protected]>

---
The patch has been stress tested on single and multiple filesystems
using dbench, postmark and parallel create/unlink load tests. It has
also been running since last Saturday on the problem server, which
currently has ~11 million cached dentries and a dentry_cache slab of
~27 million objects.

% diffstat patches/dcache-per-sb-lru
 fs/dcache.c        |  214 +++++++++++++++++++++++++++++------------------------
 fs/super.c         |    1
 include/linux/fs.h |    1
 3 files changed, 121 insertions(+), 95 deletions(-)


Index: 2.6.x-xfs-new/fs/dcache.c
===================================================================
--- 2.6.x-xfs-new.orig/fs/dcache.c	2006-05-15 16:24:44.212207654 +1000
+++ 2.6.x-xfs-new/fs/dcache.c	2006-05-16 14:00:46.327462206 +1000
@@ -62,7 +62,6 @@ static kmem_cache_t *dentry_cache; 
 static unsigned int d_hash_mask;
 static unsigned int d_hash_shift;
 static struct hlist_head *dentry_hashtable;
-static LIST_HEAD(dentry_unused);
 
 /* Statistics gathering. */
 struct dentry_stat_t dentry_stat = {
@@ -114,6 +113,38 @@ static void dentry_iput(struct dentry * 
 	}
 }
 
+static void
+dentry_lru_add(struct dentry *dentry)
+{
+	list_add(&dentry->d_lru, &dentry->d_sb->s_dentry_lru);
+	dentry_stat.nr_unused++;
+}
+
+static void
+dentry_lru_add_tail(struct dentry *dentry)
+{
+	list_add_tail(&dentry->d_lru, &dentry->d_sb->s_dentry_lru);
+	dentry_stat.nr_unused++;
+}
+
+static void
+dentry_lru_del(struct dentry *dentry)
+{
+	if (!list_empty(&dentry->d_lru)) {
+		list_del(&dentry->d_lru);
+		dentry_stat.nr_unused--;
+	}
+}
+
+static void
+dentry_lru_del_init(struct dentry *dentry)
+{
+	if (likely(!list_empty(&dentry->d_lru))) {
+		list_del_init(&dentry->d_lru);
+		dentry_stat.nr_unused--;
+	}
+}
+
 /* 
  * This is dput
  *
@@ -173,8 +204,7 @@ repeat:
 		goto kill_it;
   	if (list_empty(&dentry->d_lru)) {
   		dentry->d_flags |= DCACHE_REFERENCED;
-  		list_add(&dentry->d_lru, &dentry_unused);
-  		dentry_stat.nr_unused++;
+		dentry_lru_add(dentry);
   	}
  	spin_unlock(&dentry->d_lock);
 	spin_unlock(&dcache_lock);
@@ -186,13 +216,8 @@ unhash_it:
 kill_it: {
 		struct dentry *parent;
 
-		/* If dentry was on d_lru list
-		 * delete it from there
-		 */
-  		if (!list_empty(&dentry->d_lru)) {
-  			list_del(&dentry->d_lru);
-  			dentry_stat.nr_unused--;
-  		}
+		/* If the dentry was on the d_lru list, delete it from there */
+		dentry_lru_del(dentry);
   		list_del(&dentry->d_u.d_child);
 		dentry_stat.nr_dentry--;	/* For d_free, below */
 		/*drops the locks, at that point nobody can reach this dentry */
@@ -268,10 +293,7 @@ int d_invalidate(struct dentry * dentry)
 static inline struct dentry * __dget_locked(struct dentry *dentry)
 {
 	atomic_inc(&dentry->d_count);
-	if (!list_empty(&dentry->d_lru)) {
-		dentry_stat.nr_unused--;
-		list_del_init(&dentry->d_lru);
-	}
+	dentry_lru_del_init(dentry);
 	return dentry;
 }
 
@@ -377,6 +399,48 @@ static inline void prune_one_dentry(stru
 	spin_lock(&dcache_lock);
 }
 
+/*
+ * Shrink the dentry LRU on a given superblock.
+ */
+static void
+__shrink_dcache_sb(struct super_block *sb, int *count, int flags)
+{
+	struct dentry *dentry;
+	int cnt = (count == NULL) ? -1 : *count;
+
+	spin_lock(&dcache_lock);
+	while (!list_empty(&sb->s_dentry_lru) && cnt--) {
+		dentry = list_entry(sb->s_dentry_lru.prev,
+					struct dentry, d_lru);
+		dentry_lru_del_init(dentry);
+		BUG_ON(dentry->d_sb != sb);
+		prefetch(sb->s_dentry_lru.prev);
+
+		spin_lock(&dentry->d_lock);
+		/*
+		 * We found an inuse dentry which was not removed from
+		 * the LRU because of laziness during lookup.  Do not free
+		 * it - just keep it off the LRU list.
+		 */
+		if (atomic_read(&dentry->d_count)) {
+			spin_unlock(&dentry->d_lock);
+			continue;
+		}
+		/* If the dentry matches the flags passed in, don't free it.
+		 * Clear the flags and put it back on the LRU. */
+		if (flags && (dentry->d_flags & flags)) {
+			dentry->d_flags &= ~flags;
+			dentry_lru_add(dentry);
+			spin_unlock(&dentry->d_lock);
+			continue;
+		}
+		prune_one_dentry(dentry);
+	}
+	spin_unlock(&dcache_lock);
+	if (count)
+		*count = cnt;
+}
+
 /**
  * prune_dcache - shrink the dcache
  * @count: number of entries to try and free
@@ -392,42 +456,44 @@ static inline void prune_one_dentry(stru
  
 static void prune_dcache(int count)
 {
-	spin_lock(&dcache_lock);
-	for (; count ; count--) {
-		struct dentry *dentry;
-		struct list_head *tmp;
-
-		cond_resched_lock(&dcache_lock);
+	struct super_block *sb;
+	static struct super_block *sb_hand = NULL;
+	int work_done = 0;
+
+	spin_lock(&sb_lock);
+	if (sb_hand == NULL)
+		sb_hand = sb_entry(super_blocks.next);
+restart:
+	list_for_each_entry(sb, &super_blocks, s_list) {
+		if (sb != sb_hand)
+			continue;
+		/* Found the next superblock to work on.
+		 * Move the hand forwards so that parallel
+		 * pruners will work on a different sb */
+		work_done++;
+		sb_hand = sb_entry(sb->s_list.next);
+		sb->s_count++;
+		spin_unlock(&sb_lock);
+
+		/* We don't take the s_umount lock here because
+		 * we can be called while already holding a
+		 * write lock on a superblock. */
+		if (!list_empty(&sb->s_dentry_lru))
+			__shrink_dcache_sb(sb, &count, DCACHE_REFERENCED);
 
-		tmp = dentry_unused.prev;
-		if (tmp == &dentry_unused)
+		spin_lock(&sb_lock);
+		if (__put_super_and_need_restart(sb) && count)
+			goto restart;
+		if (count <= 0)
 			break;
-		list_del_init(tmp);
-		prefetch(dentry_unused.prev);
- 		dentry_stat.nr_unused--;
-		dentry = list_entry(tmp, struct dentry, d_lru);
-
- 		spin_lock(&dentry->d_lock);
-		/*
-		 * We found an inuse dentry which was not removed from
-		 * dentry_unused because of laziness during lookup.  Do not free
-		 * it - just keep it off the dentry_unused list.
-		 */
- 		if (atomic_read(&dentry->d_count)) {
- 			spin_unlock(&dentry->d_lock);
-			continue;
-		}
-		/* If the dentry was recently referenced, don't free it. */
-		if (dentry->d_flags & DCACHE_REFERENCED) {
-			dentry->d_flags &= ~DCACHE_REFERENCED;
- 			list_add(&dentry->d_lru, &dentry_unused);
- 			dentry_stat.nr_unused++;
- 			spin_unlock(&dentry->d_lock);
-			continue;
-		}
-		prune_one_dentry(dentry);
 	}
-	spin_unlock(&dcache_lock);
+	if (!work_done) {
+		/* sb_hand is stale. Start at the beginning of the
+		 * list again. */
+		sb_hand = sb_entry(super_blocks.next);
+		goto restart;
+	}
+	spin_unlock(&sb_lock);
 }
 
 /*
@@ -454,41 +520,7 @@ static void prune_dcache(int count)
 
 void shrink_dcache_sb(struct super_block * sb)
 {
-	struct list_head *tmp, *next;
-	struct dentry *dentry;
-
-	/*
-	 * Pass one ... move the dentries for the specified
-	 * superblock to the most recent end of the unused list.
-	 */
-	spin_lock(&dcache_lock);
-	list_for_each_safe(tmp, next, &dentry_unused) {
-		dentry = list_entry(tmp, struct dentry, d_lru);
-		if (dentry->d_sb != sb)
-			continue;
-		list_del(tmp);
-		list_add(tmp, &dentry_unused);
-	}
-
-	/*
-	 * Pass two ... free the dentries for this superblock.
-	 */
-repeat:
-	list_for_each_safe(tmp, next, &dentry_unused) {
-		dentry = list_entry(tmp, struct dentry, d_lru);
-		if (dentry->d_sb != sb)
-			continue;
-		dentry_stat.nr_unused--;
-		list_del_init(tmp);
-		spin_lock(&dentry->d_lock);
-		if (atomic_read(&dentry->d_count)) {
-			spin_unlock(&dentry->d_lock);
-			continue;
-		}
-		prune_one_dentry(dentry);
-		goto repeat;
-	}
-	spin_unlock(&dcache_lock);
+	__shrink_dcache_sb(sb, NULL, 0);
 }
 
 /*
@@ -572,17 +604,13 @@ resume:
 		struct dentry *dentry = list_entry(tmp, struct dentry, d_u.d_child);
 		next = tmp->next;
 
-		if (!list_empty(&dentry->d_lru)) {
-			dentry_stat.nr_unused--;
-			list_del_init(&dentry->d_lru);
-		}
+		dentry_lru_del_init(dentry);
 		/* 
 		 * move only zero ref count dentries to the end 
 		 * of the unused list for prune_dcache
 		 */
 		if (!atomic_read(&dentry->d_count)) {
-			list_add(&dentry->d_lru, dentry_unused.prev);
-			dentry_stat.nr_unused++;
+			dentry_lru_add_tail(dentry);
 			found++;
 		}
 
@@ -657,18 +685,14 @@ void shrink_dcache_anon(struct hlist_hea
 		spin_lock(&dcache_lock);
 		hlist_for_each(lp, head) {
 			struct dentry *this = hlist_entry(lp, struct dentry, d_hash);
-			if (!list_empty(&this->d_lru)) {
-				dentry_stat.nr_unused--;
-				list_del_init(&this->d_lru);
-			}
 
+			dentry_lru_del_init(this);
 			/* 
 			 * move only zero ref count dentries to the end 
 			 * of the unused list for prune_dcache
 			 */
 			if (!atomic_read(&this->d_count)) {
-				list_add_tail(&this->d_lru, &dentry_unused);
-				dentry_stat.nr_unused++;
+				dentry_lru_add_tail(this);
 				found++;
 			}
 		}
@@ -1019,7 +1043,7 @@ struct dentry *d_splice_alias(struct ino
  * rcu_read_lock() and rcu_read_unlock() are used to disable preemption while
  * lookup is going on.
  *
- * dentry_unused list is not updated even if lookup finds the required dentry
+ * The dentry unused LRU is not updated even if lookup finds the required dentry
  * in there. It is updated in places such as prune_dcache, shrink_dcache_sb,
  * select_parent and __dget_locked. This laziness saves lookup from dcache_lock
  * acquisition.
Index: 2.6.x-xfs-new/include/linux/fs.h
===================================================================
--- 2.6.x-xfs-new.orig/include/linux/fs.h	2006-03-17 13:16:16.000000000 +1100
+++ 2.6.x-xfs-new/include/linux/fs.h	2006-05-15 17:14:53.532277564 +1000
@@ -831,6 +831,7 @@ struct super_block {
 	struct list_head	s_io;		/* parked for writeback */
 	struct hlist_head	s_anon;		/* anonymous dentries for (nfs) exporting */
 	struct list_head	s_files;
+	struct list_head	s_dentry_lru;	/* unused dentry lru */
 
 	struct block_device	*s_bdev;
 	struct list_head	s_instances;
Index: 2.6.x-xfs-new/fs/super.c
===================================================================
--- 2.6.x-xfs-new.orig/fs/super.c	2006-03-17 13:16:12.000000000 +1100
+++ 2.6.x-xfs-new/fs/super.c	2006-05-15 17:49:59.009147060 +1000
@@ -71,6 +71,7 @@ static struct super_block *alloc_super(v
 		INIT_LIST_HEAD(&s->s_instances);
 		INIT_HLIST_HEAD(&s->s_anon);
 		INIT_LIST_HEAD(&s->s_inodes);
+		INIT_LIST_HEAD(&s->s_dentry_lru);
 		init_rwsem(&s->s_umount);
 		mutex_init(&s->s_lock);
 		down_write(&s->s_umount);