This patch removes the per-namespace semaphore in favor of a single global
semaphore (namespace_sem). Mount tree modifications in different namespaces
now serialize against each other instead of proceeding in parallel, so this
may have an effect on namespace scalability.
Signed-off-by: Miklos Szeredi <[email protected]>
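
The locking pattern change is small but pervasive; the following userspace
sketch (not part of the patch) illustrates it, with a pthread rwlock standing
in for the kernel rw_semaphore. The struct fields and helper names below are
illustrative stand-ins only:

/*
 * Illustrative only: a pthread rwlock stands in for the kernel's
 * rw_semaphore, and the struct/helpers are simplified stand-ins.
 * Build with: cc -pthread sketch.c
 */
#include <pthread.h>
#include <stdio.h>

/* After the patch: one file-scope lock serializes mount tree
 * updates in every namespace. */
static pthread_rwlock_t namespace_sem = PTHREAD_RWLOCK_INITIALIZER;

struct namespace {
	int count;      /* refcount stand-in */
	int nr_mounts;  /* mount tree stand-in */
	/* before the patch each namespace carried its own 'sem' here */
};

static void add_mount(struct namespace *ns)
{
	/* before: down_write(&ns->sem);  after: the global lock */
	pthread_rwlock_wrlock(&namespace_sem);
	ns->nr_mounts++;                    /* modify the mount tree */
	pthread_rwlock_unlock(&namespace_sem);
}

static void show_mounts(struct namespace *ns)
{
	/* readers (e.g. /proc/mounts via m_start/m_stop) take it shared */
	pthread_rwlock_rdlock(&namespace_sem);
	printf("%d mounts\n", ns->nr_mounts);
	pthread_rwlock_unlock(&namespace_sem);
}

int main(void)
{
	struct namespace a = { .count = 1 }, b = { .count = 1 };

	/* With the global lock these writers serialize even though they
	 * touch different namespaces; with per-namespace locks they
	 * could have run in parallel. */
	add_mount(&a);
	add_mount(&b);
	show_mounts(&a);
	show_mounts(&b);
	return 0;
}

A side effect visible in the diff: m_stop() previously needed the namespace
pointer from m->private just to drop n->sem; with the global semaphore that
dependency disappears, which is why the m_stop() hunk below gets simpler.
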
fs/namespace.c | 44 ++++++++++++++++++++++----------------------
include/linux/namespace.h | 1 -
2 files changed, 22 insertions(+), 23 deletions(-)
Index: 2.6.13.sharedsubtree/include/linux/namespace.h
===================================================================
--- 2.6.13.sharedsubtree.orig/include/linux/namespace.h
+++ 2.6.13.sharedsubtree/include/linux/namespace.h
@@ -7,11 +7,10 @@
struct namespace {
atomic_t count;
struct vfsmount * root;
struct list_head list;
- struct rw_semaphore sem;
};
extern int copy_namespace(int, struct task_struct *);
extern void __put_namespace(struct namespace *namespace);
Index: 2.6.13.sharedsubtree/fs/namespace.c
===================================================================
--- 2.6.13.sharedsubtree.orig/fs/namespace.c
+++ 2.6.13.sharedsubtree/fs/namespace.c
@@ -41,10 +41,11 @@ static inline int sysfs_init(void)
__cacheline_aligned_in_smp DEFINE_SPINLOCK(vfsmount_lock);
static struct list_head *mount_hashtable;
static int hash_mask, hash_bits;
static kmem_cache_t *mnt_cache;
+static struct rw_semaphore namespace_sem;
static inline unsigned long hash(struct vfsmount *mnt, struct dentry *dentry)
{
unsigned long tmp = ((unsigned long)mnt / L1_CACHE_BYTES);
tmp += ((unsigned long)dentry / L1_CACHE_BYTES);
@@ -190,11 +191,11 @@ static void *m_start(struct seq_file *m,
{
struct namespace *n = m->private;
struct list_head *p;
loff_t l = *pos;
- down_read(&n->sem);
+ down_read(&namespace_sem);
list_for_each(p, &n->list)
if (!l--)
return list_entry(p, struct vfsmount, mnt_list);
return NULL;
}
@@ -207,12 +208,11 @@ static void *m_next(struct seq_file *m,
return p == &n->list ? NULL : list_entry(p, struct vfsmount, mnt_list);
}
static void m_stop(struct seq_file *m, void *v)
{
- struct namespace *n = m->private;
- up_read(&n->sem);
+ up_read(&namespace_sem);
}
static inline void mangle(struct seq_file *m, const char *s)
{
seq_escape(m, s, " \t\n\\");
@@ -434,11 +434,11 @@ static int do_umount(struct vfsmount *mn
}
up_write(&sb->s_umount);
return retval;
}
- down_write(&current->namespace->sem);
+ down_write(&namespace_sem);
spin_lock(&vfsmount_lock);
if (atomic_read(&sb->s_active) == 1) {
/* last instance - try to be smart */
spin_unlock(&vfsmount_lock);
@@ -456,11 +456,11 @@ static int do_umount(struct vfsmount *mn
retval = 0;
}
spin_unlock(&vfsmount_lock);
if (retval)
security_sb_umount_busy(mnt);
- up_write(&current->namespace->sem);
+ up_write(&namespace_sem);
return retval;
}
/*
* Now umount can handle mount points as well as block devices.
@@ -681,11 +681,11 @@ static int do_loopback(struct nameidata
return -EINVAL;
err = path_lookup(old_name, LOOKUP_FOLLOW, &old_nd);
if (err)
return err;
- down_write(&current->namespace->sem);
+ down_write(&namespace_sem);
err = -EINVAL;
if (check_mnt(nd->mnt) && (!recurse || check_mnt(old_nd.mnt))) {
err = -ENOMEM;
if (recurse)
mnt = copy_tree(old_nd.mnt, old_nd.dentry);
@@ -706,11 +706,11 @@ static int do_loopback(struct nameidata
spin_unlock(&vfsmount_lock);
} else
mntput(mnt);
}
- up_write(&current->namespace->sem);
+ up_write(&namespace_sem);
path_release(&old_nd);
return err;
}
/*
@@ -754,11 +754,11 @@ static int do_move_mount(struct nameidat
return -EINVAL;
err = path_lookup(old_name, LOOKUP_FOLLOW, &old_nd);
if (err)
return err;
- down_write(&current->namespace->sem);
+ down_write(&namespace_sem);
while (d_mountpoint(nd->dentry) && follow_down(&nd->mnt, &nd->dentry)) ;
err = -EINVAL;
if (!check_mnt(nd->mnt) || !check_mnt(old_nd.mnt))
goto out;
@@ -797,11 +797,11 @@ static int do_move_mount(struct nameidat
out2:
spin_unlock(&vfsmount_lock);
out1:
up(&nd->dentry->d_inode->i_sem);
out:
- up_write(&current->namespace->sem);
+ up_write(&namespace_sem);
if (!err)
path_release(&parent_nd);
path_release(&old_nd);
return err;
}
@@ -836,11 +836,11 @@ static int do_new_mount(struct nameidata
int do_add_mount(struct vfsmount *newmnt, struct nameidata *nd,
int mnt_flags, struct list_head *fslist)
{
int err;
- down_write(&current->namespace->sem);
+ down_write(&namespace_sem);
/* Something was mounted here while we slept */
while (d_mountpoint(nd->dentry) && follow_down(&nd->mnt, &nd->dentry)) ;
err = -EINVAL;
if (!check_mnt(nd->mnt))
goto unlock;
@@ -865,11 +865,11 @@ int do_add_mount(struct vfsmount *newmnt
list_add_tail(&newmnt->mnt_expire, fslist);
spin_unlock(&vfsmount_lock);
}
unlock:
- up_write(&current->namespace->sem);
+ up_write(&namespace_sem);
mntput(newmnt);
return err;
}
EXPORT_SYMBOL_GPL(do_add_mount);
@@ -969,13 +969,13 @@ void mark_mounts_for_expiry(struct list_
if (!namespace || !namespace->root)
continue;
get_namespace(namespace);
spin_unlock(&vfsmount_lock);
- down_write(&namespace->sem);
+ down_write(&namespace_sem);
expire_mount(mnt, mounts);
- up_write(&namespace->sem);
+ up_write(&namespace_sem);
mntput(mnt);
put_namespace(namespace);
spin_lock(&vfsmount_lock);
@@ -1141,18 +1141,17 @@ int copy_namespace(int flags, struct tas
new_ns = kmalloc(sizeof(struct namespace), GFP_KERNEL);
if (!new_ns)
goto out;
atomic_set(&new_ns->count, 1);
- init_rwsem(&new_ns->sem);
INIT_LIST_HEAD(&new_ns->list);
- down_write(&tsk->namespace->sem);
+ down_write(&namespace_sem);
/* First pass: copy the tree topology */
new_ns->root = copy_tree(namespace->root, namespace->root->mnt_root);
if (!new_ns->root) {
- up_write(&tsk->namespace->sem);
+ up_write(&namespace_sem);
kfree(new_ns);
goto out;
}
spin_lock(&vfsmount_lock);
list_add_tail(&new_ns->list, &new_ns->root->mnt_list);
@@ -1182,11 +1181,11 @@ int copy_namespace(int flags, struct tas
}
}
p = next_mnt(p, namespace->root);
q = next_mnt(q, new_ns->root);
}
- up_write(&tsk->namespace->sem);
+ up_write(&namespace_sem);
tsk->namespace = new_ns;
if (rootmnt)
mntput(rootmnt);
@@ -1368,11 +1367,11 @@ asmlinkage long sys_pivot_root(const cha
read_lock(&current->fs->lock);
user_nd.mnt = mntget(current->fs->rootmnt);
user_nd.dentry = dget(current->fs->root);
read_unlock(&current->fs->lock);
- down_write(&current->namespace->sem);
+ down_write(&namespace_sem);
down(&old_nd.dentry->d_inode->i_sem);
error = -EINVAL;
if (!check_mnt(user_nd.mnt))
goto out2;
error = -ENOENT;
@@ -1414,11 +1413,11 @@ asmlinkage long sys_pivot_root(const cha
error = 0;
path_release(&root_parent);
path_release(&parent_nd);
out2:
up(&old_nd.dentry->d_inode->i_sem);
- up_write(&current->namespace->sem);
+ up_write(&namespace_sem);
path_release(&user_nd);
path_release(&old_nd);
out1:
path_release(&new_nd);
out0:
@@ -1441,11 +1440,10 @@ static void __init init_mount_tree(void)
namespace = kmalloc(sizeof(*namespace), GFP_KERNEL);
if (!namespace)
panic("Can't allocate initial namespace");
atomic_set(&namespace->count, 1);
INIT_LIST_HEAD(&namespace->list);
- init_rwsem(&namespace->sem);
list_add(&mnt->mnt_list, &namespace->list);
namespace->root = mnt;
mnt->mnt_namespace = namespace;
init_task.namespace = namespace;
@@ -1465,10 +1463,12 @@ void __init mnt_init(unsigned long mempa
{
struct list_head *d;
unsigned int nr_hash;
int i;
+ init_rwsem(&namespace_sem);
+
mnt_cache = kmem_cache_create("mnt_cache", sizeof(struct vfsmount),
0, SLAB_HWCACHE_ALIGN | SLAB_PANIC, NULL,
NULL);
mount_hashtable = (struct list_head *)
@@ -1514,12 +1514,12 @@ void __init mnt_init(unsigned long mempa
void __put_namespace(struct namespace *namespace)
{
struct vfsmount *root = namespace->root;
namespace->root = NULL;
spin_unlock(&vfsmount_lock);
- down_write(&namespace->sem);
+ down_write(&namespace_sem);
spin_lock(&vfsmount_lock);
umount_tree(root);
spin_unlock(&vfsmount_lock);
- up_write(&namespace->sem);
+ up_write(&namespace_sem);
kfree(namespace);
}