Re: [PATCH] fix race in mark_mounts_for_expiry()

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



> E.g. having a separate task count, which is incremented/decremented
> only by clone/exit.  If the task count goes to zero, umount_tree is
> called on root, but namespace is not freed.
> 
> And each mnt_namespace holds a proper reference to the namespace, so
> it's safe to dereference it anytime.  When truly no more references
> remain, the namespace can go away.
> 
> Hmm?

Here's a patch (compile and boot tested). 

Now there are two solutions to one problem, the previous one is ugly,
this one is buggy.  Which should it be?

Thanks,
Miklos

Index: linux/include/linux/namespace.h
===================================================================
--- linux.orig/include/linux/namespace.h	2005-05-18 18:02:24.000000000 +0200
+++ linux/include/linux/namespace.h	2005-05-18 18:28:48.000000000 +0200
@@ -7,18 +7,26 @@
 
 struct namespace {
 	atomic_t		count;
+	atomic_t		task_count; /* how many tasks use this */
 	struct vfsmount *	root;
 	struct list_head	list;
 	struct rw_semaphore	sem;
 };
 
 extern int copy_namespace(int, struct task_struct *);
-extern void __put_namespace(struct namespace *namespace);
+extern void __exit_namespace(struct namespace *namespace);
 
 static inline void put_namespace(struct namespace *namespace)
 {
-	if (atomic_dec_and_test(&namespace->count))
-		__put_namespace(namespace);
+	if (namespace && atomic_dec_and_test(&namespace->count))
+		kfree(namespace);
+}
+
+static inline struct namespace *get_namespace(struct namespace *namespace)
+{
+	if (namespace)
+		atomic_inc(&namespace->count);
+	return namespace;
 }
 
 static inline void exit_namespace(struct task_struct *p)
@@ -28,14 +36,10 @@ static inline void exit_namespace(struct
 		task_lock(p);
 		p->namespace = NULL;
 		task_unlock(p);
-		put_namespace(namespace);
+		if (atomic_dec_and_test(&p->namespace->task_count))
+			__exit_namespace(p->namespace);
 	}
 }
 
-static inline void get_namespace(struct namespace *namespace)
-{
-	atomic_inc(&namespace->count);
-}
-
 #endif
 #endif
Index: linux/fs/namespace.c
===================================================================
--- linux.orig/fs/namespace.c	2005-05-18 17:42:46.000000000 +0200
+++ linux/fs/namespace.c	2005-05-18 18:32:29.000000000 +0200
@@ -160,7 +160,7 @@ clone_mnt(struct vfsmount *old, struct d
 		mnt->mnt_root = dget(root);
 		mnt->mnt_mountpoint = mnt->mnt_root;
 		mnt->mnt_parent = mnt;
-		mnt->mnt_namespace = current->namespace;
+		mnt->mnt_namespace = get_namespace(current->namespace);
 
 		/* stick the duplicate mount on the same expiry list
 		 * as the original if that was on one */
@@ -345,13 +345,15 @@ static void umount_tree(struct vfsmount 
 	for (p = mnt; p; p = next_mnt(p, mnt)) {
 		list_del(&p->mnt_list);
 		list_add(&p->mnt_list, &kill);
-		p->mnt_namespace = NULL;
 	}
 
 	while (!list_empty(&kill)) {
+		struct namespace *n;
 		mnt = list_entry(kill.next, struct vfsmount, mnt_list);
 		list_del_init(&mnt->mnt_list);
 		list_del_init(&mnt->mnt_fslink);
+		n = mnt->mnt_namespace;
+		mnt->mnt_namespace = NULL;
 		if (mnt->mnt_parent == mnt) {
 			spin_unlock(&vfsmount_lock);
 		} else {
@@ -361,6 +363,7 @@ static void umount_tree(struct vfsmount 
 			path_release(&old_nd);
 		}
 		mntput(mnt);
+		put_namespace(n);
 		spin_lock(&vfsmount_lock);
 	}
 }
@@ -866,12 +869,9 @@ void mark_mounts_for_expiry(struct list_
 		mnt = list_entry(graveyard.next, struct vfsmount, mnt_fslink);
 		list_del_init(&mnt->mnt_fslink);
 
-		/* don't do anything if the namespace is dead - all the
-		 * vfsmounts from it are going away anyway */
-		namespace = mnt->mnt_namespace;
-		if (!namespace || atomic_read(&namespace->count) <= 0)
+		namespace = get_namespace(mnt->mnt_namespace);
+		if (!namespace)
 			continue;
-		get_namespace(namespace);
 
 		spin_unlock(&vfsmount_lock);
 		down_write(&namespace->sem);
@@ -1073,8 +1073,10 @@ int copy_namespace(int flags, struct tas
 
 	get_namespace(namespace);
 
-	if (!(flags & CLONE_NEWNS))
+	if (!(flags & CLONE_NEWNS)) {
+		atomic_inc(&namespace->task_count);
 		return 0;
+	}
 
 	if (!capable(CAP_SYS_ADMIN)) {
 		put_namespace(namespace);
@@ -1086,6 +1088,7 @@ int copy_namespace(int flags, struct tas
 		goto out;
 
 	atomic_set(&new_ns->count, 1);
+	atomic_set(&new_ns->task_count, 1);
 	init_rwsem(&new_ns->sem);
 	INIT_LIST_HEAD(&new_ns->list);
 
@@ -1109,7 +1112,9 @@ int copy_namespace(int flags, struct tas
 	p = namespace->root;
 	q = new_ns->root;
 	while (p) {
-		q->mnt_namespace = new_ns;
+		struct namespace *oldns = q->mnt_namespace;
+		q->mnt_namespace = get_namespace(new_ns);
+		put_namespace(oldns);
 		if (fs) {
 			if (p == fs->rootmnt) {
 				rootmnt = p;
@@ -1381,16 +1386,17 @@ static void __init init_mount_tree(void)
 	if (!namespace)
 		panic("Can't allocate initial namespace");
 	atomic_set(&namespace->count, 1);
+	atomic_set(&namespace->task_count, 0);
 	INIT_LIST_HEAD(&namespace->list);
 	init_rwsem(&namespace->sem);
 	list_add(&mnt->mnt_list, &namespace->list);
 	namespace->root = mnt;
-	mnt->mnt_namespace = namespace;
+	mnt->mnt_namespace = get_namespace(namespace);
 
 	init_task.namespace = namespace;
 	read_lock(&tasklist_lock);
 	do_each_thread(g, p) {
-		get_namespace(namespace);
+		atomic_inc(&namespace->task_count);
 		p->namespace = namespace;
 	} while_each_thread(g, p);
 	read_unlock(&tasklist_lock);
@@ -1448,12 +1454,13 @@ void __init mnt_init(unsigned long mempa
 	init_mount_tree();
 }
 
-void __put_namespace(struct namespace *namespace)
+void __exit_namespace(struct namespace *namespace)
 {
 	down_write(&namespace->sem);
 	spin_lock(&vfsmount_lock);
 	umount_tree(namespace->root);
+	namespace->root = NULL;
 	spin_unlock(&vfsmount_lock);
 	up_write(&namespace->sem);
-	kfree(namespace);
+	put_namespace(namespace);
 }
Index: linux/fs/super.c
===================================================================
--- linux.orig/fs/super.c	2005-05-17 13:46:56.000000000 +0200
+++ linux/fs/super.c	2005-05-18 18:44:18.000000000 +0200
@@ -37,6 +37,7 @@
 #include <linux/writeback.h>		/* for the emergency remount stuff */
 #include <linux/idr.h>
 #include <linux/kobject.h>
+#include <linux/namespace.h>
 #include <asm/uaccess.h>
 
 
@@ -842,7 +843,7 @@ do_kern_mount(const char *fstype, int fl
 	mnt->mnt_root = dget(sb->s_root);
 	mnt->mnt_mountpoint = sb->s_root;
 	mnt->mnt_parent = mnt;
-	mnt->mnt_namespace = current->namespace;
+	mnt->mnt_namespace = get_namespace(current->namespace);
 	up_write(&sb->s_umount);
 	put_filesystem(type);
 	return mnt;
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [email protected]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[Index of Archives]     [Kernel Newbies]     [Netfilter]     [Bugtraq]     [Photo]     [Stuff]     [Gimp]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Video 4 Linux]     [Linux for the blind]     [Linux Resources]
  Powered by Linux