Re: [PATCH] fix race in mark_mounts_for_expiry()

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Wed, 2005-05-18 at 09:53, Miklos Szeredi wrote:
> > E.g. having a separate task count, which is incremented/decremented
> > only by clone/exit.  If the task count goes to zero, umount_tree is
> > called on root, but namespace is not freed.
> > 
> > And each mnt_namespace holds a proper reference to the namespace, so
> > it's safe to dereference it anytime.  When truly no more references
> > remain, the namespace can go away.
> > 
> > Hmm?

First of all the reason this race exists implies Al Viro may have had
assertion in his mind that "All tasks that have access to a namespace
has a refcount on that namespace".  If that was what he was thinking,
than the I would stick with that assertion being true. The overall idea
I am thinking off is:

1) have the automounter hold a refcount on any new namespace created
    the mounts in which the automounter has interest in.
2) have a refcount on the namespace when a new task gains access to
   a namespace through the file descriptor or any other 
   similar mechanisms and remove the reference
   once the fd gets closed. (bit tricky to implement)

Do you agree with the overall idea? 
If so I will try to come up with a patch. 

RP

> 
> Here's a patch (compile and boot tested). 
> 
> Now there are two solutions to one problem, the previous one is ugly,
> this one is buggy.  Which should it be?
> 
> Thanks,
> Miklos
> 
> Index: linux/include/linux/namespace.h
> ===================================================================
> --- linux.orig/include/linux/namespace.h	2005-05-18 18:02:24.000000000 +0200
> +++ linux/include/linux/namespace.h	2005-05-18 18:28:48.000000000 +0200
> @@ -7,18 +7,26 @@
>  
>  struct namespace {
>  	atomic_t		count;
> +	atomic_t		task_count; /* how many tasks use this */
>  	struct vfsmount *	root;
>  	struct list_head	list;
>  	struct rw_semaphore	sem;
>  };
>  
>  extern int copy_namespace(int, struct task_struct *);
> -extern void __put_namespace(struct namespace *namespace);
> +extern void __exit_namespace(struct namespace *namespace);
>  
>  static inline void put_namespace(struct namespace *namespace)
>  {
> -	if (atomic_dec_and_test(&namespace->count))
> -		__put_namespace(namespace);
> +	if (namespace && atomic_dec_and_test(&namespace->count))
> +		kfree(namespace);
> +}
> +
> +static inline struct namespace *get_namespace(struct namespace *namespace)
> +{
> +	if (namespace)
> +		atomic_inc(&namespace->count);
> +	return namespace;
>  }
>  
>  static inline void exit_namespace(struct task_struct *p)
> @@ -28,14 +36,10 @@ static inline void exit_namespace(struct
>  		task_lock(p);
>  		p->namespace = NULL;
>  		task_unlock(p);
> -		put_namespace(namespace);
> +		if (atomic_dec_and_test(&p->namespace->task_count))
> +			__exit_namespace(p->namespace);
>  	}
>  }
>  
> -static inline void get_namespace(struct namespace *namespace)
> -{
> -	atomic_inc(&namespace->count);
> -}
> -
>  #endif
>  #endif
> Index: linux/fs/namespace.c
> ===================================================================
> --- linux.orig/fs/namespace.c	2005-05-18 17:42:46.000000000 +0200
> +++ linux/fs/namespace.c	2005-05-18 18:32:29.000000000 +0200
> @@ -160,7 +160,7 @@ clone_mnt(struct vfsmount *old, struct d
>  		mnt->mnt_root = dget(root);
>  		mnt->mnt_mountpoint = mnt->mnt_root;
>  		mnt->mnt_parent = mnt;
> -		mnt->mnt_namespace = current->namespace;
> +		mnt->mnt_namespace = get_namespace(current->namespace);
>  
>  		/* stick the duplicate mount on the same expiry list
>  		 * as the original if that was on one */
> @@ -345,13 +345,15 @@ static void umount_tree(struct vfsmount 
>  	for (p = mnt; p; p = next_mnt(p, mnt)) {
>  		list_del(&p->mnt_list);
>  		list_add(&p->mnt_list, &kill);
> -		p->mnt_namespace = NULL;
>  	}
>  
>  	while (!list_empty(&kill)) {
> +		struct namespace *n;
>  		mnt = list_entry(kill.next, struct vfsmount, mnt_list);
>  		list_del_init(&mnt->mnt_list);
>  		list_del_init(&mnt->mnt_fslink);
> +		n = mnt->mnt_namespace;
> +		mnt->mnt_namespace = NULL;
>  		if (mnt->mnt_parent == mnt) {
>  			spin_unlock(&vfsmount_lock);
>  		} else {
> @@ -361,6 +363,7 @@ static void umount_tree(struct vfsmount 
>  			path_release(&old_nd);
>  		}
>  		mntput(mnt);
> +		put_namespace(n);
>  		spin_lock(&vfsmount_lock);
>  	}
>  }
> @@ -866,12 +869,9 @@ void mark_mounts_for_expiry(struct list_
>  		mnt = list_entry(graveyard.next, struct vfsmount, mnt_fslink);
>  		list_del_init(&mnt->mnt_fslink);
>  
> -		/* don't do anything if the namespace is dead - all the
> -		 * vfsmounts from it are going away anyway */
> -		namespace = mnt->mnt_namespace;
> -		if (!namespace || atomic_read(&namespace->count) <= 0)
> +		namespace = get_namespace(mnt->mnt_namespace);
> +		if (!namespace)
>  			continue;
> -		get_namespace(namespace);
>  
>  		spin_unlock(&vfsmount_lock);
>  		down_write(&namespace->sem);
> @@ -1073,8 +1073,10 @@ int copy_namespace(int flags, struct tas
>  
>  	get_namespace(namespace);
>  
> -	if (!(flags & CLONE_NEWNS))
> +	if (!(flags & CLONE_NEWNS)) {
> +		atomic_inc(&namespace->task_count);
>  		return 0;
> +	}
>  
>  	if (!capable(CAP_SYS_ADMIN)) {
>  		put_namespace(namespace);
> @@ -1086,6 +1088,7 @@ int copy_namespace(int flags, struct tas
>  		goto out;
>  
>  	atomic_set(&new_ns->count, 1);
> +	atomic_set(&new_ns->task_count, 1);
>  	init_rwsem(&new_ns->sem);
>  	INIT_LIST_HEAD(&new_ns->list);
>  
> @@ -1109,7 +1112,9 @@ int copy_namespace(int flags, struct tas
>  	p = namespace->root;
>  	q = new_ns->root;
>  	while (p) {
> -		q->mnt_namespace = new_ns;
> +		struct namespace *oldns = q->mnt_namespace;
> +		q->mnt_namespace = get_namespace(new_ns);
> +		put_namespace(oldns);
>  		if (fs) {
>  			if (p == fs->rootmnt) {
>  				rootmnt = p;
> @@ -1381,16 +1386,17 @@ static void __init init_mount_tree(void)
>  	if (!namespace)
>  		panic("Can't allocate initial namespace");
>  	atomic_set(&namespace->count, 1);
> +	atomic_set(&namespace->task_count, 0);
>  	INIT_LIST_HEAD(&namespace->list);
>  	init_rwsem(&namespace->sem);
>  	list_add(&mnt->mnt_list, &namespace->list);
>  	namespace->root = mnt;
> -	mnt->mnt_namespace = namespace;
> +	mnt->mnt_namespace = get_namespace(namespace);
>  
>  	init_task.namespace = namespace;
>  	read_lock(&tasklist_lock);
>  	do_each_thread(g, p) {
> -		get_namespace(namespace);
> +		atomic_inc(&namespace->task_count);
>  		p->namespace = namespace;
>  	} while_each_thread(g, p);
>  	read_unlock(&tasklist_lock);
> @@ -1448,12 +1454,13 @@ void __init mnt_init(unsigned long mempa
>  	init_mount_tree();
>  }
>  
> -void __put_namespace(struct namespace *namespace)
> +void __exit_namespace(struct namespace *namespace)
>  {
>  	down_write(&namespace->sem);
>  	spin_lock(&vfsmount_lock);
>  	umount_tree(namespace->root);
> +	namespace->root = NULL;
>  	spin_unlock(&vfsmount_lock);
>  	up_write(&namespace->sem);
> -	kfree(namespace);
> +	put_namespace(namespace);
>  }
> Index: linux/fs/super.c
> ===================================================================
> --- linux.orig/fs/super.c	2005-05-17 13:46:56.000000000 +0200
> +++ linux/fs/super.c	2005-05-18 18:44:18.000000000 +0200
> @@ -37,6 +37,7 @@
>  #include <linux/writeback.h>		/* for the emergency remount stuff */
>  #include <linux/idr.h>
>  #include <linux/kobject.h>
> +#include <linux/namespace.h>
>  #include <asm/uaccess.h>
>  
> 
> @@ -842,7 +843,7 @@ do_kern_mount(const char *fstype, int fl
>  	mnt->mnt_root = dget(sb->s_root);
>  	mnt->mnt_mountpoint = sb->s_root;
>  	mnt->mnt_parent = mnt;
> -	mnt->mnt_namespace = current->namespace;
> +	mnt->mnt_namespace = get_namespace(current->namespace);
>  	up_write(&sb->s_umount);
>  	put_filesystem(type);
>  	return mnt;

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [email protected]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[Index of Archives]     [Kernel Newbies]     [Netfilter]     [Bugtraq]     [Photo]     [Stuff]     [Gimp]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Video 4 Linux]     [Linux for the blind]     [Linux Resources]
  Powered by Linux