Re: Race between "mount" uevent and /proc/mounts?

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Tue, Nov 01, 2005 at 10:35:25PM +0100, Kay Sievers wrote:
> On Tue, Nov 01, 2005 at 10:54:49PM +0300, Sergey Vlasov wrote:
> > On Tue, Nov 01, 2005 at 04:58:16AM +0100, Kay Sievers wrote:
> > > On Tue, Nov 01, 2005 at 01:28:46AM +0100, Kay Sievers wrote:
> > > > Ok, makes sense. The attached seems to work for me. If we can get
> > > > something like this, we can remove the superblock claim/release events
> > > > completely and just read the events from the /proc/mounts file itself.
> > 
> > No, we need both events.  When you need to tell the user when it is
> > safe to disconnect the storage device, the event from detach_mnt() is
> > useless - it happens too early.  In fact, even the current way of
> > sending the event from kill_block_super() is broken, because the event
> > is generated before generic_shutdown_super() and sync_blockdev(), and
> > writing out cached data may take some time.
> > 
> > We could try to emit busy/free events from bd_claim() and
> > bd_release(); this would be triggered by most "interesting" users
> > (even opens with O_EXCL), but not by things like volume_id.
> 
> Hmm, HAL polls optical drives every 2 seconds with O_EXCL to detect media
> changes. You need to do it EXCL, cause otherwise some cd burners fail.

Crap...  Then we can only move the bdev_uevent() call in
kill_block_super() later, and other users of block devices will not be
noticed at all.

> > > New patch. Missed a check for namespace == NULL in detach_mnt().
[skip]
> > This assumes that there will be only one process per namespace which
> > will call poll() on /proc/mounts.  Even though someone may argue that
> > it is the right approach (have a single process which watches
> > /proc/mounts and broadcasts updates to other interested processes,
> > e.g., over dbus), with the above implementation any unprivileged user
> > can call poll() and interfere with the operation of that designated
> > process.
> 
> Sure, capable(CAP_SYS_ADMIN) could prevent this.

poll() protected by CAP_SYS_ADMIN?  Ewww.

/proc/bus/usb/devices uses event counters in its poll() implementation;
something like this could be done here too.  What about the following
patch (compile tested only):

-----------------------------------------------------------------------

Add poll support for /proc/.../mounts

Now poll() or select() system calls can be used to wait for mount tree
changes.

Signed-off-by: Sergey Vlasov <[email protected]>


---

 fs/namespace.c            |   19 ++++++++++-
 fs/proc/base.c            |   75 ++++++++++++++++++++++++++++++++++-----------
 include/linux/namespace.h |    9 +++++
 3 files changed, 82 insertions(+), 21 deletions(-)

applies-to: b87f06d928e0ea06ae6244c1aeecf3e745f39bb9
1704c384737e1cdbca3194f3471195c9ee4377a5
diff --git a/fs/namespace.c b/fs/namespace.c
index 2fa9fdf..4182210 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -39,6 +39,8 @@ static inline int sysfs_init(void)
 /* spinlock for vfsmount related operations, inplace of dcache_lock */
  __cacheline_aligned_in_smp DEFINE_SPINLOCK(vfsmount_lock);
 
+DECLARE_WAIT_QUEUE_HEAD(mounts_wait);
+
 static struct list_head *mount_hashtable;
 static int hash_mask __read_mostly, hash_bits __read_mostly;
 static kmem_cache_t *mnt_cache; 
@@ -120,6 +122,10 @@ static void detach_mnt(struct vfsmount *
 	list_del_init(&mnt->mnt_child);
 	list_del_init(&mnt->mnt_hash);
 	old_nd->dentry->d_mounted--;
+	if (current->namespace) {
+		current->namespace->event++;
+		wake_up_interruptible(&mounts_wait);
+	}
 }
 
 static void attach_mnt(struct vfsmount *mnt, struct nameidata *nd)
@@ -129,6 +135,8 @@ static void attach_mnt(struct vfsmount *
 	list_add(&mnt->mnt_hash, mount_hashtable+hash(nd->mnt, nd->dentry));
 	list_add_tail(&mnt->mnt_child, &nd->mnt->mnt_mounts);
 	nd->dentry->d_mounted++;
+	current->namespace->event++;
+	wake_up_interruptible(&mounts_wait);
 }
 
 static struct vfsmount *next_mnt(struct vfsmount *p, struct vfsmount *root)
@@ -185,7 +193,8 @@ EXPORT_SYMBOL(__mntput);
 /* iterator */
 static void *m_start(struct seq_file *m, loff_t *pos)
 {
-	struct namespace *n = m->private;
+	struct proc_mounts_private *private = m->private;
+	struct namespace *n = private->namespace;
 	struct list_head *p;
 	loff_t l = *pos;
 
@@ -198,7 +207,8 @@ static void *m_start(struct seq_file *m,
 
 static void *m_next(struct seq_file *m, void *v, loff_t *pos)
 {
-	struct namespace *n = m->private;
+	struct proc_mounts_private *private = m->private;
+	struct namespace *n = private->namespace;
 	struct list_head *p = ((struct vfsmount *)v)->mnt_list.next;
 	(*pos)++;
 	return p==&n->list ? NULL : list_entry(p, struct vfsmount, mnt_list);
@@ -206,7 +216,8 @@ static void *m_next(struct seq_file *m, 
 
 static void m_stop(struct seq_file *m, void *v)
 {
-	struct namespace *n = m->private;
+	struct proc_mounts_private *private = m->private;
+	struct namespace *n = private->namespace;
 	up_read(&n->sem);
 }
 
@@ -1093,6 +1104,7 @@ int copy_namespace(int flags, struct tas
 	atomic_set(&new_ns->count, 1);
 	init_rwsem(&new_ns->sem);
 	INIT_LIST_HEAD(&new_ns->list);
+	new_ns->event = 0;
 
 	down_write(&tsk->namespace->sem);
 	/* First pass: copy the tree topology */
@@ -1394,6 +1406,7 @@ static void __init init_mount_tree(void)
 	init_rwsem(&namespace->sem);
 	list_add(&mnt->mnt_list, &namespace->list);
 	namespace->root = mnt;
+	namespace->event = 0;
 	mnt->mnt_namespace = namespace;
 
 	init_task.namespace = namespace;
diff --git a/fs/proc/base.c b/fs/proc/base.c
index a170450..41462c0 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -57,6 +57,7 @@
 #include <linux/init.h>
 #include <linux/file.h>
 #include <linux/string.h>
+#include <linux/poll.h>
 #include <linux/seq_file.h>
 #include <linux/namei.h>
 #include <linux/namespace.h>
@@ -663,39 +664,77 @@ extern struct seq_operations mounts_op;
 static int mounts_open(struct inode *inode, struct file *file)
 {
 	struct task_struct *task = proc_task(inode);
-	int ret = seq_open(file, &mounts_op);
+	struct proc_mounts_private *private;
+	struct seq_file *m;
+	struct namespace *namespace;
+	int ret = -ENOMEM;
+
+	private = kmalloc(sizeof(*private), GFP_KERNEL);
+	if (!private)
+		goto out;
+	ret = seq_open(file, &mounts_op);
+	if (ret)
+		goto out_free_private;
+	m = file->private_data;
+	m->private = private;
 
-	if (!ret) {
-		struct seq_file *m = file->private_data;
-		struct namespace *namespace;
-		task_lock(task);
-		namespace = task->namespace;
-		if (namespace)
-			get_namespace(namespace);
-		task_unlock(task);
-
-		if (namespace)
-			m->private = namespace;
-		else {
-			seq_release(inode, file);
-			ret = -EINVAL;
-		}
+	task_lock(task);
+	namespace = task->namespace;
+	if (namespace)
+		get_namespace(namespace);
+	task_unlock(task);
+	if (!namespace) {
+		ret = -EINVAL;
+		goto out_release;
 	}
+
+	private->namespace = namespace;
+	down_read(&namespace->sem);
+	private->last_event = namespace->event;
+	up_read(&namespace->sem);
+out:
 	return ret;
+
+out_release:
+	seq_release(inode, file);
+out_free_private:
+	kfree(private);
+	goto out;
 }
 
 static int mounts_release(struct inode *inode, struct file *file)
 {
 	struct seq_file *m = file->private_data;
-	struct namespace *namespace = m->private;
-	put_namespace(namespace);
+	struct proc_mounts_private *private = m->private;
+	put_namespace(private->namespace);
+	kfree(private);
 	return seq_release(inode, file);
 }
 
+static unsigned int mounts_poll(struct file *file, poll_table *wait)
+{
+	struct seq_file *m = file->private_data;
+	struct proc_mounts_private *private = m->private;
+	struct namespace *namespace = private->namespace;
+	int ret = 0;
+
+	poll_wait(file, &mounts_wait, wait);
+
+	down_read(&namespace->sem);
+	if (private->last_event != namespace->event) {
+		private->last_event = namespace->event;
+		ret = POLLIN | POLLRDNORM;
+	}
+	up_read(&namespace->sem);
+
+	return ret;
+}
+
 static struct file_operations proc_mounts_operations = {
 	.open		= mounts_open,
 	.read		= seq_read,
 	.llseek		= seq_lseek,
+	.poll		= mounts_poll,
 	.release	= mounts_release,
 };
 
diff --git a/include/linux/namespace.h b/include/linux/namespace.h
index 0e5a86f..fe5840a 100644
--- a/include/linux/namespace.h
+++ b/include/linux/namespace.h
@@ -10,6 +10,7 @@ struct namespace {
 	struct vfsmount *	root;
 	struct list_head	list;
 	struct rw_semaphore	sem;
+	unsigned int		event;
 };
 
 extern int copy_namespace(int, struct task_struct *);
@@ -38,5 +39,13 @@ static inline void get_namespace(struct 
 	atomic_inc(&namespace->count);
 }
 
+/* /proc/.../mounts file descriptor state */
+struct proc_mounts_private {
+	struct namespace *	namespace;
+	unsigned int		last_event;
+};
+
+extern wait_queue_head_t mounts_wait;
+
 #endif
 #endif
---
0.99.8.GIT

Attachment: pgpZoxpHzPRFW.pgp
Description: PGP signature


[Index of Archives]     [Kernel Newbies]     [Netfilter]     [Bugtraq]     [Photo]     [Stuff]     [Gimp]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Video 4 Linux]     [Linux for the blind]     [Linux Resources]
  Powered by Linux