On Tue, Nov 01, 2005 at 10:35:25PM +0100, Kay Sievers wrote: > On Tue, Nov 01, 2005 at 10:54:49PM +0300, Sergey Vlasov wrote: > > On Tue, Nov 01, 2005 at 04:58:16AM +0100, Kay Sievers wrote: > > > On Tue, Nov 01, 2005 at 01:28:46AM +0100, Kay Sievers wrote: > > > > Ok, makes sense. The attached seems to work for me. If we can get > > > > something like this, we can remove the superblock claim/release events > > > > completely and just read the events from the /proc/mounts file itself. > > > > No, we need both events. When you need to tell the user when it is > > safe to disconnect the storage device, the event from detach_mnt() is > > useless - it happens too early. In fact, even the current way of > > sending the event from kill_block_super() is broken, because the event > > is generated before generic_shutdown_super() and sync_blockdev(), and > > writing out cached data may take some time. > > > > We could try to emit busy/free events from bd_claim() and > > bd_release(); this would be triggered by most "interesting" users > > (even opens with O_EXCL), but not by things like volume_id. > > Hmm, HAL polls optical drives every 2 seconds with O_EXCL to detect media > changes. You need to do it EXCL, cause otherwise some cd burners fail. Crap... Then we can only move the bdev_uevent() call in kill_block_super() later, and other users of block devices will not be noticed at all. > > > New patch. Missed a check for namespace == NULL in detach_mnt(). [skip] > > This assumes that there will be only one process per namespace which > > will call poll() on /proc/mounts. Even though someone may argue that > > it is the right approach (have a single process which watches > > /proc/mounts and broadcasts updates to other interested processes, > > e.g., over dbus), with the above implementation any unprivileged user > > can call poll() and interfere with the operation of that designated > > process. > > Sure, capable(CAP_SYS_ADMIN) could prevent this. poll() protected by CAP_SYS_ADMIN? Ewww. /proc/bus/usb/devices uses event counters in its poll() implementation; something like this could be done here too. What about the following patch (compile tested only): ----------------------------------------------------------------------- Add poll support for /proc/.../mounts Now poll() or select() system calls can be used to wait for mount tree changes. Signed-off-by: Sergey Vlasov <[email protected]> --- fs/namespace.c | 19 ++++++++++- fs/proc/base.c | 75 ++++++++++++++++++++++++++++++++++----------- include/linux/namespace.h | 9 +++++ 3 files changed, 82 insertions(+), 21 deletions(-) applies-to: b87f06d928e0ea06ae6244c1aeecf3e745f39bb9 1704c384737e1cdbca3194f3471195c9ee4377a5 diff --git a/fs/namespace.c b/fs/namespace.c index 2fa9fdf..4182210 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -39,6 +39,8 @@ static inline int sysfs_init(void) /* spinlock for vfsmount related operations, inplace of dcache_lock */ __cacheline_aligned_in_smp DEFINE_SPINLOCK(vfsmount_lock); +DECLARE_WAIT_QUEUE_HEAD(mounts_wait); + static struct list_head *mount_hashtable; static int hash_mask __read_mostly, hash_bits __read_mostly; static kmem_cache_t *mnt_cache; @@ -120,6 +122,10 @@ static void detach_mnt(struct vfsmount * list_del_init(&mnt->mnt_child); list_del_init(&mnt->mnt_hash); old_nd->dentry->d_mounted--; + if (current->namespace) { + current->namespace->event++; + wake_up_interruptible(&mounts_wait); + } } static void attach_mnt(struct vfsmount *mnt, struct nameidata *nd) @@ -129,6 +135,8 @@ static void attach_mnt(struct vfsmount * list_add(&mnt->mnt_hash, mount_hashtable+hash(nd->mnt, nd->dentry)); list_add_tail(&mnt->mnt_child, &nd->mnt->mnt_mounts); nd->dentry->d_mounted++; + current->namespace->event++; + wake_up_interruptible(&mounts_wait); } static struct vfsmount *next_mnt(struct vfsmount *p, struct vfsmount *root) @@ -185,7 +193,8 @@ EXPORT_SYMBOL(__mntput); /* iterator */ static void *m_start(struct seq_file *m, loff_t *pos) { - struct namespace *n = m->private; + struct proc_mounts_private *private = m->private; + struct namespace *n = private->namespace; struct list_head *p; loff_t l = *pos; @@ -198,7 +207,8 @@ static void *m_start(struct seq_file *m, static void *m_next(struct seq_file *m, void *v, loff_t *pos) { - struct namespace *n = m->private; + struct proc_mounts_private *private = m->private; + struct namespace *n = private->namespace; struct list_head *p = ((struct vfsmount *)v)->mnt_list.next; (*pos)++; return p==&n->list ? NULL : list_entry(p, struct vfsmount, mnt_list); @@ -206,7 +216,8 @@ static void *m_next(struct seq_file *m, static void m_stop(struct seq_file *m, void *v) { - struct namespace *n = m->private; + struct proc_mounts_private *private = m->private; + struct namespace *n = private->namespace; up_read(&n->sem); } @@ -1093,6 +1104,7 @@ int copy_namespace(int flags, struct tas atomic_set(&new_ns->count, 1); init_rwsem(&new_ns->sem); INIT_LIST_HEAD(&new_ns->list); + new_ns->event = 0; down_write(&tsk->namespace->sem); /* First pass: copy the tree topology */ @@ -1394,6 +1406,7 @@ static void __init init_mount_tree(void) init_rwsem(&namespace->sem); list_add(&mnt->mnt_list, &namespace->list); namespace->root = mnt; + namespace->event = 0; mnt->mnt_namespace = namespace; init_task.namespace = namespace; diff --git a/fs/proc/base.c b/fs/proc/base.c index a170450..41462c0 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -57,6 +57,7 @@ #include <linux/init.h> #include <linux/file.h> #include <linux/string.h> +#include <linux/poll.h> #include <linux/seq_file.h> #include <linux/namei.h> #include <linux/namespace.h> @@ -663,39 +664,77 @@ extern struct seq_operations mounts_op; static int mounts_open(struct inode *inode, struct file *file) { struct task_struct *task = proc_task(inode); - int ret = seq_open(file, &mounts_op); + struct proc_mounts_private *private; + struct seq_file *m; + struct namespace *namespace; + int ret = -ENOMEM; + + private = kmalloc(sizeof(*private), GFP_KERNEL); + if (!private) + goto out; + ret = seq_open(file, &mounts_op); + if (ret) + goto out_free_private; + m = file->private_data; + m->private = private; - if (!ret) { - struct seq_file *m = file->private_data; - struct namespace *namespace; - task_lock(task); - namespace = task->namespace; - if (namespace) - get_namespace(namespace); - task_unlock(task); - - if (namespace) - m->private = namespace; - else { - seq_release(inode, file); - ret = -EINVAL; - } + task_lock(task); + namespace = task->namespace; + if (namespace) + get_namespace(namespace); + task_unlock(task); + if (!namespace) { + ret = -EINVAL; + goto out_release; } + + private->namespace = namespace; + down_read(&namespace->sem); + private->last_event = namespace->event; + up_read(&namespace->sem); +out: return ret; + +out_release: + seq_release(inode, file); +out_free_private: + kfree(private); + goto out; } static int mounts_release(struct inode *inode, struct file *file) { struct seq_file *m = file->private_data; - struct namespace *namespace = m->private; - put_namespace(namespace); + struct proc_mounts_private *private = m->private; + put_namespace(private->namespace); + kfree(private); return seq_release(inode, file); } +static unsigned int mounts_poll(struct file *file, poll_table *wait) +{ + struct seq_file *m = file->private_data; + struct proc_mounts_private *private = m->private; + struct namespace *namespace = private->namespace; + int ret = 0; + + poll_wait(file, &mounts_wait, wait); + + down_read(&namespace->sem); + if (private->last_event != namespace->event) { + private->last_event = namespace->event; + ret = POLLIN | POLLRDNORM; + } + up_read(&namespace->sem); + + return ret; +} + static struct file_operations proc_mounts_operations = { .open = mounts_open, .read = seq_read, .llseek = seq_lseek, + .poll = mounts_poll, .release = mounts_release, }; diff --git a/include/linux/namespace.h b/include/linux/namespace.h index 0e5a86f..fe5840a 100644 --- a/include/linux/namespace.h +++ b/include/linux/namespace.h @@ -10,6 +10,7 @@ struct namespace { struct vfsmount * root; struct list_head list; struct rw_semaphore sem; + unsigned int event; }; extern int copy_namespace(int, struct task_struct *); @@ -38,5 +39,13 @@ static inline void get_namespace(struct atomic_inc(&namespace->count); } +/* /proc/.../mounts file descriptor state */ +struct proc_mounts_private { + struct namespace * namespace; + unsigned int last_event; +}; + +extern wait_queue_head_t mounts_wait; + #endif #endif --- 0.99.8.GIT
Attachment:
pgpZoxpHzPRFW.pgp
Description: PGP signature
- Follow-Ups:
- Re: Race between "mount" uevent and /proc/mounts?
- From: Al Viro <[email protected]>
- Re: Race between "mount" uevent and /proc/mounts?
- References:
- Race between "mount" uevent and /proc/mounts?
- From: "Schupp Roderich (extern) BenQ MD PD SWP 2 CM MCH" <[email protected]>
- Re: Race between "mount" uevent and /proc/mounts?
- From: Al Viro <[email protected]>
- Re: Race between "mount" uevent and /proc/mounts?
- From: Sergey Vlasov <[email protected]>
- Re: Race between "mount" uevent and /proc/mounts?
- From: Al Viro <[email protected]>
- Re: Race between "mount" uevent and /proc/mounts?
- From: Kay Sievers <[email protected]>
- Re: Race between "mount" uevent and /proc/mounts?
- From: Al Viro <[email protected]>
- Re: Race between "mount" uevent and /proc/mounts?
- From: Kay Sievers <[email protected]>
- Re: Race between "mount" uevent and /proc/mounts?
- From: Kay Sievers <[email protected]>
- Re: Race between "mount" uevent and /proc/mounts?
- From: Sergey Vlasov <[email protected]>
- Re: Race between "mount" uevent and /proc/mounts?
- From: Kay Sievers <[email protected]>
- Race between "mount" uevent and /proc/mounts?
- Prev by Date: 2.6.14-ck2
- Next by Date: Re: [PATCH 1/2] slob: move kstrdup to lib/string.c
- Previous by thread: Re: Race between "mount" uevent and /proc/mounts?
- Next by thread: Re: Race between "mount" uevent and /proc/mounts?
- Index(es):