Re: remove_proc_entry and read_proc

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



> I don't understand how this is supposed to work.  Consider
> 
> CPU1					CPU2
> 
> atomic_inc(&dp->pde_users);
> if (dp->proc_fops)
> 					de->proc_fops = NULL;
> 	use_proc_fops <= BOOM
> 					if (atomic_read(&de->pde_users) > 0) {
> 
> what prevents dereference of a NULL proc_fops value?

I was wrong: what prevents it is that proc_fops isn't used at all in these
paths!  However it is used in proc_get_inode thusly:

                if (de->proc_fops)
                        inode->i_fop = de->proc_fops;

What if proc_fops is set to NULL between these two?  Putting it into a
local variable should take care of this one, but since I'm not sure if
it's really needed I'll leave that to you!

Otherwise, here's a patch that adds memory barriers (maybe these can be
SMP memory barriers) since the atomic ops you are using do not imply
such barriers according to Documentation/atomic_ops.txt.  The memory
barriers are needed, since you need to know that if you saw a non-NULL
proc_fops, then remove_proc_entry saw your increased pde_users count.
If the proc_fops write was re-ordered after the pde_users read, or the
proc_fops read was re-ordered before the pde_users write, then this may
not be true.

Also, I removed the early checks for NULL proc_fops.  True, they avoid
an atomic operation and a memory barrier, however they only avoid them
in the error path, when -EIO is going to be returned, which is hardly a
fast path.  In the common case, they represent a pointless check.

Ciao,

Duncan.

PS: Compiles, but otherwise untested.


Index: Linux/fs/proc/generic.c
===================================================================
--- Linux.orig/fs/proc/generic.c	2006-12-09 17:18:32.000000000 +0100
+++ Linux/fs/proc/generic.c	2007-02-01 10:54:36.000000000 +0100
@@ -19,6 +19,7 @@
 #include <linux/idr.h>
 #include <linux/namei.h>
 #include <linux/bitops.h>
+#include <linux/delay.h>
 #include <linux/spinlock.h>
 #include <asm/uaccess.h>
 
@@ -76,6 +77,19 @@
 	if (!(page = (char*) __get_free_page(GFP_KERNEL)))
 		return -ENOMEM;
 
+	/*
+	 * We are going to call into module's code via ->get_info or
+	 * ->read_proc. Bump refcount so that remove_proc_entry() will
+	 * wait for read to complete.
+	 */
+	atomic_inc(&dp->pde_users);
+	mb();
+	if (!dp->proc_fops)
+		/*
+		 * remove_proc_entry() marked PDE as "going away". Obey.
+		 */
+		goto out_dec;
+
 	while ((nbytes > 0) && !eof) {
 		count = min_t(size_t, PROC_BLOCK_SIZE, nbytes);
 
@@ -195,6 +209,8 @@
 		buf += n;
 		retval += n;
 	}
+out_dec:
+	atomic_dec(&dp->pde_users);
 	free_page((unsigned long) page);
 	return retval;
 }
@@ -205,14 +221,28 @@
 {
 	struct inode *inode = file->f_path.dentry->d_inode;
 	struct proc_dir_entry * dp;
+	ssize_t rv;
 	
 	dp = PDE(inode);
 
 	if (!dp->write_proc)
 		return -EIO;
 
-	/* FIXME: does this routine need ppos?  probably... */
-	return dp->write_proc(file, buffer, count, dp->data);
+	rv = -EIO;
+	/*
+	 * We are going to call into module's code via ->write_proc .
+	 * Bump refcount so that module won't dissapear while ->write_proc
+	 * sleeps in copy_from_user(). remove_proc_entry() will wait for
+	 * write to complete.
+	 */
+	atomic_inc(&dp->pde_users);
+	mb();
+	if (dp->proc_fops)
+		/* PDE is ready, refcount bumped, call into module. */
+		/* FIXME: does this routine need ppos?  probably... */
+		rv = dp->write_proc(file, buffer, count, dp->data);
+	atomic_dec(&dp->pde_users);
+	return rv;
 }
 
 
@@ -717,12 +747,26 @@
 	if (!parent && xlate_proc_name(name, &parent, &fn) != 0)
 		goto out;
 	len = strlen(fn);
-
+again:
 	spin_lock(&proc_subdir_lock);
 	for (p = &parent->subdir; *p; p=&(*p)->next ) {
 		if (!proc_match(len, fn, *p))
 			continue;
 		de = *p;
+
+		/*
+		 * Stop accepting new readers/writers. If you're dynamically
+		 * allocating ->proc_fops, save a pointer somewhere.
+		 */
+		de->proc_fops = NULL;
+		mb();
+		/* Wait until all readers/writers are done. */
+		if (atomic_read(&de->pde_users) > 0) {
+			spin_unlock(&proc_subdir_lock);
+			msleep(1);
+			goto again;
+		}
+
 		*p = de->next;
 		de->next = NULL;
 		if (S_ISDIR(de->mode))
Index: Linux/include/linux/proc_fs.h
===================================================================
--- Linux.orig/include/linux/proc_fs.h	2006-10-03 15:39:31.000000000 +0200
+++ Linux/include/linux/proc_fs.h	2007-02-01 10:42:07.000000000 +0100
@@ -56,6 +56,19 @@
 	gid_t gid;
 	loff_t size;
 	struct inode_operations * proc_iops;
+	/*
+	 * NULL ->proc_fops means "PDE is going away RSN" or
+	 * "PDE is just created". In either case ->get_info, ->read_proc,
+	 * ->write_proc won't be called because it's too late or too early,
+	 * respectively.
+	 *
+	 * Valid ->proc_fops means "use this file_operations" or
+	 * "->data is setup, it's safe to call ->read_proc, ->write_proc which
+	 * dereference it".
+	 *
+	 * If you're allocating ->proc_fops dynamically, save a pointer
+	 * somewhere.
+	 */
 	const struct file_operations * proc_fops;
 	get_info_t *get_info;
 	struct module *owner;
@@ -66,6 +79,8 @@
 	atomic_t count;		/* use count */
 	int deleted;		/* delete flag */
 	void *set;
+	atomic_t pde_users;	/* number of readers + number of writers via
+				 * ->read_proc, ->write_proc, ->get_info */
 };
 
 struct kcore_list {
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [email protected]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[Index of Archives]     [Kernel Newbies]     [Netfilter]     [Bugtraq]     [Photo]     [Stuff]     [Gimp]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Video 4 Linux]     [Linux for the blind]     [Linux Resources]
  Powered by Linux