Re: [PATCH] protect remove_proc_entry

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Hi Andrew,

I have a full patch for this.
I don't remember the details yet, but lock was not god here, we used semaphore. I pointed to this problem long ago when fixed error path in proc with moduleget.

This patch protects proc_dir_entry tree with a proc_tree_sem semaphore. I suppose lock_kernel() can be removed later after checking that no proc handlers require it. Also this patch remakes de refcounters a bit making it more clear and more similar to dentry scheme - this is required to make sure that everything works correctly.

Patch is against 2.6.15-rcX and was tested for about a week. Also works half a year on 2.6.8 :)

Signed-Off-By: Pavel Emelianov <[email protected]>
Signed-Off-By: Kirill Korotaev <[email protected]>

Kirill

Steven Rostedt <[email protected]> wrote:
+static DEFINE_SPINLOCK(remove_proc_lock);


I'll take a closer look at this next week.

The official way of protecting the contents of a directory from concurrent
lookup or modification is to take its i_sem.  But procfs is totally weird
and that approach may well not be practical here.  We'd certainly prefer
not to rely upon lock_kernel().
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [email protected]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

--- ./fs/proc/generic.c.proclk	2005-12-26 13:43:14.000000000 +0300
+++ ./fs/proc/generic.c	2005-12-31 11:48:16.000000000 +0300
@@ -27,6 +27,8 @@ static ssize_t proc_file_write(struct fi
 			       size_t count, loff_t *ppos);
 static loff_t proc_file_lseek(struct file *, loff_t, int);
 
+static DECLARE_RWSEM(proc_tree_sem);
+
 int proc_match(int len, const char *name, struct proc_dir_entry *de)
 {
 	if (de->namelen != len)
@@ -381,6 +383,7 @@ struct dentry *proc_lookup(struct inode 
 	lock_kernel();
 	de = PDE(dir);
 	if (de) {
+		down_read(&proc_tree_sem);
 		for (de = de->subdir; de ; de = de->next) {
 			if (de->namelen != dentry->d_name.len)
 				continue;
@@ -392,6 +395,7 @@ struct dentry *proc_lookup(struct inode 
 				break;
 			}
 		}
+		up_read(&proc_tree_sem);
 	}
 	unlock_kernel();
 
@@ -446,12 +450,13 @@ int proc_readdir(struct file * filp,
 			filp->f_pos++;
 			/* fall through */
 		default:
+			down_read(&proc_tree_sem);
 			de = de->subdir;
 			i -= 2;
 			for (;;) {
 				if (!de) {
 					ret = 1;
-					goto out;
+					goto out_up;
 				}
 				if (!i)
 					break;
@@ -462,14 +467,18 @@ int proc_readdir(struct file * filp,
 			do {
 				if (filldir(dirent, de->name, de->namelen, filp->f_pos,
 					    de->low_ino, de->mode >> 12) < 0)
-					goto out;
+					goto out_up;
 				filp->f_pos++;
 				de = de->next;
 			} while (de);
+			up_read(&proc_tree_sem);
 	}
 	ret = 1;
 out:	unlock_kernel();
 	return ret;	
+out_up:
+	up_read(&proc_tree_sem);
+	goto out;
 }
 
 /*
@@ -517,6 +526,7 @@ static int proc_register(struct proc_dir
 		if (dp->proc_iops == NULL)
 			dp->proc_iops = &proc_file_inode_operations;
 	}
+	de_get(dir);
 	return 0;
 }
 
@@ -576,6 +586,7 @@ static struct proc_dir_entry *proc_creat
 
 	memset(ent, 0, sizeof(struct proc_dir_entry));
 	memcpy(((char *) ent) + sizeof(struct proc_dir_entry), fn, len + 1);
+	atomic_set(&ent->count, 1);
 	ent->name = ((char *) ent) + sizeof(*ent);
 	ent->namelen = len;
 	ent->mode = mode;
@@ -589,6 +600,7 @@ struct proc_dir_entry *proc_symlink(cons
 {
 	struct proc_dir_entry *ent;
 
+	down_write(&proc_tree_sem);
 	ent = proc_create(&parent,name,
 			  (S_IFLNK | S_IRUGO | S_IWUGO | S_IXUGO),1);
 
@@ -606,6 +618,7 @@ struct proc_dir_entry *proc_symlink(cons
 			ent = NULL;
 		}
 	}
+	up_write(&proc_tree_sem);
 	return ent;
 }
 
@@ -614,6 +627,7 @@ struct proc_dir_entry *proc_mkdir_mode(c
 {
 	struct proc_dir_entry *ent;
 
+	down_write(&proc_tree_sem);
 	ent = proc_create(&parent, name, S_IFDIR | mode, 2);
 	if (ent) {
 		ent->proc_fops = &proc_dir_operations;
@@ -624,6 +638,7 @@ struct proc_dir_entry *proc_mkdir_mode(c
 			ent = NULL;
 		}
 	}
+	up_write(&proc_tree_sem);
 	return ent;
 }
 
@@ -633,7 +648,7 @@ struct proc_dir_entry *proc_mkdir(const 
 	return proc_mkdir_mode(name, S_IRUGO | S_IXUGO, parent);
 }
 
-struct proc_dir_entry *create_proc_entry(const char *name, mode_t mode,
+static struct proc_dir_entry *__create_proc_entry(const char *name, mode_t mode,
 					 struct proc_dir_entry *parent)
 {
 	struct proc_dir_entry *ent;
@@ -665,6 +680,17 @@ struct proc_dir_entry *create_proc_entry
 	return ent;
 }
 
+struct proc_dir_entry *create_proc_entry(const char *name, mode_t mode,
+					 struct proc_dir_entry *parent)
+{
+	struct proc_dir_entry *ent;
+
+	down_write(&proc_tree_sem);
+	ent = __create_proc_entry(name, mode, parent);
+	up_write(&proc_tree_sem);
+	return ent;
+}
+
 void free_proc_entry(struct proc_dir_entry *de)
 {
 	unsigned int ino = de->low_ino;
@@ -683,15 +709,13 @@ void free_proc_entry(struct proc_dir_ent
  * Remove a /proc entry and free it if it's not currently in use.
  * If it is in use, we set the 'deleted' flag.
  */
-void remove_proc_entry(const char *name, struct proc_dir_entry *parent)
+static void __remove_proc_entry(const char *name, struct proc_dir_entry *parent)
 {
 	struct proc_dir_entry **p;
 	struct proc_dir_entry *de;
 	const char *fn = name;
 	int len;
 
-	if (!parent && xlate_proc_name(name, &parent, &fn) != 0)
-		goto out;
 	len = strlen(fn);
 	for (p = &parent->subdir; *p; p=&(*p)->next ) {
 		if (!proc_match(len, fn, *p))
@@ -699,20 +723,24 @@ void remove_proc_entry(const char *name,
 		de = *p;
 		*p = de->next;
 		de->next = NULL;
+		de_put(parent);
 		if (S_ISDIR(de->mode))
 			parent->nlink--;
 		proc_kill_inodes(de);
 		de->nlink = 0;
 		WARN_ON(de->subdir);
-		if (!atomic_read(&de->count))
-			free_proc_entry(de);
-		else {
-			de->deleted = 1;
-			printk("remove_proc_entry: %s/%s busy, count=%d\n",
-				parent->name, de->name, atomic_read(&de->count));
-		}
+		de->deleted = 1;
+		de_put(de);
 		break;
 	}
-out:
-	return;
+}
+
+void remove_proc_entry(const char *name, struct proc_dir_entry *parent)
+{
+	const char *fn = name;
+
+	down_write(&proc_tree_sem);
+	if (parent || xlate_proc_name(name, &parent, &fn) == 0)
+		__remove_proc_entry(name, parent);
+	up_write(&proc_tree_sem);
 }
--- ./fs/proc/inode.c.proclk	2005-12-26 13:43:14.000000000 +0300
+++ ./fs/proc/inode.c	2005-12-31 11:48:16.000000000 +0300
@@ -21,34 +21,25 @@
 
 extern void free_proc_entry(struct proc_dir_entry *);
 
-static inline struct proc_dir_entry * de_get(struct proc_dir_entry *de)
-{
-	if (de)
-		atomic_inc(&de->count);
-	return de;
-}
-
 /*
  * Decrements the use count and checks for deferred deletion.
  */
-static void de_put(struct proc_dir_entry *de)
+void de_put(struct proc_dir_entry *de)
 {
 	if (de) {	
-		lock_kernel();		
 		if (!atomic_read(&de->count)) {
 			printk("de_put: entry %s already free!\n", de->name);
-			unlock_kernel();
 			return;
 		}
 
 		if (atomic_dec_and_test(&de->count)) {
-			if (de->deleted) {
-				printk("de_put: deferred delete of %s\n",
-					de->name);
-				free_proc_entry(de);
+			if (!de->deleted) {
+				printk("de_put: entry %s is not removed yet\n",
+						de->name);
+				return;
 			}
-		}		
-		unlock_kernel();
+			free_proc_entry(de);
+		}
 	}
 }
 
--- ./include/linux/proc_fs.h.proclk	2005-12-26 13:43:16.000000000 +0300
+++ ./include/linux/proc_fs.h	2005-12-31 11:48:16.000000000 +0300
@@ -69,6 +69,14 @@ struct proc_dir_entry {
 	void *set;
 };
 
+extern void de_put(struct proc_dir_entry *);
+static inline struct proc_dir_entry *de_get(struct proc_dir_entry *de)
+{
+	if (de)
+		atomic_inc(&de->count);
+	return de;
+}
+
 struct kcore_list {
 	struct kcore_list *next;
 	unsigned long addr;
--- ./kernel/sysctl.c.proclk	2005-12-26 13:43:16.000000000 +0300
+++ ./kernel/sysctl.c	2005-12-31 11:48:37.000000000 +0300
@@ -1396,19 +1396,15 @@ static void unregister_proc_table(ctl_ta
 				continue;
 		}
 
-		/*
-		 * In any case, mark the entry as goner; we'll keep it
-		 * around if it's busy, but we'll know to do nothing with
-		 * its fields.  We are under sysctl_lock here.
-		 */
 		de->data = NULL;
-
-		/* Don't unregister proc entries that are still being used.. */
-		if (atomic_read(&de->count))
-			continue;
-
 		table->de = NULL;
+		/*
+		 * sys_sysctl can't find us, since we are removed from list.
+		 * proc won't touch either, since de->data is NULL.
+		 */
+		spin_unlock(&sysctl_lock);
 		remove_proc_entry(table->procname, root);
+		spin_lock(&sysctl_lock);
 	}
 }
 

[Index of Archives]     [Kernel Newbies]     [Netfilter]     [Bugtraq]     [Photo]     [Stuff]     [Gimp]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Video 4 Linux]     [Linux for the blind]     [Linux Resources]
  Powered by Linux