[RFC][PATCH] ps command race fix take2 [4/4] proc_pid_readdir

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



proc_pid_readdir() by list_token.

Remember 'where we are reading' by inserting a token in the list.
It seems a bit complicated because of RCU but what we do is very simple.

Considering task A-G.

--(A)--(B)--(C)--(D)--(E)--(F)--(G)--

When we have read (C) (and return to user process),
inserting a token before (D)

--(A)--(B)--(C)--(token)--(D)--(E)--(F)--(G)--

If (A),(B),(C),(D),(E),(F) exits before next readdir(), the list is

--(token)--(G)--

readdir() can restart from (G).

Signed-Off-By: KAMEZAWA Hiroyuki <[email protected]>
Signed-Off-By: Tadashi Saito <[email protected]>

 fs/proc/base.c |  124 +++++++++++++++++++++++++++++++--------------------------
 fs/proc/root.c |   11 +++++
 2 files changed, 80 insertions(+), 55 deletions(-)

Index: linux-2.6.18-rc4/fs/proc/base.c
===================================================================
--- linux-2.6.18-rc4.orig/fs/proc/base.c
+++ linux-2.6.18-rc4/fs/proc/base.c
@@ -2141,44 +2141,35 @@ out_no_task:
 }
 
 /*
- * Find the first tgid to return to user space.
- *
- * Usually this is just whatever follows &init_task, but if the users
- * buffer was too small to hold the full list or there was a seek into
- * the middle of the directory we have more work to do.
- *
- * In the case of a short read we start with find_task_by_pid.
- *
- * In the case of a seek we start with &init_task and walk nr
- * threads past it.
+ * Find the first tgid to return to user space. Usually this is just
+ * whatever follows &init_task.
  */
-static struct task_struct *first_tgid(int tgid, unsigned int nr)
+/*
+ * return the first *alive* task.
+ */
+static inline struct task_struct *first_alive_task(struct task_struct *task)
+{
+	while (!pid_alive(task) && task != &init_task)
+		task = next_task(task);
+	if (task == &init_task)
+		return NULL;
+	return task;
+}
+
+static struct task_struct *first_tgid(struct list_token *start)
 {
 	struct task_struct *pos;
-	rcu_read_lock();
-	if (tgid && nr) {
-		pos = find_task_by_pid(tgid);
-		if (pos && thread_group_leader(pos))
-			goto found;
-	}
-	/* If nr exceeds the number of processes get out quickly */
-	pos = NULL;
-	if (nr && nr >= nr_processes())
-		goto done;
 
-	/* If we haven't found our starting place yet start with
-	 * the init_task and walk nr tasks forward.
-	 */
-	for (pos = next_task(&init_task); nr > 0; --nr) {
-		pos = next_task(pos);
-		if (pos == &init_task) {
-			pos = NULL;
-			goto done;
-		}
+ 	rcu_read_lock();
+	if (!start) {
+		pos = next_task(&init_task);
+	} else {
+		start = list_next_rcu_skiptoken(start);
+		pos = container_of(start, struct task_struct, tasks);
 	}
-found:
-	get_task_struct(pos);
-done:
+	pos = first_alive_task(pos);
+	if (pos)
+		get_task_struct(pos);
 	rcu_read_unlock();
 	return pos;
 }
@@ -2192,16 +2183,12 @@ done:
 static struct task_struct *next_tgid(struct task_struct *start)
 {
 	struct task_struct *pos;
-	rcu_read_lock();
+
 	pos = start;
-	if (pid_alive(start))
-		pos = next_task(start);
-	if (pid_alive(pos) && (pos != &init_task)) {
+	rcu_read_lock();
+	pos = first_alive_task(next_task(pos));
+	if(pos)
 		get_task_struct(pos);
-		goto done;
-	}
-	pos = NULL;
-done:
 	rcu_read_unlock();
 	put_task_struct(start);
 	return pos;
@@ -2212,24 +2199,31 @@ int proc_pid_readdir(struct file * filp,
 {
 	char buf[PROC_NUMBUF];
 	unsigned int nr = filp->f_pos - FIRST_PROCESS_ENTRY;
-	struct task_struct *task;
+	struct task_struct *task, *pos;
 	int tgid;
+	struct list_token *token, *old_token;
 
 	if (!nr) {
 		ino_t ino = fake_ino(0,PROC_TGID_INO);
 		if (filldir(dirent, "self", 4, filp->f_pos, ino, DT_LNK) < 0)
 			return 0;
 		filp->f_pos++;
-		nr++;
+		filp->private_data = NULL;
+	} else if (filp->private_data == NULL)
+		return 0;  /* all entries were read */
+
+retry:
+	token = kzalloc(sizeof(*token), GFP_KERNEL);
+	if (!token) {
+		/* this small kmalloc() can fail in rare case, but readdir()
+		 * is not allowed to return ENOMEM. retrying is reasonable. */
+		yield();
+		goto retry;
 	}
-	nr -= 1;
 
-	/* f_version caches the tgid value that the last readdir call couldn't
-	 * return. lseek aka telldir automagically resets f_version to 0.
-	 */
-	tgid = filp->f_version;
-	filp->f_version = 0;
-	for (task = first_tgid(tgid, nr);
+	old_token = filp->private_data;
+	filp->private_data = NULL;
+	for (task = first_tgid(old_token);
 	     task;
 	     task = next_tgid(task), filp->f_pos++) {
 		int len;
@@ -2237,14 +2231,34 @@ int proc_pid_readdir(struct file * filp,
 		tgid = task->pid;
 		len = snprintf(buf, sizeof(buf), "%d", tgid);
 		ino = fake_ino(tgid, PROC_TGID_INO);
-		if (filldir(dirent, buf, len, filp->f_pos, ino, DT_DIR) < 0) {
-			/* returning this tgid failed, save it as the first
-			 * pid for the next readir call */
-			filp->f_version = tgid;
-			put_task_struct(task);
+		if (filldir(dirent, buf, len, filp->f_pos, ino, DT_DIR) < 0)
 			break;
+	}
+	write_lock_irq(&tasklist_lock);
+	/*
+	 * insert token and remember it for next access.
+	 * insertion of token should be done just before not-stale task.
+	 */
+	if (task) {
+		pos = first_alive_task(task);
+		if (pos != task) { /* task is not alive */
+			if (pos)
+				get_task_struct(pos);
+			put_task_struct(task);
+		}
+		if (pos) { /* remember here for next access */
+			/* token->token turns to be 1 */
+			insert_list_token_rcu(token, &pos->tasks);
+			put_task_struct(pos);
+			filp->private_data = token;
 		}
 	}
+	if (old_token)
+		remove_list_token_rcu(old_token);
+	write_unlock_irq(&tasklist_lock);
+
+	if (!token->token) /* this token was not inserted */
+		kfree(token);
 	return 0;
 }
 
Index: linux-2.6.18-rc4/fs/proc/root.c
===================================================================
--- linux-2.6.18-rc4.orig/fs/proc/root.c
+++ linux-2.6.18-rc4/fs/proc/root.c
@@ -118,6 +118,16 @@ static int proc_root_readdir(struct file
 	return ret;
 }
 
+static int proc_root_release(struct inode * dir, struct file * filp)
+{
+	if (filp->private_data) {
+		write_lock_irq(&tasklist_lock);
+		remove_list_token_rcu(filp->private_data);
+		write_unlock_irq(&tasklist_lock);
+	}
+	return 0;
+}
+
 /*
  * The root /proc directory is special, as it has the
  * <pid> directories. Thus we don't use the generic
@@ -126,6 +136,7 @@ static int proc_root_readdir(struct file
 static struct file_operations proc_root_operations = {
 	.read		 = generic_read_dir,
 	.readdir	 = proc_root_readdir,
+	.release	 = proc_root_release,
 };
 
 /*

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [email protected]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[Index of Archives]     [Kernel Newbies]     [Netfilter]     [Bugtraq]     [Photo]     [Stuff]     [Gimp]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Video 4 Linux]     [Linux for the blind]     [Linux Resources]
  Powered by Linux