[RFC][Patch] Use rbtree in fs/aio.c

Hi,

while grepping through the kernel sources, I noticed there are
quite a few FIXME and TODO comments, and their number grows by
roughly 30 each release. So I decided to tackle one of the
FIXMEs, this one in fs/aio.c:

/*	Lookup an ioctx id.  ioctx_list is lockless for reads.
 *	FIXME: this is O(n) and is only suitable for development.
 */
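
To get a feel for when that O(n) walk hurts, here is a small
userspace sketch (my illustration, not part of the patch; it uses
the raw syscall numbers, so libaio is not needed) that creates many
AIO contexts in a single process. Every subsequent io_submit() or
io_getevents() then has to find its kioctx among that many entries;
with the list that costs up to N comparisons, with an rbtree about
log2(N):

  #include <stdio.h>
  #include <unistd.h>
  #include <sys/syscall.h>
  #include <linux/aio_abi.h>

  #define NR_CTX 512

  int main(void)
  {
  	aio_context_t ctx[NR_CTX];
  	int i;

  	for (i = 0; i < NR_CTX; i++) {
  		ctx[i] = 0;	/* io_setup() wants a zeroed context */
  		if (syscall(__NR_io_setup, 1, &ctx[i]) < 0) {
  			perror("io_setup");
  			return 1;
  		}
  	}
  	printf("created %d AIO contexts in one mm\n", NR_CTX);
  	/* every io_submit()/io_getevents() now pays the lookup */
  	for (i = 0; i < NR_CTX; i++)
  		syscall(__NR_io_destroy, ctx[i]);
  	return 0;
  }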

The patch below replaces the linked list in fs/aio.c with an
rbtree to speed up the lookups. Since the current code marks it
likely() that the first entry in the list is the one we are
looking up, I am not sure this is the way to go:

  for (ioctx = mm->ioctx_list; ioctx; ioctx = ioctx->next)
	if (likely(ioctx->user_id == ctx_id && !ioctx->dead))  {
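
If the single-context case really is the common one, the tree could
keep a similar fast path by testing the root node before doing the
full descent, since a process with one ioctx has exactly that node
at the root. A rough sketch of what I mean (hypothetical helper,
not in the patch below; the caller would hold ioctx_list_lock for
reading and fall back to the full lookup on NULL):

  /* Hypothetical fast path: with a single ioctx per mm, its node
   * is the root of the tree, so test it before descending. */
  static struct kioctx *ioctx_rb_find_fast(struct mm_struct *mm,
  					 unsigned long ctx_id)
  {
  	struct rb_node *n = mm->ioctx_rb.rb_node;
  	struct kioctx *root;

  	if (likely(n)) {
  		root = rb_entry(n, struct kioctx, rb_node);
  		if (likely(root->user_id == ctx_id && !root->dead))
  			return root;
  	}
  	return NULL;	/* fall back to the full tree walk */
  }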

The patch compiles, boots, and survives an aio-stress
( ftp://ftp.suse.com/pub/people/mason/utils/aio-stress.c )
run with multiple threads. When I run only a sequential
read/write test with no random access, it is ~2-3 seconds faster
than without the patch, but that might just be measurement noise.
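
Since the ~2-3 second delta is close to run-to-run noise, a more
direct way to time just the lookup path is a micro-benchmark like
the one below (again my sketch, raw syscalls, built with something
like gcc -O2 -lrt). io_getevents() with min_nr == 0 and a zero
timeout returns immediately, but still performs the ioctx lookup,
so the per-call time tracks the lookup cost; absolute numbers are
dominated by syscall entry, only the relative change matters:

  #include <stdio.h>
  #include <time.h>
  #include <unistd.h>
  #include <sys/syscall.h>
  #include <linux/aio_abi.h>

  int main(void)
  {
  	aio_context_t ctx = 0;
  	struct io_event ev;
  	struct timespec zero = { 0, 0 }, t0, t1;
  	long i, iters = 1000000;

  	if (syscall(__NR_io_setup, 1, &ctx) < 0) {
  		perror("io_setup");
  		return 1;
  	}
  	clock_gettime(CLOCK_MONOTONIC, &t0);
  	for (i = 0; i < iters; i++)
  		syscall(__NR_io_getevents, ctx, 0, 1, &ev, &zero);
  	clock_gettime(CLOCK_MONOTONIC, &t1);
  	printf("%.1f ns per io_getevents\n",
  	       ((t1.tv_sec - t0.tv_sec) * 1e9 +
  		(t1.tv_nsec - t0.tv_nsec)) / iters);
  	syscall(__NR_io_destroy, ctx);
  	return 0;
  }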

Since I am pretty new to kernel development, feel free to flame,
and please CC me on replies.


Signed-off-by: Eric Sesterhenn <[email protected]>


--- linux-2.6.15/include/linux/aio.h	2006-01-03 04:21:10.000000000 +0100
+++ linux-2.6.15-aio/include/linux/aio.h	2006-01-10 16:53:05.000000000 +0100
@@ -4,6 +4,7 @@
 #include <linux/list.h>
 #include <linux/workqueue.h>
 #include <linux/aio_abi.h>
+#include <linux/rbtree.h>
 
 #include <asm/atomic.h>
 
@@ -173,7 +174,7 @@ struct kioctx {
 
 	/* This needs improving */
 	unsigned long		user_id;
-	struct kioctx		*next;
+	struct rb_node		rb_node;
 
 	wait_queue_head_t	wait;
 
--- linux-2.6.15/fs/aio.c	2006-01-03 04:21:10.000000000 +0100
+++ linux-2.6.15-aio/fs/aio.c	2006-01-10 18:40:30.000000000 +0100
@@ -62,6 +62,13 @@ static LIST_HEAD(fput_head);
 static void aio_kick_handler(void *);
 static void aio_queue_work(struct kioctx *);
 
+static struct kioctx *ioctx_rb_get_next(struct mm_struct *mm,
+					 struct kioctx *ioctx);
+static struct kioctx *ioctx_rb_find(struct mm_struct *mm,
+				     unsigned long ctx_id);
+static void ioctx_rb_insert(struct mm_struct *mm, struct kioctx *ioctx);
+static void ioctx_rb_erase(struct mm_struct *mm, struct kioctx *ioctx);
+
 /* aio_setup
  *	Creates the slab caches used by the aio routines, panic on
  *	failure as this is done early during the boot sequence.
@@ -244,11 +251,8 @@ static struct kioctx *ioctx_alloc(unsign
 	if (ctx->max_reqs == 0)
 		goto out_cleanup;
 
-	/* now link into global list.  kludge.  FIXME */
-	write_lock(&mm->ioctx_list_lock);
-	ctx->next = mm->ioctx_list;
-	mm->ioctx_list = ctx;
-	write_unlock(&mm->ioctx_list_lock);
+	/* insert into the tree */
+	ioctx_rb_insert(mm, ctx);
 
 	dprintk("aio: allocated ioctx %p[%ld]: mm=%p mask=0x%x\n",
 		ctx, ctx->user_id, current->mm, ctx->ring_info.nr);
@@ -336,11 +340,12 @@ ssize_t fastcall wait_on_sync_kiocb(stru
  */
 void fastcall exit_aio(struct mm_struct *mm)
 {
-	struct kioctx *ctx = mm->ioctx_list;
-	mm->ioctx_list = NULL;
+	struct rb_node *first = rb_first(&mm->ioctx_rb);
+	struct kioctx *ctx = first ?
+		rb_entry(first, struct kioctx, rb_node) : NULL;
 	while (ctx) {
-		struct kioctx *next = ctx->next;
-		ctx->next = NULL;
+		struct kioctx *next = ioctx_rb_get_next(mm, ctx);
+		ioctx_rb_erase(mm, ctx);
 		aio_cancel_all(ctx);
 
 		wait_for_all_aios(ctx);
@@ -541,23 +546,82 @@ int fastcall aio_put_req(struct kiocb *r
 	return ret;
 }
 
-/*	Lookup an ioctx id.  ioctx_list is lockless for reads.
- *	FIXME: this is O(n) and is only suitable for development.
- */
-struct kioctx *lookup_ioctx(unsigned long ctx_id)
+static struct kioctx *ioctx_rb_get_next(struct mm_struct *mm,
+					 struct kioctx *ioctx)
 {
-	struct kioctx *ioctx;
-	struct mm_struct *mm;
+	struct rb_node *tmp;
+
+	read_lock(&mm->ioctx_list_lock);
+	tmp = rb_next(&ioctx->rb_node);
+	read_unlock(&mm->ioctx_list_lock);
+	return tmp ? rb_entry(tmp, struct kioctx, rb_node) : NULL;
+}
 
-	mm = current->mm;
+/*
+ *	Find the ioctx for ctx_id in mm's tree.  A dead ioctx is
+ *	treated as absent; on success a reference is taken while
+ *	the tree lock is still held.
+ */
+static struct kioctx *ioctx_rb_find(struct mm_struct *mm,
+				     unsigned long ctx_id)
+{
+	struct rb_node *n;
+	struct kioctx *tmp = NULL;
+
 	read_lock(&mm->ioctx_list_lock);
-	for (ioctx = mm->ioctx_list; ioctx; ioctx = ioctx->next)
-		if (likely(ioctx->user_id == ctx_id && !ioctx->dead)) {
-			get_ioctx(ioctx);
+	n = mm->ioctx_rb.rb_node;
+	while (n) {
+		tmp = rb_entry(n, struct kioctx, rb_node);
+		if (ctx_id < tmp->user_id)
+			n = n->rb_left;
+		else if (ctx_id > tmp->user_id)
+			n = n->rb_right;
+		else
 			break;
-		}
+	}
+	if (!n || tmp->dead)
+		tmp = NULL;	/* not found, or dying */
+	else
+		get_ioctx(tmp);	/* take the ref under the lock */
 	read_unlock(&mm->ioctx_list_lock);
+
+	return tmp;
+}
+
+static void ioctx_rb_insert(struct mm_struct *mm, struct kioctx *ioctx)
+{
+	struct rb_node *parent = NULL;
+	struct rb_node **p = &mm->ioctx_rb.rb_node;
+	struct kioctx *tmp;
+
+	write_lock(&mm->ioctx_list_lock);
+	while (*p) {
+		parent = *p;
+		tmp = rb_entry(parent, struct kioctx, rb_node);
+		if (ioctx->user_id < tmp->user_id)
+			p = &(*p)->rb_left;
+		else
+			p = &(*p)->rb_right;
+	}
+	rb_link_node(&ioctx->rb_node, parent, p);
+	rb_insert_color(&ioctx->rb_node, &mm->ioctx_rb);
+	write_unlock(&mm->ioctx_list_lock);
+}
+
+static void ioctx_rb_erase(struct mm_struct *mm, struct kioctx *ioctx)
+{
+	write_lock(&mm->ioctx_list_lock);
+	rb_erase(&ioctx->rb_node, &mm->ioctx_rb);
+	write_unlock(&mm->ioctx_list_lock);
+}
 
+/*	Lookup an ioctx id.  Returns with a reference held on success.
+ */
+struct kioctx *lookup_ioctx(unsigned long ctx_id)
+{
+	struct kioctx *ioctx;
+
+	ioctx = ioctx_rb_find(current->mm, ctx_id);
 	return ioctx;
 }
 
@@ -1216,21 +1280,20 @@ out:
  */
 static void io_destroy(struct kioctx *ioctx)
 {
 	struct mm_struct *mm = current->mm;
-	struct kioctx **tmp;
 	int was_dead;
 
-	/* delete the entry from the list is someone else hasn't already */
+	/* delete the entry from the tree if someone else hasn't already;
+	 * check and erase atomically under the lock so that a second
+	 * destroyer cannot erase the node twice.
+	 */
 	write_lock(&mm->ioctx_list_lock);
 	was_dead = ioctx->dead;
 	ioctx->dead = 1;
-	for (tmp = &mm->ioctx_list; *tmp && *tmp != ioctx;
-	     tmp = &(*tmp)->next)
-		;
-	if (*tmp)
-		*tmp = ioctx->next;
+	if (!was_dead)
+		rb_erase(&ioctx->rb_node, &mm->ioctx_rb);
 	write_unlock(&mm->ioctx_list_lock);
 
 	dprintk("aio_release(%p)\n", ioctx);
 	if (likely(!was_dead))
 		put_ioctx(ioctx);	/* twice for the list */

--- linux-2.6.15/kernel/fork.c	2006-01-03 04:21:10.000000000 +0100
+++ linux-2.6.15-aio/kernel/fork.c	2006-01-10 15:53:54.000000000 +0100
@@ -322,7 +322,7 @@ static struct mm_struct * mm_init(struct
 	set_mm_counter(mm, anon_rss, 0);
 	spin_lock_init(&mm->page_table_lock);
 	rwlock_init(&mm->ioctx_list_lock);
-	mm->ioctx_list = NULL;
+	mm->ioctx_rb = RB_ROOT;
 	mm->free_area_cache = TASK_UNMAPPED_BASE;
 	mm->cached_hole_size = ~0UL;
 
--- linux-2.6.15/include/linux/sched.h	2006-01-03 04:21:10.000000000 +0100
+++ linux-2.6.15-aio/include/linux/sched.h	2006-01-10 15:50:19.000000000 +0100
@@ -356,7 +356,7 @@ struct mm_struct {
 
 	/* aio bits */
 	rwlock_t		ioctx_list_lock;
-	struct kioctx		*ioctx_list;
+	struct rb_root		ioctx_rb;
 };
 
 struct sighand_struct {

