Re: [PATCH 0/9] mmap read-around and readahead

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Tue, Dec 18, 2007 at 07:46:09PM +0800, Fengguang Wu wrote:
> No timings for now... but I wrote a debug patch(attached) and watched
> it running for about a week.  Here are some interesting numbers:

Here is the (forgotten) readahead-debug.patch:

---
 include/linux/fs.h |   43 ++++++++++++++++++++++++++++++++++
 mm/Kconfig         |   19 +++++++++++++++
 mm/filemap.c       |    1 
 mm/readahead.c     |   54 ++++++++++++++++++++++++++++++++++++++++++-
 4 files changed, 116 insertions(+), 1 deletion(-)

--- linux-2.6.24-rc4-mm1.orig/include/linux/fs.h
+++ linux-2.6.24-rc4-mm1/include/linux/fs.h
@@ -760,11 +760,54 @@ struct file_ra_state {
 	unsigned int async_size;	/* do asynchronous readahead when
 					   there are only # of pages ahead */
 
+	unsigned int flags;
 	unsigned int ra_pages;		/* Maximum readahead window */
 	int mmap_miss;			/* Cache miss stat for mmap accesses */
 	loff_t prev_pos;		/* Cache last read() position */
 };
 
+#define RA_CLASS_SHIFT		4
+#define RA_CLASS_MASK		((1 << RA_CLASS_SHIFT) - 1)
+/*
+ * Detailed classification of read-ahead behaviors; must fit RA_CLASS_MASK.
+ */
+enum ra_class {
+	RA_CLASS_INIT0,		/* new window starting at offset 0 */
+	RA_CLASS_INIT,		/* new window starting at a non-zero offset */
+	RA_CLASS_SEQUENTIAL,	/* window advanced in ondemand_readahead() */
+	RA_CLASS_INTERLEAVED,	/* set in ondemand_readahead() */
+	RA_CLASS_CONTEXT,	/* NOTE(review): not set by any visible hunk */
+	RA_CLASS_AROUND,	/* set in do_sync_mmap_readahead() */
+	RA_CLASS_COUNT		/* number of classes, not a class itself */
+};
+
+static inline enum ra_class ra_class_new(struct file_ra_state *ra)
+{
+	return ra->flags & RA_CLASS_MASK;	/* low field: most recent class */
+}
+
+static inline enum ra_class ra_class_old(struct file_ra_state *ra)
+{
+	return (ra->flags >> RA_CLASS_SHIFT) & RA_CLASS_MASK;	/* high field: previous class */
+}
+
+/*
+ * Which method is issuing this read-ahead?  The previous answer is kept too.
+ */
+static inline void ra_set_class(struct file_ra_state *ra, enum ra_class ra_class)
+{
+	unsigned long flags_mask;
+	unsigned long flags;
+	unsigned long old_ra_class;
+
+	flags_mask = ~(RA_CLASS_MASK | (RA_CLASS_MASK << RA_CLASS_SHIFT));
+	flags = ra->flags & flags_mask;		/* bits outside both class fields */
+
+	old_ra_class = ra_class_new(ra) << RA_CLASS_SHIFT;	/* current class becomes the old one */
+
+	ra->flags = flags | old_ra_class | ra_class;
+}
+
 /*
  * Check if @index falls in the readahead windows.
  */
--- linux-2.6.24-rc4-mm1.orig/mm/Kconfig
+++ linux-2.6.24-rc4-mm1/mm/Kconfig
@@ -194,3 +194,22 @@ config NR_QUICK
 config VIRT_TO_BUS
 	def_bool y
 	depends on !ARCH_NO_VIRT_TO_BUS
+
+config DEBUG_READAHEAD
+	bool "Readahead debug and accounting"
+	default y
+	select DEBUG_FS
+	help
+	  This option injects extra code to dump detailed debug traces and do
+	  readahead events accounting.
+
+	  To actually get the data:
+
+	  mkdir /debug
+	  mount -t debugfs none /debug
+
+	  After that you can do the following:
+
+	  echo > /debug/readahead/events # reset the counters
+	  cat /debug/readahead/events    # check the counters
+
--- linux-2.6.24-rc4-mm1.orig/mm/readahead.c
+++ linux-2.6.24-rc4-mm1/mm/readahead.c
@@ -16,6 +16,29 @@
 #include <linux/task_io_accounting_ops.h>
 #include <linux/pagevec.h>
 #include <linux/pagemap.h>
+#include <linux/debugfs.h>
+
+static const char * const ra_class_name[] = {	/* indexed by enum ra_class */
+	[RA_CLASS_INIT0]	= "init0",
+	[RA_CLASS_INIT]		= "init",
+	[RA_CLASS_SEQUENTIAL]	= "sequential",
+	[RA_CLASS_INTERLEAVED]	= "interleaved",
+	[RA_CLASS_CONTEXT]	= "context",
+	[RA_CLASS_AROUND]	= "around",
+};
+
+#ifdef CONFIG_DEBUG_READAHEAD
+static u32 readahead_debug_level = 1;	/* dprintk at >= 2, ddprintk at >= 3 */
+#  define debug_option(o)		(o)
+#else	/* !CONFIG_DEBUG_READAHEAD */
+#  define debug_option(o)		(0)
+#  define readahead_debug_level 	(0)
+#endif /* CONFIG_DEBUG_READAHEAD */
+
+#define dprintk(args...) \
+	do { if (readahead_debug_level >= 2) printk(KERN_DEBUG args); } while(0)
+#define ddprintk(args...) \
+	do { if (readahead_debug_level >= 3) printk(KERN_DEBUG args); } while(0)
 
 void default_unplug_io_fn(struct backing_dev_info *bdi, struct page *page)
 {
@@ -220,6 +243,13 @@ unsigned long max_sane_readahead(unsigne
 
 static int __init readahead_init(void)
 {
+#ifdef CONFIG_DEBUG_READAHEAD
+	struct dentry *root;	/* NOTE(review): never checked for failure */
+
+	root = debugfs_create_dir("readahead", NULL);	/* NULL parent: debugfs root */
+
+	debugfs_create_u32("debug_level", 0644, root, &readahead_debug_level);
+#endif /* CONFIG_DEBUG_READAHEAD */
 	return bdi_init(&default_backing_dev_info);
 }
 subsys_initcall(readahead_init);
@@ -235,6 +265,15 @@ unsigned long ra_submit(struct file_ra_s
 	actual = __do_page_cache_readahead(mapping, filp,
 					ra->start, ra->size, ra->async_size);
 
+	dprintk("readahead-%s(process: %s/%d, file: %s/%s, "
+			"offset=%ld:%ld, ra=%ld+%d-%d) = %d\n",
+			ra_class_name[ra_class_new(ra)],
+			current->comm, current->pid,
+			mapping->host->i_sb->s_id,
+			filp->f_path.dentry->d_iname,
+			(long)(filp->f_pos >> PAGE_CACHE_SHIFT),
+			(long)(ra->prev_pos >> PAGE_CACHE_SHIFT),
+			ra->start, ra->size, ra->async_size, actual);
 	return actual;
 }
 
@@ -337,6 +376,7 @@ ondemand_readahead(struct address_space 
 		ra->start += ra->size;
 		ra->size = get_next_ra_size(ra, max);
 		ra->async_size = ra->size;
+		ra_set_class(ra, RA_CLASS_SEQUENTIAL);
 		goto readit;
 	}
 
@@ -348,8 +388,15 @@ ondemand_readahead(struct address_space 
 	 * Read as is, and do not pollute the readahead state.
 	 */
 	if (!hit_readahead_marker && !sequential) {
-		return __do_page_cache_readahead(mapping, filp,
+		int actual = __do_page_cache_readahead(mapping, filp,
 						offset, req_size, 0);
+		dprintk("read-random(process: %s/%d, file: %s/%s, "
+			"req=%ld+%ld) = %d\n",
+				current->comm, current->pid,
+				mapping->host->i_sb->s_id,
+				filp->f_path.dentry->d_iname,
+				offset, req_size, actual);
+		return actual;
 	}
 
 	/*
@@ -372,6 +419,7 @@ ondemand_readahead(struct address_space 
 		ra->size = start - offset;	/* old async_size */
 		ra->size = get_next_ra_size(ra, max);
 		ra->async_size = ra->size;
+		ra_set_class(ra, RA_CLASS_INTERLEAVED);
 		goto readit;
 	}
 
@@ -385,6 +433,10 @@ ondemand_readahead(struct address_space 
 	ra->start = offset;
 	ra->size = get_init_ra_size(req_size, max);
 	ra->async_size = ra->size > req_size ? ra->size - req_size : ra->size;
+	if (offset)
+		ra_set_class(ra, RA_CLASS_INIT);
+	else
+		ra_set_class(ra, RA_CLASS_INIT0);
 
 readit:
 	/*
--- linux-2.6.24-rc4-mm1.orig/mm/filemap.c
+++ linux-2.6.24-rc4-mm1/mm/filemap.c
@@ -1340,6 +1340,7 @@ static void do_sync_mmap_readahead(struc
 		ra->start = max_t(long, 0, offset - ra_pages / 2);
 		ra->size = ra_pages;
 		ra->async_size = 0;
+		ra_set_class(ra, RA_CLASS_AROUND);
 		ra_submit(ra, mapping, file);
 	}
 }

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [email protected]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[Index of Archives]     [Kernel Newbies]     [Netfilter]     [Bugtraq]     [Photo]     [Stuff]     [Gimp]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Video 4 Linux]     [Linux for the blind]     [Linux Resources]
  Powered by Linux