[PATCH 09/16] readahead: read-around method for mmap file

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Move the readaround logic outside of filemap_nopage(), and make it a little
more smart and tunable.

The original logic is quite agressive, so relax readahead_hit_rate for mmap
to make the new implementation comparable to the old behavior. It should be
larger than normal files anyway: programs can jump back and forth frequently,
there will be less chance to accumulate a big cache_hit_rate, though the
chance of it coming back to read the missed pages are large.

The pgmajfault was increased on every readahead invocation and other possible
I/O waits. Change it to simply increase on every I/O waits, since readahead
means I/O wait in normal.

Signed-off-by: Wu Fengguang <[email protected]>
---


 include/linux/fs.h |    1 +
 include/linux/mm.h |    3 +++
 mm/filemap.c       |   42 +++++++-----------------------------------
 mm/readahead.c     |   50 ++++++++++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 61 insertions(+), 35 deletions(-)

--- linux.orig/include/linux/mm.h
+++ linux/include/linux/mm.h
@@ -1014,6 +1014,9 @@ page_cache_readahead_adaptive(struct add
 			struct page *prev_page, struct page *page,
 			pgoff_t first_index,
 			pgoff_t index, pgoff_t last_index);
+unsigned long
+page_cache_readaround(struct address_space *mapping, struct file_ra_state *ra,
+					struct file *filp, pgoff_t index);
 void fastcall ra_access(struct file_ra_state *ra, struct page *page);
 
 #ifdef CONFIG_DEBUG_FS
--- linux.orig/mm/filemap.c
+++ linux/mm/filemap.c
@@ -1267,8 +1267,9 @@ struct page *filemap_nopage(struct vm_ar
 	struct inode *inode = mapping->host;
 	struct page *page;
 	unsigned long size, pgoff;
-	int did_readaround = 0, majmin = VM_FAULT_MINOR;
+	int majmin = VM_FAULT_MINOR;
 
+	ra->flags |= RA_FLAG_MMAP;
 	pgoff = ((address-area->vm_start) >> PAGE_CACHE_SHIFT) + area->vm_pgoff;
 
 retry_all:
@@ -1308,47 +1309,19 @@ retry_find:
 		}
 	}
 	if (!page) {
-		unsigned long ra_pages;
-
 		if (VM_SequentialReadHint(area)) {
 			if (!prefer_adaptive_readahead())
 				handle_ra_miss(mapping, ra, pgoff);
 			goto no_cached_page;
 		}
-		ra->mmap_miss++;
 
-		/*
-		 * Do we miss much more than hit in this file? If so,
-		 * stop bothering with read-ahead. It will only hurt.
-		 */
-		if (ra->mmap_miss > ra->mmap_hit + MMAP_LOTSAMISS)
-			goto no_cached_page;
+		page_cache_readaround(mapping, ra, file, pgoff);
 
-		/*
-		 * To keep the pgmajfault counter straight, we need to
-		 * check did_readaround, as this is an inner loop.
-		 */
-		if (!did_readaround) {
-			majmin = VM_FAULT_MAJOR;
-			inc_page_state(pgmajfault);
-		}
-		did_readaround = 1;
-		ra_pages = max_sane_readahead(file->f_ra.ra_pages);
-		if (ra_pages) {
-			pgoff_t start = 0;
-
-			if (pgoff > ra_pages / 2)
-				start = pgoff - ra_pages / 2;
-			do_page_cache_readahead(mapping, file, start, ra_pages);
-		}
 		page = find_get_page(mapping, pgoff);
 		if (!page)
 			goto no_cached_page;
 	}
 
-	if (!did_readaround)
-		ra->mmap_hit++;
-
 	ra_access(ra, page);
 	if (READAHEAD_DEBUG_LEVEL(6))
 		printk(KERN_DEBUG "read-mmap(ino=%lu, idx=%lu, hint=%s, io=%s)\n",
@@ -1371,7 +1344,7 @@ success:
 	mark_page_accessed(page);
 	if (type)
 		*type = majmin;
-	if (prefer_adaptive_readahead())
+	if (prefer_adaptive_readahead() || !VM_SequentialReadHint(area))
 		ra->prev_page = page->index;
 	return page;
 
@@ -1409,10 +1382,9 @@ no_cached_page:
 	return NULL;
 
 page_not_uptodate:
-	if (!did_readaround) {
-		majmin = VM_FAULT_MAJOR;
-		inc_page_state(pgmajfault);
-	}
+	majmin = VM_FAULT_MAJOR;
+	inc_page_state(pgmajfault);
+
 	lock_page(page);
 
 	/* Did it get unhashed while we waited for it? */
--- linux.orig/mm/readahead.c
+++ linux/mm/readahead.c
@@ -1517,6 +1517,56 @@ try_context_based_readahead(struct addre
 
 
 /*
+ * Read-around for mmaped file.
+ */
+unsigned long
+page_cache_readaround(struct address_space *mapping, struct file_ra_state *ra,
+					struct file *filp, pgoff_t index)
+{
+	unsigned long ra_index;
+	unsigned long ra_size = 0;
+	unsigned long hit_rate = 8 + readahead_hit_rate;
+
+	if (ra->cache_hit * readahead_hit_rate > ra->age)
+		ra_size = ra->ra_pages;
+	else if (ra->cache_hit * hit_rate >= ra->age)
+		ra_size = ra->ra_pages / 4;
+	else if ((unsigned)(ra->prev_page - index) <= hit_rate)
+		ra_size = 4 * (ra->prev_page - index);
+	else if ((unsigned)(index - ra->prev_page) <= hit_rate)
+		ra_size = 4 * (index - ra->prev_page);
+	else {
+		read_lock_irq(&mapping->tree_lock);
+		if (radix_tree_lookup_node(&mapping->page_tree, index, 1))
+			ra_size = RADIX_TREE_MAP_SIZE;
+		read_unlock_irq(&mapping->tree_lock);
+	}
+
+	if (!ra_size)
+		return 0;
+
+	ra_size = max_sane_readahead(ra_size);
+
+	if (index > ra_size / 2) {
+		ra_index = index - ra_size / 2;
+		if (!(ra_size & RADIX_TREE_MAP_MASK))
+			ra_index = (ra_index + RADIX_TREE_MAP_SIZE / 2) &
+							~RADIX_TREE_MAP_MASK;
+	} else
+		ra_index = 0;
+
+	set_ra_class(ra, RA_CLASS_AROUND);
+	ra->cache_hit = 0;
+	ra->ra_index = ra_index;
+	ra->la_index = ra_index;
+	ra->readahead_index = ra_index + ra_size;
+	ra->lookahead_index = ra_index + ra_size;
+
+	ra->age = ra_dispatch(ra, mapping, filp);
+	return ra->age;
+}
+
+/*
  * ra_size is mainly determined by:
  * 1. sequential-start: min(MIN_RA_PAGES + (pages>>14), KB(128))
  * 2. sequential-max:	min(ra->ra_pages, 0xFFFF)
--- linux.orig/include/linux/fs.h
+++ linux/include/linux/fs.h
@@ -622,6 +622,7 @@ struct file_ra_state {
 };
 #define RA_FLAG_MISS 0x01	/* a cache miss occured against this file */
 #define RA_FLAG_INCACHE 0x02	/* file is already in cache */
+#define RA_FLAG_MMAP		(1UL<<31)	/* mmaped page access */
 
 struct file {
 	/*

--
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [email protected]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[Index of Archives]     [Kernel Newbies]     [Netfilter]     [Bugtraq]     [Photo]     [Stuff]     [Gimp]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Video 4 Linux]     [Linux for the blind]     [Linux Resources]
  Powered by Linux