[PATCH/RFT 4/5] CLOCK-Pro page replacement

Implement an approximation of Song Jiang's CLOCK-Pro page replacement
algorithm.  The algorithm has been extended to handle multiple memory
zones, which in turn required changes to how the active page limit is
readjusted: on every refault, the page's estimated inter-reference
distance is compared against that of the oldest page on the active
list, and each zone's active_limit is nudged up or down according to
the outcome.
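
The heart of the approximation is the refault distance test.  As a
rough sketch (not part of the patch; refault_is_hot is a made-up name
condensing what page_is_hot() in mm/clockpro.c does, minus the per-CPU
bookkeeping and counter decay):

	/*
	 * A refaulting page is "hot" when its estimated inter-reference
	 * distance is shorter than that of the oldest page on the
	 * active list:
	 *
	 *	distance / evicted  <  nr_active / active_scanned
	 *
	 * cross-multiplied to avoid integer division.
	 */
	static int refault_is_hot(unsigned long long distance,
				  unsigned long long evicted,
				  unsigned long nr_active,
				  unsigned long active_scanned)
	{
		return distance * active_scanned < evicted * nr_active;
	}

Here distance is the estimated pageable memory plus the page's refault
distance, and evicted counts pages evicted since the counters were last
halved.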

TODO:
 - verify that things work as expected
 - figure out where to put new anonymous pages

More information can be found at:
 - http://www.cs.wm.edu/hpcs/WWW/HTML/publications/abs05-3.html
 - http://linux-mm.org/wiki/ClockProApproximation
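
The limit readjustment is bounded and deliberately asymmetric.  The
hypothetical helper below (again a sketch, not patch code) collects the
active_limit updates that page_is_hot() makes inline; shrink_zone()
then scans the active list only once zone->nr_active exceeds
zone->active_limit:

	static void adjust_active_limit(struct zone *zone, int hot,
					unsigned long evicted)
	{
		if (hot) {
			/* Refaulting pages beat the coldest active
			 * pages: shrink the target, but never below
			 * 1/8 of the zone. */
			if (zone->active_limit > zone->present_pages / 8)
				zone->active_limit--;
		} else if (evicted & 1) {
			/* Cold refault: grow the target slowly,
			 * capped at 7/8 of the zone. */
			if (zone->active_limit < zone->present_pages * 7 / 8)
				zone->active_limit++;
		}
	}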

Signed-off-by: Rik van Riel <[email protected]>

Index: linux-2.6.12-vm/include/linux/mmzone.h
===================================================================
--- linux-2.6.12-vm.orig/include/linux/mmzone.h
+++ linux-2.6.12-vm/include/linux/mmzone.h
@@ -143,6 +143,8 @@ struct zone {
 	unsigned long		nr_inactive;
 	unsigned long		pages_scanned;	   /* since last reclaim */
 	int			all_unreclaimable; /* All pages pinned */
+	unsigned long		active_limit;
+	unsigned long		active_scanned;
 
 	/*
 	 * prev_priority holds the scanning priority for this zone.  It is
Index: linux-2.6.12-vm/include/linux/swap.h
===================================================================
--- linux-2.6.12-vm.orig/include/linux/swap.h
+++ linux-2.6.12-vm/include/linux/swap.h
@@ -154,10 +154,15 @@ extern void out_of_memory(unsigned int _
 extern void swapin_readahead(swp_entry_t, unsigned long, struct vm_area_struct *);
 
 /* linux/mm/nonresident.c */
-extern int remember_page(struct address_space *, unsigned long);
+extern int do_remember_page(struct address_space *, unsigned long);
 extern int recently_evicted(struct address_space *, unsigned long);
 extern void init_nonresident(void);
 
+/* linux/mm/clockpro.c */
+extern void remember_page(struct page *, struct address_space *, unsigned long);
+extern int page_is_hot(struct page *, struct address_space *, unsigned long);
+DECLARE_PER_CPU(unsigned long, evicted_pages);
+
 /* linux/mm/page_alloc.c */
 extern unsigned long totalram_pages;
 extern unsigned long totalhigh_pages;
@@ -298,6 +303,9 @@ static inline swp_entry_t get_swap_page(
-#define remember_page(x,y)	0
+#define remember_page(x,y,z)	do { } while (0)
 #define recently_evicted(x,y)	0
 
+/* linux/mm/clockpro.c */
+#define page_is_hot(x,y,z)	0
+
 #endif /* CONFIG_SWAP */
 #endif /* __KERNEL__*/
 #endif /* _LINUX_SWAP_H */
Index: linux-2.6.12-vm/mm/Makefile
===================================================================
--- linux-2.6.12-vm.orig/mm/Makefile
+++ linux-2.6.12-vm/mm/Makefile
@@ -13,7 +13,7 @@ obj-y			:= bootmem.o filemap.o mempool.o
 			   prio_tree.o $(mmu-y)
 
 obj-$(CONFIG_SWAP)	+= page_io.o swap_state.o swapfile.o thrash.o \
-			   nonresident.o
+			   nonresident.o clockpro.o
 obj-$(CONFIG_HUGETLBFS)	+= hugetlb.o
 obj-$(CONFIG_NUMA) 	+= mempolicy.o
 obj-$(CONFIG_SHMEM) += shmem.o
Index: linux-2.6.12-vm/mm/clockpro.c
===================================================================
--- /dev/null
+++ linux-2.6.12-vm/mm/clockpro.c
@@ -0,0 +1,109 @@
+/*
+ * mm/clockpro.c
+ * (C) 2005 Red Hat, Inc
+ * Written by Rik van Riel <[email protected]>
+ * Released under the GPL, see the file COPYING for details.
+ *
+ * Helper functions to implement CLOCK-Pro page replacement policy.
+ * For details see: http://linux-mm.org/wiki/AdvancedPageReplacement
+ */
+#include <linux/mm.h>
+#include <linux/mmzone.h>
+#include <linux/swap.h>
+
+DEFINE_PER_CPU(unsigned long, evicted_pages);
+
+static unsigned long get_evicted(void)
+{
+	unsigned long total = 0;
+	int cpu;
+
+	for_each_cpu(cpu)
+		total += per_cpu(evicted_pages, cpu);
+
+	return total;
+}
+
+static unsigned long estimate_pageable_memory(void)
+{
+	static unsigned long next_check;
+	static unsigned long total;
+	unsigned long active, inactive, free;
+
+	if (time_after(jiffies, next_check)) {
+		get_zone_counts(&active, &inactive, &free);
+		total = active + inactive + free;
+		next_check = jiffies + HZ/10;
+	}
+
+	return total;
+}
+
+static void decay_clockpro_variables(void)
+{
+	struct zone * zone;
+	int cpu;
+
+	for_each_cpu(cpu)
+		per_cpu(evicted_pages, cpu) /= 2;
+
+	for_each_zone(zone)
+		zone->active_scanned /= 2;
+}
+
+int page_is_hot(struct page * page, struct address_space * mapping,
+		unsigned long index)
+{
+	unsigned long long distance;
+	unsigned long long evicted;
+	int refault_distance;
+	struct zone *zone;
+
+	/* Was the page recently evicted ? */
+	refault_distance = recently_evicted(mapping, index);
+	if (refault_distance < 0)
+		return 0;
+
+	distance = estimate_pageable_memory() + refault_distance;
+	evicted = get_evicted();
+	zone = page_zone(page);
+
+	/* Only consider recent history for the calculation below. */
+	if (unlikely(evicted > distance))
+		decay_clockpro_variables();
+
+	/*
+	 * Estimate whether the inter-reference distance of the tested
+	 * page is smaller than the inter-reference distance of the
+	 * oldest page on the active list.
+	 *
+	 *  distance        zone->nr_active
+	 * ---------- <  ----------------------
+	 *  evicted       zone->active_scanned
+	 */
+	if (distance * zone->active_scanned < evicted * zone->nr_active) {
+		if (zone->active_limit > zone->present_pages / 8)
+			zone->active_limit--;
+		return 1;
+	}
+
+	/* Increase the active limit more slowly. */
+	if ((evicted & 1) && zone->active_limit < zone->present_pages * 7 / 8)
+		zone->active_limit++;
+	return 0;
+}
+
+void remember_page(struct page * page, struct address_space * mapping,
+		unsigned long index)
+{
+	struct zone * zone = page_zone(page);
+
+	/*
+	 * If the nonresident slot we recycled still held a live entry,
+	 * nudge this zone's active list target up on roughly every
+	 * other such event (odd index as a cheap coin flip).
+	 */
+	if (do_remember_page(mapping, index) && (index & 1) &&
+			zone->active_limit < zone->present_pages * 7 / 8)
+		zone->active_limit++;
+}
Index: linux-2.6.12-vm/mm/filemap.c
===================================================================
--- linux-2.6.12-vm.orig/mm/filemap.c
+++ linux-2.6.12-vm/mm/filemap.c
@@ -401,9 +401,12 @@ int add_to_page_cache_lru(struct page *p
 				pgoff_t offset, int gfp_mask)
 {
 	int ret = add_to_page_cache(page, mapping, offset, gfp_mask);
-	recently_evicted(mapping, offset);
-	if (ret == 0)
-		lru_cache_add(page);
+	if (ret == 0) {
+		if (page_is_hot(page, mapping, offset))
+			lru_cache_add_active(page);
+		else
+			lru_cache_add(page);
+	}
 	return ret;
 }
 
Index: linux-2.6.12-vm/mm/nonresident.c
===================================================================
--- linux-2.6.12-vm.orig/mm/nonresident.c
+++ linux-2.6.12-vm/mm/nonresident.c
@@ -25,6 +25,7 @@
 #include <linux/prefetch.h>
 #include <linux/kernel.h>
 #include <linux/percpu.h>
+#include <linux/swap.h>
 
 /* Number of non-resident pages per hash bucket. Never smaller than 15. */
 #if (L1_CACHE_BYTES < 64)
@@ -101,7 +102,7 @@ int recently_evicted(struct address_spac
 	return -1;
 }
 
-int remember_page(struct address_space * mapping, unsigned long index)
+int do_remember_page(struct address_space * mapping, unsigned long index)
 {
 	struct nr_bucket * nr_bucket;
 	u32 nrpage;
@@ -125,6 +126,7 @@ int remember_page(struct address_space *
+	__get_cpu_var(evicted_pages)++;
 	preempt_enable();
 
 	/* Statistics may want to know whether the entry was in use. */
 	return xchg(&nr_bucket->page[i], nrpage);
 }
 
Index: linux-2.6.12-vm/mm/page_alloc.c
===================================================================
--- linux-2.6.12-vm.orig/mm/page_alloc.c
+++ linux-2.6.12-vm/mm/page_alloc.c
@@ -1715,6 +1715,7 @@ static void __init free_area_init_core(s
 		zone->nr_scan_inactive = 0;
 		zone->nr_active = 0;
 		zone->nr_inactive = 0;
+		zone->active_limit = zone->present_pages * 2 / 3;
 		if (!size)
 			continue;
 
Index: linux-2.6.12-vm/mm/swap_state.c
===================================================================
--- linux-2.6.12-vm.orig/mm/swap_state.c
+++ linux-2.6.12-vm/mm/swap_state.c
@@ -323,6 +323,7 @@ struct page *read_swap_cache_async(swp_e
 			struct vm_area_struct *vma, unsigned long addr)
 {
 	struct page *found_page, *new_page = NULL;
+	int active;
 	int err;
 
 	do {
@@ -344,7 +345,7 @@ struct page *read_swap_cache_async(swp_e
 				break;		/* Out of memory */
 		}
 
-		recently_evicted(&swapper_space, entry.val);
+		active = page_is_hot(new_page, &swapper_space, entry.val);
 
 		/*
 		 * Associate the page with swap entry in the swap cache.
@@ -361,7 +362,10 @@ struct page *read_swap_cache_async(swp_e
 			/*
 			 * Initiate read into locked page and return.
 			 */
-			lru_cache_add_active(new_page);
+			if (active)
+				lru_cache_add_active(new_page);
+			else
+				lru_cache_add(new_page);
 			swap_readpage(NULL, new_page);
 			return new_page;
 		}
Index: linux-2.6.12-vm/mm/vmscan.c
===================================================================
--- linux-2.6.12-vm.orig/mm/vmscan.c
+++ linux-2.6.12-vm/mm/vmscan.c
@@ -355,12 +355,14 @@ static int shrink_list(struct list_head 
 	while (!list_empty(page_list)) {
 		struct address_space *mapping;
 		struct page *page;
+		struct zone *zone;
 		int may_enter_fs;
 		int referenced;
 
 		cond_resched();
 
 		page = lru_to_page(page_list);
+		zone = page_zone(page);
 		list_del(&page->lru);
 
 		if (TestSetPageLocked(page))
@@ -492,7 +494,7 @@ static int shrink_list(struct list_head 
 #ifdef CONFIG_SWAP
 		if (PageSwapCache(page)) {
 			swp_entry_t swap = { .val = page->private };
-			remember_page(&swapper_space, page->private);
+			remember_page(page, &swapper_space, page->private);
 			__delete_from_swap_cache(page);
 			write_unlock_irq(&mapping->tree_lock);
 			swap_free(swap);
@@ -501,7 +503,7 @@ static int shrink_list(struct list_head 
 		}
 #endif /* CONFIG_SWAP */
 
-		remember_page(page->mapping, page->index);
+		remember_page(page, page->mapping, page->index);
 		__remove_from_page_cache(page);
 		write_unlock_irq(&mapping->tree_lock);
 		__put_page(page);
@@ -684,6 +686,7 @@ refill_inactive_zone(struct zone *zone, 
 	pgmoved = isolate_lru_pages(nr_pages, &zone->active_list,
 				    &l_hold, &pgscanned);
 	zone->pages_scanned += pgscanned;
+	zone->active_scanned += pgscanned;
 	zone->nr_active -= pgmoved;
 	spin_unlock_irq(&zone->lru_lock);
 
@@ -799,10 +802,15 @@ shrink_zone(struct zone *zone, struct sc
 	unsigned long nr_inactive;
 
 	/*
-	 * Add one to `nr_to_scan' just to make sure that the kernel will
-	 * slowly sift through the active list.
+	 * Scan the active list if we have too many active pages.
+	 * The limit is automatically adjusted through refaults
+	 * measuring how well the VM did in the past.
 	 */
-	zone->nr_scan_active += (zone->nr_active >> sc->priority) + 1;
+	if (zone->nr_active > zone->active_limit)
+		zone->nr_scan_active += zone->nr_active - zone->active_limit;
+	else if (sc->priority < DEF_PRIORITY - 2)
+		zone->nr_scan_active += (zone->nr_active >> sc->priority) + 1;
+
 	nr_active = zone->nr_scan_active;
 	if (nr_active >= sc->swap_cluster_max)
 		zone->nr_scan_active = 0;

-- 
All Rights Reversed