[RFC 2/3] non-resident page tracking

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Prints a histogram of refaults in /proc/refaults.  This allows somebody
to estimate how much more memory a memory-starved system would need to
run better.

It can also help with the evaluation of page replacement algorithms,
since the algorithm that would need the least amount of extra memory
to fit a workload can be identified.

Signed-off-by: Rik van Riel <[email protected]>

Index: linux-2.6.12-vm/fs/proc/proc_misc.c
===================================================================
--- linux-2.6.12-vm.orig/fs/proc/proc_misc.c
+++ linux-2.6.12-vm/fs/proc/proc_misc.c
@@ -219,6 +219,20 @@ static struct file_operations fragmentat
 	.release	= seq_release,
 };
 
+extern struct seq_operations refaults_op;
+/* Open /proc/refaults: hook the file up to the refaults_op seq iterator. */
+static int refaults_open(struct inode *inode, struct file *file)
+{
+	return seq_open(file, &refaults_op);
+}
+
+static struct file_operations refaults_file_operations = { /* /proc/refaults */
+	.open		= refaults_open,	/* seq_open() with refaults_op */
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= seq_release,
+};
+
 static int version_read_proc(char *page, char **start, off_t off,
 				 int count, int *eof, void *data)
 {
@@ -588,6 +602,7 @@ void __init proc_misc_init(void)
 	create_seq_entry("interrupts", 0, &proc_interrupts_operations);
 	create_seq_entry("slabinfo",S_IWUSR|S_IRUGO,&proc_slabinfo_operations);
 	create_seq_entry("buddyinfo",S_IRUGO, &fragmentation_file_operations);
+	create_seq_entry("refaults",S_IRUGO, &refaults_file_operations);
 	create_seq_entry("vmstat",S_IRUGO, &proc_vmstat_file_operations);
 	create_seq_entry("diskstats", 0, &proc_diskstats_operations);
 #ifdef CONFIG_MODULES
Index: linux-2.6.12-vm/mm/nonresident.c
===================================================================
--- linux-2.6.12-vm.orig/mm/nonresident.c
+++ linux-2.6.12-vm/mm/nonresident.c
@@ -24,6 +24,7 @@
 #include <linux/hash.h>
 #include <linux/prefetch.h>
 #include <linux/kernel.h>
+#include <linux/percpu.h>
 
 /* Number of non-resident pages per hash bucket */
 #define NUM_NR ((L1_CACHE_BYTES - sizeof(atomic_t))/sizeof(u32))
@@ -34,6 +35,9 @@ struct nr_bucket
 	u32 page[NUM_NR];
 } ____cacheline_aligned;
 
+/* Histogram for non-resident refault hits. [NUM_NR] means "not found". */
+DEFINE_PER_CPU(unsigned long[NUM_NR+1], refault_histogram);
+
 /* The non-resident page hash table. */
 static struct nr_bucket * nonres_table;
 static unsigned int nonres_shift;
@@ -81,11 +85,14 @@ int recently_evicted(struct address_spac
 			nr_bucket->page[i] = 0;
 			/* Return the distance between entry and clock hand. */
 			distance = atomic_read(&nr_bucket->hand) + NUM_NR - i;
-			distance = (distance % NUM_NR) + 1;
-			return distance * (1 << nonres_shift);
+			distance = distance % NUM_NR;
+			__get_cpu_var(refault_histogram)[distance]++;
+			return (distance + 1) * (1 << nonres_shift);
 		}
 	}
 
+	/* If this page was evicted, it was longer ago than our history. */
+	__get_cpu_var(refault_histogram)[NUM_NR]++;
 	return -1;
 }
 
@@ -155,3 +162,68 @@ static int __init set_nonresident_factor
 	return 1;
 }
 __setup("nonresident_factor=", set_nonresident_factor);
+
+#ifdef CONFIG_PROC_FS
+
+#include <linux/seq_file.h>
+
+/* seq_file ->start: valid positions are 0..NUM_NR, one per histogram slot. */
+static void *frag_start(struct seq_file *m, loff_t *pos)
+{
+	if (*pos < 0 || *pos > NUM_NR)
+		return NULL;
+
+	m->private = (void *)(unsigned long)*pos; /* slot read by ->show */
+	return pos;
+}
+
+/* seq_file ->next: advance to the following slot; NULL after slot NUM_NR. */
+static void *frag_next(struct seq_file *m, void *arg, loff_t *pos)
+{
+	if (*pos >= NUM_NR)
+		return NULL;
+	(*pos)++;
+	m->private = (void *)(unsigned long)*pos; /* keep in step with *pos */
+	return pos;
+}
+
+/* seq_file ->stop: no-op, since frag_start() takes no lock or reference. */
+static void frag_stop(struct seq_file *m, void *arg)
+{ }
+
+/* Sum histogram slot 'index' over online CPUs only; map holes are skipped. */
+unsigned long get_refault_stat(unsigned long index)
+{
+	unsigned long total = 0;
+	int cpu;
+
+	for_each_online_cpu(cpu)
+		total += per_cpu(refault_histogram, cpu)[index];
+	return total;
+}
+
+/* Print one histogram row; slot NUM_NR is the "not found" bucket. */
+static int frag_show(struct seq_file *m, void *arg)
+{
+	unsigned long slot = (unsigned long)m->private;
+	unsigned long from = slot << nonres_shift;
+	unsigned long to = (slot + 1) << nonres_shift;
+	unsigned long count = get_refault_stat(slot);
+
+	if (!slot)	/* column header goes out once, ahead of the first row */
+		seq_printf(m, "     Refault distance          Hits\n");
+
+	if (slot >= NUM_NR)
+		seq_printf(m, " New/Beyond %9lu     %9lu\n", from, count);
+	else
+		seq_printf(m, "%9lu - %9lu     %9lu\n", from, to, count);
+	return 0;
+}
+
+struct seq_operations refaults_op = {	/* iterator behind /proc/refaults */
+	.start  = frag_start,	/* accepts positions 0..NUM_NR */
+	.next   = frag_next,
+	.stop   = frag_stop,	/* empty */
+	.show   = frag_show,	/* one histogram row per call */
+};
+#endif /* CONFIG_PROC_FS */

--
-- 
All Rights Reversed
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [email protected]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[Index of Archives]     [Kernel Newbies]     [Netfilter]     [Bugtraq]     [Photo]     [Gimp]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Video 4 Linux]     [Linux for the blind]
  Powered by Linux