[PATCH 13/13] maps#2: Add /proc/kpagemap interface

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Add /proc/kpagemap interface

This makes physical page flags and counts available to userspace.
Together with /proc/pid/pagemap and /proc/pid/clear_refs, this can be
used to measure memory usage on a per-page basis.

Signed-off-by: Matt Mackall <[email protected]>

Index: mm/fs/proc/proc_misc.c
===================================================================
--- mm.orig/fs/proc/proc_misc.c	2007-04-05 14:18:49.000000000 -0500
+++ mm/fs/proc/proc_misc.c	2007-04-05 14:26:23.000000000 -0500
@@ -46,6 +46,8 @@
 #include <linux/vmalloc.h>
 #include <linux/crash_dump.h>
 #include <linux/pid_namespace.h>
+#include <linux/ptrace.h>
+#include <linux/bootmem.h>
 #include <asm/uaccess.h>
 #include <asm/pgtable.h>
 #include <asm/io.h>
@@ -733,6 +735,91 @@ static struct file_operations proc_page_
 };
 #endif
 
+#ifdef CONFIG_PROC_KPAGEMAP
+#define KPMSIZE (sizeof(unsigned long) * 2)
+#define KPMMASK (KPMSIZE - 1)
+/* /proc/kpagemap - an array exposing page flags and counts
+ *
+ * Each entry is a pair of unsigned longs representing the
+ * corresponding physical page, the first containing the page flags
+ * and the second containing the page use count.
+ *
+ * Entry 0 is a header (so pfn N is entry N + 1); its first 4 bytes are:
+ * first byte:   0 for big endian, 1 for little
+ * second byte:  page shift (eg 12 for 4096 byte pages)
+ * third byte:   size of each field in bytes (currently either 4 or 8)
+ * fourth byte:  header size
+ *
+ * Returns bytes copied (0 at EOF), -EIO if the offset or the EOF-clamped
+ * length is not a multiple of the entry size, or -ENOMEM/-EFAULT.
+ */
+static ssize_t kpagemap_read(struct file *file, char __user *buf,
+			     size_t count, loff_t *ppos)
+{
+	unsigned long *page;
+	struct page *ppage;
+	unsigned long src = *ppos;
+	unsigned long pfn, end = (max_pfn + 1) * KPMSIZE; /* header + pfns */
+	ssize_t ret = 0;
+	int chunk, i;
+
+	if (src >= end)		/* EOF; also keeps end - src from wrapping */
+		return (src & KPMMASK) ? -EIO : 0;
+	pfn = src / KPMSIZE - 1;	/* wraps to -1 for the header entry */
+	count = min_t(size_t, count, end - src);
+	if (src & KPMMASK || count & KPMMASK)
+		return -EIO;
+
+	page = (unsigned long *)__get_free_page(GFP_USER);
+	if (!page)
+		return -ENOMEM;
+
+	while (count > 0) {
+		chunk = min_t(size_t, count, PAGE_SIZE);
+		i = 0;
+
+		if (pfn == -1) {
+			page[0] = 0;
+			page[1] = 0;
+			((char *)page)[0] = (ntohl(1) != 1);
+			((char *)page)[1] = PAGE_SHIFT;
+			((char *)page)[2] = sizeof(unsigned long);
+			((char *)page)[3] = KPMSIZE;
+			i = 2;
+			pfn++;
+		}
+
+		for (; i < 2 * chunk / KPMSIZE; i += 2, pfn++) {
+			/* pfn_to_page() is never NULL; check pfn_valid() */
+			ppage = pfn_valid(pfn) ? pfn_to_page(pfn) : NULL;
+			page[i] = ppage ? ppage->flags : 0;
+			page[i + 1] = ppage ? atomic_read(&ppage->_count) : 0;
+		}
+		chunk = (i / 2) * KPMSIZE;
+
+		if (copy_to_user(buf, page, chunk)) {
+			ret = -EFAULT;
+			break;
+		}
+		ret += chunk;
+		src += chunk;
+		buf += chunk;
+		count -= chunk;
+		cond_resched();
+	}
+	*ppos = src;
+
+	free_page((unsigned long)page);
+	return ret;
+}
+
+struct proc_dir_entry *proc_kpagemap;	/* /proc/kpagemap entry, set at init */
+static struct file_operations proc_kpagemap_operations = {
+	.llseek = mem_lseek,	/* seek helper defined outside this patch */
+	.read = kpagemap_read,	/* read-only: no .write handler */
+};
+#endif
+
 struct proc_dir_entry *proc_root_kcore;
 
 void create_seq_entry(char *name, mode_t mode, const struct file_operations *f)
@@ -812,6 +899,11 @@ void __init proc_misc_init(void)
 				(size_t)high_memory - PAGE_OFFSET + PAGE_SIZE;
 	}
 #endif
+#ifdef CONFIG_PROC_KPAGEMAP
+	proc_kpagemap = create_proc_entry("kpagemap", S_IRUSR, NULL);
+	if (proc_kpagemap)
+		proc_kpagemap->proc_fops = &proc_kpagemap_operations;
+#endif
 #ifdef CONFIG_PROC_VMCORE
 	proc_vmcore = create_proc_entry("vmcore", S_IRUSR, NULL);
 	if (proc_vmcore)
Index: mm/init/Kconfig
===================================================================
--- mm.orig/init/Kconfig	2007-04-05 14:18:49.000000000 -0500
+++ mm/init/Kconfig	2007-04-05 14:26:23.000000000 -0500
@@ -612,6 +612,15 @@ config PROC_PAGEMAP
           with other processes. Disabling this interface will reduce the
           size of the kernel for small machines.
 
+config PROC_KPAGEMAP
+	default y
+	bool "Enable /proc/kpagemap support" if EMBEDDED && PROC_FS
+	help
+	  The /proc/kpagemap interface allows reading the
+          kernel's per-page flags and usage counts to gather precise
+          information on page-level memory usage. Disabling this interface
+          will reduce the size of the kernel for small machines.
+
 endmenu		# General setup
 
 config RT_MUTEXES
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [email protected]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[Index of Archives]     [Kernel Newbies]     [Netfilter]     [Bugtraq]     [Photo]     [Stuff]     [Gimp]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Video 4 Linux]     [Linux for the blind]     [Linux Resources]
  Powered by Linux