[patch 3/3 take2] smaps: add clear_refs file to clear reference

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Adds an additional file to /proc/pid: clear_refs.  When any non-zero
number is written to this file, all the PG_referenced flags and
PAGE_ACCESSED (meaning the page has been accessed) are cleared within each
VMA for the corresponding task.

It is now possible to measure how much memory a task is using by clearing
the reference bits with

	echo 1 > /proc/pid/clear_refs

and then checking, some time later, how much of each VMA is reported as
referenced in the /proc/pid/smaps output.

The /proc/pid/clear_refs file is only writable by the user who owns the
task.

Cc: Hugh Dickins <[email protected]>
Cc: Paul Mundt <[email protected]>
Cc: Christoph Lameter <[email protected]>
Signed-off-by: David Rientjes <[email protected]>
---
 fs/proc/base.c          |   31 +++++++++++++++++++++++++++++++
 fs/proc/task_mmu.c      |   37 +++++++++++++++++++++++++++++++++++++
 include/linux/proc_fs.h |    1 +
 3 files changed, 69 insertions(+), 0 deletions(-)

diff --git a/fs/proc/base.c b/fs/proc/base.c
index 1a979ea..b50315f 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -715,6 +715,53 @@ static struct file_operations proc_oom_adjust_operations = {
 	.write		= oom_adjust_write,
 };
 
+/*
+ * Write handler for /proc/pid/clear_refs.  Writing any non-zero number runs
+ * clear_refs_smap() over the VMA list of the task's address space.
+ * Returns the number of bytes consumed, -EFAULT if the user buffer cannot be
+ * copied, -EINVAL if the value parses as zero, or -ESRCH if the task is gone.
+ */
+static ssize_t clear_refs_write(struct file *file, const char __user *buf,
+				size_t count, loff_t *ppos)
+{
+	struct task_struct *task;
+	struct mm_struct *mm;
+	char buffer[PROC_NUMBUF], *end;
+
+	memset(buffer, 0, sizeof(buffer));
+	if (count > sizeof(buffer) - 1)
+		count = sizeof(buffer) - 1;
+	if (copy_from_user(buffer, buf, count))
+		return -EFAULT;
+	if (!simple_strtol(buffer, &end, 0))
+		return -EINVAL;
+	if (*end == '\n')
+		end++;
+	task = get_proc_task(file->f_path.dentry->d_inode);
+	if (!task)
+		return -ESRCH;
+	/*
+	 * task->mm may be NULL (e.g. kernel threads) or be torn down while
+	 * we look at it: pin it with get_task_mm() and hold mmap_sem for
+	 * reading so the VMA list cannot change during the walk.
+	 */
+	mm = get_task_mm(task);
+	if (mm) {
+		down_read(&mm->mmap_sem);
+		clear_refs_smap(mm->mmap);
+		up_read(&mm->mmap_sem);
+		mmput(mm);
+	}
+	put_task_struct(task);
+	if (end - buffer == 0)
+		return -EIO;
+	return end - buffer;
+}
+
+static struct file_operations proc_clear_refs_operations = {
+	.write		= clear_refs_write,	/* only a write handler is set */
+};
+
 #ifdef CONFIG_AUDITSYSCALL
 #define TMPBUFLEN 21
 static ssize_t proc_loginuid_read(struct file * file, char __user * buf,
@@ -1856,6 +1885,7 @@ static struct pid_entry tgid_base_stuff[] = {
 	REG("mounts",     S_IRUGO, mounts),
 	REG("mountstats", S_IRUSR, mountstats),
 #ifdef CONFIG_MMU
+	REG("clear_refs", S_IWUSR, clear_refs),
 	REG("smaps",      S_IRUGO, smaps),
 #endif
 #ifdef CONFIG_SECURITY
@@ -2137,6 +2167,7 @@ static struct pid_entry tid_base_stuff[] = {
 	LNK("exe",       exe),
 	REG("mounts",    S_IRUGO, mounts),
 #ifdef CONFIG_MMU
+	REG("clear_refs", S_IWUSR, clear_refs),
 	REG("smaps",     S_IRUGO, smaps),
 #endif
 #ifdef CONFIG_SECURITY
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 50bd004..b689a92 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -322,6 +322,27 @@ static void smaps_pte_func(struct pte_walker *walker, pte_t *pte,
 }
 
 /*
+ * Called for each PTE in the struct pte_walker address range.  For all normal,
+ * present pages, we clear their referenced bits.
+ */
+static void clear_refs_pte_func(struct pte_walker *walker, pte_t *pte,
+				unsigned long addr)
+{
+	struct page *page;
+	pte_t ptent = *pte;
+
+	if (!pte_present(ptent))
+		return;
+	page = vm_normal_page(walker->vma, addr, ptent);
+	if (!page)
+		return;
+
+	/* pte_mkold() only returns an aged copy; clear the live PTE itself. */
+	ptep_test_and_clear_young(walker->vma, addr, pte);
+	ClearPageReferenced(page);
+}
+
+/*
  * Displays the smap for the process.  smaps_pte_func() is called for each PTE
  * in the range from vma->vm_start to vma->vm_end.
  */
@@ -343,6 +364,22 @@ static int show_smap(struct seq_file *m, void *v)
 	return show_map_internal(m, v, &mss);
 }
 
+/* Run clear_refs_pte_func() over each non-hugetlb VMA with an mm, from @vma on. */
+void clear_refs_smap(struct vm_area_struct *vma)
+{
+	for (; vma; vma = vma->vm_next) {
+		struct pte_walker walker = {
+			.func		= clear_refs_pte_func,
+			.vma		= vma,
+			.start		= vma->vm_start,
+			.end		= vma->vm_end,
+		};
+
+		if (vma->vm_mm && !is_vm_hugetlb_page(vma))
+			walk_pgds(&walker);
+	}
+}
+
 static void *m_start(struct seq_file *m, loff_t *pos)
 {
 	struct proc_maps_private *priv = m->private;
diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h
index 87dec8f..f3d426b 100644
--- a/include/linux/proc_fs.h
+++ b/include/linux/proc_fs.h
@@ -104,6 +104,7 @@ int proc_pid_readdir(struct file * filp, void * dirent, filldir_t filldir);
 unsigned long task_vsize(struct mm_struct *);
 int task_statm(struct mm_struct *, int *, int *, int *, int *);
 char *task_mem(struct mm_struct *, char *);
+void clear_refs_smap(struct vm_area_struct *);
 
 extern struct proc_dir_entry *create_proc_entry(const char *name, mode_t mode,
 						struct proc_dir_entry *parent);
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [email protected]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[Index of Archives]     [Kernel Newbies]     [Netfilter]     [Bugtraq]     [Photo]     [Stuff]     [Gimp]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Video 4 Linux]     [Linux for the blind]     [Linux Resources]
  Powered by Linux