[patch 1/3] MAP_NOZERO - implement a new VM_NOZERO/MAP_NOZERO page retirement policy

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



This is the core implementation of the new VM_NOZERO page retirement
policy (and the associated MAP_NOZERO).
A new field  owner_uid  is added to the  mm_struct, and it is kept set to
the effective UID of the task that owns the  mm_struct.
A new field  owner_uid  is also added to the page struct.
When pages exit (are unmapped from) a  vma, they are marked with the effective
UID of the  mm_struct  that owns the vma.
When pages exit the allocator, their  owner_uid  is cleared, unless the
new flag __GFP_UIDKEEP is passed to it. So every page fetcher other than
the new alloc_zeroed_page_vma() clears the owner_uid, which prevents any
later use of the page without it being zeroed first.
The new alloc_zeroed_page_vma() calls __alloc_pages() with the __GFP_UIDKEEP
flag, and checks whether the VM_NOZERO flag is set in the vma and whether the
owner_uid  field of the page matches the one of the  mm_struct  owning the vma.
If either of these tests fails, the page is cleared in the usual way; otherwise
it is passed back without being cleared.
Page-cache pages are (once unmapped) marked with the uid owning the  inode
of the mapping the pages are associated with.




Signed-off-by: Davide Libenzi <[email protected]>


- Davide



---
 include/asm-alpha/page.h     |    3 ++-
 include/asm-cris/page.h      |    3 ++-
 include/asm-generic/mman.h   |    1 +
 include/asm-h8300/page.h     |    3 ++-
 include/asm-i386/page.h      |    3 ++-
 include/asm-ia64/page.h      |    2 +-
 include/asm-m32r/page.h      |    3 ++-
 include/asm-m68knommu/page.h |    3 ++-
 include/asm-s390/page.h      |    3 ++-
 include/asm-x86_64/page.h    |    3 ++-
 include/linux/gfp.h          |    5 +++++
 include/linux/highmem.h      |    7 +------
 include/linux/mm.h           |   16 ++++++++++++++++
 include/linux/mm_types.h     |    1 +
 include/linux/mman.h         |    3 ++-
 include/linux/rmap.h         |    1 +
 include/linux/sched.h        |    3 +++
 kernel/fork.c                |    1 +
 kernel/sys.c                 |    3 +++
 mm/filemap.c                 |    2 ++
 mm/mmap.c                    |    3 ++-
 mm/page_alloc.c              |   33 +++++++++++++++++++++++++++++++++
 mm/rmap.c                    |   14 ++++++++++++++
 23 files changed, 102 insertions(+), 17 deletions(-)

Index: linux-2.6.mod/include/linux/sched.h
===================================================================
--- linux-2.6.mod.orig/include/linux/sched.h	2007-06-21 13:59:38.000000000 -0700
+++ linux-2.6.mod/include/linux/sched.h	2007-06-21 14:01:28.000000000 -0700
@@ -386,6 +386,9 @@
 	/* aio bits */
 	rwlock_t		ioctx_list_lock;
 	struct kioctx		*ioctx_list;
+
+	/* Effective UID of the owner of this mm_struct */
+	uid_t			owner_uid;
 };
 
 struct sighand_struct {
Index: linux-2.6.mod/mm/rmap.c
===================================================================
--- linux-2.6.mod.orig/mm/rmap.c	2007-06-21 14:27:19.000000000 -0700
+++ linux-2.6.mod/mm/rmap.c	2007-06-25 17:42:59.000000000 -0700
@@ -627,6 +627,16 @@
 }
 #endif
 
+void page_set_owner(struct page *page, uid_t owner_uid)
+{
+	if (unlikely(PageCompound(page))) {
+		unsigned int nrpages = 1U << compound_order(page);
+		for (; nrpages; nrpages--, page++)
+			page_set_owner_uid(page, owner_uid);
+	} else
+		page_set_owner_uid(page, owner_uid);
+}
+
 /**
  * page_remove_rmap - take down pte mapping from a page
  * @page: page to remove mapping from
@@ -649,6 +659,10 @@
 				print_symbol (KERN_EMERG "  vma->vm_file->f_op->mmap = %s\n", (unsigned long)vma->vm_file->f_op->mmap);
 			BUG();
 		}
+		/*
+		 * Record the last owner of the page.
+		 */
+		page_set_owner(page, vma->vm_mm->owner_uid);
 
 		/*
 		 * It would be tidy to reset the PageAnon mapping here,
Index: linux-2.6.mod/kernel/fork.c
===================================================================
--- linux-2.6.mod.orig/kernel/fork.c	2007-06-21 14:32:44.000000000 -0700
+++ linux-2.6.mod/kernel/fork.c	2007-06-24 21:23:52.000000000 -0700
@@ -342,6 +342,7 @@
 	mm->ioctx_list = NULL;
 	mm->free_area_cache = TASK_UNMAPPED_BASE;
 	mm->cached_hole_size = ~0UL;
+	mm->owner_uid = current->euid;
 
 	if (likely(!mm_alloc_pgd(mm))) {
 		mm->def_flags = 0;
Index: linux-2.6.mod/include/linux/highmem.h
===================================================================
--- linux-2.6.mod.orig/include/linux/highmem.h	2007-06-21 14:38:02.000000000 -0700
+++ linux-2.6.mod/include/linux/highmem.h	2007-06-22 12:10:36.000000000 -0700
@@ -76,12 +76,7 @@
 static inline struct page *
 alloc_zeroed_user_highpage(struct vm_area_struct *vma, unsigned long vaddr)
 {
-	struct page *page = alloc_page_vma(GFP_HIGHUSER, vma, vaddr);
-
-	if (page)
-		clear_user_highpage(page, vaddr);
-
-	return page;
+	return alloc_zeroed_page_vma(vma, GFP_HIGHUSER, vaddr);
 }
 #endif
 
Index: linux-2.6.mod/include/linux/mm.h
===================================================================
--- linux-2.6.mod.orig/include/linux/mm.h	2007-06-21 14:43:06.000000000 -0700
+++ linux-2.6.mod/include/linux/mm.h	2007-06-25 19:27:42.000000000 -0700
@@ -169,6 +169,7 @@
 #define VM_MAPPED_COPY	0x01000000	/* T if mapped copy of data (nommu mmap) */
 #define VM_INSERTPAGE	0x02000000	/* The vma has had "vm_insert_page()" done on it */
 #define VM_ALWAYSDUMP	0x04000000	/* Always include in core dumps */
+#define VM_NOZERO	0x08000000	/* Do not zero the page, if possible */
 
 #ifndef VM_STACK_DEFAULT_FLAGS		/* arch can override this */
 #define VM_STACK_DEFAULT_FLAGS VM_DATA_DEFAULT_FLAGS
@@ -647,6 +648,21 @@
 	return atomic_read(&(page)->_mapcount) >= 0;
 }
 
+static inline void reset_owner_uid(struct page *page)
+{
+	page->owner_uid = -1;
+}
+
+static inline uid_t page_owner_uid(struct page *page)
+{
+	return (uid_t) page->owner_uid;
+}
+
+static inline void page_set_owner_uid(struct page *page, uid_t uid)
+{
+	page->owner_uid = (int) uid;
+}
+
 /*
  * Error return values for the *_nopage functions
  */
Index: linux-2.6.mod/include/asm-alpha/page.h
===================================================================
--- linux-2.6.mod.orig/include/asm-alpha/page.h	2007-06-21 16:37:20.000000000 -0700
+++ linux-2.6.mod/include/asm-alpha/page.h	2007-06-21 16:40:19.000000000 -0700
@@ -17,7 +17,8 @@
 extern void clear_page(void *page);
 #define clear_user_page(page, vaddr, pg)	clear_page(page)
 
-#define alloc_zeroed_user_highpage(vma, vaddr) alloc_page_vma(GFP_HIGHUSER | __GFP_ZERO, vma, vmaddr)
+#define alloc_zeroed_user_highpage(vma, vaddr) \
+	alloc_zeroed_page_vma(vma, GFP_HIGHUSER, vaddr)
 #define __HAVE_ARCH_ALLOC_ZEROED_USER_HIGHPAGE
 
 extern void copy_page(void * _to, void * _from);
Index: linux-2.6.mod/include/asm-cris/page.h
===================================================================
--- linux-2.6.mod.orig/include/asm-cris/page.h	2007-06-21 16:37:20.000000000 -0700
+++ linux-2.6.mod/include/asm-cris/page.h	2007-06-21 16:40:08.000000000 -0700
@@ -20,7 +20,8 @@
 #define clear_user_page(page, vaddr, pg)    clear_page(page)
 #define copy_user_page(to, from, vaddr, pg) copy_page(to, from)
 
-#define alloc_zeroed_user_highpage(vma, vaddr) alloc_page_vma(GFP_HIGHUSER | __GFP_ZERO, vma, vaddr)
+#define alloc_zeroed_user_highpage(vma, vaddr) \
+	alloc_zeroed_page_vma(vma, GFP_HIGHUSER, vaddr)
 #define __HAVE_ARCH_ALLOC_ZEROED_USER_HIGHPAGE
 
 /*
Index: linux-2.6.mod/include/asm-h8300/page.h
===================================================================
--- linux-2.6.mod.orig/include/asm-h8300/page.h	2007-06-21 16:37:20.000000000 -0700
+++ linux-2.6.mod/include/asm-h8300/page.h	2007-06-21 16:39:57.000000000 -0700
@@ -22,7 +22,8 @@
 #define clear_user_page(page, vaddr, pg)	clear_page(page)
 #define copy_user_page(to, from, vaddr, pg)	copy_page(to, from)
 
-#define alloc_zeroed_user_highpage(vma, vaddr) alloc_page_vma(GFP_HIGHUSER | __GFP_ZERO, vma, vaddr)
+#define alloc_zeroed_user_highpage(vma, vaddr) \
+	alloc_zeroed_page_vma(vma, GFP_HIGHUSER, vaddr)
 #define __HAVE_ARCH_ALLOC_ZEROED_USER_HIGHPAGE
 
 /*
Index: linux-2.6.mod/include/asm-i386/page.h
===================================================================
--- linux-2.6.mod.orig/include/asm-i386/page.h	2007-06-21 16:37:20.000000000 -0700
+++ linux-2.6.mod/include/asm-i386/page.h	2007-06-21 16:39:47.000000000 -0700
@@ -34,7 +34,8 @@
 #define clear_user_page(page, vaddr, pg)	clear_page(page)
 #define copy_user_page(to, from, vaddr, pg)	copy_page(to, from)
 
-#define alloc_zeroed_user_highpage(vma, vaddr) alloc_page_vma(GFP_HIGHUSER | __GFP_ZERO, vma, vaddr)
+#define alloc_zeroed_user_highpage(vma, vaddr) \
+	alloc_zeroed_page_vma(vma, GFP_HIGHUSER, vaddr)
 #define __HAVE_ARCH_ALLOC_ZEROED_USER_HIGHPAGE
 
 /*
Index: linux-2.6.mod/include/asm-ia64/page.h
===================================================================
--- linux-2.6.mod.orig/include/asm-ia64/page.h	2007-06-21 16:37:20.000000000 -0700
+++ linux-2.6.mod/include/asm-ia64/page.h	2007-06-21 16:39:27.000000000 -0700
@@ -89,7 +89,7 @@
 
 #define alloc_zeroed_user_highpage(vma, vaddr) \
 ({						\
-	struct page *page = alloc_page_vma(GFP_HIGHUSER | __GFP_ZERO, vma, vaddr); \
+	struct page *page = alloc_zeroed_page_vma(vma, GFP_HIGHUSER, vaddr); \
 	if (page)				\
  		flush_dcache_page(page);	\
 	page;					\
Index: linux-2.6.mod/include/asm-m32r/page.h
===================================================================
--- linux-2.6.mod.orig/include/asm-m32r/page.h	2007-06-21 16:37:20.000000000 -0700
+++ linux-2.6.mod/include/asm-m32r/page.h	2007-06-21 16:39:00.000000000 -0700
@@ -15,7 +15,8 @@
 #define clear_user_page(page, vaddr, pg)	clear_page(page)
 #define copy_user_page(to, from, vaddr, pg)	copy_page(to, from)
 
-#define alloc_zeroed_user_highpage(vma, vaddr) alloc_page_vma(GFP_HIGHUSER | __GFP_ZERO, vma, vaddr)
+#define alloc_zeroed_user_highpage(vma, vaddr) \
+	alloc_zeroed_page_vma(vma, GFP_HIGHUSER, vaddr)
 #define __HAVE_ARCH_ALLOC_ZEROED_USER_HIGHPAGE
 
 /*
Index: linux-2.6.mod/include/asm-m68knommu/page.h
===================================================================
--- linux-2.6.mod.orig/include/asm-m68knommu/page.h	2007-06-21 16:37:20.000000000 -0700
+++ linux-2.6.mod/include/asm-m68knommu/page.h	2007-06-21 16:38:49.000000000 -0700
@@ -22,7 +22,8 @@
 #define clear_user_page(page, vaddr, pg)	clear_page(page)
 #define copy_user_page(to, from, vaddr, pg)	copy_page(to, from)
 
-#define alloc_zeroed_user_highpage(vma, vaddr) alloc_page_vma(GFP_HIGHUSER | __GFP_ZERO, vma, vaddr)
+#define alloc_zeroed_user_highpage(vma, vaddr) \
+	alloc_zeroed_page_vma(vma, GFP_HIGHUSER, vaddr)
 #define __HAVE_ARCH_ALLOC_ZEROED_USER_HIGHPAGE
 
 /*
Index: linux-2.6.mod/include/asm-s390/page.h
===================================================================
--- linux-2.6.mod.orig/include/asm-s390/page.h	2007-06-21 16:37:20.000000000 -0700
+++ linux-2.6.mod/include/asm-s390/page.h	2007-06-21 16:38:35.000000000 -0700
@@ -64,7 +64,8 @@
 #define clear_user_page(page, vaddr, pg)	clear_page(page)
 #define copy_user_page(to, from, vaddr, pg)	copy_page(to, from)
 
-#define alloc_zeroed_user_highpage(vma, vaddr) alloc_page_vma(GFP_HIGHUSER | __GFP_ZERO, vma, vaddr)
+#define alloc_zeroed_user_highpage(vma, vaddr) \
+	alloc_zeroed_page_vma(vma, GFP_HIGHUSER, vaddr)
 #define __HAVE_ARCH_ALLOC_ZEROED_USER_HIGHPAGE
 
 /*
Index: linux-2.6.mod/include/asm-x86_64/page.h
===================================================================
--- linux-2.6.mod.orig/include/asm-x86_64/page.h	2007-06-21 16:37:20.000000000 -0700
+++ linux-2.6.mod/include/asm-x86_64/page.h	2007-06-21 16:38:13.000000000 -0700
@@ -48,7 +48,8 @@
 #define clear_user_page(page, vaddr, pg)	clear_page(page)
 #define copy_user_page(to, from, vaddr, pg)	copy_page(to, from)
 
-#define alloc_zeroed_user_highpage(vma, vaddr) alloc_page_vma(GFP_HIGHUSER | __GFP_ZERO, vma, vaddr)
+#define alloc_zeroed_user_highpage(vma, vaddr) \
+	alloc_zeroed_page_vma(vma, GFP_HIGHUSER, vaddr)
 #define __HAVE_ARCH_ALLOC_ZEROED_USER_HIGHPAGE
 /*
  * These are used to make use of C type-checking..
Index: linux-2.6.mod/include/asm-generic/mman.h
===================================================================
--- linux-2.6.mod.orig/include/asm-generic/mman.h	2007-06-21 16:43:33.000000000 -0700
+++ linux-2.6.mod/include/asm-generic/mman.h	2007-06-21 18:14:55.000000000 -0700
@@ -13,6 +13,7 @@
 #define PROT_NONE	0x0		/* page can not be accessed */
 #define PROT_GROWSDOWN	0x01000000	/* mprotect flag: extend change to start of growsdown vma */
 #define PROT_GROWSUP	0x02000000	/* mprotect flag: extend change to end of growsup vma */
+#define MAP_NOZERO	0x04000000	/* Do not zero the pages, if possible */
 
 #define MAP_SHARED	0x01		/* Share changes */
 #define MAP_PRIVATE	0x02		/* Changes are private */
Index: linux-2.6.mod/include/linux/mman.h
===================================================================
--- linux-2.6.mod.orig/include/linux/mman.h	2007-06-21 16:47:03.000000000 -0700
+++ linux-2.6.mod/include/linux/mman.h	2007-06-21 16:47:45.000000000 -0700
@@ -63,7 +63,8 @@
 	return _calc_vm_trans(flags, MAP_GROWSDOWN,  VM_GROWSDOWN ) |
 	       _calc_vm_trans(flags, MAP_DENYWRITE,  VM_DENYWRITE ) |
 	       _calc_vm_trans(flags, MAP_EXECUTABLE, VM_EXECUTABLE) |
-	       _calc_vm_trans(flags, MAP_LOCKED,     VM_LOCKED    );
+	       _calc_vm_trans(flags, MAP_LOCKED,     VM_LOCKED    ) |
+	       _calc_vm_trans(flags, MAP_NOZERO,     VM_NOZERO    );
 }
 #endif /* __KERNEL__ */
 #endif /* _LINUX_MMAN_H */
Index: linux-2.6.mod/mm/mmap.c
===================================================================
--- linux-2.6.mod.orig/mm/mmap.c	2007-06-21 16:48:31.000000000 -0700
+++ linux-2.6.mod/mm/mmap.c	2007-06-25 19:14:49.000000000 -0700
@@ -915,7 +915,8 @@
 
 	if (!len)
 		return -EINVAL;
-
+	if (file && (flags & MAP_NOZERO))
+		return -EINVAL;
 	error = arch_mmap_check(addr, len, flags);
 	if (error)
 		return error;
Index: linux-2.6.mod/mm/page_alloc.c
===================================================================
--- linux-2.6.mod.orig/mm/page_alloc.c	2007-06-22 10:56:07.000000000 -0700
+++ linux-2.6.mod/mm/page_alloc.c	2007-06-25 17:40:23.000000000 -0700
@@ -1370,11 +1370,44 @@
 		show_mem();
 	}
 got_pg:
+	if (page && !(gfp_mask & __GFP_UIDKEEP)) {
+		unsigned int pgcount = 1U << order;
+		struct page *npage = page;
+
+		/*
+		 * It'd be possible to remove the loop below by resetting
+		 * page->owner_uid when the page is handed back to the buddy
+		 * allocator. Here we would simply reset page->owner_uid only.
+		 * This reduces the efficiency of page reuse though, since pages
+		 * used by a user may be reset too early.
+		 */
+		for (; pgcount; pgcount--, npage++)
+			reset_owner_uid(npage);
+	}
 	return page;
 }
 
 EXPORT_SYMBOL(__alloc_pages);
 
+static inline int page_need_clear(struct vm_area_struct *vma, struct page *page)
+{
+	return (vma->vm_flags & VM_NOZERO) == 0 ||
+		page_owner_uid(page) != vma->vm_mm->owner_uid;
+}
+
+struct page *alloc_zeroed_page_vma(struct vm_area_struct *vma, gfp_t gfp_mask,
+				   unsigned long vaddr)
+{
+	struct page *page = alloc_page_vma(gfp_mask | __GFP_UIDKEEP, vma, vaddr);
+
+	if (page) {
+		if (page_need_clear(vma, page))
+			clear_user_highpage(page, vaddr);
+		reset_owner_uid(page);
+	}
+	return page;
+}
+
 /*
  * Common helper functions.
  */
Index: linux-2.6.mod/include/linux/gfp.h
===================================================================
--- linux-2.6.mod.orig/include/linux/gfp.h	2007-06-21 16:32:34.000000000 -0700
+++ linux-2.6.mod/include/linux/gfp.h	2007-06-22 12:15:14.000000000 -0700
@@ -45,6 +45,7 @@
 #define __GFP_NOMEMALLOC ((__force gfp_t)0x10000u) /* Don't use emergency reserves */
 #define __GFP_HARDWALL   ((__force gfp_t)0x20000u) /* Enforce hardwall cpuset memory allocs */
 #define __GFP_THISNODE	((__force gfp_t)0x40000u)/* No fallback, no policies */
+#define __GFP_UIDKEEP	((__force gfp_t)0x80000u)	/* Do not clear owner UID */
 
 #define __GFP_BITS_SHIFT 20	/* Room for 20 __GFP_FOO bits */
 #define __GFP_BITS_MASK ((__force gfp_t)((1 << __GFP_BITS_SHIFT) - 1))
@@ -124,6 +125,10 @@
 extern struct page *
 FASTCALL(__alloc_pages(gfp_t, unsigned int, struct zonelist *));
 
+extern struct page *alloc_zeroed_page_vma(struct vm_area_struct *vma,
+					  gfp_t gfp_mask,
+					  unsigned long vaddr);
+
 static inline struct page *alloc_pages_node(int nid, gfp_t gfp_mask,
 						unsigned int order)
 {
Index: linux-2.6.mod/mm/filemap.c
===================================================================
--- linux-2.6.mod.orig/mm/filemap.c	2007-06-24 21:03:07.000000000 -0700
+++ linux-2.6.mod/mm/filemap.c	2007-06-24 22:12:40.000000000 -0700
@@ -20,6 +20,7 @@
 #include <linux/mm.h>
 #include <linux/swap.h>
 #include <linux/mman.h>
+#include <linux/rmap.h>
 #include <linux/pagemap.h>
 #include <linux/file.h>
 #include <linux/uio.h>
@@ -118,6 +119,7 @@
 
 	radix_tree_delete(&mapping->page_tree, page->index);
 	page->mapping = NULL;
+	page_set_owner(page, mapping->host->i_uid);
 	mapping->nrpages--;
 	__dec_zone_page_state(page, NR_FILE_PAGES);
 }
Index: linux-2.6.mod/include/linux/rmap.h
===================================================================
--- linux-2.6.mod.orig/include/linux/rmap.h	2007-06-24 21:28:50.000000000 -0700
+++ linux-2.6.mod/include/linux/rmap.h	2007-06-24 21:29:13.000000000 -0700
@@ -72,6 +72,7 @@
 void page_add_anon_rmap(struct page *, struct vm_area_struct *, unsigned long);
 void page_add_new_anon_rmap(struct page *, struct vm_area_struct *, unsigned long);
 void page_add_file_rmap(struct page *);
+void page_set_owner(struct page *page, uid_t owner_uid);
 void page_remove_rmap(struct page *, struct vm_area_struct *);
 
 #ifdef CONFIG_DEBUG_VM
Index: linux-2.6.mod/include/linux/mm_types.h
===================================================================
--- linux-2.6.mod.orig/include/linux/mm_types.h	2007-06-21 14:02:06.000000000 -0700
+++ linux-2.6.mod/include/linux/mm_types.h	2007-06-25 19:11:22.000000000 -0700
@@ -64,6 +64,7 @@
 	struct list_head lru;		/* Pageout list, eg. active_list
 					 * protected by zone->lru_lock !
 					 */
+	int owner_uid;			/* Last owner of the page */
 	/*
 	 * On machines where all RAM is mapped into kernel address space,
 	 * we can simply calculate the virtual address. On machines with
Index: linux-2.6.mod/kernel/sys.c
===================================================================
--- linux-2.6.mod.orig/kernel/sys.c	2007-06-26 17:40:19.000000000 -0700
+++ linux-2.6.mod/kernel/sys.c	2007-06-26 17:46:08.000000000 -0700
@@ -1149,6 +1149,7 @@
 
 	if (new_euid != old_euid) {
 		current->mm->dumpable = suid_dumpable;
+		current->mm->owner_uid = new_euid;
 		smp_wmb();
 	}
 	current->fsuid = current->euid = new_euid;
@@ -1199,6 +1200,7 @@
 
 	if (old_euid != uid) {
 		current->mm->dumpable = suid_dumpable;
+		current->mm->owner_uid = uid;
 		smp_wmb();
 	}
 	current->fsuid = current->euid = uid;
@@ -1244,6 +1246,7 @@
 	if (euid != (uid_t) -1) {
 		if (euid != current->euid) {
 			current->mm->dumpable = suid_dumpable;
+			current->mm->owner_uid = euid;
 			smp_wmb();
 		}
 		current->euid = euid;

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [email protected]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[Index of Archives]     [Kernel Newbies]     [Netfilter]     [Bugtraq]     [Photo]     [Stuff]     [Gimp]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Video 4 Linux]     [Linux for the blind]     [Linux Resources]
  Powered by Linux