[UPDATED] Re: [PATCH 1/5] Add pagetable allocation notifiers

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Chris Wright wrote:

* Zachary Amsden ([email protected]) wrote:
Chris Wright wrote:

* Zachary Amsden ([email protected]) wrote:


--- linux-2.6.13.orig/arch/i386/mm/init.c 2005-08-24 09:31:05.000000000 -0700
+++ linux-2.6.13/arch/i386/mm/init.c	2005-08-24 09:31:31.000000000 -0700
@@ -79,6 +79,7 @@ static pte_t * __init one_page_table_ini
{
	if (pmd_none(*pmd)) {
pte_t *page_table = (pte_t *) alloc_bootmem_low_pages(PAGE_SIZE);
+		SetPagePTE(virt_to_page(page_table));
Xen has this on one_md_table_init() as well for pmd.
I'll add that in another patch. It's easy to miss some of the init time call sites (we don't actually depend on them for correctness).

You could just respin this patch.

Done. Looks like you want empty_zero_page write protected too. That seems like a fine idea to me unless something really wants to do atomic 64-bit reads on it.

Why the switch of kmem_cache_free call?
Because pgd_val(pgd[i])-1 is confusing.

Yes, I agree it is.  That's why I was wondering about the ClearPagePDE calls
which don't use them.

The -1 is quite useless when you're going to shift >> 12 anyways to get a frame number to index into mem_map, which is why they are not there. Plus, it just seems scary if you got it wrong - let's say you had a not present page - not that you could, but now you are freeing a misaligned address in the _previous_ page. I really don't like that -1 at all. I will clean it up, but it does certainly deserve another patch.

Using (pgd_val(pgd[i]) - _PAGE_PRESENT) would be better, but the +/- 1s all over the place here could use some general cleanup as well. I smell a cleanup fit coming on. Using (pgd_val(pgd[i]) & PAGE_MASK) is a less error prone way to get the physical frame bits, since it is not wrong if you turn on PCD or PWD.

Please save all that for later cleanup.  As it stands it's just a conversion
of one random call site, which should be dropped from the patch.

Working on it now.
Hooks are provided for the mach-XXX subarchitecture at the time prior to a
page being used as a page table at all levels, for PAE and non-PAE kernels.
Note that in PAE mode, multiple PDP roots may exist on the same page with
other data, so the root must be shadowed instead.  This is not a performance
issue, since PAE only uses 4 top level PDPEs.

The hooks are:

SetPagePTE(ppn) - indicates that a given physical page is going to be used
as a page table.

SetPagePDE(ppn) - indicates that a given physical page is going to be used
as a page directory.

ClearPageXXX(ppn) - indicates that the physical page is now done being
used as a certain type of page.

These hooks can be used in two ways; for shadow mode, they serve as
requests to pre-allocate and deallocate shadow page tables, and for
direct page table mode, they serve as write protect/unprotect requests.

Signed-off-by: Zachary Amsden <[email protected]>
Patch-subject: Add pagetable allocation notifiers
Index: linux-2.6.13/arch/i386/mm/init.c
===================================================================
--- linux-2.6.13.orig/arch/i386/mm/init.c	2005-08-24 14:34:30.000000000 -0700
+++ linux-2.6.13/arch/i386/mm/init.c	2005-08-24 14:35:50.000000000 -0700
@@ -59,6 +59,7 @@ static pmd_t * __init one_md_table_init(
 		
 #ifdef CONFIG_X86_PAE
 	pmd_table = (pmd_t *) alloc_bootmem_low_pages(PAGE_SIZE);
+	SetPagePDE(virt_to_page(pmd_table));
 	set_pgd(pgd, __pgd(__pa(pmd_table) | _PAGE_PRESENT));
 	pud = pud_offset(pgd, 0);
 	if (pmd_table != pmd_offset(pud, 0)) 
@@ -79,6 +80,7 @@ static pte_t * __init one_page_table_ini
 {
 	if (pmd_none(*pmd)) {
 		pte_t *page_table = (pte_t *) alloc_bootmem_low_pages(PAGE_SIZE);
+		SetPagePTE(virt_to_page(page_table));
 		set_pmd(pmd, __pmd(__pa(page_table) | _PAGE_TABLE));
 		if (page_table != pte_offset_kernel(pmd, 0))
 			BUG();	
@@ -313,6 +315,7 @@ static void __init pagetable_init (void)
 #ifdef CONFIG_X86_PAE
 	int i;
 	/* Init entries of the first-level page table to the zero page */
+	SetPagePDE(virt_to_page(empty_zero_page));
 	for (i = 0; i < PTRS_PER_PGD; i++)
 		set_pgd(pgd_base + i, __pgd(__pa(empty_zero_page) | _PAGE_PRESENT));
 #endif
Index: linux-2.6.13/arch/i386/mm/pageattr.c
===================================================================
--- linux-2.6.13.orig/arch/i386/mm/pageattr.c	2005-08-24 14:34:30.000000000 -0700
+++ linux-2.6.13/arch/i386/mm/pageattr.c	2005-08-24 14:36:09.000000000 -0700
@@ -52,6 +52,7 @@ static struct page *split_large_page(uns
 	address = __pa(address);
 	addr = address & LARGE_PAGE_MASK; 
 	pbase = (pte_t *)page_address(base);
+	SetPagePTE(virt_to_page(pbase));
 	for (i = 0; i < PTRS_PER_PTE; i++, addr += PAGE_SIZE) {
                set_pte(&pbase[i], pfn_pte(addr >> PAGE_SHIFT,
                                           addr == address ? prot : PAGE_KERNEL));
@@ -146,6 +147,7 @@ __change_page_attr(struct page *page, pg
 		BUG_ON(!page_count(kpte_page));
 
 		if (cpu_has_pse && (page_count(kpte_page) == 1)) {
+			ClearPagePTE(virt_to_page(kpte));
 			list_add(&kpte_page->lru, &df_list);
 			revert_page(kpte_page, address);
 		}
Index: linux-2.6.13/arch/i386/mm/pgtable.c
===================================================================
--- linux-2.6.13.orig/arch/i386/mm/pgtable.c	2005-08-24 14:34:30.000000000 -0700
+++ linux-2.6.13/arch/i386/mm/pgtable.c	2005-08-24 14:35:50.000000000 -0700
@@ -209,6 +209,7 @@ void pgd_ctor(void *pgd, kmem_cache_t *c
 
 	if (PTRS_PER_PMD == 1) {
 		memset(pgd, 0, USER_PTRS_PER_PGD*sizeof(pgd_t));
+		SetPagePDE(virt_to_page(pgd));
 		spin_lock_irqsave(&pgd_lock, flags);
 	}
 
@@ -227,6 +228,7 @@ void pgd_dtor(void *pgd, kmem_cache_t *c
 {
 	unsigned long flags; /* can be called from interrupt context */
 
+	ClearPagePDE(virt_to_page(pgd));
 	spin_lock_irqsave(&pgd_lock, flags);
 	pgd_list_del(pgd);
 	spin_unlock_irqrestore(&pgd_lock, flags);
@@ -244,13 +246,16 @@ pgd_t *pgd_alloc(struct mm_struct *mm)
 		pmd_t *pmd = kmem_cache_alloc(pmd_cache, GFP_KERNEL);
 		if (!pmd)
 			goto out_oom;
+		SetPagePDE(virt_to_page(pmd));
 		set_pgd(&pgd[i], __pgd(1 + __pa(pmd)));
 	}
 	return pgd;
 
 out_oom:
-	for (i--; i >= 0; i--)
+	for (i--; i >= 0; i--) {
+		ClearPagePDE(pfn_to_page(pgd_val(pgd[i]) >> PAGE_SHIFT));
 		kmem_cache_free(pmd_cache, (void *)__va(pgd_val(pgd[i])-1));
+	}
 	kmem_cache_free(pgd_cache, pgd);
 	return NULL;
 }
@@ -261,8 +266,10 @@ void pgd_free(pgd_t *pgd)
 
 	/* in the PAE case user pgd entries are overwritten before usage */
 	if (PTRS_PER_PMD > 1)
-		for (i = 0; i < USER_PTRS_PER_PGD; ++i)
+		for (i = 0; i < USER_PTRS_PER_PGD; ++i) {
+			ClearPagePDE(pfn_to_page(pgd_val(pgd[i]) >> PAGE_SHIFT));
 			kmem_cache_free(pmd_cache, (void *)__va(pgd_val(pgd[i])-1));
+		}
 	/* in the non-PAE case, free_pgtables() clears user pgd entries */
 	kmem_cache_free(pgd_cache, pgd);
 }
Index: linux-2.6.13/include/asm-i386/pgalloc.h
===================================================================
--- linux-2.6.13.orig/include/asm-i386/pgalloc.h	2005-08-24 14:34:30.000000000 -0700
+++ linux-2.6.13/include/asm-i386/pgalloc.h	2005-08-24 14:35:50.000000000 -0700
@@ -5,14 +5,22 @@
 #include <asm/fixmap.h>
 #include <linux/threads.h>
 #include <linux/mm.h>		/* for struct page */
+#include <mach_pgalloc.h>
 
-#define pmd_populate_kernel(mm, pmd, pte) \
-		set_pmd(pmd, __pmd(_PAGE_TABLE + __pa(pte)))
+#define pmd_populate_kernel(mm, pmd, pte) 			\
+do {								\
+	SetPagePTE(virt_to_page(pte));				\
+	set_pmd(pmd, __pmd(_PAGE_TABLE + __pa(pte)));		\
+} while (0)
 
 #define pmd_populate(mm, pmd, pte) 				\
+do {								\
+	SetPagePTE(pte);					\
 	set_pmd(pmd, __pmd(_PAGE_TABLE +			\
 		((unsigned long long)page_to_pfn(pte) <<	\
-			(unsigned long long) PAGE_SHIFT)))
+			(unsigned long long) PAGE_SHIFT)));	\
+} while (0)
+
 /*
  * Allocate and free page tables.
  */
@@ -33,7 +41,11 @@ static inline void pte_free(struct page 
 }
 
 
-#define __pte_free_tlb(tlb,pte) tlb_remove_page((tlb),(pte))
+#define __pte_free_tlb(tlb,pte) 		\
+do {						\
+	tlb_remove_page((tlb),(pte));		\
+	ClearPagePTE(pte);			\
+} while (0)
 
 #ifdef CONFIG_X86_PAE
 /*
Index: linux-2.6.13/include/asm-i386/mach-default/mach_pgalloc.h
===================================================================
--- linux-2.6.13.orig/include/asm-i386/mach-default/mach_pgalloc.h	2005-08-24 14:34:30.000000000 -0700
+++ linux-2.6.13/include/asm-i386/mach-default/mach_pgalloc.h	2005-08-24 14:35:50.000000000 -0700
@@ -1,7 +1,40 @@
+/*
+ * Copyright (C) 2005, VMware, Inc.
+ *
+ * All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT.  See the GNU General Public License for more
+ * details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ * Send feedback to [email protected]
+ *
+ */
 #ifndef __ASM_MACH_PGALLOC_H
 #define __ASM_MACH_PGALLOC_H
 
+/*
+ * These hooks allow the hypervisor to be notified about page type
+ * transition events.
+ */
+
+#define SetPagePTE(_page)
+#define ClearPagePTE(_page)
+#define SetPagePDE(_page)
+#define ClearPagePDE(_page)
+
 #define SetPagesLDT(_va, _pages)
 #define ClearPagesLDT(_va, _pages)
 
-#endif
+#endif /* _ASM_MACH_PGALLOC_H */

[Index of Archives]     [Kernel Newbies]     [Netfilter]     [Bugtraq]     [Photo]     [Gimp]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Video 4 Linux]     [Linux for the blind]
  Powered by Linux