Re: [PATCH 1/5] freepgt: free_pgtables use vma list

On Tue, 2005-03-22 at 12:21 -0800, David S. Miller wrote:
> On Tue, 22 Mar 2005 19:36:46 +0000 (GMT)
> Hugh Dickins <[email protected]> wrote:
> 
> > I notice that although both i386 and sparc64 use pgtable-nopud.h, the
> > i386 pud_clear does nothing at all and the sparc64 pud_clear resets to 0.
> 
> Aha!  And ppc does as well, via asm-generic/4level-fixup.h, which is
> probably why I did it this way when I converted sparc64 over to
> asm-generic/pgtable-nopud.h.
> 
> Hmmm, let me play around with this.
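
(For context: with asm-generic/pgtable-nopud.h the pud level is folded
into the pgd, and the arch still supplies the pud_clear() that operates
on the folded entry. The two behaviours Hugh describes look roughly like
this -- a sketch, not the exact header contents:

	/* i386-style: clearing a folded pud is a no-op */
	static inline void pud_clear(pud_t *pudp) { }

	/* sparc64-style: pud_clear() really zeroes the underlying entry */
	#define pud_clear(pudp)	(pud_val(*(pudp)) = 0UL)

So on sparc64 the entry really goes away, while on i386 clearing a
folded pud leaves the pgd entry untouched.)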

I converted ppc64 to pgtable-nopud.h, but it seems to introduce some
breakage: X kills the box when launched on the G5. I haven't had time to
investigate yet, so it might well be a bug of mine. Here is my current patch:


Index: linux-work/include/asm-ppc64/pgalloc.h
===================================================================
--- linux-work.orig/include/asm-ppc64/pgalloc.h	2005-03-07 14:00:11.000000000 +1100
+++ linux-work/include/asm-ppc64/pgalloc.h	2005-03-17 17:00:50.000000000 +1100
@@ -30,7 +30,7 @@
 	kmem_cache_free(zero_cache, pgd);
 }
 
-#define pgd_populate(MM, PGD, PMD)	pgd_set(PGD, PMD)
+#define pud_populate(MM, PUD, PMD)	pud_set(PUD, PMD)
 
 static inline pmd_t *
 pmd_alloc_one(struct mm_struct *mm, unsigned long addr)
Index: linux-work/include/asm-ppc64/pgtable.h
===================================================================
--- linux-work.orig/include/asm-ppc64/pgtable.h	2005-03-07 14:00:11.000000000 +1100
+++ linux-work/include/asm-ppc64/pgtable.h	2005-03-17 17:00:50.000000000 +1100
@@ -1,8 +1,6 @@
 #ifndef _PPC64_PGTABLE_H
 #define _PPC64_PGTABLE_H
 
-#include <asm-generic/4level-fixup.h>
-
 /*
  * This file contains the functions and defines necessary to modify and use
  * the ppc64 hashed page table.
@@ -17,6 +15,8 @@
 #include <asm/tlbflush.h>
 #endif /* __ASSEMBLY__ */
 
+#include <asm-generic/pgtable-nopud.h>
+
 /* PMD_SHIFT determines what a second-level page table entry can map */
 #define PMD_SHIFT	(PAGE_SHIFT + PAGE_SHIFT - 3)
 #define PMD_SIZE	(1UL << PMD_SHIFT)
@@ -216,12 +216,12 @@
 #define pmd_page_kernel(pmd)	\
 	(__bpn_to_ba(pmd_val(pmd) >> PMD_TO_PTEPAGE_SHIFT))
 #define pmd_page(pmd)		virt_to_page(pmd_page_kernel(pmd))
-#define pgd_set(pgdp, pmdp)	(pgd_val(*(pgdp)) = (__ba_to_bpn(pmdp)))
-#define pgd_none(pgd)		(!pgd_val(pgd))
-#define pgd_bad(pgd)		((pgd_val(pgd)) == 0)
-#define pgd_present(pgd)	(pgd_val(pgd) != 0UL)
-#define pgd_clear(pgdp)		(pgd_val(*(pgdp)) = 0UL)
-#define pgd_page(pgd)		(__bpn_to_ba(pgd_val(pgd))) 
+#define pud_set(pudp, pmdp)	(pud_val(*(pudp)) = (__ba_to_bpn(pmdp)))
+#define pud_none(pud)		(!pud_val(pud))
+#define pud_bad(pud)		((pud_val(pud)) == 0)
+#define pud_present(pud)	(pud_val(pud) != 0UL)
+#define pud_clear(pudp)		(pud_val(*(pudp)) = 0UL)
+#define pud_page(pud)		(__bpn_to_ba(pud_val(pud)))
 
 /* 
  * Find an entry in a page-table-directory.  We combine the address region 
@@ -233,8 +233,8 @@
 #define pgd_offset(mm, address)	 ((mm)->pgd + pgd_index(address))
 
 /* Find an entry in the second-level page table.. */
-#define pmd_offset(dir,addr) \
-  ((pmd_t *) pgd_page(*(dir)) + (((addr) >> PMD_SHIFT) & (PTRS_PER_PMD - 1)))
+#define pmd_offset(pudp,addr) \
+  ((pmd_t *) pud_page(*(pudp)) + (((addr) >> PMD_SHIFT) & (PTRS_PER_PMD - 1)))
 
 /* Find an entry in the third-level page table.. */
 #define pte_offset_kernel(dir,addr) \
@@ -552,20 +552,23 @@
 static inline pte_t *find_linux_pte(pgd_t *pgdir, unsigned long ea)
 {
 	pgd_t *pg;
+	pud_t *pu;
 	pmd_t *pm;
 	pte_t *pt = NULL;
 	pte_t pte;
 
 	pg = pgdir + pgd_index(ea);
 	if (!pgd_none(*pg)) {
-
-		pm = pmd_offset(pg, ea);
-		if (pmd_present(*pm)) { 
-			pt = pte_offset_kernel(pm, ea);
-			pte = *pt;
-			if (!pte_present(pte))
-				pt = NULL;
-		}
+		pu = pud_offset(pg, ea);
+		if (!pud_none(*pu)) {
+			pm = pmd_offset(pu, ea);
+			if (pmd_present(*pm)) {
+				pt = pte_offset_kernel(pm, ea);
+				pte = *pt;
+				if (!pte_present(pte))
+					pt = NULL;
+			}
+		}
 	}
 
 	return pt;
Index: linux-work/arch/ppc64/mm/init.c
===================================================================
--- linux-work.orig/arch/ppc64/mm/init.c	2005-03-15 11:57:29.000000000 +1100
+++ linux-work/arch/ppc64/mm/init.c	2005-03-17 17:20:14.000000000 +1100
@@ -136,14 +136,78 @@
 
 #else
 
+static void unmap_im_area_pte(pmd_t *pmd, unsigned long addr,
+				  unsigned long end)
+{
+	pte_t *pte;
+
+	pte = pte_offset_kernel(pmd, addr);
+	do {
+		pte_t ptent = ptep_get_and_clear(&ioremap_mm, addr, pte);
+		WARN_ON(!pte_none(ptent) && !pte_present(ptent));
+	} while (pte++, addr += PAGE_SIZE, addr != end);
+}
+
+static inline void unmap_im_area_pmd(pud_t *pud, unsigned long addr,
+				     unsigned long end)
+{
+	pmd_t *pmd;
+	unsigned long next;
+
+	pmd = pmd_offset(pud, addr);
+	do {
+		next = pmd_addr_end(addr, end);
+		if (pmd_none_or_clear_bad(pmd))
+			continue;
+		unmap_im_area_pte(pmd, addr, next);
+	} while (pmd++, addr = next, addr != end);
+}
+
+static inline void unmap_im_area_pud(pgd_t *pgd, unsigned long addr,
+				     unsigned long end)
+{
+	pud_t *pud;
+	unsigned long next;
+
+	pud = pud_offset(pgd, addr);
+	do {
+		next = pud_addr_end(addr, end);
+		if (pud_none_or_clear_bad(pud))
+			continue;
+		unmap_im_area_pmd(pud, addr, next);
+	} while (pud++, addr = next, addr != end);
+}
+
+static void unmap_im_area(unsigned long addr, unsigned long end)
+{
+	struct mm_struct *mm = &ioremap_mm;
+	unsigned long next, start = addr;
+	pgd_t *pgd;
+
+	spin_lock(&mm->page_table_lock);
+
+	pgd = pgd_offset_i(addr);
+	flush_cache_vunmap(addr, end);
+	do {
+		next = pgd_addr_end(addr, end);
+		if (pgd_none_or_clear_bad(pgd))
+			continue;
+		unmap_im_area_pud(pgd, addr, next);
+	} while (pgd++, addr = next, addr != end);
+	flush_tlb_kernel_range(start, end);
+
+	spin_unlock(&mm->page_table_lock);
+}
+
 /*
  * map_io_page currently only called by __ioremap
  * map_io_page adds an entry to the ioremap page table
  * and adds an entry to the HPT, possibly bolting it
  */
-static void map_io_page(unsigned long ea, unsigned long pa, int flags)
+static int map_io_page(unsigned long ea, unsigned long pa, int flags)
 {
 	pgd_t *pgdp;
+	pud_t *pudp;
 	pmd_t *pmdp;
 	pte_t *ptep;
 	unsigned long vsid;
@@ -151,9 +215,21 @@
 	if (mem_init_done) {
 		spin_lock(&ioremap_mm.page_table_lock);
 		pgdp = pgd_offset_i(ea);
-		pmdp = pmd_alloc(&ioremap_mm, pgdp, ea);
+		pudp = pud_alloc(&ioremap_mm, pgdp, ea);
+		if (!pudp) {
+			spin_unlock(&ioremap_mm.page_table_lock);
+			return -ENOMEM;
+		}
+		pmdp = pmd_alloc(&ioremap_mm, pudp, ea);
+		if (!pmdp) {
+			spin_unlock(&ioremap_mm.page_table_lock);
+			return -ENOMEM;
+		}
 		ptep = pte_alloc_kernel(&ioremap_mm, pmdp, ea);
-
+		if (!ptep) {
+			spin_unlock(&ioremap_mm.page_table_lock);
+			return -ENOMEM;
+		}
 		pa = abs_to_phys(pa);
 		set_pte_at(&ioremap_mm, ea, ptep, pfn_pte(pa >> PAGE_SHIFT, __pgprot(flags)));
 		spin_unlock(&ioremap_mm.page_table_lock);
@@ -180,6 +256,7 @@
 			panic("map_io_page: could not insert mapping");
 		}
 	}
+	return 0;
 }
 
 
@@ -195,10 +272,15 @@
 		flags |= _PAGE_GUARDED;
 
 	for (i = 0; i < size; i += PAGE_SIZE) {
-		map_io_page(ea+i, pa+i, flags);
+		if (map_io_page(ea+i, pa+i, flags))
+			goto failure;
 	}
 
 	return (void __iomem *) (ea + (addr & ~PAGE_MASK));
+ failure:
+	if (mem_init_done)
+		unmap_im_area(ea, ea + size);
+	return NULL;
 }
 
 
@@ -208,10 +290,11 @@
 	return __ioremap(addr, size, _PAGE_NO_CACHE);
 }
 
-void __iomem *
-__ioremap(unsigned long addr, unsigned long size, unsigned long flags)
+void __iomem * __ioremap(unsigned long addr, unsigned long size,
+			 unsigned long flags)
 {
 	unsigned long pa, ea;
+	void __iomem *ret;
 
 	/*
 	 * Choose an address to map it to.
@@ -234,12 +317,16 @@
 		if (area == NULL)
 			return NULL;
 		ea = (unsigned long)(area->addr);
+		ret = __ioremap_com(addr, pa, ea, size, flags);
+		if (!ret)
+			im_free(area->addr);
 	} else {
 		ea = ioremap_bot;
-		ioremap_bot += size;
+		ret = __ioremap_com(addr, pa, ea, size, flags);
+		if (ret)
+			ioremap_bot += size;
 	}
-
-	return __ioremap_com(addr, pa, ea, size, flags);
+	return ret;
 }
 
 #define IS_PAGE_ALIGNED(_val) ((_val) == ((_val) & PAGE_MASK))
@@ -248,6 +335,7 @@
 		       unsigned long size, unsigned long flags)
 {
 	struct vm_struct *area;
+	void __iomem *ret;
 	
 	/* For now, require page-aligned values for pa, ea, and size */
 	if (!IS_PAGE_ALIGNED(pa) || !IS_PAGE_ALIGNED(ea) ||
@@ -277,7 +365,12 @@
 		}
 	}
 	
-	if (__ioremap_com(pa, pa, ea, size, flags) != (void *) ea) {
+	ret = __ioremap_com(pa, pa, ea, size, flags);
+	if (ret == NULL) {
+		printk(KERN_ERR "ioremap_explicit() allocation failure!\n");
+		return 1;
+	}
+	if (ret != (void *) ea) {
 		printk(KERN_ERR "__ioremap_com() returned unexpected addr\n");
 		return 1;
 	}
@@ -285,68 +378,6 @@
 	return 0;
 }
 
-static void unmap_im_area_pte(pmd_t *pmd, unsigned long address,
-				  unsigned long size)
-{
-	unsigned long base, end;
-	pte_t *pte;
-
-	if (pmd_none(*pmd))
-		return;
-	if (pmd_bad(*pmd)) {
-		pmd_ERROR(*pmd);
-		pmd_clear(pmd);
-		return;
-	}
-
-	pte = pte_offset_kernel(pmd, address);
-	base = address & PMD_MASK;
-	address &= ~PMD_MASK;
-	end = address + size;
-	if (end > PMD_SIZE)
-		end = PMD_SIZE;
-
-	do {
-		pte_t page;
-		page = ptep_get_and_clear(&ioremap_mm, base + address, pte);
-		address += PAGE_SIZE;
-		pte++;
-		if (pte_none(page))
-			continue;
-		if (pte_present(page))
-			continue;
-		printk(KERN_CRIT "Whee.. Swapped out page in kernel page table\n");
-	} while (address < end);
-}
-
-static void unmap_im_area_pmd(pgd_t *dir, unsigned long address,
-				  unsigned long size)
-{
-	unsigned long base, end;
-	pmd_t *pmd;
-
-	if (pgd_none(*dir))
-		return;
-	if (pgd_bad(*dir)) {
-		pgd_ERROR(*dir);
-		pgd_clear(dir);
-		return;
-	}
-
-	pmd = pmd_offset(dir, address);
-	base = address & PGDIR_MASK;
-	address &= ~PGDIR_MASK;
-	end = address + size;
-	if (end > PGDIR_SIZE)
-		end = PGDIR_SIZE;
-
-	do {
-		unmap_im_area_pte(pmd, base + address, end - address);
-		address = (address + PMD_SIZE) & PMD_MASK;
-		pmd++;
-	} while (address < end);
-}
-
 /*  
  * Unmap an IO region and remove it from imalloc'd list.
  * Access to IO memory should be serialized by driver.
@@ -356,39 +387,19 @@
  */
 void iounmap(volatile void __iomem *token)
 {
-	unsigned long address, start, end, size;
-	struct mm_struct *mm;
-	pgd_t *dir;
+	unsigned long address, size;
 	void *addr;
 
-	if (!mem_init_done) {
+	if (!mem_init_done)
 		return;
-	}
 	
 	addr = (void *) ((unsigned long __force) token & PAGE_MASK);
 	
-	if ((size = im_free(addr)) == 0) {
+	if ((size = im_free(addr)) == 0)
 		return;
-	}
 
 	address = (unsigned long)addr; 
-	start = address;
-	end = address + size;
-
-	mm = &ioremap_mm;
-	spin_lock(&mm->page_table_lock);
-
-	dir = pgd_offset_i(address);
-	flush_cache_vunmap(address, end);
-	do {
-		unmap_im_area_pmd(dir, address, end - address);
-		address = (address + PGDIR_SIZE) & PGDIR_MASK;
-		dir++;
-	} while (address && (address < end));
-	flush_tlb_kernel_range(start, end);
-
-	spin_unlock(&mm->page_table_lock);
-	return;
+	unmap_im_area(address, address + size);
 }
 
 static int iounmap_subset_regions(unsigned long addr, unsigned long size)
Index: linux-work/arch/ppc64/mm/hugetlbpage.c
===================================================================
--- linux-work.orig/arch/ppc64/mm/hugetlbpage.c	2005-03-07 14:00:11.000000000 +1100
+++ linux-work/arch/ppc64/mm/hugetlbpage.c	2005-03-17 17:00:50.000000000 +1100
@@ -42,7 +42,7 @@
 	return (addr & ~REGION_MASK) >> HUGEPGDIR_SHIFT;
 }
 
-static pgd_t *hugepgd_offset(struct mm_struct *mm, unsigned long addr)
+static pud_t *hugepgd_offset(struct mm_struct *mm, unsigned long addr)
 {
 	int index;
 
@@ -52,21 +52,21 @@
 
 	index = hugepgd_index(addr);
 	BUG_ON(index >= PTRS_PER_HUGEPGD);
-	return mm->context.huge_pgdir + index;
+	return (pud_t *)(mm->context.huge_pgdir + index);
 }
 
-static inline pte_t *hugepte_offset(pgd_t *dir, unsigned long addr)
+static inline pte_t *hugepte_offset(pud_t *dir, unsigned long addr)
 {
 	int index;
 
-	if (pgd_none(*dir))
+	if (pud_none(*dir))
 		return NULL;
 
 	index = (addr >> HPAGE_SHIFT) % PTRS_PER_HUGEPTE;
-	return (pte_t *)pgd_page(*dir) + index;
+	return (pte_t *)pud_page(*dir) + index;
 }
 
-static pgd_t *hugepgd_alloc(struct mm_struct *mm, unsigned long addr)
+static pud_t *hugepgd_alloc(struct mm_struct *mm, unsigned long addr)
 {
 	BUG_ON(! in_hugepage_area(mm->context, addr));
 
@@ -90,10 +90,9 @@
 	return hugepgd_offset(mm, addr);
 }
 
-static pte_t *hugepte_alloc(struct mm_struct *mm, pgd_t *dir,
-			    unsigned long addr)
+static pte_t *hugepte_alloc(struct mm_struct *mm, pud_t *dir, unsigned long addr)
 {
-	if (! pgd_present(*dir)) {
+	if (! pud_present(*dir)) {
 		pte_t *new;
 
 		spin_unlock(&mm->page_table_lock);
@@ -104,7 +103,7 @@
 		 * Because we dropped the lock, we should re-check the
 		 * entry, as somebody else could have populated it..
 		 */
-		if (pgd_present(*dir)) {
+		if (pud_present(*dir)) {
 			if (new)
 				kmem_cache_free(zero_cache, new);
 		} else {
@@ -115,7 +114,7 @@
 			ptepage = virt_to_page(new);
 			ptepage->mapping = (void *) mm;
 			ptepage->index = addr & HUGEPGDIR_MASK;
-			pgd_populate(mm, dir, new);
+			pud_populate(mm, dir, new);
 		}
 	}
 
@@ -124,28 +123,28 @@
 
 static pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
 {
-	pgd_t *pgd;
+	pud_t *pud;
 
 	BUG_ON(! in_hugepage_area(mm->context, addr));
 
-	pgd = hugepgd_offset(mm, addr);
-	if (! pgd)
+	pud = hugepgd_offset(mm, addr);
+	if (! pud)
 		return NULL;
 
-	return hugepte_offset(pgd, addr);
+	return hugepte_offset(pud, addr);
 }
 
 static pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr)
 {
-	pgd_t *pgd;
+	pud_t *pud;
 
 	BUG_ON(! in_hugepage_area(mm->context, addr));
 
-	pgd = hugepgd_alloc(mm, addr);
-	if (! pgd)
+	pud = hugepgd_alloc(mm, addr);
+	if (! pud)
 		return NULL;
 
-	return hugepte_alloc(mm, pgd, addr);
+	return hugepte_alloc(mm, pud, addr);
 }
 
 static void set_huge_pte(struct mm_struct *mm, struct vm_area_struct *vma,
@@ -218,6 +217,7 @@
 	tlb = tlb_gather_mmu(mm, 0);
 	for (addr = start; addr < end; addr += PMD_SIZE) {
 		pgd_t *pgd = pgd_offset(mm, addr);
+		pud_t *pud;
 		pmd_t *pmd;
 		struct page *page;
 		pte_t *pte;
@@ -225,7 +225,10 @@
 
 		if (pgd_none(*pgd))
 			continue;
-		pmd = pmd_offset(pgd, addr);
+		pud = pud_offset(pgd, addr);
+		if (pud_none(*pud))
+			continue;
+		pmd = pmd_offset(pud, addr);
 		if (!pmd || pmd_none(*pmd))
 			continue;
 		if (pmd_bad(*pmd)) {
@@ -756,10 +759,10 @@
 
 	/* cleanup any hugepte pages leftover */
 	for (i = 0; i < PTRS_PER_HUGEPGD; i++) {
-		pgd_t *pgd = pgdir + i;
+		pud_t *pud = (pud_t *)(pgdir + i);
 
-		if (! pgd_none(*pgd)) {
-			pte_t *pte = (pte_t *)pgd_page(*pgd);
+		if (! pud_none(*pud)) {
+			pte_t *pte = (pte_t *)pud_page(*pud);
 			struct page *ptepage = virt_to_page(pte);
 
 			ptepage->mapping = NULL;
@@ -767,7 +770,7 @@
 			BUG_ON(memcmp(pte, empty_zero_page, PAGE_SIZE));
 			kmem_cache_free(zero_cache, pte);
 		}
-		pgd_clear(pgd);
+		pud_clear(pud);
 	}
 
 	BUG_ON(memcmp(pgdir, empty_zero_page, PAGE_SIZE));
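
P.S. With the pud folded away by pgtable-nopud.h, the extra level added
above should cost nothing at runtime. From my reading of the generic
code (quoting from memory, so double-check):

	/* asm-generic/pgtable-nopud.h: a pud_t just wraps the pgd entry,
	 * and pgd_none() is constant 0 on folded configurations */
	static inline int pgd_none(pgd_t pgd)	{ return 0; }
	static inline pud_t *pud_offset(pgd_t *pgd, unsigned long address)
	{
		return (pud_t *)pgd;
	}

	/* include/linux/mm.h: pud_alloc() therefore never hits the
	 * allocation path here, it degenerates into the cast above */
	static inline pud_t *pud_alloc(struct mm_struct *mm, pgd_t *pgd,
				       unsigned long address)
	{
		if (pgd_none(*pgd))
			return __pud_alloc(mm, pgd, address);
		return pud_offset(pgd, address);
	}

so the new pud_alloc()/pud_offset() calls in map_io_page() and
find_linux_pte() should compile down to casts.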

