Re: [PATCH 4/11] maps3: introduce a generic page walker

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Matt Mackall wrote:
> Introduce a general page table walker
>   

Definitely approve in principle, but some comments:

> Signed-off-by: Matt Mackall <[email protected]>
>
> Index: l/include/linux/mm.h
> ===================================================================
> --- l.orig/include/linux/mm.h	2007-10-09 17:37:59.000000000 -0500
> +++ l/include/linux/mm.h	2007-10-10 11:46:37.000000000 -0500
> @@ -773,6 +773,17 @@ unsigned long unmap_vmas(struct mmu_gath
>  		struct vm_area_struct *start_vma, unsigned long start_addr,
>  		unsigned long end_addr, unsigned long *nr_accounted,
>  		struct zap_details *);
> +
> +struct mm_walk {
> +	int (*pgd_entry)(pgd_t *, unsigned long, unsigned long, void *);
> +	int (*pud_entry)(pud_t *, unsigned long, unsigned long, void *);
> +	int (*pmd_entry)(pmd_t *, unsigned long, unsigned long, void *);
> +	int (*pte_entry)(pte_t *, unsigned long, unsigned long, void *);
> +	int (*pte_hole) (unsigned long, unsigned long, void *);
> +};
>   

It would be nice to have some clue about when each of these functions
is called (depth first? pre or post order?), and what its params
are.  Does it call a callback for folded pagetable levels?

Can pte_hole be used to create new mappings while we're traversing the
pagetable?  Apparently not, because the walker just moves on to the next
entry after calling pte_hole, without re-checking the one it reported.

> +
> +int walk_page_range(struct mm_struct *, unsigned long addr, unsigned long end,
> +		    struct mm_walk *walk, void *private);
>  void free_pgd_range(struct mmu_gather **tlb, unsigned long addr,
>  		unsigned long end, unsigned long floor, unsigned long ceiling);
>  void free_pgtables(struct mmu_gather **tlb, struct vm_area_struct *start_vma,
> Index: l/mm/Makefile
> ===================================================================
> --- l.orig/mm/Makefile	2007-10-09 17:37:59.000000000 -0500
> +++ l/mm/Makefile	2007-10-10 11:46:37.000000000 -0500
> @@ -5,7 +5,7 @@
>  mmu-y			:= nommu.o
>  mmu-$(CONFIG_MMU)	:= fremap.o highmem.o madvise.o memory.o mincore.o \
>  			   mlock.o mmap.o mprotect.o mremap.o msync.o rmap.o \
> -			   vmalloc.o
> +			   vmalloc.o pagewalk.o
>  
>  obj-y			:= bootmem.o filemap.o mempool.o oom_kill.o fadvise.o \
>  			   page_alloc.o page-writeback.o pdflush.o \
> Index: l/mm/pagewalk.c
> ===================================================================
> --- /dev/null	1970-01-01 00:00:00.000000000 +0000
> +++ l/mm/pagewalk.c	2007-10-10 11:46:37.000000000 -0500
> @@ -0,0 +1,120 @@
> +#include <linux/mm.h>
> +#include <linux/highmem.h>
> +#include <linux/sched.h>
> +
> +static int walk_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
> +			  struct mm_walk *walk, void *private)
> +{
> +	pte_t *pte;
> +	int err = 0;
> +
> +	pte = pte_offset_map(pmd, addr);
> +	do {
> +		err = walk->pte_entry(pte, addr, addr, private);
>   

Should this be (pte, addr, addr+PAGE_SIZE, private)?  Is the second addr
argument meant to be the end of the address range mapped by this pte?
If so, why pass addr twice?

> +		if (err)
> +		       break;
> +	} while (pte++, addr += PAGE_SIZE, addr != end);
> +
> +	pte_unmap(pte);
> +	return err;
> +}
> +
> +static int walk_pmd_range(pud_t *pud, unsigned long addr, unsigned long end,
> +			  struct mm_walk *walk, void *private)
> +{
> +	pmd_t *pmd;
> +	unsigned long next;
> +	int err = 0;
> +
> +	pmd = pmd_offset(pud, addr);
> +	do {
> +		next = pmd_addr_end(addr, end);
> +		if (pmd_none_or_clear_bad(pmd)) {
> +			if (walk->pte_hole)
> +				err = walk->pte_hole(addr, next, private);
> +			if (err)
> +				break;
> +			continue;
> +		}
> +		if (walk->pmd_entry)
> +			err = walk->pmd_entry(pmd, addr, next, private);
> +		if (!err && walk->pte_entry)
> +			err = walk_pte_range(pmd, addr, next, walk, private);
> +		if (err)
> +			break;
> +	} while (pmd++, addr = next, addr != end);
> +
> +	return err;
> +}
> +
> +static int walk_pud_range(pgd_t *pgd, unsigned long addr, unsigned long end,
> +			  struct mm_walk *walk, void *private)
> +{
> +	pud_t *pud;
> +	unsigned long next;
> +	int err = 0;
> +
> +	pud = pud_offset(pgd, addr);
> +	do {
> +		next = pud_addr_end(addr, end);
> +		if (pud_none_or_clear_bad(pud)) {
> +			if (walk->pte_hole)
> +				err = walk->pte_hole(addr, next, private);
> +			if (err)
> +				break;
> +			continue;
> +		}
> +		if (walk->pud_entry)
> +			err = walk->pud_entry(pud, addr, next, private);
> +		if (!err && (walk->pmd_entry || walk->pte_entry))
> +			err = walk_pmd_range(pud, addr, next, walk, private);
> +		if (err)
> +			break;
> +	} while (pud++, addr = next, addr != end);
> +
> +	return err;
> +}
> +
> +/*
> + * walk_page_range - walk a memory map's page tables with a callback
> + * @mm - memory map to walk
> + * @addr - starting address
> + * @end - ending address
> + * @walk - set of callbacks to invoke for each level of the tree
> + * @private - private data passed to the callback function
> + *
> + * Recursively walk the page table for the memory area in a VMA, calling
> + * a callback for every bottom-level (PTE) page table.
>   

This comment is out of date: it calls a callback for every level of the
pagetable, not just the bottom (PTE) level.

> + */
> +int walk_page_range(struct mm_struct *mm,
> +		    unsigned long addr, unsigned long end,
> +		    struct mm_walk *walk, void *private)
> +{
> +	pgd_t *pgd;
> +	unsigned long next;
> +	int err = 0;
> +
> +	if (addr >= end)
> +		return err;
> +
> +	pgd = pgd_offset(mm, addr);
> +	do {
> +		next = pgd_addr_end(addr, end);
> +		if (pgd_none_or_clear_bad(pgd)) {
> +			if (walk->pte_hole)
> +				err = walk->pte_hole(addr, next, private);
> +			if (err)
> +				break;
> +			continue;
> +		}
> +		if (walk->pgd_entry)
> +			err = walk->pgd_entry(pgd, addr, next, private);
> +		if (!err &&
> +		    (walk->pud_entry || walk->pmd_entry || walk->pte_entry))
> +			err = walk_pud_range(pgd, addr, next, walk, private);
> +		if (err)
> +			return err;
> +	} while (pgd++, addr = next, addr != end);
> +
> +	return err;
> +}
>   

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [email protected]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[Index of Archives]     [Kernel Newbies]     [Netfilter]     [Bugtraq]     [Photo]     [Stuff]     [Gimp]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Video 4 Linux]     [Linux for the blind]     [Linux Resources]
  Powered by Linux