Re: [patch][rfc] 5/5: core remove PageReserved

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Thu, Jun 23, 2005 at 05:08:24PM +1000, Nick Piggin wrote:
> @@ -337,7 +338,7 @@ static inline void get_page(struct page 
>  static inline void put_page(struct page *page)
>  {
> -	if (!PageReserved(page) && put_page_testzero(page))
> +	if (put_page_testzero(page))
>  		__page_cache_release(page);
>  }

No sweep before this? I'm not so sure.


On Thu, Jun 23, 2005 at 05:08:24PM +1000, Nick Piggin wrote:
> @@ -514,7 +534,8 @@ int copy_page_range(struct mm_struct *ds
>  	return 0;
>  }
>  
> -static void zap_pte_range(struct mmu_gather *tlb, pmd_t *pmd,
> +static void zap_pte_range(struct mmu_gather *tlb,
> +				struct vm_area_struct *vma, pmd_t *pmd,
>  				unsigned long addr, unsigned long end,
>  				struct zap_details *details)
>  {

As exciting as this is, !!(vma->vm_flags & VM_RESERVED) could trivially
go into struct zap_details without excess args or diff.


On Thu, Jun 23, 2005 at 05:08:24PM +1000, Nick Piggin wrote:
> @@ -1225,6 +1251,8 @@ static int do_wp_page(struct mm_struct *
>  	unsigned long pfn = pte_pfn(pte);
>  	pte_t entry;
>  
> +	BUG_ON(vma->vm_flags & VM_RESERVED);
> +
>  	if (unlikely(!pfn_valid(pfn))) {
>  		/*
>  		 * This should really halt the system so it can be debugged or

!pfn_valid(pfn) is banned when !(vma->vm_flags & VM_RESERVED); here we
BUG_ON the precondition then handle !pfn_valid(pfn) in the old manner
where some new infrastructure has been erected for reporting such errors.


On Thu, Jun 23, 2005 at 05:08:24PM +1000, Nick Piggin wrote:
> @@ -1259,13 +1286,16 @@ static int do_wp_page(struct mm_struct *
>  	/*
>  	 * Ok, we need to copy. Oh, well..
>  	 */
> -	if (!PageReserved(old_page))
> +	if (old_page == ZERO_PAGE(address))
> +		old_page = NULL;
> +	else
>  		page_cache_get(old_page);
> +
>  	spin_unlock(&mm->page_table_lock);
>  
>  	if (unlikely(anon_vma_prepare(vma)))
>  		goto no_new_page;
> -	if (old_page == ZERO_PAGE(address)) {
> +	if (old_page == NULL) {
>  		new_page = alloc_zeroed_user_highpage(vma, address);
>  		if (!new_page)
>  			goto no_new_page;

There are some micro-optimizations mixed in with this and some
subsequent do_wp_page() alterations.


On Thu, Jun 23, 2005 at 05:08:24PM +1000, Nick Piggin wrote:
> @@ -1654,7 +1656,7 @@ void __init memmap_init_zone(unsigned lo
>  
>  	for (page = start; page < (start + size); page++) {
>  		set_page_zone(page, NODEZONE(nid, zone));
> -		set_page_count(page, 0);
> +		set_page_count(page, 1);
>  		reset_page_mapcount(page);
>  		SetPageReserved(page);
>  		INIT_LIST_HEAD(&page->lru);

The refcounting and PG_reserved activity in memmap_init_zone() is
superfluous. bootmem.c does all the necessary accounting internally.


On Thu, Jun 23, 2005 at 05:08:24PM +1000, Nick Piggin wrote:
> Index: linux-2.6/mm/fremap.c
> ===================================================================
> --- linux-2.6.orig/mm/fremap.c
> +++ linux-2.6/mm/fremap.c
> @@ -29,18 +29,21 @@ static inline void zap_pte(struct mm_str
>  		return;
>  	if (pte_present(pte)) {
>  		unsigned long pfn = pte_pfn(pte);
> +		struct page *page;
>  
>  		flush_cache_page(vma, addr, pfn);
>  		pte = ptep_clear_flush(vma, addr, ptep);
> -		if (pfn_valid(pfn)) {
> -			struct page *page = pfn_to_page(pfn);
> -			if (!PageReserved(page)) {
> -				if (pte_dirty(pte))
> -					set_page_dirty(page);
> -				page_remove_rmap(page);
> -				page_cache_release(page);
> -				dec_mm_counter(mm, rss);
> -			}
> +		if (unlikely(!pfn_valid(pfn))) {
> +			print_invalid_pfn("zap_pte", pte, vma->vm_flags, addr);
> +			return;
> +		}
> +		page = pfn_to_page(pfn);
> +		if (page != ZERO_PAGE(addr)) {
> +			if (pte_dirty(pte))
> +				set_page_dirty(page);
> +			page_remove_rmap(page);
> +			dec_mm_counter(mm, rss);
> +			page_cache_release(page);
>  		}
>  	} else {
>  		if (!pte_file(pte))

There is no error returned here to be handled by the caller.


On Thu, Jun 23, 2005 at 05:08:24PM +1000, Nick Piggin wrote:
> @@ -65,6 +68,8 @@ int install_page(struct mm_struct *mm, s
>  	pgd_t *pgd;
>  	pte_t pte_val;
>  
> +	BUG_ON(vma->vm_flags & VM_RESERVED);
> +
>  	pgd = pgd_offset(mm, addr);
>  	spin_lock(&mm->page_table_lock);
>  	
> @@ -122,6 +127,8 @@ int install_file_pte(struct mm_struct *m
>  	pgd_t *pgd;
>  	pte_t pte_val;
>  
> +	BUG_ON(vma->vm_flags & VM_RESERVED);
> +
>  	pgd = pgd_offset(mm, addr);
>  	spin_lock(&mm->page_table_lock);

This has no effect but to artificially constrain the interface. There
is no a priori reason to avoid the use of install_page() to deposit
mappings to normal pages in VM_RESERVED vmas. It's only the reverse
being "banned" here. Similar comments also apply to zap_pte().


On Thu, Jun 23, 2005 at 05:08:24PM +1000, Nick Piggin wrote:
> Index: linux-2.6/drivers/scsi/sg.c
> ===================================================================
> --- linux-2.6.orig/drivers/scsi/sg.c
> +++ linux-2.6/drivers/scsi/sg.c
> @@ -1887,9 +1887,10 @@ st_unmap_user_pages(struct scatterlist *
>  	int i;
>  
>  	for (i=0; i < nr_pages; i++) {
> -		if (dirtied && !PageReserved(sgl[i].page))
> +		if (dirtied)
>  			SetPageDirty(sgl[i].page);
>  		/* unlock_page(sgl[i].page); */
> +		/* FIXME: XXX don't dirty/unmap VM_RESERVED regions? */
>  		/* FIXME: cache flush missing for rw==READ
>  		 * FIXME: call the correct reference counting function
>  		 */

An answer should be devised for this. My numerous SCSI CD-ROM devices
(I have 5 across several different machines of several different arches)
are rather unlikely to be happy with /* FIXME: XXX ... as an answer.


On Thu, Jun 23, 2005 at 05:08:24PM +1000, Nick Piggin wrote:
> Index: linux-2.6/drivers/scsi/st.c
> ===================================================================
> --- linux-2.6.orig/drivers/scsi/st.c
> +++ linux-2.6/drivers/scsi/st.c
> @@ -4435,8 +4435,9 @@ static int sgl_unmap_user_pages(struct s
>  	int i;
>  
>  	for (i=0; i < nr_pages; i++) {
> -		if (dirtied && !PageReserved(sgl[i].page))
> +		if (dirtied)
>  			SetPageDirty(sgl[i].page);
> +		/* FIXME: XXX don't dirty/unmap VM_RESERVED regions? */
>  		/* FIXME: cache flush missing for rw==READ
>  		 * FIXME: call the correct reference counting function
>  		 */

Mutatis mutandis for my SCSI tape drive.


On Thu, Jun 23, 2005 at 05:08:24PM +1000, Nick Piggin wrote:
> Index: linux-2.6/sound/core/pcm_native.c
> ===================================================================
> --- linux-2.6.orig/sound/core/pcm_native.c
> +++ linux-2.6/sound/core/pcm_native.c
> @@ -2942,8 +2942,7 @@ static struct page * snd_pcm_mmap_status
>  		return NOPAGE_OOM;
>  	runtime = substream->runtime;
>  	page = virt_to_page(runtime->status);
> -	if (!PageReserved(page))
> -		get_page(page);
> +	get_page(page);
>  	if (type)
>  		*type = VM_FAULT_MINOR;
>  	return page;

snd_malloc_pages() marks all pages it allocates PG_reserved. This
merits some commentary, and likely the removal of the superfluous
PG_reserved usage.


On Thu, Jun 23, 2005 at 05:08:24PM +1000, Nick Piggin wrote:
> Index: linux-2.6/mm/mmap.c
> ===================================================================
> --- linux-2.6.orig/mm/mmap.c
> +++ linux-2.6/mm/mmap.c
> @@ -1073,6 +1073,17 @@ munmap_back:
>  		error = file->f_op->mmap(file, vma);
>  		if (error)
>  			goto unmap_and_free_vma;
> +		if ((vma->vm_flags & (VM_SHARED | VM_WRITE | VM_RESERVED))
> +						== (VM_WRITE | VM_RESERVED)) {
> +			printk(KERN_WARNING "program %s is using MAP_PRIVATE, "
> +				"PROT_WRITE mmap of VM_RESERVED memory, which "
> +				"is deprecated. Please report this to "
> +				"[email protected]\n",current->comm);
> +			if (vma->vm_ops && vma->vm_ops->close)
> +				vma->vm_ops->close(vma);
> +			error = -EACCES;
> +			goto unmap_and_free_vma;
> +		}
>  	} else if (vm_flags & VM_SHARED) {
>  		error = shmem_zero_setup(vma);
>  		if (error)

This is user-triggerable where the driver honors mmap() protection
flags blindly.


On Thu, Jun 23, 2005 at 05:08:24PM +1000, Nick Piggin wrote:
> Index: linux-2.6/mm/bootmem.c
> ===================================================================
> --- linux-2.6.orig/mm/bootmem.c
> +++ linux-2.6/mm/bootmem.c
> @@ -286,6 +286,7 @@ static unsigned long __init free_all_boo
>  				if (j + 16 < BITS_PER_LONG)
>  					prefetchw(page + j + 16);
>  				__ClearPageReserved(page + j);
> +				set_page_count(page + j, 0);
>  			}
>  			__free_pages(page, order);
>  			i += BITS_PER_LONG;

ibid re: bootmem


On Thu, Jun 23, 2005 at 05:08:24PM +1000, Nick Piggin wrote:
> Index: linux-2.6/kernel/power/swsusp.c
> ===================================================================
> --- linux-2.6.orig/kernel/power/swsusp.c
> +++ linux-2.6/kernel/power/swsusp.c
> @@ -433,15 +433,12 @@ static int save_highmem_zone(struct zone
>  			continue;
>  		page = pfn_to_page(pfn);
>  		/*
> +		 * PageReserved(page) -
>  		 * This condition results from rvmalloc() sans vmalloc_32()
>  		 * and architectural memory reservations. This should be
>  		 * corrected eventually when the cases giving rise to this
>  		 * are better understood.
>  		 */
> -		if (PageReserved(page)) {
> -			printk("highmem reserved page?!\n");
> -			continue;
> -		}
>  		BUG_ON(PageNosave(page));
>  		if (PageNosaveFree(page))
>  			continue;

This behavioral change needs to be commented on. There are some additional
difficulties when memory holes are unintentionally covered by mem_map[];
It is beneficial otherwise. It's likely to triplefault on such holes.


On Thu, Jun 23, 2005 at 05:08:24PM +1000, Nick Piggin wrote:
> @@ -527,13 +524,8 @@ static int saveable(struct zone * zone, 
>  		return 0;
>  
>  	page = pfn_to_page(pfn);
> -	BUG_ON(PageReserved(page) && PageNosave(page));
>  	if (PageNosave(page))
>  		return 0;
> -	if (PageReserved(page) && pfn_is_nosave(pfn)) {
> -		pr_debug("[nosave pfn 0x%lx]", pfn);
> -		return 0;
> -	}
>  	if (PageNosaveFree(page))
>  		return 0;

The pfn_is_nosave() check must stand barring justification of why
unintentionally saving (and hence restoring) the page is tolerable.


-- wli
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [email protected]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[Index of Archives]     [Kernel Newbies]     [Netfilter]     [Bugtraq]     [Photo]     [Stuff]     [Gimp]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Video 4 Linux]     [Linux for the blind]     [Linux Resources]
  Powered by Linux