Re: [PATCH] mm: poison struct page for ptlock

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Hugh Dickins <[email protected]> wrote:
>
> The split ptlock patch enlarged the default SMP PREEMPT struct page from
> 32 to 36 bytes on most 32-bit platforms, from 32 to 44 bytes on PA-RISC
> 7xxx (without PREEMPT).  That was not my intention, and I don't believe
> that split ptlock deserves any such slice of the user's memory.
> 
> While leaving most of the page_private() mods in place for the moment,
> could we please try this patch, or something like it?  Again to overlay
> the spinlock_t from &page->private onwards, with corrected BUILD_BUG_ON
> that we don't go beyond ->lru; with poisoning of the fields overlaid,
> and unsplit config verifying that the split config is safe to use them.
> 

This patch makes the ppc64 crash.  See
http://www.zip.com.au/~akpm/linux/patches/stuff/dsc02976.jpg

I don't know what the access address was (ia32 nicely tells you), but if
it's `DAR' then we have LIST_POISON1.  Which would indicate that the slab
page which backs the mm_struct itself is getting freed-up-pte-page
treatment, which is deeply screwed up.

I'll try it on x86_64 and ia64, see if it's specific to ppc64.

> 
>  include/linux/mm.h |   69 +++++++++++++++++++++++++++++++++++++++++++----------
>  1 files changed, 56 insertions(+), 13 deletions(-)
> 
> --- 2.6.14-git6/include/linux/mm.h	2005-11-03 18:38:01.000000000 +0000
> +++ linux/include/linux/mm.h	2005-11-03 18:46:06.000000000 +0000
> @@ -226,18 +226,19 @@ struct page {
>  					 * to show when page is mapped
>  					 * & limit reverse map searches.
>  					 */
> -	union {
> -		unsigned long private;	/* Mapping-private opaque data:
> +	unsigned long private;		/* Mapping-private opaque data:
>  					 * usually used for buffer_heads
>  					 * if PagePrivate set; used for
>  					 * swp_entry_t if PageSwapCache
>  					 * When page is free, this indicates
>  					 * order in the buddy system.
>  					 */
> -#if NR_CPUS >= CONFIG_SPLIT_PTLOCK_CPUS
> -		spinlock_t ptl;
> -#endif
> -	} u;
> +	/*
> +	 * Along with private, the mapping, index and lru fields of a
> +	 * page table page's struct page may be overlaid by a spinlock
> +	 * for pte locking: see comment on "split ptlock" below.  Please
> +	 * do not rearrange these fields without considering that usage.
> +	 */
>  	struct address_space *mapping;	/* If low bit clear, points to
>  					 * inode address_space, or NULL.
>  					 * If page mapped as anonymous
> @@ -265,8 +266,8 @@ struct page {
>  #endif /* WANT_PAGE_VIRTUAL */
>  };
>  
> -#define page_private(page)		((page)->u.private)
> -#define set_page_private(page, v)	((page)->u.private = (v))
> +#define page_private(page)		((page)->private)
> +#define set_page_private(page, v)	((page)->private = (v))
>  
>  /*
>   * FIXME: take this include out, include page-flags.h in
> @@ -787,25 +788,67 @@ static inline pmd_t *pmd_alloc(struct mm
>  }
>  #endif /* CONFIG_MMU && !__ARCH_HAS_4LEVEL_HACK */
>  
> +/*
> + * In the split ptlock case, we shall be overlaying the struct page
> + * of a page table page with a spinlock starting at &page->private,
> + * ending dependent on architecture and config, but never beyond lru.
> + *
> + * So poison the struct page in all cases (in part to assert our
> + * territory: that pte locking owns these fields of a page table
> + * struct page), and verify it when freeing in the unsplit ptlock
> + * case, when none of these fields should have been touched.
> + * Poison lru back-to-front, to make sure list_del was not used.
> + *
> + * The time may come when important configs requiring split ptlock
> + * have a spinlock_t which cannot fit here: then kmalloc a spinlock_t
> + * (perhaps in its own cacheline) and keep the pointer in struct page.
> + */
> +static inline void poison_struct_page(struct page *page)
> +{
> +	page->private = (unsigned long) page;
> +	page->mapping = (struct address_space *) page;
> +	page->index   = (pgoff_t) page;
> +	page->lru.next = LIST_POISON2;
> +	page->lru.prev = LIST_POISON1;
> +}
> +
> +static inline void verify_struct_page(struct page *page)
> +{
> +	BUG_ON(page->private != (unsigned long) page);
> +	BUG_ON(page->mapping != (struct address_space *) page);
> +	BUG_ON(page->index   != (pgoff_t) page);
> +	BUG_ON(page->lru.next != LIST_POISON2);
> +	BUG_ON(page->lru.prev != LIST_POISON1);
> +	/*
> +	 * Reset page->mapping so free_pages_check won't complain.
> +	 */
> +	page->mapping = NULL;
> +}
> +
>  #if NR_CPUS >= CONFIG_SPLIT_PTLOCK_CPUS
>  /*
>   * We tuck a spinlock to guard each pagetable page into its struct page,
>   * at page->private, with BUILD_BUG_ON to make sure that this will not
> - * overflow into the next struct page (as it might with DEBUG_SPINLOCK).
> - * When freeing, reset page->mapping so free_pages_check won't complain.
> + * overflow beyond page->lru (as it might with PA-RISC DEBUG_SPINLOCK).
>   */
> -#define __pte_lockptr(page)	&((page)->u.ptl)
> +#define __pte_lockptr(page)	((spinlock_t *)&((page)->private))
>  #define pte_lock_init(_page)	do {					\
> +	BUILD_BUG_ON(__pte_lockptr((struct page *)0) + 1 >		\
> +			(spinlock_t *)(&((struct page *)0)->lru + 1));	\
> +	poison_struct_page(_page);					\
>  	spin_lock_init(__pte_lockptr(_page));				\
>  } while (0)
> +/*
> + * When freeing, reset page->mapping so free_pages_check won't complain.
> + */
>  #define pte_lock_deinit(page)	((page)->mapping = NULL)
>  #define pte_lockptr(mm, pmd)	({(void)(mm); __pte_lockptr(pmd_page(*(pmd)));})
>  #else
>  /*
>   * We use mm->page_table_lock to guard all pagetable pages of the mm.
>   */
> -#define pte_lock_init(page)	do {} while (0)
> -#define pte_lock_deinit(page)	do {} while (0)
> +#define pte_lock_init(page)	poison_struct_page(page)
> +#define pte_lock_deinit(page)	verify_struct_page(page)
>  #define pte_lockptr(mm, pmd)	({(void)(pmd); &(mm)->page_table_lock;})
>  #endif /* NR_CPUS < CONFIG_SPLIT_PTLOCK_CPUS */
>  
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [email protected]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[Index of Archives]     [Kernel Newbies]     [Netfilter]     [Bugtraq]     [Photo]     [Stuff]     [Gimp]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Video 4 Linux]     [Linux for the blind]     [Linux Resources]
  Powered by Linux