[BUGFIX][PATCH] DO flush icache before set_pte() on ia64.

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



This is an experimental patch to fix an icache flush race on ia64 (Montecito).

Problem Description:
Montecito, a new ia64 processor, has separate L2 i-cache and d-cache,
and the i-cache and d-cache are not kept consistent automatically.

The L1 cache is also split, but the L1 d-cache is write-through. So, before
Montecito, any change in the L1 d-cache was consistently visible in the
mixed L2 cache.

Montecito has a split L2 cache and a mixed L3 cache. But... the L2 d-cache is
*write back*. (See http://download.intel.com/design/Itanium2/manuals/
30806501.pdf section 2.3.3)

Assume: valid data is in the L2 d-cache and old data is in the mixed L3 cache.
If the L2->L3 write-back is delayed, then on an L2 i-cache miss the CPU will
fetch the old data from the mixed L3 cache.
As a result, an L2 i-cache miss will read the wrong instruction from the
mixed L3 cache.
(This is just my understanding; is it correct?)

Anyway, there is a SIGILL problem with NFS on ia64, and an icache flush fixes
the SIGILL problem (in our HPC team's tests).

The following SIGILL issue occurs in the current kernel.
(It was discussed this April.)
- http://www.gelato.unsw.edu.au/archives/linux-ia64/0704/20323.html
Ordinary file systems use DMA, which purges the cache. But NFS copies data
with the CPU.

This is HP-UX's errata comment:
- http://h50221.www5.hp.com/upassist/itrc_japan/assist2/patchdigest/PHKL_36120.html
(Sorry for the Japanese page... but it also contains English comments. See PHKL_36120)

Now, I think icache should be flushed before set_pte().
This is a patch to try that.

1. Remove all lazy_mmu_prot_update() calls... it is used only by ia64.
2. Implement flush_cache_page()/flush_icache_page() for ia64.

Something I am unsure about:
3. mprotect() flushes the cache before removing the pte. Is this sane?
   I added flush_icache_range() before set_pte() here.

Any comments or advice?
 
Signed-off-by: KAMEZAWA Hiroyuki <[email protected]>

---
 arch/ia64/mm/init.c           |    7 +------
 include/asm-generic/pgtable.h |    4 ----
 include/asm-ia64/cacheflush.h |   24 ++++++++++++++++++++++--
 include/asm-ia64/pgtable.h    |    9 ---------
 mm/fremap.c                   |    1 -
 mm/memory.c                   |   13 ++++++-------
 mm/migrate.c                  |    6 +++++-
 mm/mprotect.c                 |   10 +++++++++-
 mm/rmap.c                     |    1 -
 9 files changed, 43 insertions(+), 32 deletions(-)

Index: linux-2.6.22-rc7/include/asm-ia64/cacheflush.h
===================================================================
--- linux-2.6.22-rc7.orig/include/asm-ia64/cacheflush.h
+++ linux-2.6.22-rc7/include/asm-ia64/cacheflush.h
@@ -10,18 +10,38 @@
 
 #include <asm/bitops.h>
 #include <asm/page.h>
+#include <linux/mm.h>
 
 /*
  * Cache flushing routines.  This is the kind of stuff that can be very expensive, so try
  * to avoid them whenever possible.
  */
+extern void __flush_icache_page_ia64(struct page *page);
 
 #define flush_cache_all()			do { } while (0)
 #define flush_cache_mm(mm)			do { } while (0)
 #define flush_cache_dup_mm(mm)			do { } while (0)
 #define flush_cache_range(vma, start, end)	do { } while (0)
-#define flush_cache_page(vma, vmaddr, pfn)	do { } while (0)
-#define flush_icache_page(vma,page)		do { } while (0)
+
+static inline void
+flush_cache_page(struct vm_area_struct *vma, unsigned long vmaddr,
+		unsigned long pfn)
+{
+	if (vma->vm_flags & VM_EXEC) {
+		struct page *page;
+		if (!pfn_valid(pfn))
+			return;
+		page = pfn_to_page(pfn);
+		__flush_icache_page_ia64(page);
+	}
+}
+
+static inline void
+flush_icache_page(struct vm_area_struct *vma,struct page *page) {
+	if (vma->vm_flags & VM_EXEC)
+		__flush_icache_page_ia64(page);
+}
+
 #define flush_cache_vmap(start, end)		do { } while (0)
 #define flush_cache_vunmap(start, end)		do { } while (0)
 
Index: linux-2.6.22-rc7/arch/ia64/mm/init.c
===================================================================
--- linux-2.6.22-rc7.orig/arch/ia64/mm/init.c
+++ linux-2.6.22-rc7/arch/ia64/mm/init.c
@@ -105,16 +105,11 @@ check_pgt_cache(void)
 }
 
 void
-lazy_mmu_prot_update (pte_t pte)
+__flush_icache_page_ia64 (struct page *page)
 {
 	unsigned long addr;
-	struct page *page;
 	unsigned long order;
 
-	if (!pte_exec(pte))
-		return;				/* not an executable page... */
-
-	page = pte_page(pte);
 	addr = (unsigned long) page_address(page);
 
 	if (test_bit(PG_arch_1, &page->flags))
Index: linux-2.6.22-rc7/include/asm-ia64/pgtable.h
===================================================================
--- linux-2.6.22-rc7.orig/include/asm-ia64/pgtable.h
+++ linux-2.6.22-rc7/include/asm-ia64/pgtable.h
@@ -151,7 +151,6 @@
 
 #include <linux/sched.h>	/* for mm_struct */
 #include <asm/bitops.h>
-#include <asm/cacheflush.h>
 #include <asm/mmu_context.h>
 #include <asm/processor.h>
 
@@ -506,13 +505,6 @@ extern struct page *zero_page_memmap_ptr
 #define HUGETLB_PGDIR_MASK	(~(HUGETLB_PGDIR_SIZE-1))
 #endif
 
-/*
- * IA-64 doesn't have any external MMU info: the page tables contain all the necessary
- * information.  However, we use this routine to take care of any (delayed) i-cache
- * flushing that may be necessary.
- */
-extern void lazy_mmu_prot_update (pte_t pte);
-
 #define __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS
 /*
  * Update PTEP with ENTRY, which is guaranteed to be a less
@@ -593,7 +585,6 @@ do {											\
 #define __HAVE_ARCH_PTEP_SET_WRPROTECT
 #define __HAVE_ARCH_PTE_SAME
 #define __HAVE_ARCH_PGD_OFFSET_GATE
-#define __HAVE_ARCH_LAZY_MMU_PROT_UPDATE
 
 #ifndef CONFIG_PGTABLE_4
 #include <asm-generic/pgtable-nopud.h>
Index: linux-2.6.22-rc7/include/asm-generic/pgtable.h
===================================================================
--- linux-2.6.22-rc7.orig/include/asm-generic/pgtable.h
+++ linux-2.6.22-rc7/include/asm-generic/pgtable.h
@@ -154,10 +154,6 @@ static inline void ptep_set_wrprotect(st
 #define pgd_offset_gate(mm, addr)	pgd_offset(mm, addr)
 #endif
 
-#ifndef __HAVE_ARCH_LAZY_MMU_PROT_UPDATE
-#define lazy_mmu_prot_update(pte)	do { } while (0)
-#endif
-
 #ifndef __HAVE_ARCH_MOVE_PTE
 #define move_pte(pte, prot, old_addr, new_addr)	(pte)
 #endif
Index: linux-2.6.22-rc7/mm/memory.c
===================================================================
--- linux-2.6.22-rc7.orig/mm/memory.c
+++ linux-2.6.22-rc7/mm/memory.c
@@ -1599,7 +1599,6 @@ static int do_wp_page(struct mm_struct *
 		entry = maybe_mkwrite(pte_mkdirty(entry), vma);
 		ptep_set_access_flags(vma, address, page_table, entry, 1);
 		update_mmu_cache(vma, address, entry);
-		lazy_mmu_prot_update(entry);
 		ret |= VM_FAULT_WRITE;
 		goto unlock;
 	}
@@ -1640,7 +1639,6 @@ gotten:
 		flush_cache_page(vma, address, pte_pfn(orig_pte));
 		entry = mk_pte(new_page, vma->vm_page_prot);
 		entry = maybe_mkwrite(pte_mkdirty(entry), vma);
-		lazy_mmu_prot_update(entry);
 		/*
 		 * Clear the pte entry and flush it first, before updating the
 		 * pte with the new entry. This will avoid a race condition
@@ -2105,7 +2103,6 @@ static int do_swap_page(struct mm_struct
 
 	/* No need to invalidate - it was non-present before */
 	update_mmu_cache(vma, address, pte);
-	lazy_mmu_prot_update(pte);
 unlock:
 	pte_unmap_unlock(page_table, ptl);
 out:
@@ -2162,12 +2159,16 @@ static int do_anonymous_page(struct mm_s
 		inc_mm_counter(mm, file_rss);
 		page_add_file_rmap(page);
 	}
-
+	/*
+	 * new page is zero-filled, but we have to guarantee icache-dcache
+	 * synchronization before setting pte on some processor.
+	 */
+	if (write_access && (vma->vm_flags & VM_EXEC))
+		flush_icache_page(vma, page);
 	set_pte_at(mm, address, page_table, entry);
 
 	/* No need to invalidate - it was non-present before */
 	update_mmu_cache(vma, address, entry);
-	lazy_mmu_prot_update(entry);
 unlock:
 	pte_unmap_unlock(page_table, ptl);
 	return VM_FAULT_MINOR;
@@ -2312,7 +2313,6 @@ retry:
 
 	/* no need to invalidate: a not-present page shouldn't be cached */
 	update_mmu_cache(vma, address, entry);
-	lazy_mmu_prot_update(entry);
 unlock:
 	pte_unmap_unlock(page_table, ptl);
 	if (dirty_page) {
@@ -2470,7 +2470,6 @@ static inline int handle_pte_fault(struc
 	if (!pte_same(old_entry, entry)) {
 		ptep_set_access_flags(vma, address, pte, entry, write_access);
 		update_mmu_cache(vma, address, entry);
-		lazy_mmu_prot_update(entry);
 	} else {
 		/*
 		 * This is needed only for protection faults but the arch code
Index: linux-2.6.22-rc7/mm/migrate.c
===================================================================
--- linux-2.6.22-rc7.orig/mm/migrate.c
+++ linux-2.6.22-rc7/mm/migrate.c
@@ -172,6 +172,11 @@ static void remove_migration_pte(struct 
 	pte = pte_mkold(mk_pte(new, vma->vm_page_prot));
 	if (is_write_migration_entry(entry))
 		pte = pte_mkwrite(pte);
+	/*
+	 * If the processor doesn't guarantee icache-dcache synchronization,
+	 * we need to flush the icache before set_pte.
+	 */
+	flush_icache_page(vma, new);
 	set_pte_at(mm, addr, ptep, pte);
 
 	if (PageAnon(new))
@@ -181,7 +186,6 @@ static void remove_migration_pte(struct 
 
 	/* No need to invalidate - it was non-present before */
 	update_mmu_cache(vma, addr, pte);
-	lazy_mmu_prot_update(pte);
 
 out:
 	pte_unmap_unlock(ptep, ptl);
Index: linux-2.6.22-rc7/mm/mprotect.c
===================================================================
--- linux-2.6.22-rc7.orig/mm/mprotect.c
+++ linux-2.6.22-rc7/mm/mprotect.c
@@ -52,8 +52,16 @@ static void change_pte_range(struct mm_s
 			 */
 			if (dirty_accountable && pte_dirty(ptent))
 				ptent = pte_mkwrite(ptent);
+#ifdef CONFIG_SMP
+			/* We already flushed the cache before reaching here,
+			 * but that flush was done before removing the pte.
+			 * We confirm i-cache consistency here again.
+			 * This is a rare case.
+			 */
+			if (pte_exec(ptent))
+				flush_icache_range(addr, addr + PAGE_SIZE);
+#endif
 			set_pte_at(mm, addr, pte, ptent);
-			lazy_mmu_prot_update(ptent);
 #ifdef CONFIG_MIGRATION
 		} else if (!pte_file(oldpte)) {
 			swp_entry_t entry = pte_to_swp_entry(oldpte);
Index: linux-2.6.22-rc7/mm/rmap.c
===================================================================
--- linux-2.6.22-rc7.orig/mm/rmap.c
+++ linux-2.6.22-rc7/mm/rmap.c
@@ -461,7 +461,6 @@ static int page_mkclean_one(struct page 
 		entry = pte_wrprotect(entry);
 		entry = pte_mkclean(entry);
 		set_pte_at(mm, address, pte, entry);
-		lazy_mmu_prot_update(entry);
 		ret = 1;
 	}
 
Index: linux-2.6.22-rc7/mm/fremap.c
===================================================================
--- linux-2.6.22-rc7.orig/mm/fremap.c
+++ linux-2.6.22-rc7/mm/fremap.c
@@ -83,7 +83,6 @@ int install_page(struct mm_struct *mm, s
 	set_pte_at(mm, addr, pte, pte_val);
 	page_add_file_rmap(page);
 	update_mmu_cache(vma, addr, pte_val);
-	lazy_mmu_prot_update(pte_val);
 	err = 0;
 unlock:
 	pte_unmap_unlock(pte, ptl);

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [email protected]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[Index of Archives]     [Kernel Newbies]     [Netfilter]     [Bugtraq]     [Photo]     [Stuff]     [Gimp]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Video 4 Linux]     [Linux for the blind]     [Linux Resources]
  Powered by Linux