[PATCH 5/5] Swapless V2: Revise main migration logic

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Use the migration entries for page migration

This modifies the migration code to use the new migration entries.
It now becomes possible to migrate anonymous pages without having to
add a swap entry.

We add a couple of new functions to replace migration entries with the proper
ptes.

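We cannot take the tree_lock for migrating anonymous pages anymore, because
an anonymous page that is not in the swap cache has no mapping and therefore
no tree_lock to take. However, we know that we hold the only remaining
reference to the page when the page count reaches 1.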

Signed-off-by: Christoph Lameter <[email protected]>

Index: linux-2.6.17-rc1-mm2/mm/migrate.c
===================================================================
--- linux-2.6.17-rc1-mm2.orig/mm/migrate.c	2006-04-13 15:58:54.000000000 -0700
+++ linux-2.6.17-rc1-mm2/mm/migrate.c	2006-04-13 16:36:28.000000000 -0700
@@ -15,6 +15,7 @@
 #include <linux/migrate.h>
 #include <linux/module.h>
 #include <linux/swap.h>
+#include <linux/swapops.h>
 #include <linux/pagemap.h>
 #include <linux/buffer_head.h>
 #include <linux/mm_inline.h>
@@ -23,7 +24,6 @@
 #include <linux/topology.h>
 #include <linux/cpu.h>
 #include <linux/cpuset.h>
-#include <linux/swapops.h>
 
 #include "internal.h"
 
@@ -115,6 +115,95 @@ int putback_lru_pages(struct list_head *
 	return count;
 }
 
+static inline int is_swap_pte(pte_t pte)	/* swap or migration pte */
+{
+	return !(pte_none(pte) || pte_present(pte) || pte_file(pte));
+}
+
+/*
+ * Restore a potential migration pte to a working pte entry for
+ * anonymous pages. If the pte for addr in vma is a migration entry
+ * referring to old, it is replaced by a present pte mapping new.
+ */
+static void remove_migration_pte(struct vm_area_struct *vma, unsigned long addr,
+		struct page *old, struct page *new)
+{
+	struct mm_struct *mm = vma->vm_mm;
+	swp_entry_t entry;
+	pgd_t *pgd;
+	pud_t *pud;
+	pmd_t *pmd;
+	pte_t *ptep, pte;
+	spinlock_t *ptl;
+
+	pgd = pgd_offset(mm, addr);
+	if (!pgd_present(*pgd))
+		return;
+
+	pud = pud_offset(pgd, addr);
+	if (!pud_present(*pud))
+		return;
+
+	pmd = pmd_offset(pud, addr);
+	if (!pmd_present(*pmd))
+		return;
+
+	/* Unlocked peek first; the pte is rechecked under the lock below. */
+	ptep = pte_offset_map(pmd, addr);
+	if (!is_swap_pte(*ptep)) {
+		pte_unmap(ptep);
+		return;
+	}
+
+	ptl = pte_lockptr(mm, pmd);
+	spin_lock(ptl);
+	pte = *ptep;
+	if (!is_swap_pte(pte))
+		goto out;
+
+	entry = pte_to_swp_entry(pte);
+	if (!is_migration_entry(entry) || migration_entry_to_page(entry) != old)
+		goto out;
+
+	inc_mm_counter(mm, anon_rss);
+	get_page(new);
+	set_pte_at(mm, addr, ptep, pte_mkold(mk_pte(new, vma->vm_page_prot)));
+	page_add_anon_rmap(new, vma, addr);
+out:
+	pte_unmap_unlock(ptep, ptl);	/* needs the pte pointer */
+}
+
+/*
+ * Get rid of all migration entries and replace them by
+ * references to the indicated page.
+ *
+ * Must hold mmap_sem lock on at least one of the vmas containing
+ * the page so that the anon_vma cannot vanish.
+ */
+static void remove_migration_ptes(struct page *old, struct page *new)
+{
+	struct anon_vma *anon_vma;
+	struct vm_area_struct *vma;
+	unsigned long mapping = (unsigned long)new->mapping;
+	unsigned long addr;
+
+	/* Nothing to do unless the page is anonymous. */
+	if (!mapping || (mapping & PAGE_MAPPING_ANON) == 0)
+		return;
+
+	/* We hold the mmap_sem lock. So no need to call page_lock_anon_vma. */
+	anon_vma = (struct anon_vma *) (mapping - PAGE_MAPPING_ANON);
+	spin_lock(&anon_vma->lock);
+	list_for_each_entry(vma, &anon_vma->head, anon_vma_node) {
+		/* -EFAULT: the page is not mapped in this vma, skip it. */
+		addr = page_address_in_vma(new, vma);
+		if (addr != -EFAULT)
+			remove_migration_pte(vma, addr, old, new);
+	}
+
+	spin_unlock(&anon_vma->lock);
+}
+
 /*
  * Non migratable page
  */
@@ -125,8 +214,9 @@ int fail_migrate_page(struct page *newpa
 EXPORT_SYMBOL(fail_migrate_page);
 
 /*
- * Remove references for a page and establish the new page with the correct
- * basic settings to be able to stop accesses to the page.
+ * Remove or replace all references to a page so that future accesses to
+ * the page can be blocked. Establish the new page
+ * with the basic settings to be able to stop accesses to the page.
  */
 int migrate_page_remove_references(struct page *newpage,
 				struct page *page, int nr_refs)
@@ -139,38 +229,51 @@ int migrate_page_remove_references(struc
 	 * indicates that the page is in use or truncate has removed
 	 * the page.
 	 */
-	if (!mapping || page_mapcount(page) + nr_refs != page_count(page))
-		return -EAGAIN;
+	if (!page->mapping ||
+		page_mapcount(page) + nr_refs != page_count(page))
+			return -EAGAIN;
 
 	/*
-	 * Establish swap ptes for anonymous pages or destroy pte
+	 * Establish migration ptes for anonymous pages or destroy pte
 	 * maps for files.
 	 *
 	 * In order to reestablish file backed mappings the fault handlers
 	 * will take the radix tree_lock which may then be used to stop
   	 * processses from accessing this page until the new page is ready.
 	 *
-	 * A process accessing via a swap pte (an anonymous page) will take a
-	 * page_lock on the old page which will block the process until the
-	 * migration attempt is complete. At that time the PageSwapCache bit
-	 * will be examined. If the page was migrated then the PageSwapCache
-	 * bit will be clear and the operation to retrieve the page will be
-	 * retried which will find the new page in the radix tree. Then a new
-	 * direct mapping may be generated based on the radix tree contents.
-	 *
-	 * If the page was not migrated then the PageSwapCache bit
-	 * is still set and the operation may continue.
+	 * A process accessing via a migration pte (an anonymous page) will
+	 * take a page_lock on the old page which will block the process
+	 * until the migration attempt is complete.
 	 */
 	if (try_to_unmap(page, 1) == SWAP_FAIL)
 		/* A vma has VM_LOCKED set -> permanent failure */
 		return -EPERM;
 
 	/*
-	 * Give up if we were unable to remove all mappings.
+	 * Retry if we were unable to remove all mappings.
 	 */
 	if (page_mapcount(page))
 		return -EAGAIN;
 
+	if (!mapping) {
+		/*
+		 * Anonymous page without swap mapping.
+		 * User space cannot access the page anymore since we
+		 * removed the ptes. Now check if the kernel still has
+		 * pending references.
+		 */
+		if (page_count(page) != nr_refs)
+			return -EAGAIN;
+
+		/* We are holding the only remaining reference */
+		newpage->index = page->index;
+		newpage->mapping = page->mapping;
+		return 0;
+	}
+
+	/*
+	 * The page has a mapping that we need to change
+	 */
 	write_lock_irq(&mapping->tree_lock);
 
 	radix_pointer = (struct page **)radix_tree_lookup_slot(
@@ -194,10 +297,13 @@ int migrate_page_remove_references(struc
 	get_page(newpage);
 	newpage->index = page->index;
 	newpage->mapping = page->mapping;
+
+#ifdef CONFIG_SWAP
 	if (PageSwapCache(page)) {
 		SetPageSwapCache(newpage);
 		set_page_private(newpage, page_private(page));
 	}
+#endif
 
 	*radix_pointer = newpage;
 	__put_page(page);
@@ -232,7 +338,9 @@ void migrate_page_copy(struct page *newp
 		set_page_dirty(newpage);
  	}
 
+#ifdef CONFIG_SWAP
 	ClearPageSwapCache(page);
+#endif
 	ClearPageActive(page);
 	ClearPagePrivate(page);
 	set_page_private(page, 0);
@@ -259,22 +367,16 @@ int migrate_page(struct page *newpage, s
 
 	BUG_ON(PageWriteback(page));	/* Writeback must be complete */
 
-	rc = migrate_page_remove_references(newpage, page, 2);
+	rc = migrate_page_remove_references(newpage, page,
+			page_mapping(page) ? 2 : 1);
 
-	if (rc)
+	if (rc) {
+		remove_migration_ptes(page, page);
 		return rc;
+	}
 
 	migrate_page_copy(newpage, page);
-
-	/*
-	 * Remove auxiliary swap entries and replace
-	 * them with real ptes.
-	 *
-	 * Note that a real pte entry will allow processes that are not
-	 * waiting on the page lock to use the new page via the page tables
-	 * before the new page is unlocked.
-	 */
-	remove_from_swap(newpage);
+	remove_migration_ptes(page, newpage);
 	return 0;
 }
 EXPORT_SYMBOL(migrate_page);
@@ -356,9 +458,11 @@ redo:
 		 * Try to migrate the page.
 		 */
 		mapping = page_mapping(page);
-		if (!mapping)
+		if (!mapping) {
+			rc = migrate_page(newpage, page);
 			goto unlock_both;
 
+		} else
 		if (mapping->a_ops->migratepage) {
 			/*
 			 * Most pages have a mapping and most filesystems
Index: linux-2.6.17-rc1-mm2/mm/Kconfig
===================================================================
--- linux-2.6.17-rc1-mm2.orig/mm/Kconfig	2006-04-02 20:22:10.000000000 -0700
+++ linux-2.6.17-rc1-mm2/mm/Kconfig	2006-04-13 15:58:56.000000000 -0700
@@ -138,8 +138,8 @@ config SPLIT_PTLOCK_CPUS
 #
 config MIGRATION
 	bool "Page migration"
-	def_bool y if NUMA
-	depends on SWAP && NUMA
+	def_bool y
+	depends on NUMA
 	help
 	  Allows the migration of the physical location of pages of processes
 	  while the virtual addresses are not changed. This is useful for
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [email protected]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[Index of Archives]     [Kernel Newbies]     [Netfilter]     [Bugtraq]     [Photo]     [Stuff]     [Gimp]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Video 4 Linux]     [Linux for the blind]     [Linux Resources]
  Powered by Linux