Use the migration entries for page migration
This modifies the migration code to use the new migration entries.
It now becomes possible to migrate anonymous pages without having to
add a swap entry.
We add two new functions, remove_migration_pte() and remove_migration_ptes(),
to replace migration entries with the proper ptes.
We can no longer take the tree_lock when migrating anonymous pages, since
they may have no mapping. However, we know that we hold the only remaining
reference to the page once the page count reaches 1.
Signed-off-by: Christoph Lameter <[email protected]>
Index: linux-2.6.17-rc1-mm2/mm/migrate.c
===================================================================
--- linux-2.6.17-rc1-mm2.orig/mm/migrate.c 2006-04-13 15:58:54.000000000 -0700
+++ linux-2.6.17-rc1-mm2/mm/migrate.c 2006-04-13 16:36:28.000000000 -0700
@@ -15,6 +15,7 @@
#include <linux/migrate.h>
#include <linux/module.h>
#include <linux/swap.h>
+#include <linux/swapops.h>
#include <linux/pagemap.h>
#include <linux/buffer_head.h>
#include <linux/mm_inline.h>
@@ -23,7 +24,6 @@
#include <linux/topology.h>
#include <linux/cpu.h>
#include <linux/cpuset.h>
-#include <linux/swapops.h>
#include "internal.h"
@@ -115,6 +115,95 @@ int putback_lru_pages(struct list_head *
return count;
}
+static inline int is_swap_pte(pte_t pte)
+{
+ return !pte_none(pte) && !pte_present(pte) && !pte_file(pte);
+}
+
+/*
+ * Restore a potential migration pte to a working pte entry for
+ * anonymous pages.
+ */
+static void remove_migration_pte(struct vm_area_struct *vma, unsigned long addr,
+ struct page *old, struct page *new)
+{
+ struct mm_struct *mm = vma->vm_mm;
+ swp_entry_t entry;
+ pgd_t *pgd;
+ pud_t *pud;
+ pmd_t *pmd;
+ pte_t *ptep, pte;
+ spinlock_t *ptl;
+
+ pgd = pgd_offset(mm, addr);
+ if (!pgd_present(*pgd))
+ return;
+
+ pud = pud_offset(pgd, addr);
+ if (!pud_present(*pud))
+ return;
+
+ pmd = pmd_offset(pud, addr);
+ if (!pmd_present(*pmd))
+ return;
+
+ ptep = pte_offset_map(pmd, addr);
+
+ if (!is_swap_pte(*ptep)) {
+ pte_unmap(ptep);
+ return;
+ }
+
+ ptl = pte_lockptr(mm, pmd);
+ spin_lock(ptl);
+ pte = *ptep;
+ if (!is_swap_pte(pte))
+ goto out;
+
+ entry = pte_to_swp_entry(pte);
+
+ if (!is_migration_entry(entry) || migration_entry_to_page(entry) != old)
+ goto out;
+
+ inc_mm_counter(mm, anon_rss);
+ get_page(new);
+ set_pte_at(mm, addr, ptep, pte_mkold(mk_pte(new, vma->vm_page_prot)));
+ page_add_anon_rmap(new, vma, addr);
+out:
+ pte_unmap_unlock(ptep, ptl);
+}
+
+/*
+ * Get rid of all migration entries and replace them by
+ * references to the indicated page.
+ *
+ * Must hold mmap_sem lock on at least one of the vmas containing
+ * the page so that the anon_vma cannot vanish.
+ */
+static void remove_migration_ptes(struct page *old, struct page *new)
+{
+ struct anon_vma *anon_vma;
+ struct vm_area_struct *vma;
+ unsigned long mapping;
+
+ mapping = (unsigned long)new->mapping;
+
+ if (!mapping || (mapping & PAGE_MAPPING_ANON) == 0)
+ return;
+
+ /*
+ * We hold the mmap_sem lock. So no need to call page_lock_anon_vma.
+ */
+ anon_vma = (struct anon_vma *) (mapping - PAGE_MAPPING_ANON);
+ spin_lock(&anon_vma->lock);
+
+ list_for_each_entry(vma, &anon_vma->head, anon_vma_node)
+ remove_migration_pte(vma, page_address_in_vma(new, vma),
+ old, new);
+
+ spin_unlock(&anon_vma->lock);
+}
+
/*
* Non migratable page
*/
@@ -125,8 +214,9 @@ int fail_migrate_page(struct page *newpa
EXPORT_SYMBOL(fail_migrate_page);
/*
- * Remove references for a page and establish the new page with the correct
- * basic settings to be able to stop accesses to the page.
+ * Remove or replace all references to a page so that future accesses to
+ * the page can be blocked. Establish the new page
+ * with the basic settings to be able to stop accesses to the page.
*/
int migrate_page_remove_references(struct page *newpage,
struct page *page, int nr_refs)
@@ -139,38 +229,51 @@ int migrate_page_remove_references(struc
* indicates that the page is in use or truncate has removed
* the page.
*/
- if (!mapping || page_mapcount(page) + nr_refs != page_count(page))
- return -EAGAIN;
+ if (!page->mapping ||
+ page_mapcount(page) + nr_refs != page_count(page))
+ return -EAGAIN;
/*
- * Establish swap ptes for anonymous pages or destroy pte
+ * Establish migration ptes for anonymous pages or destroy pte
* maps for files.
*
* In order to reestablish file backed mappings the fault handlers
* will take the radix tree_lock which may then be used to stop
* processses from accessing this page until the new page is ready.
*
- * A process accessing via a swap pte (an anonymous page) will take a
- * page_lock on the old page which will block the process until the
- * migration attempt is complete. At that time the PageSwapCache bit
- * will be examined. If the page was migrated then the PageSwapCache
- * bit will be clear and the operation to retrieve the page will be
- * retried which will find the new page in the radix tree. Then a new
- * direct mapping may be generated based on the radix tree contents.
- *
- * If the page was not migrated then the PageSwapCache bit
- * is still set and the operation may continue.
+ * A process accessing via a migration pte (an anonymous page) will
+ * take a page_lock on the old page which will block the process
+ * until the migration attempt is complete.
*/
if (try_to_unmap(page, 1) == SWAP_FAIL)
/* A vma has VM_LOCKED set -> permanent failure */
return -EPERM;
/*
- * Give up if we were unable to remove all mappings.
+ * Retry if we were unable to remove all mappings.
*/
if (page_mapcount(page))
return -EAGAIN;
+ if (!mapping) {
+ /*
+ * Anonymous page without swap mapping.
+ * User space cannot access the page anymore since we
+ * removed the ptes. Now check if the kernel still has
+ * pending references.
+ */
+ if (page_count(page) != nr_refs)
+ return -EAGAIN;
+
+ /* We are holding the only remaining reference */
+ newpage->index = page->index;
+ newpage->mapping = page->mapping;
+ return 0;
+ }
+
+ /*
+ * The page has a mapping that we need to change
+ */
write_lock_irq(&mapping->tree_lock);
radix_pointer = (struct page **)radix_tree_lookup_slot(
@@ -194,10 +297,13 @@ int migrate_page_remove_references(struc
get_page(newpage);
newpage->index = page->index;
newpage->mapping = page->mapping;
+
+#ifdef CONFIG_SWAP
if (PageSwapCache(page)) {
SetPageSwapCache(newpage);
set_page_private(newpage, page_private(page));
}
+#endif
*radix_pointer = newpage;
__put_page(page);
@@ -232,7 +338,9 @@ void migrate_page_copy(struct page *newp
set_page_dirty(newpage);
}
+#ifdef CONFIG_SWAP
ClearPageSwapCache(page);
+#endif
ClearPageActive(page);
ClearPagePrivate(page);
set_page_private(page, 0);
@@ -259,22 +367,16 @@ int migrate_page(struct page *newpage, s
BUG_ON(PageWriteback(page)); /* Writeback must be complete */
- rc = migrate_page_remove_references(newpage, page, 2);
+ rc = migrate_page_remove_references(newpage, page,
+ page_mapping(page) ? 2 : 1);
- if (rc)
+ if (rc) {
+ remove_migration_ptes(page, page);
return rc;
+ }
migrate_page_copy(newpage, page);
-
- /*
- * Remove auxiliary swap entries and replace
- * them with real ptes.
- *
- * Note that a real pte entry will allow processes that are not
- * waiting on the page lock to use the new page via the page tables
- * before the new page is unlocked.
- */
- remove_from_swap(newpage);
+ remove_migration_ptes(page, newpage);
return 0;
}
EXPORT_SYMBOL(migrate_page);
@@ -356,9 +458,11 @@ redo:
* Try to migrate the page.
*/
mapping = page_mapping(page);
- if (!mapping)
+ if (!mapping) {
+ rc = migrate_page(newpage, page);
goto unlock_both;
+ } else
if (mapping->a_ops->migratepage) {
/*
* Most pages have a mapping and most filesystems
Index: linux-2.6.17-rc1-mm2/mm/Kconfig
===================================================================
--- linux-2.6.17-rc1-mm2.orig/mm/Kconfig 2006-04-02 20:22:10.000000000 -0700
+++ linux-2.6.17-rc1-mm2/mm/Kconfig 2006-04-13 15:58:56.000000000 -0700
@@ -138,8 +138,8 @@ config SPLIT_PTLOCK_CPUS
#
config MIGRATION
bool "Page migration"
- def_bool y if NUMA
- depends on SWAP && NUMA
+ def_bool y
+ depends on NUMA
help
Allows the migration of the physical location of pages of processes
while the virtual addresses are not changed. This is useful for
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [email protected]
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/
[Index of Archives]
[Kernel Newbies]
[Netfilter]
[Bugtraq]
[Photo]
[Stuff]
[Gimp]
[Yosemite News]
[MIPS Linux]
[ARM Linux]
[Linux Security]
[Linux RAID]
[Video 4 Linux]
[Linux for the blind]
[Linux Resources]