Re: [RFC 7/8] Enhance ramfs to support higher order pages

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Variable Order Page Cache: mmap_nopage and mmap_populate

Fix up both functions to be able to operate on arbitrary order
pages. However, both functions establish page table entries
in PAGE_SIZE only, and the offset and pgoff passed when calling
both functions are always in PAGE_SIZE units. Thus the parameters
were renamed to pgoff_page, which is in PAGE_SIZE units, in
contrast to pgoff, which is in units of the order prescribed by the
address space.

As a result both functions may handle a page struct pointer to
a tail page, i.e. the page to be mapped (or the page that was
mapped) may be a tail page of a compound page.
However, that page struct cannot be used to get a refcount
or mark page characteristics. This can only be done on the
head page!

We need to fixup install_page also since filemap_populate
relies on it.

[WARNING: Very early draft; untested and may not compile]

---
 mm/filemap.c |   38 ++++++++++++++++++++++++++++----------
 mm/fremap.c  |   17 +++++++++++------
 2 files changed, 39 insertions(+), 16 deletions(-)

Index: linux-2.6.21-rc7/mm/filemap.c
===================================================================
--- linux-2.6.21-rc7.orig/mm/filemap.c	2007-04-19 21:26:16.000000000 -0700
+++ linux-2.6.21-rc7/mm/filemap.c	2007-04-19 21:27:55.000000000 -0700
@@ -1318,6 +1318,12 @@ static int fastcall page_cache_read(stru
  * The goto's are kind of ugly, but this streamlines the normal case of having
  * it in the page cache, and handles the special cases reasonably without
  * having a lot of duplicated code.
+ *
+ * filemap_nopage returns pointer to a page that may be a tail page
+ * of a compound page suitable for the VM to map a PAGE_SIZE portion.
+ * However, the VM must update state information in the head page
+ * alone. F.e. Taking a refcount on a tail page does not have the
+ * intended effect.
  */
 struct page *filemap_nopage(struct vm_area_struct *area,
 				unsigned long address, int *type)
@@ -1328,13 +1334,15 @@ struct page *filemap_nopage(struct vm_ar
 	struct file_ra_state *ra = &file->f_ra;
 	struct inode *inode = mapping->host;
 	struct page *page;
-	unsigned long size, pgoff;
+	unsigned long size, pgoff, pgoff_page, compound_index;
 	int did_readaround = 0, majmin = VM_FAULT_MINOR;
 
-	pgoff = ((address-area->vm_start) >> PAGE_CACHE_SHIFT) + area->vm_pgoff;
+	pgoff_page = ((address-area->vm_start) >> PAGE_CACHE_SHIFT) + area->vm_pgoff;
+	pgoff = pgoff_page >> mapping->order;
+	compound_index = pg_off_page % (1 << mapping->order);
 
 retry_all:
-	size = (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
+	size = (i_size_read(inode) + page_cache_size(mapping) - 1) >> page_cache_shift(mapping);
 	if (pgoff >= size)
 		goto outside_data_content;
 
@@ -1412,7 +1420,7 @@ success:
 	mark_page_accessed(page);
 	if (type)
 		*type = majmin;
-	return page;
+	return page + compound_index;
 
 outside_data_content:
 	/*
@@ -1637,8 +1645,12 @@ err:
 	return NULL;
 }
 
+/*
+ * filemap_populate installs page sized ptes in the indicated area.
+ * However, the underlying pages may be of higher order.
+ */
 int filemap_populate(struct vm_area_struct *vma, unsigned long addr,
-		unsigned long len, pgprot_t prot, unsigned long pgoff,
+		unsigned long len, pgprot_t prot, unsigned long pgoff_page,
 		int nonblock)
 {
 	struct file *file = vma->vm_file;
@@ -1648,14 +1660,20 @@ int filemap_populate(struct vm_area_stru
 	struct mm_struct *mm = vma->vm_mm;
 	struct page *page;
 	int err;
+	unsigned long pgoff;
+	int compound_index;
 
 	if (!nonblock)
 		force_page_cache_readahead(mapping, vma->vm_file,
-					pgoff, len >> PAGE_CACHE_SHIFT);
+			pgoff_page >> mapping->order,
+			len >> page_cache_shift(mapping));
 
 repeat:
-	size = (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
-	if (pgoff + (len >> PAGE_CACHE_SHIFT) > size)
+	pgoff = pgoff_page >> mapping->order;
+	compound_index = pgoff_page % (1 << mapping->order);
+
+	size = (i_size_read(inode) + page_cache_size(mapping) - 1) >> page_cache_shift(mapping);
+	if (pgoff + (len >> page_cache_shift(mapping)) > size)
 		return -EINVAL;
 
 	page = filemap_getpage(file, pgoff, nonblock);
@@ -1666,7 +1684,7 @@ repeat:
 		return -ENOMEM;
 
 	if (page) {
-		err = install_page(mm, vma, addr, page, prot);
+		err = install_page(mm, vma, addr, page + compound_index, prot);
 		if (err) {
 			page_cache_release(page);
 			return err;
@@ -1682,7 +1700,7 @@ repeat:
 
 	len -= PAGE_SIZE;
 	addr += PAGE_SIZE;
-	pgoff++;
+	pgoff_page++;
 	if (len)
 		goto repeat;
 
Index: linux-2.6.21-rc7/mm/fremap.c
===================================================================
--- linux-2.6.21-rc7.orig/mm/fremap.c	2007-04-19 21:33:34.000000000 -0700
+++ linux-2.6.21-rc7/mm/fremap.c	2007-04-19 21:37:30.000000000 -0700
@@ -46,7 +46,9 @@ static int zap_pte(struct mm_struct *mm,
 
 /*
  * Install a file page to a given virtual memory address, release any
- * previously existing mapping.
+ * previously existing mapping. The page may point to a tail page
+ * in which case we update the state in the head page but establish
+ * a PAGE_SIZEd mapping to the tail page alone.
  */
 int install_page(struct mm_struct *mm, struct vm_area_struct *vma,
 		unsigned long addr, struct page *page, pgprot_t prot)
@@ -57,6 +59,8 @@ int install_page(struct mm_struct *mm, s
 	pte_t *pte;
 	pte_t pte_val;
 	spinlock_t *ptl;
+	struct address_space *mapping;
+	struct head_page *page = compound_head(page);
 
 	pte = get_locked_pte(mm, addr, &ptl);
 	if (!pte)
@@ -67,12 +71,13 @@ int install_page(struct mm_struct *mm, s
 	 * caller about it.
 	 */
 	err = -EINVAL;
-	inode = vma->vm_file->f_mapping->host;
-	size = (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
-	if (!page->mapping || page->index >= size)
+	mapping = vma->vm_file->f_mapping;
+	inode = mapping->host;
+	size = (i_size_read(inode) + page_cache_size(mapping) - 1) >> page_cache_shift(mapping);
+	if (!head_page->mapping || head_page->index >= size)
 		goto unlock;
 	err = -ENOMEM;
-	if (page_mapcount(page) > INT_MAX/2)
+	if (page_mapcount(head_page) > INT_MAX/2)
 		goto unlock;
 
 	if (pte_none(*pte) || !zap_pte(mm, vma, addr, pte))
@@ -81,7 +86,7 @@ int install_page(struct mm_struct *mm, s
 	flush_icache_page(vma, page);
 	pte_val = mk_pte(page, prot);
 	set_pte_at(mm, addr, pte, pte_val);
-	page_add_file_rmap(page);
+	page_add_file_rmap(head_page);
 	update_mmu_cache(vma, addr, pte_val);
 	lazy_mmu_prot_update(pte_val);
 	err = 0;

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [email protected]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[Index of Archives]     [Kernel Newbies]     [Netfilter]     [Bugtraq]     [Photo]     [Stuff]     [Gimp]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Video 4 Linux]     [Linux for the blind]     [Linux Resources]
  Powered by Linux