[PATCH] Fix memmap accounting by approximating the map size

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Arch-independent zone-sizing uses account_memmap() in an attempt to accurately
account for how much memory was used in a zone by memmap.  Watermarks and
per-cpu size initialisations then take the memmap into account. However,
the memmap may span multiple zones and, in one case, there was an underflow
causing boot failures.

The fix that perfectly accounts for memory consumed by memmap is complicated
with no clear benefit. The architecture-specific code in x86_64 was simpler
because it approximated how much memory was consumed for memmap backing that
zone regardless of where the memmap was really stored.

This patch ditches the account_memmap() complexity and replaces it with the
simple approximation used by x86_64 while ensuring no underflow occurs.

Signed-off-by: Mel Gorman <[email protected]>

diff -rup -X /usr/src/patchset-0.6/bin//dontdiff linux-2.6.18-rc4-mm3-clean/mm/page_alloc.c linux-2.6.18-rc4-mm3-101_fix_account_memmap/mm/page_alloc.c
--- linux-2.6.18-rc4-mm3-clean/mm/page_alloc.c	2006-08-28 15:05:30.000000000 +0100
+++ linux-2.6.18-rc4-mm3-101_fix_account_memmap/mm/page_alloc.c	2006-09-07 14:36:05.000000000 +0100
@@ -2364,58 +2364,6 @@ static void __init calculate_node_totalp
 							realtotalpages);
 }
 
-#ifdef CONFIG_FLAT_NODE_MEM_MAP
-/* Account for mem_map for CONFIG_FLAT_NODE_MEM_MAP */
-unsigned long __meminit account_memmap(struct pglist_data *pgdat,
-						int zone_index)
-{
-	unsigned long pages = 0;
-	if (zone_index == memmap_zone_idx(pgdat->node_mem_map)) {
-		pages = pgdat->node_spanned_pages;
-		pages = (pages * sizeof(struct page)) >> PAGE_SHIFT;
-		printk(KERN_DEBUG "%lu pages used for memmap\n", pages);
-	}
-	return pages;
-}
-#else
-/* Account for mem_map for CONFIG_SPARSEMEM */
-unsigned long account_memmap(struct pglist_data *pgdat, int zone_index)
-{
-	unsigned long pages = 0;
-	unsigned long memmap_pfn;
-	struct page *memmap_addr;
-	int pnum;
-	unsigned long pgdat_startpfn, pgdat_endpfn;
-	struct mem_section *section;
-
-	pgdat_startpfn = pgdat->node_start_pfn;
-	pgdat_endpfn = pgdat_startpfn + pgdat->node_spanned_pages;
-
-	/* Go through valid sections looking for memmap */
-	for (pnum = 0; pnum < NR_MEM_SECTIONS; pnum++) {
-		if (!valid_section_nr(pnum))
-			continue;
-
-		section = __nr_to_section(pnum);
-		if (!section_has_mem_map(section))
-			continue;
-
-		memmap_addr = __section_mem_map_addr(section);
-		memmap_pfn = (unsigned long)memmap_addr >> PAGE_SHIFT;
-
-		if (memmap_pfn < pgdat_startpfn || memmap_pfn >= pgdat_endpfn)
-			continue;
-
-		if (zone_index == memmap_zone_idx(memmap_addr))
-			pages += (PAGES_PER_SECTION * sizeof(struct page));
-	}
-
-	pages >>= PAGE_SHIFT;
-	printk(KERN_DEBUG "%lu pages used for SPARSE memmap\n", pages);
-	return pages;
-}
-#endif
-
 /*
  * Set up the zone data structures:
  *   - mark all pages reserved
@@ -2437,17 +2385,32 @@ static void __meminit free_area_init_cor
 	
 	for (j = 0; j < MAX_NR_ZONES; j++) {
 		struct zone *zone = pgdat->node_zones + j;
-		unsigned long size, realsize;
+		unsigned long size, realsize, memmap_pages;
 
 		size = zone_spanned_pages_in_node(nid, j, zones_size);
 		realsize = size - zone_absent_pages_in_node(nid, j,
 								zholes_size);
 
-		realsize -= account_memmap(pgdat, j);
+		/*
+		 * Adjust realsize so that it accounts for how much memory
+		 * is used by this zone for memmap. This affects the watermark
+		 * and per-cpu initialisations
+		 */
+		memmap_pages = (size * sizeof(struct page)) >> PAGE_SHIFT;
+		if (realsize >= memmap_pages) {
+			realsize -= memmap_pages;
+			printk(KERN_DEBUG
+				"  %s zone: %lu pages used for memmap\n",
+				zone_names[j], memmap_pages);
+		} else
+			printk(KERN_WARNING
+				"  %s zone: %lu pages exceeds realsize %lu\n",
+				zone_names[j], memmap_pages, realsize);
+
 		/* Account for reserved DMA pages */
 		if (j == ZONE_DMA && realsize > dma_reserve) {
 			realsize -= dma_reserve;
-			printk(KERN_DEBUG "%lu pages DMA reserved\n",
+			printk(KERN_DEBUG "  DMA zone: %lu pages reserved\n",
 								dma_reserve);
 		}
 
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [email protected]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[Index of Archives]     [Kernel Newbies]     [Netfilter]     [Bugtraq]     [Photo]     [Stuff]     [Gimp]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Video 4 Linux]     [Linux for the blind]     [Linux Resources]
  Powered by Linux