[PATCH] BUG: gfp_zone() not mapping zone modifiers correctly and bad ordering of fallback lists

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Hi Andrew,

This patch is divided into two parts and addresses a bug in how zone
fallback lists are calculated and how __GFP_* zone modifiers are mapped to
their equivilant ZONE_* type. It applies to 2.6.15-mm3 and has been tested
on x86 and ppc64. It has been reported by Yasunori Goto that it boots on
ia64. Details as follows;

build_zonelists() attempts to be smart, and uses highest_zone() so that it
doesn't attempt to call build_zonelists_node() for empty zones.  However,
build_zonelists_node() is smart enough to do the right thing by itself and
build_zonelists() already has the zone index that highest_zone() is meant
to provide. So, remove the unnecessary function highest_zone().

The helper function gfp_zone() assumes that the bits used in the zone modifier
of a GFP flag maps directory on to their ZONE_* equivalent and just applies a
mask. However, the bits do not map directly and the wrong fallback lists can
be used. If unluckly, the system can go OOM when plenty of suitable memory
is available. This patch redefines the __GFP_ zone modifier flags to allow
a simple mapping to their equivilant ZONE_ type.

Signed-off-by: Mel Gorman <[email protected]>
diff -rup -X /usr/src/patchset-0.6/bin//dontdiff linux-2.6.15-mm3-clean/include/linux/gfp.h linux-2.6.15-mm3-001_fallbacks/include/linux/gfp.h
--- linux-2.6.15-mm3-clean/include/linux/gfp.h	2006-01-11 14:24:18.000000000 +0000
+++ linux-2.6.15-mm3-001_fallbacks/include/linux/gfp.h	2006-01-13 09:18:09.000000000 +0000
@@ -11,15 +11,19 @@ struct vm_area_struct;
 /*
  * GFP bitmasks..
  */
-/* Zone modifiers in GFP_ZONEMASK (see linux/mmzone.h - low three bits) */
-#define __GFP_DMA	((__force gfp_t)0x01u)
-#define __GFP_HIGHMEM	((__force gfp_t)0x02u)
+/*
+ * Zone modifiers in GFP_ZONEMASK (see linux/mmzone.h - low three bits) 
+ * The values here are mapped to their equivilant ZONE_* type by gfp_zone
+ * which makes assumptions on the value of these bits.
+ */
+#define __GFP_DMA	((__force gfp_t)0x02u)
+#define __GFP_HIGHMEM	((__force gfp_t)0x01u)
 #ifdef CONFIG_DMA_IS_DMA32
-#define __GFP_DMA32	((__force gfp_t)0x01)	/* ZONE_DMA is ZONE_DMA32 */
+#define __GFP_DMA32	((__force gfp_t)0x02u)	/* ZONE_DMA is ZONE_DMA32 */
 #elif BITS_PER_LONG < 64
-#define __GFP_DMA32	((__force gfp_t)0x00)	/* ZONE_NORMAL is ZONE_DMA32 */
+#define __GFP_DMA32	((__force gfp_t)0x00u)	/* ZONE_NORMAL is ZONE_DMA32 */
 #else
-#define __GFP_DMA32	((__force gfp_t)0x04)	/* Has own ZONE_DMA32 */
+#define __GFP_DMA32	((__force gfp_t)0x03u)	/* Has own ZONE_DMA32 */
 #endif
 
 /*
@@ -75,10 +79,15 @@ struct vm_area_struct;
 #define GFP_DMA32	__GFP_DMA32
 
 
+/**
+ * GFP zone modifiers need to be mapped to their equivilant ZONE_ value defined
+ * in linux/mmzone.h to determine what fallback list should be used for the
+ * allocation.
+ */
 static inline int gfp_zone(gfp_t gfp)
 {
-	int zone = GFP_ZONEMASK & (__force int) gfp;
-	BUG_ON(zone >= GFP_ZONETYPES);
+	int zone = ((__force int) gfp & GFP_ZONEMASK) ^ 0x2;
+	BUG_ON(zone >= MAX_NR_ZONES);
 	return zone;
 }
 
diff -rup -X /usr/src/patchset-0.6/bin//dontdiff linux-2.6.15-mm3-clean/mm/page_alloc.c linux-2.6.15-mm3-001_fallbacks/mm/page_alloc.c
--- linux-2.6.15-mm3-clean/mm/page_alloc.c	2006-01-11 14:24:18.000000000 +0000
+++ linux-2.6.15-mm3-001_fallbacks/mm/page_alloc.c	2006-01-13 09:10:17.000000000 +0000
@@ -1577,18 +1577,6 @@ static int __init build_zonelists_node(p
 	return nr_zones;
 }
 
-static inline int highest_zone(int zone_bits)
-{
-	int res = ZONE_NORMAL;
-	if (zone_bits & (__force int)__GFP_HIGHMEM)
-		res = ZONE_HIGHMEM;
-	if (zone_bits & (__force int)__GFP_DMA32)
-		res = ZONE_DMA32;
-	if (zone_bits & (__force int)__GFP_DMA)
-		res = ZONE_DMA;
-	return res;
-}
-
 #ifdef CONFIG_NUMA
 #define MAX_NODE_LOAD (num_online_nodes())
 static int __initdata node_load[MAX_NUMNODES];
@@ -1690,13 +1678,11 @@ static void __init build_zonelists(pg_da
 			node_load[node] += load;
 		prev_node = node;
 		load--;
-		for (i = 0; i < GFP_ZONETYPES; i++) {
+		for (i = 0; i < MAX_NR_ZONES; i++) {
 			zonelist = pgdat->node_zonelists + i;
 			for (j = 0; zonelist->zones[j] != NULL; j++);
 
-			k = highest_zone(i);
-
-	 		j = build_zonelists_node(NODE_DATA(node), zonelist, j, k);
+	 		j = build_zonelists_node(NODE_DATA(node), zonelist, j, i);
 			zonelist->zones[j] = NULL;
 		}
 	}
@@ -1706,17 +1692,16 @@ static void __init build_zonelists(pg_da
 
 static void __init build_zonelists(pg_data_t *pgdat)
 {
-	int i, j, k, node, local_node;
+	int i, j, node, local_node;
 
 	local_node = pgdat->node_id;
-	for (i = 0; i < GFP_ZONETYPES; i++) {
+	for (i = 0; i < MAX_NR_ZONES; i++) {
 		struct zonelist *zonelist;
 
 		zonelist = pgdat->node_zonelists + i;
 
 		j = 0;
-		k = highest_zone(i);
- 		j = build_zonelists_node(pgdat, zonelist, j, k);
+ 		j = build_zonelists_node(pgdat, zonelist, j, i);
  		/*
  		 * Now we build the zonelist so that it contains the zones
  		 * of all the other nodes.
@@ -1728,12 +1713,12 @@ static void __init build_zonelists(pg_da
 		for (node = local_node + 1; node < MAX_NUMNODES; node++) {
 			if (!node_online(node))
 				continue;
-			j = build_zonelists_node(NODE_DATA(node), zonelist, j, k);
+			j = build_zonelists_node(NODE_DATA(node), zonelist, j, i);
 		}
 		for (node = 0; node < local_node; node++) {
 			if (!node_online(node))
 				continue;
-			j = build_zonelists_node(NODE_DATA(node), zonelist, j, k);
+			j = build_zonelists_node(NODE_DATA(node), zonelist, j, i);
 		}
 
 		zonelist->zones[j] = NULL;
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [email protected]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[Index of Archives]     [Kernel Newbies]     [Netfilter]     [Bugtraq]     [Photo]     [Stuff]     [Gimp]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Video 4 Linux]     [Linux for the blind]     [Linux Resources]
  Powered by Linux