[PATCH 05/11] memory hotplug locking: node_size_lock

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



pgdat->node_size_lock is basically only neeeded in one place in the
normal code: show_mem(), which is the arch-specific sysrq-m printing
function.

Strictly speaking, the architectures not doing memory hotplug do
no need this locking in show_mem().  However, they are all included
for completeness.  This should also make any future consolidation
of all of the implementations a little more straightforward.

This lock is also held in the sparsemem code during a memory removal,
as sections are invalidated.  This is the place there pfn_valid() is
made false for a memory area that's being removed.  The lock is
only required when doing pfn_valid() operations on memory which the
user does not already have a reference on the page, such as in
show_mem().

Signed-off-by: Dave Hansen <[email protected]>
---

 memhotplug-dave/arch/alpha/mm/numa.c           |    3 ++
 memhotplug-dave/arch/i386/mm/pgtable.c         |    3 ++
 memhotplug-dave/arch/ia64/mm/discontig.c       |    7 ++++-
 memhotplug-dave/arch/m32r/mm/init.c            |    9 +++++-
 memhotplug-dave/arch/parisc/mm/init.c          |    3 ++
 memhotplug-dave/arch/ppc64/mm/init.c           |    6 ++++
 memhotplug-dave/include/linux/memory_hotplug.h |   34 +++++++++++++++++++++++++
 memhotplug-dave/include/linux/mmzone.h         |   12 ++++++++
 memhotplug-dave/mm/page_alloc.c                |    1 
 9 files changed, 76 insertions(+), 2 deletions(-)

diff -puN arch/alpha/mm/numa.c~C5.2-pgdat_size_lock arch/alpha/mm/numa.c
--- memhotplug/arch/alpha/mm/numa.c~C5.2-pgdat_size_lock	2005-09-02 12:42:10.000000000 -0700
+++ memhotplug-dave/arch/alpha/mm/numa.c	2005-09-02 12:42:10.000000000 -0700
@@ -371,6 +371,8 @@ show_mem(void)
 	show_free_areas();
 	printk("Free swap:       %6ldkB\n", nr_swap_pages<<(PAGE_SHIFT-10));
 	for_each_online_node(nid) {
+		unsigned long flags;
+		pgdat_resize_lock(NODE_DATA(nid), &flags);
 		i = node_spanned_pages(nid);
 		while (i-- > 0) {
 			struct page *page = nid_page_nr(nid, i);
@@ -384,6 +386,7 @@ show_mem(void)
 			else
 				shared += page_count(page) - 1;
 		}
+		pgdat_resize_unlock(NODE_DATA(nid), &flags);
 	}
 	printk("%ld pages of RAM\n",total);
 	printk("%ld free pages\n",free);
diff -puN arch/i386/mm/pgtable.c~C5.2-pgdat_size_lock arch/i386/mm/pgtable.c
--- memhotplug/arch/i386/mm/pgtable.c~C5.2-pgdat_size_lock	2005-09-02 12:42:10.000000000 -0700
+++ memhotplug-dave/arch/i386/mm/pgtable.c	2005-09-02 12:42:10.000000000 -0700
@@ -31,11 +31,13 @@ void show_mem(void)
 	pg_data_t *pgdat;
 	unsigned long i;
 	struct page_state ps;
+	unsigned long flags;
 
 	printk(KERN_INFO "Mem-info:\n");
 	show_free_areas();
 	printk(KERN_INFO "Free swap:       %6ldkB\n", nr_swap_pages<<(PAGE_SHIFT-10));
 	for_each_pgdat(pgdat) {
+		pgdat_resize_lock(pgdat, &flags);
 		for (i = 0; i < pgdat->node_spanned_pages; ++i) {
 			page = pgdat_page_nr(pgdat, i);
 			total++;
@@ -48,6 +50,7 @@ void show_mem(void)
 			else if (page_count(page))
 				shared += page_count(page) - 1;
 		}
+		pgdat_resize_unlock(pgdat, &flags);
 	}
 	printk(KERN_INFO "%d pages of RAM\n", total);
 	printk(KERN_INFO "%d pages of HIGHMEM\n", highmem);
diff -puN arch/ia64/mm/discontig.c~C5.2-pgdat_size_lock arch/ia64/mm/discontig.c
--- memhotplug/arch/ia64/mm/discontig.c~C5.2-pgdat_size_lock	2005-09-02 12:42:10.000000000 -0700
+++ memhotplug-dave/arch/ia64/mm/discontig.c	2005-09-02 13:43:06.000000000 -0700
@@ -524,9 +524,13 @@ void show_mem(void)
 	show_free_areas();
 	printk("Free swap:       %6ldkB\n", nr_swap_pages<<(PAGE_SHIFT-10));
 	for_each_pgdat(pgdat) {
-		unsigned long present = pgdat->node_present_pages;
+		unsigned long present;
+		unsigned long flags;
 		int shared = 0, cached = 0, reserved = 0;
+
 		printk("Node ID: %d\n", pgdat->node_id);
+		pgdat_resize_lock(pgdat, &flags);
+		present = pgdat->node_present_pages;
 		for(i = 0; i < pgdat->node_spanned_pages; i++) {
 			struct page *page = pgdat_page_nr(pgdat, i);
 			if (!ia64_pfn_valid(pgdat->node_start_pfn+i))
@@ -538,6 +542,7 @@ void show_mem(void)
 			else if (page_count(page))
 				shared += page_count(page)-1;
 		}
+		pgdat_resize_unlock(pgdat, &flags);
 		total_present += present;
 		total_reserved += reserved;
 		total_cached += cached;
diff -puN arch/m32r/mm/init.c~C5.2-pgdat_size_lock arch/m32r/mm/init.c
--- memhotplug/arch/m32r/mm/init.c~C5.2-pgdat_size_lock	2005-09-02 12:42:10.000000000 -0700
+++ memhotplug-dave/arch/m32r/mm/init.c	2005-09-02 12:42:10.000000000 -0700
@@ -48,6 +48,8 @@ void show_mem(void)
 	show_free_areas();
 	printk("Free swap:       %6ldkB\n",nr_swap_pages<<(PAGE_SHIFT-10));
 	for_each_pgdat(pgdat) {
+		unsigned long flags;
+		pgdat_resize_lock(pgdat, &flags);
 		for (i = 0; i < pgdat->node_spanned_pages; ++i) {
 			page = pgdat_page_nr(pgdat, i);
 			total++;
@@ -60,6 +62,7 @@ void show_mem(void)
 			else if (page_count(page))
 				shared += page_count(page) - 1;
 		}
+		pgdat_resize_unlock(pgdat, &flags);
 	}
 	printk("%d pages of RAM\n", total);
 	printk("%d pages of HIGHMEM\n",highmem);
@@ -150,10 +153,14 @@ int __init reservedpages_count(void)
 	int reservedpages, nid, i;
 
 	reservedpages = 0;
-	for_each_online_node(nid)
+	for_each_online_node(nid) {
+		unsigned long flags;
+		pgdat_resize_lock(NODE_DATA(nid), &flags);
 		for (i = 0 ; i < MAX_LOW_PFN(nid) - START_PFN(nid) ; i++)
 			if (PageReserved(nid_page_nr(nid, i)))
 				reservedpages++;
+		pgdat_resize_unlock(NODE_DATA(nid), &flags);
+	}
 
 	return reservedpages;
 }
diff -puN arch/parisc/mm/init.c~C5.2-pgdat_size_lock arch/parisc/mm/init.c
--- memhotplug/arch/parisc/mm/init.c~C5.2-pgdat_size_lock	2005-09-02 12:42:10.000000000 -0700
+++ memhotplug-dave/arch/parisc/mm/init.c	2005-09-02 12:42:10.000000000 -0700
@@ -505,7 +505,9 @@ void show_mem(void)
 
 		for (j = node_start_pfn(i); j < node_end_pfn(i); j++) {
 			struct page *p;
+			unsigned long flags;
 
+			pgdat_resize_lock(NODE_DATA(i), &flags);
 			p = nid_page_nr(i, j) - node_start_pfn(i);
 
 			total++;
@@ -517,6 +519,7 @@ void show_mem(void)
 				free++;
 			else
 				shared += page_count(p) - 1;
+			pgdat_resize_unlock(NODE_DATA(i), &flags);
         	}
 	}
 #endif
diff -puN arch/ppc64/mm/init.c~C5.2-pgdat_size_lock arch/ppc64/mm/init.c
--- memhotplug/arch/ppc64/mm/init.c~C5.2-pgdat_size_lock	2005-09-02 12:42:10.000000000 -0700
+++ memhotplug-dave/arch/ppc64/mm/init.c	2005-09-02 13:43:08.000000000 -0700
@@ -104,6 +104,8 @@ void show_mem(void)
 	show_free_areas();
 	printk("Free swap:       %6ldkB\n", nr_swap_pages<<(PAGE_SHIFT-10));
 	for_each_pgdat(pgdat) {
+		unsigned long flags;
+		pgdat_resize_lock(pgdat, &flags);
 		for (i = 0; i < pgdat->node_spanned_pages; i++) {
 			page = pgdat_page_nr(pgdat, i);
 			total++;
@@ -114,6 +116,7 @@ void show_mem(void)
 			else if (page_count(page))
 				shared += page_count(page) - 1;
 		}
+		pgdat_resize_unlock(pgdat, &flags);
 	}
 	printk("%ld pages of RAM\n", total);
 	printk("%ld reserved pages\n", reserved);
@@ -648,11 +651,14 @@ void __init mem_init(void)
 #endif
 
 	for_each_pgdat(pgdat) {
+		unsigned long flags;
+		pgdat_resize_lock(pgdat, &flags);
 		for (i = 0; i < pgdat->node_spanned_pages; i++) {
 			page = pgdat_page_nr(pgdat, i);
 			if (PageReserved(page))
 				reservedpages++;
 		}
+		pgdat_resize_unlock(pgdat, &flags);
 	}
 
 	codesize = (unsigned long)&_etext - (unsigned long)&_stext;
diff -puN /dev/null include/linux/memory_hotplug.h
--- /dev/null	2005-03-30 22:36:15.000000000 -0800
+++ memhotplug-dave/include/linux/memory_hotplug.h	2005-09-02 13:43:13.000000000 -0700
@@ -0,0 +1,34 @@
+#ifndef __LINUX_MEMORY_HOTPLUG_H
+#define __LINUX_MEMORY_HOTPLUG_H
+
+#include <linux/mmzone.h>
+#include <linux/spinlock.h>
+
+#ifdef CONFIG_MEMORY_HOTPLUG
+/*
+ * pgdat resizing functions
+ */
+static inline
+void pgdat_resize_lock(struct pglist_data *pgdat, unsigned long *flags)
+{
+	spin_lock_irqsave(&pgdat->node_size_lock, *flags);
+}
+static inline
+void pgdat_resize_unlock(struct pglist_data *pgdat, unsigned long *flags)
+{
+	spin_lock_irqrestore(&pgdat->node_size_lock, *flags);
+}
+static inline
+void pgdat_resize_init(struct pglist_data *pgdat)
+{
+	spin_lock_init(&pgdat->node_size_lock);
+}
+#else /* ! CONFIG_MEMORY_HOTPLUG */
+/*
+ * Stub functions for when hotplug is off
+ */
+static inline void pgdat_resize_lock(struct pglist_data *p, unsigned long *f) {}
+static inline void pgdat_resize_unlock(struct pglist_data *p, unsigned long *f) {}
+static inline void pgdat_resize_init(struct pglist_data *pgdat) {}
+#endif
+#endif /* __LINUX_MEMORY_HOTPLUG_H */
diff -puN include/linux/mmzone.h~C5.2-pgdat_size_lock include/linux/mmzone.h
--- memhotplug/include/linux/mmzone.h~C5.2-pgdat_size_lock	2005-09-02 12:42:10.000000000 -0700
+++ memhotplug-dave/include/linux/mmzone.h	2005-09-02 13:43:13.000000000 -0700
@@ -273,6 +273,16 @@ typedef struct pglist_data {
 	struct page *node_mem_map;
 #endif
 	struct bootmem_data *bdata;
+#ifdef CONFIG_MEMORY_HOTPLUG
+	/*
+	 * Must be held any time you expect node_start_pfn, node_present_pages
+	 * or node_spanned_pages stay constant.  Holding this will also
+	 * guarantee that any pfn_valid() stays that way.
+	 *
+	 * Nests above zone->lock and zone->size_seqlock.
+	 */
+	spinlock_t node_size_lock;
+#endif
 	unsigned long node_start_pfn;
 	unsigned long node_present_pages; /* total number of physical pages */
 	unsigned long node_spanned_pages; /* total size of physical page
@@ -293,6 +303,8 @@ typedef struct pglist_data {
 #endif
 #define nid_page_nr(nid, pagenr) 	pgdat_page_nr(NODE_DATA(nid),(pagenr))
 
+#include <linux/memory_hotplug.h>
+
 extern struct pglist_data *pgdat_list;
 
 void __get_zone_counts(unsigned long *active, unsigned long *inactive,
diff -puN mm/page_alloc.c~C5.2-pgdat_size_lock mm/page_alloc.c
--- memhotplug/mm/page_alloc.c~C5.2-pgdat_size_lock	2005-09-02 12:42:10.000000000 -0700
+++ memhotplug-dave/mm/page_alloc.c	2005-09-02 13:43:13.000000000 -0700
@@ -1948,6 +1948,7 @@ static void __init free_area_init_core(s
 	int nid = pgdat->node_id;
 	unsigned long zone_start_pfn = pgdat->node_start_pfn;
 
+	pgdat_resize_init(pgdat);
 	pgdat->nr_zones = 0;
 	init_waitqueue_head(&pgdat->kswapd_wait);
 	pgdat->kswapd_max_order = 0;
_
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [email protected]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[Index of Archives]     [Kernel Newbies]     [Netfilter]     [Bugtraq]     [Photo]     [Gimp]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Video 4 Linux]     [Linux for the blind]
  Powered by Linux