[RFC] [PATCH] virtual memmap on sparsemem v3 [2/4] generic virtual mem_map on sparsemem

This patch implements a virtual mem_map on sparsemem.
It includes only the arch-independent part and depends on the
generic kernel map/unmap functions introduced in this patch series.

Usual sparsemem(_extreme) has to do a global table lookup in
pfn_to_page()/page_to_pfn(), which seems a bit costly.

If an arch has enough address space to map the whole mem_map linearly,
it is good to map the sparse mem_map as a linear mem_map. This reduces
the cost of pfn_to_page()/page_to_pfn().
The same concept is used by ia64's VIRTUAL_MEM_MAP.
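
With a virtually contiguous mem_map, the two conversions collapse to plain
pointer arithmetic; this is what the memory_model.h hunk below does. Roughly
(the usual-sparsemem macro is shown for contrast):

	/* usual sparsemem: find the section, then offset into its mem_map */
	#define __pfn_to_page(pfn)				\
	({	unsigned long __pfn = (pfn);			\
		struct mem_section *__sec = __pfn_to_section(__pfn);	\
		__section_mem_map_addr(__sec) + __pfn;		\
	})

	/* with CONFIG_SPARSEMEM_VMEMMAP: simple arithmetic against mem_map */
	#define __page_to_pfn(pg)	((pg) - mem_map)
	#define __pfn_to_page(pfn)	(mem_map + (pfn))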

pfn_valid() works the same as with usual sparsemem.

Callbacks for creating the vmem_map use alloc_bootmem_pages_node() to
allocate the pud/pmd/pte pages.

How to use:
Set the address that struct page *mem_map points to before calling sparse_init().
That's all.
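
A minimal sketch of the arch side, assuming the arch reserves a virtual
range for the map (VMEMMAP_START below is only a placeholder name, it is
not defined by this patch):

	/* in the arch's paging_init(), before sparse_init() */
	mem_map = (struct page *)VMEMMAP_START;	/* arch-reserved virtual area */
	sparse_init();	/* present sections get mapped into this area */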

Note:
I assume that the mem_map for each section is always aligned to PAGE_SIZE.
For example, on ia64 with sizeof(struct page) = 56 and PAGES_PER_SECTION = 65536,
the mem_map for each section occupies 56 * 65536 bytes, a multiple of PAGE_SIZE.
The #error below will detect configurations where this does not hold.
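
Worked numbers for the ia64 example above (assuming a 16KB PAGE_SIZE):
56 * 65536 = 3670016 = 224 * 16384, so each section's mem_map covers a
whole number of pages and no two sections share a page of the map.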

Signed-off-by: KAMEZAWA Hiruyoki <[email protected]>


Index: devel-2.6.19/mm/sparse.c
===================================================================
--- devel-2.6.19.orig/mm/sparse.c	2006-11-30 06:57:37.000000000 +0900
+++ devel-2.6.19/mm/sparse.c	2006-12-08 15:03:02.000000000 +0900
@@ -9,6 +9,8 @@
 #include <linux/spinlock.h>
 #include <linux/vmalloc.h>
 #include <asm/dma.h>
+#include <asm/pgtable.h>
+#include <asm/pgalloc.h>
 
 /*
  * Permanent SPARSEMEM data:
@@ -76,6 +78,106 @@
 }
 #endif
 
+#ifdef CONFIG_SPARSEMEM_VMEMMAP
+
+struct vmemmap_create_arg {
+	int section_nr;
+	int nid;
+};
+
+/* call backs for memory map */
+static int
+__init pte_alloc_vmemmap_boot(pmd_t *pmd, unsigned long addr, void *data)
+{
+	struct vmemmap_create_arg *arg = data;
+	void *pg = alloc_bootmem_pages_node(NODE_DATA(arg->nid), PAGE_SIZE);
+	BUG_ON(!pg);
+	pmd_populate_kernel(&init_mm, pmd, pg);
+	return 0;
+}
+static int
+__init pmd_alloc_vmemmap_boot(pud_t *pud, unsigned long addr, void *data)
+{
+	struct vmemmap_create_arg *arg = data;
+	void *pg = alloc_bootmem_pages_node(NODE_DATA(arg->nid), PAGE_SIZE);
+	BUG_ON(!pg);
+	pud_populate(&init_mm, pud, pg);
+	return 0;
+}
+
+static int
+__init pud_alloc_vmemmap_boot(pgd_t *pgd, unsigned long addr, void *data)
+{
+	struct vmemmap_create_arg *arg = data;
+	void *pg = alloc_bootmem_pages_node(NODE_DATA(arg->nid), PAGE_SIZE);
+	BUG_ON(!pg);
+	pgd_populate(&init_mm, pgd, pg);
+	return 0;
+}
+
+static int
+__init pte_set_vmemmap_boot(pte_t *pte, unsigned long addr, void *data)
+{
+	struct vmemmap_create_arg *arg = data;
+	struct mem_section *ms = __nr_to_section(arg->section_nr);
+	unsigned long pmap, vmap, section_pfn, pfn;
+
+	section_pfn = section_nr_to_pfn(arg->section_nr);
+	/* we already have mem_map in linear address space. calc it */
+
+	/* decode encoded value of base address. */
+	pmap = ms->section_mem_map & SECTION_MAP_MASK;
+	pmap = (unsigned long)((struct page *)pmap + section_pfn);
+	/* section's start */
+	vmap = (unsigned long)pfn_to_page(section_pfn);
+
+	pfn = (__pa(pmap) + (addr - vmap)) >> PAGE_SHIFT;
+	set_pte(pte, pfn_pte(pfn, PAGE_KERNEL));
+	return 0;
+}
+
+static int
+__init pte_clear_vmemmap(pte_t *pte, unsigned long addr, void *data)
+{
+	BUG();
+}
+
+struct gen_map_kern_ops vmemmap_boot_ops = {
+	.k_pte_set	= pte_set_vmemmap_boot,
+	.k_pte_clear	= pte_clear_vmemmap,
+	.k_pud_alloc	= pud_alloc_vmemmap_boot,
+	.k_pmd_alloc	= pmd_alloc_vmemmap_boot,
+	.k_pte_alloc	= pte_alloc_vmemmap_boot,
+};
+
+static int
+__init map_virtual_mem_map(unsigned long section, int nid)
+{
+	struct vmemmap_create_arg arg;
+	unsigned long vmap_start, vmap_size;
+	vmap_start = (unsigned long)pfn_to_page(section_nr_to_pfn(section));
+	vmap_size = PAGES_PER_SECTION * sizeof(struct page);
+	arg.section_nr = section;
+	arg.nid = nid;
+
+	if (system_state == SYSTEM_BOOTING) {
+		map_generic_kernel(vmap_start, vmap_size, &vmemmap_boot_ops,
+				   &arg);
+	} else {
+		BUG();
+	}
+	/* if bug, panic occurs.*/
+	return 0;
+}
+#else
+static int
+__init map_virtual_mem_map(unsigned long section, int nid)
+{
+	return 0;
+}
+#endif
+
+
 /*
  * Although written for the SPARSEMEM_EXTREME case, this happens
  * to also work for the flat array case becase
@@ -92,7 +194,7 @@
 			continue;
 
 		if ((ms >= root) && (ms < (root + SECTIONS_PER_ROOT)))
-		     break;
+			break;
 	}
 
 	return (root_nr * SECTIONS_PER_ROOT) + (ms - root);
@@ -175,13 +277,14 @@
 }
 
 static int sparse_init_one_section(struct mem_section *ms,
-		unsigned long pnum, struct page *mem_map)
+		unsigned long pnum, struct page *mem_map, int node)
 {
 	if (!valid_section(ms))
 		return -EINVAL;
 
 	ms->section_mem_map &= ~SECTION_MAP_MASK;
 	ms->section_mem_map |= sparse_encode_mem_map(mem_map, pnum);
+	map_virtual_mem_map(pnum, node);
 
 	return 1;
 }
@@ -261,7 +364,8 @@
 		map = sparse_early_mem_map_alloc(pnum);
 		if (!map)
 			continue;
-		sparse_init_one_section(__nr_to_section(pnum), pnum, map);
+		sparse_init_one_section(__nr_to_section(pnum), pnum, map,
+				sparse_early_nid(__nr_to_section(pnum)));
 	}
 }
 
@@ -296,7 +400,7 @@
 	}
 	ms->section_mem_map |= SECTION_MARKED_PRESENT;
 
-	ret = sparse_init_one_section(ms, section_nr, memmap);
+	ret = sparse_init_one_section(ms, section_nr, memmap, pgdat->node_id);
 
 out:
 	pgdat_resize_unlock(pgdat, &flags);
Index: devel-2.6.19/mm/Kconfig
===================================================================
--- devel-2.6.19.orig/mm/Kconfig	2006-11-30 06:57:37.000000000 +0900
+++ devel-2.6.19/mm/Kconfig	2006-12-08 15:05:10.000000000 +0900
@@ -112,12 +112,22 @@
 	def_bool y
 	depends on SPARSEMEM && !SPARSEMEM_STATIC
 
+config SPARSEMEM_VMEMMAP
+	bool	"Virtually contiguous mem_map on sparsemem"
+	depends on SPARSEMEM && !SPARSEMEM_STATIC && ARCH_SPARSEMEM_VMEMMAP
+	help
+	  This is a micro optimization which reduces the cost of accessing
+	  the memory management infrastructure (pfn_to_page()/page_to_pfn()),
+	  at the cost of a huge amount of virtual (not physical) address space.
+	  This option is selectable only if your arch supports it.
+
 # eventually, we can have this option just 'select SPARSEMEM'
 config MEMORY_HOTPLUG
 	bool "Allow for memory hot-add"
 	depends on SPARSEMEM || X86_64_ACPI_NUMA
 	depends on HOTPLUG && !SOFTWARE_SUSPEND && ARCH_ENABLE_MEMORY_HOTPLUG
 	depends on (IA64 || X86 || PPC64)
+	depends on !SPARSEMEM_VMEMMAP
 
 comment "Memory hotplug is currently incompatible with Software Suspend"
 	depends on SPARSEMEM && HOTPLUG && SOFTWARE_SUSPEND
Index: devel-2.6.19/include/linux/mmzone.h
===================================================================
--- devel-2.6.19.orig/include/linux/mmzone.h	2006-11-30 06:57:37.000000000 +0900
+++ devel-2.6.19/include/linux/mmzone.h	2006-12-08 15:04:30.000000000 +0900
@@ -311,7 +311,7 @@
 };
 #endif /* CONFIG_ARCH_POPULATES_NODE_MAP */
 
-#ifndef CONFIG_DISCONTIGMEM
+#if !defined(CONFIG_DISCONTIGMEM) && !defined(CONFIG_SPARSEMEM_VMEMMAP)
 /* The array of struct pages - for discontigmem use pgdat->lmem_map */
 extern struct page *mem_map;
 #endif
@@ -614,6 +614,13 @@
 #define SECTION_MAP_MASK	(~(SECTION_MAP_LAST_BIT-1))
 #define SECTION_NID_SHIFT	2
 
+#ifdef CONFIG_SPARSEMEM_VMEMMAP
+#if (((BITS_PER_LONG/4) * PAGES_PER_SECTION) % PAGE_SIZE) != 0
+#error "PAGE_SIZE/SECTION_SIZE relationship is not suitable for vmem_map"
+#endif
+extern struct page* mem_map;
+#endif
+
 static inline struct page *__section_mem_map_addr(struct mem_section *section)
 {
 	unsigned long map = section->section_mem_map;
Index: devel-2.6.19/include/asm-generic/memory_model.h
===================================================================
--- devel-2.6.19.orig/include/asm-generic/memory_model.h	2006-11-30 06:57:37.000000000 +0900
+++ devel-2.6.19/include/asm-generic/memory_model.h	2006-12-08 15:03:02.000000000 +0900
@@ -47,6 +47,11 @@
 })
 
 #elif defined(CONFIG_SPARSEMEM)
+
+#ifdef CONFIG_SPARSEMEM_VMEMMAP
+#define __page_to_pfn(pg)		((pg) - mem_map)
+#define __pfn_to_page(pfn)		(mem_map + (pfn))
+#else
 /*
  * Note: section's mem_map is encorded to reflect its start_pfn.
  * section[i].section_mem_map == mem_map's address - start_pfn;
@@ -62,6 +67,7 @@
 	struct mem_section *__sec = __pfn_to_section(__pfn);	\
 	__section_mem_map_addr(__sec) + __pfn;		\
 })
+#endif /* CONFIG_SPARSEMEM_VMEMMAP */
 #endif /* CONFIG_FLATMEM/DISCONTIGMEM/SPARSEMEM */
 
 #ifdef CONFIG_OUT_OF_LINE_PFN_TO_PAGE
Index: devel-2.6.19/mm/memory.c
===================================================================
--- devel-2.6.19.orig/mm/memory.c	2006-11-30 06:57:37.000000000 +0900
+++ devel-2.6.19/mm/memory.c	2006-12-08 15:03:02.000000000 +0900
@@ -69,6 +69,12 @@
 EXPORT_SYMBOL(mem_map);
 #endif
 
+#ifdef CONFIG_SPARSEMEM_VMEMMAP
+/* for the virtual mem_map */
+struct page *mem_map;
+EXPORT_SYMBOL(mem_map);
+#endif
+
 unsigned long num_physpages;
 /*
  * A number of key systems in x86 including ioremap() rely on the assumption
