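Here's a deliberately simple implementation of fake NUMA nodes for PowerPC.
Fake NUMA nodes can be specified using the following command line option:
numa=fake=<node range>
where <node range> has the format <range1>,<range2>,...<rangeN>. Each rangeX is a memory address boundary, given in increasing order; a new fake node is created once a memory region ends beyond that boundary.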
Each of the rangeX parameters is parsed using memparse(). I find the patch
useful for fake NUMA emulation on my simple PowerPC machine. I've tested it
on a non-NUMA box with the following arguments:
numa=fake=1G
numa=fake=1G,2G
numa=fake=1G,512M,2G
numa=fake=1500M,2800M mem=3500M
numa=fake=1G mem=512M
numa=fake=1G mem=1G
This patch applies on top of 2.6.24-rc4.
Although I've tried my best to handle some of the architecture-specific
details of PowerPC, I might have overlooked something obvious, like the usage
of an API or some architecture tweaks. The patch depends on CONFIG_NUMA and
I decided against creating a separate config option for fake NUMA to keep
the code simple.
Comments are as always welcome!
Signed-off-by: Balbir Singh <[email protected]>
---
arch/powerpc/mm/numa.c | 55 ++++++++++++++++++++++++++++++++++++++++++++-----
1 file changed, 50 insertions(+), 5 deletions(-)
diff -puN arch/powerpc/mm/numa.c~ppc-fake-numa-easy arch/powerpc/mm/numa.c
--- linux-2.6.24-rc4-mm1/arch/powerpc/mm/numa.c~ppc-fake-numa-easy 2007-12-07 21:25:55.000000000 +0530
+++ linux-2.6.24-rc4-mm1-balbir/arch/powerpc/mm/numa.c 2007-12-08 02:36:02.000000000 +0530
@@ -24,6 +24,8 @@
static int numa_enabled = 1;
+char *cmdline __initdata;
+
static int numa_debug;
#define dbg(args...) if (numa_debug) { printk(KERN_INFO args); }
@@ -39,6 +41,40 @@ static bootmem_data_t __initdata plat_no
static int min_common_depth;
static int n_mem_addr_cells, n_mem_size_cells;
+static int __cpuinit fake_numa_create_new_node(unsigned long end_pfn,
+ unsigned int *nid)
+{
+ unsigned long long mem;
+ char *p = cmdline;
+ static unsigned int fake_nid = 0;
+ static unsigned long long curr_boundary = 0;
+
+ *nid = fake_nid;
+ mem = memparse(p, &p);
+ if (!mem)
+ return 0;
+
+ if (mem < curr_boundary)
+ return 0;
+
+ curr_boundary = mem;
+
+ if ((end_pfn << PAGE_SHIFT) > mem) {
+ /*
+ * Skip commas and spaces
+ */
+ while (*p == ',' || *p == ' ' || *p == '\t')
+ p++;
+
+ cmdline = p;
+ fake_nid++;
+ *nid = fake_nid;
+ dbg("created new fake_node with id %d\n", fake_nid);
+ return 1;
+ }
+ return 0;
+}
+
static void __cpuinit map_cpu_to_node(int cpu, int node)
{
numa_cpu_lookup_table[cpu] = node;
@@ -344,12 +380,14 @@ static void __init parse_drconf_memory(s
if (nid == 0xffff || nid >= MAX_NUMNODES)
nid = default_nid;
}
- node_set_online(nid);
size = numa_enforce_memory_limit(start, lmb_size);
if (!size)
continue;
+ fake_numa_create_new_node(((start + size) >> PAGE_SHIFT), &nid);
+ node_set_online(nid);
+
add_active_range(nid, start >> PAGE_SHIFT,
(start >> PAGE_SHIFT) + (size >> PAGE_SHIFT));
}
@@ -429,7 +467,6 @@ new_range:
nid = of_node_to_nid_single(memory);
if (nid < 0)
nid = default_nid;
- node_set_online(nid);
if (!(size = numa_enforce_memory_limit(start, size))) {
if (--ranges)
@@ -438,6 +475,9 @@ new_range:
continue;
}
+ fake_numa_create_new_node(((start + size) >> PAGE_SHIFT), &nid);
+ node_set_online(nid);
+
add_active_range(nid, start >> PAGE_SHIFT,
(start >> PAGE_SHIFT) + (size >> PAGE_SHIFT));
@@ -461,7 +501,7 @@ static void __init setup_nonnuma(void)
unsigned long top_of_ram = lmb_end_of_DRAM();
unsigned long total_ram = lmb_phys_mem_size();
unsigned long start_pfn, end_pfn;
- unsigned int i;
+ unsigned int i, nid = 0;
printk(KERN_DEBUG "Top of RAM: 0x%lx, Total RAM: 0x%lx\n",
top_of_ram, total_ram);
@@ -471,9 +511,11 @@ static void __init setup_nonnuma(void)
for (i = 0; i < lmb.memory.cnt; ++i) {
start_pfn = lmb.memory.region[i].base >> PAGE_SHIFT;
end_pfn = start_pfn + lmb_size_pages(&lmb.memory, i);
- add_active_range(0, start_pfn, end_pfn);
+
+ fake_numa_create_new_node(end_pfn, &nid);
+ add_active_range(nid, start_pfn, end_pfn);
+ node_set_online(nid);
}
- node_set_online(0);
}
void __init dump_numa_cpu_topology(void)
@@ -702,6 +744,9 @@ static int __init early_numa(char *p)
if (strstr(p, "debug"))
numa_debug = 1;
+ if (strstr(p, "fake="))
+ cmdline = p + 5; /* 5 is faster than strlen("fake=") */
+
return 0;
}
early_param("numa", early_numa);
_
--
Warm Regards,
Balbir Singh
Linux Technology Center
IBM, ISTL
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [email protected]
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/
[Index of Archives]
[Kernel Newbies]
[Netfilter]
[Bugtraq]
[Photo]
[Stuff]
[Gimp]
[Yosemite News]
[MIPS Linux]
[ARM Linux]
[Linux Security]
[Linux RAID]
[Video 4 Linux]
[Linux for the blind]
[Linux Resources]