Re: [RFC] Event counters [3/3]: Convert NUMA counters to event counters

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Tue, 20 Dec 2005, Christoph Lameter wrote:

> no longer be available (maybe one could improvise one with the stats of
> the processors on the node?)

Here is such an alternate patch. It creates a per cpu vmstat in 
/sys/devices/system/cpu/cpu<nr>/vmstat (omitting the zoned counters which
are per zone and therefore node specific). 
This allows one to observe VM related events occurring on a specific 
processor

It also preserves /sys/devices/system/node/node<nr>/numa_stats which will
contain the summed up event counters for all the processors on the node.

This will preserve the existing numa_miss and numa_other fields and add
lots of other information that may be useful.

There is a global numa_miss and numa_other field available in /proc/vmstat
with both these patches. The fields may be helpful to check for fast 
checks if allocation requests are properly satisfied.

Index: linux-2.6.15-rc5-mm3/include/linux/mmzone.h
===================================================================
--- linux-2.6.15-rc5-mm3.orig/include/linux/mmzone.h	2005-12-20 13:15:44.000000000 -0800
+++ linux-2.6.15-rc5-mm3/include/linux/mmzone.h	2005-12-21 13:28:13.000000000 -0800
@@ -79,14 +79,6 @@ struct per_cpu_pageset {
 	s8 vm_stat_diff[NR_STAT_ITEMS];
 #endif
 
-#ifdef CONFIG_NUMA
-	unsigned long numa_hit;		/* allocated in intended node */
-	unsigned long numa_miss;	/* allocated in non intended node */
-	unsigned long numa_foreign;	/* was intended here, hit elsewhere */
-	unsigned long interleave_hit; 	/* interleaver prefered this zone */
-	unsigned long local_node;	/* allocation from local node */
-	unsigned long other_node;	/* allocation from other node */
-#endif
 } ____cacheline_aligned_in_smp;
 
 #ifdef CONFIG_NUMA
Index: linux-2.6.15-rc5-mm3/include/linux/page-flags.h
===================================================================
--- linux-2.6.15-rc5-mm3.orig/include/linux/page-flags.h	2005-12-20 14:55:00.000000000 -0800
+++ linux-2.6.15-rc5-mm3/include/linux/page-flags.h	2005-12-21 14:03:13.000000000 -0800
@@ -103,6 +103,9 @@ enum event_item { PGPGIN, PGPGOUT, PSWPI
 		FOR_ALL_ZONES(PGSCAN_DIRECT),
 		PGINODESTEAL, SLABS_SCANNED, KSWAPD_STEAL, KSWAPD_INODESTEAL,
 		PAGEOUTRUN, ALLOCSTALL, PGROTATED,
+#ifdef CONFIG_NUMA
+		NUMA_MISS, NUMA_OFF_NODE,
+#endif
 		NR_EVENT_ITEMS
 };
 
@@ -131,6 +134,8 @@ static inline void count_events(enum eve
 	__get_cpu_var(event_states).event[item] += delta;
 }
 
+extern char *vmstat_text[];
+
 #else
 /* Disable counters */
 #define get_cpu_events(e)	0L
Index: linux-2.6.15-rc5-mm3/mm/mempolicy.c
===================================================================
--- linux-2.6.15-rc5-mm3.orig/mm/mempolicy.c	2005-12-16 11:44:09.000000000 -0800
+++ linux-2.6.15-rc5-mm3/mm/mempolicy.c	2005-12-21 13:28:13.000000000 -0800
@@ -1072,15 +1072,9 @@ static struct page *alloc_page_interleav
 					unsigned nid)
 {
 	struct zonelist *zl;
-	struct page *page;
 
 	zl = NODE_DATA(nid)->node_zonelists + gfp_zone(gfp);
-	page = __alloc_pages(gfp, order, zl);
-	if (page && page_zone(page) == zl->zones[0]) {
-		zone_pcp(zl->zones[0],get_cpu())->interleave_hit++;
-		put_cpu();
-	}
-	return page;
+	return __alloc_pages(gfp, order, zl);
 }
 
 /**
Index: linux-2.6.15-rc5-mm3/mm/page_alloc.c
===================================================================
--- linux-2.6.15-rc5-mm3.orig/mm/page_alloc.c	2005-12-20 15:46:51.000000000 -0800
+++ linux-2.6.15-rc5-mm3/mm/page_alloc.c	2005-12-21 13:40:34.000000000 -0800
@@ -989,24 +989,16 @@ void mark_free_pages(struct zone *zone)
 
 #endif /* CONFIG_PM */
 
-static void zone_statistics(struct zonelist *zonelist, struct zone *z, int cpu)
+static void zone_statistics(struct zonelist *zonelist, struct zone *z, int pages)
 {
 #ifdef CONFIG_NUMA
 	pg_data_t *pg = z->zone_pgdat;
-	pg_data_t *orig = zonelist->zones[0]->zone_pgdat;
-	struct per_cpu_pageset *p;
 
-	p = zone_pcp(z, cpu);
-	if (pg == orig) {
-		p->numa_hit++;
-	} else {
-		p->numa_miss++;
-		zone_pcp(zonelist->zones[0], cpu)->numa_foreign++;
-	}
-	if (pg == NODE_DATA(numa_node_id()))
-		p->local_node++;
-	else
-		p->other_node++;
+	if (pg != zonelist->zones[0]->zone_pgdat)
+		count_events(NUMA_MISS, pages);
+
+	if (pg != NODE_DATA(numa_node_id()))
+		count_events(NUMA_OFF_NODE, pages);
 #endif
 }
 
@@ -1098,7 +1090,7 @@ again:
 	}
 
 	count_zone_events(PGALLOC, zone, 1 << order);
-	zone_statistics(zonelist, zone, cpu);
+	zone_statistics(zonelist, zone, 1 << order);
 	local_irq_restore(flags);
 	put_cpu();
 
@@ -2586,21 +2578,6 @@ static int zoneinfo_show(struct seq_file
 					   pageset->pcp[j].high,
 					   pageset->pcp[j].batch);
 			}
-#ifdef CONFIG_NUMA
-			seq_printf(m,
-				   "\n            numa_hit:       %lu"
-				   "\n            numa_miss:      %lu"
-				   "\n            numa_foreign:   %lu"
-				   "\n            interleave_hit: %lu"
-				   "\n            local_node:     %lu"
-				   "\n            other_node:     %lu",
-				   pageset->numa_hit,
-				   pageset->numa_miss,
-				   pageset->numa_foreign,
-				   pageset->interleave_hit,
-				   pageset->local_node,
-				   pageset->other_node);
-#endif
 		}
 		seq_printf(m,
 			   "\n  all_unreclaimable: %u"
@@ -2625,7 +2602,7 @@ struct seq_operations zoneinfo_op = {
 	.show	= zoneinfo_show,
 };
 
-static char *vmstat_text[] = {
+char *vmstat_text[] = {
 	/* Zoned VM counters */
 	"nr_mapped",
 	"nr_pagecache",
@@ -2681,7 +2658,11 @@ static char *vmstat_text[] = {
 	"pageoutrun",
 	"allocstall",
 
-	"pgrotated"
+	"pgrotated",
+#ifdef CONFIG_NUMA
+	"numa_miss",
+	"other_node"
+#endif
 #endif
 };
 
Index: linux-2.6.15-rc5-mm3/drivers/base/node.c
===================================================================
--- linux-2.6.15-rc5-mm3.orig/drivers/base/node.c	2005-12-20 13:15:45.000000000 -0800
+++ linux-2.6.15-rc5-mm3/drivers/base/node.c	2005-12-21 14:01:38.000000000 -0800
@@ -93,41 +93,16 @@ static SYSDEV_ATTR(meminfo, S_IRUGO, nod
 
 static ssize_t node_read_numastat(struct sys_device * dev, char * buf)
 {
-	unsigned long numa_hit, numa_miss, interleave_hit, numa_foreign;
-	unsigned long local_node, other_node;
-	int i, cpu;
-	pg_data_t *pg = NODE_DATA(dev->id);
-	numa_hit = 0;
-	numa_miss = 0;
-	interleave_hit = 0;
-	numa_foreign = 0;
-	local_node = 0;
-	other_node = 0;
-	for (i = 0; i < MAX_NR_ZONES; i++) {
-		struct zone *z = &pg->node_zones[i];
-		for (cpu = 0; cpu < NR_CPUS; cpu++) {
-			struct per_cpu_pageset *ps = zone_pcp(z,cpu);
-			numa_hit += ps->numa_hit;
-			numa_miss += ps->numa_miss;
-			numa_foreign += ps->numa_foreign;
-			interleave_hit += ps->interleave_hit;
-			local_node += ps->local_node;
-			other_node += ps->other_node;
-		}
-	}
-	return sprintf(buf,
-		       "numa_hit %lu\n"
-		       "numa_miss %lu\n"
-		       "numa_foreign %lu\n"
-		       "interleave_hit %lu\n"
-		       "local_node %lu\n"
-		       "other_node %lu\n",
-		       numa_hit,
-		       numa_miss,
-		       numa_foreign,
-		       interleave_hit,
-		       local_node,
-		       other_node);
+	unsigned long v[NR_EVENT_ITEMS];
+	int i;
+	char *p = buf;
+
+	sum_events(v, &node_to_cpumask(dev->id));
+
+	for (i = 0; i < NR_EVENT_ITEMS; i++)
+		p += sprintf(p, "%s %ld\n", vmstat_text[NR_STAT_ITEMS + i],
+				v[i]);
+	return p - buf;
 }
 static SYSDEV_ATTR(numastat, S_IRUGO, node_read_numastat, NULL);
 
Index: linux-2.6.15-rc5-mm3/drivers/base/cpu.c
===================================================================
--- linux-2.6.15-rc5-mm3.orig/drivers/base/cpu.c	2005-12-16 11:44:06.000000000 -0800
+++ linux-2.6.15-rc5-mm3/drivers/base/cpu.c	2005-12-21 13:59:35.000000000 -0800
@@ -8,6 +8,7 @@
 #include <linux/cpu.h>
 #include <linux/topology.h>
 #include <linux/device.h>
+#include <linux/page-flags.h>
 
 #include "base.h"
 
@@ -83,6 +84,26 @@ static inline void register_cpu_control(
 }
 #endif /* CONFIG_HOTPLUG_CPU */
 
+static ssize_t cpu_read_vmstat(struct sys_device * dev, char * buf)
+{
+	int i;
+	char *p = buf;
+	cpumask_t mask;
+	struct cpu *cpu = container_of(dev, struct cpu, sysdev);
+	unsigned long v[NR_EVENT_ITEMS];
+
+	cpus_clear(mask);
+	cpu_set(cpu->sysdev.id, mask);
+	sum_events(v, &mask);
+
+	for (i = 0; i < NR_EVENT_ITEMS; i++)
+		p += sprintf(p, "%s %ld\n",
+			vmstat_text[NR_STAT_ITEMS + i], v[i]);
+	return p - buf;
+}
+static SYSDEV_ATTR(vmstat, S_IRUGO, cpu_read_vmstat, NULL);
+
+
 #ifdef CONFIG_KEXEC
 #include <linux/kexec.h>
 
@@ -138,6 +159,7 @@ int __devinit register_cpu(struct cpu *c
 	if (!error)
 		error = sysdev_create_file(&cpu->sysdev, &attr_crash_notes);
 #endif
+	sysdev_create_file(&cpu->sysdev, &attr_vmstat);
 	return error;
 }
 
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [email protected]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[Index of Archives]     [Kernel Newbies]     [Netfilter]     [Bugtraq]     [Photo]     [Stuff]     [Gimp]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Video 4 Linux]     [Linux for the blind]     [Linux Resources]
  Powered by Linux