Christoph Lameter wrote:
> The -1 is optimized away for the non-NUMA case. In the NUMA case it's an
> additional parameter that is passed to kmem_cache_alloc. So it's one
> additional register load that allows us to avoid a separate function for
> non-node-specific allocations.

Correct, I was thinking about the NUMA case.
You've decided to add one register load to every call of kmalloc. On
i386, kmalloc_node() is a 24-byte function. I'd bet that adding the node
parameter to every call of kmalloc causes a .text increase larger than
240 bytes. And that's before considering that you've increased the
number of conditional branches in every kmalloc(32, GFP_KERNEL) call by
33%, i.e. from 3 to 4 conditional branch instructions.
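
To make the comparison concrete, here's a rough sketch of the two calling
conventions (illustrative only, not code from either patch; buf and nid are
made-up names):

	/* one entry point: every call site must pass a node, usually -1 */
	void *kmalloc(size_t size, int flags, int node);

	buf = kmalloc(32, GFP_KERNEL, -1);

versus

	/* common case untouched; NUMA-aware callers use an explicit variant */
	void *kmalloc(size_t size, int flags);
	void *kmalloc_node(size_t size, int flags, int node);

	buf = kmalloc(32, GFP_KERNEL);
	buf = kmalloc_node(32, GFP_KERNEL, nid);
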
I'd add an explicit kmalloc_node() function instead. Attached is a
prototype patch. You'd have to reintroduce the flags parameter to
kmem_cache_alloc_node() and update kmalloc_node accordingly.
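
For illustration, a NUMA-aware caller of the new function would then look
roughly like this (made-up example; my_struct is hypothetical, and
numa_node_id() simply selects the node we're currently running on):

	struct my_struct *p;

	/* place the object on the local node */
	p = kmalloc_node(sizeof(*p), GFP_KERNEL, numa_node_id());
	if (p == NULL)
		return -ENOMEM;

All other callers keep using plain kmalloc() and pay nothing extra.
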
The patch was manually edited; I hope it applies to a recent tree ;-)
What do you think?
--
Manfred
// $Header$
// Kernel Version:
// VERSION = 2
// PATCHLEVEL = 6
// SUBLEVEL = 11
// EXTRAVERSION = -mm2
--- 2.6/include/linux/slab.h	2005-03-12 01:05:50.000000000 +0100
+++ build-2.6/include/linux/slab.h	2005-03-30 19:33:56.158317105 +0200
@@ -62,16 +62,9 @@
 extern int kmem_cache_destroy(kmem_cache_t *);
 extern int kmem_cache_shrink(kmem_cache_t *);
 extern void *kmem_cache_alloc(kmem_cache_t *, int);
-#ifdef CONFIG_NUMA
-extern void *kmem_cache_alloc_node(kmem_cache_t *, int);
-#else
-static inline void *kmem_cache_alloc_node(kmem_cache_t *cachep, int node)
-{
-	return kmem_cache_alloc(cachep, GFP_KERNEL);
-}
-#endif
 extern void kmem_cache_free(kmem_cache_t *, void *);
 extern unsigned int kmem_cache_size(kmem_cache_t *);
+extern kmem_cache_t * kmem_find_general_cachep(size_t size, int gfpflags);

 /* Size description struct for general caches. */
 struct cache_sizes {
@@ -109,6 +102,20 @@
 extern void kfree(const void *);
 extern unsigned int ksize(const void *);

+#ifdef CONFIG_NUMA
+extern void *kmem_cache_alloc_node(kmem_cache_t *, int);
+extern void *kmalloc_node(size_t size, int flags, int node);
+#else
+static inline void *kmem_cache_alloc_node(kmem_cache_t *cachep, int node)
+{
+	return kmem_cache_alloc(cachep, GFP_KERNEL);
+}
+static inline void *kmalloc_node(size_t size, int flags, int node)
+{
+	return kmalloc(size, flags);
+}
+#endif
+
 extern int FASTCALL(kmem_cache_reap(int));
 extern int FASTCALL(kmem_ptr_validate(kmem_cache_t *cachep, void *ptr));

--- 2.6/mm/slab.c	2005-03-12 12:36:34.000000000 +0100
+++ build-2.6/mm/slab.c	2005-03-30 19:44:15.428757330 +0200
@@ -586,7 +586,7 @@
 	return cachep->array[smp_processor_id()];
 }

-static inline kmem_cache_t * kmem_find_general_cachep (size_t size, int gfpflags)
+static inline kmem_cache_t *__find_general_cachep(size_t size, int gfpflags)
 {
 	struct cache_sizes *csizep = malloc_sizes;

@@ -610,6 +610,12 @@
 	return csizep->cs_cachep;
 }

+kmem_cache_t *kmem_find_general_cachep(size_t size, int gfpflags)
+{
+	return __find_general_cachep(size, gfpflags);
+}
+EXPORT_SYMBOL(kmem_find_general_cachep);
+
 /* Cal the num objs, wastage, and bytes left over for a given slab size. */
 static void cache_estimate (unsigned long gfporder, size_t size, size_t align,
 		 int flags, size_t *left_over, unsigned int *num)
@@ -2200,7 +2197,7 @@
 		list_del(&slabp->list);
 		objnr = (objp - slabp->s_mem) / cachep->objsize;
 		check_slabp(cachep, slabp);
-#if 0 /* disabled, not compatible with leak detection */
+#if DEBUG
 		if (slab_bufctl(slabp)[objnr] != BUFCTL_ALLOC) {
 			printk(KERN_ERR "slab: double free detected in cache "
 					"'%s', objp %p\n", cachep->name, objp);
@@ -2450,6 +2447,16 @@
 }
 EXPORT_SYMBOL(kmem_cache_alloc_node);

+void *kmalloc_node(size_t size, int flags, int node)
+{
+	kmem_cache_t *cachep;
+
+	cachep = kmem_find_general_cachep(size, flags);
+	if (unlikely(cachep == NULL))
+		return NULL;
+	return kmem_cache_alloc_node(cachep, node);
+}
+EXPORT_SYMBOL(kmalloc_node);
 #endif

 /**
@@ -2477,7 +2484,12 @@
 {
 	kmem_cache_t *cachep;

-	cachep = kmem_find_general_cachep(size, flags);
+	/* If you want to save a few bytes .text space: replace
+	 * __ with kmem_.
+	 * Then kmalloc uses the uninlined functions instead of the inline
+	 * functions.
+	 */
+	cachep = __find_general_cachep(size, flags);
 	if (unlikely(cachep == NULL))
 		return NULL;
 	return __cache_alloc(cachep, flags);