On Tue, 20 Sep 2005, Petr Vandrovec wrote:
> slab belonging to node#1, while having acquired lock for cachep belonging
> to node #0. Due to this check_spinlock_acquired_node(cachep, nodeid) fails
> (check_spinlock_acquired_node(cachep, 0) would succeed).
Hmmm. If a node runs out of memory then pages from another node may end up
on the slab list of a node. But it seems that free_block cannot handle
that properly.
How are you producing the problem?
Could you try the following patch:
---
The numa slab allocator may allocate pages from foreign nodes onto the lists
for a particular node if a node runs out of memory. Inspecting the slab->nodeid
field will not reflect that the page is now in use for the slabs of another node.
This patch fixes that issue by adding a node field to free_block so that the caller
can indicate which node currently uses a slab.
Also removes the check for the current node from kmalloc_cache_node since the
process may shift later to another node which may lead to an allocation on another
node than intended.
Signed-off-by: Christoph Lameter <[email protected]>
Index: linux-2.6.14-rc1/mm/slab.c
===================================================================
--- linux-2.6.14-rc1.orig/mm/slab.c 2005-09-21 00:09:05.000000000 +0000
+++ linux-2.6.14-rc1/mm/slab.c 2005-09-21 00:48:12.000000000 +0000
@@ -639,7 +639,7 @@ static enum {
static DEFINE_PER_CPU(struct work_struct, reap_work);
-static void free_block(kmem_cache_t* cachep, void** objpp, int len);
+static void free_block(kmem_cache_t* cachep, void** objpp, int len, int node);
static void enable_cpucache (kmem_cache_t *cachep);
static void cache_reap (void *unused);
static int __node_shrink(kmem_cache_t *cachep, int node);
@@ -804,7 +804,7 @@ static inline void __drain_alien_cache(k
if (ac->avail) {
spin_lock(&rl3->list_lock);
- free_block(cachep, ac->entry, ac->avail);
+ free_block(cachep, ac->entry, ac->avail, node);
ac->avail = 0;
spin_unlock(&rl3->list_lock);
}
@@ -925,7 +925,7 @@ static int __devinit cpuup_callback(stru
/* Free limit for this kmem_list3 */
l3->free_limit -= cachep->batchcount;
if (nc)
- free_block(cachep, nc->entry, nc->avail);
+ free_block(cachep, nc->entry, nc->avail, node);
if (!cpus_empty(mask)) {
spin_unlock(&l3->list_lock);
@@ -934,7 +934,7 @@ static int __devinit cpuup_callback(stru
if (l3->shared) {
free_block(cachep, l3->shared->entry,
- l3->shared->avail);
+ l3->shared->avail, node);
kfree(l3->shared);
l3->shared = NULL;
}
@@ -1882,12 +1882,13 @@ static void do_drain(void *arg)
{
kmem_cache_t *cachep = (kmem_cache_t*)arg;
struct array_cache *ac;
+ int node = numa_node_id();
check_irq_off();
ac = ac_data(cachep);
- spin_lock(&cachep->nodelists[numa_node_id()]->list_lock);
- free_block(cachep, ac->entry, ac->avail);
- spin_unlock(&cachep->nodelists[numa_node_id()]->list_lock);
+ spin_lock(&cachep->nodelists[node]->list_lock);
+ free_block(cachep, ac->entry, ac->avail, node);
+ spin_unlock(&cachep->nodelists[node]->list_lock);
ac->avail = 0;
}
@@ -2608,7 +2609,7 @@ done:
/*
* Caller needs to acquire correct kmem_list's list_lock
*/
-static void free_block(kmem_cache_t *cachep, void **objpp, int nr_objects)
+static void free_block(kmem_cache_t *cachep, void **objpp, int nr_objects, int node)
{
int i;
struct kmem_list3 *l3;
@@ -2617,14 +2618,12 @@ static void free_block(kmem_cache_t *cac
void *objp = objpp[i];
struct slab *slabp;
unsigned int objnr;
- int nodeid = 0;
slabp = GET_PAGE_SLAB(virt_to_page(objp));
- nodeid = slabp->nodeid;
- l3 = cachep->nodelists[nodeid];
+ l3 = cachep->nodelists[node];
list_del(&slabp->list);
objnr = (objp - slabp->s_mem) / cachep->objsize;
- check_spinlock_acquired_node(cachep, nodeid);
+ check_spinlock_acquired_node(cachep, node);
check_slabp(cachep, slabp);
@@ -2664,13 +2663,14 @@ static void cache_flusharray(kmem_cache_
{
int batchcount;
struct kmem_list3 *l3;
+ int node = numa_node_id();
batchcount = ac->batchcount;
#if DEBUG
BUG_ON(!batchcount || batchcount > ac->avail);
#endif
check_irq_off();
- l3 = cachep->nodelists[numa_node_id()];
+ l3 = cachep->nodelists[node];
spin_lock(&l3->list_lock);
if (l3->shared) {
struct array_cache *shared_array = l3->shared;
@@ -2686,7 +2686,7 @@ static void cache_flusharray(kmem_cache_
}
}
- free_block(cachep, ac->entry, batchcount);
+ free_block(cachep, ac->entry, batchcount, node);
free_done:
#if STATS
{
@@ -2751,7 +2751,7 @@ static inline void __cache_free(kmem_cac
} else {
spin_lock(&(cachep->nodelists[nodeid])->
list_lock);
- free_block(cachep, &objp, 1);
+ free_block(cachep, &objp, 1, nodeid);
spin_unlock(&(cachep->nodelists[nodeid])->
list_lock);
}
@@ -2844,7 +2844,7 @@ void *kmem_cache_alloc_node(kmem_cache_t
unsigned long save_flags;
void *ptr;
- if (nodeid == numa_node_id() || nodeid == -1)
+ if (nodeid == -1)
return __cache_alloc(cachep, flags);
if (unlikely(!cachep->nodelists[nodeid])) {
@@ -3079,7 +3079,7 @@ static int alloc_kmemlist(kmem_cache_t *
if ((nc = cachep->nodelists[node]->shared))
free_block(cachep, nc->entry,
- nc->avail);
+ nc->avail, node);
l3->shared = new;
if (!cachep->nodelists[node]->alien) {
@@ -3160,7 +3160,7 @@ static int do_tune_cpucache(kmem_cache_t
if (!ccold)
continue;
spin_lock_irq(&cachep->nodelists[cpu_to_node(i)]->list_lock);
- free_block(cachep, ccold->entry, ccold->avail);
+ free_block(cachep, ccold->entry, ccold->avail, cpu_to_node(i));
spin_unlock_irq(&cachep->nodelists[cpu_to_node(i)]->list_lock);
kfree(ccold);
}
@@ -3240,7 +3240,7 @@ static void drain_array_locked(kmem_cach
if (tofree > ac->avail) {
tofree = (ac->avail+1)/2;
}
- free_block(cachep, ac->entry, tofree);
+ free_block(cachep, ac->entry, tofree, node);
ac->avail -= tofree;
memmove(ac->entry, &(ac->entry[tofree]),
sizeof(void*)*ac->avail);
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [email protected]
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/
[Index of Archives]
[Kernel Newbies]
[Netfilter]
[Bugtraq]
[Photo]
[Gimp]
[Yosemite News]
[MIPS Linux]
[ARM Linux]
[Linux Security]
[Linux RAID]
[Video 4 Linux]
[Linux for the blind]
|
|