[PATCH -rt 5/5] slub: -rt port


Make SLUB usable on PREEMPT_RT:

 - use atomic bitops for the SlabFrozen/SlabDebug page bits, since with a
   preemptible allocator they can run concurrently with lock_page();
 - implement slab_lock()/slab_unlock() with lock_page()/unlock_page()
   instead of bit_spin_lock(PG_locked);
 - pop objects off the lockless freelist with a local cmpxchg, and install
   the cpu slab with a local cmpxchg, falling back to the slab somebody else
   installed when we lose the race;
 - clear s->cpu_slab[cpu] before merging the lockless freelist in
   deactivate_slab(), so no new objects can land on it while we merge;
 - flush cpu slabs from a workqueue (slub_flushd) instead of with IRQs
   disabled;
 - use the _nort variants for the IRQ disabling in the slow paths;
 - drop the !PREEMPT_RT dependency from the SLUB Kconfig entry.

Signed-off-by: Peter Zijlstra <[email protected]>
---
 init/Kconfig |    1 -
 mm/slub.c    |  260 ++++++++++++++++++++++++++++++++++++++++++++++++-----------
 2 files changed, 214 insertions(+), 47 deletions(-)

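Note (not part of the patch): the diff relies on helpers defined elsewhere in
the -rt queue: the local_irq_*_nort() family, __local_begin()/__local_end(),
__local_cmpxchg() and queue_work_cpu(). __local_begin()/__local_end() take the
place of local_irq_save()/local_irq_restore() around the fast paths,
__local_cmpxchg() is assumed to be a preemption-safe cmpxchg on a CPU-local
slot, and queue_work_cpu() is assumed to queue a work item on a specific CPU.
For the _nort family, a minimal sketch of the assumed behaviour (not taken
from this patch) would be:

#ifndef CONFIG_PREEMPT_RT
/* without -rt these are the ordinary IRQ operations */
# define local_irq_enable_nort()	local_irq_enable()
# define local_irq_disable_nort()	local_irq_disable()
# define local_irq_save_nort(flags)	local_irq_save(flags)
# define local_irq_restore_nort(flags)	local_irq_restore(flags)
#else
/* on -rt the sections they guard are made preemptible, so they do nothing */
# define local_irq_enable_nort()	do { } while (0)
# define local_irq_disable_nort()	do { } while (0)
# define local_irq_save_nort(flags)	local_save_flags(flags)
# define local_irq_restore_nort(flags)	(void)(flags)
#endif
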
Index: linux-2.6/mm/slub.c
===================================================================
--- linux-2.6.orig/mm/slub.c
+++ linux-2.6/mm/slub.c
@@ -20,6 +20,7 @@
 #include <linux/mempolicy.h>
 #include <linux/ctype.h>
 #include <linux/kallsyms.h>
+#include <linux/pagemap.h>
 
 /*
  * Lock order:
@@ -99,6 +100,8 @@
  * 			the fast path and disables lockless freelists.
  */
 
+#ifndef CONFIG_PREEMPT_RT
+
 #define FROZEN (1 << PG_active)
 
 #ifdef CONFIG_SLUB_DEBUG
@@ -137,6 +140,46 @@ static inline void ClearSlabDebug(struct
 	page->flags &= ~SLABDEBUG;
 }
 
+#else /* CONFIG_PREEMPT_RT */
+/*
+ * When the allocator is preemptible these operations can run concurrently
+ * with lock_page(), and hence need atomic ops.
+ */
+
+#define PG_frozen		PG_active
+#define PG_debug		PG_error
+
+static inline int SlabFrozen(struct page *page)
+{
+	return test_bit(PG_frozen, &page->flags);
+}
+
+static inline void SetSlabFrozen(struct page *page)
+{
+	set_bit(PG_frozen, &page->flags);
+}
+
+static inline void ClearSlabFrozen(struct page *page)
+{
+	clear_bit(PG_frozen, &page->flags);
+}
+
+static inline int SlabDebug(struct page *page)
+{
+	return test_bit(PG_debug, &page->flags);
+}
+
+static inline void SetSlabDebug(struct page *page)
+{
+	set_bit(PG_debug, &page->flags);
+}
+
+static inline void ClearSlabDebug(struct page *page)
+{
+	clear_bit(PG_debug, &page->flags);
+}
+#endif
+
 /*
  * Issues still to be resolved:
  *
@@ -1021,7 +1064,7 @@ static struct page *new_slab(struct kmem
 	BUG_ON(flags & ~(GFP_DMA | GFP_LEVEL_MASK));
 
 	if (flags & __GFP_WAIT)
-		local_irq_enable();
+		local_irq_enable_nort();
 
 	page = allocate_slab(s, flags & GFP_LEVEL_MASK, node);
 	if (!page)
@@ -1057,7 +1100,7 @@ static struct page *new_slab(struct kmem
 	page->inuse = 0;
 out:
 	if (flags & __GFP_WAIT)
-		local_irq_disable();
+		local_irq_disable_nort();
 	return page;
 }
 
@@ -1117,6 +1160,7 @@ static void discard_slab(struct kmem_cac
 /*
  * Per slab locking using the pagelock
  */
+#ifndef CONFIG_PREEMPT_RT
 static __always_inline void slab_lock(struct page *page)
 {
 	bit_spin_lock(PG_locked, &page->flags);
@@ -1134,6 +1178,22 @@ static __always_inline int slab_trylock(
 	rc = bit_spin_trylock(PG_locked, &page->flags);
 	return rc;
 }
+#else
+static __always_inline void slab_lock(struct page *page)
+{
+	lock_page(page);
+}
+
+static __always_inline void slab_unlock(struct page *page)
+{
+	unlock_page(page);
+}
+
+static __always_inline int slab_trylock(struct page *page)
+{
+	return !TestSetPageLocked(page);
+}
+#endif
 
 /*
  * Management of partially allocated slabs
@@ -1154,8 +1214,7 @@ static void add_partial(struct kmem_cach
 	spin_unlock(&n->list_lock);
 }
 
-static void remove_partial(struct kmem_cache *s,
-						struct page *page)
+static void remove_partial(struct kmem_cache *s, struct page *page)
 {
 	struct kmem_cache_node *n = get_node(s, page_to_nid(page));
 
@@ -1282,6 +1341,7 @@ static void unfreeze_slab(struct kmem_ca
 {
 	struct kmem_cache_node *n = get_node(s, page_to_nid(page));
 
+	BUG_ON(!SlabFrozen(page));
 	ClearSlabFrozen(page);
 	if (page->inuse) {
 
@@ -1310,29 +1370,52 @@ static void unfreeze_slab(struct kmem_ca
 	}
 }
 
+static void **get_lockless_object(struct page *page)
+{
+	void **object;
+
+again:
+	object = page->lockless_freelist;
+	if (object && __local_cmpxchg(&page->lockless_freelist,
+				object, object[page->offset]) != object)
+		goto again;
+
+	return object;
+}
+
 /*
  * Remove the cpu slab
  */
 static void deactivate_slab(struct kmem_cache *s, struct page *page, int cpu)
 {
 	/*
+	 * Take away the slab page before merging the lockless free list into
+	 * the regular free list, to ensure that no new entries land on the
+	 * lockless list while we are merging it.
+	 */
+	BUG_ON(page != s->cpu_slab[cpu]);
+	s->cpu_slab[cpu] = NULL;
+	barrier();
+
+	/*
 	 * Merge cpu freelist into freelist. Typically we get here
 	 * because both freelists are empty. So this is unlikely
 	 * to occur.
 	 */
-	while (unlikely(page->lockless_freelist)) {
+	for (;;) {
 		void **object;
 
 		/* Retrieve object from cpu_freelist */
-		object = page->lockless_freelist;
-		page->lockless_freelist = page->lockless_freelist[page->offset];
+		object = get_lockless_object(page);
+		if (likely(!object))
+			break;
 
 		/* And put onto the regular freelist */
 		object[page->offset] = page->freelist;
 		page->freelist = object;
 		page->inuse--;
 	}
-	s->cpu_slab[cpu] = NULL;
+
 	unfreeze_slab(s, page);
 }
 
@@ -1354,6 +1437,55 @@ static void __flush_cpu_slab(struct kmem
 		flush_slab(s, page, cpu);
 }
 
+#ifdef CONFIG_PREEMPT_RT
+struct slab_work_struct {
+	struct work_struct work;
+	struct kmem_cache *s;
+};
+
+static struct workqueue_struct *flush_slab_workqueue;
+static DEFINE_PER_CPU(struct slab_work_struct, slab_works);
+static DEFINE_MUTEX(flush_slab_mutex); /* XXX kill this */
+
+static int __init flush_cpu_slab_init(void)
+{
+	flush_slab_workqueue = create_workqueue("slub_flushd");
+	if (!flush_slab_workqueue)
+		panic("Failed to create slub_flushd\n");
+
+	return 0;
+}
+
+core_initcall(flush_cpu_slab_init);
+
+static void flush_cpu_slab_wq(struct work_struct *work)
+{
+	struct slab_work_struct *sw;
+	int cpu = smp_processor_id();
+
+	sw = container_of(work, struct slab_work_struct, work);
+	__flush_cpu_slab(sw->s, cpu);
+}
+
+static void flush_all(struct kmem_cache *s)
+{
+	int cpu;
+	struct workqueue_struct *wq = flush_slab_workqueue;
+
+	mutex_lock(&flush_slab_mutex);
+	for_each_online_cpu(cpu) {
+		struct slab_work_struct *sw = &per_cpu(slab_works, cpu);
+
+		INIT_WORK(&sw->work, flush_cpu_slab_wq);
+		sw->s = s;
+		queue_work_cpu(wq, &sw->work, cpu);
+	}
+	flush_workqueue(wq);
+	mutex_unlock(&flush_slab_mutex);
+}
+
+#else
+
 static void flush_cpu_slab(void *d)
 {
 	struct kmem_cache *s = d;
@@ -1374,6 +1506,7 @@ static void flush_all(struct kmem_cache 
 	local_irq_restore(flags);
 #endif
 }
+#endif
 
 /*
  * Slow path. The lockless freelist is empty or we need to perform
@@ -1396,13 +1529,24 @@ static void *__slab_alloc(struct kmem_ca
 		gfp_t gfpflags, int node, void *addr, struct page *page)
 {
 	void **object;
+	unsigned long flags;
 	int cpu = smp_processor_id();
 
+	local_irq_save_nort(flags);
+
+again:
 	if (!page)
 		goto new_slab;
 
 	slab_lock(page);
-	if (unlikely(node != -1 && page_to_nid(page) != node))
+	if (!SlabFrozen(page) || page != s->cpu_slab[cpu]) {
+		slab_unlock(page);
+		page = s->cpu_slab[cpu];
+		goto again;
+	}
+
+	if (unlikely((node != -1 && page_to_nid(page) != node) ||
+			page->lockless_freelist))  /* validate the need for this check */
 		goto another_slab;
 load_freelist:
 	object = page->freelist;
@@ -1415,7 +1559,9 @@ load_freelist:
 	page->lockless_freelist = object[page->offset];
 	page->inuse = s->objects;
 	page->freelist = NULL;
+out:
 	slab_unlock(page);
+	local_irq_restore_nort(flags);
 	return object;
 
 another_slab:
@@ -1424,40 +1570,42 @@ another_slab:
 new_slab:
 	page = get_partial(s, gfpflags, node);
 	if (page) {
-		s->cpu_slab[cpu] = page;
+		struct page *cur_page;
+
+		cur_page = __local_cmpxchg(&s->cpu_slab[cpu], NULL, page);
+		if (cur_page) {
+			/*
+			 * Someone else populated the cpu_slab while we got
+			 * preempted. We want the current one since it is
+			 * cache hot.
+			 */
+			unfreeze_slab(s, page);
+			page = cur_page;
+			goto again;
+		}
 		goto load_freelist;
 	}
 
 	page = new_slab(s, gfpflags, node);
 	if (page) {
-		cpu = smp_processor_id();
-		if (s->cpu_slab[cpu]) {
+		struct page *cur_page;
+
+		slab_lock(page);
+		SetSlabFrozen(page);
+		cur_page = __local_cmpxchg(&s->cpu_slab[cpu], NULL, page);
+		if (cur_page) {
 			/*
-			 * Someone else populated the cpu_slab while we
-			 * enabled interrupts, or we have gotten scheduled
-			 * on another cpu. The page may not be on the
-			 * requested node even if __GFP_THISNODE was
-			 * specified. So we need to recheck.
+			 * Someone else populated the cpu_slab while we got
+			 * preempted. We want the current one since it is
+			 * cache hot.
 			 */
-			if (node == -1 ||
-				page_to_nid(s->cpu_slab[cpu]) == node) {
-				/*
-				 * Current cpuslab is acceptable and we
-				 * want the current one since its cache hot
-				 */
-				discard_slab(s, page);
-				page = s->cpu_slab[cpu];
-				slab_lock(page);
-				goto load_freelist;
-			}
-			/* New slab does not fit our expectations */
-			flush_slab(s, s->cpu_slab[cpu], cpu);
+			unfreeze_slab(s, page);
+			page = cur_page;
+			goto again;
 		}
-		slab_lock(page);
-		SetSlabFrozen(page);
-		s->cpu_slab[cpu] = page;
 		goto load_freelist;
 	}
+	local_irq_restore_nort(flags);
 	return NULL;
 debug:
 	object = page->freelist;
@@ -1466,8 +1614,7 @@ debug:
 
 	page->inuse++;
 	page->freelist = object[page->offset];
-	slab_unlock(page);
-	return object;
+	goto out;
 }
 
 /*
@@ -1487,18 +1634,20 @@ static void __always_inline *slab_alloc(
 	void **object;
 	unsigned long flags;
 
-	local_irq_save(flags);
+	__local_begin(flags);
 	page = s->cpu_slab[smp_processor_id()];
 	if (unlikely(!page || !page->lockless_freelist ||
-			(node != -1 && page_to_nid(page) != node)))
+			(node != -1 && page_to_nid(page) != node))) {
 
+do_alloc:
 		object = __slab_alloc(s, gfpflags, node, addr, page);
 
-	else {
-		object = page->lockless_freelist;
-		page->lockless_freelist = object[page->offset];
+	} else {
+		object = get_lockless_object(page);
+		if (unlikely(!object))
+			goto do_alloc;
 	}
-	local_irq_restore(flags);
+	__local_end(flags);
 	return object;
 }
 
@@ -1529,7 +1678,9 @@ static void __slab_free(struct kmem_cach
 {
 	void *prior;
 	void **object = (void *)x;
+	unsigned long flags;
 
+	local_irq_save_nort(flags);
 	slab_lock(page);
 
 	if (unlikely(SlabDebug(page)))
@@ -1555,6 +1706,7 @@ checks_ok:
 
 out_unlock:
 	slab_unlock(page);
+	local_irq_restore_nort(flags);
 	return;
 
 slab_empty:
@@ -1566,6 +1718,7 @@ slab_empty:
 
 	slab_unlock(page);
 	discard_slab(s, page);
+	local_irq_restore_nort(flags);
 	return;
 
 debug:
@@ -1591,15 +1744,30 @@ static void __always_inline slab_free(st
 	void **object = (void *)x;
 	unsigned long flags;
 
-	local_irq_save(flags);
+	__local_begin(flags);
+	/*
+	 * We have to either take slab_lock(page) or disable preemption while
+	 * adding to the lockless freelist, because we have to guarantee that
+	 * page == s->cpu_slab[cpu] holds for the duration of the operation.
+	 *
+	 * Fix this by allowing non-active slabs to have a lockless_freelist?
+	 * Cannot do, since Christoph is about to pull lockless_freelist out
+	 * of struct page.
+	 *
+	 * preempt_disable() seems cheapest for these few instructions versus
+	 * the atomic ops involved in slab_lock().
+	 */
+	preempt_disable();
 	if (likely(page == s->cpu_slab[smp_processor_id()] &&
-						!SlabDebug(page))) {
+				!SlabDebug(page))) {
 		object[page->offset] = page->lockless_freelist;
 		page->lockless_freelist = object;
-	} else
+		preempt_enable();
+	} else {
+		preempt_enable();
 		__slab_free(s, page, x, addr);
-
-	local_irq_restore(flags);
+	}
+	__local_end(flags);
 }
 
 void kmem_cache_free(struct kmem_cache *s, void *x)
Index: linux-2.6/init/Kconfig
===================================================================
--- linux-2.6.orig/init/Kconfig
+++ linux-2.6/init/Kconfig
@@ -578,7 +578,6 @@ config SLAB
 
 config SLUB
 	bool "SLUB (Unqueued Allocator)"
-	depends on !PREEMPT_RT
 	help
 	   SLUB is a slab allocator that minimizes cache line usage
 	   instead of managing queues of cached objects (SLAB approach).
