[PATCH rc1-mm3] timers: kill timer_list->lock

The timer->lock is needed in __mod_timer() because a freshly
initialized timer has base == NULL, so __mod_timer() can't use
old_base for synchronization.

With this patch init_timer() sets ->_base to the current CPU's
per_cpu(tvec_bases). This simplifies the code because we no longer
need to check that base != NULL, slightly speeds up __mod_timer(),
and reduces the size of timer_list.
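
With the patch, init_timer() is no longer a header inline; stripped of
the BUILD_BUG_ON layout check it boils down to the following (see the
kernel/timer.c hunk below for the real version):

	void fastcall init_timer(struct timer_list *timer)
	{
		timer->_base = &per_cpu(tvec_bases, __smp_processor_id());
		timer->magic = TIMER_MAGIC;
	}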

One problem: TIMER_INITIALIZER can't use per_cpu(tvec_bases).
So this patch adds a global
	struct {
		spinlock_t lock;
		struct timer_list *running_timer;
	} fake_timer_base;
which is used by TIMER_INITIALIZER.

It is admittedly ugly, but it cannot cause scalability problems:
the global fake_timer_base.lock is taken only the first time
__mod_timer() is called on a compile-time initialized timer.
After that the timer migrates to the local CPU's base.
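
For example, a compile-time initialized timer goes through
fake_timer_base exactly once (illustrative sketch only, demo_fn and
demo_timer are made-up names):

	static void demo_fn(unsigned long data);
	static struct timer_list demo_timer =
			TIMER_INITIALIZER(demo_fn, 0, 0);

	/* First arming: __mod_timer() takes fake_timer_base.lock
	 * (its layout matches the start of tvec_t_base_s), moves the
	 * timer onto this CPU's tvec_bases, and from then on only
	 * the per-cpu base locks are ever taken.
	 */
	mod_timer(&demo_timer, jiffies + HZ);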

Signed-off-by: Oleg Nesterov <[email protected]>

--- rc1-mm3/include/linux/timer.h~	2005-03-27 21:05:51.000000000 +0400
+++ rc1-mm3/include/linux/timer.h	2005-03-27 21:07:29.000000000 +0400
@@ -12,7 +12,6 @@ struct timer_list {
 	struct list_head entry;
 	unsigned long expires;
 
-	spinlock_t lock;
 	unsigned long magic;
 
 	void (*function)(unsigned long);
@@ -25,15 +24,11 @@ struct timer_list {
  * To save space, we play games with the least significant bit
  * of timer_list->_base.
  *
- * If (_base & __TIMER_PENDING), the timer is pending. Implies
- * (_base & ~__TIMER_PENDING) != NULL.
+ * If (_base & __TIMER_PENDING), the timer is pending.
  *
- * (_base & ~__TIMER_PENDING), if != NULL, is the address of the
- * timer's per-cpu tvec_t_base_s where it was last scheduled and
- * where it may still be running.
- *
- * If (_base == NULL), the timer was not scheduled yet or it was
- * cancelled by del_timer_sync(), so it is not running on any CPU.
+ * (_base & ~__TIMER_PENDING) is the address of the timer's
+ * per-cpu tvec_t_base_s where it was last registered and where
+ * it may still be running.
  */
 
 #define	__TIMER_PENDING	1
@@ -45,28 +40,17 @@ static inline int __timer_pending(struct
 
 #define TIMER_MAGIC	0x4b87ad6e
 
+extern struct fake_timer_base fake_timer_base;
+
 #define TIMER_INITIALIZER(_function, _expires, _data) {		\
 		.function = (_function),			\
 		.expires = (_expires),				\
 		.data = (_data),				\
-		._base = NULL,					\
+		._base = (void*)&fake_timer_base,		\
 		.magic = TIMER_MAGIC,				\
-		.lock = SPIN_LOCK_UNLOCKED,			\
 	}
 
-/***
- * init_timer - initialize a timer.
- * @timer: the timer to be initialized
- *
- * init_timer() must be done to a timer prior calling *any* of the
- * other timer functions.
- */
-static inline void init_timer(struct timer_list * timer)
-{
-	timer->_base = NULL;
-	timer->magic = TIMER_MAGIC;
-	spin_lock_init(&timer->lock);
-}
+void fastcall init_timer(struct timer_list * timer);
 
 /***
  * timer_pending - is a timer pending?
--- rc1-mm3/kernel/timer.c~	2005-03-27 21:06:19.000000000 +0400
+++ rc1-mm3/kernel/timer.c	2005-03-27 21:07:29.000000000 +0400
@@ -68,8 +68,8 @@ typedef struct tvec_root_s {
 
 struct tvec_t_base_s {
 	spinlock_t lock;
-	unsigned long timer_jiffies;
 	struct timer_list *running_timer;
+	unsigned long timer_jiffies;
 	tvec_root_t tv1;
 	tvec_t tv2;
 	tvec_t tv3;
@@ -78,6 +78,16 @@ struct tvec_t_base_s {
 } ____cacheline_aligned_in_smp;
 
 typedef struct tvec_t_base_s tvec_base_t;
+static DEFINE_PER_CPU(tvec_base_t, tvec_bases);
+
+struct fake_timer_base {
+	spinlock_t lock;
+	struct timer_list *running_timer;
+} ____cacheline_aligned_in_smp;
+
+struct fake_timer_base fake_timer_base =
+		{ .lock = SPIN_LOCK_UNLOCKED };
+EXPORT_SYMBOL(fake_timer_base);
 
 static inline void set_running_timer(tvec_base_t *base,
 					struct timer_list *timer)
@@ -87,6 +97,28 @@ static inline void set_running_timer(tve
 #endif
 }
 
+/***
+ * init_timer - initialize a timer.
+ * @timer: the timer to be initialized
+ *
+ * init_timer() must be done to a timer prior calling *any* of the
+ * other timer functions.
+ */
+void fastcall init_timer(struct timer_list * timer)
+{
+	BUILD_BUG_ON(
+		offsetof(tvec_base_t, lock) !=
+		offsetof(struct fake_timer_base, lock)
+			||
+		offsetof(tvec_base_t, running_timer) !=
+		offsetof(struct fake_timer_base, running_timer)
+	);
+
+	timer->_base = &per_cpu(tvec_bases, __smp_processor_id());
+	timer->magic = TIMER_MAGIC;
+}
+EXPORT_SYMBOL(init_timer);
+
 static inline void __set_base(struct timer_list *timer,
 				tvec_base_t *base, int pending)
 {
@@ -101,9 +133,6 @@ static inline tvec_base_t *timer_base(st
 	return (tvec_base_t*)((unsigned long)timer->_base & ~__TIMER_PENDING);
 }
 
-/* Fake initialization */
-static DEFINE_PER_CPU(tvec_base_t, tvec_bases) = { SPIN_LOCK_UNLOCKED };
-
 static void check_timer_failed(struct timer_list *timer)
 {
 	static int whine_count;
@@ -118,7 +147,6 @@ static void check_timer_failed(struct ti
 	/*
 	 * Now fix it up
 	 */
-	spin_lock_init(&timer->lock);
 	timer->magic = TIMER_MAGIC;
 }
 
@@ -182,39 +210,33 @@ int __mod_timer(struct timer_list *timer
 	check_timer(timer);
 
 	do {
-		spin_lock_irqsave(&timer->lock, flags);
+		local_irq_save(flags);
 		new_base = &__get_cpu_var(tvec_bases);
 		old_base = timer_base(timer);
 
-		/* Prevent AB-BA deadlocks. */
+		/* Prevent AB-BA deadlocks.
+		 * NOTE: (old_base == new_base) => (new_lock == 0)
+		 */
 		new_lock = old_base < new_base;
 		if (new_lock)
 			spin_lock(&new_base->lock);
+		spin_lock(&old_base->lock);
 
-		/* Note:
-		 *	(old_base == NULL)     => (new_lock == 1)
-		 *	(old_base == new_base) => (new_lock == 0)
-		 */
-		if (old_base) {
-			spin_lock(&old_base->lock);
+		if (timer_base(timer) != old_base)
+			goto unlock;
 
-			if (timer_base(timer) != old_base)
+		if (old_base != new_base) {
+			/* Ensure the timer is serialized. */
+			if (old_base->running_timer == timer)
 				goto unlock;
 
-			if (old_base != new_base) {
-				/* Ensure the timer is serialized. */
-				if (old_base->running_timer == timer)
-					goto unlock;
-
-				if (!new_lock) {
-					spin_lock(&new_base->lock);
-					new_lock = 1;
-				}
+			if (!new_lock) {
+				spin_lock(&new_base->lock);
+				new_lock = 1;
 			}
 		}
 
 		ret = 0;
-		/* We hold timer->lock, no need to check old_base != 0. */
 		if (timer_pending(timer)) {
 			list_del(&timer->entry);
 			ret = 1;
@@ -224,11 +246,9 @@ int __mod_timer(struct timer_list *timer
 		internal_add_timer(new_base, timer);
 		__set_base(timer, new_base, 1);
 unlock:
-		if (old_base)
-			spin_unlock(&old_base->lock);
 		if (new_lock)
 			spin_unlock(&new_base->lock);
-		spin_unlock_irqrestore(&timer->lock, flags);
+		spin_unlock_irqrestore(&old_base->lock, flags);
 	} while (ret == -1);
 
 	return ret;
@@ -355,19 +375,14 @@ EXPORT_SYMBOL(del_timer);
  */
 int del_timer_sync(struct timer_list *timer)
 {
-	int ret;
+	unsigned long flags;
+	tvec_base_t *base;
+	int ret = -1;
 
 	check_timer(timer);
 
-	ret = 0;
-	for (;;) {
-		unsigned long flags;
-		tvec_base_t *base;
-
+	do {
 		base = timer_base(timer);
-		if (!base)
-			return ret;
-
 		spin_lock_irqsave(&base->lock, flags);
 
 		if (base->running_timer == timer)
@@ -376,18 +391,19 @@ int del_timer_sync(struct timer_list *ti
 		if (timer_base(timer) != base)
 			goto unlock;
 
+		ret = 0;
 		if (timer_pending(timer)) {
 			list_del(&timer->entry);
+			__set_base(timer, base, 0);
 			ret = 1;
 		}
-		/* Need to make sure that anybody who sees a NULL base
-		 * also sees the list ops */
-		smp_wmb();
-		timer->_base = NULL;
 unlock:
 		spin_unlock_irqrestore(&base->lock, flags);
-	}
+	} while (ret == -1);
+
+	return ret;
 }
+
 EXPORT_SYMBOL(del_timer_sync);
 
 /***
@@ -1323,22 +1339,16 @@ static void __devinit init_timers_cpu(in
 }
 
 #ifdef CONFIG_HOTPLUG_CPU
-static int migrate_timer_list(tvec_base_t *new_base, struct list_head *head)
+static void migrate_timer_list(tvec_base_t *new_base, struct list_head *head)
 {
 	struct timer_list *timer;
 
 	while (!list_empty(head)) {
 		timer = list_entry(head->next, struct timer_list, entry);
-		/* We're locking backwards from __mod_timer order here,
-		   beware deadlock. */
-		if (!spin_trylock(&timer->lock))
-			return 0;
 		list_del(&timer->entry);
 		internal_add_timer(new_base, timer);
 		__set_base(timer, new_base, 1);
-		spin_unlock(&timer->lock);
 	}
-	return 1;
 }
 
 static void __devinit migrate_timers(int cpu)
@@ -1352,7 +1362,6 @@ static void __devinit migrate_timers(int
 	new_base = &get_cpu_var(tvec_bases);
 
 	local_irq_disable();
-again:
 	/* Prevent deadlocks via ordering by old_base < new_base. */
 	if (old_base < new_base) {
 		spin_lock(&new_base->lock);
@@ -1365,26 +1374,17 @@ again:
 	if (old_base->running_timer)
 		BUG();
 	for (i = 0; i < TVR_SIZE; i++)
-		if (!migrate_timer_list(new_base, old_base->tv1.vec + i))
-			goto unlock_again;
-	for (i = 0; i < TVN_SIZE; i++)
-		if (!migrate_timer_list(new_base, old_base->tv2.vec + i)
-		    || !migrate_timer_list(new_base, old_base->tv3.vec + i)
-		    || !migrate_timer_list(new_base, old_base->tv4.vec + i)
-		    || !migrate_timer_list(new_base, old_base->tv5.vec + i))
-			goto unlock_again;
+		migrate_timer_list(new_base, old_base->tv1.vec + i);
+	for (i = 0; i < TVN_SIZE; i++) {
+		migrate_timer_list(new_base, old_base->tv2.vec + i);
+		migrate_timer_list(new_base, old_base->tv3.vec + i);
+		migrate_timer_list(new_base, old_base->tv4.vec + i);
+		migrate_timer_list(new_base, old_base->tv5.vec + i);
+	}
 	spin_unlock(&old_base->lock);
 	spin_unlock(&new_base->lock);
 	local_irq_enable();
 	put_cpu_var(tvec_bases);
-	return;
-
-unlock_again:
-	/* Avoid deadlock with __mod_timer, by backing off. */
-	spin_unlock(&old_base->lock);
-	spin_unlock(&new_base->lock);
-	cpu_relax();
-	goto again;
 }
 #endif /* CONFIG_HOTPLUG_CPU */
-