[PATCH RFC 8/9] RCU: Make RCU priority boosting consume less power

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Work in progress, not for inclusion.

This patch modifies the RCU priority booster to explicitly sleep when
there are no RCU readers in need of priority boosting.  This should be
a power-consumption improvement over the one-second polling cycle in
the underlying RCU priority-boosting patch.

Signed-off-by: Paul E. McKenney <[email protected]>
---

 include/linux/rcupreempt.h |   15 ++++++
 kernel/rcupreempt.c        |  102 ++++++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 115 insertions(+), 2 deletions(-)

diff -urpNa -X dontdiff linux-2.6.22-G-boosttorture/include/linux/rcupreempt.h linux-2.6.22-H-boostsleep/include/linux/rcupreempt.h
--- linux-2.6.22-G-boosttorture/include/linux/rcupreempt.h	2007-08-24 11:24:59.000000000 -0700
+++ linux-2.6.22-H-boostsleep/include/linux/rcupreempt.h	2007-08-24 18:12:41.000000000 -0700
@@ -60,6 +60,21 @@ enum rcu_boost_state {
 
 #define N_RCU_BOOST_STATE (RCU_BOOST_INVALID + 1)
 
+/*
+ * RCU-booster state with respect to sleeping.  The RCU booster
+ * sleeps when no task has recently been seen sleeping in an RCU
+ * read-side critical section, and is awakened when a new sleeper
+ * appears.
+ */
+enum rcu_booster_state {
+	RCU_BOOSTER_ACTIVE = 0,   /* RCU booster actively scanning. */
+	RCU_BOOSTER_DROWSY = 1,   /* RCU booster is considering sleeping. */
+	RCU_BOOSTER_SLEEPING = 2, /* RCU booster is asleep. */
+	RCU_BOOSTER_INVALID = 3,  /* For bogus state sightings. */
+};
+
+#define N_RCU_BOOSTER_STATE (RCU_BOOSTER_INVALID + 1)
+
 #endif /* #ifdef CONFIG_PREEMPT_RCU_BOOST */
 
 #define call_rcu_bh(head, rcu) call_rcu(head, rcu)
diff -urpNa -X dontdiff linux-2.6.22-G-boosttorture/kernel/rcupreempt.c linux-2.6.22-H-boostsleep/kernel/rcupreempt.c
--- linux-2.6.22-G-boosttorture/kernel/rcupreempt.c	2007-08-27 15:42:57.000000000 -0700
+++ linux-2.6.22-H-boostsleep/kernel/rcupreempt.c	2007-08-27 15:42:37.000000000 -0700
@@ -108,6 +108,7 @@ struct rcu_boost_dat {
 	unsigned long rbs_unboosted;
 #ifdef CONFIG_PREEMPT_RCU_BOOST_STATS
 	unsigned long rbs_stats[N_RCU_BOOST_DAT_EVENTS][N_RCU_BOOST_STATE];
+	unsigned long rbs_qw_stats[N_RCU_BOOSTER_STATE];
 #endif /* #ifdef CONFIG_PREEMPT_RCU_BOOST_STATS */
 };
 #define RCU_BOOST_ELEMENTS 4
@@ -115,6 +116,10 @@ struct rcu_boost_dat {
 static int rcu_boost_idx = -1; /* invalid value for early RCU use. */
 static DEFINE_PER_CPU(struct rcu_boost_dat, rcu_boost_dat[RCU_BOOST_ELEMENTS]);
 static struct task_struct *rcu_boost_task;
+static DEFINE_SPINLOCK(rcu_boost_quiesce_lock);
+static enum rcu_booster_state rcu_booster_quiesce_state = RCU_BOOSTER_ACTIVE;
+static unsigned long rbs_qs_stats[2][N_RCU_BOOSTER_STATE];
+wait_queue_head_t rcu_booster_quiesce_wq;
 
 #ifdef CONFIG_PREEMPT_RCU_BOOST_STATS
 
@@ -171,6 +176,15 @@ static char *rcu_boost_state_error[] = {
 	 "?  ?",  /* unlock */
 };
 
+/* Labels for RCU booster state printout. */
+
+static char *rcu_booster_state_label[] = {
+	"Active",
+	"Drowsy",
+	"Sleeping",
+	"???",
+};
+
 /*
  * Print out RCU booster task statistics at the specified interval.
  */
@@ -221,6 +235,14 @@ static void rcu_boost_dat_stat_print(voi
 						       cpu)[i].rbs_stats[event][state];
 			}
 		}
+	for (state = 0; state < N_RCU_BOOSTER_STATE; state++) {
+		sum.rbs_qw_stats[state] = 0;
+		for_each_possible_cpu(cpu)
+			for (i = 0; i < RCU_BOOST_ELEMENTS; i++)
+				sum.rbs_qw_stats[state] +=
+					per_cpu(rcu_boost_dat,
+						cpu)[i].rbs_qw_stats[state];
+	}
 
 	/* Print them out! */
 
@@ -240,6 +262,24 @@ static void rcu_boost_dat_stat_print(voi
 		       rcu_boost_state_event[event], buf);
 	}
 
+	printk(KERN_INFO "RCU booster state: %s\n",
+	       rcu_booster_quiesce_state >= 0 &&
+	       rcu_booster_quiesce_state < N_RCU_BOOSTER_STATE
+		? rcu_booster_state_label[rcu_booster_quiesce_state]
+		: "???");
+	i = 0;
+	for (state = 0; state < N_RCU_BOOSTER_STATE; state++)
+		i += sprintf(&buf[i], " %ld", rbs_qs_stats[0][state]);
+	printk(KERN_INFO "No tasks found: %s\n", buf);
+	i = 0;
+	for (state = 0; state < N_RCU_BOOSTER_STATE; state++)
+		i += sprintf(&buf[i], " %ld", rbs_qs_stats[1][state]);
+	printk(KERN_INFO "Tasks found: %s\n", buf);
+	i = 0;
+	for (state = 0; state < N_RCU_BOOSTER_STATE; state++)
+		i += sprintf(&buf[i], " %ld", sum.rbs_qw_stats[state]);
+	printk(KERN_INFO "Awaken opportunities: %s\n", buf);
+
 	/* Go away and don't come back for awhile. */
 
 	lastprint = xtime.tv_sec;
@@ -293,6 +333,8 @@ static void init_rcu_boost_early(void)
 				for (j = 0; j < N_RCU_BOOST_DAT_EVENTS; j++)
 					for (k = 0; k < N_RCU_BOOST_STATE; k++)
 						rbdp[i].rbs_stats[j][k] = 0;
+				for (j = 0; j < N_RCU_BOOSTER_STATE; j++)
+					rbdp[i].rbs_qw_stats[j] = 0;
 			}
 #endif /* #ifdef CONFIG_PREEMPT_RCU_BOOST_STATS */
 		}
@@ -378,10 +420,11 @@ static void rcu_unboost_prio(struct task
 /*
  * Boost all of the RCU-reader tasks on the specified list.
  */
-static void rcu_boost_one_reader_list(struct rcu_boost_dat *rbdp)
+static int rcu_boost_one_reader_list(struct rcu_boost_dat *rbdp)
 {
 	LIST_HEAD(list);
 	unsigned long flags;
+	int retval = 0;
 	struct task_struct *taskp;
 
 	/*
@@ -397,6 +440,7 @@ static void rcu_boost_one_reader_list(st
 	list_splice_init(&rbdp->rbs_toboost, &list);
 	list_splice_init(&rbdp->rbs_boosted, &list);
 	while (!list_empty(&list)) {
+		retval = 1;
 
 		/*
 		 * Pause for a bit before boosting each task.
@@ -438,6 +482,36 @@ static void rcu_boost_one_reader_list(st
 		list_add_tail(&taskp->rcub_entry, &rbdp->rbs_boosted);
 	}
 	spin_unlock_irqrestore(&rbdp->rbs_lock, flags);
+	return retval;
+}
+
+/*
+ * Examine state to see if it is time to sleep; yo != 0 means tasks were found.
+ */
+static void rcu_booster_try_sleep(int yo)
+{
+	spin_lock(&rcu_boost_quiesce_lock);
+	if (rcu_booster_quiesce_state < 0 ||
+	    rcu_booster_quiesce_state >= N_RCU_BOOSTER_STATE)
+		rcu_booster_quiesce_state = RCU_BOOSTER_INVALID;
+	rbs_qs_stats[yo != 0][rcu_booster_quiesce_state]++;
+	if (yo != 0) {
+		rcu_booster_quiesce_state = RCU_BOOSTER_ACTIVE;
+	} else {
+		if (rcu_booster_quiesce_state == RCU_BOOSTER_ACTIVE) {
+			rcu_booster_quiesce_state = RCU_BOOSTER_DROWSY;
+		} else if (rcu_booster_quiesce_state == RCU_BOOSTER_DROWSY) {
+			rcu_booster_quiesce_state = RCU_BOOSTER_SLEEPING;
+			spin_unlock(&rcu_boost_quiesce_lock);
+			__wait_event(rcu_booster_quiesce_wq,
+				     rcu_booster_quiesce_state ==
+				     RCU_BOOSTER_ACTIVE);
+			spin_lock(&rcu_boost_quiesce_lock);
+		} else {
+			rcu_booster_quiesce_state = RCU_BOOSTER_ACTIVE;
+		}
+	}
+	spin_unlock(&rcu_boost_quiesce_lock);
 }
 
 /*
@@ -448,15 +522,21 @@ static int rcu_booster(void *arg)
 {
 	int cpu;
 	struct sched_param sp = { .sched_priority = PREEMPT_RCU_BOOSTER_PRIO, };
+	int yo = 0;
 
 	sched_setscheduler(current, SCHED_RR, &sp);
 	current->flags |= PF_NOFREEZE;
+	init_waitqueue_head(&rcu_booster_quiesce_wq);
 
 	do {
 
 		/* Advance the lists of tasks. */
 
 		rcu_boost_idx = (rcu_boost_idx + 1) % RCU_BOOST_ELEMENTS;
+		if (rcu_boost_idx == 0) {
+			rcu_booster_try_sleep(yo);
+			yo = 0;
+		}
 		for_each_possible_cpu(cpu) {
 
 			/*
@@ -469,7 +549,7 @@ static int rcu_booster(void *arg)
 			 * nothing.
 			 */
 
-			rcu_boost_one_reader_list(rcu_rbd_boosting(cpu));
+			yo += rcu_boost_one_reader_list(rcu_rbd_boosting(cpu));
 
 			/*
 			 * Large SMP systems may need to sleep sometimes
@@ -511,6 +591,23 @@ void init_rcu_boost_late(void)
 }
 
 /*
+ * Awaken the RCU priority booster if necessary.
+ */
+static void rcu_preempt_wake(struct rcu_boost_dat *rbdp)
+{
+	spin_lock(&rcu_boost_quiesce_lock);
+	if (rcu_booster_quiesce_state >= N_RCU_BOOSTER_STATE)
+		rcu_booster_quiesce_state = RCU_BOOSTER_INVALID;
+	rbdp->rbs_qw_stats[rcu_booster_quiesce_state]++;
+	if (rcu_booster_quiesce_state == RCU_BOOSTER_SLEEPING) {
+		rcu_booster_quiesce_state = RCU_BOOSTER_ACTIVE;
+		wake_up(&rcu_booster_quiesce_wq);
+	} else if (rcu_booster_quiesce_state != RCU_BOOSTER_ACTIVE)
+		rcu_booster_quiesce_state = RCU_BOOSTER_ACTIVE;
+	spin_unlock(&rcu_boost_quiesce_lock);
+}
+
+/*
  * Update task's RCU-boost state to reflect blocking in RCU read-side
  * critical section, so that the RCU-boost task can find it in case it
  * later needs its priority boosted.
@@ -532,6 +629,7 @@ void __rcu_preempt_boost(void)
 	}
 	spin_lock(&rbdp->rbs_lock);
 	rbdp->rbs_blocked++;
+	rcu_preempt_wake(rbdp);
 
 	/*
 	 * Update state.  We hold the lock and aren't yet on the list,
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [email protected]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[Index of Archives]     [Kernel Newbies]     [Netfilter]     [Bugtraq]     [Photo]     [Stuff]     [Gimp]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Video 4 Linux]     [Linux for the blind]     [Linux Resources]
  Powered by Linux