Re: Definition of fairness (was Re: [patch] CFS scheduler, -v11)

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Wed, 2007-05-09 at 23:32 +0530, Srivatsa Vaddagiri wrote:

> Ingo,
> 	I had a question with respect to the definition of fairness used, esp
> for tasks that are not 100% cpu hogs.
> 
> Ex: consider two equally important tasks T1 and T2 running on same CPU and 
> whose execution nature is:
> 
> 	T1 = 100% cpu hog
> 	T2 = 60% cpu hog (run for 600ms, sleep for 400ms)
> 
> Over a arbitrary observation period of 10 sec, 
> 
> 	T1 was ready to run for all 10sec
> 	T2 was ready to run for 6 sec
> 
> Over this observation period, how much execution time should T2 get,
> under a "fair" scheduler?
> 
> I was expecting both T2 and T1 to get 5 sec (50:50 split). Is this a
> wrong expectation of fairness?

Depends on how long your fairness yardstick is I suppose.

> Anyway, results of this experiment (using testcase attached) is below.
> T2 gets way below its fair share IMO (under both cfs and sd).
> 
> 
> 2.6.21.1:
> 
>  5444 vatsa     16   0  2468  460  388 R   59  0.0   0:19.76 3 T1
>  5443 vatsa     25   0  2468  460  388 R   40  0.0   0:15.36 3 T2
> 
> 
> 2.6.21.1 + cfs-v11:
> 
>  5460 vatsa     31   0  2464  460  388 R   70  0.0   0:15.28 3 T1
>  5461 vatsa     29   0  2468  460  388 R   30  0.0   0:05.65 3 T2
> 
> 
> 2.6.21 + sd-0.48:
> 
>  5459 vatsa     23   0  2468  460  388 R   70  0.0   0:17.02 3 T1
>  5460 vatsa     21   0  2464  460  388 R   30  0.0   0:06.21 3 T2
> 
> 
> Note: 
> 
> T1 is started as ./cpuhog 600 0 10 > /dev/null &

 6524 root      20   0  1432  396  336 S   51  0.0   2:31.83 1 cpuhog
 6525 root      20   0  1436  356  296 R   48  0.0   2:25.76 1 chew

> T2 is started as ./cpuhog 600 400 10 > /dev/null &

That's cpuhog in the above, and chew is the 100% hog.  The below is
cpuhog as you started them. 

 6565 root      20   0  1436  396  336 R   51  0.0   1:49.30 1 cpuhog
 6566 root      20   0  1432  396  336 S   49  0.0   1:59.16 1 cpuhog

FWIW, I can squeeze some better long term fairness out of it by doing
the below.

If a task isn't going to sleep, it's always in competition with someone
even if it happens to be the invisible man right now, so runners will
drift to the right.  Also,  these two tasks aren't necessarily the same
at dequeue time wrt the effect they have (or rather _will_ have) on the
system, but are being treated as if they're identical twins.

	-Mike

Experimental gefingerpoken und mittengrabben.

--- kernel/sched_fair.c.org	2007-05-08 07:51:48.000000000 +0200
+++ kernel/sched_fair.c	2007-05-10 09:43:17.000000000 +0200
@@ -20,7 +20,7 @@ unsigned int sysctl_sched_granularity __
 
 unsigned int sysctl_sched_sleep_history_max __read_mostly = 2000000000;
 
-unsigned int sysctl_sched_load_smoothing = 1 | 2 | 4 | 0;
+unsigned int sysctl_sched_load_smoothing = 0 | 0 | 0 | 0 | 16;
 
 /*
  * Wake-up granularity.
@@ -140,6 +140,8 @@ static void limit_wait_runtime(struct ta
 {
 	s64 limit = sysctl_sched_granularity * 16;
 
+	if (sysctl_sched_load_smoothing & 16)
+		limit = sysctl_sched_sleep_history_max >> 1;
 	if (p->wait_runtime > limit)
 		p->wait_runtime = limit;
 	if (p->wait_runtime < -limit)
@@ -150,10 +152,11 @@ static void limit_wait_runtime(struct ta
  * Update the current task's runtime statistics. Skip current tasks that
  * are not in our scheduling class.
  */
-static inline void update_curr(struct rq *rq, u64 now)
+static inline void update_curr(struct rq *rq, u64 now, int load_weight)
 {
 	u64 delta_exec, delta_fair, delta_mine;
 	struct task_struct *curr = rq->curr;
+	unsigned long load = get_rq_load(rq);
 
 	if (curr->sched_class != &fair_sched_class || curr == rq->idle
 			|| !curr->on_rq)
@@ -167,8 +170,6 @@ static inline void update_curr(struct rq
 		curr->exec_max = delta_exec;
 
 	if (sysctl_sched_load_smoothing & 1) {
-		unsigned long load = get_rq_load(rq);
-
 		if (sysctl_sched_load_smoothing & 2) {
 			delta_fair = delta_exec << NICE_0_SHIFT;
 			do_div(delta_fair, load);
@@ -178,13 +179,13 @@ static inline void update_curr(struct rq
 		}
 
 		delta_mine = delta_exec * curr->load_weight;
-		do_div(delta_mine, load);
+		do_div(delta_mine, load + load_weight);
 	} else {
 		delta_fair = delta_exec << NICE_0_SHIFT;
-		do_div(delta_fair, rq->raw_weighted_load);
+		do_div(delta_fair, load);
 
 		delta_mine = delta_exec * curr->load_weight;
-		do_div(delta_mine, rq->raw_weighted_load);
+		do_div(delta_mine, load + load_weight);
 	}
 
 	curr->sum_exec_runtime += delta_exec;
@@ -300,7 +301,7 @@ update_stats_wait_end(struct rq *rq, str
 static inline void
 update_stats_dequeue(struct rq *rq, struct task_struct *p, u64 now)
 {
-	update_curr(rq, now);
+	update_curr(rq, now, !p->sleep_start_fair ? NICE_0_LOAD : 0);
 	/*
 	 * Mark the end of the wait period if dequeueing a
 	 * waiting task:
@@ -327,7 +328,7 @@ update_stats_curr_start(struct rq *rq, s
 static inline void
 update_stats_curr_end(struct rq *rq, struct task_struct *p, u64 now)
 {
-	update_curr(rq, now);
+	update_curr(rq, now, !p->sleep_start_fair ? NICE_0_LOAD : 0);
 
 	p->exec_start = 0;
 }
@@ -378,7 +379,7 @@ enqueue_task_fair(struct rq *rq, struct 
 	/*
 	 * Update the fair clock.
 	 */
-	update_curr(rq, now);
+	update_curr(rq, now, 0);
 
 	if (wakeup) {
 		if (p->sleep_start) {



-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [email protected]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[Index of Archives]     [Kernel Newbies]     [Netfilter]     [Bugtraq]     [Photo]     [Stuff]     [Gimp]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Video 4 Linux]     [Linux for the blind]     [Linux Resources]
  Powered by Linux