Re: [rfc patch] Re: sched: incorrect argument used in task_hot()

On Fri, 2006-11-17 at 13:39 -0800, Andrew Morton wrote:
> On Fri, 17 Nov 2006 22:30:34 +0100
> Mike Galbraith <[email protected]> wrote:
> 
> > On Fri, 2006-11-17 at 20:20 +0100, Ingo Molnar wrote:
> > > * Mike Galbraith <[email protected]> wrote:
> > > 
> > > > One way to improve granularity, and eliminate the possibility of 
> > > > p->last_run being > rq->timestamp_tast_tick, and thereby short 
> > > > circuiting the evaluation of cache_hot_time, is to cache the last 
> > > > return of sched_clock() at both tick and sched times, and use that 
> > > > value as our reference instead of the absolute time of the tick.  It 
> > > > won't totally eliminate skew, but it moves the reference point closer 
> > > > to the current time on the remote cpu.
> > > > 
> > > > Looking for a good place to do this, I chose update_cpu_clock().
> > > 
> > > looks good to me - thus we will update the timestamp not only in the 
> > > timer tick, but also upon every context-switch (when we acquire 
> > > sched_clock() value anyway). Lets try this in -mm?
> > > 
> > > Acked-by: Ingo Molnar <[email protected]>
> > 
> > Then it needs a blame line.
> > 
> > Signed-off-by: Mike Galbraith <[email protected]>
> >
> 
> And a changelog, then we're all set!
> 
> Oh.  And a patch, too.

Co-opt rq->timestamp_last_tick to maintain a cache_hot_time evaluation
reference timestamp at both tick and sched times to prevent said
reference, formerly rq->timestamp_last_tick, from being behind
task->last_ran at evaluation time, and to move said reference closer to
current time on the remote processor, intent being to improve cache hot
evaluation and timestamp adjustment accuracy for task migration.

Fix minor sched_time double accounting error which occurs when a task
passing through schedule() does not schedule off, and takes the next
timer tick.

Signed-off-by: Mike Galbraith <[email protected]>
Acked-by: Ingo Molnar <[email protected]>
Acked-by: Ken Chen <[email protected]>

--- linux-2.6.19-rc5-mm2/kernel/sched.c.org	2006-11-15 10:14:07.000000000 +0100
+++ linux-2.6.19-rc5-mm2/kernel/sched.c	2006-11-17 15:42:46.000000000 +0100
@@ -226,7 +226,8 @@ struct rq {
 	unsigned long nr_uninterruptible;
 
 	unsigned long expired_timestamp;
-	unsigned long long timestamp_last_tick;
+	/* Cached timestamp set by update_cpu_clock() */
+	unsigned long long most_recent_timestamp;
 	struct task_struct *curr, *idle;
 	struct mm_struct *prev_mm;
 	struct prio_array *active, *expired, arrays[2];
@@ -947,8 +948,8 @@ static void activate_task(struct task_st
 	if (!local) {
 		/* Compensate for drifting sched_clock */
 		struct rq *this_rq = this_rq();
-		now = (now - this_rq->timestamp_last_tick)
-			+ rq->timestamp_last_tick;
+		now = (now - this_rq->most_recent_timestamp)
+			+ rq->most_recent_timestamp;
 	}
 #endif
 
@@ -1694,8 +1695,8 @@ void fastcall wake_up_new_task(struct ta
 		 * Not the local CPU - must adjust timestamp. This should
 		 * get optimised away in the !CONFIG_SMP case.
 		 */
-		p->timestamp = (p->timestamp - this_rq->timestamp_last_tick)
-					+ rq->timestamp_last_tick;
+		p->timestamp = (p->timestamp - this_rq->most_recent_timestamp)
+					+ rq->most_recent_timestamp;
 		__activate_task(p, rq);
 		if (TASK_PREEMPTS_CURR(p, rq))
 			resched_task(rq->curr);
@@ -2067,8 +2068,8 @@ static void pull_task(struct rq *src_rq,
 	set_task_cpu(p, this_cpu);
 	inc_nr_running(p, this_rq);
 	enqueue_task(p, this_array);
-	p->timestamp = (p->timestamp - src_rq->timestamp_last_tick)
-				+ this_rq->timestamp_last_tick;
+	p->timestamp = (p->timestamp - src_rq->most_recent_timestamp)
+				+ this_rq->most_recent_timestamp;
 	/*
 	 * Note that idle threads have a prio of MAX_PRIO, for this test
 	 * to be always true for them.
@@ -2107,7 +2108,7 @@ int can_migrate_task(struct task_struct 
 	if (sd->nr_balance_failed > sd->cache_nice_tries)
 		return 1;
 
-	if (task_hot(p, rq->timestamp_last_tick, sd))
+	if (task_hot(p, rq->most_recent_timestamp, sd))
 		return 0;
 	return 1;
 }
@@ -2206,7 +2207,7 @@ skip_queue:
 	}
 
 #ifdef CONFIG_SCHEDSTATS
-	if (task_hot(tmp, busiest->timestamp_last_tick, sd))
+	if (task_hot(tmp, busiest->most_recent_timestamp, sd))
 		schedstat_inc(sd, lb_hot_gained[idle]);
 #endif
 
@@ -2943,7 +2944,8 @@ EXPORT_PER_CPU_SYMBOL(kstat);
 static inline void
 update_cpu_clock(struct task_struct *p, struct rq *rq, unsigned long long now)
 {
-	p->sched_time += now - max(p->timestamp, rq->timestamp_last_tick);
+	p->sched_time += now - p->timestamp;
+	p->timestamp = rq->most_recent_timestamp = now;
 }
 
 /*
@@ -2956,8 +2958,7 @@ unsigned long long current_sched_time(co
 	unsigned long flags;
 
 	local_irq_save(flags);
-	ns = max(p->timestamp, task_rq(p)->timestamp_last_tick);
-	ns = p->sched_time + sched_clock() - ns;
+	ns = p->sched_time + sched_clock() - p->timestamp;
 	local_irq_restore(flags);
 
 	return ns;
@@ -3073,8 +3074,6 @@ void scheduler_tick(void)
 
 	update_cpu_clock(p, rq, now);
 
-	rq->timestamp_last_tick = now;
-
 	if (p == rq->idle) {
 		if (wake_priority_sleeper(rq))
 			goto out;
@@ -3466,11 +3465,10 @@ switch_tasks:
 	prev->sleep_avg -= run_time;
 	if ((long)prev->sleep_avg <= 0)
 		prev->sleep_avg = 0;
-	prev->timestamp = prev->last_ran = now;
 
 	sched_info_switch(prev, next);
 	if (likely(prev != next)) {
-		next->timestamp = now;
+		next->timestamp = prev->last_ran = now;
 		rq->nr_switches++;
 		rq->curr = next;
 		++*switch_count;
@@ -5000,8 +4998,8 @@ static int __migrate_task(struct task_st
 		 * afterwards, and pretending it was a local activate.
 		 * This way is cleaner and logically correct.
 		 */
-		p->timestamp = p->timestamp - rq_src->timestamp_last_tick
-				+ rq_dest->timestamp_last_tick;
+		p->timestamp = p->timestamp - rq_src->most_recent_timestamp
+				+ rq_dest->most_recent_timestamp;
 		deactivate_task(p, rq_src);
 		__activate_task(p, rq_dest);
 		if (TASK_PREEMPTS_CURR(p, rq_dest))


-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [email protected]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Follow-Ups:
- RE: [rfc patch] Re: sched: incorrect argument used in task_hot()
  - From: "Chen, Kenneth W" <[email protected]>

References:
- sched: incorrect argument used in task_hot()
  - From: "Chen, Kenneth W" <[email protected]>
- [rfc patch] Re: sched: incorrect argument used in task_hot()
  - From: Mike Galbraith <[email protected]>
- Re: [rfc patch] Re: sched: incorrect argument used in task_hot()
  - From: Ingo Molnar <[email protected]>
- Re: [rfc patch] Re: sched: incorrect argument used in task_hot()
  - From: Mike Galbraith <[email protected]>
- Re: [rfc patch] Re: sched: incorrect argument used in task_hot()
  - From: Andrew Morton <[email protected]>

Prev by Date: Re: [-mm patch] remove drivers/pci/search.c:pci_find_device_reverse()
Next by Date: Re: [PATCH 1/2] Make x86_64 udelay() round up instead of down - try2
Previous by thread: Re: [rfc patch] Re: sched: incorrect argument used in task_hot()
Next by thread: RE: [rfc patch] Re: sched: incorrect argument used in task_hot()
Index(es):
- Date
- Thread

[Index of Archives] [Kernel Newbies] [Netfilter] [Bugtraq] [Photo] [Stuff] [Gimp] [Yosemite News] [MIPS Linux] [ARM Linux] [Linux Security] [Linux RAID] [Video 4 Linux] [Linux for the blind] [Linux Resources]