Re: sched_clock() uses are broken

On Tue, May 02, 2006 at 03:23:01PM -0400, Nicolas Pitre wrote:
> On Tue, 2 May 2006, Russell King wrote:
> 
> > On Tue, May 02, 2006 at 03:05:22PM -0400, Nicolas Pitre wrote:
> > > If we're discussing the addition of a sched_clock_diff(), why wouldn't 
> > > sched_clock() return anything it wants in that context?  It could be 
> > > redefined to have a return value meaningful only to sched_clock_diff()?
> > 
> > If we're talking about doing that, we need to replace sched_clock()
> > to ensure that all users are aware that it has changed.
> > 
> > I did think about that for my original fix proposal, but stepped back
> > because that's a bigger change - and is something for post-2.6.17.
> > The smallest fix (suitable for -rc kernels) is as I detailed.
> 
> Oh agreed.

(Added Ingo - don't know if you saw my original message.)

Actually, I'm not entirely convinced that the smallest fix is going to
be the best solution - it looks too complex.
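
If sched_clock_diff() were to become per-architecture, as Nicolas
suggests above, an implementation whose sched_clock() wraps short of
64 bits could hide the wrap behind a mask.  A hypothetical sketch
only - the 32-bit wrap point here is made up for illustration:

	/* Not part of this patch: per-arch override for a
	 * sched_clock() that wraps at 2^32 ns. */
	#define SCHED_CLOCK_MASK	((1ULL << 32) - 1)

	static inline unsigned long long
	sched_clock_diff(unsigned long long t1, unsigned long long t0)
	{
		return (t1 - t0) & SCHED_CLOCK_MASK;
	}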

Note the code changes in update_cpu_clock() and current_sched_time() -
arguably both are buggy as they stand today when sched_clock() wraps.
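
To see why, consider sched_clock() wrapping between the two stamps.
Here's a userspace sketch of my own (illustration only, not kernel
code; it assumes the full 64-bit wrap of the generic sched_clock_diff()
in the patch below): max() selects the numerically larger but
chronologically older stamp and over-credits sched_time, while min()
of the two wrap-safe differences gets it right:

	#include <stdint.h>
	#include <stdio.h>

	static uint64_t sched_clock_diff(uint64_t t1, uint64_t t0)
	{
		return t1 - t0;	/* modulo-2^64 arithmetic absorbs the wrap */
	}

	int main(void)
	{
		uint64_t last_tick = UINT64_MAX - 50;	/* stamped before the wrap */
		uint64_t timestamp = 100;	/* stamped after it, so newer */
		uint64_t now = 200;

		/* Current code: max() picks the stale pre-wrap stamp. */
		uint64_t last = timestamp > last_tick ? timestamp : last_tick;
		printf("max-based delta: %llu (want 100)\n",
		       (unsigned long long)(now - last));

		/* Patched code: the smaller wrap-safe delta is correct. */
		uint64_t d1 = sched_clock_diff(now, timestamp);
		uint64_t d2 = sched_clock_diff(now, last_tick);
		printf("min-based delta: %llu\n",
		       (unsigned long long)(d1 < d2 ? d1 : d2));
		return 0;
	}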

Comments anyone?

diff --git a/include/linux/sched.h b/include/linux/sched.h
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -988,6 +988,12 @@ static inline int set_cpus_allowed(task_
 extern unsigned long long sched_clock(void);
 extern unsigned long long current_sched_time(const task_t *current_task);
 
+/* Return the wrap-safe difference between two sched_clock() values */
+static inline unsigned long long sched_clock_diff(unsigned long long t1, unsigned long long t0)
+{
+	return t1 - t0;
+}
+
 /* sched_exec is called by processes performing an exec */
 #ifdef CONFIG_SMP
 extern void sched_exec(void);
diff --git a/kernel/sched.c b/kernel/sched.c
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -731,7 +731,7 @@ static inline void __activate_idle_task(
 static int recalc_task_prio(task_t *p, unsigned long long now)
 {
 	/* Caller must always ensure 'now >= p->timestamp' */
-	unsigned long long __sleep_time = now - p->timestamp;
+	unsigned long long __sleep_time = sched_clock_diff(now, p->timestamp);
 	unsigned long sleep_time;
 
 	if (batch_task(p))
@@ -807,7 +807,7 @@ static void activate_task(task_t *p, run
 	if (!local) {
 		/* Compensate for drifting sched_clock */
 		runqueue_t *this_rq = this_rq();
-		now = (now - this_rq->timestamp_last_tick)
+		now = sched_clock_diff(now, this_rq->timestamp_last_tick)
 			+ rq->timestamp_last_tick;
 	}
 #endif
@@ -2512,8 +2512,10 @@ EXPORT_PER_CPU_SYMBOL(kstat);
 static inline void update_cpu_clock(task_t *p, runqueue_t *rq,
 				    unsigned long long now)
 {
-	unsigned long long last = max(p->timestamp, rq->timestamp_last_tick);
-	p->sched_time += now - last;
+	unsigned long long d1, d2;
+	d1 = sched_clock_diff(now, p->timestamp);
+	d2 = sched_clock_diff(now, rq->timestamp_last_tick);
+	p->sched_time += min(d1, d2);
 }
 
 /*
@@ -2522,11 +2524,13 @@ static inline void update_cpu_clock(task
  */
 unsigned long long current_sched_time(const task_t *tsk)
 {
-	unsigned long long ns;
+	unsigned long long now, d1, d2, ns;
 	unsigned long flags;
 	local_irq_save(flags);
-	ns = max(tsk->timestamp, task_rq(tsk)->timestamp_last_tick);
-	ns = tsk->sched_time + (sched_clock() - ns);
+	now = sched_clock();
+	d1 = sched_clock_diff(now, tsk->timestamp);
+	d2 = sched_clock_diff(now, task_rq(tsk)->timestamp_last_tick);
+	ns = tsk->sched_time + min(d1, d2);
 	local_irq_restore(flags);
 	return ns;
 }
@@ -2925,7 +2929,7 @@ asmlinkage void __sched schedule(void)
 	runqueue_t *rq;
 	prio_array_t *array;
 	struct list_head *queue;
-	unsigned long long now;
+	unsigned long long now, sleep;
 	unsigned long run_time;
 	int cpu, idx, new_prio;
 
@@ -2960,11 +2964,10 @@ need_resched_nonpreemptible:
 
 	schedstat_inc(rq, sched_cnt);
 	now = sched_clock();
-	if (likely((long long)(now - prev->timestamp) < NS_MAX_SLEEP_AVG)) {
-		run_time = now - prev->timestamp;
-		if (unlikely((long long)(now - prev->timestamp) < 0))
-			run_time = 0;
-	} else
+	sleep = sched_clock_diff(now, prev->timestamp);
+	if (likely(sleep < NS_MAX_SLEEP_AVG))
+		run_time = sleep;
+	else
 		run_time = NS_MAX_SLEEP_AVG;
 
 	/*
@@ -3039,8 +3042,8 @@ go_idle:
 	next = list_entry(queue->next, task_t, run_list);
 
 	if (!rt_task(next) && interactive_sleep(next->sleep_type)) {
-		unsigned long long delta = now - next->timestamp;
-		if (unlikely((long long)(now - next->timestamp) < 0))
+		unsigned long long delta = sched_clock_diff(now, next->timestamp);
+		if (unlikely((long long)delta < 0))
 			delta = 0;
 
 		if (next->sleep_type == SLEEP_INTERACTIVE)


-- 
Russell King
 Linux kernel    2.6 ARM Linux   - http://www.arm.linux.org.uk/
 maintainer of:  2.6 Serial core
