* Willy Tarreau <[email protected]> wrote:
> Have you tried the previous version with the fair-fork patch? It might
> be possible that your workload is sensitive to the fork()'s child
> getting a lot of CPU upon startup.
The fair-fork patch is now included in -v2, but it was already in the
-v2-rc0 I sent to Gene separately. I've attached the -rc0->final delta.
Gene, could you please apply this patch to your -v2-rc0 tree and do a
quick double-check that these changes are indeed what causes the
regression?
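( If it helps with narrowing it down: a minimal userspace sketch like the
  one below - illustrative only, not part of the patch - makes the fork
  ordering directly visible. On SMP it is only meaningful if parent and
  child are pinned to the same CPU, e.g. via taskset, because the
  child-runs-first logic only applies on the parent's CPU. )

/*
 * fork-order.c: print which side of a fork() gets to run first.
 * Illustrative sketch only -- run it pinned to a single CPU
 * (e.g. "taskset -c 0 ./fork-order") so the ordering is decided
 * purely by the scheduler's child-runs-first policy.
 */
#include <stdio.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/wait.h>

int main(void)
{
	pid_t pid = fork();

	if (pid < 0) {
		perror("fork");
		return 1;
	}
	if (!pid) {
		/* child: report and exit immediately */
		write(1, "child\n", 6);
		_exit(0);
	}
	/* parent: whichever line appears first in the output ran first */
	write(1, "parent\n", 7);
	waitpid(pid, NULL, 0);
	return 0;
}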
Ingo
---
include/linux/sched.h | 7 +
kernel/exit.c | 2
kernel/posix-cpu-timers.c | 24 ++---
kernel/rtmutex.c | 2
kernel/sched.c | 191 +++++++++++++++++++++++++---------------------
kernel/sched_debug.c | 14 +--
kernel/sched_fair.c | 80 +++++++++++++------
kernel/sched_rt.c | 21 +++++
kernel/sysctl.c | 8 +
9 files changed, 218 insertions(+), 131 deletions(-)
Index: linux/include/linux/sched.h
===================================================================
--- linux.orig/include/linux/sched.h
+++ linux/include/linux/sched.h
@@ -798,12 +798,15 @@ struct sched_class {
void (*dequeue_task) (struct rq *rq, struct task_struct *p);
void (*requeue_task) (struct rq *rq, struct task_struct *p);
+ void (*check_preempt_curr) (struct rq *rq, struct task_struct *p);
+
struct task_struct * (*pick_next_task) (struct rq *rq);
void (*put_prev_task) (struct rq *rq, struct task_struct *p);
struct task_struct * (*load_balance_start) (struct rq *rq);
struct task_struct * (*load_balance_next) (struct rq *rq);
void (*task_tick) (struct rq *rq, struct task_struct *p);
+ void (*task_new) (struct rq *rq, struct task_struct *p);
void (*task_init) (struct rq *rq, struct task_struct *p);
};
@@ -838,7 +841,8 @@ struct task_struct {
u64 last_ran;
s64 wait_runtime;
- u64 exec_runtime, fair_key;
+ u64 sum_exec_runtime, fair_key;
+ s64 sum_wait_runtime;
long nice_offset;
s64 hog_limit;
@@ -1236,6 +1240,7 @@ extern char * sched_print_task_state(str
extern unsigned int sysctl_sched_max_hog_history;
extern unsigned int sysctl_sched_granularity;
+extern unsigned int sysctl_sched_child_runs_first;
#ifdef CONFIG_RT_MUTEXES
extern int rt_mutex_getprio(struct task_struct *p);
Index: linux/kernel/exit.c
===================================================================
--- linux.orig/kernel/exit.c
+++ linux/kernel/exit.c
@@ -112,7 +112,7 @@ static void __exit_signal(struct task_st
sig->maj_flt += tsk->maj_flt;
sig->nvcsw += tsk->nvcsw;
sig->nivcsw += tsk->nivcsw;
- sig->sum_sched_runtime += tsk->exec_runtime;
+ sig->sum_sched_runtime += tsk->sum_exec_runtime;
sig = NULL; /* Marker for below. */
}
Index: linux/kernel/posix-cpu-timers.c
===================================================================
--- linux.orig/kernel/posix-cpu-timers.c
+++ linux/kernel/posix-cpu-timers.c
@@ -161,7 +161,7 @@ static inline cputime_t virt_ticks(struc
}
static inline unsigned long long sched_ns(struct task_struct *p)
{
- return (p == current) ? current_sched_runtime(p) : p->exec_runtime;
+ return (p == current) ? current_sched_runtime(p) : p->sum_exec_runtime;
}
int posix_cpu_clock_getres(const clockid_t which_clock, struct timespec *tp)
@@ -249,7 +249,7 @@ static int cpu_clock_sample_group_locked
cpu->sched = p->signal->sum_sched_runtime;
/* Add in each other live thread. */
while ((t = next_thread(t)) != p) {
- cpu->sched += t->exec_runtime;
+ cpu->sched += t->sum_exec_runtime;
}
cpu->sched += sched_ns(p);
break;
@@ -422,7 +422,7 @@ int posix_cpu_timer_del(struct k_itimer
*/
static void cleanup_timers(struct list_head *head,
cputime_t utime, cputime_t stime,
- unsigned long long exec_runtime)
+ unsigned long long sum_exec_runtime)
{
struct cpu_timer_list *timer, *next;
cputime_t ptime = cputime_add(utime, stime);
@@ -451,10 +451,10 @@ static void cleanup_timers(struct list_h
++head;
list_for_each_entry_safe(timer, next, head, entry) {
list_del_init(&timer->entry);
- if (timer->expires.sched < exec_runtime) {
+ if (timer->expires.sched < sum_exec_runtime) {
timer->expires.sched = 0;
} else {
- timer->expires.sched -= exec_runtime;
+ timer->expires.sched -= sum_exec_runtime;
}
}
}
@@ -467,7 +467,7 @@ static void cleanup_timers(struct list_h
void posix_cpu_timers_exit(struct task_struct *tsk)
{
cleanup_timers(tsk->cpu_timers,
- tsk->utime, tsk->stime, tsk->exec_runtime);
+ tsk->utime, tsk->stime, tsk->sum_exec_runtime);
}
void posix_cpu_timers_exit_group(struct task_struct *tsk)
@@ -475,7 +475,7 @@ void posix_cpu_timers_exit_group(struct
cleanup_timers(tsk->signal->cpu_timers,
cputime_add(tsk->utime, tsk->signal->utime),
cputime_add(tsk->stime, tsk->signal->stime),
- tsk->exec_runtime + tsk->signal->sum_sched_runtime);
+ tsk->sum_exec_runtime + tsk->signal->sum_sched_runtime);
}
@@ -536,7 +536,7 @@ static void process_timer_rebalance(stru
nsleft = max_t(unsigned long long, nsleft, 1);
do {
if (likely(!(t->flags & PF_EXITING))) {
- ns = t->exec_runtime + nsleft;
+ ns = t->sum_exec_runtime + nsleft;
if (t->it_sched_expires == 0 ||
t->it_sched_expires > ns) {
t->it_sched_expires = ns;
@@ -1004,7 +1004,7 @@ static void check_thread_timers(struct t
struct cpu_timer_list *t = list_entry(timers->next,
struct cpu_timer_list,
entry);
- if (!--maxfire || tsk->exec_runtime < t->expires.sched) {
+ if (!--maxfire || tsk->sum_exec_runtime < t->expires.sched) {
tsk->it_sched_expires = t->expires.sched;
break;
}
@@ -1049,7 +1049,7 @@ static void check_process_timers(struct
do {
utime = cputime_add(utime, t->utime);
stime = cputime_add(stime, t->stime);
- sum_sched_runtime += t->exec_runtime;
+ sum_sched_runtime += t->sum_exec_runtime;
t = next_thread(t);
} while (t != tsk);
ptime = cputime_add(utime, stime);
@@ -1208,7 +1208,7 @@ static void check_process_timers(struct
t->it_virt_expires = ticks;
}
- sched = t->exec_runtime + sched_left;
+ sched = t->sum_exec_runtime + sched_left;
if (sched_expires && (t->it_sched_expires == 0 ||
t->it_sched_expires > sched)) {
t->it_sched_expires = sched;
@@ -1300,7 +1300,7 @@ void run_posix_cpu_timers(struct task_st
if (UNEXPIRED(prof) && UNEXPIRED(virt) &&
(tsk->it_sched_expires == 0 ||
- tsk->exec_runtime < tsk->it_sched_expires))
+ tsk->sum_exec_runtime < tsk->it_sched_expires))
return;
#undef UNEXPIRED
Index: linux/kernel/rtmutex.c
===================================================================
--- linux.orig/kernel/rtmutex.c
+++ linux/kernel/rtmutex.c
@@ -337,7 +337,7 @@ static inline int try_to_steal_lock(stru
* interrupted, so we would delay a waiter with higher
* priority as current->normal_prio.
*
- * Note: in the rare case of a SCHED_FAIR task changing
+ * Note: in the rare case of a SCHED_OTHER task changing
* its priority and thus stealing the lock, next->task
* might be current:
*/
Index: linux/kernel/sched.c
===================================================================
--- linux.orig/kernel/sched.c
+++ linux/kernel/sched.c
@@ -101,8 +101,10 @@ unsigned long long __attribute__((weak))
#define MIN_TIMESLICE max(5 * HZ / 1000, 1)
#define DEF_TIMESLICE (100 * HZ / 1000)
-#define TASK_PREEMPTS_CURR(p, rq) \
- ((p)->prio < (rq)->curr->prio)
+static inline void check_preempt_curr(struct rq *rq, struct task_struct *p)
+{
+ p->sched_class->check_preempt_curr(rq, p);
+}
#define SCALE_PRIO(x, prio) \
max(x * (MAX_PRIO - prio) / (MAX_USER_PRIO / 2), MIN_TIMESLICE)
@@ -227,7 +229,7 @@ char * sched_print_task_state(struct tas
P(exec_start);
P(last_ran);
P(wait_runtime);
- P(exec_runtime);
+ P(sum_exec_runtime);
#undef P
t0 = sched_clock();
@@ -431,38 +433,46 @@ static inline struct rq *this_rq_lock(vo
return rq;
}
-#include "sched_stats.h"
-#include "sched_rt.c"
-#include "sched_fair.c"
-#include "sched_debug.c"
+/*
+ * resched_task - mark a task 'to be rescheduled now'.
+ *
+ * On UP this means the setting of the need_resched flag, on SMP it
+ * might also involve a cross-CPU call to trigger the scheduler on
+ * the target CPU.
+ */
+#ifdef CONFIG_SMP
-#define sched_class_highest (&rt_sched_class)
+#ifndef tsk_is_polling
+#define tsk_is_polling(t) test_tsk_thread_flag(t, TIF_POLLING_NRFLAG)
+#endif
-static void enqueue_task(struct rq *rq, struct task_struct *p)
+static void resched_task(struct task_struct *p)
{
- sched_info_queued(p);
- p->sched_class->enqueue_task(rq, p);
- p->on_rq = 1;
-}
+ int cpu;
-static void dequeue_task(struct rq *rq, struct task_struct *p)
-{
- p->sched_class->dequeue_task(rq, p);
- p->on_rq = 0;
-}
+ assert_spin_locked(&task_rq(p)->lock);
-static void requeue_task(struct rq *rq, struct task_struct *p)
-{
- p->sched_class->requeue_task(rq, p);
-}
+ if (unlikely(test_tsk_thread_flag(p, TIF_NEED_RESCHED)))
+ return;
-/*
- * __normal_prio - return the priority that is based on the static prio
- */
-static inline int __normal_prio(struct task_struct *p)
+ set_tsk_thread_flag(p, TIF_NEED_RESCHED);
+
+ cpu = task_cpu(p);
+ if (cpu == smp_processor_id())
+ return;
+
+ /* NEED_RESCHED must be visible before we test polling */
+ smp_mb();
+ if (!tsk_is_polling(p))
+ smp_send_reschedule(cpu);
+}
+#else
+static inline void resched_task(struct task_struct *p)
{
- return p->static_prio;
+ assert_spin_locked(&task_rq(p)->lock);
+ set_tsk_need_resched(p);
}
+#endif
/*
* To aid in avoiding the subversion of "niceness" due to uneven distribution
@@ -528,6 +538,41 @@ static inline void dec_nr_running(struct
dec_raw_weighted_load(rq, p);
}
+static void activate_task(struct rq *rq, struct task_struct *p);
+
+#include "sched_stats.h"
+#include "sched_rt.c"
+#include "sched_fair.c"
+#include "sched_debug.c"
+
+#define sched_class_highest (&rt_sched_class)
+
+static void enqueue_task(struct rq *rq, struct task_struct *p)
+{
+ sched_info_queued(p);
+ p->sched_class->enqueue_task(rq, p);
+ p->on_rq = 1;
+}
+
+static void dequeue_task(struct rq *rq, struct task_struct *p)
+{
+ p->sched_class->dequeue_task(rq, p);
+ p->on_rq = 0;
+}
+
+static void requeue_task(struct rq *rq, struct task_struct *p)
+{
+ p->sched_class->requeue_task(rq, p);
+}
+
+/*
+ * __normal_prio - return the priority that is based on the static prio
+ */
+static inline int __normal_prio(struct task_struct *p)
+{
+ return p->static_prio;
+}
+
/*
* Calculate the expected normal priority: i.e. priority
* without taking RT-inheritance into account. Might be
@@ -593,47 +638,6 @@ static void deactivate_task(struct rq *r
dec_nr_running(p, rq);
}
-/*
- * resched_task - mark a task 'to be rescheduled now'.
- *
- * On UP this means the setting of the need_resched flag, on SMP it
- * might also involve a cross-CPU call to trigger the scheduler on
- * the target CPU.
- */
-#ifdef CONFIG_SMP
-
-#ifndef tsk_is_polling
-#define tsk_is_polling(t) test_tsk_thread_flag(t, TIF_POLLING_NRFLAG)
-#endif
-
-static void resched_task(struct task_struct *p)
-{
- int cpu;
-
- assert_spin_locked(&task_rq(p)->lock);
-
- if (unlikely(test_tsk_thread_flag(p, TIF_NEED_RESCHED)))
- return;
-
- set_tsk_thread_flag(p, TIF_NEED_RESCHED);
-
- cpu = task_cpu(p);
- if (cpu == smp_processor_id())
- return;
-
- /* NEED_RESCHED must be visible before we test polling */
- smp_mb();
- if (!tsk_is_polling(p))
- smp_send_reschedule(cpu);
-}
-#else
-static inline void resched_task(struct task_struct *p)
-{
- assert_spin_locked(&task_rq(p)->lock);
- set_tsk_need_resched(p);
-}
-#endif
-
/**
* task_curr - is this task currently executing on a CPU?
* @p: the task in question.
@@ -1113,10 +1117,8 @@ out_activate:
* the waker guarantees that the freshly woken up task is going
* to be considered on this CPU.)
*/
- if (!sync || cpu != this_cpu) {
- if (TASK_PREEMPTS_CURR(p, rq))
- resched_task(rq->curr);
- }
+ if (!sync || cpu != this_cpu)
+ check_preempt_curr(rq, p);
success = 1;
out_running:
@@ -1159,7 +1161,8 @@ static void task_running_tick(struct rq
static void __sched_fork(struct task_struct *p)
{
p->wait_start_fair = p->exec_start = p->last_ran = 0;
- p->exec_runtime = p->wait_runtime = 0;
+ p->sum_exec_runtime = p->wait_runtime = 0;
+ p->sum_wait_runtime = 0;
INIT_LIST_HEAD(&p->run_list);
p->on_rq = 0;
@@ -1208,6 +1211,12 @@ void sched_fork(struct task_struct *p, i
}
/*
+ * After fork, child runs first. (default) If set to 0 then
+ * parent will (try to) run first.
+ */
+unsigned int __read_mostly sysctl_sched_child_runs_first = 1;
+
+/*
* wake_up_new_task - wake up a newly created task for the first time.
*
* This function will do some initial scheduler statistics housekeeping
@@ -1218,15 +1227,25 @@ void fastcall wake_up_new_task(struct ta
{
unsigned long flags;
struct rq *rq;
+ int this_cpu;
rq = task_rq_lock(p, &flags);
BUG_ON(p->state != TASK_RUNNING);
+ this_cpu = smp_processor_id(); /* parent's CPU */
p->prio = effective_prio(p);
- activate_task(rq, p);
- if (TASK_PREEMPTS_CURR(p, rq))
- resched_task(rq->curr);
+ if (!sysctl_sched_child_runs_first || (clone_flags & CLONE_VM) ||
+ task_cpu(p) != this_cpu || !current->on_rq) {
+ activate_task(rq, p);
+ } else {
+ /*
+ * Let the scheduling class do new task startup
+ * management (if any):
+ */
+ p->sched_class->task_new(rq, p);
+ }
+ check_preempt_curr(rq, p);
task_rq_unlock(rq, &flags);
}
@@ -1559,8 +1578,7 @@ static void pull_task(struct rq *src_rq,
* Note that idle threads have a prio of MAX_PRIO, for this test
* to be always true for them.
*/
- if (TASK_PREEMPTS_CURR(p, this_rq))
- resched_task(this_rq->curr);
+ check_preempt_curr(this_rq, p);
}
/*
@@ -2467,7 +2485,7 @@ DEFINE_PER_CPU(struct kernel_stat, kstat
EXPORT_PER_CPU_SYMBOL(kstat);
/*
- * Return current->exec_runtime plus any more ns on the sched_clock
+ * Return current->sum_exec_runtime plus any more ns on the sched_clock
* that have not yet been banked.
*/
unsigned long long current_sched_runtime(const struct task_struct *p)
@@ -2476,7 +2494,7 @@ unsigned long long current_sched_runtime
unsigned long flags;
local_irq_save(flags);
- ns = p->exec_runtime + sched_clock() - p->last_ran;
+ ns = p->sum_exec_runtime + sched_clock() - p->last_ran;
local_irq_restore(flags);
return ns;
@@ -3176,8 +3194,9 @@ void rt_mutex_setprio(struct task_struct
if (task_running(rq, p)) {
if (p->prio > oldprio)
resched_task(rq->curr);
- } else if (TASK_PREEMPTS_CURR(p, rq))
- resched_task(rq->curr);
+ } else {
+ check_preempt_curr(rq, p);
+ }
}
task_rq_unlock(rq, &flags);
}
@@ -3469,8 +3488,9 @@ recheck:
if (task_running(rq, p)) {
if (p->prio > oldprio)
resched_task(rq->curr);
- } else if (TASK_PREEMPTS_CURR(p, rq))
- resched_task(rq->curr);
+ } else {
+ check_preempt_curr(rq, p);
+ }
}
__task_rq_unlock(rq);
spin_unlock_irqrestore(&p->pi_lock, flags);
@@ -4183,8 +4203,7 @@ static int __migrate_task(struct task_st
if (p->on_rq) {
deactivate_task(rq_src, p);
activate_task(rq_dest, p);
- if (TASK_PREEMPTS_CURR(p, rq_dest))
- resched_task(rq_dest->curr);
+ check_preempt_curr(rq_dest, p);
}
ret = 1;
out:
Index: linux/kernel/sched_debug.c
===================================================================
--- linux.orig/kernel/sched_debug.c
+++ linux/kernel/sched_debug.c
@@ -51,10 +51,10 @@ print_task(struct seq_file *m, struct rq
p->prio,
p->nice_offset,
p->hog_limit,
- p->wait_start_fair,
+ p->wait_start_fair - rq->fair_clock,
p->exec_start,
- p->last_ran,
- p->exec_runtime);
+ p->sum_exec_runtime,
+ p->sum_wait_runtime);
}
static void print_rq(struct seq_file *m, struct rq *rq, u64 now)
@@ -66,10 +66,10 @@ static void print_rq(struct seq_file *m,
"\nrunnable tasks:\n"
" task PID tree-key delta waiting"
" switches prio nice-offset hog-limit wstart-fair exec-start"
- " last-ran exec-runtime\n"
- "------------------------------------------------------------------"
- "------------------------------------------------------------------"
- "-------------------\n");
+ " sum-exec sum-wait\n"
+ "---------------------------------------------------------"
+ "--------------------------------------------------------------------"
+ "--------------------------\n");
curr = first_fair(rq);
while (curr) {
Index: linux/kernel/sched_fair.c
===================================================================
--- linux.orig/kernel/sched_fair.c
+++ linux/kernel/sched_fair.c
@@ -27,15 +27,9 @@ static void __enqueue_task_fair(struct r
{
struct rb_node **link = &rq->tasks_timeline.rb_node;
struct rb_node *parent = NULL;
+ long long key = p->fair_key;
struct task_struct *entry;
int leftmost = 1;
- long long key;
-
- key = rq->fair_clock - p->wait_runtime;
- if (unlikely(p->nice_offset))
- key += p->nice_offset / (rq->nr_running + 1);
-
- p->fair_key = key;
/*
* Find the right place in the rbtree:
@@ -48,9 +42,9 @@ static void __enqueue_task_fair(struct r
* the same key stay together.
*/
if (key < entry->fair_key) {
- link = &(*link)->rb_left;
+ link = &parent->rb_left;
} else {
- link = &(*link)->rb_right;
+ link = &parent->rb_right;
leftmost = 0;
}
}
@@ -138,7 +132,7 @@ static inline void update_curr(struct rq
delta_exec = convert_delta(rq, now - curr->exec_start, curr);
delta_fair = delta_exec/rq->nr_running;
- curr->exec_runtime += delta_exec;
+ curr->sum_exec_runtime += delta_exec;
curr->exec_start = now;
rq->fair_clock += delta_fair;
@@ -182,6 +176,11 @@ update_stats_enqueue(struct rq *rq, stru
*/
if (p != rq->curr)
update_stats_wait_start(rq, p, now);
+
+ /*
+ * Update the key:
+ */
+ p->fair_key = rq->fair_clock - p->wait_runtime + p->nice_offset;
}
/*
@@ -195,6 +194,7 @@ static inline void update_stats_wait_end
delta = scale_nice_down(rq, p, delta);
p->wait_runtime += delta;
+ p->sum_wait_runtime += delta;
rq->wait_runtime += delta;
p->wait_start_fair = 0;
@@ -275,6 +275,24 @@ static void requeue_task_fair(struct rq
p->on_rq = 1;
}
+/*
+ * Preempt the current task with a newly woken task if needed:
+ */
+static void check_preempt_curr_fair(struct rq *rq, struct task_struct *p)
+{
+ struct task_struct *curr = rq->curr;
+ long long __delta = curr->fair_key - p->fair_key;
+
+ /*
+ * Take scheduling granularity into account - do not
+ * preempt the current task unless the best task has
+ * a larger than sched_granularity fairness advantage:
+ */
+ if (p->prio < curr->prio ||
+ __delta > (unsigned long long)sysctl_sched_granularity)
+ resched_task(curr);
+}
+
static struct task_struct * pick_next_task_fair(struct rq *rq)
{
struct task_struct *p = __pick_next_task_fair(rq);
@@ -362,25 +380,36 @@ static void task_tick_fair(struct rq *rq
* Dequeue and enqueue the task to update its
* position within the tree:
*/
- dequeue_task_fair(rq, curr);
- curr->on_rq = 0;
- enqueue_task_fair(rq, curr);
- curr->on_rq = 1;
+ requeue_task_fair(rq, curr);
/*
* Reschedule if another task tops the current one.
- *
- * Take scheduling granularity into account - do not
- * preempt the current task unless the best task has
- * a larger than sched_granularity fairness advantage:
*/
next = __pick_next_task_fair(rq);
- if (next != curr) {
- unsigned long long __delta = curr->fair_key - next->fair_key;
+ if (next != curr)
+ check_preempt_curr(rq, next);
+}
- if (__delta > (unsigned long long)sysctl_sched_granularity)
- set_tsk_need_resched(curr);
- }
+/*
+ * Share the fairness runtime between parent and child, thus the
+ * total amount of pressure for CPU stays equal - new tasks
+ * get a chance to run but frequent forkers are not allowed to
+ * monopolize the CPU. Note: the parent runqueue is locked,
+ * the child is not running yet.
+ */
+static void task_new_fair(struct rq *rq, struct task_struct *p)
+{
+ sched_info_queued(p);
+ update_stats_enqueue(rq, p);
+ /*
+ * Child runs first: we let it run before the parent
+ * until it reschedules once. We set up a key so that
+ * it will preempt the parent:
+ */
+ p->fair_key = current->fair_key - sysctl_sched_granularity - 1;
+ __enqueue_task_fair(rq, p);
+ p->on_rq = 1;
+ inc_nr_running(p, rq);
}
static inline long
@@ -418,6 +447,8 @@ hog_limit(struct rq *rq, struct task_str
return -(long long)limit;
}
+#define NICE_OFFSET_GRANULARITY 100000
+
/*
* Calculate and cache the nice offset and the hog limit values:
*/
@@ -441,12 +472,15 @@ struct sched_class fair_sched_class __re
.dequeue_task = dequeue_task_fair,
.requeue_task = requeue_task_fair,
+ .check_preempt_curr = check_preempt_curr_fair,
+
.pick_next_task = pick_next_task_fair,
.put_prev_task = put_prev_task_fair,
.load_balance_start = load_balance_start_fair,
.load_balance_next = load_balance_next_fair,
.task_tick = task_tick_fair,
+ .task_new = task_new_fair,
.task_init = task_init_fair,
};
Index: linux/kernel/sched_rt.c
===================================================================
--- linux.orig/kernel/sched_rt.c
+++ linux/kernel/sched_rt.c
@@ -34,6 +34,15 @@ static void requeue_task_rt(struct rq *r
list_move_tail(&p->run_list, array->queue + p->prio);
}
+/*
+ * Preempt the current task with a newly woken task if needed:
+ */
+static void check_preempt_curr_rt(struct rq *rq, struct task_struct *p)
+{
+ if (p->prio < rq->curr->prio)
+ resched_task(rq->curr);
+}
+
static struct task_struct * pick_next_task_rt(struct rq *rq)
{
struct prio_array *array = &rq->active;
@@ -140,6 +149,15 @@ static void task_tick_rt(struct rq *rq,
}
}
+/*
+ * No parent/child timeslice management necessary for RT tasks,
+ * just activate them:
+ */
+static void task_new_rt(struct rq *rq, struct task_struct *p)
+{
+ activate_task(rq, p);
+}
+
static void task_init_rt(struct rq *rq, struct task_struct *p)
{
}
@@ -149,6 +167,8 @@ static struct sched_class rt_sched_class
.dequeue_task = dequeue_task_rt,
.requeue_task = requeue_task_rt,
+ .check_preempt_curr = check_preempt_curr_rt,
+
.pick_next_task = pick_next_task_rt,
.put_prev_task = put_prev_task_rt,
@@ -156,5 +176,6 @@ static struct sched_class rt_sched_class
.load_balance_next = load_balance_next_rt,
.task_tick = task_tick_rt,
+ .task_new = task_new_rt,
.task_init = task_init_rt,
};
Index: linux/kernel/sysctl.c
===================================================================
--- linux.orig/kernel/sysctl.c
+++ linux/kernel/sysctl.c
@@ -222,6 +222,14 @@ static ctl_table kern_table[] = {
.proc_handler = &proc_dointvec,
},
{
+ .ctl_name = CTL_UNNUMBERED,
+ .procname = "sched_child_runs_first",
+ .data = &sysctl_sched_child_runs_first,
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec,
+ },
+ {
.ctl_name = KERN_PANIC,
.procname = "panic",
.data = &panic_timeout,
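( With the patch applied, the new knob is also tweakable at runtime via
  /proc/sys/kernel/sched_child_runs_first, so the fair-fork behaviour can
  be switched on and off without a rebuild - a small illustrative sketch,
  not part of the patch: )

/*
 * set-child-runs-first.c: write 0 or 1 to the sysctl added by this
 * patch; equivalent to "echo 0 > /proc/sys/kernel/sched_child_runs_first".
 * Illustrative sketch only.
 */
#include <stdio.h>

int main(int argc, char **argv)
{
	const char *knob = "/proc/sys/kernel/sched_child_runs_first";
	FILE *f;

	if (argc != 2) {
		fprintf(stderr, "usage: %s 0|1\n", argv[0]);
		return 1;
	}
	f = fopen(knob, "w");
	if (!f) {
		perror(knob);
		return 1;
	}
	fprintf(f, "%s\n", argv[1]);
	return fclose(f) ? 1 : 0;
}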