On 5/26/06, Peter Williams <[email protected]> wrote:
This patch implements hard CPU rate caps per task, expressed as a
proportion of a single CPU's capacity in parts per thousand.
Signed-off-by: Peter Williams <[email protected]>
include/linux/sched.h | 8 ++
kernel/Kconfig.caps | 14 +++-
kernel/sched.c | 154 ++++++++++++++++++++++++++++++++++++++++++++++++--
3 files changed, 168 insertions(+), 8 deletions(-)
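For orientation: cap values are parts per thousand of a single CPU, so
a minimal (untested) sketch of how other kernel code might use the
setter declared in sched.h below could look like this. The return
convention isn't shown in this excerpt, so the 0/-errno assumption is
just that, an assumption:

#include <linux/sched.h>

/* Sketch only: limit a task to 10% of one CPU, enforced even when
 * CPUs would otherwise be idle.  Assumes set_cpu_rate_hard_cap()
 * returns 0 on success and a negative errno on failure. */
static int cap_task_to_ten_percent(struct task_struct *p)
{
	return set_cpu_rate_hard_cap(p, 100);	/* 100/1000 == 10% */
}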
Index: MM-2.6.17-rc4-mm3/include/linux/sched.h
===================================================================
--- MM-2.6.17-rc4-mm3.orig/include/linux/sched.h 2006-05-26 10:46:35.000000000 +1000
+++ MM-2.6.17-rc4-mm3/include/linux/sched.h 2006-05-26 11:00:07.000000000 +1000
@@ -796,6 +796,10 @@ struct task_struct {
#ifdef CONFIG_CPU_RATE_CAPS
unsigned long long avg_cpu_per_cycle, avg_cycle_length;
unsigned int cpu_rate_cap;
+#ifdef CONFIG_CPU_RATE_HARD_CAPS
+ unsigned int cpu_rate_hard_cap;
+ struct timer_list sinbin_timer;
Using a timer for releasing tasks from their sinbin sounds like a bit
of an overhead, given that there could be tens of thousands of tasks.
Is it possible to use the scheduler_tick() function to take a look at
all deactivated tasks (as efficiently as possible) and activate them
when it's time to activate them, or just to fold the functionality in
by defining a time quantum after which everyone is woken up? This
time quantum could be the same as the time over which limits are
honoured.
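To make that concrete, here's a rough, untested sketch of the
per-runqueue alternative (sinbinned, sinbin_list and
sinbin_release_time are invented names, and the list head would need
initialising in sched_init()):

static void sinbin_task(task_t *p, unsigned long durn)
{
	runqueue_t *rq = task_rq(p);

	if (durn == 0)
		return;
	deactivate_task(p, rq);
	p->sinbin_release_time = jiffies + durn;
	list_add_tail(&p->sinbin_list, &rq->sinbinned);
}

/* Called from scheduler_tick() with rq->lock already held */
static void release_expired_sinbinned(runqueue_t *rq)
{
	task_t *p, *n;

	list_for_each_entry_safe(p, n, &rq->sinbinned, sinbin_list) {
		if (time_before(jiffies, p->sinbin_release_time))
			continue;
		list_del_init(&p->sinbin_list);
		p->prio = effective_prio(p);
		__activate_task(p, rq);
	}
}

That trades one timer per capped task for a per-tick scan whose cost
is bounded by the number of currently sinbinned tasks on that
runqueue.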
+#endif
#endif
enum sleep_type sleep_type;
@@ -994,6 +998,10 @@ struct task_struct {
#ifdef CONFIG_CPU_RATE_CAPS
unsigned int get_cpu_rate_cap(const struct task_struct *);
int set_cpu_rate_cap(struct task_struct *, unsigned int);
+#ifdef CONFIG_CPU_RATE_HARD_CAPS
+unsigned int get_cpu_rate_hard_cap(const struct task_struct *);
+int set_cpu_rate_hard_cap(struct task_struct *, unsigned int);
+#endif
#endif
static inline pid_t process_group(struct task_struct *tsk)
Index: MM-2.6.17-rc4-mm3/kernel/Kconfig.caps
===================================================================
--- MM-2.6.17-rc4-mm3.orig/kernel/Kconfig.caps 2006-05-26 10:45:26.000000000 +1000
+++ MM-2.6.17-rc4-mm3/kernel/Kconfig.caps 2006-05-26 11:00:07.000000000 +1000
@@ -3,11 +3,21 @@
#
config CPU_RATE_CAPS
- bool "Support (soft) CPU rate caps"
+ bool "Support CPU rate caps"
default n
---help---
- Say y here if you wish to be able to put a (soft) upper limit on
+ Say y here if you wish to be able to put a soft upper limit on
the rate of CPU usage by individual tasks. A task which has been
allocated a soft CPU rate cap will be limited to that rate of CPU
usage unless there are spare CPU resources available after the needs
of uncapped tasks are met.
+
+config CPU_RATE_HARD_CAPS
+ bool "Support CPU rate hard caps"
+ depends on CPU_RATE_CAPS
+ default n
+ ---help---
+ Say y here if you wish to be able to put a hard upper limit on
+ the rate of CPU usage by individual tasks. A task which has been
+ allocated a hard CPU rate cap will be limited to that rate of CPU
+ usage regardless of whether there are spare CPU resources available.
Index: MM-2.6.17-rc4-mm3/kernel/sched.c
===================================================================
--- MM-2.6.17-rc4-mm3.orig/kernel/sched.c 2006-05-26 11:00:02.000000000 +1000
+++ MM-2.6.17-rc4-mm3/kernel/sched.c 2006-05-26 13:50:11.000000000 +1000
@@ -201,21 +201,33 @@ static inline unsigned int task_timeslic
#ifdef CONFIG_CPU_RATE_CAPS
#define CAP_STATS_OFFSET 8
+#ifdef CONFIG_CPU_RATE_HARD_CAPS
+static void sinbin_release_fn(unsigned long arg);
+#define min_cpu_rate_cap(p) min((p)->cpu_rate_cap, (p)->cpu_rate_hard_cap)
+#else
+#define min_cpu_rate_cap(p) (p)->cpu_rate_cap
+#endif
#define task_has_cap(p) unlikely((p)->flags & PF_HAS_CAP)
/* this assumes that p is not a real time task */
#define task_is_background(p) unlikely((p)->cpu_rate_cap == 0)
#define task_being_capped(p) unlikely((p)->prio >= CAPPED_PRIO)
-#define cap_load_weight(p) (((p)->cpu_rate_cap * SCHED_LOAD_SCALE) / 1000)
+#define cap_load_weight(p) ((min_cpu_rate_cap(p) * SCHED_LOAD_SCALE) / 1000)
static void init_cpu_rate_caps(task_t *p)
{
p->cpu_rate_cap = 1000;
p->flags &= ~PF_HAS_CAP;
+#ifdef CONFIG_CPU_RATE_HARD_CAPS
+ p->cpu_rate_hard_cap = 1000;
+ init_timer(&p->sinbin_timer);
+ p->sinbin_timer.function = sinbin_release_fn;
+ p->sinbin_timer.data = (unsigned long) p;
+#endif
}
static inline void set_cap_flag(task_t *p)
{
- if (p->cpu_rate_cap < 1000 && !has_rt_policy(p))
+ if (min_cpu_rate_cap(p) < 1000 && !has_rt_policy(p))
p->flags |= PF_HAS_CAP;
else
p->flags &= ~PF_HAS_CAP;
@@ -223,7 +235,7 @@ static inline void set_cap_flag(task_t *
static inline int task_exceeding_cap(const task_t *p)
{
- return (p->avg_cpu_per_cycle * 1000) > (p->avg_cycle_length * p->cpu_rate_cap);
+ return (p->avg_cpu_per_cycle * 1000) > (p->avg_cycle_length * min_cpu_rate_cap(p));
}
#ifdef CONFIG_SCHED_SMT
@@ -257,7 +269,7 @@ static int task_exceeding_cap_now(const
delta = (now > p->timestamp) ? (now - p->timestamp) : 0;
lhs = (p->avg_cpu_per_cycle + delta) * 1000;
- rhs = (p->avg_cycle_length + delta) * p->cpu_rate_cap;
+ rhs = (p->avg_cycle_length + delta) * min_cpu_rate_cap(p);
return lhs > rhs;
}
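(Quick sanity check on the units, taking delta == 0 and noting that
the 2^CAP_STATS_OFFSET fixed-point scaling cancels across the
comparison: with a 250/1000 cap, 3ms of CPU in a 10ms cycle gives
lhs = 3000000 * 1000 = 3.0e9 versus rhs = 10000000 * 250 = 2.5e9, so
lhs > rhs and the task is correctly judged to be exceeding its 25%
cap.)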
@@ -266,6 +278,10 @@ static inline void init_cap_stats(task_t
{
p->avg_cpu_per_cycle = 0;
p->avg_cycle_length = 0;
+#ifdef CONFIG_CPU_RATE_HARD_CAPS
+ init_timer(&p->sinbin_timer);
+ p->sinbin_timer.data = (unsigned long) p;
+#endif
}
static inline void inc_cap_stats_cycle(task_t *p, unsigned long long now)
@@ -1213,6 +1229,64 @@ static void deactivate_task(struct task_
p->array = NULL;
}
+#ifdef CONFIG_CPU_RATE_HARD_CAPS
+#define task_has_hard_cap(p) unlikely((p)->cpu_rate_hard_cap < 1000)
+
+/*
+ * Release a task from the sinbin
+ */
+static void sinbin_release_fn(unsigned long arg)
+{
+ unsigned long flags;
+ struct task_struct *p = (struct task_struct*)arg;
+ struct runqueue *rq = task_rq_lock(p, &flags);
+
+ p->prio = effective_prio(p);
+
+ __activate_task(p, rq);
+
+ task_rq_unlock(rq, &flags);
+}
+
+static unsigned long reqd_sinbin_ticks(const task_t *p)
+{
+ unsigned long long res;
+
+ res = p->avg_cpu_per_cycle * 1000;
+
+ if (res > p->avg_cycle_length * p->cpu_rate_hard_cap) {
+ (void)do_div(res, p->cpu_rate_hard_cap);
+ res -= p->avg_cpu_per_cycle;
+ /*
+ * If it were available we'd also subtract
+ * the average sleep per cycle here
+ */
+ res >>= CAP_STATS_OFFSET;
+ (void)do_div(res, (1000000000 / HZ));
Please use NSEC_PER_SEC if that is what 10^9 stands for in the above
calculation.
+
+ return res ? : 1;
+ }
+
+ return 0;
+}
+
+static void sinbin_task(task_t *p, unsigned long durn)
+{
+ if (durn == 0)
+ return;
+ deactivate_task(p, task_rq(p));
+ p->sinbin_timer.expires = jiffies + durn;
+ add_timer(&p->sinbin_timer);
+}
+#else
+#define task_has_hard_cap(p) 0
+#define reqd_sinbin_ticks(p) 0
+
+static inline void sinbin_task(task_t *p, unsigned long durn)
+{
+}
+#endif
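As a sanity check of the arithmetic in reqd_sinbin_ticks(), here is a
stand-alone user-space rendering of the same steps, assuming HZ=1000
and that the averages are nanosecond values scaled by
2^CAP_STATS_OFFSET (which is what the shift suggests):

#include <stdio.h>

#define HZ 1000
#define NSEC_PER_SEC 1000000000ULL
#define CAP_STATS_OFFSET 8

int main(void)
{
	/* Example: 5ms of CPU per cycle, 10ms average cycle length,
	 * hard cap of 100/1000, i.e. 10% of one CPU. */
	unsigned long long avg_cpu_per_cycle = 5000000ULL << CAP_STATS_OFFSET;
	unsigned long long avg_cycle_length = 10000000ULL << CAP_STATS_OFFSET;
	unsigned int hard_cap = 100;
	unsigned long long res = avg_cpu_per_cycle * 1000;

	if (res > avg_cycle_length * hard_cap) {
		res /= hard_cap;	  /* cycle length needed to honour the cap */
		res -= avg_cpu_per_cycle; /* minus the CPU time already used */
		res >>= CAP_STATS_OFFSET; /* drop the fixed-point scaling */
		res /= NSEC_PER_SEC / HZ; /* nanoseconds -> ticks */
		if (!res)
			res = 1;	  /* at least one tick, as in the patch */
	} else {
		res = 0;		  /* cap not exceeded, no sinbin time */
	}

	/* 5ms at a 10% cap implies a 50ms cycle, so 45ms of enforced
	 * sleep: prints "45 ticks" with HZ=1000. */
	printf("%llu ticks\n", res);
	return 0;
}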
+
/*
* resched_task - mark a task 'to be rescheduled now'.
*
@@ -3508,9 +3582,16 @@ need_resched_nonpreemptible:
}
}
- /* do this now so that stats are correct for SMT code */
- if (task_has_cap(prev))
+ if (task_has_cap(prev)) {
inc_cap_stats_both(prev, now);
+ if (task_has_hard_cap(prev) && !prev->state &&
+ !rt_task(prev) && !signal_pending(prev)) {
+ unsigned long sinbin_ticks = reqd_sinbin_ticks(prev);
+
+ if (sinbin_ticks)
+ sinbin_task(prev, sinbin_ticks);
+ }
+ }
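(Note the guards here: only a task that is still runnable
(prev->state == 0), not real-time and without a pending signal gets
sinbinned, so a capped task that is blocking anyway, or that needs to
handle a signal, is never delayed further.)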
cpu = smp_processor_id();
if (unlikely(!rq->nr_running)) {
@@ -4539,6 +4620,67 @@ out:
}
<snip>
Balbir
Linux Technology Center
IBM Software Labs