Re: [REPORT] cfs-v4 vs sd-0.44

* Con Kolivas <[email protected]> wrote:

> >   Feels even better, mouse movements are very smooth even under high 
> >   load. I noticed that X gets reniced to -19 with this scheduler. 
> >   I've not looked at the code yet but this looked suspicious to me. 
> >   I've reniced it to 0 and it did not change any behaviour. Still 
> >   very good.
> 
> Looks like this code does it:
> 
> +int sysctl_sched_privileged_nice_level __read_mostly = -19;

correct. Note that Willy reniced X back to 0 so it had no relevance on 
his test. Also note that i pointed this change out in the -v4 CFS 
announcement:

|| Changes since -v3:
||
||  - usability fix: automatic renicing of kernel threads such as 
||    keventd, OOM tasks and tasks doing privileged hardware access
||    (such as Xorg).

i've attached it below in a standalone form, feel free to put it into 
SD! :)

	Ingo

---
 arch/i386/kernel/ioport.c   |   13 ++++++++++---
 arch/x86_64/kernel/ioport.c |    8 ++++++--
 drivers/block/loop.c        |    5 ++++-
 include/linux/sched.h       |    7 +++++++
 kernel/sched.c              |   40 ++++++++++++++++++++++++++++++++++++++++
 kernel/workqueue.c          |    2 +-
 mm/oom_kill.c               |    4 +++-
 7 files changed, 71 insertions(+), 8 deletions(-)

Index: linux/arch/i386/kernel/ioport.c
===================================================================
--- linux.orig/arch/i386/kernel/ioport.c
+++ linux/arch/i386/kernel/ioport.c
@@ -64,9 +64,15 @@ asmlinkage long sys_ioperm(unsigned long
 
 	if ((from + num <= from) || (from + num > IO_BITMAP_BITS))
 		return -EINVAL;
-	if (turn_on && !capable(CAP_SYS_RAWIO))
-		return -EPERM;
-
+	if (turn_on) {
+		if (!capable(CAP_SYS_RAWIO))
+			return -EPERM;
+		/*
+		 * Task will be accessing hardware IO ports,
+		 * mark it as special with the scheduler too:
+		 */
+		sched_privileged_task(current);
+	}
 	/*
 	 * If it's the first ioperm() call in this thread's lifetime, set the
 	 * IO bitmap up. ioperm() is much less timing critical than clone(),
@@ -145,6 +151,7 @@ asmlinkage long sys_iopl(unsigned long u
 	if (level > old) {
 		if (!capable(CAP_SYS_RAWIO))
 			return -EPERM;
+		sched_privileged_task(current);
 	}
 	t->iopl = level << 12;
 	regs->eflags = (regs->eflags & ~X86_EFLAGS_IOPL) | t->iopl;
Index: linux/arch/x86_64/kernel/ioport.c
===================================================================
--- linux.orig/arch/x86_64/kernel/ioport.c
+++ linux/arch/x86_64/kernel/ioport.c
@@ -41,8 +41,11 @@ asmlinkage long sys_ioperm(unsigned long
 
 	if ((from + num <= from) || (from + num > IO_BITMAP_BITS))
 		return -EINVAL;
-	if (turn_on && !capable(CAP_SYS_RAWIO))
-		return -EPERM;
+	if (turn_on) {
+		if (!capable(CAP_SYS_RAWIO))
+			return -EPERM;
+		sched_privileged_task(current);
+	}
 
 	/*
 	 * If it's the first ioperm() call in this thread's lifetime, set the
@@ -113,6 +116,7 @@ asmlinkage long sys_iopl(unsigned int le
 	if (level > old) {
 		if (!capable(CAP_SYS_RAWIO))
 			return -EPERM;
+		sched_privileged_task(current);
 	}
 	regs->eflags = (regs->eflags &~ X86_EFLAGS_IOPL) | (level << 12);
 	return 0;
Index: linux/drivers/block/loop.c
===================================================================
--- linux.orig/drivers/block/loop.c
+++ linux/drivers/block/loop.c
@@ -588,7 +588,10 @@ static int loop_thread(void *data)
 	 */
 	current->flags |= PF_NOFREEZE;
 
-	set_user_nice(current, -20);
+	/*
+	 * The loop thread is important enough to be given a boost:
+	 */
+	sched_privileged_task(current);
 
 	while (!kthread_should_stop() || lo->lo_bio) {
 
Index: linux/include/linux/sched.h
===================================================================
--- linux.orig/include/linux/sched.h
+++ linux/include/linux/sched.h
@@ -1256,6 +1256,13 @@ static inline int rt_mutex_getprio(struc
 #endif
 
 extern void set_user_nice(struct task_struct *p, long nice);
+/*
+ * Task has special privileges, give it more CPU power:
+ */
+extern void sched_privileged_task(struct task_struct *p);
+
+extern int sysctl_sched_privileged_nice_level;
+
 extern int task_prio(const struct task_struct *p);
 extern int task_nice(const struct task_struct *p);
 extern int can_nice(const struct task_struct *p, const int nice);
Index: linux/kernel/sched.c
===================================================================
--- linux.orig/kernel/sched.c
+++ linux/kernel/sched.c
@@ -3251,6 +3251,46 @@ out_unlock:
 EXPORT_SYMBOL(set_user_nice);
 
 /*
+ * Nice level for privileged tasks. (can be set to 0 for this
+ * to be turned off)
+ */
+int sysctl_sched_privileged_nice_level __read_mostly = -19;
+
+static int __init privileged_nice_level_setup(char *str)
+{
+	sysctl_sched_privileged_nice_level = simple_strtoul(str, NULL, 0);
+	return 1;
+}
+__setup("privileged_nice_level=", privileged_nice_level_setup);
+
+/*
+ * Tasks with special privileges call this and gain extra nice
+ * levels:
+ */
+void sched_privileged_task(struct task_struct *p)
+{
+	long new_nice = sysctl_sched_privileged_nice_level;
+	long old_nice = TASK_NICE(p);
+
+	if (new_nice >= old_nice)
+		return;
+	/*
+	 * Setting the sysctl to 0 turns off the boosting:
+	 */
+	if (unlikely(!new_nice))
+		return;
+
+	if (new_nice < -20)
+		new_nice = -20;
+	else if (new_nice > 19)
+		new_nice = 19;
+
+	set_user_nice(p, new_nice);
+}
+
+EXPORT_SYMBOL(sched_privileged_task);
+
+/*
  * can_nice - check if a task can reduce its nice value
  * @p: task
  * @nice: nice value
Index: linux/kernel/workqueue.c
===================================================================
--- linux.orig/kernel/workqueue.c
+++ linux/kernel/workqueue.c
@@ -355,7 +355,7 @@ static int worker_thread(void *__cwq)
 	if (!cwq->freezeable)
 		current->flags |= PF_NOFREEZE;
 
-	set_user_nice(current, -5);
+	sched_privileged_task(current);
 
 	/* Block and flush all signals */
 	sigfillset(&blocked);
Index: linux/mm/oom_kill.c
===================================================================
--- linux.orig/mm/oom_kill.c
+++ linux/mm/oom_kill.c
@@ -291,7 +291,9 @@ static void __oom_kill_task(struct task_
 	 * all the memory it needs. That way it should be able to
 	 * exit() and clear out its resources quickly...
 	 */
-	p->time_slice = HZ;
+	if (p->policy == SCHED_NORMAL || p->policy == SCHED_BATCH)
+		sched_privileged_task(p);
+
 	set_tsk_thread_flag(p, TIF_MEMDIE);
 
 	force_sig(SIGKILL, p);
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [email protected]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Follow-Ups:
- Re: [REPORT] cfs-v4 vs sd-0.44
  - From: Con Kolivas <[email protected]>
- Re: [REPORT] cfs-v4 vs sd-0.44
  - From: Denis Vlasenko <[email protected]>
- Re: [REPORT] cfs-v4 vs sd-0.44
  - From: Jan Engelhardt <[email protected]>
- Re: [REPORT] cfs-v4 vs sd-0.44
  - From: William Lee Irwin III <[email protected]>
- Re: [REPORT] cfs-v4 vs sd-0.44
  - From: Willy Tarreau <[email protected]>

References:
- [patch] CFS scheduler, v4
  - From: Ingo Molnar <[email protected]>
- [REPORT] cfs-v4 vs sd-0.44
  - From: Willy Tarreau <[email protected]>
- Re: [REPORT] cfs-v4 vs sd-0.44
  - From: Con Kolivas <[email protected]>

Prev by Date: Re: [patch 7/8] allow unprivileged mounts
Next by Date: Re: [Devel] Re: [PATCH] bluetooth bnep: Convert to kthread API.
Previous by thread: Re: [REPORT] cfs-v4 vs sd-0.44
Next by thread: Re: [REPORT] cfs-v4 vs sd-0.44
Index(es):
- Date
- Thread

[Index of Archives] [Kernel Newbies] [Netfilter] [Bugtraq] [Photo] [Stuff] [Gimp] [Yosemite News] [MIPS Linux] [ARM Linux] [Linux Security] [Linux RAID] [Video 4 Linux] [Linux for the blind] [Linux Resources]