[RFC][PATCH] Runtime switching to idle_poll (was: Re: 2.6.14-rt13)

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Thu, 2005-11-24 at 16:07 +0100, Ingo Molnar wrote:
> * Steven Rostedt <[email protected]> wrote:
> > Ingo, This could be a temporary patch until we come up with a better 
> > solution.  This adds /sys/kernel/idle/idle_poll, which if idle=poll is 
> > _not_ set, it still lets you switch the machine to idle=poll on the 
> > fly, as well as turn it off. If you have idle=poll, this doesn't even 
> > show up.
> 
> ok, i've applied this one too. Could you also submit it upstream (and 
> implement it for x86)? It makes sense to enable/disable the 
> polling-based idle routine runtime.

At Ingo's request, I fixed up this patch a little to allow both
x86_64 and i386 to switch to and from idle_poll at runtime.  I noticed
that the ACPI driver in drivers/acpi/processor_idle.c may race with
this patch, so I added some protection there.
Basically, if the ACPI code changes pm_idle, then you can't change to
idle_poll, and vice-versa.

This patch creates the entry /sys/kernel/idle/idle_poll.  It shows
whether or not idle_poll is being used as the runtime idle routine.
It is also used to set the runtime idle routine.

with:

# echo 1 > /sys/kernel/idle/idle_poll
  or
# echo on > /sys/kernel/idle/idle_poll

The system will switch to the idle_poll idle routine.

with:

# echo 0 > /sys/kernel/idle/idle_poll
  or
# echo off > /sys/kernel/idle/idle_poll

The system will switch out of idle poll.  Note that if the command line
states "idle=poll" then this entry is not created.

This is still a work-in-progress.  Since I only own an x86_64 and an i386
machine, those are the only archs I ported the code to and tested.
Looking at who else exports pm_idle, I see that the following archs may
also need to be updated:

arm, arm26, ia64, sparc.

I also have not yet protected the pm_idle in arch/i386/kernel/apm.c

I figure that I should get some comments before I spend any more time on
this.

Thanks,

-- Steve

Index: linux-2.6.15-rc2-git5/arch/i386/kernel/Makefile
===================================================================
--- linux-2.6.15-rc2-git5.orig/arch/i386/kernel/Makefile	2005-10-27 20:02:08.000000000 -0400
+++ linux-2.6.15-rc2-git5/arch/i386/kernel/Makefile	2005-11-25 11:56:25.000000000 -0500
@@ -34,6 +34,7 @@
 obj-$(CONFIG_HPET_TIMER) 	+= time_hpet.o
 obj-$(CONFIG_EFI) 		+= efi.o efi_stub.o
 obj-$(CONFIG_EARLY_PRINTK)	+= early_printk.o
+obj-$(CONFIG_SYSFS)		+= switch2poll.o
 
 EXTRA_AFLAGS   := -traditional
 
Index: linux-2.6.15-rc2-git5/arch/i386/kernel/process.c
===================================================================
--- linux-2.6.15-rc2-git5.orig/arch/i386/kernel/process.c	2005-11-25 10:58:53.000000000 -0500
+++ linux-2.6.15-rc2-git5/arch/i386/kernel/process.c	2005-11-25 12:18:12.000000000 -0500
@@ -39,6 +39,7 @@
 #include <linux/ptrace.h>
 #include <linux/random.h>
 #include <linux/kprobes.h>
+#include <linux/spinlock.h>
 
 #include <asm/uaccess.h>
 #include <asm/pgtable.h>
@@ -64,6 +65,12 @@
 unsigned long boot_option_idle_override = 0;
 EXPORT_SYMBOL(boot_option_idle_override);
 
+spinlock_t pm_idle_switch_lock = SPIN_LOCK_UNLOCKED;
+EXPORT_SYMBOL(pm_idle_switch_lock);
+
+int pm_idle_locked = 0;
+EXPORT_SYMBOL(pm_idle_locked);
+
 /*
  * Return saved PC of a blocked thread.
  */
@@ -126,7 +133,7 @@
  * to poll the ->work.need_resched flag instead of waiting for the
  * cross-CPU IPI to arrive. Use this option with caution.
  */
-static void poll_idle (void)
+void poll_idle (void)
 {
 	local_irq_enable();
 
Index: linux-2.6.15-rc2-git5/arch/i386/kernel/switch2poll.c
===================================================================
--- /dev/null	1970-01-01 00:00:00.000000000 +0000
+++ linux-2.6.15-rc2-git5/arch/i386/kernel/switch2poll.c	2005-11-25 11:55:19.000000000 -0500
@@ -0,0 +1,5 @@
+/*
+ * Same type of hack used for early_printk.  This keeps the code
+ * in one place.
+ */
+#include "../../x86_64/kernel/switch2poll.c"
Index: linux-2.6.15-rc2-git5/arch/x86_64/kernel/Makefile
===================================================================
--- linux-2.6.15-rc2-git5.orig/arch/x86_64/kernel/Makefile	2005-11-22 12:13:24.000000000 -0500
+++ linux-2.6.15-rc2-git5/arch/x86_64/kernel/Makefile	2005-11-25 11:56:40.000000000 -0500
@@ -30,6 +30,7 @@
 obj-$(CONFIG_DUMMY_IOMMU)	+= pci-nommu.o pci-dma.o
 obj-$(CONFIG_KPROBES)		+= kprobes.o
 obj-$(CONFIG_X86_PM_TIMER)	+= pmtimer.o
+obj-$(CONFIG_SYSFS)		+= switch2poll.o
 
 obj-$(CONFIG_MODULES)		+= module.o
 
Index: linux-2.6.15-rc2-git5/arch/x86_64/kernel/process.c
===================================================================
--- linux-2.6.15-rc2-git5.orig/arch/x86_64/kernel/process.c	2005-11-25 10:58:53.000000000 -0500
+++ linux-2.6.15-rc2-git5/arch/x86_64/kernel/process.c	2005-11-25 12:17:53.000000000 -0500
@@ -36,6 +36,7 @@
 #include <linux/utsname.h>
 #include <linux/random.h>
 #include <linux/kprobes.h>
+#include <linux/spinlock.h>
 
 #include <asm/uaccess.h>
 #include <asm/pgtable.h>
@@ -60,6 +61,12 @@
 unsigned long boot_option_idle_override = 0;
 EXPORT_SYMBOL(boot_option_idle_override);
 
+spinlock_t pm_idle_switch_lock = SPIN_LOCK_UNLOCKED;
+EXPORT_SYMBOL(pm_idle_switch_lock);
+
+int pm_idle_locked = 0;
+EXPORT_SYMBOL(pm_idle_locked);
+
 /*
  * Powermanagement idle function, if any..
  */
@@ -110,7 +117,7 @@
  * to poll the ->need_resched flag instead of waiting for the
  * cross-CPU IPI to arrive. Use this option with caution.
  */
-static void poll_idle (void)
+void poll_idle (void)
 {
 	local_irq_enable();
 
Index: linux-2.6.15-rc2-git5/arch/x86_64/kernel/switch2poll.c
===================================================================
--- /dev/null	1970-01-01 00:00:00.000000000 +0000
+++ linux-2.6.15-rc2-git5/arch/x86_64/kernel/switch2poll.c	2005-11-25 12:23:22.000000000 -0500
@@ -0,0 +1,167 @@
+/*
+ * Runtime switching of the idle routine to poll_idle via sysfs.
+ *
+ * Creates /sys/kernel/idle/idle_poll.  Reading it reports "on" or "off";
+ * writing "1"/"on" installs poll_idle as pm_idle and writing "0"/"off"
+ * restores the idle routine that was in use before the switch.
+ */
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/string.h>
+#include <linux/kobject.h>
+#include <linux/sysfs.h>
+#include <linux/spinlock.h>
+#include <linux/pm.h>
+
+extern void poll_idle (void);
+
+#define KERNEL_ATTR_RW(_name) \
+static struct subsys_attribute _name##_attr = \
+	__ATTR(_name, 0644, _name##_show, _name##_store)
+
+/*
+ * kobj:    the "idle" directory under /sys/kernel
+ * is_poll: non-zero while poll_idle is installed through this file
+ * idle:    idle routine to restore when polling is switched off
+ */
+static struct idlep_kobject
+{
+	struct kobject kobj;
+	int is_poll;
+	void (*idle)(void);
+} idle_kobj;
+
+/* Report whether poll_idle is currently installed: "on" or "off". */
+static ssize_t idle_poll_show(struct subsystem *subsys, char *page)
+{
+	return sprintf(page, "%s\n", (idle_kobj.is_poll ? "on" : "off"));
+}
+
+/*
+ * Parse a write to idle_poll.  Returns 1 for "1"/"on", 0 for "0"/"off"
+ * (each optionally followed by one newline) and -EINVAL otherwise.
+ * Only the first @len bytes are examined.
+ */
+static int idle_poll_parse(const char *buf, size_t len)
+{
+	/* "echo" appends a newline; ignore a single trailing one. */
+	if (len && buf[len - 1] == '\n')
+		len--;
+
+	if ((len == 1 && buf[0] == '1') ||
+	    (len == 2 && strncmp(buf, "on", 2) == 0))
+		return 1;
+
+	if ((len == 1 && buf[0] == '0') ||
+	    (len == 3 && strncmp(buf, "off", 3) == 0))
+		return 0;
+
+	return -EINVAL;
+}
+
+/*
+ * Switch to or from poll_idle.
+ *
+ * Returns @len on success (including when the requested state is already
+ * in effect), -EINVAL for unrecognised input, or -EBUSY while
+ * pm_idle_locked is set.
+ */
+static ssize_t idle_poll_store(struct subsystem *subsys,
+			       const char *buf, size_t len)
+{
+	unsigned long flags;
+	ssize_t ret = len;
+	int enable;
+
+	enable = idle_poll_parse(buf, len);
+	if (enable < 0)
+		return enable;
+
+	spin_lock_irqsave(&pm_idle_switch_lock, flags);
+
+	/*
+	 * If power management is handling the idle function,
+	 * then leave it be.
+	 */
+	if (pm_idle_locked) {
+		ret = -EBUSY;
+		goto out;
+	}
+
+	if (enable == idle_kobj.is_poll)
+		goto out;
+
+	if (enable) {
+		/* Remember the current routine so "off" can restore it. */
+		idle_kobj.idle = pm_idle;
+		pm_idle = poll_idle;
+	} else {
+		pm_idle = idle_kobj.idle;
+	}
+	boot_option_idle_override = enable;
+	idle_kobj.is_poll = enable;
+
+out:
+	spin_unlock_irqrestore(&pm_idle_switch_lock, flags);
+
+	return ret;
+}
+
+
+KERNEL_ATTR_RW(idle_poll);
+
+static struct attribute * idle_attrs[] = {
+	&idle_poll_attr.attr,
+	NULL
+};
+
+static struct attribute_group idle_attr_group = {
+	.attrs = idle_attrs,
+};
+
+/*
+ * Register /sys/kernel/idle and its idle_poll attribute.
+ * Returns 0 on success or when nothing needs to be set up, otherwise
+ * the error from the failing kobject/sysfs call.
+ */
+static int __init idle_poll_set_init(void)
+{
+	int err;
+
+	/*
+	 * If the default is already poll_idle then
+	 * don't even bother with this.
+	 */
+	if (pm_idle == poll_idle)
+		return 0;
+
+	/* idle_kobj has static storage, so it starts out zeroed. */
+	idle_kobj.idle = pm_idle;
+
+	err = kobject_set_name(&idle_kobj.kobj, "%s", "idle");
+	if (err)
+		goto out;
+
+	idle_kobj.kobj.parent = &kernel_subsys.kset.kobj;
+	err = kobject_register(&idle_kobj.kobj);
+	if (err)
+		goto out;
+
+	err = sysfs_create_group(&idle_kobj.kobj,
+				 &idle_attr_group);
+	if (err)
+		goto out_unregister;
+
+	return 0;
+
+out_unregister:
+	/* Don't leave an empty "idle" directory behind. */
+	kobject_unregister(&idle_kobj.kobj);
+out:
+	printk(KERN_INFO "Problem setting up sysfs idle_poll\n");
+	return err;
+}
+
+late_initcall(idle_poll_set_init);
Index: linux-2.6.15-rc2-git5/drivers/acpi/processor_idle.c
===================================================================
--- linux-2.6.15-rc2-git5.orig/drivers/acpi/processor_idle.c	2005-11-22 12:13:24.000000000 -0500
+++ linux-2.6.15-rc2-git5/drivers/acpi/processor_idle.c	2005-11-25 13:15:59.000000000 -0500
@@ -38,6 +38,7 @@
 #include <linux/dmi.h>
 #include <linux/moduleparam.h>
 #include <linux/sched.h>	/* need_resched() */
+#include <linux/spinlock.h>
 
 #include <asm/io.h>
 #include <asm/uaccess.h>
@@ -990,6 +991,7 @@
 	static int first_run = 0;
 	struct proc_dir_entry *entry = NULL;
 	unsigned int i;
+	unsigned long flags;
 
 	ACPI_FUNCTION_TRACE("acpi_processor_power_init");
 
@@ -1023,6 +1025,7 @@
 	 * Note that we use previously set idle handler will be used on
 	 * platforms that only support C1.
 	 */
+	spin_lock_irqsave(&pm_idle_switch_lock, flags);
 	if ((pr->flags.power) && (!boot_option_idle_override)) {
 		printk(KERN_INFO PREFIX "CPU%d (power states:", pr->id);
 		for (i = 1; i <= pr->power.count; i++)
@@ -1034,8 +1037,13 @@
 		if (pr->id == 0) {
 			pm_idle_save = pm_idle;
 			pm_idle = acpi_processor_idle;
+			/*
+			 * Don't allow switching of the pm_idle to poll.
+			 */
+			pm_idle_locked = 1;
 		}
 	}
+	spin_unlock_irqrestore(&pm_idle_switch_lock, flags);
 
 	/* 'power' [R] */
 	entry = create_proc_entry(ACPI_PROCESSOR_FILE_POWER,
@@ -1078,5 +1086,7 @@
 		cpu_idle_wait();
 	}
 
+	pm_idle_locked = 0;
+
 	return_VALUE(0);
 }
Index: linux-2.6.15-rc2-git5/include/linux/pm.h
===================================================================
--- linux-2.6.15-rc2-git5.orig/include/linux/pm.h	2005-11-25 12:05:33.000000000 -0500
+++ linux-2.6.15-rc2-git5/include/linux/pm.h	2005-11-25 12:17:17.000000000 -0500
@@ -25,6 +25,7 @@
 
 #include <linux/config.h>
 #include <linux/list.h>
+#include <linux/spinlock.h>
 #include <asm/atomic.h>
 
 /*
@@ -102,6 +103,8 @@
  */
 extern void (*pm_idle)(void);
 extern void (*pm_power_off)(void);
+extern spinlock_t pm_idle_switch_lock;
+extern int pm_idle_locked;
 
 typedef int __bitwise suspend_state_t;
 


-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [email protected]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[Index of Archives]     [Kernel Newbies]     [Netfilter]     [Bugtraq]     [Photo]     [Stuff]     [Gimp]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Video 4 Linux]     [Linux for the blind]     [Linux Resources]
  Powered by Linux