From: Chuck Ebbert <[email protected]>
i386 port of the sLeAZY-fpu feature.
Chuck reports that this gives him a roughly 0.4% improvement on his
simple benchmark.
Signed-off-by: Chuck Ebbert <[email protected]>
Signed-off-by: Arjan van de Ven <[email protected]>
arch/i386/kernel/process.c | 12 ++++++++++++
arch/i386/kernel/traps.c | 3 ++-
include/asm-i386/i387.h | 5 ++++-
3 files changed, 18 insertions(+), 2 deletions(-)
Index: linux-2.6.17-sleazyfpu/arch/i386/kernel/process.c
===================================================================
--- linux-2.6.17-sleazyfpu.orig/arch/i386/kernel/process.c
+++ linux-2.6.17-sleazyfpu/arch/i386/kernel/process.c
@@ -631,6 +631,11 @@ struct task_struct fastcall * __switch_t
__unlazy_fpu(prev_p);
+
+ /* we're going to use this soon, after a few expensive things */
+ if (next_p->fpu_counter > 5)
+ prefetch(&next->i387.fxsave);
+
/*
* Reload esp0.
*/
@@ -689,6 +694,13 @@ struct task_struct fastcall * __switch_t
disable_tsc(prev_p, next_p);
+ /* If the task has used the FPU in more than 5 consecutive timeslices,
+ * do a full restore of the math state immediately to avoid the trap;
+ * the chances of needing the FPU soon are obviously high now
+ */
+ if (next_p->fpu_counter > 5)
+ math_state_restore();
+
return prev_p;
}
Index: linux-2.6.17-sleazyfpu/arch/i386/kernel/traps.c
===================================================================
--- linux-2.6.17-sleazyfpu.orig/arch/i386/kernel/traps.c
+++ linux-2.6.17-sleazyfpu/arch/i386/kernel/traps.c
@@ -1063,7 +1063,7 @@ fastcall unsigned char * fixup_x86_bogus
* Must be called with kernel preemption disabled (in this case,
* local interrupts are disabled at the call-site in entry.S).
*/
-asmlinkage void math_state_restore(struct pt_regs regs)
+asmlinkage void math_state_restore(void)
{
struct thread_info *thread = current_thread_info();
struct task_struct *tsk = thread->task;
@@ -1073,6 +1073,7 @@ asmlinkage void math_state_restore(struc
init_fpu(tsk);
restore_fpu(tsk);
thread->status |= TS_USEDFPU; /* So we fnsave on switch_to() */
+ tsk->fpu_counter++;
}
#ifndef CONFIG_MATH_EMULATION
Index: linux-2.6.17-sleazyfpu/include/asm-i386/i387.h
===================================================================
--- linux-2.6.17-sleazyfpu.orig/include/asm-i386/i387.h
+++ linux-2.6.17-sleazyfpu/include/asm-i386/i387.h
@@ -76,7 +76,9 @@ static inline void __save_init_fpu( stru
#define __unlazy_fpu( tsk ) do { \
if (task_thread_info(tsk)->status & TS_USEDFPU) \
- save_init_fpu( tsk ); \
+ save_init_fpu( tsk ); \
+ else \
+ tsk->fpu_counter = 0; \
} while (0)
#define __clear_fpu( tsk ) \
@@ -118,6 +120,7 @@ static inline void save_init_fpu( struct
extern unsigned short get_fpu_cwd( struct task_struct *tsk );
extern unsigned short get_fpu_swd( struct task_struct *tsk );
extern unsigned short get_fpu_mxcsr( struct task_struct *tsk );
+extern asmlinkage void math_state_restore(void);
/*
* Signal frame handlers...
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [email protected]
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/
[Index of Archives]
[Kernel Newbies]
[Netfilter]
[Bugtraq]
[Photo]
[Stuff]
[Gimp]
[Yosemite News]
[MIPS Linux]
[ARM Linux]
[Linux Security]
[Linux RAID]
[Video 4 Linux]
[Linux for the blind]
[Linux Resources]