[patch 2/2] sLeAZY FPU feature - i386 support

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



From: Chuck Ebbert <[email protected]>

i386 port of the sLeAZY-fpu feature. 
Chuck reports that this gives him a +/- 0.4% improvement on his
simple benchmark

Signed-off-by: Chuck Ebbert <[email protected]>
Signed-off-by: Arjan van de Ven <[email protected]>

 arch/i386/kernel/process.c |   12 ++++++++++++
 arch/i386/kernel/traps.c   |    3 ++-
 include/asm-i386/i387.h    |    5 ++++-
 3 files changed, 18 insertions(+), 2 deletions(-)

Index: linux-2.6.17-sleazyfpu/arch/i386/kernel/process.c
===================================================================
--- linux-2.6.17-sleazyfpu.orig/arch/i386/kernel/process.c
+++ linux-2.6.17-sleazyfpu/arch/i386/kernel/process.c
@@ -631,6 +631,11 @@ struct task_struct fastcall * __switch_t
 
 	__unlazy_fpu(prev_p);
 
+
+	/* we're going to use this soon, after a few expensive things */
+	if (next_p->fpu_counter > 5)
+		prefetch(&next->i387.fxsave);
+
 	/*
 	 * Reload esp0.
 	 */
@@ -689,6 +694,13 @@ struct task_struct fastcall * __switch_t
 
 	disable_tsc(prev_p, next_p);
 
+	/* If the task has used fpu the last 5 timeslices, just do a full
+	 * restore of the math state immediately to avoid the trap; the
+	 * chances of needing FPU soon are obviously high now
+	 */
+	if (next_p->fpu_counter > 5)
+		math_state_restore();
+
 	return prev_p;
 }
 
Index: linux-2.6.17-sleazyfpu/arch/i386/kernel/traps.c
===================================================================
--- linux-2.6.17-sleazyfpu.orig/arch/i386/kernel/traps.c
+++ linux-2.6.17-sleazyfpu/arch/i386/kernel/traps.c
@@ -1063,7 +1063,7 @@ fastcall unsigned char * fixup_x86_bogus
  * Must be called with kernel preemption disabled (in this case,
  * local interrupts are disabled at the call-site in entry.S).
  */
-asmlinkage void math_state_restore(struct pt_regs regs)
+asmlinkage void math_state_restore(void)
 {
 	struct thread_info *thread = current_thread_info();
 	struct task_struct *tsk = thread->task;
@@ -1073,6 +1073,7 @@ asmlinkage void math_state_restore(struc
 		init_fpu(tsk);
 	restore_fpu(tsk);
 	thread->status |= TS_USEDFPU;	/* So we fnsave on switch_to() */
+	tsk->fpu_counter++;
 }
 
 #ifndef CONFIG_MATH_EMULATION
Index: linux-2.6.17-sleazyfpu/include/asm-i386/i387.h
===================================================================
--- linux-2.6.17-sleazyfpu.orig/include/asm-i386/i387.h
+++ linux-2.6.17-sleazyfpu/include/asm-i386/i387.h
@@ -76,7 +76,9 @@ static inline void __save_init_fpu( stru
 
 #define __unlazy_fpu( tsk ) do { \
 	if (task_thread_info(tsk)->status & TS_USEDFPU) \
-		save_init_fpu( tsk ); \
+		save_init_fpu( tsk ); 			\
+	else						\
+		tsk->fpu_counter = 0;			\
 } while (0)
 
 #define __clear_fpu( tsk )					\
@@ -118,6 +120,7 @@ static inline void save_init_fpu( struct
 extern unsigned short get_fpu_cwd( struct task_struct *tsk );
 extern unsigned short get_fpu_swd( struct task_struct *tsk );
 extern unsigned short get_fpu_mxcsr( struct task_struct *tsk );
+extern asmlinkage void math_state_restore(void);
 
 /*
  * Signal frame handlers...


-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [email protected]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[Index of Archives]     [Kernel Newbies]     [Netfilter]     [Bugtraq]     [Photo]     [Stuff]     [Gimp]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Video 4 Linux]     [Linux for the blind]     [Linux Resources]
  Powered by Linux