[RFC PATCH 11/35] Add support for Xen to entry.S.

- change cli/sti to the DISABLE_INTERRUPTS/ENABLE_INTERRUPTS macros, which on
  Xen mask/unmask per-vcpu event delivery instead (see the C sketch below)
- change the test for a user mode return so it also works for a kernel running
  in ring 1
- check the hypervisor's saved event mask on return from exception
- add entry points for the hypervisor upcall handlers
- avoid the math emulation check when running on Xen
- add an NMI handler for running on Xen
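
On Xen, interrupts are delivered as event-channel upcalls and are masked
per-vcpu through the shared info page rather than through EFLAGS.IF.  A rough
C rendering of what the new macros do is below (illustrative only: the helper
names and the abbreviated vcpu_info layout are made up for this note; the two
field names and their offsets follow xen/interface/xen.h):

	struct vcpu_info_sketch {
		unsigned char evtchn_upcall_pending;	/* offset 0 */
		unsigned char evtchn_upcall_mask;	/* offset 1 */
		/* ... rest of the (1 << sizeof_vcpu_shift)-byte slot ... */
	};

	static inline void xen_disable_events(struct vcpu_info_sketch *v)
	{
		v->evtchn_upcall_mask = 1;	/* DISABLE_INTERRUPTS, i.e. "cli" */
	}

	static inline void xen_enable_events(struct vcpu_info_sketch *v)
	{
		v->evtchn_upcall_mask = 0;	/* ENABLE_INTERRUPTS, i.e. "sti" */
		/* the exit path then tests evtchn_upcall_pending and
		 * re-enters the upcall handler if events arrived while
		 * masked */
	}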

Signed-off-by: Ian Pratt <[email protected]>
Signed-off-by: Christian Limpach <[email protected]>
Signed-off-by: Chris Wright <[email protected]>
---
 arch/i386/kernel/entry.S |  228 +++++++++++++++++++++++++++++++++++++++++++----
 1 files changed, 211 insertions(+), 17 deletions(-)

--- xen-subarch-2.6.orig/arch/i386/kernel/entry.S
+++ xen-subarch-2.6/arch/i386/kernel/entry.S
@@ -75,8 +75,42 @@ DF_MASK		= 0x00000400 
 NT_MASK		= 0x00004000
 VM_MASK		= 0x00020000
 
+#ifndef CONFIG_XEN
+#define DISABLE_INTERRUPTS	cli
+#define ENABLE_INTERRUPTS	sti
+#else
+#include <xen/interface/xen.h>
+
+EVENT_MASK	= 0x2E
+
+/* Pseudo-eflags. */
+NMI_MASK	= 0x80000000
+
+/* Offsets into shared_info_t. */
+#define evtchn_upcall_pending		/* 0 */
+#define evtchn_upcall_mask		1
+
+#define sizeof_vcpu_shift		6
+
+#ifdef CONFIG_SMP
+#define GET_VCPU_INFO		movl TI_cpu(%ebp),%esi			; \
+				shl  $sizeof_vcpu_shift,%esi		; \
+				addl HYPERVISOR_shared_info,%esi
+#else
+#define GET_VCPU_INFO		movl HYPERVISOR_shared_info,%esi
+#endif
+
+#define __DISABLE_INTERRUPTS	movb $1,evtchn_upcall_mask(%esi)
+#define __ENABLE_INTERRUPTS	movb $0,evtchn_upcall_mask(%esi)
+#define DISABLE_INTERRUPTS	GET_VCPU_INFO				; \
+				__DISABLE_INTERRUPTS
+#define ENABLE_INTERRUPTS	GET_VCPU_INFO				; \
+				__ENABLE_INTERRUPTS
+#define __TEST_PENDING		testb $0xFF,evtchn_upcall_pending(%esi)
+#endif
+
 #ifdef CONFIG_PREEMPT
-#define preempt_stop		cli
+#define preempt_stop		DISABLE_INTERRUPTS
 #else
 #define preempt_stop
 #define resume_kernel		restore_nocheck
@@ -145,10 +179,10 @@ ret_from_intr:
 	GET_THREAD_INFO(%ebp)
 	movl EFLAGS(%esp), %eax		# mix EFLAGS and CS
 	movb CS(%esp), %al
-	testl $(VM_MASK | 3), %eax
+	testl $(VM_MASK | USER_MODE_MASK), %eax
 	jz resume_kernel
 ENTRY(resume_userspace)
- 	cli				# make sure we don't miss an interrupt
+	DISABLE_INTERRUPTS		# make sure we don't miss an interrupt
 					# setting need_resched or sigpending
 					# between sampling and the iret
 	movl TI_flags(%ebp), %ecx
@@ -159,7 +193,7 @@ ENTRY(resume_userspace)
 
 #ifdef CONFIG_PREEMPT
 ENTRY(resume_kernel)
-	cli
+	DISABLE_INTERRUPTS
 	cmpl $0,TI_preempt_count(%ebp)	# non-zero preempt_count ?
 	jnz restore_nocheck
 need_resched:
@@ -179,7 +213,7 @@ need_resched:
 ENTRY(sysenter_entry)
 	movl TSS_sysenter_esp0(%esp),%esp
 sysenter_past_esp:
-	sti
+	ENABLE_INTERRUPTS
 	pushl $(__USER_DS)
 	pushl %ebp
 	pushfl
@@ -209,7 +243,7 @@ sysenter_past_esp:
 	jae syscall_badsys
 	call *sys_call_table(,%eax,4)
 	movl %eax,EAX(%esp)
-	cli
+	DISABLE_INTERRUPTS
 	movl TI_flags(%ebp), %ecx
 	testw $_TIF_ALLWORK_MASK, %cx
 	jne syscall_exit_work
@@ -217,7 +251,7 @@ sysenter_past_esp:
 	movl EIP(%esp), %edx
 	movl OLDESP(%esp), %ecx
 	xorl %ebp,%ebp
-	sti
+	ENABLE_INTERRUPTS
 	sysexit
 
 
@@ -236,7 +270,7 @@ syscall_call:
 	call *sys_call_table(,%eax,4)
 	movl %eax,EAX(%esp)		# store the return value
 syscall_exit:
-	cli				# make sure we don't miss an interrupt
+	DISABLE_INTERRUPTS		# make sure we don't miss an interrupt
 					# setting need_resched or sigpending
 					# between sampling and the iret
 	movl TI_flags(%ebp), %ecx
@@ -244,6 +278,7 @@ syscall_exit:
 	jne syscall_exit_work
 
 restore_all:
+#ifndef CONFIG_XEN
 	movl EFLAGS(%esp), %eax		# mix EFLAGS, SS and CS
 	# Warning: OLDSS(%esp) contains the wrong/random values if we
 	# are returning to the kernel.
@@ -254,12 +289,25 @@ restore_all:
 	cmpl $((4 << 8) | 3), %eax
 	je ldt_ss			# returning to user-space with LDT SS
 restore_nocheck:
+#else
+restore_nocheck:
+	testl $(VM_MASK|NMI_MASK), EFLAGS(%esp)
+	jnz hypervisor_iret
+	movb EVENT_MASK(%esp), %al
+	notb %al			# %al == ~saved_mask
+	GET_VCPU_INFO
+	andb evtchn_upcall_mask(%esi),%al
+	andb $1,%al			# %al == mask & ~saved_mask
+	jnz restore_all_enable_events	#     != 0 => reenable event delivery
+#endif
 	RESTORE_REGS
 	addl $4, %esp
 1:	iret
 .section .fixup,"ax"
 iret_exc:
-	sti
+#ifndef CONFIG_XEN
+	ENABLE_INTERRUPTS
+#endif
 	pushl $0			# no error code
 	pushl $do_iret_error
 	jmp error_code
@@ -269,6 +317,7 @@ iret_exc:
 	.long 1b,iret_exc
 .previous
 
+#ifndef CONFIG_XEN
 ldt_ss:
 	larl OLDSS(%esp), %eax
 	jnz restore_nocheck
@@ -281,7 +330,7 @@ ldt_ss:
 	 * CPUs, which we can try to work around to make
 	 * dosemu and wine happy. */
 	subl $8, %esp		# reserve space for switch16 pointer
-	cli
+	DISABLE_INTERRUPTS
 	movl %esp, %eax
 	/* Set up the 16bit stack frame with switch32 pointer on top,
 	 * and a switch16 pointer on top of the current frame. */
@@ -293,6 +342,13 @@ ldt_ss:
 	.align 4
 	.long 1b,iret_exc
 .previous
+#else
+hypervisor_iret:
+	andl $~NMI_MASK, EFLAGS(%esp)
+	RESTORE_REGS
+	addl $4, %esp
+	jmp  hypercall_page + (__HYPERVISOR_iret * 32)
+#endif
 
 	# perform work that needs to be done immediately before resumption
 	ALIGN
@@ -301,7 +357,7 @@ work_pending:
 	jz work_notifysig
 work_resched:
 	call schedule
-	cli				# make sure we don't miss an interrupt
+	DISABLE_INTERRUPTS		# make sure we don't miss an interrupt
 					# setting need_resched or sigpending
 					# between sampling and the iret
 	movl TI_flags(%ebp), %ecx
@@ -353,7 +409,7 @@ syscall_trace_entry:
 syscall_exit_work:
 	testb $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP), %cl
 	jz work_pending
-	sti				# could let do_syscall_trace() call
+	ENABLE_INTERRUPTS		# could let do_syscall_trace() call
 					# schedule() instead
 	movl %esp, %eax
 	movl $1, %edx
@@ -373,6 +429,7 @@ syscall_badsys:
 	movl $-ENOSYS,EAX(%esp)
 	jmp resume_userspace
 
+#ifndef CONFIG_XEN
 #define FIXUP_ESPFIX_STACK \
 	movl %esp, %eax; \
 	/* switch to 32bit stack using the pointer on top of 16bit stack */ \
@@ -431,6 +488,9 @@ ENTRY(name)				\
 
 /* The include is where all of the SMP etc. interrupts come from */
 #include "entry_arch.h"
+#else
+#define UNWIND_ESPFIX_STACK
+#endif
 
 ENTRY(divide_error)
 	pushl $0			# no error code
@@ -462,6 +522,126 @@ error_code:
 	call *%edi
 	jmp ret_from_exception
 
+#ifdef CONFIG_XEN
+# A note on the "critical region" in our callback handler.
+# We want to avoid stacking callback handlers due to events occurring
+# during handling of the last event. To do this, we keep events disabled
+# until we've done all processing. HOWEVER, we must enable events before
+# popping the stack frame (can't be done atomically) and so it would still
+# be possible to get enough handler activations to overflow the stack.
+# Although unlikely, bugs of that kind are hard to track down, so we'd
+# like to avoid the possibility.
+# So, on entry to the handler we detect whether we interrupted an
+# existing activation in its critical region -- if so, we pop the current
+# activation and restart the handler using the previous one.
+ENTRY(hypervisor_callback)
+	pushl %eax
+	SAVE_ALL
+	movl EIP(%esp),%eax
+	cmpl $scrit,%eax
+	jb   11f
+	cmpl $ecrit,%eax
+	jb   critical_region_fixup
+11:	push %esp
+	call evtchn_do_upcall
+	add  $4,%esp
+	jmp  ret_from_intr
+
+        ALIGN
+restore_all_enable_events:
+	__ENABLE_INTERRUPTS
+scrit:	/**** START OF CRITICAL REGION ****/
+	__TEST_PENDING
+	jnz  14f			# process more events if necessary...
+	RESTORE_REGS
+	addl $4, %esp
+1:	iret
+.section .fixup,"ax"
+2:	pushl $0
+	pushl $do_iret_error
+	jmp error_code
+.previous
+.section __ex_table,"a"
+	.align 4
+	.long 1b,2b
+.previous
+14:	__DISABLE_INTERRUPTS
+	jmp  11b
+ecrit:  /**** END OF CRITICAL REGION ****/
+# [How we do the fixup]. We want to merge the current stack frame with the
+# just-interrupted frame. How we do this depends on where in the critical
+# region the interrupted handler was executing, and so how many saved
+# registers are in each frame. We do this quickly using the lookup table
+# 'critical_fixup_table'. For each byte offset in the critical region, it
+# provides the number of bytes which have already been popped from the
+# interrupted stack frame.
+critical_region_fixup:
+	addl $critical_fixup_table-scrit,%eax
+	movzbl (%eax),%eax		# %eax contains num bytes popped
+	cmpb $0xff,%al                  # 0xff => vcpu_info critical region
+	jne  15f
+	GET_THREAD_INFO(%ebp)
+        xorl %eax,%eax
+15:	mov  %esp,%esi
+	add  %eax,%esi			# %esi points at end of src region
+	mov  %esp,%edi
+	add  $0x34,%edi			# %edi points at end of dst region
+	mov  %eax,%ecx
+	shr  $2,%ecx			# convert bytes to words
+	je   17f			# skip loop if nothing to copy
+16:	subl $4,%esi			# pre-decrementing copy loop
+	subl $4,%edi
+	movl (%esi),%eax
+	movl %eax,(%edi)
+	loop 16b
+17:	movl %edi,%esp			# final %edi is top of merged stack
+	jmp  11b
+
+critical_fixup_table:
+	.byte 0xff,0xff,0xff		# testb $0xff,(%esi) = __TEST_PENDING
+	.byte 0xff,0xff			# jnz  14f
+	.byte 0x00			# pop  %ebx
+	.byte 0x04			# pop  %ecx
+	.byte 0x08			# pop  %edx
+	.byte 0x0c			# pop  %esi
+	.byte 0x10			# pop  %edi
+	.byte 0x14			# pop  %ebp
+	.byte 0x18			# pop  %eax
+	.byte 0x1c			# pop  %ds
+	.byte 0x20			# pop  %es
+	.byte 0x24,0x24,0x24		# add  $4,%esp
+	.byte 0x28			# iret
+	.byte 0xff,0xff,0xff,0xff	# movb $1,1(%esi)
+	.byte 0x00,0x00			# jmp  11b
+
+# Hypervisor uses this for application faults while it executes.
+ENTRY(failsafe_callback)
+1:	popl %ds
+2:	popl %es
+3:	popl %fs
+4:	popl %gs
+	subl $4,%esp
+	SAVE_ALL
+	jmp  ret_from_exception
+.section .fixup,"ax";	\
+6:	movl $0,(%esp);	\
+	jmp 1b;		\
+7:	movl $0,(%esp);	\
+	jmp 2b;		\
+8:	movl $0,(%esp);	\
+	jmp 3b;		\
+9:	movl $0,(%esp);	\
+	jmp 4b;		\
+.previous;		\
+.section __ex_table,"a";\
+	.align 4;	\
+	.long 1b,6b;	\
+	.long 2b,7b;	\
+	.long 3b,8b;	\
+	.long 4b,9b;	\
+.previous
+#endif
+
 ENTRY(coprocessor_error)
 	pushl $0
 	pushl $do_coprocessor_error
@@ -475,17 +655,19 @@ ENTRY(simd_coprocessor_error)
 ENTRY(device_not_available)
 	pushl $-1			# mark this as an int
 	SAVE_ALL
+#ifndef CONFIG_XEN
 	movl %cr0, %eax
 	testl $0x4, %eax		# EM (math emulation bit)
-	jne device_not_available_emulate
-	preempt_stop
-	call math_state_restore
-	jmp ret_from_exception
-device_not_available_emulate:
+	je device_available_emulate
 	pushl $0			# temporary storage for ORIG_EIP
 	call math_emulate
 	addl $4, %esp
 	jmp ret_from_exception
+device_available_emulate:
+#endif
+	preempt_stop
+	call math_state_restore
+	jmp ret_from_exception
 
 /*
  * Debug traps and NMI can happen at the one SYSENTER instruction
@@ -521,6 +703,8 @@ debug_stack_correct:
 	call do_debug
 	jmp ret_from_exception
 	.previous .text
+
+#ifndef CONFIG_XEN
 /*
  * NMI is doubly nasty. It can happen _while_ we're handling
  * a debug fault, and the debug fault hasn't yet been able to
@@ -591,6 +775,16 @@ nmi_16bit_stack:
 	.align 4
 	.long 1b,iret_exc
 .previous
+#else
+ENTRY(nmi)
+	pushl %eax
+	SAVE_ALL
+	xorl %edx,%edx		# zero error code
+	movl %esp,%eax		# pt_regs pointer
+	call do_nmi
+	orl  $NMI_MASK, EFLAGS(%esp)
+	jmp restore_all
+#endif
 
 KPROBE_ENTRY(int3)
 	pushl $-1			# mark this as an int
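
A note on the Xen restore path above: restore_nocheck compares the event mask
saved in the stack frame (at the EVENT_MASK offset) with the live per-vcpu
mask.  The notb/andb sequence computes mask & ~saved_mask; if that is non-zero,
events were enabled when the frame was built but are masked now, so the exit
path must go through restore_all_enable_events to re-enable delivery (and
process anything pending) before the final iret.  A rough C equivalent, purely
illustrative (the helper name is made up for this note):

	/* non-zero => take the restore_all_enable_events path */
	static int must_reenable_events(unsigned char saved_mask,
					unsigned char cur_mask)
	{
		/* notb %al; andb evtchn_upcall_mask(%esi),%al; andb $1,%al */
		return (cur_mask & ~saved_mask) & 1;
	}

restore_all_enable_events clears the mask inside the marked critical region, so
if an upcall does arrive before the iret completes, critical_region_fixup can
merge the partially-popped frame using critical_fixup_table and restart the
exit path cleanly.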
