[PATCH] Add I/O hypercalls for i386 paravirt

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



In general, I/O in a virtual guest is subject to performance problems. The I/O cannot be completed physically, but must be virtualized. This means trapping and decoding port I/O instructions from the guest OS. Not only is the #GP trap heavyweight, both in the processor and in the hypervisor (which usually has a complex #GP path), but it also forces the hypervisor to decode the individual instruction that faulted. Worse, even with hardware assist such as VT, the exit reason alone is not sufficient to determine the true nature of the faulting instruction, requiring a complex and costly instruction decode and simulation.

This patch provides hypercalls for the i386 port I/O instructions, which greatly helps guests that use native-style drivers. For certain VMI workloads, this provides a performance boost of up to 30%. We expect KVM and lguest to be able to achieve similar gains on I/O-intensive workloads.

This patch is against 2.6.23-rc2-mm2, and should be targeted for 2.6.24.

Zach
Virtualized guests in general benefit from having I/O hypercalls.  This
patch adds support for port I/O hypercalls to VMI and provides the
infrastructure for other backends to make use of this feature.

Signed-off-by: Zachary Amsden <[email protected]>

diff --git a/arch/i386/kernel/paravirt.c b/arch/i386/kernel/paravirt.c
index ea962c0..4d0d150 100644
--- a/arch/i386/kernel/paravirt.c
+++ b/arch/i386/kernel/paravirt.c
@@ -329,6 +329,18 @@ struct paravirt_ops paravirt_ops = {
 
 	.set_iopl_mask = native_set_iopl_mask,
 	.io_delay = native_io_delay,
+	.outb = native_outb,
+	.outw = native_outw,
+	.outl = native_outl,
+	.inb = native_inb,
+	.inw = native_inw,
+	.inl = native_inl,
+	.outsb = native_outsb,
+	.outsw = native_outsw,
+	.outsl = native_outsl,
+	.insb = native_insb,
+	.insw = native_insw,
+	.insl = native_insl,
 
 #ifdef CONFIG_X86_LOCAL_APIC
 	.apic_write = native_apic_write,
diff --git a/arch/i386/kernel/vmi.c b/arch/i386/kernel/vmi.c
index 44feb34..5ecd85b 100644
--- a/arch/i386/kernel/vmi.c
+++ b/arch/i386/kernel/vmi.c
@@ -56,6 +56,7 @@ static int disable_tsc;
 static int disable_mtrr;
 static int disable_noidle;
 static int disable_vmi_timer;
+static int disable_io_ops;
 
 /* Cached VMI operations */
 static struct {
@@ -72,6 +73,18 @@ static struct {
 	void (*set_initial_ap_state)(int, int);
 	void (*halt)(void);
   	void (*set_lazy_mode)(int mode);
+	void (*outb)(u8 value, u16 port);
+	void (*outw)(u16 value, u16 port);
+	void (*outl)(u32 value, u16 port);
+	u8 (*inb)(u16 port);
+	u16 (*inw)(u16 port);
+	u32 (*inl)(u16 port);
+	void (*outsb)(const void *addr, u16 port, u32 count);
+	void (*outsw)(const void *addr, u16 port, u32 count);
+	void (*outsl)(const void *addr, u16 port, u32 count);
+	void (*insb)(void *addr, u16 port, u32 count);
+	void (*insw)(void *addr, u16 port, u32 count);
+	void (*insl)(void *addr, u16 port, u32 count);
 } vmi_ops;
 
 /* Cached VMI operations */
@@ -565,6 +578,33 @@ static void vmi_set_lazy_mode(enum paravirt_lazy_mode mode)
 	}
 }
 
+#define BUILDIO(bwl,type) \
+static void vmi_out##bwl(type value, int port) { \
+	__asm__ __volatile__("call *%0" : : \
+		"r"(vmi_ops.out##bwl), "a"(value), "d"(port)); \
+} \
+static type vmi_in##bwl(int port) { \
+	type value; \
+	__asm__ __volatile__("call *%1" : \
+		"=a"(value) : \
+		"r"(vmi_ops.in##bwl), "d"(port)); \
+	return value; \
+} \
+static void vmi_outs##bwl(int port, const void *addr, unsigned long count) { \
+	__asm__ __volatile__("call *%2" : \
+		"+S"(addr), "+c"(count) : \
+		"r"(vmi_ops.outs##bwl), "d"(port)); \
+} \
+static void vmi_ins##bwl(int port, void *addr, unsigned long count) { \
+	__asm__ __volatile__("call *%2" : \
+		"+D"(addr), "+c"(count) : \
+		"r"(vmi_ops.ins##bwl), "d"(port)); \
+} 
+
+BUILDIO(b,unsigned char)
+BUILDIO(w,unsigned short)
+BUILDIO(l,unsigned int)
+
 static inline int __init check_vmi_rom(struct vrom_header *rom)
 {
 	struct pci_header *pci;
@@ -791,6 +831,21 @@ static inline int __init activate_vmi(void)
 	para_wrap(load_esp0, vmi_load_esp0, set_kernel_stack, UpdateKernelStack);
 	para_fill(set_iopl_mask, SetIOPLMask);
 	para_fill(io_delay, IODelay);
+	if (!disable_io_ops) {
+		para_wrap(inb, vmi_inb, inb, INB);
+		para_wrap(inw, vmi_inw, inw, INW);
+		para_wrap(inl, vmi_inl, inl, INL);
+		para_wrap(outb, vmi_outb, outb, OUTB);
+		para_wrap(outw, vmi_outw, outw, OUTW);
+		para_wrap(outl, vmi_outl, outl, OUTL);
+		para_wrap(insb, vmi_insb, insb, INSB);
+		para_wrap(insw, vmi_insw, insw, INSW);
+		para_wrap(insl, vmi_insl, insl, INSL);
+		para_wrap(outsb, vmi_outsb, outsb, OUTSB);
+		para_wrap(outsw, vmi_outsw, outsw, OUTSW);
+		para_wrap(outsl, vmi_outsl, outsl, OUTSL);
+	}
+
 	para_wrap(set_lazy_mode, vmi_set_lazy_mode, set_lazy_mode, SetLazyMode);
 
 	/* user and kernel flush are just handled with different flags to FlushTLB */
@@ -968,6 +1023,8 @@ static int __init parse_vmi(char *arg)
 		disable_noidle = 1;
 	} else if (!strcmp(arg, "disable_noidle"))
 		disable_noidle = 1;
+	else if (!strcmp(arg, "disable_io_ops"))
+		disable_io_ops = 1;
 	return 0;
 }
 
diff --git a/include/asm-i386/io.h b/include/asm-i386/io.h
index e8e0bd6..0669d5f 100644
--- a/include/asm-i386/io.h
+++ b/include/asm-i386/io.h
@@ -272,6 +272,17 @@ static inline void slow_down_io(void) {
 #endif
 }
 
+#define	__BUILDOUTINST(bwl,bw,value,port); \
+	__asm__ __volatile__("out" #bwl " %" #bw "0, %w1" : : "a"(value), "Nd"(port));
+
+#define	__BUILDININST(bwl,bw,value,port); \
+	__asm__ __volatile__("in" #bwl " %w1, %" #bw "0" : "=a"(value) : "Nd"(port));
+
+#define __BUILDOUTSINST(bwl,addr,count,port) \
+	__asm__ __volatile__("rep; outs" #bwl : "+S"(addr), "+c"(count) : "d"(port));
+
+#define __BUILDINSINST(bwl,addr,count,port) \
+	__asm__ __volatile__("rep; ins" #bwl : "+D"(addr), "+c"(count) : "d"(port));
 #endif
 
 #ifdef CONFIG_X86_NUMAQ
@@ -308,14 +319,22 @@ static inline unsigned type in##bwl(int port) { \
 
 
 #define BUILDIO(bwl,bw,type) \
-static inline void out##bwl##_local(unsigned type value, int port) { \
+static inline void native_out##bwl(unsigned type value, int port) { \
 	__asm__ __volatile__("out" #bwl " %" #bw "0, %w1" : : "a"(value), "Nd"(port)); \
 } \
-static inline unsigned type in##bwl##_local(int port) { \
+static inline void out##bwl##_local(unsigned type value, int port) { \
+	__BUILDOUTINST(bwl,bw,value,port); \
+} \
+static inline unsigned type native_in##bwl(int port) { \
 	unsigned type value; \
 	__asm__ __volatile__("in" #bwl " %w1, %" #bw "0" : "=a"(value) : "Nd"(port)); \
 	return value; \
 } \
+static inline unsigned type in##bwl##_local(int port) { \
+	unsigned type value; \
+	__BUILDININST(bwl,bw,value,port); \
+	return value; \
+} \
 static inline void out##bwl##_local_p(unsigned type value, int port) { \
 	out##bwl##_local(value, port); \
 	slow_down_io(); \
@@ -335,15 +354,23 @@ static inline unsigned type in##bwl##_p(int port) { \
 	slow_down_io(); \
 	return value; \
 } \
-static inline void outs##bwl(int port, const void *addr, unsigned long count) { \
+static inline void native_outs##bwl(int port, const void *addr, unsigned long count) { \
 	__asm__ __volatile__("rep; outs" #bwl : "+S"(addr), "+c"(count) : "d"(port)); \
 } \
-static inline void ins##bwl(int port, void *addr, unsigned long count) { \
+static inline void native_ins##bwl(int port, void *addr, unsigned long count) { \
 	__asm__ __volatile__("rep; ins" #bwl : "+D"(addr), "+c"(count) : "d"(port)); \
+} \
+static inline void outs##bwl(int port, const void *addr, unsigned long count) { \
+	__BUILDOUTSINST(bwl,addr,count,port); \
+} \
+static inline void ins##bwl(int port, void *addr, unsigned long count) { \
+	__BUILDINSINST(bwl,addr,count,port); \
 }
 
 BUILDIO(b,b,char)
 BUILDIO(w,w,short)
 BUILDIO(l,,int)
 
+#undef BUILDIO
+
 #endif
diff --git a/include/asm-i386/paravirt.h b/include/asm-i386/paravirt.h
index 7df88be..ad44e4f 100644
--- a/include/asm-i386/paravirt.h
+++ b/include/asm-i386/paravirt.h
@@ -141,6 +141,22 @@ struct paravirt_ops
 	void (*set_iopl_mask)(unsigned mask);
 	void (*io_delay)(void);
 
+	void (*outb)(unsigned char value, int port);
+	void (*outw)(unsigned short value, int port);
+	void (*outl)(unsigned int value, int port);
+
+	unsigned char (*inb)(int port);
+	unsigned short (*inw)(int port);
+	unsigned int (*inl)(int port);
+
+	void (*outsb)(int port, const void *addr, unsigned long count);
+	void (*outsw)(int port, const void *addr, unsigned long count);
+	void (*outsl)(int port, const void *addr, unsigned long count);
+	
+	void (*insb)(int port, void *addr, unsigned long count);
+	void (*insw)(int port, void *addr, unsigned long count);
+	void (*insl)(int port, void *addr, unsigned long count);
+
 	/*
 	 * Hooks for intercepting the creation/use/destruction of an
 	 * mm_struct.
@@ -645,6 +661,15 @@ static inline void slow_down_io(void) {
 #endif
 }
 
+#define __BUILDOUTINST(bwl,bw,value,port) \
+        paravirt_ops.out##bwl(value, port)
+#define __BUILDININST(bwl,bw,value,port) \
+        do { value = paravirt_ops.in##bwl(port); } while (0)
+#define __BUILDOUTSINST(bwl,addr,count,port) \
+        paravirt_ops.outs##bwl(port, addr, count)
+#define __BUILDINSINST(bwl,addr,count,port) \
+        paravirt_ops.ins##bwl(port, addr, count)
+
 #ifdef CONFIG_X86_LOCAL_APIC
 /*
  * Basic functions accessing APICs.
diff --git a/include/asm-i386/vmi.h b/include/asm-i386/vmi.h
index eb8bd89..a5072b2 100644
--- a/include/asm-i386/vmi.h
+++ b/include/asm-i386/vmi.h
@@ -92,7 +92,18 @@
 #define VMI_CALL_InvalPage		46
 #define VMI_CALL_FlushTLB		47
 #define VMI_CALL_SetLinearMapping	48
-
+#define VMI_CALL_INL			49
+#define VMI_CALL_INB			50
+#define VMI_CALL_INW			51
+#define VMI_CALL_INSL			52
+#define VMI_CALL_INSB			53
+#define VMI_CALL_INSW			54
+#define VMI_CALL_OUTL			55
+#define VMI_CALL_OUTB			56
+#define VMI_CALL_OUTW			57
+#define VMI_CALL_OUTSL			58
+#define VMI_CALL_OUTSB			59
+#define VMI_CALL_OUTSW			60
 #define VMI_CALL_SetIOPLMask		61
 #define VMI_CALL_SetInitialAPState	62
 #define VMI_CALL_APICWrite		63

[Index of Archives]     [Kernel Newbies]     [Netfilter]     [Bugtraq]     [Photo]     [Stuff]     [Gimp]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Video 4 Linux]     [Linux for the blind]     [Linux Resources]
  Powered by Linux