[PATCH] introduce generic 64bit rotations and i386 asm optimized version

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



This is done because on 32bit arch gcc generates suboptimal code
from C implementation:

old:
      40:       83 ea 80                sub    $0xffffff80,%edx
      43:       89 95 50 ff ff ff       mov    %edx,0xffffff50(%ebp)
      49:       8b bd 50 ff ff ff       mov    0xffffff50(%ebp),%edi
      4f:       8b 4f f0                mov    0xfffffff0(%edi),%ecx
      52:       8b 5f f4                mov    0xfffffff4(%edi),%ebx
      55:       89 ce                   mov    %ecx,%esi
      57:       0f ac de 13             shrd   $0x13,%ebx,%esi
      5b:       31 c0                   xor    %eax,%eax
      5d:       89 df                   mov    %ebx,%edi
      5f:       89 ca                   mov    %ecx,%edx
      61:       c1 e2 0d                shl    $0xd,%edx
      64:       09 c6                   or     %eax,%esi
      66:       c1 ef 13                shr    $0x13,%edi
      69:       89 d8                   mov    %ebx,%eax
      6b:       09 d7                   or     %edx,%edi
      6d:       c1 e8 1d                shr    $0x1d,%eax
      70:       31 d2                   xor    %edx,%edx
      72:       89 85 40 ff ff ff       mov    %eax,0xffffff40(%ebp)
      78:       89 95 44 ff ff ff       mov    %edx,0xffffff44(%ebp)
new:
      40:       83 ea 80                sub    $0xffffff80,%edx
      43:       89 95 48 ff ff ff       mov    %edx,0xffffff48(%ebp)
      49:       8b 9d 48 ff ff ff       mov    0xffffff48(%ebp),%ebx
      4f:       8b 4b f0                mov    0xfffffff0(%ebx),%ecx
      52:       8b 5b f4                mov    0xfffffff4(%ebx),%ebx
      55:       89 c8                   mov    %ecx,%eax
      57:       89 da                   mov    %ebx,%edx
      59:       0f ac d0 13             shrd   $0x13,%edx,%eax
      5d:       0f ac ca 13             shrd   $0x13,%ecx,%edx
      61:       89 45 a0                mov    %eax,0xffffffa0(%ebp)
      64:       89 55 a4                mov    %edx,0xffffffa4(%ebp)
--
vda
diff -urpN 2.6.12-rc2.1.be/include/asm-i386/bitops.h 2.6.12-rc2.2.ror/include/asm-i386/bitops.h
--- 2.6.12-rc2.1.be/include/asm-i386/bitops.h	Tue Oct 19 00:54:36 2004
+++ 2.6.12-rc2.2.ror/include/asm-i386/bitops.h	Tue Apr 19 08:20:07 2005
@@ -7,6 +7,7 @@
 
 #include <linux/config.h>
 #include <linux/compiler.h>
+#include <linux/types.h>
 
 /*
  * These have to be done with inline assembly: that way the bit-setting
@@ -431,9 +432,81 @@ static inline int ffs(int x)
 #define hweight16(x) generic_hweight16(x)
 #define hweight8(x) generic_hweight8(x)
 
-#endif /* __KERNEL__ */
+/*
+ * 64bit rotations
+ * (gcc3 seems to be clever enough to do 32bit ones just fine)
+ *
+ * Why "i" and "I" constraints do not work? gcc says:
+ * "warning: asm operand 2 probably doesn't match constraints"
+ * "error: impossible constraint in 'asm'"
+ * Will use "Ic" for now. If gcc will fail to do const propagation
+ * and will try to stuff constant into ecx, shld %3,... will expand
+ * to shld %ecx,... and assembler will moan.
+ * Do not 'fix' by changing to shld %b3,...
+ *
+ * Have to stick to edx,eax pair only because
+ * gcc has limited support for 64bit asm parameters
+ */
+#define constant_rol64(v,c) \
+	({						\
+	u64 vv = (v);					\
+	if(!(c&63)) {					\
+	} else if((c&63)==1) {				\
+		asm (					\
+		"	shldl	$1,%%edx,%%eax	\n"	\
+		"	rcll	$1,%%edx	\n"	\
+		: "=&A" (vv)				\
+		: "0" (vv)				\
+		);					\
+	} else if((c&63)==63) {				\
+		asm (					\
+		"	shrdl	$1,%%edx,%%eax	\n"	\
+		"	rcrl	$1,%%edx	\n"	\
+		: "=&A" (vv)				\
+		: "0" (vv)				\
+		);					\
+	} else if((c&63)<32) {				\
+		asm (					\
+		"	shldl	%3,%%edx,%%eax	\n"	\
+		"	shldl	%3,%2,%%edx	\n"	\
+		: "=&A" (vv)				\
+		: "0" (vv), "r" (vv), "Ic" (c&63)	\
+		);					\
+	} else if((c&63)>32) {				\
+		asm (					\
+		"	shrdl	%3,%%edx,%%eax	\n"	\
+		"	shrdl	%3,%2,%%edx	\n"	\
+		: "=&A" (vv)				\
+		: "0" (vv), "r" (vv), "Ic" (64-(c&63))	\
+		);					\
+	} else /* (c&63)==32 */ {			\
+		asm (					\
+		"	xchgl	%%edx,%%eax	\n"	\
+		: "=&A" (vv)				\
+		: "0" (vv)				\
+		);					\
+	}						\
+	vv;						\
+	})
+/*
+ * Unfortunately 64bit rotations with non-constant count
+ * have issues with cnt>=32. Using C code instead
+ */
+static inline u64 rol64(u64 x,int num) {
+	if(__builtin_constant_p(num))
+		return constant_rol64(x,num);
+	/* Hmmm... shall we do cnt&=63 here? */
+	return ((x<<num) | (x>>(64-num)));
+}
+static inline u64 ror64(u64 x,int num) {
+	if(__builtin_constant_p(num))
+		return constant_rol64(x,(64-num));
+	return ((x>>num) | (x<<(64-num)));
+}
+
+#define ARCH_HAS_ROL64
+#define ARCH_HAS_ROR64
 
-#ifdef __KERNEL__
 
 #define ext2_set_bit(nr,addr) \
 	__test_and_set_bit((nr),(unsigned long*)addr)
diff -urpN 2.6.12-rc2.1.be/include/linux/bitops.h 2.6.12-rc2.2.ror/include/linux/bitops.h
--- 2.6.12-rc2.1.be/include/linux/bitops.h	Mon Apr 18 22:55:10 2005
+++ 2.6.12-rc2.2.ror/include/linux/bitops.h	Tue Apr 19 00:25:28 2005
@@ -41,7 +41,7 @@ static inline int generic_ffs(int x)
  * fls: find last bit set.
  */
 
-static __inline__ int generic_fls(int x)
+static inline int generic_fls(int x)
 {
 	int r = 32;
 
@@ -76,7 +76,7 @@ static __inline__ int generic_fls(int x)
  */
 #include <asm/bitops.h>
 
-static __inline__ int get_bitmask_order(unsigned int count)
+static inline int get_bitmask_order(unsigned int count)
 {
 	int order;
 	
@@ -155,5 +155,31 @@ static inline __u32 ror32(__u32 word, un
 {
 	return (word >> shift) | (word << (32 - shift));
 }
+
+/*
+ * rol64 - rotate a 64-bit value left
+ *
+ * @word: value to rotate
+ * @shift: bits to roll
+ */
+#ifndef ARCH_HAS_ROL64
+static inline __u64 rol64(__u64 word, unsigned int shift)
+{
+	return (word << shift) | (word >> (64 - shift));
+}
+#endif
+
+/*
+ * ror64 - rotate a 64-bit value right
+ *
+ * @word: value to rotate
+ * @shift: bits to roll
+ */
+#ifndef ARCH_HAS_ROR64
+static inline __u64 ror64(__u64 word, unsigned int shift)
+{
+	return (word >> shift) | (word << (64 - shift));
+}
+#endif
 
 #endif

[Index of Archives]     [Kernel Newbies]     [Netfilter]     [Bugtraq]     [Photo]     [Stuff]     [Gimp]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Video 4 Linux]     [Linux for the blind]     [Linux Resources]
  Powered by Linux