[PATCH] tiny improvement to x86_64 asm aes encryption

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Basically, when de do:

        encrypt_round(aes_ft_tab,-96)
        encrypt_round(aes_ft_tab,-80)

first encrypt_round produces results in R5,R6,R3,R4,
and then moves R5->R1, R6->R2 for use in second one:

#define encrypt_round(TAB,OFFSET) \
        round(TAB,OFFSET,R1,R2,R3,R4,R5,R6,R7,R10,R5,R6,R3,R4) \
        move_regs(R1,R2,R5,R6)


But since we _always_ call them in pairs, we can just
swap arguments in second one, eliminating move_regs!


#define encrypt_round1(TAB,OFFSET) \
        round(TAB,OFFSET,R1,R2,R3,R4,R5,R6,R7,R10,R5,R6,R3,R4)
                         ^^^^^                    ^^^^^
#define encrypt_round2(TAB,OFFSET) \
        round(TAB,OFFSET,R5,R6,R3,R4,R1,R2,R7,R10,R1,R2,R3,R4)
                         ^^^^^                    ^^^^^
...
        encrypt_round1(aes_ft_tab,-96)
        encrypt_round2(aes_ft_tab,-80)

"encrypt_final" and "return" macros are changed accordingly.

Of course same thing is done on decrypt path.

Patch is not tested.
--
vda
--- aes-x86_64-asm.S.org	Mon Aug 29 02:41:01 2005
+++ aes-x86_64-asm.S	Thu Nov 24 12:34:35 2005
@@ -124,63 +124,63 @@
 	xorl	TAB+1024(,r1,4),r3 ## E;\
 	xorl	TAB(,r2,4),r4 ## E;
 
-#define move_regs(r1,r2,r3,r4) \
-	movl	r3 ## E,r1 ## E;	\
-	movl	r4 ## E,r2 ## E;
-
 #define entry(FUNC,BASE,B128,B192) \
 	prologue(FUNC,BASE,B128,B192,R2,R8,R7,R9,R1,R3,R4,R6,R10,R5,R11)
 
-#define return epilogue(R8,R2,R9,R7,R5,R6,R3,R4,R11)
-
-#define encrypt_round(TAB,OFFSET) \
-	round(TAB,OFFSET,R1,R2,R3,R4,R5,R6,R7,R10,R5,R6,R3,R4) \
-	move_regs(R1,R2,R5,R6)
+#define return epilogue(R8,R6,R9,R7,R1,R2,R3,R4,R11)
 
-#define encrypt_final(TAB,OFFSET) \
+#define encrypt_round1(TAB,OFFSET) \
 	round(TAB,OFFSET,R1,R2,R3,R4,R5,R6,R7,R10,R5,R6,R3,R4)
 
-#define decrypt_round(TAB,OFFSET) \
-	round(TAB,OFFSET,R2,R1,R4,R3,R6,R5,R7,R10,R5,R6,R3,R4) \
-	move_regs(R1,R2,R5,R6)
+#define encrypt_round2(TAB,OFFSET) \
+	round(TAB,OFFSET,R5,R6,R3,R4,R1,R2,R7,R10,R1,R2,R3,R4)
+
+#define encrypt_final2(TAB,OFFSET) \
+	round(TAB,OFFSET,R5,R6,R3,R4,R1,R2,R7,R10,R1,R2,R3,R4)
 
-#define decrypt_final(TAB,OFFSET) \
+#define decrypt_round1(TAB,OFFSET) \
 	round(TAB,OFFSET,R2,R1,R4,R3,R6,R5,R7,R10,R5,R6,R3,R4)
 
+#define decrypt_round2(TAB,OFFSET) \
+	round(TAB,OFFSET,R6,R5,R4,R3,R2,R1,R7,R10,R1,R2,R3,R4)
+
+#define decrypt_final2(TAB,OFFSET) \
+	round(TAB,OFFSET,R6,R5,R4,R3,R2,R1,R7,R10,R1,R2,R3,R4)
+
 /* void aes_encrypt(void *ctx, u8 *out, const u8 *in) */
 
 	entry(aes_encrypt,0,enc128,enc192)
-	encrypt_round(aes_ft_tab,-96)
-	encrypt_round(aes_ft_tab,-80)
-enc192:	encrypt_round(aes_ft_tab,-64)
-	encrypt_round(aes_ft_tab,-48)
-enc128:	encrypt_round(aes_ft_tab,-32)
-	encrypt_round(aes_ft_tab,-16)
-	encrypt_round(aes_ft_tab,  0)
-	encrypt_round(aes_ft_tab, 16)
-	encrypt_round(aes_ft_tab, 32)
-	encrypt_round(aes_ft_tab, 48)
-	encrypt_round(aes_ft_tab, 64)
-	encrypt_round(aes_ft_tab, 80)
-	encrypt_round(aes_ft_tab, 96)
-	encrypt_final(aes_fl_tab,112)
+	encrypt_round1(aes_ft_tab,-96)
+	encrypt_round2(aes_ft_tab,-80)
+enc192:	encrypt_round1(aes_ft_tab,-64)
+	encrypt_round2(aes_ft_tab,-48)
+enc128:	encrypt_round1(aes_ft_tab,-32)
+	encrypt_round2(aes_ft_tab,-16)
+	encrypt_round1(aes_ft_tab,  0)
+	encrypt_round2(aes_ft_tab, 16)
+	encrypt_round1(aes_ft_tab, 32)
+	encrypt_round2(aes_ft_tab, 48)
+	encrypt_round1(aes_ft_tab, 64)
+	encrypt_round2(aes_ft_tab, 80)
+	encrypt_round1(aes_ft_tab, 96)
+	encrypt_final2(aes_fl_tab,112)
 	return
 
 /* void aes_decrypt(void *ctx, u8 *out, const u8 *in) */
 
 	entry(aes_decrypt,240,dec128,dec192)
-	decrypt_round(aes_it_tab,-96)
-	decrypt_round(aes_it_tab,-80)
-dec192:	decrypt_round(aes_it_tab,-64)
-	decrypt_round(aes_it_tab,-48)
-dec128:	decrypt_round(aes_it_tab,-32)
-	decrypt_round(aes_it_tab,-16)
-	decrypt_round(aes_it_tab,  0)
-	decrypt_round(aes_it_tab, 16)
-	decrypt_round(aes_it_tab, 32)
-	decrypt_round(aes_it_tab, 48)
-	decrypt_round(aes_it_tab, 64)
-	decrypt_round(aes_it_tab, 80)
-	decrypt_round(aes_it_tab, 96)
-	decrypt_final(aes_il_tab,112)
+	decrypt_round1(aes_it_tab,-96)
+	decrypt_round2(aes_it_tab,-80)
+dec192:	decrypt_round1(aes_it_tab,-64)
+	decrypt_round2(aes_it_tab,-48)
+dec128:	decrypt_round1(aes_it_tab,-32)
+	decrypt_round2(aes_it_tab,-16)
+	decrypt_round1(aes_it_tab,  0)
+	decrypt_round2(aes_it_tab, 16)
+	decrypt_round1(aes_it_tab, 32)
+	decrypt_round2(aes_it_tab, 48)
+	decrypt_round1(aes_it_tab, 64)
+	decrypt_round2(aes_it_tab, 80)
+	decrypt_round1(aes_it_tab, 96)
+	decrypt_final2(aes_il_tab,112)
 	return

[Index of Archives]     [Kernel Newbies]     [Netfilter]     [Bugtraq]     [Photo]     [Stuff]     [Gimp]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Video 4 Linux]     [Linux for the blind]     [Linux Resources]
  Powered by Linux