/*
 * Patch summary for cipher/rijndael-ssse3-amd64.c, limited to what the
 * hunks below show:
 *
 *  - The three assembly cores (_aes_encrypt_core, _aes_decrypt_core and
 *    _aes_schedule_core) move out of a single file-scope asm() block into
 *    static C functions that each wrap one `asm volatile` statement.  The
 *    .text/.align/.type/.size directives, the entry labels, the trailing
 *    `ret` of each core and the X() helper macro are removed.
 *  - Callers stop calling the cores by symbol name.  They store the
 *    function address in a local `void *core` and issue `call *%[core]`.
 *    The register carrying the pointer ("+a" for the key schedule, "+S"
 *    for encrypt and decrypt) is dropped from the clobber list because it
 *    is now a read-write operand.
 *  - The constant table is wrapped in _aes_get_consts(), which jumps over
 *    the table to _aes_get_consts_end and returns the table address loaded
 *    with a RIP-relative leaq.  vpaes_ssse3_prepare_enc and
 *    vpaes_ssse3_prepare_dec assign that return value to const_ptr and pass
 *    it as an "r" input instead of computing it with `lea` into a "=c"
 *    output.
 *  - memcpy() becomes buf_cpy() when the key is saved for decryption setup,
 *    and the include annotated "for memcmp()" is removed.
 *
 * NOTE(review): the cores no longer contain an explicit `ret`; presumably
 * the return now comes from the compiler-generated function epilogue.
 * Confirm that no prologue or epilogue code disturbs the register-based
 * calling convention described in the assembly comments.
 *
 * NOTE(review): _aes_get_consts() defines the labels _aes_consts and
 * _aes_get_consts_end inside an asm statement of a static function that is
 * called from the prepare macros.  No noinline-style attribute is visible
 * in these hunks; if the compiler ever emitted the body more than once the
 * assembler would see duplicate symbols.  Confirm the function is always
 * emitted exactly once.
 *
 * NOTE(review): `void *core = _aes_..._core` converts a function pointer to
 * an object pointer, which ISO C does not guarantee; this looks like a
 * deliberate reliance on the GCC/x86-64 behaviour.  Confirm.
 */
Index: b/cipher/rijndael-ssse3-amd64.c =================================================================== --- b/cipher/rijndael-ssse3-amd64.c +++ b/cipher/rijndael-ssse3-amd64.c @@ -37,7 +37,6 @@ #include #include #include -#include /* for memcmp() */ #include "types.h" /* for byte and u32 typedefs */ #include "g10lib.h" @@ -116,23 +115,23 @@ #endif #define vpaes_ssse3_prepare_enc(const_ptr) \ + const_ptr = _aes_get_consts(); \ vpaes_ssse3_prepare(); \ - asm volatile ("lea .Laes_consts(%%rip), %q0 \n\t" \ - "movdqa (%q0), %%xmm9 # 0F \n\t" \ + asm volatile ("movdqa (%q0), %%xmm9 # 0F \n\t" \ "movdqa .Lk_inv (%q0), %%xmm10 # inv \n\t" \ "movdqa .Lk_inv+16(%q0), %%xmm11 # inva \n\t" \ "movdqa .Lk_sb1 (%q0), %%xmm13 # sb1u \n\t" \ "movdqa .Lk_sb1+16(%q0), %%xmm12 # sb1t \n\t" \ "movdqa .Lk_sb2 (%q0), %%xmm15 # sb2u \n\t" \ "movdqa .Lk_sb2+16(%q0), %%xmm14 # sb2t \n\t" \ - : "=c" (const_ptr) \ : \ + : "r" (const_ptr) \ : "memory" ) #define vpaes_ssse3_prepare_dec(const_ptr) \ + const_ptr = _aes_get_consts(); \ vpaes_ssse3_prepare(); \ - asm volatile ("lea .Laes_consts(%%rip), %q0 \n\t" \ - "movdqa (%q0), %%xmm9 # 0F \n\t" \ + asm volatile ("movdqa (%q0), %%xmm9 # 0F \n\t" \ "movdqa .Lk_inv (%q0), %%xmm10 # inv \n\t" \ "movdqa .Lk_inv+16(%q0), %%xmm11 # inva \n\t" \ "movdqa .Lk_dsb9 (%q0), %%xmm13 # sb9u \n\t" \ @@ -140,17 +139,23 @@ "movdqa .Lk_dsbd (%q0), %%xmm15 # sbdu \n\t" \ "movdqa .Lk_dsbb (%q0), %%xmm14 # sbbu \n\t" \ "movdqa .Lk_dsbe (%q0), %%xmm8 # sbeu \n\t" \ - : "=c" (const_ptr) \ : \ + : "r" (const_ptr) \ : "memory" ) +static void _aes_schedule_core(void); +static void _aes_encrypt_core(void); +static void _aes_decrypt_core(void); +static const void *_aes_get_consts(void); + void _gcry_aes_ssse3_do_setkey (RIJNDAEL_context *ctx, const byte *key) { unsigned int keybits = (ctx->rounds - 10) * 32 + 128; byte ssse3_state[SSSE3_STATE_SIZE]; + void *core = _aes_schedule_core; vpaes_ssse3_prepare(); @@ -159,20 +164,20 @@ "leaq %[buf], %%rdx" "\n\t" "movl %[dir], 
%%ecx" "\n\t" "movl %[rotoffs], %%r8d" "\n\t" - "call _aes_schedule_core" "\n\t" - : + "call *%[core]" "\n\t" + : [core] "+a" (core) : [key] "m" (*key), [bits] "g" (keybits), [buf] "m" (ctx->keyschenc32[0][0]), [dir] "g" (0), [rotoffs] "g" (48) - : "r8", "r9", "r10", "r11", "rax", "rcx", "rdx", "rdi", "rsi", + : "r8", "r9", "r10", "r11", "rcx", "rdx", "rdi", "rsi", "cc", "memory"); vpaes_ssse3_cleanup(); /* Save key for setting up decryption. */ - memcpy(&ctx->keyschdec32[0][0], key, keybits / 8); + buf_cpy(&ctx->keyschdec32[0][0], key, keybits / 8); } @@ -182,6 +187,7 @@ { unsigned int keybits = (ctx->rounds - 10) * 32 + 128; byte ssse3_state[SSSE3_STATE_SIZE]; + void *core = _aes_schedule_core; vpaes_ssse3_prepare(); @@ -190,14 +196,14 @@ "leaq %[buf], %%rdx" "\n\t" "movl %[dir], %%ecx" "\n\t" "movl %[rotoffs], %%r8d" "\n\t" - "call _aes_schedule_core" "\n\t" - : + "call *%[core]" "\n\t" + : [core] "+a" (core) : [key] "m" (ctx->keyschdec32[0][0]), [bits] "g" (keybits), [buf] "m" (ctx->keyschdec32[ctx->rounds][0]), [dir] "g" (1), [rotoffs] "g" ((keybits == 192) ? 
0 : 32) - : "r8", "r9", "r10", "r11", "rax", "rcx", "rdx", "rdi", "rsi", + : "r8", "r9", "r10", "r11", "rcx", "rdx", "rdi", "rsi", "cc", "memory"); vpaes_ssse3_cleanup(); @@ -212,11 +218,12 @@ { unsigned int middle_rounds = nrounds - 1; const void *keysched = ctx->keyschenc32; + void *core = _aes_encrypt_core; - asm volatile ("call _aes_encrypt_core" "\n\t" - : "+a" (middle_rounds), "+d" (keysched) + asm volatile ("call *%[core]" "\n\t" + : "+a" (middle_rounds), "+d" (keysched), [core] "+S" (core) : "c" (aes_const_ptr) - : "rdi", "rsi", "cc", "memory"); + : "rdi", "cc", "memory"); } @@ -228,11 +235,12 @@ { unsigned int middle_rounds = nrounds - 1; const void *keysched = ctx->keyschdec32; + void *core = _aes_decrypt_core; - asm volatile ("call _aes_decrypt_core" "\n\t" - : "+a" (middle_rounds), "+d" (keysched) + asm volatile ("call *%[core]" "\n\t" + : "+a" (middle_rounds), "+d" (keysched), [core] "+S" (core) : "c" (aes_const_ptr) - : "rsi", "cc", "memory"); + : "cc", "memory"); } @@ -727,22 +735,16 @@ } -#ifdef HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS -# define X(...) -#else -# define X(...) __VA_ARGS__ -#endif - -asm ( - "\n\t" "##" - "\n\t" "## Constant-time SSSE3 AES core implementation." - "\n\t" "##" - "\n\t" "## By Mike Hamburg (Stanford University), 2009" - "\n\t" "## Public domain." - "\n\t" "##" - - "\n\t" ".text" +/* + * Constant-time SSSE3 AES core implementation. + * + * By Mike Hamburg (Stanford University), 2009 + * Public domain. 
+ */ +static void _aes_encrypt_core(void) +{ + asm volatile ( "\n\t" "##" "\n\t" "## _aes_encrypt_core" "\n\t" "##" @@ -760,9 +762,6 @@ "\n\t" "## Preserves %xmm6 - %xmm7 so you get some local vectors" "\n\t" "##" "\n\t" "##" - "\n\t" ".align 16" -X("\n\t" ".type _aes_encrypt_core,@function") - "\n\t" "_aes_encrypt_core:" "\n\t" " leaq .Lk_mc_backward(%rcx), %rdi" "\n\t" " mov $16, %rsi" "\n\t" " movdqa .Lk_ipt (%rcx), %xmm2 # iptlo" @@ -835,18 +834,17 @@ "\n\t" " movdqa .Lk_sbo+16(%rcx), %xmm0 # 0 : sbot" "\n\t" " pshufb %xmm3, %xmm0 # 0 = sb1t" "\n\t" " pxor %xmm4, %xmm0 # 0 = A" - "\n\t" " pshufb .Lk_sr(%rsi,%rcx), %xmm0" - "\n\t" " ret" -X("\n\t" ".size _aes_encrypt_core,.-_aes_encrypt_core") + "\n\t" " pshufb .Lk_sr(%rsi,%rcx), %xmm0"); +} +static void _aes_decrypt_core(void) +{ + asm volatile ( "\n\t" "##" "\n\t" "## Decryption core" "\n\t" "##" "\n\t" "## Same API as encryption core." "\n\t" "##" - "\n\t" ".align 16" -X("\n\t" ".type _aes_decrypt_core,@function") - "\n\t" "_aes_decrypt_core:" "\n\t" " movl %eax, %esi" "\n\t" " shll $4, %esi" "\n\t" " xorl $48, %esi" @@ -935,19 +933,18 @@ "\n\t" " movdqa .Lk_dsbo+16(%rcx), %xmm0 # 0 : sbot" "\n\t" " pshufb %xmm3, %xmm0 # 0 = sb1t" "\n\t" " pxor %xmm4, %xmm0 # 0 = A" - "\n\t" " pshufb .Lk_sr(%rsi,%rcx), %xmm0" - "\n\t" " ret" -X("\n\t" ".size _aes_decrypt_core,.-_aes_decrypt_core") + "\n\t" " pshufb .Lk_sr(%rsi,%rcx), %xmm0"); +} +static void _aes_schedule_core(void) +{ + asm volatile ( "\n\t" "########################################################" "\n\t" "## ##" "\n\t" "## AES key schedule ##" "\n\t" "## ##" "\n\t" "########################################################" - "\n\t" ".align 16" -X("\n\t" ".type _aes_schedule_core,@function") - "\n\t" "_aes_schedule_core:" "\n\t" " # rdi = key" "\n\t" " # rsi = size in bits" "\n\t" " # rdx = buffer" @@ -1304,9 +1301,15 @@ "\n\t" " pxor %xmm5, %xmm5" "\n\t" " pxor %xmm6, %xmm6" "\n\t" " pxor %xmm7, %xmm7" - "\n\t" " pxor %xmm8, %xmm8" - "\n\t" " ret" 
-X("\n\t" ".size _aes_schedule_core,.-_aes_schedule_core") + "\n\t" " pxor %xmm8, %xmm8"); +} + +static const void *_aes_get_consts(void) +{ + const void *consts; + + asm volatile( + "\n\t" " jmp _aes_get_consts_end" "\n\t" "########################################################" "\n\t" "## ##" @@ -1315,7 +1318,6 @@ "\n\t" "########################################################" "\n\t" ".align 16" -X("\n\t" ".type _aes_consts,@object") "\n\t" ".Laes_consts:" "\n\t" "_aes_consts:" "\n\t" " # s0F" @@ -1493,7 +1495,14 @@ "\n\t" " .quad 0xC7AA6DB9D4943E2D" "\n\t" " .quad 0x12D7560F93441D00" "\n\t" " .quad 0xCA4B8159D8C58E9C" -X("\n\t" ".size _aes_consts,.-_aes_consts") -); + + "\n\t" "_aes_get_consts_end:" + "\n\t" " leaq .Laes_consts(%%rip), %q0" + : "=a" (consts) + : + : "cc", "memory" ); + + return consts; +} #endif /* USE_SSSE3 */