Changeset View
Changeset View
Standalone View
Standalone View
b/cipher/rijndael-ssse3-amd64.c
Context not available. | |||||
#include <config.h> | #include <config.h> | ||||
#include <stdio.h> | #include <stdio.h> | ||||
#include <stdlib.h> | #include <stdlib.h> | ||||
#include <string.h> /* for memcmp() */ | |||||
#include "types.h" /* for byte and u32 typedefs */ | #include "types.h" /* for byte and u32 typedefs */ | ||||
#include "g10lib.h" | #include "g10lib.h" | ||||
Context not available. | |||||
#endif | #endif | ||||
#define vpaes_ssse3_prepare_enc(const_ptr) \ | #define vpaes_ssse3_prepare_enc(const_ptr) \ | ||||
const_ptr = _aes_get_consts(); \ | |||||
vpaes_ssse3_prepare(); \ | vpaes_ssse3_prepare(); \ | ||||
asm volatile ("lea .Laes_consts(%%rip), %q0 \n\t" \ | asm volatile ("movdqa (%q0), %%xmm9 # 0F \n\t" \ | ||||
"movdqa (%q0), %%xmm9 # 0F \n\t" \ | |||||
"movdqa .Lk_inv (%q0), %%xmm10 # inv \n\t" \ | "movdqa .Lk_inv (%q0), %%xmm10 # inv \n\t" \ | ||||
"movdqa .Lk_inv+16(%q0), %%xmm11 # inva \n\t" \ | "movdqa .Lk_inv+16(%q0), %%xmm11 # inva \n\t" \ | ||||
"movdqa .Lk_sb1 (%q0), %%xmm13 # sb1u \n\t" \ | "movdqa .Lk_sb1 (%q0), %%xmm13 # sb1u \n\t" \ | ||||
"movdqa .Lk_sb1+16(%q0), %%xmm12 # sb1t \n\t" \ | "movdqa .Lk_sb1+16(%q0), %%xmm12 # sb1t \n\t" \ | ||||
"movdqa .Lk_sb2 (%q0), %%xmm15 # sb2u \n\t" \ | "movdqa .Lk_sb2 (%q0), %%xmm15 # sb2u \n\t" \ | ||||
"movdqa .Lk_sb2+16(%q0), %%xmm14 # sb2t \n\t" \ | "movdqa .Lk_sb2+16(%q0), %%xmm14 # sb2t \n\t" \ | ||||
: "=c" (const_ptr) \ | |||||
: \ | : \ | ||||
: "r" (const_ptr) \ | |||||
: "memory" ) | : "memory" ) | ||||
#define vpaes_ssse3_prepare_dec(const_ptr) \ | #define vpaes_ssse3_prepare_dec(const_ptr) \ | ||||
const_ptr = _aes_get_consts(); \ | |||||
vpaes_ssse3_prepare(); \ | vpaes_ssse3_prepare(); \ | ||||
asm volatile ("lea .Laes_consts(%%rip), %q0 \n\t" \ | asm volatile ("movdqa (%q0), %%xmm9 # 0F \n\t" \ | ||||
"movdqa (%q0), %%xmm9 # 0F \n\t" \ | |||||
"movdqa .Lk_inv (%q0), %%xmm10 # inv \n\t" \ | "movdqa .Lk_inv (%q0), %%xmm10 # inv \n\t" \ | ||||
"movdqa .Lk_inv+16(%q0), %%xmm11 # inva \n\t" \ | "movdqa .Lk_inv+16(%q0), %%xmm11 # inva \n\t" \ | ||||
"movdqa .Lk_dsb9 (%q0), %%xmm13 # sb9u \n\t" \ | "movdqa .Lk_dsb9 (%q0), %%xmm13 # sb9u \n\t" \ | ||||
Context not available. | |||||
"movdqa .Lk_dsbd (%q0), %%xmm15 # sbdu \n\t" \ | "movdqa .Lk_dsbd (%q0), %%xmm15 # sbdu \n\t" \ | ||||
"movdqa .Lk_dsbb (%q0), %%xmm14 # sbbu \n\t" \ | "movdqa .Lk_dsbb (%q0), %%xmm14 # sbbu \n\t" \ | ||||
"movdqa .Lk_dsbe (%q0), %%xmm8 # sbeu \n\t" \ | "movdqa .Lk_dsbe (%q0), %%xmm8 # sbeu \n\t" \ | ||||
: "=c" (const_ptr) \ | |||||
: \ | : \ | ||||
: "r" (const_ptr) \ | |||||
: "memory" ) | : "memory" ) | ||||
static void _aes_schedule_core(void); | |||||
static void _aes_encrypt_core(void); | |||||
static void _aes_decrypt_core(void); | |||||
static const void *_aes_get_consts(void); | |||||
void | void | ||||
_gcry_aes_ssse3_do_setkey (RIJNDAEL_context *ctx, const byte *key) | _gcry_aes_ssse3_do_setkey (RIJNDAEL_context *ctx, const byte *key) | ||||
{ | { | ||||
unsigned int keybits = (ctx->rounds - 10) * 32 + 128; | unsigned int keybits = (ctx->rounds - 10) * 32 + 128; | ||||
byte ssse3_state[SSSE3_STATE_SIZE]; | byte ssse3_state[SSSE3_STATE_SIZE]; | ||||
void *core = _aes_schedule_core; | |||||
vpaes_ssse3_prepare(); | vpaes_ssse3_prepare(); | ||||
Context not available. | |||||
"leaq %[buf], %%rdx" "\n\t" | "leaq %[buf], %%rdx" "\n\t" | ||||
"movl %[dir], %%ecx" "\n\t" | "movl %[dir], %%ecx" "\n\t" | ||||
"movl %[rotoffs], %%r8d" "\n\t" | "movl %[rotoffs], %%r8d" "\n\t" | ||||
"call _aes_schedule_core" "\n\t" | "call *%[core]" "\n\t" | ||||
: | : [core] "+a" (core) | ||||
: [key] "m" (*key), | : [key] "m" (*key), | ||||
[bits] "g" (keybits), | [bits] "g" (keybits), | ||||
[buf] "m" (ctx->keyschenc32[0][0]), | [buf] "m" (ctx->keyschenc32[0][0]), | ||||
[dir] "g" (0), | [dir] "g" (0), | ||||
[rotoffs] "g" (48) | [rotoffs] "g" (48) | ||||
: "r8", "r9", "r10", "r11", "rax", "rcx", "rdx", "rdi", "rsi", | : "r8", "r9", "r10", "r11", "rcx", "rdx", "rdi", "rsi", | ||||
"cc", "memory"); | "cc", "memory"); | ||||
vpaes_ssse3_cleanup(); | vpaes_ssse3_cleanup(); | ||||
/* Save key for setting up decryption. */ | /* Save key for setting up decryption. */ | ||||
memcpy(&ctx->keyschdec32[0][0], key, keybits / 8); | buf_cpy(&ctx->keyschdec32[0][0], key, keybits / 8); | ||||
} | } | ||||
Context not available. | |||||
{ | { | ||||
unsigned int keybits = (ctx->rounds - 10) * 32 + 128; | unsigned int keybits = (ctx->rounds - 10) * 32 + 128; | ||||
byte ssse3_state[SSSE3_STATE_SIZE]; | byte ssse3_state[SSSE3_STATE_SIZE]; | ||||
void *core = _aes_schedule_core; | |||||
vpaes_ssse3_prepare(); | vpaes_ssse3_prepare(); | ||||
Context not available. | |||||
"leaq %[buf], %%rdx" "\n\t" | "leaq %[buf], %%rdx" "\n\t" | ||||
"movl %[dir], %%ecx" "\n\t" | "movl %[dir], %%ecx" "\n\t" | ||||
"movl %[rotoffs], %%r8d" "\n\t" | "movl %[rotoffs], %%r8d" "\n\t" | ||||
"call _aes_schedule_core" "\n\t" | "call *%[core]" "\n\t" | ||||
: | : [core] "+a" (core) | ||||
: [key] "m" (ctx->keyschdec32[0][0]), | : [key] "m" (ctx->keyschdec32[0][0]), | ||||
[bits] "g" (keybits), | [bits] "g" (keybits), | ||||
[buf] "m" (ctx->keyschdec32[ctx->rounds][0]), | [buf] "m" (ctx->keyschdec32[ctx->rounds][0]), | ||||
[dir] "g" (1), | [dir] "g" (1), | ||||
[rotoffs] "g" ((keybits == 192) ? 0 : 32) | [rotoffs] "g" ((keybits == 192) ? 0 : 32) | ||||
: "r8", "r9", "r10", "r11", "rax", "rcx", "rdx", "rdi", "rsi", | : "r8", "r9", "r10", "r11", "rcx", "rdx", "rdi", "rsi", | ||||
"cc", "memory"); | "cc", "memory"); | ||||
vpaes_ssse3_cleanup(); | vpaes_ssse3_cleanup(); | ||||
Context not available. | |||||
{ | { | ||||
unsigned int middle_rounds = nrounds - 1; | unsigned int middle_rounds = nrounds - 1; | ||||
const void *keysched = ctx->keyschenc32; | const void *keysched = ctx->keyschenc32; | ||||
void *core = _aes_encrypt_core; | |||||
asm volatile ("call _aes_encrypt_core" "\n\t" | asm volatile ("call *%[core]" "\n\t" | ||||
: "+a" (middle_rounds), "+d" (keysched) | : "+a" (middle_rounds), "+d" (keysched), [core] "+S" (core) | ||||
: "c" (aes_const_ptr) | : "c" (aes_const_ptr) | ||||
: "rdi", "rsi", "cc", "memory"); | : "rdi", "cc", "memory"); | ||||
} | } | ||||
Context not available. | |||||
{ | { | ||||
unsigned int middle_rounds = nrounds - 1; | unsigned int middle_rounds = nrounds - 1; | ||||
const void *keysched = ctx->keyschdec32; | const void *keysched = ctx->keyschdec32; | ||||
void *core = _aes_decrypt_core; | |||||
asm volatile ("call _aes_decrypt_core" "\n\t" | asm volatile ("call *%[core]" "\n\t" | ||||
: "+a" (middle_rounds), "+d" (keysched) | : "+a" (middle_rounds), "+d" (keysched), [core] "+S" (core) | ||||
: "c" (aes_const_ptr) | : "c" (aes_const_ptr) | ||||
: "rsi", "cc", "memory"); | : "cc", "memory"); | ||||
} | } | ||||
Context not available. | |||||
} | } | ||||
#ifdef HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS | /* | ||||
# define X(...) | * Constant-time SSSE3 AES core implementation. | ||||
#else | * | ||||
# define X(...) __VA_ARGS__ | * By Mike Hamburg (Stanford University), 2009 | ||||
#endif | * Public domain. | ||||
*/ | |||||
asm ( | |||||
"\n\t" "##" | |||||
"\n\t" "## Constant-time SSSE3 AES core implementation." | |||||
"\n\t" "##" | |||||
"\n\t" "## By Mike Hamburg (Stanford University), 2009" | |||||
"\n\t" "## Public domain." | |||||
"\n\t" "##" | |||||
"\n\t" ".text" | |||||
static void _aes_encrypt_core(void) | |||||
{ | |||||
asm volatile ( | |||||
"\n\t" "##" | "\n\t" "##" | ||||
"\n\t" "## _aes_encrypt_core" | "\n\t" "## _aes_encrypt_core" | ||||
"\n\t" "##" | "\n\t" "##" | ||||
Context not available. | |||||
"\n\t" "## Preserves %xmm6 - %xmm7 so you get some local vectors" | "\n\t" "## Preserves %xmm6 - %xmm7 so you get some local vectors" | ||||
"\n\t" "##" | "\n\t" "##" | ||||
"\n\t" "##" | "\n\t" "##" | ||||
"\n\t" ".align 16" | |||||
X("\n\t" ".type _aes_encrypt_core,@function") | |||||
"\n\t" "_aes_encrypt_core:" | |||||
"\n\t" " leaq .Lk_mc_backward(%rcx), %rdi" | "\n\t" " leaq .Lk_mc_backward(%rcx), %rdi" | ||||
"\n\t" " mov $16, %rsi" | "\n\t" " mov $16, %rsi" | ||||
"\n\t" " movdqa .Lk_ipt (%rcx), %xmm2 # iptlo" | "\n\t" " movdqa .Lk_ipt (%rcx), %xmm2 # iptlo" | ||||
Context not available. | |||||
"\n\t" " movdqa .Lk_sbo+16(%rcx), %xmm0 # 0 : sbot" | "\n\t" " movdqa .Lk_sbo+16(%rcx), %xmm0 # 0 : sbot" | ||||
"\n\t" " pshufb %xmm3, %xmm0 # 0 = sb1t" | "\n\t" " pshufb %xmm3, %xmm0 # 0 = sb1t" | ||||
"\n\t" " pxor %xmm4, %xmm0 # 0 = A" | "\n\t" " pxor %xmm4, %xmm0 # 0 = A" | ||||
"\n\t" " pshufb .Lk_sr(%rsi,%rcx), %xmm0" | "\n\t" " pshufb .Lk_sr(%rsi,%rcx), %xmm0"); | ||||
"\n\t" " ret" | } | ||||
X("\n\t" ".size _aes_encrypt_core,.-_aes_encrypt_core") | |||||
static void _aes_decrypt_core(void) | |||||
{ | |||||
asm volatile ( | |||||
"\n\t" "##" | "\n\t" "##" | ||||
"\n\t" "## Decryption core" | "\n\t" "## Decryption core" | ||||
"\n\t" "##" | "\n\t" "##" | ||||
"\n\t" "## Same API as encryption core." | "\n\t" "## Same API as encryption core." | ||||
"\n\t" "##" | "\n\t" "##" | ||||
"\n\t" ".align 16" | |||||
X("\n\t" ".type _aes_decrypt_core,@function") | |||||
"\n\t" "_aes_decrypt_core:" | |||||
"\n\t" " movl %eax, %esi" | "\n\t" " movl %eax, %esi" | ||||
"\n\t" " shll $4, %esi" | "\n\t" " shll $4, %esi" | ||||
"\n\t" " xorl $48, %esi" | "\n\t" " xorl $48, %esi" | ||||
Context not available. | |||||
"\n\t" " movdqa .Lk_dsbo+16(%rcx), %xmm0 # 0 : sbot" | "\n\t" " movdqa .Lk_dsbo+16(%rcx), %xmm0 # 0 : sbot" | ||||
"\n\t" " pshufb %xmm3, %xmm0 # 0 = sb1t" | "\n\t" " pshufb %xmm3, %xmm0 # 0 = sb1t" | ||||
"\n\t" " pxor %xmm4, %xmm0 # 0 = A" | "\n\t" " pxor %xmm4, %xmm0 # 0 = A" | ||||
"\n\t" " pshufb .Lk_sr(%rsi,%rcx), %xmm0" | "\n\t" " pshufb .Lk_sr(%rsi,%rcx), %xmm0"); | ||||
"\n\t" " ret" | } | ||||
X("\n\t" ".size _aes_decrypt_core,.-_aes_decrypt_core") | |||||
static void _aes_schedule_core(void) | |||||
{ | |||||
asm volatile ( | |||||
"\n\t" "########################################################" | "\n\t" "########################################################" | ||||
"\n\t" "## ##" | "\n\t" "## ##" | ||||
"\n\t" "## AES key schedule ##" | "\n\t" "## AES key schedule ##" | ||||
"\n\t" "## ##" | "\n\t" "## ##" | ||||
"\n\t" "########################################################" | "\n\t" "########################################################" | ||||
"\n\t" ".align 16" | |||||
X("\n\t" ".type _aes_schedule_core,@function") | |||||
"\n\t" "_aes_schedule_core:" | |||||
"\n\t" " # rdi = key" | "\n\t" " # rdi = key" | ||||
"\n\t" " # rsi = size in bits" | "\n\t" " # rsi = size in bits" | ||||
"\n\t" " # rdx = buffer" | "\n\t" " # rdx = buffer" | ||||
Context not available. | |||||
"\n\t" " pxor %xmm5, %xmm5" | "\n\t" " pxor %xmm5, %xmm5" | ||||
"\n\t" " pxor %xmm6, %xmm6" | "\n\t" " pxor %xmm6, %xmm6" | ||||
"\n\t" " pxor %xmm7, %xmm7" | "\n\t" " pxor %xmm7, %xmm7" | ||||
"\n\t" " pxor %xmm8, %xmm8" | "\n\t" " pxor %xmm8, %xmm8"); | ||||
"\n\t" " ret" | } | ||||
X("\n\t" ".size _aes_schedule_core,.-_aes_schedule_core") | |||||
static const void *_aes_get_consts(void) | |||||
{ | |||||
const void *consts; | |||||
asm volatile( | |||||
"\n\t" " jmp _aes_get_consts_end" | |||||
"\n\t" "########################################################" | "\n\t" "########################################################" | ||||
"\n\t" "## ##" | "\n\t" "## ##" | ||||
Context not available. | |||||
"\n\t" "########################################################" | "\n\t" "########################################################" | ||||
"\n\t" ".align 16" | "\n\t" ".align 16" | ||||
X("\n\t" ".type _aes_consts,@object") | |||||
"\n\t" ".Laes_consts:" | "\n\t" ".Laes_consts:" | ||||
"\n\t" "_aes_consts:" | "\n\t" "_aes_consts:" | ||||
"\n\t" " # s0F" | "\n\t" " # s0F" | ||||
Context not available. | |||||
"\n\t" " .quad 0xC7AA6DB9D4943E2D" | "\n\t" " .quad 0xC7AA6DB9D4943E2D" | ||||
"\n\t" " .quad 0x12D7560F93441D00" | "\n\t" " .quad 0x12D7560F93441D00" | ||||
"\n\t" " .quad 0xCA4B8159D8C58E9C" | "\n\t" " .quad 0xCA4B8159D8C58E9C" | ||||
X("\n\t" ".size _aes_consts,.-_aes_consts") | |||||
); | "\n\t" "_aes_get_consts_end:" | ||||
"\n\t" " leaq .Laes_consts(%%rip), %q0" | |||||
: "=a" (consts) | |||||
: | |||||
: "cc", "memory" ); | |||||
return consts; | |||||
} | |||||
#endif /* USE_SSSE3 */ | #endif /* USE_SSSE3 */ | ||||
Context not available. |