diff --git a/cipher/Makefile.am b/cipher/Makefile.am
index 3c4eae0b..bba815bb 100644
--- a/cipher/Makefile.am
+++ b/cipher/Makefile.am
@@ -1,130 +1,131 @@
# Makefile for cipher modules
# Copyright (C) 1998, 1999, 2000, 2001, 2002,
#               2003, 2009 Free Software Foundation, Inc.
#
# This file is part of Libgcrypt.
#
# Libgcrypt is free software; you can redistribute it and/or modify
# it under the terms of the GNU Lesser General Public License as
# published by the Free Software Foundation; either version 2.1 of
# the License, or (at your option) any later version.
#
# Libgcrypt is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public
# License along with this program; if not, see <http://www.gnu.org/licenses/>.

# Process this file with automake to produce Makefile.in

# Need to include ../src in addition to top_srcdir because gcrypt.h is
# a built header.
AM_CPPFLAGS = -I../src -I$(top_srcdir)/src -I../mpi -I$(top_srcdir)/mpi
AM_CFLAGS = $(GPG_ERROR_CFLAGS)

AM_CCASFLAGS = $(NOEXECSTACK_FLAGS)

EXTRA_DIST = gost-s-box.c

CLEANFILES = gost-s-box
DISTCLEANFILES = gost-sb.h

noinst_LTLIBRARIES = libcipher.la

GCRYPT_MODULES = @GCRYPT_CIPHERS@ @GCRYPT_PUBKEY_CIPHERS@ \
                 @GCRYPT_DIGESTS@ @GCRYPT_KDFS@

libcipher_la_DEPENDENCIES = $(GCRYPT_MODULES)
libcipher_la_LIBADD = $(GCRYPT_MODULES)

libcipher_la_SOURCES = \
cipher.c cipher-internal.h \
cipher-cbc.c cipher-cfb.c cipher-ofb.c cipher-ctr.c cipher-aeswrap.c \
cipher-ccm.c cipher-cmac.c cipher-gcm.c cipher-gcm-intel-pclmul.c \
cipher-gcm-armv8-aarch32-ce.S cipher-gcm-armv8-aarch64-ce.S \
cipher-poly1305.c cipher-ocb.c cipher-xts.c \
cipher-selftest.c cipher-selftest.h \
pubkey.c pubkey-internal.h pubkey-util.c \
md.c \
mac.c mac-internal.h \
mac-hmac.c mac-cmac.c mac-gmac.c mac-poly1305.c \
poly1305.c poly1305-internal.h \
kdf.c kdf-internal.h \
hmac-tests.c \
bithelp.h \
bufhelp.h \
primegen.c \
hash-common.c hash-common.h \
dsa-common.c rsa-common.c \
sha1.h

EXTRA_libcipher_la_SOURCES = \
+asm-common-amd64.h \
arcfour.c arcfour-amd64.S \
blowfish.c blowfish-amd64.S blowfish-arm.S \
cast5.c cast5-amd64.S cast5-arm.S \
chacha20.c chacha20-amd64-ssse3.S chacha20-amd64-avx2.S chacha20-armv7-neon.S \
chacha20-aarch64.S \
crc.c \
crc-intel-pclmul.c \
des.c des-amd64.S \
dsa.c \
elgamal.c \
ecc.c ecc-curves.c ecc-misc.c ecc-common.h \
ecc-ecdsa.c ecc-eddsa.c ecc-gost.c \
idea.c \
gost28147.c gost.h \
gostr3411-94.c \
md4.c \
md5.c \
rijndael.c rijndael-internal.h rijndael-tables.h rijndael-aesni.c \
rijndael-padlock.c rijndael-amd64.S rijndael-arm.S \
rijndael-ssse3-amd64.c rijndael-ssse3-amd64-asm.S \
rijndael-armv8-ce.c rijndael-armv8-aarch32-ce.S rijndael-armv8-aarch64-ce.S \
rijndael-aarch64.S \
rmd160.c \
rsa.c \
salsa20.c salsa20-amd64.S salsa20-armv7-neon.S \
scrypt.c \
seed.c \
serpent.c serpent-sse2-amd64.S serpent-avx2-amd64.S serpent-armv7-neon.S \
sha1.c sha1-ssse3-amd64.S sha1-avx-amd64.S sha1-avx-bmi2-amd64.S \
sha1-armv7-neon.S sha1-armv8-aarch32-ce.S sha1-armv8-aarch64-ce.S \
sha256.c sha256-ssse3-amd64.S sha256-avx-amd64.S sha256-avx2-bmi2-amd64.S \
sha256-armv8-aarch32-ce.S sha256-armv8-aarch64-ce.S \
sha512.c sha512-ssse3-amd64.S sha512-avx-amd64.S sha512-avx2-bmi2-amd64.S \
sha512-armv7-neon.S sha512-arm.S \
sm3.c \
keccak.c keccak_permute_32.h keccak_permute_64.h keccak-armv7-neon.S \
stribog.c \
tiger.c \
whirlpool.c whirlpool-sse2-amd64.S \
twofish.c twofish-amd64.S twofish-arm.S twofish-aarch64.S \
twofish-avx2-amd64.S \
rfc2268.c \
camellia.c camellia.h camellia-glue.c camellia-aesni-avx-amd64.S \
camellia-aesni-avx2-amd64.S camellia-arm.S camellia-aarch64.S \
blake2.c

gost28147.lo: gost-sb.h
gost-sb.h: gost-s-box
        ./gost-s-box $@

gost-s-box: gost-s-box.c
        $(CC_FOR_BUILD) -o $@ $(srcdir)/gost-s-box.c

if ENABLE_O_FLAG_MUNGING
o_flag_munging = sed -e 's/-O\([2-9s][2-9s]*\)/-O1/' -e 's/-Ofast/-O1/g'
else
o_flag_munging = cat
endif

# We need to lower the optimization for this module.
tiger.o: $(srcdir)/tiger.c
        `echo $(COMPILE) -c $(srcdir)/tiger.c | $(o_flag_munging) `

tiger.lo: $(srcdir)/tiger.c
        `echo $(LTCOMPILE) -c $(srcdir)/tiger.c | $(o_flag_munging) `

diff --git a/cipher/arcfour-amd64.S b/cipher/arcfour-amd64.S
index 2e52ea00..c08f3453 100644
--- a/cipher/arcfour-amd64.S
+++ b/cipher/arcfour-amd64.S
@@ -1,104 +1,102 @@
/*
** RC4 implementation optimized for AMD64.
**
** Author: Marc Bevand
** Licence: I hereby disclaim the copyright on this code and place it
** in the public domain.
**
** The throughput achieved by this code is about 320 MBytes/sec, on
** a 1.8 GHz AMD Opteron (rev C0) processor.
**
** 2013/12/20 :
**  - Integrated to libgcrypt
**  - 4.18 cycles/byte on Intel i5-4570
*/

#ifdef __x86_64__
#include <config.h>
#if defined(USE_ARCFOUR) && (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
    defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS))

-#ifdef HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS
-# define ELF(...) __VA_ARGS__
-#else
-# define ELF(...) /*_*/
-#endif
+#include "asm-common-amd64.h"

.text
.align 16
.globl _gcry_arcfour_amd64
ELF(.type _gcry_arcfour_amd64,@function)
_gcry_arcfour_amd64:
+       ENTER_SYSV_FUNC_PARAMS_0_4
        push    %rbp
        push    %rbx
        mov     %rdi,           %rbp    # key = ARG(key)
        mov     %rsi,           %rbx    # rbx = ARG(len)
        mov     %rdx,           %rsi    # in = ARG(in)
        mov     %rcx,           %rdi    # out = ARG(out)
        mov     (4*256)(%rbp),  %ecx    # x = key->x
        mov     (4*256+4)(%rbp),%edx    # y = key->y
        inc     %rcx                    # x++
        and     $255,           %rcx    # x &= 0xff
        lea     -8(%rbx,%rsi),  %rbx    # rbx = in+len-8
        mov     %rbx,           %r9     # tmp = in+len-8
        mov     (%rbp,%rcx,4),  %eax    # tx = d[x]
        cmp     %rsi,           %rbx    # cmp in with in+len-8
        jl      .Lend                   # jump if (in+len-8 < in)

.Lstart:
        add     $8,             %rsi            # increment in
        add     $8,             %rdi            # increment out

        # generate the next 8 bytes of the rc4 stream into %r8
        mov     $8,             %r11            # byte counter
1:      add     %al,            %dl             # y += tx
        mov     (%rbp,%rdx,4),  %ebx            # ty = d[y]
        mov     %ebx,           (%rbp,%rcx,4)   # d[x] = ty
        add     %al,            %bl             # val = ty + tx
        mov     %eax,           (%rbp,%rdx,4)   # d[y] = tx
        inc     %cl                             # x++           (NEXT ROUND)
        mov     (%rbp,%rcx,4),  %eax            # tx = d[x]     (NEXT ROUND)
        shl     $8,             %r8
        movb    (%rbp,%rbx,4),  %r8b            # val = d[val]
        dec     %r11b
        jnz     1b

        # xor 8 bytes
        bswap   %r8
        xor     -8(%rsi),       %r8
        cmp     %r9,            %rsi            # cmp in+len-8 with in
        mov     %r8,            -8(%rdi)
        jle     .Lstart                         # jump if (in <= in+len-8)

.Lend:
        add     $8,             %r9             # tmp = in+len

        # handle the last bytes, one by one
1:      cmp     %rsi,           %r9             # cmp in with in+len
        jle     .Lfinished                      # jump if (in+len <= in)
        add     %al,            %dl             # y += tx
        mov     (%rbp,%rdx,4),  %ebx            # ty = d[y]
        mov     %ebx,           (%rbp,%rcx,4)   # d[x] = ty
        add     %al,            %bl             # val = ty + tx
        mov     %eax,           (%rbp,%rdx,4)   # d[y] = tx
        inc     %cl                             # x++           (NEXT ROUND)
        mov     (%rbp,%rcx,4),  %eax            # tx = d[x]     (NEXT ROUND)
        movb    (%rbp,%rbx,4),  %r8b            # val = d[val]
        xor     (%rsi),         %r8b            # xor 1 byte
        movb    %r8b,           (%rdi)
        inc     %rsi                            # in++
        inc     %rdi                            # out++
        jmp     1b

.Lfinished:
        dec     %rcx                            # x--
        movb    %cl,    (4*256)(%rbp)           # key->x = x
        movb    %dl,    (4*256+4)(%rbp)         # key->y = y
        pop     %rbx
        pop     %rbp
+       EXIT_SYSV_FUNC
        ret
.L__gcry_arcfour_amd64_end:
ELF(.size _gcry_arcfour_amd64,.L__gcry_arcfour_amd64_end-_gcry_arcfour_amd64)

#endif
#endif
diff --git a/cipher/arcfour.c b/cipher/arcfour.c
index 44e8ef46..085df9bb 100644
--- a/cipher/arcfour.c
+++ b/cipher/arcfour.c
@@ -1,227 +1,213 @@
/* arcfour.c  -  The arcfour stream cipher
 * Copyright (C) 2000, 2001, 2002, 2003 Free Software Foundation, Inc.
 *
 * This file is part of Libgcrypt.
 *
 * Libgcrypt is free software; you can redistribute it and/or modify
 * it under the terms of the GNU Lesser general Public License as
 * published by the Free Software Foundation; either version 2.1 of
 * the License, or (at your option) any later version.
 *
 * Libgcrypt is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
 *
 * For a description of the algorithm, see:
 *   Bruce Schneier: Applied Cryptography. John Wiley & Sons, 1996.
 *   ISBN 0-471-11709-9. Pages 397 ff.
 */

#include <config.h>
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include "types.h"
#include "g10lib.h"
#include "cipher.h"

/* USE_AMD64_ASM indicates whether to use AMD64 assembly code. */
#undef USE_AMD64_ASM
#if defined(__x86_64__) && (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
    defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS))
# define USE_AMD64_ASM 1
#endif

static const char *selftest(void);

#ifdef USE_AMD64_ASM

typedef struct {
    u32 sbox[256];
    u32 idx_i, idx_j;
} ARCFOUR_context;

void _gcry_arcfour_amd64(void *key, size_t len, const byte *indata,
                         byte *outdata);

static void
encrypt_stream (void *context,
                byte *outbuf, const byte *inbuf, size_t length)
{
-#ifdef HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS
-  const void *fn = _gcry_arcfour_amd64;
-  /* Call SystemV ABI function without storing non-volatile XMM registers,
-   * as target function does not use vector instruction sets. */
-  asm volatile ("callq *%0\n\t"
-                : "+a" (fn),
-                  "+D" (context),
-                  "+S" (length),
-                  "+d" (inbuf),
-                  "+c" (outbuf)
-                :
-                : "cc", "memory", "r8", "r9", "r10", "r11");
-#else
  _gcry_arcfour_amd64 (context, length, inbuf, outbuf );
-#endif
}

#else /*!USE_AMD64_ASM*/

typedef struct {
    byte sbox[256];
    int idx_i, idx_j;
} ARCFOUR_context;

static void
do_encrypt_stream( ARCFOUR_context *ctx,
                   byte *outbuf, const byte *inbuf, size_t length )
{
#ifndef __i386__
  register unsigned int i = ctx->idx_i;
  register byte j = ctx->idx_j;
  register byte *sbox = ctx->sbox;
  register byte t, u;

  while ( length-- )
    {
      i++;
      t = sbox[(byte)i];
      j += t;
      u = sbox[j];
      sbox[(byte)i] = u;
      u += t;
      sbox[j] = t;
      *outbuf++ = sbox[u] ^ *inbuf++;
    }

  ctx->idx_i = (byte)i;
  ctx->idx_j = (byte)j;
#else /*__i386__*/
  /* Old implementation of arcfour is faster on i386 than the version above.
   * This is because version above increases register pressure which on i386
   * would push some of the variables to memory/stack.  Therefore keep this
   * version for i386 to avoid regressing performance.  */
  register int i = ctx->idx_i;
  register int j = ctx->idx_j;
  register byte *sbox = ctx->sbox;
  register int t;

  while ( length-- )
    {
      i++;
      i = i & 255; /* The and-op seems to be faster than the mod-op;
                      with a signed 'i', 'i % 256' would also need extra
                      sign fix-up instructions. */
      j += sbox[i];
      j &= 255;
      t = sbox[i];
      sbox[i] = sbox[j];
      sbox[j] = t;
      *outbuf++ = *inbuf++ ^ sbox[(sbox[i] + sbox[j]) & 255];
    }

  ctx->idx_i = i;
  ctx->idx_j = j;
#endif
}

static void
encrypt_stream (void *context,
                byte *outbuf, const byte *inbuf, size_t length)
{
  ARCFOUR_context *ctx = (ARCFOUR_context *) context;
  do_encrypt_stream (ctx, outbuf, inbuf, length );
  _gcry_burn_stack (64);
}

#endif /*!USE_AMD64_ASM*/


static gcry_err_code_t
do_arcfour_setkey (void *context, const byte *key, unsigned int keylen)
{
  static int initialized;
  static const char* selftest_failed;
  int i, j;
  byte karr[256];
  ARCFOUR_context *ctx = (ARCFOUR_context *) context;

  if (!initialized )
    {
      initialized = 1;
      selftest_failed = selftest();
      if( selftest_failed )
        log_error ("ARCFOUR selftest failed (%s)\n", selftest_failed );
    }
  if( selftest_failed )
    return GPG_ERR_SELFTEST_FAILED;

  if( keylen < 40/8 ) /* we want at least 40 bits */
    return GPG_ERR_INV_KEYLEN;

  ctx->idx_i = ctx->idx_j = 0;
  for (i=0; i < 256; i++ )
    ctx->sbox[i] = i;
  for (i=j=0; i < 256; i++,j++ )
    {
      if (j >= keylen)
        j = 0;
      karr[i] = key[j];
    }
  for (i=j=0; i < 256; i++ )
    {
      int t;
      j = (j + ctx->sbox[i] + karr[i]) & 255;
      t = ctx->sbox[i];
      ctx->sbox[i] = ctx->sbox[j];
      ctx->sbox[j] = t;
    }
  wipememory( karr, sizeof(karr) );

  return GPG_ERR_NO_ERROR;
}

static gcry_err_code_t
arcfour_setkey ( void *context, const byte *key, unsigned int keylen )
{
  ARCFOUR_context *ctx = (ARCFOUR_context *) context;
  gcry_err_code_t rc = do_arcfour_setkey (ctx, key, keylen );
  return rc;
}

static const char*
selftest(void)
{
  ARCFOUR_context ctx;
  byte scratch[16];

  /* Test vector from Cryptlib labeled there: "from the
     State/Commerce Department". */
  static const byte key_1[] =
    { 0x61, 0x8A, 0x63, 0xD2, 0xFB };
  static const byte plaintext_1[] =
    { 0xDC, 0xEE, 0x4C, 0xF9, 0x2C };
  static const byte ciphertext_1[] =
    { 0xF1, 0x38, 0x29, 0xC9, 0xDE };

  arcfour_setkey( &ctx, key_1, sizeof(key_1));
  encrypt_stream( &ctx, scratch, plaintext_1, sizeof(plaintext_1));
  if ( memcmp (scratch, ciphertext_1, sizeof (ciphertext_1)))
    return "Arcfour encryption test 1 failed.";
  arcfour_setkey( &ctx, key_1, sizeof(key_1));
  encrypt_stream(&ctx, scratch, scratch, sizeof(plaintext_1)); /* decrypt */
  if ( memcmp (scratch, plaintext_1, sizeof (plaintext_1)))
    return "Arcfour decryption test 1 failed.";
  return NULL;
}

gcry_cipher_spec_t _gcry_cipher_spec_arcfour =
  {
    GCRY_CIPHER_ARCFOUR, {0, 0},
    "ARCFOUR", NULL, NULL, 1, 128, sizeof (ARCFOUR_context),
    arcfour_setkey, NULL, NULL, encrypt_stream, encrypt_stream,
  };

diff --git a/cipher/asm-common-amd64.h b/cipher/asm-common-amd64.h
new file mode 100644
index 00000000..7eb42649
--- /dev/null
+++ b/cipher/asm-common-amd64.h
@@ -0,0 +1,90 @@
+/* asm-common-amd64.h  -  Common macros for AMD64 assembly
+ *
+ * Copyright (C) 2018 Jussi Kivilinna <jussi.kivilinna@iki.fi>
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef GCRY_ASM_COMMON_AMD64_H
+#define GCRY_ASM_COMMON_AMD64_H
+
+#include <config.h>
+
+#ifdef HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS
+# define ELF(...) __VA_ARGS__
+#else
+# define ELF(...) /*_*/
+#endif
+
+#ifdef __PIC__
+#  define rRIP (%rip)
+#else
+#  define rRIP
+#endif
+
+#ifdef __PIC__
+#  define RIP %rip
+#else
+#  define RIP
+#endif
+
+#if defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS) || !defined(__PIC__)
+#  define GET_EXTERN_POINTER(name, reg) movabsq $name, reg
+#else
+#  ifdef __code_model_large__
+#    define GET_EXTERN_POINTER(name, reg) \
+               pushq %r15; \
+               pushq %r14; \
+            1: leaq 1b(%rip), reg; \
+               movabsq $_GLOBAL_OFFSET_TABLE_-1b, %r14; \
+               movabsq $name@GOT, %r15; \
+               addq %r14, reg; \
+               popq %r14; \
+               movq (reg, %r15), reg; \
+               popq %r15;
+#  else
+#    define GET_EXTERN_POINTER(name, reg) movq name@GOTPCREL(%rip), reg
+#  endif
+#endif
+
+#ifdef HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS
+/* Win64 passes the first four arguments in rcx, rdx, r8 and r9; move them
+ * into the SysV argument registers that the function bodies expect. */
+# define ENTER_SYSV_FUNC_PARAMS_0_4 \
+       pushq %rdi; \
+       pushq %rsi; \
+       movq %rcx, %rdi; \
+       movq %rdx, %rsi; \
+       movq %r8, %rdx; \
+       movq %r9, %rcx; \
+
+/* Fifth Win64 argument lives on the stack: 32-byte shadow space plus the
+ * return address is 0x28, plus the two pushes above gives 0x38. */
+# define ENTER_SYSV_FUNC_PARAMS_5 \
+       ENTER_SYSV_FUNC_PARAMS_0_4; \
+       movq 0x38(%rsp), %r8;
+
+# define ENTER_SYSV_FUNC_PARAMS_6 \
+       ENTER_SYSV_FUNC_PARAMS_5; \
+       movq 0x40(%rsp), %r9;
+
+# define EXIT_SYSV_FUNC \
+       popq %rsi; \
+       popq %rdi;
+#else
+# define ENTER_SYSV_FUNC_PARAMS_0_4
+# define ENTER_SYSV_FUNC_PARAMS_5
+# define ENTER_SYSV_FUNC_PARAMS_6
+# define EXIT_SYSV_FUNC
+#endif
+
+#endif /* GCRY_ASM_COMMON_AMD64_H */

diff --git a/cipher/blowfish-amd64.S b/cipher/blowfish-amd64.S
index 21b63fc1..02d3b710 100644
--- a/cipher/blowfish-amd64.S
+++ b/cipher/blowfish-amd64.S
@@ -1,541 +1,555 @@
/* blowfish-amd64.S  -  AMD64 assembly implementation of Blowfish cipher
 *
 * Copyright (C) 2013 Jussi Kivilinna <jussi.kivilinna@iki.fi>
 *
 * This file is part of Libgcrypt.
 *
 * Libgcrypt is free software; you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as
 * published by the Free Software Foundation; either version 2.1 of
 * the License, or (at your option) any later version.
 *
 * Libgcrypt is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this program; if not, see <http://www.gnu.org/licenses/>.
 */

#ifdef __x86_64
#include <config.h>
#if defined(USE_BLOWFISH) && \
    (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
     defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS))

-#ifdef HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS
-# define ELF(...) __VA_ARGS__
-#else
-# define ELF(...)
/*_*/ -#endif +#include "asm-common-amd64.h" .text /* structure of BLOWFISH_context: */ #define s0 0 #define s1 ((s0) + 256 * 4) #define s2 ((s1) + 256 * 4) #define s3 ((s2) + 256 * 4) #define p ((s3) + 256 * 4) /* register macros */ #define CTX %rdi #define RIO %rsi #define RX0 %rax #define RX1 %rbx #define RX2 %rcx #define RX3 %rdx #define RX0d %eax #define RX1d %ebx #define RX2d %ecx #define RX3d %edx #define RX0bl %al #define RX1bl %bl #define RX2bl %cl #define RX3bl %dl #define RX0bh %ah #define RX1bh %bh #define RX2bh %ch #define RX3bh %dh #define RT0 %rbp #define RT1 %rsi #define RT2 %r8 #define RT3 %r9 #define RT0d %ebp #define RT1d %esi #define RT2d %r8d #define RT3d %r9d #define RKEY %r10 /*********************************************************************** * 1-way blowfish ***********************************************************************/ #define F() \ movzbl RX0bh, RT1d; \ movzbl RX0bl, RT3d; \ rorq $16, RX0; \ movzbl RX0bh, RT0d; \ movzbl RX0bl, RT2d; \ rorq $16, RX0; \ movl s0(CTX,RT0,4), RT0d; \ addl s1(CTX,RT2,4), RT0d; \ xorl s2(CTX,RT1,4), RT0d; \ addl s3(CTX,RT3,4), RT0d; \ xorq RT0, RX0; #define load_roundkey_enc(n) \ movq p+4*(n)(CTX), RX3; #define add_roundkey_enc() \ xorq RX3, RX0; #define round_enc(n) \ add_roundkey_enc(); \ load_roundkey_enc(n); \ \ F(); \ F(); #define load_roundkey_dec(n) \ movq p+4*(n-1)(CTX), RX3; \ rorq $32, RX3; #define add_roundkey_dec() \ xorq RX3, RX0; #define round_dec(n) \ add_roundkey_dec(); \ load_roundkey_dec(n); \ \ F(); \ F(); #define read_block() \ movq (RIO), RX0; \ rorq $32, RX0; \ bswapq RX0; #define write_block() \ bswapq RX0; \ movq RX0, (RIO); .align 8 ELF(.type __blowfish_enc_blk1,@function;) __blowfish_enc_blk1: /* input: * %rdi: ctx, CTX * RX0: input plaintext block * output: * RX0: output plaintext block */ movq %rbp, %r11; load_roundkey_enc(0); round_enc(2); round_enc(4); round_enc(6); round_enc(8); round_enc(10); round_enc(12); round_enc(14); round_enc(16); add_roundkey_enc(); movq %r11, %rbp; ret; ELF(.size __blowfish_enc_blk1,.-__blowfish_enc_blk1;) .align 8 .globl _gcry_blowfish_amd64_do_encrypt ELF(.type _gcry_blowfish_amd64_do_encrypt,@function;) _gcry_blowfish_amd64_do_encrypt: /* input: * %rdi: ctx, CTX * %rsi: u32 *ret_xl * %rdx: u32 *ret_xr */ + ENTER_SYSV_FUNC_PARAMS_0_4 + movl (%rdx), RX0d; shlq $32, RX0; movl (%rsi), RT3d; movq %rdx, %r10; orq RT3, RX0; movq %rsi, RX2; call __blowfish_enc_blk1; movl RX0d, (%r10); shrq $32, RX0; movl RX0d, (RX2); + EXIT_SYSV_FUNC ret; ELF(.size _gcry_blowfish_amd64_do_encrypt,.-_gcry_blowfish_amd64_do_encrypt;) .align 8 .globl _gcry_blowfish_amd64_encrypt_block ELF(.type _gcry_blowfish_amd64_encrypt_block,@function;) _gcry_blowfish_amd64_encrypt_block: /* input: * %rdi: ctx, CTX * %rsi: dst * %rdx: src */ + ENTER_SYSV_FUNC_PARAMS_0_4 movq %rsi, %r10; movq %rdx, RIO; read_block(); call __blowfish_enc_blk1; movq %r10, RIO; write_block(); + EXIT_SYSV_FUNC ret; ELF(.size _gcry_blowfish_amd64_encrypt_block,.-_gcry_blowfish_amd64_encrypt_block;) .align 8 .globl _gcry_blowfish_amd64_decrypt_block ELF(.type _gcry_blowfish_amd64_decrypt_block,@function;) _gcry_blowfish_amd64_decrypt_block: /* input: * %rdi: ctx, CTX * %rsi: dst * %rdx: src */ + ENTER_SYSV_FUNC_PARAMS_0_4 + movq %rbp, %r11; movq %rsi, %r10; movq %rdx, RIO; read_block(); load_roundkey_dec(17); round_dec(15); round_dec(13); round_dec(11); round_dec(9); round_dec(7); round_dec(5); round_dec(3); round_dec(1); add_roundkey_dec(); movq %r10, RIO; write_block(); movq %r11, %rbp; + EXIT_SYSV_FUNC ret; ELF(.size 
_gcry_blowfish_amd64_decrypt_block,.-_gcry_blowfish_amd64_decrypt_block;) /********************************************************************** 4-way blowfish, four blocks parallel **********************************************************************/ #define F4(x) \ movzbl x ## bh, RT1d; \ movzbl x ## bl, RT3d; \ rorq $16, x; \ movzbl x ## bh, RT0d; \ movzbl x ## bl, RT2d; \ rorq $16, x; \ movl s0(CTX,RT0,4), RT0d; \ addl s1(CTX,RT2,4), RT0d; \ xorl s2(CTX,RT1,4), RT0d; \ addl s3(CTX,RT3,4), RT0d; \ xorq RT0, x; #define add_preloaded_roundkey4() \ xorq RKEY, RX0; \ xorq RKEY, RX1; \ xorq RKEY, RX2; \ xorq RKEY, RX3; #define preload_roundkey_enc(n) \ movq p+4*(n)(CTX), RKEY; #define add_roundkey_enc4(n) \ add_preloaded_roundkey4(); \ preload_roundkey_enc(n + 2); #define round_enc4(n) \ add_roundkey_enc4(n); \ \ F4(RX0); \ F4(RX1); \ F4(RX2); \ F4(RX3); \ \ F4(RX0); \ F4(RX1); \ F4(RX2); \ F4(RX3); #define preload_roundkey_dec(n) \ movq p+4*((n)-1)(CTX), RKEY; \ rorq $32, RKEY; #define add_roundkey_dec4(n) \ add_preloaded_roundkey4(); \ preload_roundkey_dec(n - 2); #define round_dec4(n) \ add_roundkey_dec4(n); \ \ F4(RX0); \ F4(RX1); \ F4(RX2); \ F4(RX3); \ \ F4(RX0); \ F4(RX1); \ F4(RX2); \ F4(RX3); #define inbswap_block4() \ rorq $32, RX0; \ bswapq RX0; \ rorq $32, RX1; \ bswapq RX1; \ rorq $32, RX2; \ bswapq RX2; \ rorq $32, RX3; \ bswapq RX3; #define inctrswap_block4() \ rorq $32, RX0; \ rorq $32, RX1; \ rorq $32, RX2; \ rorq $32, RX3; #define outbswap_block4() \ bswapq RX0; \ bswapq RX1; \ bswapq RX2; \ bswapq RX3; .align 8 ELF(.type __blowfish_enc_blk4,@function;) __blowfish_enc_blk4: /* input: * %rdi: ctx, CTX * RX0,RX1,RX2,RX3: four input inbswapped plaintext blocks * output: * RX0,RX1,RX2,RX3: four output ciphertext blocks */ preload_roundkey_enc(0); round_enc4(0); round_enc4(2); round_enc4(4); round_enc4(6); round_enc4(8); round_enc4(10); round_enc4(12); round_enc4(14); add_preloaded_roundkey4(); outbswap_block4(); ret; ELF(.size __blowfish_enc_blk4,.-__blowfish_enc_blk4;) .align 8 ELF(.type __blowfish_dec_blk4,@function;) __blowfish_dec_blk4: /* input: * %rdi: ctx, CTX * RX0,RX1,RX2,RX3: four input ciphertext blocks * output: * RX0,RX1,RX2,RX3: four output plaintext blocks */ preload_roundkey_dec(17); inbswap_block4(); round_dec4(17); round_dec4(15); round_dec4(13); round_dec4(11); round_dec4(9); round_dec4(7); round_dec4(5); round_dec4(3); add_preloaded_roundkey4(); outbswap_block4(); ret; ELF(.size __blowfish_dec_blk4,.-__blowfish_dec_blk4;) .align 8 .globl _gcry_blowfish_amd64_ctr_enc ELF(.type _gcry_blowfish_amd64_ctr_enc,@function;) _gcry_blowfish_amd64_ctr_enc: /* input: * %rdi: ctx, CTX * %rsi: dst (4 blocks) * %rdx: src (4 blocks) * %rcx: iv (big endian, 64bit) */ + ENTER_SYSV_FUNC_PARAMS_0_4 + pushq %rbp; pushq %rbx; pushq %r12; pushq %r13; /* %r11-%r13 are not used by __blowfish_enc_blk4 */ movq %rcx, %r13; /*iv*/ movq %rdx, %r12; /*src*/ movq %rsi, %r11; /*dst*/ /* load IV and byteswap */ movq (%r13), RT0; bswapq RT0; movq RT0, RX0; /* construct IVs */ leaq 1(RT0), RX1; leaq 2(RT0), RX2; leaq 3(RT0), RX3; leaq 4(RT0), RT0; bswapq RT0; inctrswap_block4(); /* store new IV */ movq RT0, (%r13); call __blowfish_enc_blk4; /* XOR key-stream with plaintext */ xorq 0 * 8(%r12), RX0; xorq 1 * 8(%r12), RX1; xorq 2 * 8(%r12), RX2; xorq 3 * 8(%r12), RX3; movq RX0, 0 * 8(%r11); movq RX1, 1 * 8(%r11); movq RX2, 2 * 8(%r11); movq RX3, 3 * 8(%r11); popq %r13; popq %r12; popq %rbx; popq %rbp; + EXIT_SYSV_FUNC ret; ELF(.size 
_gcry_blowfish_amd64_ctr_enc,.-_gcry_blowfish_amd64_ctr_enc;)

.align 8
.globl _gcry_blowfish_amd64_cbc_dec
ELF(.type _gcry_blowfish_amd64_cbc_dec,@function;)
_gcry_blowfish_amd64_cbc_dec:
        /* input:
         *      %rdi: ctx, CTX
         *      %rsi: dst (4 blocks)
         *      %rdx: src (4 blocks)
         *      %rcx: iv (64bit)
         */
+       ENTER_SYSV_FUNC_PARAMS_0_4
+
        pushq %rbp;
        pushq %rbx;
        pushq %r12;
        pushq %r13;

        /* %r11-%r13 are not used by __blowfish_dec_blk4 */
        movq %rsi, %r11; /*dst*/
        movq %rdx, %r12; /*src*/
        movq %rcx, %r13; /*iv*/

        /* load input */
        movq 0 * 8(%r12), RX0;
        movq 1 * 8(%r12), RX1;
        movq 2 * 8(%r12), RX2;
        movq 3 * 8(%r12), RX3;

        call __blowfish_dec_blk4;

        movq 3 * 8(%r12), RT0;
        xorq (%r13), RX0;
        xorq 0 * 8(%r12), RX1;
        xorq 1 * 8(%r12), RX2;
        xorq 2 * 8(%r12), RX3;
        movq RT0, (%r13); /* store new IV */

        movq RX0, 0 * 8(%r11);
        movq RX1, 1 * 8(%r11);
        movq RX2, 2 * 8(%r11);
        movq RX3, 3 * 8(%r11);

        popq %r13;
        popq %r12;
        popq %rbx;
        popq %rbp;
+       EXIT_SYSV_FUNC
        ret;
ELF(.size _gcry_blowfish_amd64_cbc_dec,.-_gcry_blowfish_amd64_cbc_dec;)

.align 8
.globl _gcry_blowfish_amd64_cfb_dec
ELF(.type _gcry_blowfish_amd64_cfb_dec,@function;)
_gcry_blowfish_amd64_cfb_dec:
        /* input:
         *      %rdi: ctx, CTX
         *      %rsi: dst (4 blocks)
         *      %rdx: src (4 blocks)
         *      %rcx: iv (64bit)
         */
+       ENTER_SYSV_FUNC_PARAMS_0_4
+
        pushq %rbp;
        pushq %rbx;
        pushq %r12;
        pushq %r13;

        /* %r11-%r13 are not used by __blowfish_enc_blk4 */
        movq %rcx, %r13; /*iv*/
        movq %rdx, %r12; /*src*/
        movq %rsi, %r11; /*dst*/

        /* Load input */
        movq (%r13), RX0;
        movq 0 * 8(%r12), RX1;
        movq 1 * 8(%r12), RX2;
        movq 2 * 8(%r12), RX3;

        inbswap_block4();

        /* Update IV */
        movq 3 * 8(%r12), RT0;
        movq RT0, (%r13);

        call __blowfish_enc_blk4;

        xorq 0 * 8(%r12), RX0;
        xorq 1 * 8(%r12), RX1;
        xorq 2 * 8(%r12), RX2;
        xorq 3 * 8(%r12), RX3;
        movq RX0, 0 * 8(%r11);
        movq RX1, 1 * 8(%r11);
        movq RX2, 2 * 8(%r11);
        movq RX3, 3 * 8(%r11);

        popq %r13;
        popq %r12;
        popq %rbx;
        popq %rbp;
+
+       EXIT_SYSV_FUNC
        ret;
ELF(.size _gcry_blowfish_amd64_cfb_dec,.-_gcry_blowfish_amd64_cfb_dec;)

#endif /*defined(USE_BLOWFISH)*/
#endif /*__x86_64*/

diff --git a/cipher/blowfish.c b/cipher/blowfish.c
index a3fc26ce..724d64e9 100644
--- a/cipher/blowfish.c
+++ b/cipher/blowfish.c
@@ -1,1112 +1,1068 @@
/* blowfish.c  -  Blowfish encryption
 * Copyright (C) 1998, 2001, 2002, 2003 Free Software Foundation, Inc.
 *
 * This file is part of Libgcrypt.
 *
 * Libgcrypt is free software; you can redistribute it and/or modify
 * it under the terms of the GNU Lesser general Public License as
 * published by the Free Software Foundation; either version 2.1 of
 * the License, or (at your option) any later version.
 *
 * Libgcrypt is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
 *
 * For a description of the algorithm, see:
 *   Bruce Schneier: Applied Cryptography. John Wiley & Sons, 1996.
 *   ISBN 0-471-11709-9. Pages 336 ff.
 */

/* Test values:
 * key    "abcdefghijklmnopqrstuvwxyz";
 * plain  "BLOWFISH"
 * cipher 32 4E D0 FE F4 13 A2 03
 */

#include <config.h>
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include "types.h"
#include "g10lib.h"
#include "cipher.h"
#include "bufhelp.h"
#include "cipher-selftest.h"

#define BLOWFISH_BLOCKSIZE 8
#define BLOWFISH_ROUNDS 16

/* USE_AMD64_ASM indicates whether to use AMD64 assembly code.
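 * The assembly entry points now perform their own Win64 -> SysV argument
 * translation (see ENTER_SYSV_FUNC_PARAMS_0_4 in asm-common-amd64.h), so
 * the same code path serves both assembler types checked below and the C
 * wrappers can call the functions directly, e.g.:
 *   _gcry_blowfish_amd64_encrypt_block (context, outbuf, inbuf);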
*/ #undef USE_AMD64_ASM #if defined(__x86_64__) && (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \ defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS)) && \ (BLOWFISH_ROUNDS == 16) # define USE_AMD64_ASM 1 #endif /* USE_ARM_ASM indicates whether to use ARM assembly code. */ #undef USE_ARM_ASM #if defined(__ARMEL__) # if (BLOWFISH_ROUNDS == 16) && defined(HAVE_COMPATIBLE_GCC_ARM_PLATFORM_AS) # define USE_ARM_ASM 1 # endif #endif typedef struct { u32 s0[256]; u32 s1[256]; u32 s2[256]; u32 s3[256]; u32 p[BLOWFISH_ROUNDS+2]; } BLOWFISH_context; static gcry_err_code_t bf_setkey (void *c, const byte *key, unsigned keylen); static unsigned int encrypt_block (void *bc, byte *outbuf, const byte *inbuf); static unsigned int decrypt_block (void *bc, byte *outbuf, const byte *inbuf); /* precomputed S boxes */ static const u32 ks0[256] = { 0xD1310BA6,0x98DFB5AC,0x2FFD72DB,0xD01ADFB7,0xB8E1AFED,0x6A267E96, 0xBA7C9045,0xF12C7F99,0x24A19947,0xB3916CF7,0x0801F2E2,0x858EFC16, 0x636920D8,0x71574E69,0xA458FEA3,0xF4933D7E,0x0D95748F,0x728EB658, 0x718BCD58,0x82154AEE,0x7B54A41D,0xC25A59B5,0x9C30D539,0x2AF26013, 0xC5D1B023,0x286085F0,0xCA417918,0xB8DB38EF,0x8E79DCB0,0x603A180E, 0x6C9E0E8B,0xB01E8A3E,0xD71577C1,0xBD314B27,0x78AF2FDA,0x55605C60, 0xE65525F3,0xAA55AB94,0x57489862,0x63E81440,0x55CA396A,0x2AAB10B6, 0xB4CC5C34,0x1141E8CE,0xA15486AF,0x7C72E993,0xB3EE1411,0x636FBC2A, 0x2BA9C55D,0x741831F6,0xCE5C3E16,0x9B87931E,0xAFD6BA33,0x6C24CF5C, 0x7A325381,0x28958677,0x3B8F4898,0x6B4BB9AF,0xC4BFE81B,0x66282193, 0x61D809CC,0xFB21A991,0x487CAC60,0x5DEC8032,0xEF845D5D,0xE98575B1, 0xDC262302,0xEB651B88,0x23893E81,0xD396ACC5,0x0F6D6FF3,0x83F44239, 0x2E0B4482,0xA4842004,0x69C8F04A,0x9E1F9B5E,0x21C66842,0xF6E96C9A, 0x670C9C61,0xABD388F0,0x6A51A0D2,0xD8542F68,0x960FA728,0xAB5133A3, 0x6EEF0B6C,0x137A3BE4,0xBA3BF050,0x7EFB2A98,0xA1F1651D,0x39AF0176, 0x66CA593E,0x82430E88,0x8CEE8619,0x456F9FB4,0x7D84A5C3,0x3B8B5EBE, 0xE06F75D8,0x85C12073,0x401A449F,0x56C16AA6,0x4ED3AA62,0x363F7706, 0x1BFEDF72,0x429B023D,0x37D0D724,0xD00A1248,0xDB0FEAD3,0x49F1C09B, 0x075372C9,0x80991B7B,0x25D479D8,0xF6E8DEF7,0xE3FE501A,0xB6794C3B, 0x976CE0BD,0x04C006BA,0xC1A94FB6,0x409F60C4,0x5E5C9EC2,0x196A2463, 0x68FB6FAF,0x3E6C53B5,0x1339B2EB,0x3B52EC6F,0x6DFC511F,0x9B30952C, 0xCC814544,0xAF5EBD09,0xBEE3D004,0xDE334AFD,0x660F2807,0x192E4BB3, 0xC0CBA857,0x45C8740F,0xD20B5F39,0xB9D3FBDB,0x5579C0BD,0x1A60320A, 0xD6A100C6,0x402C7279,0x679F25FE,0xFB1FA3CC,0x8EA5E9F8,0xDB3222F8, 0x3C7516DF,0xFD616B15,0x2F501EC8,0xAD0552AB,0x323DB5FA,0xFD238760, 0x53317B48,0x3E00DF82,0x9E5C57BB,0xCA6F8CA0,0x1A87562E,0xDF1769DB, 0xD542A8F6,0x287EFFC3,0xAC6732C6,0x8C4F5573,0x695B27B0,0xBBCA58C8, 0xE1FFA35D,0xB8F011A0,0x10FA3D98,0xFD2183B8,0x4AFCB56C,0x2DD1D35B, 0x9A53E479,0xB6F84565,0xD28E49BC,0x4BFB9790,0xE1DDF2DA,0xA4CB7E33, 0x62FB1341,0xCEE4C6E8,0xEF20CADA,0x36774C01,0xD07E9EFE,0x2BF11FB4, 0x95DBDA4D,0xAE909198,0xEAAD8E71,0x6B93D5A0,0xD08ED1D0,0xAFC725E0, 0x8E3C5B2F,0x8E7594B7,0x8FF6E2FB,0xF2122B64,0x8888B812,0x900DF01C, 0x4FAD5EA0,0x688FC31C,0xD1CFF191,0xB3A8C1AD,0x2F2F2218,0xBE0E1777, 0xEA752DFE,0x8B021FA1,0xE5A0CC0F,0xB56F74E8,0x18ACF3D6,0xCE89E299, 0xB4A84FE0,0xFD13E0B7,0x7CC43B81,0xD2ADA8D9,0x165FA266,0x80957705, 0x93CC7314,0x211A1477,0xE6AD2065,0x77B5FA86,0xC75442F5,0xFB9D35CF, 0xEBCDAF0C,0x7B3E89A0,0xD6411BD3,0xAE1E7E49,0x00250E2D,0x2071B35E, 0x226800BB,0x57B8E0AF,0x2464369B,0xF009B91E,0x5563911D,0x59DFA6AA, 0x78C14389,0xD95A537F,0x207D5BA2,0x02E5B9C5,0x83260376,0x6295CFA9, 0x11C81968,0x4E734A41,0xB3472DCA,0x7B14A94A,0x1B510052,0x9A532915, 
0xD60F573F,0xBC9BC6E4,0x2B60A476,0x81E67400,0x08BA6FB5,0x571BE91F, 0xF296EC6B,0x2A0DD915,0xB6636521,0xE7B9F9B6,0xFF34052E,0xC5855664, 0x53B02D5D,0xA99F8FA1,0x08BA4799,0x6E85076A }; static const u32 ks1[256] = { 0x4B7A70E9,0xB5B32944,0xDB75092E,0xC4192623,0xAD6EA6B0,0x49A7DF7D, 0x9CEE60B8,0x8FEDB266,0xECAA8C71,0x699A17FF,0x5664526C,0xC2B19EE1, 0x193602A5,0x75094C29,0xA0591340,0xE4183A3E,0x3F54989A,0x5B429D65, 0x6B8FE4D6,0x99F73FD6,0xA1D29C07,0xEFE830F5,0x4D2D38E6,0xF0255DC1, 0x4CDD2086,0x8470EB26,0x6382E9C6,0x021ECC5E,0x09686B3F,0x3EBAEFC9, 0x3C971814,0x6B6A70A1,0x687F3584,0x52A0E286,0xB79C5305,0xAA500737, 0x3E07841C,0x7FDEAE5C,0x8E7D44EC,0x5716F2B8,0xB03ADA37,0xF0500C0D, 0xF01C1F04,0x0200B3FF,0xAE0CF51A,0x3CB574B2,0x25837A58,0xDC0921BD, 0xD19113F9,0x7CA92FF6,0x94324773,0x22F54701,0x3AE5E581,0x37C2DADC, 0xC8B57634,0x9AF3DDA7,0xA9446146,0x0FD0030E,0xECC8C73E,0xA4751E41, 0xE238CD99,0x3BEA0E2F,0x3280BBA1,0x183EB331,0x4E548B38,0x4F6DB908, 0x6F420D03,0xF60A04BF,0x2CB81290,0x24977C79,0x5679B072,0xBCAF89AF, 0xDE9A771F,0xD9930810,0xB38BAE12,0xDCCF3F2E,0x5512721F,0x2E6B7124, 0x501ADDE6,0x9F84CD87,0x7A584718,0x7408DA17,0xBC9F9ABC,0xE94B7D8C, 0xEC7AEC3A,0xDB851DFA,0x63094366,0xC464C3D2,0xEF1C1847,0x3215D908, 0xDD433B37,0x24C2BA16,0x12A14D43,0x2A65C451,0x50940002,0x133AE4DD, 0x71DFF89E,0x10314E55,0x81AC77D6,0x5F11199B,0x043556F1,0xD7A3C76B, 0x3C11183B,0x5924A509,0xF28FE6ED,0x97F1FBFA,0x9EBABF2C,0x1E153C6E, 0x86E34570,0xEAE96FB1,0x860E5E0A,0x5A3E2AB3,0x771FE71C,0x4E3D06FA, 0x2965DCB9,0x99E71D0F,0x803E89D6,0x5266C825,0x2E4CC978,0x9C10B36A, 0xC6150EBA,0x94E2EA78,0xA5FC3C53,0x1E0A2DF4,0xF2F74EA7,0x361D2B3D, 0x1939260F,0x19C27960,0x5223A708,0xF71312B6,0xEBADFE6E,0xEAC31F66, 0xE3BC4595,0xA67BC883,0xB17F37D1,0x018CFF28,0xC332DDEF,0xBE6C5AA5, 0x65582185,0x68AB9802,0xEECEA50F,0xDB2F953B,0x2AEF7DAD,0x5B6E2F84, 0x1521B628,0x29076170,0xECDD4775,0x619F1510,0x13CCA830,0xEB61BD96, 0x0334FE1E,0xAA0363CF,0xB5735C90,0x4C70A239,0xD59E9E0B,0xCBAADE14, 0xEECC86BC,0x60622CA7,0x9CAB5CAB,0xB2F3846E,0x648B1EAF,0x19BDF0CA, 0xA02369B9,0x655ABB50,0x40685A32,0x3C2AB4B3,0x319EE9D5,0xC021B8F7, 0x9B540B19,0x875FA099,0x95F7997E,0x623D7DA8,0xF837889A,0x97E32D77, 0x11ED935F,0x16681281,0x0E358829,0xC7E61FD6,0x96DEDFA1,0x7858BA99, 0x57F584A5,0x1B227263,0x9B83C3FF,0x1AC24696,0xCDB30AEB,0x532E3054, 0x8FD948E4,0x6DBC3128,0x58EBF2EF,0x34C6FFEA,0xFE28ED61,0xEE7C3C73, 0x5D4A14D9,0xE864B7E3,0x42105D14,0x203E13E0,0x45EEE2B6,0xA3AAABEA, 0xDB6C4F15,0xFACB4FD0,0xC742F442,0xEF6ABBB5,0x654F3B1D,0x41CD2105, 0xD81E799E,0x86854DC7,0xE44B476A,0x3D816250,0xCF62A1F2,0x5B8D2646, 0xFC8883A0,0xC1C7B6A3,0x7F1524C3,0x69CB7492,0x47848A0B,0x5692B285, 0x095BBF00,0xAD19489D,0x1462B174,0x23820E00,0x58428D2A,0x0C55F5EA, 0x1DADF43E,0x233F7061,0x3372F092,0x8D937E41,0xD65FECF1,0x6C223BDB, 0x7CDE3759,0xCBEE7460,0x4085F2A7,0xCE77326E,0xA6078084,0x19F8509E, 0xE8EFD855,0x61D99735,0xA969A7AA,0xC50C06C2,0x5A04ABFC,0x800BCADC, 0x9E447A2E,0xC3453484,0xFDD56705,0x0E1E9EC9,0xDB73DBD3,0x105588CD, 0x675FDA79,0xE3674340,0xC5C43465,0x713E38D8,0x3D28F89E,0xF16DFF20, 0x153E21E7,0x8FB03D4A,0xE6E39F2B,0xDB83ADF7 }; static const u32 ks2[256] = { 0xE93D5A68,0x948140F7,0xF64C261C,0x94692934,0x411520F7,0x7602D4F7, 0xBCF46B2E,0xD4A20068,0xD4082471,0x3320F46A,0x43B7D4B7,0x500061AF, 0x1E39F62E,0x97244546,0x14214F74,0xBF8B8840,0x4D95FC1D,0x96B591AF, 0x70F4DDD3,0x66A02F45,0xBFBC09EC,0x03BD9785,0x7FAC6DD0,0x31CB8504, 0x96EB27B3,0x55FD3941,0xDA2547E6,0xABCA0A9A,0x28507825,0x530429F4, 0x0A2C86DA,0xE9B66DFB,0x68DC1462,0xD7486900,0x680EC0A4,0x27A18DEE, 
0x4F3FFEA2,0xE887AD8C,0xB58CE006,0x7AF4D6B6,0xAACE1E7C,0xD3375FEC, 0xCE78A399,0x406B2A42,0x20FE9E35,0xD9F385B9,0xEE39D7AB,0x3B124E8B, 0x1DC9FAF7,0x4B6D1856,0x26A36631,0xEAE397B2,0x3A6EFA74,0xDD5B4332, 0x6841E7F7,0xCA7820FB,0xFB0AF54E,0xD8FEB397,0x454056AC,0xBA489527, 0x55533A3A,0x20838D87,0xFE6BA9B7,0xD096954B,0x55A867BC,0xA1159A58, 0xCCA92963,0x99E1DB33,0xA62A4A56,0x3F3125F9,0x5EF47E1C,0x9029317C, 0xFDF8E802,0x04272F70,0x80BB155C,0x05282CE3,0x95C11548,0xE4C66D22, 0x48C1133F,0xC70F86DC,0x07F9C9EE,0x41041F0F,0x404779A4,0x5D886E17, 0x325F51EB,0xD59BC0D1,0xF2BCC18F,0x41113564,0x257B7834,0x602A9C60, 0xDFF8E8A3,0x1F636C1B,0x0E12B4C2,0x02E1329E,0xAF664FD1,0xCAD18115, 0x6B2395E0,0x333E92E1,0x3B240B62,0xEEBEB922,0x85B2A20E,0xE6BA0D99, 0xDE720C8C,0x2DA2F728,0xD0127845,0x95B794FD,0x647D0862,0xE7CCF5F0, 0x5449A36F,0x877D48FA,0xC39DFD27,0xF33E8D1E,0x0A476341,0x992EFF74, 0x3A6F6EAB,0xF4F8FD37,0xA812DC60,0xA1EBDDF8,0x991BE14C,0xDB6E6B0D, 0xC67B5510,0x6D672C37,0x2765D43B,0xDCD0E804,0xF1290DC7,0xCC00FFA3, 0xB5390F92,0x690FED0B,0x667B9FFB,0xCEDB7D9C,0xA091CF0B,0xD9155EA3, 0xBB132F88,0x515BAD24,0x7B9479BF,0x763BD6EB,0x37392EB3,0xCC115979, 0x8026E297,0xF42E312D,0x6842ADA7,0xC66A2B3B,0x12754CCC,0x782EF11C, 0x6A124237,0xB79251E7,0x06A1BBE6,0x4BFB6350,0x1A6B1018,0x11CAEDFA, 0x3D25BDD8,0xE2E1C3C9,0x44421659,0x0A121386,0xD90CEC6E,0xD5ABEA2A, 0x64AF674E,0xDA86A85F,0xBEBFE988,0x64E4C3FE,0x9DBC8057,0xF0F7C086, 0x60787BF8,0x6003604D,0xD1FD8346,0xF6381FB0,0x7745AE04,0xD736FCCC, 0x83426B33,0xF01EAB71,0xB0804187,0x3C005E5F,0x77A057BE,0xBDE8AE24, 0x55464299,0xBF582E61,0x4E58F48F,0xF2DDFDA2,0xF474EF38,0x8789BDC2, 0x5366F9C3,0xC8B38E74,0xB475F255,0x46FCD9B9,0x7AEB2661,0x8B1DDF84, 0x846A0E79,0x915F95E2,0x466E598E,0x20B45770,0x8CD55591,0xC902DE4C, 0xB90BACE1,0xBB8205D0,0x11A86248,0x7574A99E,0xB77F19B6,0xE0A9DC09, 0x662D09A1,0xC4324633,0xE85A1F02,0x09F0BE8C,0x4A99A025,0x1D6EFE10, 0x1AB93D1D,0x0BA5A4DF,0xA186F20F,0x2868F169,0xDCB7DA83,0x573906FE, 0xA1E2CE9B,0x4FCD7F52,0x50115E01,0xA70683FA,0xA002B5C4,0x0DE6D027, 0x9AF88C27,0x773F8641,0xC3604C06,0x61A806B5,0xF0177A28,0xC0F586E0, 0x006058AA,0x30DC7D62,0x11E69ED7,0x2338EA63,0x53C2DD94,0xC2C21634, 0xBBCBEE56,0x90BCB6DE,0xEBFC7DA1,0xCE591D76,0x6F05E409,0x4B7C0188, 0x39720A3D,0x7C927C24,0x86E3725F,0x724D9DB9,0x1AC15BB4,0xD39EB8FC, 0xED545578,0x08FCA5B5,0xD83D7CD3,0x4DAD0FC4,0x1E50EF5E,0xB161E6F8, 0xA28514D9,0x6C51133C,0x6FD5C7E7,0x56E14EC4,0x362ABFCE,0xDDC6C837, 0xD79A3234,0x92638212,0x670EFA8E,0x406000E0 }; static const u32 ks3[256] = { 0x3A39CE37,0xD3FAF5CF,0xABC27737,0x5AC52D1B,0x5CB0679E,0x4FA33742, 0xD3822740,0x99BC9BBE,0xD5118E9D,0xBF0F7315,0xD62D1C7E,0xC700C47B, 0xB78C1B6B,0x21A19045,0xB26EB1BE,0x6A366EB4,0x5748AB2F,0xBC946E79, 0xC6A376D2,0x6549C2C8,0x530FF8EE,0x468DDE7D,0xD5730A1D,0x4CD04DC6, 0x2939BBDB,0xA9BA4650,0xAC9526E8,0xBE5EE304,0xA1FAD5F0,0x6A2D519A, 0x63EF8CE2,0x9A86EE22,0xC089C2B8,0x43242EF6,0xA51E03AA,0x9CF2D0A4, 0x83C061BA,0x9BE96A4D,0x8FE51550,0xBA645BD6,0x2826A2F9,0xA73A3AE1, 0x4BA99586,0xEF5562E9,0xC72FEFD3,0xF752F7DA,0x3F046F69,0x77FA0A59, 0x80E4A915,0x87B08601,0x9B09E6AD,0x3B3EE593,0xE990FD5A,0x9E34D797, 0x2CF0B7D9,0x022B8B51,0x96D5AC3A,0x017DA67D,0xD1CF3ED6,0x7C7D2D28, 0x1F9F25CF,0xADF2B89B,0x5AD6B472,0x5A88F54C,0xE029AC71,0xE019A5E6, 0x47B0ACFD,0xED93FA9B,0xE8D3C48D,0x283B57CC,0xF8D56629,0x79132E28, 0x785F0191,0xED756055,0xF7960E44,0xE3D35E8C,0x15056DD4,0x88F46DBA, 0x03A16125,0x0564F0BD,0xC3EB9E15,0x3C9057A2,0x97271AEC,0xA93A072A, 0x1B3F6D9B,0x1E6321F5,0xF59C66FB,0x26DCF319,0x7533D928,0xB155FDF5, 
0x03563482,0x8ABA3CBB,0x28517711,0xC20AD9F8,0xABCC5167,0xCCAD925F, 0x4DE81751,0x3830DC8E,0x379D5862,0x9320F991,0xEA7A90C2,0xFB3E7BCE, 0x5121CE64,0x774FBE32,0xA8B6E37E,0xC3293D46,0x48DE5369,0x6413E680, 0xA2AE0810,0xDD6DB224,0x69852DFD,0x09072166,0xB39A460A,0x6445C0DD, 0x586CDECF,0x1C20C8AE,0x5BBEF7DD,0x1B588D40,0xCCD2017F,0x6BB4E3BB, 0xDDA26A7E,0x3A59FF45,0x3E350A44,0xBCB4CDD5,0x72EACEA8,0xFA6484BB, 0x8D6612AE,0xBF3C6F47,0xD29BE463,0x542F5D9E,0xAEC2771B,0xF64E6370, 0x740E0D8D,0xE75B1357,0xF8721671,0xAF537D5D,0x4040CB08,0x4EB4E2CC, 0x34D2466A,0x0115AF84,0xE1B00428,0x95983A1D,0x06B89FB4,0xCE6EA048, 0x6F3F3B82,0x3520AB82,0x011A1D4B,0x277227F8,0x611560B1,0xE7933FDC, 0xBB3A792B,0x344525BD,0xA08839E1,0x51CE794B,0x2F32C9B7,0xA01FBAC9, 0xE01CC87E,0xBCC7D1F6,0xCF0111C3,0xA1E8AAC7,0x1A908749,0xD44FBD9A, 0xD0DADECB,0xD50ADA38,0x0339C32A,0xC6913667,0x8DF9317C,0xE0B12B4F, 0xF79E59B7,0x43F5BB3A,0xF2D519FF,0x27D9459C,0xBF97222C,0x15E6FC2A, 0x0F91FC71,0x9B941525,0xFAE59361,0xCEB69CEB,0xC2A86459,0x12BAA8D1, 0xB6C1075E,0xE3056A0C,0x10D25065,0xCB03A442,0xE0EC6E0E,0x1698DB3B, 0x4C98A0BE,0x3278E964,0x9F1F9532,0xE0D392DF,0xD3A0342B,0x8971F21E, 0x1B0A7441,0x4BA3348C,0xC5BE7120,0xC37632D8,0xDF359F8D,0x9B992F2E, 0xE60B6F47,0x0FE3F11D,0xE54CDA54,0x1EDAD891,0xCE6279CF,0xCD3E7E6F, 0x1618B166,0xFD2C1D05,0x848FD2C5,0xF6FB2299,0xF523F357,0xA6327623, 0x93A83531,0x56CCCD02,0xACF08162,0x5A75EBB5,0x6E163697,0x88D273CC, 0xDE966292,0x81B949D0,0x4C50901B,0x71C65614,0xE6C6C7BD,0x327A140A, 0x45E1D006,0xC3F27B9A,0xC9AA53FD,0x62A80F00,0xBB25BFE2,0x35BDD2F6, 0x71126905,0xB2040222,0xB6CBCF7C,0xCD769C2B,0x53113EC0,0x1640E3D3, 0x38ABBD60,0x2547ADF0,0xBA38209C,0xF746CE76,0x77AFA1C5,0x20756060, 0x85CBFE4E,0x8AE88DD8,0x7AAAF9B0,0x4CF9AA7E,0x1948C25C,0x02FB8A8C, 0x01C36AE4,0xD6EBE1F9,0x90D4F869,0xA65CDEA0,0x3F09252D,0xC208E69F, 0xB74E6132,0xCE77E25B,0x578FDFE3,0x3AC372E6 }; static const u32 ps[BLOWFISH_ROUNDS+2] = { 0x243F6A88,0x85A308D3,0x13198A2E,0x03707344,0xA4093822,0x299F31D0, 0x082EFA98,0xEC4E6C89,0x452821E6,0x38D01377,0xBE5466CF,0x34E90C6C, 0xC0AC29B7,0xC97C50DD,0x3F84D5B5,0xB5470917,0x9216D5D9,0x8979FB1B }; #ifdef USE_AMD64_ASM /* Assembly implementations of Blowfish. */ extern void _gcry_blowfish_amd64_do_encrypt(BLOWFISH_context *c, u32 *ret_xl, u32 *ret_xr); extern void _gcry_blowfish_amd64_encrypt_block(BLOWFISH_context *c, byte *out, const byte *in); extern void _gcry_blowfish_amd64_decrypt_block(BLOWFISH_context *c, byte *out, const byte *in); /* These assembly implementations process four blocks in parallel. */ extern void _gcry_blowfish_amd64_ctr_enc(BLOWFISH_context *ctx, byte *out, const byte *in, byte *ctr); extern void _gcry_blowfish_amd64_cbc_dec(BLOWFISH_context *ctx, byte *out, const byte *in, byte *iv); extern void _gcry_blowfish_amd64_cfb_dec(BLOWFISH_context *ctx, byte *out, const byte *in, byte *iv); -#ifdef HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS -static inline void -call_sysv_fn (const void *fn, const void *arg1, const void *arg2, - const void *arg3, const void *arg4) -{ - /* Call SystemV ABI function without storing non-volatile XMM registers, - * as target function does not use vector instruction sets. 
*/ - asm volatile ("callq *%0\n\t" - : "+a" (fn), - "+D" (arg1), - "+S" (arg2), - "+d" (arg3), - "+c" (arg4) - : - : "cc", "memory", "r8", "r9", "r10", "r11"); -} -#endif - static void do_encrypt ( BLOWFISH_context *bc, u32 *ret_xl, u32 *ret_xr ) { -#ifdef HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS - call_sysv_fn (_gcry_blowfish_amd64_do_encrypt, bc, ret_xl, ret_xr, NULL); -#else _gcry_blowfish_amd64_do_encrypt (bc, ret_xl, ret_xr); -#endif } static void do_encrypt_block (BLOWFISH_context *context, byte *outbuf, const byte *inbuf) { -#ifdef HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS - call_sysv_fn (_gcry_blowfish_amd64_encrypt_block, context, outbuf, inbuf, - NULL); -#else _gcry_blowfish_amd64_encrypt_block (context, outbuf, inbuf); -#endif } static void do_decrypt_block (BLOWFISH_context *context, byte *outbuf, const byte *inbuf) { -#ifdef HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS - call_sysv_fn (_gcry_blowfish_amd64_decrypt_block, context, outbuf, inbuf, - NULL); -#else _gcry_blowfish_amd64_decrypt_block (context, outbuf, inbuf); -#endif } static inline void blowfish_amd64_ctr_enc(BLOWFISH_context *ctx, byte *out, const byte *in, byte *ctr) { -#ifdef HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS - call_sysv_fn (_gcry_blowfish_amd64_ctr_enc, ctx, out, in, ctr); -#else _gcry_blowfish_amd64_ctr_enc(ctx, out, in, ctr); -#endif } static inline void blowfish_amd64_cbc_dec(BLOWFISH_context *ctx, byte *out, const byte *in, byte *iv) { -#ifdef HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS - call_sysv_fn (_gcry_blowfish_amd64_cbc_dec, ctx, out, in, iv); -#else _gcry_blowfish_amd64_cbc_dec(ctx, out, in, iv); -#endif } static inline void blowfish_amd64_cfb_dec(BLOWFISH_context *ctx, byte *out, const byte *in, byte *iv) { -#ifdef HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS - call_sysv_fn (_gcry_blowfish_amd64_cfb_dec, ctx, out, in, iv); -#else _gcry_blowfish_amd64_cfb_dec(ctx, out, in, iv); -#endif } static unsigned int encrypt_block (void *context , byte *outbuf, const byte *inbuf) { BLOWFISH_context *c = (BLOWFISH_context *) context; do_encrypt_block (c, outbuf, inbuf); return /*burn_stack*/ (2*8); } static unsigned int decrypt_block (void *context, byte *outbuf, const byte *inbuf) { BLOWFISH_context *c = (BLOWFISH_context *) context; do_decrypt_block (c, outbuf, inbuf); return /*burn_stack*/ (2*8); } #elif defined(USE_ARM_ASM) /* Assembly implementations of Blowfish. */ extern void _gcry_blowfish_arm_do_encrypt(BLOWFISH_context *c, u32 *ret_xl, u32 *ret_xr); extern void _gcry_blowfish_arm_encrypt_block(BLOWFISH_context *c, byte *out, const byte *in); extern void _gcry_blowfish_arm_decrypt_block(BLOWFISH_context *c, byte *out, const byte *in); /* These assembly implementations process two blocks in parallel. 
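   (The AMD64 implementations above process four blocks per call instead.)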
*/ extern void _gcry_blowfish_arm_ctr_enc(BLOWFISH_context *ctx, byte *out, const byte *in, byte *ctr); extern void _gcry_blowfish_arm_cbc_dec(BLOWFISH_context *ctx, byte *out, const byte *in, byte *iv); extern void _gcry_blowfish_arm_cfb_dec(BLOWFISH_context *ctx, byte *out, const byte *in, byte *iv); static void do_encrypt ( BLOWFISH_context *bc, u32 *ret_xl, u32 *ret_xr ) { _gcry_blowfish_arm_do_encrypt (bc, ret_xl, ret_xr); } static void do_encrypt_block (BLOWFISH_context *context, byte *outbuf, const byte *inbuf) { _gcry_blowfish_arm_encrypt_block (context, outbuf, inbuf); } static void do_decrypt_block (BLOWFISH_context *context, byte *outbuf, const byte *inbuf) { _gcry_blowfish_arm_decrypt_block (context, outbuf, inbuf); } static unsigned int encrypt_block (void *context , byte *outbuf, const byte *inbuf) { BLOWFISH_context *c = (BLOWFISH_context *) context; do_encrypt_block (c, outbuf, inbuf); return /*burn_stack*/ (10*4); } static unsigned int decrypt_block (void *context, byte *outbuf, const byte *inbuf) { BLOWFISH_context *c = (BLOWFISH_context *) context; do_decrypt_block (c, outbuf, inbuf); return /*burn_stack*/ (10*4); } #else /*USE_ARM_ASM*/ #if BLOWFISH_ROUNDS != 16 static inline u32 function_F( BLOWFISH_context *bc, u32 x ) { u16 a, b, c, d; #ifdef WORDS_BIGENDIAN a = ((byte*)&x)[0]; b = ((byte*)&x)[1]; c = ((byte*)&x)[2]; d = ((byte*)&x)[3]; #else a = ((byte*)&x)[3]; b = ((byte*)&x)[2]; c = ((byte*)&x)[1]; d = ((byte*)&x)[0]; #endif return ((bc->s0[a] + bc->s1[b]) ^ bc->s2[c] ) + bc->s3[d]; } #endif #ifdef WORDS_BIGENDIAN #define F(x) ((( s0[((byte*)&x)[0]] + s1[((byte*)&x)[1]]) \ ^ s2[((byte*)&x)[2]]) + s3[((byte*)&x)[3]] ) #else #define F(x) ((( s0[((byte*)&x)[3]] + s1[((byte*)&x)[2]]) \ ^ s2[((byte*)&x)[1]]) + s3[((byte*)&x)[0]] ) #endif #define R(l,r,i) do { l ^= p[i]; r ^= F(l); } while(0) static void do_encrypt ( BLOWFISH_context *bc, u32 *ret_xl, u32 *ret_xr ) { #if BLOWFISH_ROUNDS == 16 u32 xl, xr, *s0, *s1, *s2, *s3, *p; xl = *ret_xl; xr = *ret_xr; p = bc->p; s0 = bc->s0; s1 = bc->s1; s2 = bc->s2; s3 = bc->s3; R( xl, xr, 0); R( xr, xl, 1); R( xl, xr, 2); R( xr, xl, 3); R( xl, xr, 4); R( xr, xl, 5); R( xl, xr, 6); R( xr, xl, 7); R( xl, xr, 8); R( xr, xl, 9); R( xl, xr, 10); R( xr, xl, 11); R( xl, xr, 12); R( xr, xl, 13); R( xl, xr, 14); R( xr, xl, 15); xl ^= p[BLOWFISH_ROUNDS]; xr ^= p[BLOWFISH_ROUNDS+1]; *ret_xl = xr; *ret_xr = xl; #else u32 xl, xr, temp, *p; int i; xl = *ret_xl; xr = *ret_xr; p = bc->p; for(i=0; i < BLOWFISH_ROUNDS; i++ ) { xl ^= p[i]; xr ^= function_F(bc, xl); temp = xl; xl = xr; xr = temp; } temp = xl; xl = xr; xr = temp; xr ^= p[BLOWFISH_ROUNDS]; xl ^= p[BLOWFISH_ROUNDS+1]; *ret_xl = xl; *ret_xr = xr; #endif } static void decrypt ( BLOWFISH_context *bc, u32 *ret_xl, u32 *ret_xr ) { #if BLOWFISH_ROUNDS == 16 u32 xl, xr, *s0, *s1, *s2, *s3, *p; xl = *ret_xl; xr = *ret_xr; p = bc->p; s0 = bc->s0; s1 = bc->s1; s2 = bc->s2; s3 = bc->s3; R( xl, xr, 17); R( xr, xl, 16); R( xl, xr, 15); R( xr, xl, 14); R( xl, xr, 13); R( xr, xl, 12); R( xl, xr, 11); R( xr, xl, 10); R( xl, xr, 9); R( xr, xl, 8); R( xl, xr, 7); R( xr, xl, 6); R( xl, xr, 5); R( xr, xl, 4); R( xl, xr, 3); R( xr, xl, 2); xl ^= p[1]; xr ^= p[0]; *ret_xl = xr; *ret_xr = xl; #else u32 xl, xr, temp, *p; int i; xl = *ret_xl; xr = *ret_xr; p = bc->p; for (i=BLOWFISH_ROUNDS+1; i > 1; i-- ) { xl ^= p[i]; xr ^= function_F(bc, xl); temp = xl; xl = xr; xr = temp; } temp = xl; xl = xr; xr = temp; xr ^= p[1]; xl ^= p[0]; *ret_xl = xl; *ret_xr = xr; #endif } #undef F #undef R static void 
do_encrypt_block ( BLOWFISH_context *bc, byte *outbuf, const byte *inbuf ) { u32 d1, d2; d1 = buf_get_be32(inbuf); d2 = buf_get_be32(inbuf + 4); do_encrypt( bc, &d1, &d2 ); buf_put_be32(outbuf, d1); buf_put_be32(outbuf + 4, d2); } static unsigned int encrypt_block (void *context, byte *outbuf, const byte *inbuf) { BLOWFISH_context *bc = (BLOWFISH_context *) context; do_encrypt_block (bc, outbuf, inbuf); return /*burn_stack*/ (64); } static void do_decrypt_block (BLOWFISH_context *bc, byte *outbuf, const byte *inbuf) { u32 d1, d2; d1 = buf_get_be32(inbuf); d2 = buf_get_be32(inbuf + 4); decrypt( bc, &d1, &d2 ); buf_put_be32(outbuf, d1); buf_put_be32(outbuf + 4, d2); } static unsigned int decrypt_block (void *context, byte *outbuf, const byte *inbuf) { BLOWFISH_context *bc = (BLOWFISH_context *) context; do_decrypt_block (bc, outbuf, inbuf); return /*burn_stack*/ (64); } #endif /*!USE_AMD64_ASM&&!USE_ARM_ASM*/ /* Bulk encryption of complete blocks in CTR mode. This function is only intended for the bulk encryption feature of cipher.c. CTR is expected to be of size BLOWFISH_BLOCKSIZE. */ void _gcry_blowfish_ctr_enc(void *context, unsigned char *ctr, void *outbuf_arg, const void *inbuf_arg, size_t nblocks) { BLOWFISH_context *ctx = context; unsigned char *outbuf = outbuf_arg; const unsigned char *inbuf = inbuf_arg; unsigned char tmpbuf[BLOWFISH_BLOCKSIZE]; int burn_stack_depth = (64) + 2 * BLOWFISH_BLOCKSIZE; int i; #ifdef USE_AMD64_ASM { if (nblocks >= 4) burn_stack_depth += 5 * sizeof(void*); /* Process data in 4 block chunks. */ while (nblocks >= 4) { blowfish_amd64_ctr_enc(ctx, outbuf, inbuf, ctr); nblocks -= 4; outbuf += 4 * BLOWFISH_BLOCKSIZE; inbuf += 4 * BLOWFISH_BLOCKSIZE; } /* Use generic code to handle smaller chunks... */ /* TODO: use caching instead? */ } #elif defined(USE_ARM_ASM) { /* Process data in 2 block chunks. */ while (nblocks >= 2) { _gcry_blowfish_arm_ctr_enc(ctx, outbuf, inbuf, ctr); nblocks -= 2; outbuf += 2 * BLOWFISH_BLOCKSIZE; inbuf += 2 * BLOWFISH_BLOCKSIZE; } /* Use generic code to handle smaller chunks... */ /* TODO: use caching instead? */ } #endif for ( ;nblocks; nblocks-- ) { /* Encrypt the counter. */ do_encrypt_block(ctx, tmpbuf, ctr); /* XOR the input with the encrypted counter and store in output. */ buf_xor(outbuf, tmpbuf, inbuf, BLOWFISH_BLOCKSIZE); outbuf += BLOWFISH_BLOCKSIZE; inbuf += BLOWFISH_BLOCKSIZE; /* Increment the counter. */ for (i = BLOWFISH_BLOCKSIZE; i > 0; i--) { ctr[i-1]++; if (ctr[i-1]) break; } } wipememory(tmpbuf, sizeof(tmpbuf)); _gcry_burn_stack(burn_stack_depth); } /* Bulk decryption of complete blocks in CBC mode. This function is only intended for the bulk encryption feature of cipher.c. */ void _gcry_blowfish_cbc_dec(void *context, unsigned char *iv, void *outbuf_arg, const void *inbuf_arg, size_t nblocks) { BLOWFISH_context *ctx = context; unsigned char *outbuf = outbuf_arg; const unsigned char *inbuf = inbuf_arg; unsigned char savebuf[BLOWFISH_BLOCKSIZE]; int burn_stack_depth = (64) + 2 * BLOWFISH_BLOCKSIZE; #ifdef USE_AMD64_ASM { if (nblocks >= 4) burn_stack_depth += 5 * sizeof(void*); /* Process data in 4 block chunks. */ while (nblocks >= 4) { blowfish_amd64_cbc_dec(ctx, outbuf, inbuf, iv); nblocks -= 4; outbuf += 4 * BLOWFISH_BLOCKSIZE; inbuf += 4 * BLOWFISH_BLOCKSIZE; } /* Use generic code to handle smaller chunks... */ } #elif defined(USE_ARM_ASM) { /* Process data in 2 block chunks. 
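       (any remaining single block falls through to the generic loop below)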
*/
      while (nblocks >= 2)
        {
          _gcry_blowfish_arm_cbc_dec(ctx, outbuf, inbuf, iv);

          nblocks -= 2;
          outbuf += 2 * BLOWFISH_BLOCKSIZE;
          inbuf  += 2 * BLOWFISH_BLOCKSIZE;
        }

      /* Use generic code to handle smaller chunks... */
    }
#endif

  for ( ;nblocks; nblocks-- )
    {
      /* INBUF is needed later and it may be identical to OUTBUF, so store
         the intermediate result to SAVEBUF.  */
      do_decrypt_block (ctx, savebuf, inbuf);

      buf_xor_n_copy_2(outbuf, savebuf, iv, inbuf, BLOWFISH_BLOCKSIZE);
      inbuf += BLOWFISH_BLOCKSIZE;
      outbuf += BLOWFISH_BLOCKSIZE;
    }

  wipememory(savebuf, sizeof(savebuf));
  _gcry_burn_stack(burn_stack_depth);
}

/* Bulk decryption of complete blocks in CFB mode.  This function is only
   intended for the bulk encryption feature of cipher.c. */
void
_gcry_blowfish_cfb_dec(void *context, unsigned char *iv, void *outbuf_arg,
                       const void *inbuf_arg, size_t nblocks)
{
  BLOWFISH_context *ctx = context;
  unsigned char *outbuf = outbuf_arg;
  const unsigned char *inbuf = inbuf_arg;
  int burn_stack_depth = (64) + 2 * BLOWFISH_BLOCKSIZE;

#ifdef USE_AMD64_ASM
  {
    if (nblocks >= 4)
      burn_stack_depth += 5 * sizeof(void*);

    /* Process data in 4 block chunks. */
    while (nblocks >= 4)
      {
        blowfish_amd64_cfb_dec(ctx, outbuf, inbuf, iv);

        nblocks -= 4;
        outbuf += 4 * BLOWFISH_BLOCKSIZE;
        inbuf  += 4 * BLOWFISH_BLOCKSIZE;
      }

    /* Use generic code to handle smaller chunks... */
  }
#elif defined(USE_ARM_ASM)
  {
    /* Process data in 2 block chunks. */
    while (nblocks >= 2)
      {
        _gcry_blowfish_arm_cfb_dec(ctx, outbuf, inbuf, iv);

        nblocks -= 2;
        outbuf += 2 * BLOWFISH_BLOCKSIZE;
        inbuf  += 2 * BLOWFISH_BLOCKSIZE;
      }

    /* Use generic code to handle smaller chunks... */
  }
#endif

  for ( ;nblocks; nblocks-- )
    {
      do_encrypt_block(ctx, iv, iv);
      buf_xor_n_copy(outbuf, iv, inbuf, BLOWFISH_BLOCKSIZE);
      outbuf += BLOWFISH_BLOCKSIZE;
      inbuf  += BLOWFISH_BLOCKSIZE;
    }

  _gcry_burn_stack(burn_stack_depth);
}

/* Run the self-tests for BLOWFISH-CTR, tests IV increment of bulk CTR
   encryption.  Returns NULL on success. */
static const char *
selftest_ctr (void)
{
  const int nblocks = 4+1;
  const int blocksize = BLOWFISH_BLOCKSIZE;
  const int context_size = sizeof(BLOWFISH_context);

  return _gcry_selftest_helper_ctr("BLOWFISH", &bf_setkey,
           &encrypt_block, &_gcry_blowfish_ctr_enc, nblocks, blocksize,
           context_size);
}

/* Run the self-tests for BLOWFISH-CBC, tests bulk CBC decryption.
   Returns NULL on success. */
static const char *
selftest_cbc (void)
{
  const int nblocks = 4+2;
  const int blocksize = BLOWFISH_BLOCKSIZE;
  const int context_size = sizeof(BLOWFISH_context);

  return _gcry_selftest_helper_cbc("BLOWFISH", &bf_setkey,
           &encrypt_block, &_gcry_blowfish_cbc_dec, nblocks, blocksize,
           context_size);
}

/* Run the self-tests for BLOWFISH-CFB, tests bulk CFB decryption.
   Returns NULL on success.
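   (CFB decryption uses only the cipher's forward transform, which is why
   encrypt_block is passed to the helper below.)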
*/ static const char * selftest_cfb (void) { const int nblocks = 4+2; const int blocksize = BLOWFISH_BLOCKSIZE; const int context_size = sizeof(BLOWFISH_context); return _gcry_selftest_helper_cfb("BLOWFISH", &bf_setkey, &encrypt_block, &_gcry_blowfish_cfb_dec, nblocks, blocksize, context_size); } static const char* selftest(void) { BLOWFISH_context c; byte plain[] = "BLOWFISH"; byte buffer[8]; static const byte plain3[] = { 0xFE, 0xDC, 0xBA, 0x98, 0x76, 0x54, 0x32, 0x10 }; static const byte key3[] = { 0x41, 0x79, 0x6E, 0xA0, 0x52, 0x61, 0x6E, 0xE4 }; static const byte cipher3[] = { 0xE1, 0x13, 0xF4, 0x10, 0x2C, 0xFC, 0xCE, 0x43 }; const char *r; bf_setkey( (void *) &c, (const unsigned char*)"abcdefghijklmnopqrstuvwxyz", 26 ); encrypt_block( (void *) &c, buffer, plain ); if( memcmp( buffer, "\x32\x4E\xD0\xFE\xF4\x13\xA2\x03", 8 ) ) return "Blowfish selftest failed (1)."; decrypt_block( (void *) &c, buffer, buffer ); if( memcmp( buffer, plain, 8 ) ) return "Blowfish selftest failed (2)."; bf_setkey( (void *) &c, key3, 8 ); encrypt_block( (void *) &c, buffer, plain3 ); if( memcmp( buffer, cipher3, 8 ) ) return "Blowfish selftest failed (3)."; decrypt_block( (void *) &c, buffer, buffer ); if( memcmp( buffer, plain3, 8 ) ) return "Blowfish selftest failed (4)."; if ( (r = selftest_cbc ()) ) return r; if ( (r = selftest_cfb ()) ) return r; if ( (r = selftest_ctr ()) ) return r; return NULL; } struct hashset_elem { u32 val; short nidx; char used; }; static inline byte val_to_hidx(u32 val) { /* bf sboxes are quite random already. */ return (val >> 24) ^ (val >> 16) ^ (val >> 8) ^ val; } static inline int add_val(struct hashset_elem hset[256], u32 val, int *midx, struct hashset_elem *mpool) { struct hashset_elem *elem; byte hidx; hidx = val_to_hidx(val); elem = &hset[hidx]; /* Check if first is in use. */ if (elem->used == 0) { elem->val = val; elem->nidx = -1; elem->used = 1; return 0; } /* Check if first matches. */ if (elem->val == val) return 1; for (; elem->nidx >= 0; elem = &mpool[elem->nidx]) { /* Check if elem matches. */ if (elem->val == val) return 1; } elem->nidx = (*midx)++; elem = &mpool[elem->nidx]; elem->val = val; elem->nidx = -1; elem->used = 1; return 0; } static gcry_err_code_t do_bf_setkey (BLOWFISH_context *c, const byte *key, unsigned keylen) { struct hashset_elem mempool[4 * 255]; /* Enough entries for the worst case. */ struct hashset_elem hset[4][256]; int memidx = 0; int weak = 0; int i, j, ret; u32 data, datal, datar; static int initialized; static const char *selftest_failed; if( !initialized ) { initialized = 1; selftest_failed = selftest(); if( selftest_failed ) log_error ("%s\n", selftest_failed ); } if( selftest_failed ) return GPG_ERR_SELFTEST_FAILED; memset(hset, 0, sizeof(hset)); for(i=0; i < BLOWFISH_ROUNDS+2; i++ ) c->p[i] = ps[i]; for(i=0; i < 256; i++ ) { c->s0[i] = ks0[i]; c->s1[i] = ks1[i]; c->s2[i] = ks2[i]; c->s3[i] = ks3[i]; } for(i=j=0; i < BLOWFISH_ROUNDS+2; i++ ) { data = ((u32)key[j] << 24) | ((u32)key[(j+1)%keylen] << 16) | ((u32)key[(j+2)%keylen] << 8) | ((u32)key[(j+3)%keylen]); c->p[i] ^= data; j = (j+4) % keylen; } datal = datar = 0; for(i=0; i < BLOWFISH_ROUNDS+2; i += 2 ) { do_encrypt( c, &datal, &datar ); c->p[i] = datal; c->p[i+1] = datar; } for(i=0; i < 256; i += 2 ) { do_encrypt( c, &datal, &datar ); c->s0[i] = datal; c->s0[i+1] = datar; /* Add values to hashset, detect duplicates (weak keys). */ ret = add_val (hset[0], datal, &memidx, mempool); weak = ret ? 1 : weak; ret = add_val (hset[0], datar, &memidx, mempool); weak = ret ? 
1 : weak; } for(i=0; i < 256; i += 2 ) { do_encrypt( c, &datal, &datar ); c->s1[i] = datal; c->s1[i+1] = datar; /* Add values to hashset, detect duplicates (weak keys). */ ret = add_val (hset[1], datal, &memidx, mempool); weak = ret ? 1 : weak; ret = add_val (hset[1], datar, &memidx, mempool); weak = ret ? 1 : weak; } for(i=0; i < 256; i += 2 ) { do_encrypt( c, &datal, &datar ); c->s2[i] = datal; c->s2[i+1] = datar; /* Add values to hashset, detect duplicates (weak keys). */ ret = add_val (hset[2], datal, &memidx, mempool); weak = ret ? 1 : weak; ret = add_val (hset[2], datar, &memidx, mempool); weak = ret ? 1 : weak; } for(i=0; i < 256; i += 2 ) { do_encrypt( c, &datal, &datar ); c->s3[i] = datal; c->s3[i+1] = datar; /* Add values to hashset, detect duplicates (weak keys). */ ret = add_val (hset[3], datal, &memidx, mempool); weak = ret ? 1 : weak; ret = add_val (hset[3], datar, &memidx, mempool); weak = ret ? 1 : weak; } /* Clear stack. */ wipememory(hset, sizeof(hset)); wipememory(mempool, sizeof(mempool[0]) * memidx); _gcry_burn_stack (64); /* Check for weak key. A weak key is a key in which a value in the P-array (here c) occurs more than once per table. */ if (weak) return GPG_ERR_WEAK_KEY; return GPG_ERR_NO_ERROR; } static gcry_err_code_t bf_setkey (void *context, const byte *key, unsigned keylen) { BLOWFISH_context *c = (BLOWFISH_context *) context; gcry_err_code_t rc = do_bf_setkey (c, key, keylen); return rc; } gcry_cipher_spec_t _gcry_cipher_spec_blowfish = { GCRY_CIPHER_BLOWFISH, {0, 0}, "BLOWFISH", NULL, NULL, BLOWFISH_BLOCKSIZE, 128, sizeof (BLOWFISH_context), bf_setkey, encrypt_block, decrypt_block }; diff --git a/cipher/cast5-amd64.S b/cipher/cast5-amd64.S index c04015a2..1a1d43fd 100644 --- a/cipher/cast5-amd64.S +++ b/cipher/cast5-amd64.S @@ -1,605 +1,599 @@ /* cast5-amd64.S - AMD64 assembly implementation of CAST5 cipher * * Copyright (C) 2013 Jussi Kivilinna * * This file is part of Libgcrypt. * * Libgcrypt is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as * published by the Free Software Foundation; either version 2.1 of * the License, or (at your option) any later version. * * Libgcrypt is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this program; if not, see . */ #ifdef __x86_64 #include #if (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \ defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS)) && defined(USE_CAST5) -#if defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS) || !defined(__PIC__) -# define GET_EXTERN_POINTER(name, reg) movabsq $name, reg -#else -# ifdef __code_model_large__ -# define GET_EXTERN_POINTER(name, reg) \ - pushq %r15; \ - pushq %r14; \ - 1: leaq 1b(%rip), reg; \ - movabsq $_GLOBAL_OFFSET_TABLE_-1b, %r14; \ - movabsq $name@GOT, %r15; \ - addq %r14, reg; \ - popq %r14; \ - movq (reg, %r15), reg; \ - popq %r15; -# else -# define GET_EXTERN_POINTER(name, reg) movq name@GOTPCREL(%rip), reg -# endif -#endif - -#ifdef HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS -# define ELF(...) __VA_ARGS__ -#else -# define ELF(...) 
/*_*/ -#endif +#include "asm-common-amd64.h" .text .extern _gcry_cast5_s1to4; #define s1 0 #define s2 (s1 + (4 * 256)) #define s3 (s2 + (4 * 256)) #define s4 (s3 + (4 * 256)) /* structure of CAST5_context: */ #define Km 0 #define Kr (Km + (16 * 4)) /* register macros */ #define CTX %rdi #define RIO %rsi #define RTAB %r8 #define RLR0 %r9 #define RLR1 %r10 #define RLR2 %r11 #define RLR3 %r12 #define RLR0d %r9d #define RLR1d %r10d #define RLR2d %r11d #define RLR3d %r12d #define RX0 %rax #define RX1 %rbx #define RX2 %rdx #define RX0d %eax #define RX1d %ebx #define RX2d %edx #define RX0bl %al #define RX1bl %bl #define RX2bl %dl #define RX0bh %ah #define RX1bh %bh #define RX2bh %dh #define RKR %rcx #define RKRd %ecx #define RKRbl %cl #define RT0 %rbp #define RT1 %rsi #define RT0d %ebp #define RT1d %esi #define RKM0d %r13d #define RKM1d %r14d /*********************************************************************** * 1-way cast5 ***********************************************************************/ #define dummy(x) #define shr_kr(none) \ shrq $8, RKR; #define F(km, load_next_kr, op0, op1, op2, op3) \ op0 ## l RLR0d, km ## d; \ roll RKRbl, km ## d; \ rorq $32, RLR0; \ movzbl km ## bh, RT0d; \ movzbl km ## bl, RT1d; \ roll $16, km ## d; \ movl s1(RTAB,RT0,4), RT0d; \ op1 ## l s2(RTAB,RT1,4), RT0d; \ load_next_kr(kr_next); \ movzbl km ## bh, RT1d; \ movzbl km ## bl, km ## d; \ op2 ## l s3(RTAB,RT1,4), RT0d; \ op3 ## l s4(RTAB,km,4), RT0d; \ xorq RT0, RLR0; #define F1(km, load_next_kr) \ F(##km, load_next_kr, add, xor, sub, add) #define F2(km, load_next_kr) \ F(##km, load_next_kr, xor, sub, add, xor) #define F3(km, load_next_kr) \ F(##km, load_next_kr, sub, add, xor, sub) #define get_round_km(n, km) \ movl Km+4*(n)(CTX), km; #define get_round_kr_enc(n) \ movq $0x1010101010101010, RKR; \ \ /* merge rorl rk and rorl $16 */ \ xorq Kr+(n)(CTX), RKR; #define get_round_kr_dec(n) \ movq $0x1010101010101010, RKR; \ \ /* merge rorl rk and rorl $16 */ \ xorq Kr+(n - 7)(CTX), RKR; \ bswapq RKR; #define round_enc(n, FA, FB, fn1, fn2) \ get_round_km(n + 1, RX2d); \ FA(RX0, fn1); \ get_round_km(n + 2, RX0d); \ FB(RX2, fn2); #define round_enc_last(n, FXA, FXB) \ get_round_km(n + 1, RX2d); \ \ FXA(RX0, shr_kr); \ FXB(RX2, dummy); #define round_enc_1(n, FA, FB) \ round_enc(n, FA, FB, shr_kr, shr_kr) #define round_enc_2(n, FA, FB) \ round_enc(n, FA, FB, shr_kr, dummy) #define round_dec(n, FA, FB, fn1, fn2) \ get_round_km(n - 1, RX2d); \ FA(RX0, fn1); \ get_round_km(n - 2, RX0d); \ FB(RX2, fn2); #define round_dec_last(n, FXA, FXB) \ get_round_km(n - 1, RX2d); \ FXA(RX0, shr_kr); \ FXB(RX2, dummy); #define round_dec_1(n, FA, FB) \ round_dec(n, FA, FB, shr_kr, shr_kr) #define round_dec_2(n, FA, FB) \ round_dec(n, FA, FB, shr_kr, dummy) #define read_block() \ movq (RIO), RLR0; \ bswapq RLR0; #define write_block() \ bswapq RLR0; \ rorq $32, RLR0; \ movq RLR0, (RIO); .align 8 .globl _gcry_cast5_amd64_encrypt_block ELF(.type _gcry_cast5_amd64_encrypt_block,@function;) _gcry_cast5_amd64_encrypt_block: /* input: * %rdi: ctx, CTX * %rsi: dst * %rdx: src */ + ENTER_SYSV_FUNC_PARAMS_0_4 + pushq %rbp; pushq %rbx; movq %rsi, %r10; GET_EXTERN_POINTER(_gcry_cast5_s1to4, RTAB); movq %rdx, RIO; read_block(); get_round_km(0, RX0d); get_round_kr_enc(0); round_enc_1(0, F1, F2); round_enc_1(2, F3, F1); round_enc_1(4, F2, F3); round_enc_2(6, F1, F2); get_round_kr_enc(8); round_enc_1(8, F3, F1); round_enc_1(10, F2, F3); round_enc_1(12, F1, F2); round_enc_last(14, F3, F1); movq %r10, RIO; write_block(); popq %rbx; popq %rbp; + + 
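/* EXIT_SYSV_FUNC below pairs with ENTER_SYSV_FUNC_PARAMS_0_4 at function entry: on Win64 builds it restores the caller's %rdi/%rsi saved there, completing the Win64-to-SysV argument-register translation; on SysV hosts both asm-common-amd64.h macros expand to nothing. */ +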
EXIT_SYSV_FUNC ret; ELF(.size _gcry_cast5_amd64_encrypt_block,.-_gcry_cast5_amd64_encrypt_block;) .align 8 .globl _gcry_cast5_amd64_decrypt_block ELF(.type _gcry_cast5_amd64_decrypt_block,@function;) _gcry_cast5_amd64_decrypt_block: /* input: * %rdi: ctx, CTX * %rsi: dst * %rdx: src */ + ENTER_SYSV_FUNC_PARAMS_0_4 + pushq %rbp; pushq %rbx; movq %rsi, %r10; GET_EXTERN_POINTER(_gcry_cast5_s1to4, RTAB); movq %rdx, RIO; read_block(); get_round_km(15, RX0d); get_round_kr_dec(15); round_dec_1(15, F1, F3); round_dec_1(13, F2, F1); round_dec_1(11, F3, F2); round_dec_2(9, F1, F3); get_round_kr_dec(7); round_dec_1(7, F2, F1); round_dec_1(5, F3, F2); round_dec_1(3, F1, F3); round_dec_last(1, F2, F1); movq %r10, RIO; write_block(); popq %rbx; popq %rbp; + + EXIT_SYSV_FUNC ret; ELF(.size _gcry_cast5_amd64_decrypt_block,.-_gcry_cast5_amd64_decrypt_block;) /********************************************************************** 4-way cast5, four blocks parallel **********************************************************************/ #define F_tail(rlr, rx, op1, op2, op3) \ movzbl rx ## bh, RT0d; \ movzbl rx ## bl, RT1d; \ roll $16, rx ## d; \ movl s1(RTAB,RT0,4), RT0d; \ op1 ## l s2(RTAB,RT1,4), RT0d; \ movzbl rx ## bh, RT1d; \ movzbl rx ## bl, rx ## d; \ op2 ## l s3(RTAB,RT1,4), RT0d; \ op3 ## l s4(RTAB,rx,4), RT0d; \ xorq RT0, rlr; #define F4(km, load_next_kr, op0, op1, op2, op3) \ movl km, RX0d; \ op0 ## l RLR0d, RX0d; \ roll RKRbl, RX0d; \ rorq $32, RLR0; \ \ movl km, RX1d; \ op0 ## l RLR1d, RX1d; \ roll RKRbl, RX1d; \ rorq $32, RLR1; \ \ movl km, RX2d; \ op0 ## l RLR2d, RX2d; \ roll RKRbl, RX2d; \ rorq $32, RLR2; \ \ F_tail(RLR0, RX0, op1, op2, op3); \ F_tail(RLR1, RX1, op1, op2, op3); \ F_tail(RLR2, RX2, op1, op2, op3); \ \ movl km, RX0d; \ op0 ## l RLR3d, RX0d; \ roll RKRbl, RX0d; \ load_next_kr(); \ rorq $32, RLR3; \ \ F_tail(RLR3, RX0, op1, op2, op3); #define F4_1(km, load_next_kr) \ F4(km, load_next_kr, add, xor, sub, add) #define F4_2(km, load_next_kr) \ F4(km, load_next_kr, xor, sub, add, xor) #define F4_3(km, load_next_kr) \ F4(km, load_next_kr, sub, add, xor, sub) #define round_enc4(n, FA, FB, fn1, fn2) \ get_round_km(n + 1, RKM1d); \ FA(RKM0d, fn1); \ get_round_km(n + 2, RKM0d); \ FB(RKM1d, fn2); #define round_enc_last4(n, FXA, FXB) \ get_round_km(n + 1, RKM1d); \ FXA(RKM0d, shr_kr); \ FXB(RKM1d, dummy); #define round_enc4_1(n, FA, FB) \ round_enc4(n, FA, FB, shr_kr, shr_kr); #define round_enc4_2(n, FA, FB) \ round_enc4(n, FA, FB, shr_kr, dummy); #define round_dec4(n, FA, FB, fn1, fn2) \ get_round_km(n - 1, RKM1d); \ FA(RKM0d, fn1); \ get_round_km(n - 2, RKM0d); \ FB(RKM1d, fn2); #define round_dec_last4(n, FXA, FXB) \ get_round_km(n - 1, RKM1d); \ FXA(RKM0d, shr_kr); \ FXB(RKM1d, dummy); #define round_dec4_1(n, FA, FB) \ round_dec4(n, FA, FB, shr_kr, shr_kr); #define round_dec4_2(n, FA, FB) \ round_dec4(n, FA, FB, shr_kr, dummy); #define inbswap_block4(a, b, c, d) \ bswapq a; \ bswapq b; \ bswapq c; \ bswapq d; #define outbswap_block4(a, b, c, d) \ bswapq a; \ bswapq b; \ bswapq c; \ bswapq d; \ rorq $32, a; \ rorq $32, b; \ rorq $32, c; \ rorq $32, d; .align 8 ELF(.type __cast5_enc_blk4,@function;) __cast5_enc_blk4: /* input: * %rdi: ctx, CTX * RLR0,RLR1,RLR2,RLR3: four input plaintext blocks * output: * RLR0,RLR1,RLR2,RLR3: four output ciphertext blocks */ GET_EXTERN_POINTER(_gcry_cast5_s1to4, RTAB); get_round_km(0, RKM0d); get_round_kr_enc(0); round_enc4_1(0, F4_1, F4_2); round_enc4_1(2, F4_3, F4_1); round_enc4_1(4, F4_2, F4_3); round_enc4_2(6, F4_1, F4_2); get_round_kr_enc(8); 
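/* Rounds 9..16 follow; get_round_kr_enc(8) above loaded the second half of the 16-byte rotate-key schedule. */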
round_enc4_1(8, F4_3, F4_1); round_enc4_1(10, F4_2, F4_3); round_enc4_1(12, F4_1, F4_2); round_enc_last4(14, F4_3, F4_1); outbswap_block4(RLR0, RLR1, RLR2, RLR3); ret; ELF(.size __cast5_enc_blk4,.-__cast5_enc_blk4;) .align 8 ELF(.type __cast5_dec_blk4,@function;) __cast5_dec_blk4: /* input: * %rdi: ctx, CTX * RLR0,RLR1,RLR2,RLR3: four input ciphertext blocks * output: * RLR0,RLR1,RLR2,RLR3: four output plaintext blocks */ GET_EXTERN_POINTER(_gcry_cast5_s1to4, RTAB); inbswap_block4(RLR0, RLR1, RLR2, RLR3); get_round_km(15, RKM0d); get_round_kr_dec(15); round_dec4_1(15, F4_1, F4_3); round_dec4_1(13, F4_2, F4_1); round_dec4_1(11, F4_3, F4_2); round_dec4_2(9, F4_1, F4_3); get_round_kr_dec(7); round_dec4_1(7, F4_2, F4_1); round_dec4_1(5, F4_3, F4_2); round_dec4_1(3, F4_1, F4_3); round_dec_last4(1, F4_2, F4_1); outbswap_block4(RLR0, RLR1, RLR2, RLR3); ret; ELF(.size __cast5_dec_blk4,.-__cast5_dec_blk4;) .align 8 .globl _gcry_cast5_amd64_ctr_enc ELF(.type _gcry_cast5_amd64_ctr_enc,@function;) _gcry_cast5_amd64_ctr_enc: /* input: * %rdi: ctx, CTX * %rsi: dst (4 blocks) * %rdx: src (4 blocks) * %rcx: iv (big endian, 64bit) */ + ENTER_SYSV_FUNC_PARAMS_0_4 pushq %rbp; pushq %rbx; pushq %r12; pushq %r13; pushq %r14; pushq %rsi; pushq %rdx; /* load IV and byteswap */ movq (%rcx), RX0; bswapq RX0; movq RX0, RLR0; /* construct IVs */ leaq 1(RX0), RLR1; leaq 2(RX0), RLR2; leaq 3(RX0), RLR3; leaq 4(RX0), RX0; bswapq RX0; /* store new IV */ movq RX0, (%rcx); call __cast5_enc_blk4; popq %r14; /*src*/ popq %r13; /*dst*/ /* XOR key-stream with plaintext */ xorq 0 * 8(%r14), RLR0; xorq 1 * 8(%r14), RLR1; xorq 2 * 8(%r14), RLR2; xorq 3 * 8(%r14), RLR3; movq RLR0, 0 * 8(%r13); movq RLR1, 1 * 8(%r13); movq RLR2, 2 * 8(%r13); movq RLR3, 3 * 8(%r13); popq %r14; popq %r13; popq %r12; popq %rbx; popq %rbp; + + EXIT_SYSV_FUNC ret ELF(.size _gcry_cast5_amd64_ctr_enc,.-_gcry_cast5_amd64_ctr_enc;) .align 8 .globl _gcry_cast5_amd64_cbc_dec ELF(.type _gcry_cast5_amd64_cbc_dec,@function;) _gcry_cast5_amd64_cbc_dec: /* input: * %rdi: ctx, CTX * %rsi: dst (4 blocks) * %rdx: src (4 blocks) * %rcx: iv (64bit) */ + ENTER_SYSV_FUNC_PARAMS_0_4 pushq %rbp; pushq %rbx; pushq %r12; pushq %r13; pushq %r14; pushq %rcx; pushq %rsi; pushq %rdx; /* load input */ movq 0 * 8(%rdx), RLR0; movq 1 * 8(%rdx), RLR1; movq 2 * 8(%rdx), RLR2; movq 3 * 8(%rdx), RLR3; call __cast5_dec_blk4; popq RX0; /*src*/ popq RX1; /*dst*/ popq RX2; /*iv*/ movq 3 * 8(RX0), %r14; xorq (RX2), RLR0; xorq 0 * 8(RX0), RLR1; xorq 1 * 8(RX0), RLR2; xorq 2 * 8(RX0), RLR3; movq %r14, (RX2); /* store new IV */ movq RLR0, 0 * 8(RX1); movq RLR1, 1 * 8(RX1); movq RLR2, 2 * 8(RX1); movq RLR3, 3 * 8(RX1); popq %r14; popq %r13; popq %r12; popq %rbx; popq %rbp; + + EXIT_SYSV_FUNC ret; ELF(.size _gcry_cast5_amd64_cbc_dec,.-_gcry_cast5_amd64_cbc_dec;) .align 8 .globl _gcry_cast5_amd64_cfb_dec ELF(.type _gcry_cast5_amd64_cfb_dec,@function;) _gcry_cast5_amd64_cfb_dec: /* input: * %rdi: ctx, CTX * %rsi: dst (4 blocks) * %rdx: src (4 blocks) * %rcx: iv (64bit) */ + ENTER_SYSV_FUNC_PARAMS_0_4 pushq %rbp; pushq %rbx; pushq %r12; pushq %r13; pushq %r14; pushq %rsi; pushq %rdx; /* Load input */ movq (%rcx), RLR0; movq 0 * 8(%rdx), RLR1; movq 1 * 8(%rdx), RLR2; movq 2 * 8(%rdx), RLR3; inbswap_block4(RLR0, RLR1, RLR2, RLR3); /* Update IV */ movq 3 * 8(%rdx), %rdx; movq %rdx, (%rcx); call __cast5_enc_blk4; popq %rdx; /*src*/ popq %rcx; /*dst*/ xorq 0 * 8(%rdx), RLR0; xorq 1 * 8(%rdx), RLR1; xorq 2 * 8(%rdx), RLR2; xorq 3 * 8(%rdx), RLR3; movq RLR0, 0 * 8(%rcx); movq RLR1, 1 * 8(%rcx); movq 
RLR2, 2 * 8(%rcx); movq RLR3, 3 * 8(%rcx); popq %r14; popq %r13; popq %r12; popq %rbx; popq %rbp; + + EXIT_SYSV_FUNC ret; ELF(.size _gcry_cast5_amd64_cfb_dec,.-_gcry_cast5_amd64_cfb_dec;) #endif /*defined(USE_CAST5)*/ #endif /*__x86_64*/ diff --git a/cipher/cast5.c b/cipher/cast5.c index 94dcee76..d23882b9 100644 --- a/cipher/cast5.c +++ b/cipher/cast5.c @@ -1,1045 +1,1007 @@ /* cast5.c - CAST5 cipher (RFC2144) * Copyright (C) 1998, 2001, 2002, 2003 Free Software Foundation, Inc. * * This file is part of Libgcrypt. * * Libgcrypt is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as * published by the Free Software Foundation; either version 2.1 of * the License, or (at your option) any later version. * * Libgcrypt is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA */ /* Test vectors: * * 128-bit key = 01 23 45 67 12 34 56 78 23 45 67 89 34 56 78 9A * plaintext = 01 23 45 67 89 AB CD EF * ciphertext = 23 8B 4F E5 84 7E 44 B2 * * 80-bit key = 01 23 45 67 12 34 56 78 23 45 * = 01 23 45 67 12 34 56 78 23 45 00 00 00 00 00 00 * plaintext = 01 23 45 67 89 AB CD EF * ciphertext = EB 6A 71 1A 2C 02 27 1B * * 40-bit key = 01 23 45 67 12 * = 01 23 45 67 12 00 00 00 00 00 00 00 00 00 00 00 * plaintext = 01 23 45 67 89 AB CD EF * ciphertext = 7A C8 16 D1 6E 9B 30 2E */ #include <config.h> #include <stdio.h> #include <string.h> #include <stdlib.h> #include "g10lib.h" #include "types.h" #include "cipher.h" #include "bithelp.h" #include "bufhelp.h" #include "cipher-selftest.h" /* USE_AMD64_ASM indicates whether to use AMD64 assembly code. */ #undef USE_AMD64_ASM #if defined(__x86_64__) && (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \ defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS)) # define USE_AMD64_ASM 1 #endif /* USE_ARM_ASM indicates whether to use ARM assembly code. 
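It is only set for little-endian ARM (__ARMEL__) when the assembler accepts the GCC-style ARM assembly, as tested just below.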
*/ #undef USE_ARM_ASM #if defined(__ARMEL__) # ifdef HAVE_COMPATIBLE_GCC_ARM_PLATFORM_AS # define USE_ARM_ASM 1 # endif #endif #define CAST5_BLOCKSIZE 8 typedef struct { u32 Km[16]; byte Kr[16]; #ifdef USE_ARM_ASM u32 Kr_arm_enc[16 / sizeof(u32)]; u32 Kr_arm_dec[16 / sizeof(u32)]; #endif } CAST5_context; static gcry_err_code_t cast_setkey (void *c, const byte *key, unsigned keylen); static unsigned int encrypt_block (void *c, byte *outbuf, const byte *inbuf); static unsigned int decrypt_block (void *c, byte *outbuf, const byte *inbuf); #define s1 _gcry_cast5_s1to4[0] #define s2 _gcry_cast5_s1to4[1] #define s3 _gcry_cast5_s1to4[2] #define s4 _gcry_cast5_s1to4[3] const u32 _gcry_cast5_s1to4[4][256] = { { 0x30fb40d4, 0x9fa0ff0b, 0x6beccd2f, 0x3f258c7a, 0x1e213f2f, 0x9c004dd3, 0x6003e540, 0xcf9fc949, 0xbfd4af27, 0x88bbbdb5, 0xe2034090, 0x98d09675, 0x6e63a0e0, 0x15c361d2, 0xc2e7661d, 0x22d4ff8e, 0x28683b6f, 0xc07fd059, 0xff2379c8, 0x775f50e2, 0x43c340d3, 0xdf2f8656, 0x887ca41a, 0xa2d2bd2d, 0xa1c9e0d6, 0x346c4819, 0x61b76d87, 0x22540f2f, 0x2abe32e1, 0xaa54166b, 0x22568e3a, 0xa2d341d0, 0x66db40c8, 0xa784392f, 0x004dff2f, 0x2db9d2de, 0x97943fac, 0x4a97c1d8, 0x527644b7, 0xb5f437a7, 0xb82cbaef, 0xd751d159, 0x6ff7f0ed, 0x5a097a1f, 0x827b68d0, 0x90ecf52e, 0x22b0c054, 0xbc8e5935, 0x4b6d2f7f, 0x50bb64a2, 0xd2664910, 0xbee5812d, 0xb7332290, 0xe93b159f, 0xb48ee411, 0x4bff345d, 0xfd45c240, 0xad31973f, 0xc4f6d02e, 0x55fc8165, 0xd5b1caad, 0xa1ac2dae, 0xa2d4b76d, 0xc19b0c50, 0x882240f2, 0x0c6e4f38, 0xa4e4bfd7, 0x4f5ba272, 0x564c1d2f, 0xc59c5319, 0xb949e354, 0xb04669fe, 0xb1b6ab8a, 0xc71358dd, 0x6385c545, 0x110f935d, 0x57538ad5, 0x6a390493, 0xe63d37e0, 0x2a54f6b3, 0x3a787d5f, 0x6276a0b5, 0x19a6fcdf, 0x7a42206a, 0x29f9d4d5, 0xf61b1891, 0xbb72275e, 0xaa508167, 0x38901091, 0xc6b505eb, 0x84c7cb8c, 0x2ad75a0f, 0x874a1427, 0xa2d1936b, 0x2ad286af, 0xaa56d291, 0xd7894360, 0x425c750d, 0x93b39e26, 0x187184c9, 0x6c00b32d, 0x73e2bb14, 0xa0bebc3c, 0x54623779, 0x64459eab, 0x3f328b82, 0x7718cf82, 0x59a2cea6, 0x04ee002e, 0x89fe78e6, 0x3fab0950, 0x325ff6c2, 0x81383f05, 0x6963c5c8, 0x76cb5ad6, 0xd49974c9, 0xca180dcf, 0x380782d5, 0xc7fa5cf6, 0x8ac31511, 0x35e79e13, 0x47da91d0, 0xf40f9086, 0xa7e2419e, 0x31366241, 0x051ef495, 0xaa573b04, 0x4a805d8d, 0x548300d0, 0x00322a3c, 0xbf64cddf, 0xba57a68e, 0x75c6372b, 0x50afd341, 0xa7c13275, 0x915a0bf5, 0x6b54bfab, 0x2b0b1426, 0xab4cc9d7, 0x449ccd82, 0xf7fbf265, 0xab85c5f3, 0x1b55db94, 0xaad4e324, 0xcfa4bd3f, 0x2deaa3e2, 0x9e204d02, 0xc8bd25ac, 0xeadf55b3, 0xd5bd9e98, 0xe31231b2, 0x2ad5ad6c, 0x954329de, 0xadbe4528, 0xd8710f69, 0xaa51c90f, 0xaa786bf6, 0x22513f1e, 0xaa51a79b, 0x2ad344cc, 0x7b5a41f0, 0xd37cfbad, 0x1b069505, 0x41ece491, 0xb4c332e6, 0x032268d4, 0xc9600acc, 0xce387e6d, 0xbf6bb16c, 0x6a70fb78, 0x0d03d9c9, 0xd4df39de, 0xe01063da, 0x4736f464, 0x5ad328d8, 0xb347cc96, 0x75bb0fc3, 0x98511bfb, 0x4ffbcc35, 0xb58bcf6a, 0xe11f0abc, 0xbfc5fe4a, 0xa70aec10, 0xac39570a, 0x3f04442f, 0x6188b153, 0xe0397a2e, 0x5727cb79, 0x9ceb418f, 0x1cacd68d, 0x2ad37c96, 0x0175cb9d, 0xc69dff09, 0xc75b65f0, 0xd9db40d8, 0xec0e7779, 0x4744ead4, 0xb11c3274, 0xdd24cb9e, 0x7e1c54bd, 0xf01144f9, 0xd2240eb1, 0x9675b3fd, 0xa3ac3755, 0xd47c27af, 0x51c85f4d, 0x56907596, 0xa5bb15e6, 0x580304f0, 0xca042cf1, 0x011a37ea, 0x8dbfaadb, 0x35ba3e4a, 0x3526ffa0, 0xc37b4d09, 0xbc306ed9, 0x98a52666, 0x5648f725, 0xff5e569d, 0x0ced63d0, 0x7c63b2cf, 0x700b45e1, 0xd5ea50f1, 0x85a92872, 0xaf1fbda7, 0xd4234870, 0xa7870bf3, 0x2d3b4d79, 0x42e04198, 0x0cd0ede7, 0x26470db8, 0xf881814c, 0x474d6ad7, 0x7c0c5e5c, 0xd1231959, 0x381b7298, 
0xf5d2f4db, 0xab838653, 0x6e2f1e23, 0x83719c9e, 0xbd91e046, 0x9a56456e, 0xdc39200c, 0x20c8c571, 0x962bda1c, 0xe1e696ff, 0xb141ab08, 0x7cca89b9, 0x1a69e783, 0x02cc4843, 0xa2f7c579, 0x429ef47d, 0x427b169c, 0x5ac9f049, 0xdd8f0f00, 0x5c8165bf }, { 0x1f201094, 0xef0ba75b, 0x69e3cf7e, 0x393f4380, 0xfe61cf7a, 0xeec5207a, 0x55889c94, 0x72fc0651, 0xada7ef79, 0x4e1d7235, 0xd55a63ce, 0xde0436ba, 0x99c430ef, 0x5f0c0794, 0x18dcdb7d, 0xa1d6eff3, 0xa0b52f7b, 0x59e83605, 0xee15b094, 0xe9ffd909, 0xdc440086, 0xef944459, 0xba83ccb3, 0xe0c3cdfb, 0xd1da4181, 0x3b092ab1, 0xf997f1c1, 0xa5e6cf7b, 0x01420ddb, 0xe4e7ef5b, 0x25a1ff41, 0xe180f806, 0x1fc41080, 0x179bee7a, 0xd37ac6a9, 0xfe5830a4, 0x98de8b7f, 0x77e83f4e, 0x79929269, 0x24fa9f7b, 0xe113c85b, 0xacc40083, 0xd7503525, 0xf7ea615f, 0x62143154, 0x0d554b63, 0x5d681121, 0xc866c359, 0x3d63cf73, 0xcee234c0, 0xd4d87e87, 0x5c672b21, 0x071f6181, 0x39f7627f, 0x361e3084, 0xe4eb573b, 0x602f64a4, 0xd63acd9c, 0x1bbc4635, 0x9e81032d, 0x2701f50c, 0x99847ab4, 0xa0e3df79, 0xba6cf38c, 0x10843094, 0x2537a95e, 0xf46f6ffe, 0xa1ff3b1f, 0x208cfb6a, 0x8f458c74, 0xd9e0a227, 0x4ec73a34, 0xfc884f69, 0x3e4de8df, 0xef0e0088, 0x3559648d, 0x8a45388c, 0x1d804366, 0x721d9bfd, 0xa58684bb, 0xe8256333, 0x844e8212, 0x128d8098, 0xfed33fb4, 0xce280ae1, 0x27e19ba5, 0xd5a6c252, 0xe49754bd, 0xc5d655dd, 0xeb667064, 0x77840b4d, 0xa1b6a801, 0x84db26a9, 0xe0b56714, 0x21f043b7, 0xe5d05860, 0x54f03084, 0x066ff472, 0xa31aa153, 0xdadc4755, 0xb5625dbf, 0x68561be6, 0x83ca6b94, 0x2d6ed23b, 0xeccf01db, 0xa6d3d0ba, 0xb6803d5c, 0xaf77a709, 0x33b4a34c, 0x397bc8d6, 0x5ee22b95, 0x5f0e5304, 0x81ed6f61, 0x20e74364, 0xb45e1378, 0xde18639b, 0x881ca122, 0xb96726d1, 0x8049a7e8, 0x22b7da7b, 0x5e552d25, 0x5272d237, 0x79d2951c, 0xc60d894c, 0x488cb402, 0x1ba4fe5b, 0xa4b09f6b, 0x1ca815cf, 0xa20c3005, 0x8871df63, 0xb9de2fcb, 0x0cc6c9e9, 0x0beeff53, 0xe3214517, 0xb4542835, 0x9f63293c, 0xee41e729, 0x6e1d2d7c, 0x50045286, 0x1e6685f3, 0xf33401c6, 0x30a22c95, 0x31a70850, 0x60930f13, 0x73f98417, 0xa1269859, 0xec645c44, 0x52c877a9, 0xcdff33a6, 0xa02b1741, 0x7cbad9a2, 0x2180036f, 0x50d99c08, 0xcb3f4861, 0xc26bd765, 0x64a3f6ab, 0x80342676, 0x25a75e7b, 0xe4e6d1fc, 0x20c710e6, 0xcdf0b680, 0x17844d3b, 0x31eef84d, 0x7e0824e4, 0x2ccb49eb, 0x846a3bae, 0x8ff77888, 0xee5d60f6, 0x7af75673, 0x2fdd5cdb, 0xa11631c1, 0x30f66f43, 0xb3faec54, 0x157fd7fa, 0xef8579cc, 0xd152de58, 0xdb2ffd5e, 0x8f32ce19, 0x306af97a, 0x02f03ef8, 0x99319ad5, 0xc242fa0f, 0xa7e3ebb0, 0xc68e4906, 0xb8da230c, 0x80823028, 0xdcdef3c8, 0xd35fb171, 0x088a1bc8, 0xbec0c560, 0x61a3c9e8, 0xbca8f54d, 0xc72feffa, 0x22822e99, 0x82c570b4, 0xd8d94e89, 0x8b1c34bc, 0x301e16e6, 0x273be979, 0xb0ffeaa6, 0x61d9b8c6, 0x00b24869, 0xb7ffce3f, 0x08dc283b, 0x43daf65a, 0xf7e19798, 0x7619b72f, 0x8f1c9ba4, 0xdc8637a0, 0x16a7d3b1, 0x9fc393b7, 0xa7136eeb, 0xc6bcc63e, 0x1a513742, 0xef6828bc, 0x520365d6, 0x2d6a77ab, 0x3527ed4b, 0x821fd216, 0x095c6e2e, 0xdb92f2fb, 0x5eea29cb, 0x145892f5, 0x91584f7f, 0x5483697b, 0x2667a8cc, 0x85196048, 0x8c4bacea, 0x833860d4, 0x0d23e0f9, 0x6c387e8a, 0x0ae6d249, 0xb284600c, 0xd835731d, 0xdcb1c647, 0xac4c56ea, 0x3ebd81b3, 0x230eabb0, 0x6438bc87, 0xf0b5b1fa, 0x8f5ea2b3, 0xfc184642, 0x0a036b7a, 0x4fb089bd, 0x649da589, 0xa345415e, 0x5c038323, 0x3e5d3bb9, 0x43d79572, 0x7e6dd07c, 0x06dfdf1e, 0x6c6cc4ef, 0x7160a539, 0x73bfbe70, 0x83877605, 0x4523ecf1 }, { 0x8defc240, 0x25fa5d9f, 0xeb903dbf, 0xe810c907, 0x47607fff, 0x369fe44b, 0x8c1fc644, 0xaececa90, 0xbeb1f9bf, 0xeefbcaea, 0xe8cf1950, 0x51df07ae, 0x920e8806, 0xf0ad0548, 0xe13c8d83, 0x927010d5, 0x11107d9f, 0x07647db9, 0xb2e3e4d4, 
0x3d4f285e, 0xb9afa820, 0xfade82e0, 0xa067268b, 0x8272792e, 0x553fb2c0, 0x489ae22b, 0xd4ef9794, 0x125e3fbc, 0x21fffcee, 0x825b1bfd, 0x9255c5ed, 0x1257a240, 0x4e1a8302, 0xbae07fff, 0x528246e7, 0x8e57140e, 0x3373f7bf, 0x8c9f8188, 0xa6fc4ee8, 0xc982b5a5, 0xa8c01db7, 0x579fc264, 0x67094f31, 0xf2bd3f5f, 0x40fff7c1, 0x1fb78dfc, 0x8e6bd2c1, 0x437be59b, 0x99b03dbf, 0xb5dbc64b, 0x638dc0e6, 0x55819d99, 0xa197c81c, 0x4a012d6e, 0xc5884a28, 0xccc36f71, 0xb843c213, 0x6c0743f1, 0x8309893c, 0x0feddd5f, 0x2f7fe850, 0xd7c07f7e, 0x02507fbf, 0x5afb9a04, 0xa747d2d0, 0x1651192e, 0xaf70bf3e, 0x58c31380, 0x5f98302e, 0x727cc3c4, 0x0a0fb402, 0x0f7fef82, 0x8c96fdad, 0x5d2c2aae, 0x8ee99a49, 0x50da88b8, 0x8427f4a0, 0x1eac5790, 0x796fb449, 0x8252dc15, 0xefbd7d9b, 0xa672597d, 0xada840d8, 0x45f54504, 0xfa5d7403, 0xe83ec305, 0x4f91751a, 0x925669c2, 0x23efe941, 0xa903f12e, 0x60270df2, 0x0276e4b6, 0x94fd6574, 0x927985b2, 0x8276dbcb, 0x02778176, 0xf8af918d, 0x4e48f79e, 0x8f616ddf, 0xe29d840e, 0x842f7d83, 0x340ce5c8, 0x96bbb682, 0x93b4b148, 0xef303cab, 0x984faf28, 0x779faf9b, 0x92dc560d, 0x224d1e20, 0x8437aa88, 0x7d29dc96, 0x2756d3dc, 0x8b907cee, 0xb51fd240, 0xe7c07ce3, 0xe566b4a1, 0xc3e9615e, 0x3cf8209d, 0x6094d1e3, 0xcd9ca341, 0x5c76460e, 0x00ea983b, 0xd4d67881, 0xfd47572c, 0xf76cedd9, 0xbda8229c, 0x127dadaa, 0x438a074e, 0x1f97c090, 0x081bdb8a, 0x93a07ebe, 0xb938ca15, 0x97b03cff, 0x3dc2c0f8, 0x8d1ab2ec, 0x64380e51, 0x68cc7bfb, 0xd90f2788, 0x12490181, 0x5de5ffd4, 0xdd7ef86a, 0x76a2e214, 0xb9a40368, 0x925d958f, 0x4b39fffa, 0xba39aee9, 0xa4ffd30b, 0xfaf7933b, 0x6d498623, 0x193cbcfa, 0x27627545, 0x825cf47a, 0x61bd8ba0, 0xd11e42d1, 0xcead04f4, 0x127ea392, 0x10428db7, 0x8272a972, 0x9270c4a8, 0x127de50b, 0x285ba1c8, 0x3c62f44f, 0x35c0eaa5, 0xe805d231, 0x428929fb, 0xb4fcdf82, 0x4fb66a53, 0x0e7dc15b, 0x1f081fab, 0x108618ae, 0xfcfd086d, 0xf9ff2889, 0x694bcc11, 0x236a5cae, 0x12deca4d, 0x2c3f8cc5, 0xd2d02dfe, 0xf8ef5896, 0xe4cf52da, 0x95155b67, 0x494a488c, 0xb9b6a80c, 0x5c8f82bc, 0x89d36b45, 0x3a609437, 0xec00c9a9, 0x44715253, 0x0a874b49, 0xd773bc40, 0x7c34671c, 0x02717ef6, 0x4feb5536, 0xa2d02fff, 0xd2bf60c4, 0xd43f03c0, 0x50b4ef6d, 0x07478cd1, 0x006e1888, 0xa2e53f55, 0xb9e6d4bc, 0xa2048016, 0x97573833, 0xd7207d67, 0xde0f8f3d, 0x72f87b33, 0xabcc4f33, 0x7688c55d, 0x7b00a6b0, 0x947b0001, 0x570075d2, 0xf9bb88f8, 0x8942019e, 0x4264a5ff, 0x856302e0, 0x72dbd92b, 0xee971b69, 0x6ea22fde, 0x5f08ae2b, 0xaf7a616d, 0xe5c98767, 0xcf1febd2, 0x61efc8c2, 0xf1ac2571, 0xcc8239c2, 0x67214cb8, 0xb1e583d1, 0xb7dc3e62, 0x7f10bdce, 0xf90a5c38, 0x0ff0443d, 0x606e6dc6, 0x60543a49, 0x5727c148, 0x2be98a1d, 0x8ab41738, 0x20e1be24, 0xaf96da0f, 0x68458425, 0x99833be5, 0x600d457d, 0x282f9350, 0x8334b362, 0xd91d1120, 0x2b6d8da0, 0x642b1e31, 0x9c305a00, 0x52bce688, 0x1b03588a, 0xf7baefd5, 0x4142ed9c, 0xa4315c11, 0x83323ec5, 0xdfef4636, 0xa133c501, 0xe9d3531c, 0xee353783 }, { 0x9db30420, 0x1fb6e9de, 0xa7be7bef, 0xd273a298, 0x4a4f7bdb, 0x64ad8c57, 0x85510443, 0xfa020ed1, 0x7e287aff, 0xe60fb663, 0x095f35a1, 0x79ebf120, 0xfd059d43, 0x6497b7b1, 0xf3641f63, 0x241e4adf, 0x28147f5f, 0x4fa2b8cd, 0xc9430040, 0x0cc32220, 0xfdd30b30, 0xc0a5374f, 0x1d2d00d9, 0x24147b15, 0xee4d111a, 0x0fca5167, 0x71ff904c, 0x2d195ffe, 0x1a05645f, 0x0c13fefe, 0x081b08ca, 0x05170121, 0x80530100, 0xe83e5efe, 0xac9af4f8, 0x7fe72701, 0xd2b8ee5f, 0x06df4261, 0xbb9e9b8a, 0x7293ea25, 0xce84ffdf, 0xf5718801, 0x3dd64b04, 0xa26f263b, 0x7ed48400, 0x547eebe6, 0x446d4ca0, 0x6cf3d6f5, 0x2649abdf, 0xaea0c7f5, 0x36338cc1, 0x503f7e93, 0xd3772061, 0x11b638e1, 0x72500e03, 0xf80eb2bb, 0xabe0502e, 0xec8d77de, 
0x57971e81, 0xe14f6746, 0xc9335400, 0x6920318f, 0x081dbb99, 0xffc304a5, 0x4d351805, 0x7f3d5ce3, 0xa6c866c6, 0x5d5bcca9, 0xdaec6fea, 0x9f926f91, 0x9f46222f, 0x3991467d, 0xa5bf6d8e, 0x1143c44f, 0x43958302, 0xd0214eeb, 0x022083b8, 0x3fb6180c, 0x18f8931e, 0x281658e6, 0x26486e3e, 0x8bd78a70, 0x7477e4c1, 0xb506e07c, 0xf32d0a25, 0x79098b02, 0xe4eabb81, 0x28123b23, 0x69dead38, 0x1574ca16, 0xdf871b62, 0x211c40b7, 0xa51a9ef9, 0x0014377b, 0x041e8ac8, 0x09114003, 0xbd59e4d2, 0xe3d156d5, 0x4fe876d5, 0x2f91a340, 0x557be8de, 0x00eae4a7, 0x0ce5c2ec, 0x4db4bba6, 0xe756bdff, 0xdd3369ac, 0xec17b035, 0x06572327, 0x99afc8b0, 0x56c8c391, 0x6b65811c, 0x5e146119, 0x6e85cb75, 0xbe07c002, 0xc2325577, 0x893ff4ec, 0x5bbfc92d, 0xd0ec3b25, 0xb7801ab7, 0x8d6d3b24, 0x20c763ef, 0xc366a5fc, 0x9c382880, 0x0ace3205, 0xaac9548a, 0xeca1d7c7, 0x041afa32, 0x1d16625a, 0x6701902c, 0x9b757a54, 0x31d477f7, 0x9126b031, 0x36cc6fdb, 0xc70b8b46, 0xd9e66a48, 0x56e55a79, 0x026a4ceb, 0x52437eff, 0x2f8f76b4, 0x0df980a5, 0x8674cde3, 0xedda04eb, 0x17a9be04, 0x2c18f4df, 0xb7747f9d, 0xab2af7b4, 0xefc34d20, 0x2e096b7c, 0x1741a254, 0xe5b6a035, 0x213d42f6, 0x2c1c7c26, 0x61c2f50f, 0x6552daf9, 0xd2c231f8, 0x25130f69, 0xd8167fa2, 0x0418f2c8, 0x001a96a6, 0x0d1526ab, 0x63315c21, 0x5e0a72ec, 0x49bafefd, 0x187908d9, 0x8d0dbd86, 0x311170a7, 0x3e9b640c, 0xcc3e10d7, 0xd5cad3b6, 0x0caec388, 0xf73001e1, 0x6c728aff, 0x71eae2a1, 0x1f9af36e, 0xcfcbd12f, 0xc1de8417, 0xac07be6b, 0xcb44a1d8, 0x8b9b0f56, 0x013988c3, 0xb1c52fca, 0xb4be31cd, 0xd8782806, 0x12a3a4e2, 0x6f7de532, 0x58fd7eb6, 0xd01ee900, 0x24adffc2, 0xf4990fc5, 0x9711aac5, 0x001d7b95, 0x82e5e7d2, 0x109873f6, 0x00613096, 0xc32d9521, 0xada121ff, 0x29908415, 0x7fbb977f, 0xaf9eb3db, 0x29c9ed2a, 0x5ce2a465, 0xa730f32c, 0xd0aa3fe8, 0x8a5cc091, 0xd49e2ce7, 0x0ce454a9, 0xd60acd86, 0x015f1919, 0x77079103, 0xdea03af6, 0x78a8565e, 0xdee356df, 0x21f05cbe, 0x8b75e387, 0xb3c50651, 0xb8a5c3ef, 0xd8eeb6d2, 0xe523be77, 0xc2154529, 0x2f69efdf, 0xafe67afb, 0xf470c4b2, 0xf3e0eb5b, 0xd6cc9876, 0x39e4460c, 0x1fda8538, 0x1987832f, 0xca007367, 0xa99144f8, 0x296b299e, 0x492fc295, 0x9266beab, 0xb5676e69, 0x9bd3ddda, 0xdf7e052f, 0xdb25701c, 0x1b5e51ee, 0xf65324e6, 0x6afce36c, 0x0316cc04, 0x8644213e, 0xb7dc59d0, 0x7965291f, 0xccd6fd43, 0x41823979, 0x932bcdf6, 0xb657c34d, 0x4edfd282, 0x7ae5290c, 0x3cb9536b, 0x851e20fe, 0x9833557e, 0x13ecf0b0, 0xd3ffb372, 0x3f85c5c1, 0x0aef7ed2 } }; static const u32 s5[256] = { 0x7ec90c04, 0x2c6e74b9, 0x9b0e66df, 0xa6337911, 0xb86a7fff, 0x1dd358f5, 0x44dd9d44, 0x1731167f, 0x08fbf1fa, 0xe7f511cc, 0xd2051b00, 0x735aba00, 0x2ab722d8, 0x386381cb, 0xacf6243a, 0x69befd7a, 0xe6a2e77f, 0xf0c720cd, 0xc4494816, 0xccf5c180, 0x38851640, 0x15b0a848, 0xe68b18cb, 0x4caadeff, 0x5f480a01, 0x0412b2aa, 0x259814fc, 0x41d0efe2, 0x4e40b48d, 0x248eb6fb, 0x8dba1cfe, 0x41a99b02, 0x1a550a04, 0xba8f65cb, 0x7251f4e7, 0x95a51725, 0xc106ecd7, 0x97a5980a, 0xc539b9aa, 0x4d79fe6a, 0xf2f3f763, 0x68af8040, 0xed0c9e56, 0x11b4958b, 0xe1eb5a88, 0x8709e6b0, 0xd7e07156, 0x4e29fea7, 0x6366e52d, 0x02d1c000, 0xc4ac8e05, 0x9377f571, 0x0c05372a, 0x578535f2, 0x2261be02, 0xd642a0c9, 0xdf13a280, 0x74b55bd2, 0x682199c0, 0xd421e5ec, 0x53fb3ce8, 0xc8adedb3, 0x28a87fc9, 0x3d959981, 0x5c1ff900, 0xfe38d399, 0x0c4eff0b, 0x062407ea, 0xaa2f4fb1, 0x4fb96976, 0x90c79505, 0xb0a8a774, 0xef55a1ff, 0xe59ca2c2, 0xa6b62d27, 0xe66a4263, 0xdf65001f, 0x0ec50966, 0xdfdd55bc, 0x29de0655, 0x911e739a, 0x17af8975, 0x32c7911c, 0x89f89468, 0x0d01e980, 0x524755f4, 0x03b63cc9, 0x0cc844b2, 0xbcf3f0aa, 0x87ac36e9, 0xe53a7426, 0x01b3d82b, 0x1a9e7449, 0x64ee2d7e, 0xcddbb1da, 
0x01c94910, 0xb868bf80, 0x0d26f3fd, 0x9342ede7, 0x04a5c284, 0x636737b6, 0x50f5b616, 0xf24766e3, 0x8eca36c1, 0x136e05db, 0xfef18391, 0xfb887a37, 0xd6e7f7d4, 0xc7fb7dc9, 0x3063fcdf, 0xb6f589de, 0xec2941da, 0x26e46695, 0xb7566419, 0xf654efc5, 0xd08d58b7, 0x48925401, 0xc1bacb7f, 0xe5ff550f, 0xb6083049, 0x5bb5d0e8, 0x87d72e5a, 0xab6a6ee1, 0x223a66ce, 0xc62bf3cd, 0x9e0885f9, 0x68cb3e47, 0x086c010f, 0xa21de820, 0xd18b69de, 0xf3f65777, 0xfa02c3f6, 0x407edac3, 0xcbb3d550, 0x1793084d, 0xb0d70eba, 0x0ab378d5, 0xd951fb0c, 0xded7da56, 0x4124bbe4, 0x94ca0b56, 0x0f5755d1, 0xe0e1e56e, 0x6184b5be, 0x580a249f, 0x94f74bc0, 0xe327888e, 0x9f7b5561, 0xc3dc0280, 0x05687715, 0x646c6bd7, 0x44904db3, 0x66b4f0a3, 0xc0f1648a, 0x697ed5af, 0x49e92ff6, 0x309e374f, 0x2cb6356a, 0x85808573, 0x4991f840, 0x76f0ae02, 0x083be84d, 0x28421c9a, 0x44489406, 0x736e4cb8, 0xc1092910, 0x8bc95fc6, 0x7d869cf4, 0x134f616f, 0x2e77118d, 0xb31b2be1, 0xaa90b472, 0x3ca5d717, 0x7d161bba, 0x9cad9010, 0xaf462ba2, 0x9fe459d2, 0x45d34559, 0xd9f2da13, 0xdbc65487, 0xf3e4f94e, 0x176d486f, 0x097c13ea, 0x631da5c7, 0x445f7382, 0x175683f4, 0xcdc66a97, 0x70be0288, 0xb3cdcf72, 0x6e5dd2f3, 0x20936079, 0x459b80a5, 0xbe60e2db, 0xa9c23101, 0xeba5315c, 0x224e42f2, 0x1c5c1572, 0xf6721b2c, 0x1ad2fff3, 0x8c25404e, 0x324ed72f, 0x4067b7fd, 0x0523138e, 0x5ca3bc78, 0xdc0fd66e, 0x75922283, 0x784d6b17, 0x58ebb16e, 0x44094f85, 0x3f481d87, 0xfcfeae7b, 0x77b5ff76, 0x8c2302bf, 0xaaf47556, 0x5f46b02a, 0x2b092801, 0x3d38f5f7, 0x0ca81f36, 0x52af4a8a, 0x66d5e7c0, 0xdf3b0874, 0x95055110, 0x1b5ad7a8, 0xf61ed5ad, 0x6cf6e479, 0x20758184, 0xd0cefa65, 0x88f7be58, 0x4a046826, 0x0ff6f8f3, 0xa09c7f70, 0x5346aba0, 0x5ce96c28, 0xe176eda3, 0x6bac307f, 0x376829d2, 0x85360fa9, 0x17e3fe2a, 0x24b79767, 0xf5a96b20, 0xd6cd2595, 0x68ff1ebf, 0x7555442c, 0xf19f06be, 0xf9e0659a, 0xeeb9491d, 0x34010718, 0xbb30cab8, 0xe822fe15, 0x88570983, 0x750e6249, 0xda627e55, 0x5e76ffa8, 0xb1534546, 0x6d47de08, 0xefe9e7d4 }; static const u32 s6[256] = { 0xf6fa8f9d, 0x2cac6ce1, 0x4ca34867, 0xe2337f7c, 0x95db08e7, 0x016843b4, 0xeced5cbc, 0x325553ac, 0xbf9f0960, 0xdfa1e2ed, 0x83f0579d, 0x63ed86b9, 0x1ab6a6b8, 0xde5ebe39, 0xf38ff732, 0x8989b138, 0x33f14961, 0xc01937bd, 0xf506c6da, 0xe4625e7e, 0xa308ea99, 0x4e23e33c, 0x79cbd7cc, 0x48a14367, 0xa3149619, 0xfec94bd5, 0xa114174a, 0xeaa01866, 0xa084db2d, 0x09a8486f, 0xa888614a, 0x2900af98, 0x01665991, 0xe1992863, 0xc8f30c60, 0x2e78ef3c, 0xd0d51932, 0xcf0fec14, 0xf7ca07d2, 0xd0a82072, 0xfd41197e, 0x9305a6b0, 0xe86be3da, 0x74bed3cd, 0x372da53c, 0x4c7f4448, 0xdab5d440, 0x6dba0ec3, 0x083919a7, 0x9fbaeed9, 0x49dbcfb0, 0x4e670c53, 0x5c3d9c01, 0x64bdb941, 0x2c0e636a, 0xba7dd9cd, 0xea6f7388, 0xe70bc762, 0x35f29adb, 0x5c4cdd8d, 0xf0d48d8c, 0xb88153e2, 0x08a19866, 0x1ae2eac8, 0x284caf89, 0xaa928223, 0x9334be53, 0x3b3a21bf, 0x16434be3, 0x9aea3906, 0xefe8c36e, 0xf890cdd9, 0x80226dae, 0xc340a4a3, 0xdf7e9c09, 0xa694a807, 0x5b7c5ecc, 0x221db3a6, 0x9a69a02f, 0x68818a54, 0xceb2296f, 0x53c0843a, 0xfe893655, 0x25bfe68a, 0xb4628abc, 0xcf222ebf, 0x25ac6f48, 0xa9a99387, 0x53bddb65, 0xe76ffbe7, 0xe967fd78, 0x0ba93563, 0x8e342bc1, 0xe8a11be9, 0x4980740d, 0xc8087dfc, 0x8de4bf99, 0xa11101a0, 0x7fd37975, 0xda5a26c0, 0xe81f994f, 0x9528cd89, 0xfd339fed, 0xb87834bf, 0x5f04456d, 0x22258698, 0xc9c4c83b, 0x2dc156be, 0x4f628daa, 0x57f55ec5, 0xe2220abe, 0xd2916ebf, 0x4ec75b95, 0x24f2c3c0, 0x42d15d99, 0xcd0d7fa0, 0x7b6e27ff, 0xa8dc8af0, 0x7345c106, 0xf41e232f, 0x35162386, 0xe6ea8926, 0x3333b094, 0x157ec6f2, 0x372b74af, 0x692573e4, 0xe9a9d848, 0xf3160289, 0x3a62ef1d, 0xa787e238, 0xf3a5f676, 0x74364853, 
0x20951063, 0x4576698d, 0xb6fad407, 0x592af950, 0x36f73523, 0x4cfb6e87, 0x7da4cec0, 0x6c152daa, 0xcb0396a8, 0xc50dfe5d, 0xfcd707ab, 0x0921c42f, 0x89dff0bb, 0x5fe2be78, 0x448f4f33, 0x754613c9, 0x2b05d08d, 0x48b9d585, 0xdc049441, 0xc8098f9b, 0x7dede786, 0xc39a3373, 0x42410005, 0x6a091751, 0x0ef3c8a6, 0x890072d6, 0x28207682, 0xa9a9f7be, 0xbf32679d, 0xd45b5b75, 0xb353fd00, 0xcbb0e358, 0x830f220a, 0x1f8fb214, 0xd372cf08, 0xcc3c4a13, 0x8cf63166, 0x061c87be, 0x88c98f88, 0x6062e397, 0x47cf8e7a, 0xb6c85283, 0x3cc2acfb, 0x3fc06976, 0x4e8f0252, 0x64d8314d, 0xda3870e3, 0x1e665459, 0xc10908f0, 0x513021a5, 0x6c5b68b7, 0x822f8aa0, 0x3007cd3e, 0x74719eef, 0xdc872681, 0x073340d4, 0x7e432fd9, 0x0c5ec241, 0x8809286c, 0xf592d891, 0x08a930f6, 0x957ef305, 0xb7fbffbd, 0xc266e96f, 0x6fe4ac98, 0xb173ecc0, 0xbc60b42a, 0x953498da, 0xfba1ae12, 0x2d4bd736, 0x0f25faab, 0xa4f3fceb, 0xe2969123, 0x257f0c3d, 0x9348af49, 0x361400bc, 0xe8816f4a, 0x3814f200, 0xa3f94043, 0x9c7a54c2, 0xbc704f57, 0xda41e7f9, 0xc25ad33a, 0x54f4a084, 0xb17f5505, 0x59357cbe, 0xedbd15c8, 0x7f97c5ab, 0xba5ac7b5, 0xb6f6deaf, 0x3a479c3a, 0x5302da25, 0x653d7e6a, 0x54268d49, 0x51a477ea, 0x5017d55b, 0xd7d25d88, 0x44136c76, 0x0404a8c8, 0xb8e5a121, 0xb81a928a, 0x60ed5869, 0x97c55b96, 0xeaec991b, 0x29935913, 0x01fdb7f1, 0x088e8dfa, 0x9ab6f6f5, 0x3b4cbf9f, 0x4a5de3ab, 0xe6051d35, 0xa0e1d855, 0xd36b4cf1, 0xf544edeb, 0xb0e93524, 0xbebb8fbd, 0xa2d762cf, 0x49c92f54, 0x38b5f331, 0x7128a454, 0x48392905, 0xa65b1db8, 0x851c97bd, 0xd675cf2f }; static const u32 s7[256] = { 0x85e04019, 0x332bf567, 0x662dbfff, 0xcfc65693, 0x2a8d7f6f, 0xab9bc912, 0xde6008a1, 0x2028da1f, 0x0227bce7, 0x4d642916, 0x18fac300, 0x50f18b82, 0x2cb2cb11, 0xb232e75c, 0x4b3695f2, 0xb28707de, 0xa05fbcf6, 0xcd4181e9, 0xe150210c, 0xe24ef1bd, 0xb168c381, 0xfde4e789, 0x5c79b0d8, 0x1e8bfd43, 0x4d495001, 0x38be4341, 0x913cee1d, 0x92a79c3f, 0x089766be, 0xbaeeadf4, 0x1286becf, 0xb6eacb19, 0x2660c200, 0x7565bde4, 0x64241f7a, 0x8248dca9, 0xc3b3ad66, 0x28136086, 0x0bd8dfa8, 0x356d1cf2, 0x107789be, 0xb3b2e9ce, 0x0502aa8f, 0x0bc0351e, 0x166bf52a, 0xeb12ff82, 0xe3486911, 0xd34d7516, 0x4e7b3aff, 0x5f43671b, 0x9cf6e037, 0x4981ac83, 0x334266ce, 0x8c9341b7, 0xd0d854c0, 0xcb3a6c88, 0x47bc2829, 0x4725ba37, 0xa66ad22b, 0x7ad61f1e, 0x0c5cbafa, 0x4437f107, 0xb6e79962, 0x42d2d816, 0x0a961288, 0xe1a5c06e, 0x13749e67, 0x72fc081a, 0xb1d139f7, 0xf9583745, 0xcf19df58, 0xbec3f756, 0xc06eba30, 0x07211b24, 0x45c28829, 0xc95e317f, 0xbc8ec511, 0x38bc46e9, 0xc6e6fa14, 0xbae8584a, 0xad4ebc46, 0x468f508b, 0x7829435f, 0xf124183b, 0x821dba9f, 0xaff60ff4, 0xea2c4e6d, 0x16e39264, 0x92544a8b, 0x009b4fc3, 0xaba68ced, 0x9ac96f78, 0x06a5b79a, 0xb2856e6e, 0x1aec3ca9, 0xbe838688, 0x0e0804e9, 0x55f1be56, 0xe7e5363b, 0xb3a1f25d, 0xf7debb85, 0x61fe033c, 0x16746233, 0x3c034c28, 0xda6d0c74, 0x79aac56c, 0x3ce4e1ad, 0x51f0c802, 0x98f8f35a, 0x1626a49f, 0xeed82b29, 0x1d382fe3, 0x0c4fb99a, 0xbb325778, 0x3ec6d97b, 0x6e77a6a9, 0xcb658b5c, 0xd45230c7, 0x2bd1408b, 0x60c03eb7, 0xb9068d78, 0xa33754f4, 0xf430c87d, 0xc8a71302, 0xb96d8c32, 0xebd4e7be, 0xbe8b9d2d, 0x7979fb06, 0xe7225308, 0x8b75cf77, 0x11ef8da4, 0xe083c858, 0x8d6b786f, 0x5a6317a6, 0xfa5cf7a0, 0x5dda0033, 0xf28ebfb0, 0xf5b9c310, 0xa0eac280, 0x08b9767a, 0xa3d9d2b0, 0x79d34217, 0x021a718d, 0x9ac6336a, 0x2711fd60, 0x438050e3, 0x069908a8, 0x3d7fedc4, 0x826d2bef, 0x4eeb8476, 0x488dcf25, 0x36c9d566, 0x28e74e41, 0xc2610aca, 0x3d49a9cf, 0xbae3b9df, 0xb65f8de6, 0x92aeaf64, 0x3ac7d5e6, 0x9ea80509, 0xf22b017d, 0xa4173f70, 0xdd1e16c3, 0x15e0d7f9, 0x50b1b887, 0x2b9f4fd5, 0x625aba82, 0x6a017962, 0x2ec01b9c, 
0x15488aa9, 0xd716e740, 0x40055a2c, 0x93d29a22, 0xe32dbf9a, 0x058745b9, 0x3453dc1e, 0xd699296e, 0x496cff6f, 0x1c9f4986, 0xdfe2ed07, 0xb87242d1, 0x19de7eae, 0x053e561a, 0x15ad6f8c, 0x66626c1c, 0x7154c24c, 0xea082b2a, 0x93eb2939, 0x17dcb0f0, 0x58d4f2ae, 0x9ea294fb, 0x52cf564c, 0x9883fe66, 0x2ec40581, 0x763953c3, 0x01d6692e, 0xd3a0c108, 0xa1e7160e, 0xe4f2dfa6, 0x693ed285, 0x74904698, 0x4c2b0edd, 0x4f757656, 0x5d393378, 0xa132234f, 0x3d321c5d, 0xc3f5e194, 0x4b269301, 0xc79f022f, 0x3c997e7e, 0x5e4f9504, 0x3ffafbbd, 0x76f7ad0e, 0x296693f4, 0x3d1fce6f, 0xc61e45be, 0xd3b5ab34, 0xf72bf9b7, 0x1b0434c0, 0x4e72b567, 0x5592a33d, 0xb5229301, 0xcfd2a87f, 0x60aeb767, 0x1814386b, 0x30bcc33d, 0x38a0c07d, 0xfd1606f2, 0xc363519b, 0x589dd390, 0x5479f8e6, 0x1cb8d647, 0x97fd61a9, 0xea7759f4, 0x2d57539d, 0x569a58cf, 0xe84e63ad, 0x462e1b78, 0x6580f87e, 0xf3817914, 0x91da55f4, 0x40a230f3, 0xd1988f35, 0xb6e318d2, 0x3ffa50bc, 0x3d40f021, 0xc3c0bdae, 0x4958c24c, 0x518f36b2, 0x84b1d370, 0x0fedce83, 0x878ddada, 0xf2a279c7, 0x94e01be8, 0x90716f4b, 0x954b8aa3 }; static const u32 s8[256] = { 0xe216300d, 0xbbddfffc, 0xa7ebdabd, 0x35648095, 0x7789f8b7, 0xe6c1121b, 0x0e241600, 0x052ce8b5, 0x11a9cfb0, 0xe5952f11, 0xece7990a, 0x9386d174, 0x2a42931c, 0x76e38111, 0xb12def3a, 0x37ddddfc, 0xde9adeb1, 0x0a0cc32c, 0xbe197029, 0x84a00940, 0xbb243a0f, 0xb4d137cf, 0xb44e79f0, 0x049eedfd, 0x0b15a15d, 0x480d3168, 0x8bbbde5a, 0x669ded42, 0xc7ece831, 0x3f8f95e7, 0x72df191b, 0x7580330d, 0x94074251, 0x5c7dcdfa, 0xabbe6d63, 0xaa402164, 0xb301d40a, 0x02e7d1ca, 0x53571dae, 0x7a3182a2, 0x12a8ddec, 0xfdaa335d, 0x176f43e8, 0x71fb46d4, 0x38129022, 0xce949ad4, 0xb84769ad, 0x965bd862, 0x82f3d055, 0x66fb9767, 0x15b80b4e, 0x1d5b47a0, 0x4cfde06f, 0xc28ec4b8, 0x57e8726e, 0x647a78fc, 0x99865d44, 0x608bd593, 0x6c200e03, 0x39dc5ff6, 0x5d0b00a3, 0xae63aff2, 0x7e8bd632, 0x70108c0c, 0xbbd35049, 0x2998df04, 0x980cf42a, 0x9b6df491, 0x9e7edd53, 0x06918548, 0x58cb7e07, 0x3b74ef2e, 0x522fffb1, 0xd24708cc, 0x1c7e27cd, 0xa4eb215b, 0x3cf1d2e2, 0x19b47a38, 0x424f7618, 0x35856039, 0x9d17dee7, 0x27eb35e6, 0xc9aff67b, 0x36baf5b8, 0x09c467cd, 0xc18910b1, 0xe11dbf7b, 0x06cd1af8, 0x7170c608, 0x2d5e3354, 0xd4de495a, 0x64c6d006, 0xbcc0c62c, 0x3dd00db3, 0x708f8f34, 0x77d51b42, 0x264f620f, 0x24b8d2bf, 0x15c1b79e, 0x46a52564, 0xf8d7e54e, 0x3e378160, 0x7895cda5, 0x859c15a5, 0xe6459788, 0xc37bc75f, 0xdb07ba0c, 0x0676a3ab, 0x7f229b1e, 0x31842e7b, 0x24259fd7, 0xf8bef472, 0x835ffcb8, 0x6df4c1f2, 0x96f5b195, 0xfd0af0fc, 0xb0fe134c, 0xe2506d3d, 0x4f9b12ea, 0xf215f225, 0xa223736f, 0x9fb4c428, 0x25d04979, 0x34c713f8, 0xc4618187, 0xea7a6e98, 0x7cd16efc, 0x1436876c, 0xf1544107, 0xbedeee14, 0x56e9af27, 0xa04aa441, 0x3cf7c899, 0x92ecbae6, 0xdd67016d, 0x151682eb, 0xa842eedf, 0xfdba60b4, 0xf1907b75, 0x20e3030f, 0x24d8c29e, 0xe139673b, 0xefa63fb8, 0x71873054, 0xb6f2cf3b, 0x9f326442, 0xcb15a4cc, 0xb01a4504, 0xf1e47d8d, 0x844a1be5, 0xbae7dfdc, 0x42cbda70, 0xcd7dae0a, 0x57e85b7a, 0xd53f5af6, 0x20cf4d8c, 0xcea4d428, 0x79d130a4, 0x3486ebfb, 0x33d3cddc, 0x77853b53, 0x37effcb5, 0xc5068778, 0xe580b3e6, 0x4e68b8f4, 0xc5c8b37e, 0x0d809ea2, 0x398feb7c, 0x132a4f94, 0x43b7950e, 0x2fee7d1c, 0x223613bd, 0xdd06caa2, 0x37df932b, 0xc4248289, 0xacf3ebc3, 0x5715f6b7, 0xef3478dd, 0xf267616f, 0xc148cbe4, 0x9052815e, 0x5e410fab, 0xb48a2465, 0x2eda7fa4, 0xe87b40e4, 0xe98ea084, 0x5889e9e1, 0xefd390fc, 0xdd07d35b, 0xdb485694, 0x38d7e5b2, 0x57720101, 0x730edebc, 0x5b643113, 0x94917e4f, 0x503c2fba, 0x646f1282, 0x7523d24a, 0xe0779695, 0xf9c17a8f, 0x7a5b2121, 0xd187b896, 0x29263a4d, 0xba510cdf, 0x81f47c9f, 0xad1163ed, 
0xea7b5965, 0x1a00726e, 0x11403092, 0x00da6d77, 0x4a0cdd61, 0xad1f4603, 0x605bdfb0, 0x9eedc364, 0x22ebe6a8, 0xcee7d28a, 0xa0e736a0, 0x5564a6b9, 0x10853209, 0xc7eb8f37, 0x2de705ca, 0x8951570f, 0xdf09822b, 0xbd691a6c, 0xaa12e4f2, 0x87451c0f, 0xe0f6a27a, 0x3ada4819, 0x4cf1764f, 0x0d771c2b, 0x67cdb156, 0x350d8384, 0x5938fa0f, 0x42399ef3, 0x36997b07, 0x0e84093d, 0x4aa93e61, 0x8360d87b, 0x1fa98b0c, 0x1149382c, 0xe97625a5, 0x0614d1b7, 0x0e25244b, 0x0c768347, 0x589e8d82, 0x0d2059d1, 0xa466bb1e, 0xf8da0a82, 0x04f19130, 0xba6e4ec0, 0x99265164, 0x1ee7230d, 0x50b2ad80, 0xeaee6801, 0x8db2a283, 0xea8bf59e }; #ifdef USE_AMD64_ASM /* Assembly implementations of CAST5. */ extern void _gcry_cast5_amd64_encrypt_block(CAST5_context *c, byte *outbuf, const byte *inbuf); extern void _gcry_cast5_amd64_decrypt_block(CAST5_context *c, byte *outbuf, const byte *inbuf); /* These assembly implementations process four blocks in parallel. */ extern void _gcry_cast5_amd64_ctr_enc(CAST5_context *ctx, byte *out, const byte *in, byte *ctr); extern void _gcry_cast5_amd64_cbc_dec(CAST5_context *ctx, byte *out, const byte *in, byte *iv); extern void _gcry_cast5_amd64_cfb_dec(CAST5_context *ctx, byte *out, const byte *in, byte *iv); -#ifdef HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS -static inline void -call_sysv_fn (const void *fn, const void *arg1, const void *arg2, - const void *arg3, const void *arg4) -{ - /* Call SystemV ABI function without storing non-volatile XMM registers, - * as target function does not use vector instruction sets. */ - asm volatile ("callq *%0\n\t" - : "+a" (fn), - "+D" (arg1), - "+S" (arg2), - "+d" (arg3), - "+c" (arg4) - : - : "cc", "memory", "r8", "r9", "r10", "r11"); -} -#endif - static void do_encrypt_block (CAST5_context *context, byte *outbuf, const byte *inbuf) { -#ifdef HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS - call_sysv_fn (_gcry_cast5_amd64_encrypt_block, context, outbuf, inbuf, NULL); -#else _gcry_cast5_amd64_encrypt_block (context, outbuf, inbuf); -#endif } static void do_decrypt_block (CAST5_context *context, byte *outbuf, const byte *inbuf) { -#ifdef HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS - call_sysv_fn (_gcry_cast5_amd64_decrypt_block, context, outbuf, inbuf, NULL); -#else _gcry_cast5_amd64_decrypt_block (context, outbuf, inbuf); -#endif } static void cast5_amd64_ctr_enc(CAST5_context *ctx, byte *out, const byte *in, byte *ctr) { -#ifdef HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS - call_sysv_fn (_gcry_cast5_amd64_ctr_enc, ctx, out, in, ctr); -#else _gcry_cast5_amd64_ctr_enc (ctx, out, in, ctr); -#endif } static void cast5_amd64_cbc_dec(CAST5_context *ctx, byte *out, const byte *in, byte *iv) { -#ifdef HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS - call_sysv_fn (_gcry_cast5_amd64_cbc_dec, ctx, out, in, iv); -#else _gcry_cast5_amd64_cbc_dec (ctx, out, in, iv); -#endif } static void cast5_amd64_cfb_dec(CAST5_context *ctx, byte *out, const byte *in, byte *iv) { -#ifdef HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS - call_sysv_fn (_gcry_cast5_amd64_cfb_dec, ctx, out, in, iv); -#else _gcry_cast5_amd64_cfb_dec (ctx, out, in, iv); -#endif } static unsigned int encrypt_block (void *context , byte *outbuf, const byte *inbuf) { CAST5_context *c = (CAST5_context *) context; do_encrypt_block (c, outbuf, inbuf); return /*burn_stack*/ (2*8); } static unsigned int decrypt_block (void *context, byte *outbuf, const byte *inbuf) { CAST5_context *c = (CAST5_context *) context; do_decrypt_block (c, outbuf, inbuf); return /*burn_stack*/ (2*8); } #elif defined(USE_ARM_ASM) /* ARM assembly implementations of CAST5. 
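These follow the native ARM procedure-call convention directly, so they never needed an ABI-conversion shim like the call_sysv_fn wrapper removed above.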
*/ extern void _gcry_cast5_arm_encrypt_block(CAST5_context *c, byte *outbuf, const byte *inbuf); extern void _gcry_cast5_arm_decrypt_block(CAST5_context *c, byte *outbuf, const byte *inbuf); /* These assembly implementations process two blocks in parallel. */ extern void _gcry_cast5_arm_ctr_enc(CAST5_context *ctx, byte *out, const byte *in, byte *ctr); extern void _gcry_cast5_arm_cbc_dec(CAST5_context *ctx, byte *out, const byte *in, byte *iv); extern void _gcry_cast5_arm_cfb_dec(CAST5_context *ctx, byte *out, const byte *in, byte *iv); static void do_encrypt_block (CAST5_context *context, byte *outbuf, const byte *inbuf) { _gcry_cast5_arm_encrypt_block (context, outbuf, inbuf); } static void do_decrypt_block (CAST5_context *context, byte *outbuf, const byte *inbuf) { _gcry_cast5_arm_decrypt_block (context, outbuf, inbuf); } static unsigned int encrypt_block (void *context , byte *outbuf, const byte *inbuf) { CAST5_context *c = (CAST5_context *) context; do_encrypt_block (c, outbuf, inbuf); return /*burn_stack*/ (10*4); } static unsigned int decrypt_block (void *context, byte *outbuf, const byte *inbuf) { CAST5_context *c = (CAST5_context *) context; do_decrypt_block (c, outbuf, inbuf); return /*burn_stack*/ (10*4); } #else /*USE_ARM_ASM*/ #define F1(D,m,r) ( (I = ((m) + (D))), (I=rol(I,(r))), \ (((s1[I >> 24] ^ s2[(I>>16)&0xff]) - s3[(I>>8)&0xff]) + s4[I&0xff]) ) #define F2(D,m,r) ( (I = ((m) ^ (D))), (I=rol(I,(r))), \ (((s1[I >> 24] - s2[(I>>16)&0xff]) + s3[(I>>8)&0xff]) ^ s4[I&0xff]) ) #define F3(D,m,r) ( (I = ((m) - (D))), (I=rol(I,(r))), \ (((s1[I >> 24] + s2[(I>>16)&0xff]) ^ s3[(I>>8)&0xff]) - s4[I&0xff]) ) static void do_encrypt_block( CAST5_context *c, byte *outbuf, const byte *inbuf ) { u32 l, r, t; u32 I; /* used by the Fx macros */ u32 *Km; byte *Kr; Km = c->Km; Kr = c->Kr; /* (L0,R0) <-- (m1...m64). (Split the plaintext into left and * right 32-bit halves L0 = m1...m32 and R0 = m33...m64.) */ l = buf_get_be32(inbuf + 0); r = buf_get_be32(inbuf + 4); /* (16 rounds) for i from 1 to 16, compute Li and Ri as follows: * Li = Ri-1; * Ri = Li-1 ^ f(Ri-1,Kmi,Kri), where f is defined in Section 2.2 * Rounds 1, 4, 7, 10, 13, and 16 use f function Type 1. * Rounds 2, 5, 8, 11, and 14 use f function Type 2. * Rounds 3, 6, 9, 12, and 15 use f function Type 3. */ t = l; l = r; r = t ^ F1(r, Km[ 0], Kr[ 0]); t = l; l = r; r = t ^ F2(r, Km[ 1], Kr[ 1]); t = l; l = r; r = t ^ F3(r, Km[ 2], Kr[ 2]); t = l; l = r; r = t ^ F1(r, Km[ 3], Kr[ 3]); t = l; l = r; r = t ^ F2(r, Km[ 4], Kr[ 4]); t = l; l = r; r = t ^ F3(r, Km[ 5], Kr[ 5]); t = l; l = r; r = t ^ F1(r, Km[ 6], Kr[ 6]); t = l; l = r; r = t ^ F2(r, Km[ 7], Kr[ 7]); t = l; l = r; r = t ^ F3(r, Km[ 8], Kr[ 8]); t = l; l = r; r = t ^ F1(r, Km[ 9], Kr[ 9]); t = l; l = r; r = t ^ F2(r, Km[10], Kr[10]); t = l; l = r; r = t ^ F3(r, Km[11], Kr[11]); t = l; l = r; r = t ^ F1(r, Km[12], Kr[12]); t = l; l = r; r = t ^ F2(r, Km[13], Kr[13]); t = l; l = r; r = t ^ F3(r, Km[14], Kr[14]); t = l; l = r; r = t ^ F1(r, Km[15], Kr[15]); /* c1...c64 <-- (R16,L16). (Exchange final blocks L16, R16 and * concatenate to form the ciphertext.) 
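* (This final swap of the halves is what makes the Feistel network self-inverse: do_decrypt_block below runs the same 16 rounds with Km/Kr applied in reverse order.)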
*/ buf_put_be32(outbuf + 0, r); buf_put_be32(outbuf + 4, l); } static unsigned int encrypt_block (void *context , byte *outbuf, const byte *inbuf) { CAST5_context *c = (CAST5_context *) context; do_encrypt_block (c, outbuf, inbuf); return /*burn_stack*/ (20+4*sizeof(void*)); } static void do_decrypt_block (CAST5_context *c, byte *outbuf, const byte *inbuf ) { u32 l, r, t; u32 I; u32 *Km; byte *Kr; Km = c->Km; Kr = c->Kr; l = buf_get_be32(inbuf + 0); r = buf_get_be32(inbuf + 4); t = l; l = r; r = t ^ F1(r, Km[15], Kr[15]); t = l; l = r; r = t ^ F3(r, Km[14], Kr[14]); t = l; l = r; r = t ^ F2(r, Km[13], Kr[13]); t = l; l = r; r = t ^ F1(r, Km[12], Kr[12]); t = l; l = r; r = t ^ F3(r, Km[11], Kr[11]); t = l; l = r; r = t ^ F2(r, Km[10], Kr[10]); t = l; l = r; r = t ^ F1(r, Km[ 9], Kr[ 9]); t = l; l = r; r = t ^ F3(r, Km[ 8], Kr[ 8]); t = l; l = r; r = t ^ F2(r, Km[ 7], Kr[ 7]); t = l; l = r; r = t ^ F1(r, Km[ 6], Kr[ 6]); t = l; l = r; r = t ^ F3(r, Km[ 5], Kr[ 5]); t = l; l = r; r = t ^ F2(r, Km[ 4], Kr[ 4]); t = l; l = r; r = t ^ F1(r, Km[ 3], Kr[ 3]); t = l; l = r; r = t ^ F3(r, Km[ 2], Kr[ 2]); t = l; l = r; r = t ^ F2(r, Km[ 1], Kr[ 1]); t = l; l = r; r = t ^ F1(r, Km[ 0], Kr[ 0]); buf_put_be32(outbuf + 0, r); buf_put_be32(outbuf + 4, l); } static unsigned int decrypt_block (void *context, byte *outbuf, const byte *inbuf) { CAST5_context *c = (CAST5_context *) context; do_decrypt_block (c, outbuf, inbuf); return /*burn_stack*/ (20+4*sizeof(void*)); } #endif /*!USE_ARM_ASM*/ /* Bulk encryption of complete blocks in CTR mode. This function is only intended for the bulk encryption feature of cipher.c. CTR is expected to be of size CAST5_BLOCKSIZE. */ void _gcry_cast5_ctr_enc(void *context, unsigned char *ctr, void *outbuf_arg, const void *inbuf_arg, size_t nblocks) { CAST5_context *ctx = context; unsigned char *outbuf = outbuf_arg; const unsigned char *inbuf = inbuf_arg; unsigned char tmpbuf[CAST5_BLOCKSIZE]; int burn_stack_depth = (20 + 4 * sizeof(void*)) + 2 * CAST5_BLOCKSIZE; int i; #ifdef USE_AMD64_ASM { if (nblocks >= 4) burn_stack_depth += 8 * sizeof(void*); /* Process data in 4 block chunks. */ while (nblocks >= 4) { cast5_amd64_ctr_enc(ctx, outbuf, inbuf, ctr); nblocks -= 4; outbuf += 4 * CAST5_BLOCKSIZE; inbuf += 4 * CAST5_BLOCKSIZE; } /* Use generic code to handle smaller chunks... */ /* TODO: use caching instead? */ } #elif defined(USE_ARM_ASM) { /* Process data in 2 block chunks. */ while (nblocks >= 2) { _gcry_cast5_arm_ctr_enc(ctx, outbuf, inbuf, ctr); nblocks -= 2; outbuf += 2 * CAST5_BLOCKSIZE; inbuf += 2 * CAST5_BLOCKSIZE; } /* Use generic code to handle smaller chunks... */ /* TODO: use caching instead? */ } #endif for ( ;nblocks; nblocks-- ) { /* Encrypt the counter. */ do_encrypt_block(ctx, tmpbuf, ctr); /* XOR the input with the encrypted counter and store in output. */ buf_xor(outbuf, tmpbuf, inbuf, CAST5_BLOCKSIZE); outbuf += CAST5_BLOCKSIZE; inbuf += CAST5_BLOCKSIZE; /* Increment the counter. */ for (i = CAST5_BLOCKSIZE; i > 0; i--) { ctr[i-1]++; if (ctr[i-1]) break; } } wipememory(tmpbuf, sizeof(tmpbuf)); _gcry_burn_stack(burn_stack_depth); } /* Bulk decryption of complete blocks in CBC mode. This function is only intended for the bulk encryption feature of cipher.c. 
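CBC decryption recovers P[i] = Decrypt(C[i]) XOR C[i-1], with the IV taking the place of C[0]; the per-block loop below stashes each ciphertext block in SAVEBUF first because INBUF and OUTBUF may alias.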
*/ void _gcry_cast5_cbc_dec(void *context, unsigned char *iv, void *outbuf_arg, const void *inbuf_arg, size_t nblocks) { CAST5_context *ctx = context; unsigned char *outbuf = outbuf_arg; const unsigned char *inbuf = inbuf_arg; unsigned char savebuf[CAST5_BLOCKSIZE]; int burn_stack_depth = (20 + 4 * sizeof(void*)) + 2 * CAST5_BLOCKSIZE; #ifdef USE_AMD64_ASM { if (nblocks >= 4) burn_stack_depth += 8 * sizeof(void*); /* Process data in 4 block chunks. */ while (nblocks >= 4) { cast5_amd64_cbc_dec(ctx, outbuf, inbuf, iv); nblocks -= 4; outbuf += 4 * CAST5_BLOCKSIZE; inbuf += 4 * CAST5_BLOCKSIZE; } /* Use generic code to handle smaller chunks... */ } #elif defined(USE_ARM_ASM) { /* Process data in 2 block chunks. */ while (nblocks >= 2) { _gcry_cast5_arm_cbc_dec(ctx, outbuf, inbuf, iv); nblocks -= 2; outbuf += 2 * CAST5_BLOCKSIZE; inbuf += 2 * CAST5_BLOCKSIZE; } /* Use generic code to handle smaller chunks... */ } #endif for ( ;nblocks; nblocks-- ) { /* INBUF is needed later and it may be identical to OUTBUF, so store the intermediate result to SAVEBUF. */ do_decrypt_block (ctx, savebuf, inbuf); buf_xor_n_copy_2(outbuf, savebuf, iv, inbuf, CAST5_BLOCKSIZE); inbuf += CAST5_BLOCKSIZE; outbuf += CAST5_BLOCKSIZE; } wipememory(savebuf, sizeof(savebuf)); _gcry_burn_stack(burn_stack_depth); } /* Bulk decryption of complete blocks in CFB mode. This function is only intended for the bulk encryption feature of cipher.c. */ void _gcry_cast5_cfb_dec(void *context, unsigned char *iv, void *outbuf_arg, const void *inbuf_arg, size_t nblocks) { CAST5_context *ctx = context; unsigned char *outbuf = outbuf_arg; const unsigned char *inbuf = inbuf_arg; int burn_stack_depth = (20 + 4 * sizeof(void*)) + 2 * CAST5_BLOCKSIZE; #ifdef USE_AMD64_ASM { if (nblocks >= 4) burn_stack_depth += 8 * sizeof(void*); /* Process data in 4 block chunks. */ while (nblocks >= 4) { cast5_amd64_cfb_dec(ctx, outbuf, inbuf, iv); nblocks -= 4; outbuf += 4 * CAST5_BLOCKSIZE; inbuf += 4 * CAST5_BLOCKSIZE; } /* Use generic code to handle smaller chunks... */ } #elif defined(USE_ARM_ASM) { /* Process data in 2 block chunks. */ while (nblocks >= 2) { _gcry_cast5_arm_cfb_dec(ctx, outbuf, inbuf, iv); nblocks -= 2; outbuf += 2 * CAST5_BLOCKSIZE; inbuf += 2 * CAST5_BLOCKSIZE; } /* Use generic code to handle smaller chunks... */ } #endif for ( ;nblocks; nblocks-- ) { do_encrypt_block(ctx, iv, iv); buf_xor_n_copy(outbuf, iv, inbuf, CAST5_BLOCKSIZE); outbuf += CAST5_BLOCKSIZE; inbuf += CAST5_BLOCKSIZE; } _gcry_burn_stack(burn_stack_depth); } /* Run the self-tests for CAST5-CTR, tests IV increment of bulk CTR encryption. Returns NULL on success. */ static const char * selftest_ctr (void) { const int nblocks = 4+1; const int blocksize = CAST5_BLOCKSIZE; const int context_size = sizeof(CAST5_context); return _gcry_selftest_helper_ctr("CAST5", &cast_setkey, &encrypt_block, &_gcry_cast5_ctr_enc, nblocks, blocksize, context_size); } /* Run the self-tests for CAST5-CBC, tests bulk CBC decryption. Returns NULL on success. */ static const char * selftest_cbc (void) { const int nblocks = 4+2; const int blocksize = CAST5_BLOCKSIZE; const int context_size = sizeof(CAST5_context); return _gcry_selftest_helper_cbc("CAST5", &cast_setkey, &encrypt_block, &_gcry_cast5_cbc_dec, nblocks, blocksize, context_size); } /* Run the self-tests for CAST5-CFB, tests bulk CFB decryption. Returns NULL on success. 
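(nblocks is 4+2 so that one pass through the 4-way assembly path and the generic two-block tail are both exercised.)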
*/ static const char * selftest_cfb (void) { const int nblocks = 4+2; const int blocksize = CAST5_BLOCKSIZE; const int context_size = sizeof(CAST5_context); return _gcry_selftest_helper_cfb("CAST5", &cast_setkey, &encrypt_block, &_gcry_cast5_cfb_dec, nblocks, blocksize, context_size); } static const char* selftest(void) { CAST5_context c; static const byte key[16] = { 0x01, 0x23, 0x45, 0x67, 0x12, 0x34, 0x56, 0x78, 0x23, 0x45, 0x67, 0x89, 0x34, 0x56, 0x78, 0x9A }; static const byte plain[8] = { 0x01, 0x23, 0x45, 0x67, 0x89, 0xAB, 0xCD, 0xEF }; static const byte cipher[8] = { 0x23, 0x8B, 0x4F, 0xE5, 0x84, 0x7E, 0x44, 0xB2 }; byte buffer[8]; const char *r; cast_setkey( &c, key, 16 ); encrypt_block( &c, buffer, plain ); if( memcmp( buffer, cipher, 8 ) ) return "1"; decrypt_block( &c, buffer, buffer ); if( memcmp( buffer, plain, 8 ) ) return "2"; #if 0 /* full maintenance test */ { int i; byte a0[16] = { 0x01,0x23,0x45,0x67,0x12,0x34,0x56,0x78, 0x23,0x45,0x67,0x89,0x34,0x56,0x78,0x9A }; byte b0[16] = { 0x01,0x23,0x45,0x67,0x12,0x34,0x56,0x78, 0x23,0x45,0x67,0x89,0x34,0x56,0x78,0x9A }; byte a1[16] = { 0xEE,0xA9,0xD0,0xA2,0x49,0xFD,0x3B,0xA6, 0xB3,0x43,0x6F,0xB8,0x9D,0x6D,0xCA,0x92 }; byte b1[16] = { 0xB2,0xC9,0x5E,0xB0,0x0C,0x31,0xAD,0x71, 0x80,0xAC,0x05,0xB8,0xE8,0x3D,0x69,0x6E }; for(i=0; i < 1000000; i++ ) { cast_setkey( &c, b0, 16 ); encrypt_block( &c, a0, a0 ); encrypt_block( &c, a0+8, a0+8 ); cast_setkey( &c, a0, 16 ); encrypt_block( &c, b0, b0 ); encrypt_block( &c, b0+8, b0+8 ); } if( memcmp( a0, a1, 16 ) || memcmp( b0, b1, 16 ) ) return "3"; } #endif if ( (r = selftest_cbc ()) ) return r; if ( (r = selftest_cfb ()) ) return r; if ( (r = selftest_ctr ()) ) return r; return NULL; } static void key_schedule( u32 *x, u32 *z, u32 *k ) { #define xi(i) ((x[(i)/4] >> (8*(3-((i)%4)))) & 0xff) #define zi(i) ((z[(i)/4] >> (8*(3-((i)%4)))) & 0xff) z[0] = x[0] ^ s5[xi(13)]^s6[xi(15)]^s7[xi(12)]^s8[xi(14)]^s7[xi( 8)]; z[1] = x[2] ^ s5[zi( 0)]^s6[zi( 2)]^s7[zi( 1)]^s8[zi( 3)]^s8[xi(10)]; z[2] = x[3] ^ s5[zi( 7)]^s6[zi( 6)]^s7[zi( 5)]^s8[zi( 4)]^s5[xi( 9)]; z[3] = x[1] ^ s5[zi(10)]^s6[zi( 9)]^s7[zi(11)]^s8[zi( 8)]^s6[xi(11)]; k[0] = s5[zi( 8)]^s6[zi( 9)]^s7[zi( 7)]^s8[zi( 6)]^s5[zi( 2)]; k[1] = s5[zi(10)]^s6[zi(11)]^s7[zi( 5)]^s8[zi( 4)]^s6[zi( 6)]; k[2] = s5[zi(12)]^s6[zi(13)]^s7[zi( 3)]^s8[zi( 2)]^s7[zi( 9)]; k[3] = s5[zi(14)]^s6[zi(15)]^s7[zi( 1)]^s8[zi( 0)]^s8[zi(12)]; x[0] = z[2] ^ s5[zi( 5)]^s6[zi( 7)]^s7[zi( 4)]^s8[zi( 6)]^s7[zi( 0)]; x[1] = z[0] ^ s5[xi( 0)]^s6[xi( 2)]^s7[xi( 1)]^s8[xi( 3)]^s8[zi( 2)]; x[2] = z[1] ^ s5[xi( 7)]^s6[xi( 6)]^s7[xi( 5)]^s8[xi( 4)]^s5[zi( 1)]; x[3] = z[3] ^ s5[xi(10)]^s6[xi( 9)]^s7[xi(11)]^s8[xi( 8)]^s6[zi( 3)]; k[4] = s5[xi( 3)]^s6[xi( 2)]^s7[xi(12)]^s8[xi(13)]^s5[xi( 8)]; k[5] = s5[xi( 1)]^s6[xi( 0)]^s7[xi(14)]^s8[xi(15)]^s6[xi(13)]; k[6] = s5[xi( 7)]^s6[xi( 6)]^s7[xi( 8)]^s8[xi( 9)]^s7[xi( 3)]; k[7] = s5[xi( 5)]^s6[xi( 4)]^s7[xi(10)]^s8[xi(11)]^s8[xi( 7)]; z[0] = x[0] ^ s5[xi(13)]^s6[xi(15)]^s7[xi(12)]^s8[xi(14)]^s7[xi( 8)]; z[1] = x[2] ^ s5[zi( 0)]^s6[zi( 2)]^s7[zi( 1)]^s8[zi( 3)]^s8[xi(10)]; z[2] = x[3] ^ s5[zi( 7)]^s6[zi( 6)]^s7[zi( 5)]^s8[zi( 4)]^s5[xi( 9)]; z[3] = x[1] ^ s5[zi(10)]^s6[zi( 9)]^s7[zi(11)]^s8[zi( 8)]^s6[xi(11)]; k[8] = s5[zi( 3)]^s6[zi( 2)]^s7[zi(12)]^s8[zi(13)]^s5[zi( 9)]; k[9] = s5[zi( 1)]^s6[zi( 0)]^s7[zi(14)]^s8[zi(15)]^s6[zi(12)]; k[10]= s5[zi( 7)]^s6[zi( 6)]^s7[zi( 8)]^s8[zi( 9)]^s7[zi( 2)]; k[11]= s5[zi( 5)]^s6[zi( 4)]^s7[zi(10)]^s8[zi(11)]^s8[zi( 6)]; x[0] = z[2] ^ s5[zi( 5)]^s6[zi( 7)]^s7[zi( 4)]^s8[zi( 6)]^s7[zi( 0)]; x[1] = z[0] ^ 
s5[xi( 0)]^s6[xi( 2)]^s7[xi( 1)]^s8[xi( 3)]^s8[zi( 2)]; x[2] = z[1] ^ s5[xi( 7)]^s6[xi( 6)]^s7[xi( 5)]^s8[xi( 4)]^s5[zi( 1)]; x[3] = z[3] ^ s5[xi(10)]^s6[xi( 9)]^s7[xi(11)]^s8[xi( 8)]^s6[zi( 3)]; k[12]= s5[xi( 8)]^s6[xi( 9)]^s7[xi( 7)]^s8[xi( 6)]^s5[xi( 3)]; k[13]= s5[xi(10)]^s6[xi(11)]^s7[xi( 5)]^s8[xi( 4)]^s6[xi( 7)]; k[14]= s5[xi(12)]^s6[xi(13)]^s7[xi( 3)]^s8[xi( 2)]^s7[xi( 8)]; k[15]= s5[xi(14)]^s6[xi(15)]^s7[xi( 1)]^s8[xi( 0)]^s8[xi(13)]; #undef xi #undef zi } static gcry_err_code_t do_cast_setkey( CAST5_context *c, const byte *key, unsigned keylen ) { static int initialized; static const char* selftest_failed; int i; u32 x[4]; u32 z[4]; u32 k[16]; if( !initialized ) { initialized = 1; selftest_failed = selftest(); if( selftest_failed ) log_error ("CAST5 selftest failed (%s).\n", selftest_failed ); } if( selftest_failed ) return GPG_ERR_SELFTEST_FAILED; if( keylen != 16 ) return GPG_ERR_INV_KEYLEN; x[0] = buf_get_be32(key + 0); x[1] = buf_get_be32(key + 4); x[2] = buf_get_be32(key + 8); x[3] = buf_get_be32(key + 12); key_schedule( x, z, k ); for(i=0; i < 16; i++ ) c->Km[i] = k[i]; key_schedule( x, z, k ); for(i=0; i < 16; i++ ) c->Kr[i] = k[i] & 0x1f; #ifdef USE_ARM_ASM for (i = 0; i < 4; i++) { byte Kr_arm[4]; /* Convert rotate left to rotate right and add shift left * by 2. */ Kr_arm[0] = ((32 - c->Kr[4 * i + 0]) - 2) & 0x1f; Kr_arm[1] = ((32 - c->Kr[4 * i + 1]) - 2) & 0x1f; Kr_arm[2] = ((32 - c->Kr[4 * i + 2]) - 2) & 0x1f; Kr_arm[3] = ((32 - c->Kr[4 * i + 3]) - 2) & 0x1f; /* Endian friendly store. */ c->Kr_arm_enc[i] = Kr_arm[0] | (Kr_arm[1] << 8) | (Kr_arm[2] << 16) | (Kr_arm[3] << 24); c->Kr_arm_dec[i] = Kr_arm[3] | (Kr_arm[2] << 8) | (Kr_arm[1] << 16) | (Kr_arm[0] << 24); wipememory(Kr_arm, sizeof(Kr_arm)); } #endif wipememory(x, sizeof x); wipememory(z, sizeof z); wipememory(k, sizeof k); #undef xi #undef zi return GPG_ERR_NO_ERROR; } static gcry_err_code_t cast_setkey (void *context, const byte *key, unsigned keylen ) { CAST5_context *c = (CAST5_context *) context; gcry_err_code_t rc = do_cast_setkey (c, key, keylen); return rc; } gcry_cipher_spec_t _gcry_cipher_spec_cast5 = { GCRY_CIPHER_CAST5, {0, 0}, "CAST5", NULL, NULL, CAST5_BLOCKSIZE, 128, sizeof (CAST5_context), cast_setkey, encrypt_block, decrypt_block }; diff --git a/cipher/des-amd64.S b/cipher/des-amd64.S index 1b7cfba8..f25573d9 100644 --- a/cipher/des-amd64.S +++ b/cipher/des-amd64.S @@ -1,1036 +1,1037 @@ /* des-amd64.S - AMD64 assembly implementation of 3DES cipher * * Copyright (C) 2014 Jussi Kivilinna <jussi.kivilinna@iki.fi> * * This file is part of Libgcrypt. * * Libgcrypt is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as * published by the Free Software Foundation; either version 2.1 of * the License, or (at your option) any later version. * * Libgcrypt is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this program; if not, see <http://www.gnu.org/licenses/>. */ #ifdef __x86_64 #include <config.h> #if defined(USE_DES) && (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \ defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS)) -#ifdef __PIC__ -# define RIP (%rip) -#else -# define RIP -#endif - -#ifdef HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS -# define ELF(...) __VA_ARGS__ -#else -# define ELF(...) 
/*_*/ -#endif +#include "asm-common-amd64.h" .text #define s1 0 #define s2 ((s1) + (64*8)) #define s3 ((s2) + (64*8)) #define s4 ((s3) + (64*8)) #define s5 ((s4) + (64*8)) #define s6 ((s5) + (64*8)) #define s7 ((s6) + (64*8)) #define s8 ((s7) + (64*8)) /* register macros */ #define CTX %rdi #define SBOXES %rbp #define RL0 %r8 #define RL1 %r9 #define RL2 %r10 #define RL0d %r8d #define RL1d %r9d #define RL2d %r10d #define RR0 %r11 #define RR1 %r12 #define RR2 %r13 #define RR0d %r11d #define RR1d %r12d #define RR2d %r13d #define RW0 %rax #define RW1 %rbx #define RW2 %rcx #define RW0d %eax #define RW1d %ebx #define RW2d %ecx #define RW0bl %al #define RW1bl %bl #define RW2bl %cl #define RW0bh %ah #define RW1bh %bh #define RW2bh %ch #define RT0 %r15 #define RT1 %rsi #define RT2 %r14 #define RT3 %rdx #define RT0d %r15d #define RT1d %esi #define RT2d %r14d #define RT3d %edx /*********************************************************************** * 1-way 3DES ***********************************************************************/ #define do_permutation(a, b, offset, mask) \ movl a, RT0d; \ shrl $(offset), RT0d; \ xorl b, RT0d; \ andl $(mask), RT0d; \ xorl RT0d, b; \ shll $(offset), RT0d; \ xorl RT0d, a; #define expand_to_64bits(val, mask) \ movl val##d, RT0d; \ rorl $4, RT0d; \ shlq $32, RT0; \ orq RT0, val; \ andq mask, val; #define compress_to_64bits(val) \ movq val, RT0; \ shrq $32, RT0; \ roll $4, RT0d; \ orl RT0d, val##d; #define initial_permutation(left, right) \ do_permutation(left##d, right##d, 4, 0x0f0f0f0f); \ do_permutation(left##d, right##d, 16, 0x0000ffff); \ do_permutation(right##d, left##d, 2, 0x33333333); \ do_permutation(right##d, left##d, 8, 0x00ff00ff); \ movabs $0x3f3f3f3f3f3f3f3f, RT3; \ movl left##d, RW0d; \ roll $1, right##d; \ xorl right##d, RW0d; \ andl $0xaaaaaaaa, RW0d; \ xorl RW0d, left##d; \ xorl RW0d, right##d; \ roll $1, left##d; \ expand_to_64bits(right, RT3); \ expand_to_64bits(left, RT3); #define final_permutation(left, right) \ compress_to_64bits(right); \ compress_to_64bits(left); \ movl right##d, RW0d; \ rorl $1, left##d; \ xorl left##d, RW0d; \ andl $0xaaaaaaaa, RW0d; \ xorl RW0d, right##d; \ xorl RW0d, left##d; \ rorl $1, right##d; \ do_permutation(right##d, left##d, 8, 0x00ff00ff); \ do_permutation(right##d, left##d, 2, 0x33333333); \ do_permutation(left##d, right##d, 16, 0x0000ffff); \ do_permutation(left##d, right##d, 4, 0x0f0f0f0f); #define round1(n, from, to, load_next_key) \ xorq from, RW0; \ \ movzbl RW0bl, RT0d; \ movzbl RW0bh, RT1d; \ shrq $16, RW0; \ movzbl RW0bl, RT2d; \ movzbl RW0bh, RT3d; \ shrq $16, RW0; \ movq s8(SBOXES, RT0, 8), RT0; \ xorq s6(SBOXES, RT1, 8), to; \ movzbl RW0bl, RL1d; \ movzbl RW0bh, RT1d; \ shrl $16, RW0d; \ xorq s4(SBOXES, RT2, 8), RT0; \ xorq s2(SBOXES, RT3, 8), to; \ movzbl RW0bl, RT2d; \ movzbl RW0bh, RT3d; \ xorq s7(SBOXES, RL1, 8), RT0; \ xorq s5(SBOXES, RT1, 8), to; \ xorq s3(SBOXES, RT2, 8), RT0; \ load_next_key(n, RW0); \ xorq RT0, to; \ xorq s1(SBOXES, RT3, 8), to; \ #define load_next_key(n, RWx) \ movq (((n) + 1) * 8)(CTX), RWx; #define dummy2(a, b) /*_*/ #define read_block(io, left, right) \ movl (io), left##d; \ movl 4(io), right##d; \ bswapl left##d; \ bswapl right##d; #define write_block(io, left, right) \ bswapl left##d; \ bswapl right##d; \ movl left##d, (io); \ movl right##d, 4(io); .align 8 .globl _gcry_3des_amd64_crypt_block ELF(.type _gcry_3des_amd64_crypt_block,@function;) _gcry_3des_amd64_crypt_block: /* input: * %rdi: round keys, CTX * %rsi: dst * %rdx: src */ + ENTER_SYSV_FUNC_PARAMS_0_4 + pushq 
%rbp; pushq %rbx; pushq %r12; pushq %r13; pushq %r14; pushq %r15; pushq %rsi; /*dst*/ - leaq .L_s1 RIP, SBOXES; + leaq .L_s1 rRIP, SBOXES; read_block(%rdx, RL0, RR0); initial_permutation(RL0, RR0); movq (CTX), RW0; round1(0, RR0, RL0, load_next_key); round1(1, RL0, RR0, load_next_key); round1(2, RR0, RL0, load_next_key); round1(3, RL0, RR0, load_next_key); round1(4, RR0, RL0, load_next_key); round1(5, RL0, RR0, load_next_key); round1(6, RR0, RL0, load_next_key); round1(7, RL0, RR0, load_next_key); round1(8, RR0, RL0, load_next_key); round1(9, RL0, RR0, load_next_key); round1(10, RR0, RL0, load_next_key); round1(11, RL0, RR0, load_next_key); round1(12, RR0, RL0, load_next_key); round1(13, RL0, RR0, load_next_key); round1(14, RR0, RL0, load_next_key); round1(15, RL0, RR0, load_next_key); round1(16+0, RL0, RR0, load_next_key); round1(16+1, RR0, RL0, load_next_key); round1(16+2, RL0, RR0, load_next_key); round1(16+3, RR0, RL0, load_next_key); round1(16+4, RL0, RR0, load_next_key); round1(16+5, RR0, RL0, load_next_key); round1(16+6, RL0, RR0, load_next_key); round1(16+7, RR0, RL0, load_next_key); round1(16+8, RL0, RR0, load_next_key); round1(16+9, RR0, RL0, load_next_key); round1(16+10, RL0, RR0, load_next_key); round1(16+11, RR0, RL0, load_next_key); round1(16+12, RL0, RR0, load_next_key); round1(16+13, RR0, RL0, load_next_key); round1(16+14, RL0, RR0, load_next_key); round1(16+15, RR0, RL0, load_next_key); round1(32+0, RR0, RL0, load_next_key); round1(32+1, RL0, RR0, load_next_key); round1(32+2, RR0, RL0, load_next_key); round1(32+3, RL0, RR0, load_next_key); round1(32+4, RR0, RL0, load_next_key); round1(32+5, RL0, RR0, load_next_key); round1(32+6, RR0, RL0, load_next_key); round1(32+7, RL0, RR0, load_next_key); round1(32+8, RR0, RL0, load_next_key); round1(32+9, RL0, RR0, load_next_key); round1(32+10, RR0, RL0, load_next_key); round1(32+11, RL0, RR0, load_next_key); round1(32+12, RR0, RL0, load_next_key); round1(32+13, RL0, RR0, load_next_key); round1(32+14, RR0, RL0, load_next_key); round1(32+15, RL0, RR0, dummy2); popq RW2; /*dst*/ final_permutation(RR0, RL0); write_block(RW2, RR0, RL0); popq %r15; popq %r14; popq %r13; popq %r12; popq %rbx; popq %rbp; + EXIT_SYSV_FUNC ret; ELF(.size _gcry_3des_amd64_crypt_block,.-_gcry_3des_amd64_crypt_block;) /*********************************************************************** * 3-way 3DES ***********************************************************************/ #define expand_to_64bits(val, mask) \ movl val##d, RT0d; \ rorl $4, RT0d; \ shlq $32, RT0; \ orq RT0, val; \ andq mask, val; #define compress_to_64bits(val) \ movq val, RT0; \ shrq $32, RT0; \ roll $4, RT0d; \ orl RT0d, val##d; #define initial_permutation3(left, right) \ do_permutation(left##0d, right##0d, 4, 0x0f0f0f0f); \ do_permutation(left##0d, right##0d, 16, 0x0000ffff); \ do_permutation(left##1d, right##1d, 4, 0x0f0f0f0f); \ do_permutation(left##1d, right##1d, 16, 0x0000ffff); \ do_permutation(left##2d, right##2d, 4, 0x0f0f0f0f); \ do_permutation(left##2d, right##2d, 16, 0x0000ffff); \ \ do_permutation(right##0d, left##0d, 2, 0x33333333); \ do_permutation(right##0d, left##0d, 8, 0x00ff00ff); \ do_permutation(right##1d, left##1d, 2, 0x33333333); \ do_permutation(right##1d, left##1d, 8, 0x00ff00ff); \ do_permutation(right##2d, left##2d, 2, 0x33333333); \ do_permutation(right##2d, left##2d, 8, 0x00ff00ff); \ \ movabs $0x3f3f3f3f3f3f3f3f, RT3; \ \ movl left##0d, RW0d; \ roll $1, right##0d; \ xorl right##0d, RW0d; \ andl $0xaaaaaaaa, RW0d; \ xorl RW0d, left##0d; \ xorl RW0d, right##0d; \ roll 
$1, left##0d; \ expand_to_64bits(right##0, RT3); \ expand_to_64bits(left##0, RT3); \ movl left##1d, RW1d; \ roll $1, right##1d; \ xorl right##1d, RW1d; \ andl $0xaaaaaaaa, RW1d; \ xorl RW1d, left##1d; \ xorl RW1d, right##1d; \ roll $1, left##1d; \ expand_to_64bits(right##1, RT3); \ expand_to_64bits(left##1, RT3); \ movl left##2d, RW2d; \ roll $1, right##2d; \ xorl right##2d, RW2d; \ andl $0xaaaaaaaa, RW2d; \ xorl RW2d, left##2d; \ xorl RW2d, right##2d; \ roll $1, left##2d; \ expand_to_64bits(right##2, RT3); \ expand_to_64bits(left##2, RT3); #define final_permutation3(left, right) \ compress_to_64bits(right##0); \ compress_to_64bits(left##0); \ movl right##0d, RW0d; \ rorl $1, left##0d; \ xorl left##0d, RW0d; \ andl $0xaaaaaaaa, RW0d; \ xorl RW0d, right##0d; \ xorl RW0d, left##0d; \ rorl $1, right##0d; \ compress_to_64bits(right##1); \ compress_to_64bits(left##1); \ movl right##1d, RW1d; \ rorl $1, left##1d; \ xorl left##1d, RW1d; \ andl $0xaaaaaaaa, RW1d; \ xorl RW1d, right##1d; \ xorl RW1d, left##1d; \ rorl $1, right##1d; \ compress_to_64bits(right##2); \ compress_to_64bits(left##2); \ movl right##2d, RW2d; \ rorl $1, left##2d; \ xorl left##2d, RW2d; \ andl $0xaaaaaaaa, RW2d; \ xorl RW2d, right##2d; \ xorl RW2d, left##2d; \ rorl $1, right##2d; \ \ do_permutation(right##0d, left##0d, 8, 0x00ff00ff); \ do_permutation(right##0d, left##0d, 2, 0x33333333); \ do_permutation(right##1d, left##1d, 8, 0x00ff00ff); \ do_permutation(right##1d, left##1d, 2, 0x33333333); \ do_permutation(right##2d, left##2d, 8, 0x00ff00ff); \ do_permutation(right##2d, left##2d, 2, 0x33333333); \ \ do_permutation(left##0d, right##0d, 16, 0x0000ffff); \ do_permutation(left##0d, right##0d, 4, 0x0f0f0f0f); \ do_permutation(left##1d, right##1d, 16, 0x0000ffff); \ do_permutation(left##1d, right##1d, 4, 0x0f0f0f0f); \ do_permutation(left##2d, right##2d, 16, 0x0000ffff); \ do_permutation(left##2d, right##2d, 4, 0x0f0f0f0f); #define round3(n, from, to, load_next_key, do_movq) \ xorq from##0, RW0; \ movzbl RW0bl, RT3d; \ movzbl RW0bh, RT1d; \ shrq $16, RW0; \ xorq s8(SBOXES, RT3, 8), to##0; \ xorq s6(SBOXES, RT1, 8), to##0; \ movzbl RW0bl, RT3d; \ movzbl RW0bh, RT1d; \ shrq $16, RW0; \ xorq s4(SBOXES, RT3, 8), to##0; \ xorq s2(SBOXES, RT1, 8), to##0; \ movzbl RW0bl, RT3d; \ movzbl RW0bh, RT1d; \ shrl $16, RW0d; \ xorq s7(SBOXES, RT3, 8), to##0; \ xorq s5(SBOXES, RT1, 8), to##0; \ movzbl RW0bl, RT3d; \ movzbl RW0bh, RT1d; \ load_next_key(n, RW0); \ xorq s3(SBOXES, RT3, 8), to##0; \ xorq s1(SBOXES, RT1, 8), to##0; \ xorq from##1, RW1; \ movzbl RW1bl, RT3d; \ movzbl RW1bh, RT1d; \ shrq $16, RW1; \ xorq s8(SBOXES, RT3, 8), to##1; \ xorq s6(SBOXES, RT1, 8), to##1; \ movzbl RW1bl, RT3d; \ movzbl RW1bh, RT1d; \ shrq $16, RW1; \ xorq s4(SBOXES, RT3, 8), to##1; \ xorq s2(SBOXES, RT1, 8), to##1; \ movzbl RW1bl, RT3d; \ movzbl RW1bh, RT1d; \ shrl $16, RW1d; \ xorq s7(SBOXES, RT3, 8), to##1; \ xorq s5(SBOXES, RT1, 8), to##1; \ movzbl RW1bl, RT3d; \ movzbl RW1bh, RT1d; \ do_movq(RW0, RW1); \ xorq s3(SBOXES, RT3, 8), to##1; \ xorq s1(SBOXES, RT1, 8), to##1; \ xorq from##2, RW2; \ movzbl RW2bl, RT3d; \ movzbl RW2bh, RT1d; \ shrq $16, RW2; \ xorq s8(SBOXES, RT3, 8), to##2; \ xorq s6(SBOXES, RT1, 8), to##2; \ movzbl RW2bl, RT3d; \ movzbl RW2bh, RT1d; \ shrq $16, RW2; \ xorq s4(SBOXES, RT3, 8), to##2; \ xorq s2(SBOXES, RT1, 8), to##2; \ movzbl RW2bl, RT3d; \ movzbl RW2bh, RT1d; \ shrl $16, RW2d; \ xorq s7(SBOXES, RT3, 8), to##2; \ xorq s5(SBOXES, RT1, 8), to##2; \ movzbl RW2bl, RT3d; \ movzbl RW2bh, RT1d; \ do_movq(RW0, RW2); \ xorq s3(SBOXES, 
RT3, 8), to##2; \ xorq s1(SBOXES, RT1, 8), to##2; #define __movq(src, dst) \ movq src, dst; #define read_block(io, left, right) \ movl (io), left##d; \ movl 4(io), right##d; \ bswapl left##d; \ bswapl right##d; #define write_block(io, left, right) \ bswapl left##d; \ bswapl right##d; \ movl left##d, (io); \ movl right##d, 4(io); .align 8 ELF(.type _gcry_3des_amd64_crypt_blk3,@function;) _gcry_3des_amd64_crypt_blk3: /* input: * %rdi: round keys, CTX * RL0d, RR0d, RL1d, RR1d, RL2d, RR2d: 3 input blocks * RR0d, RL0d, RR1d, RL1d, RR2d, RL2d: 3 output blocks */ - leaq .L_s1 RIP, SBOXES; + leaq .L_s1 rRIP, SBOXES; initial_permutation3(RL, RR); movq 0(CTX), RW0; movq RW0, RW1; movq RW0, RW2; round3(0, RR, RL, load_next_key, __movq); round3(1, RL, RR, load_next_key, __movq); round3(2, RR, RL, load_next_key, __movq); round3(3, RL, RR, load_next_key, __movq); round3(4, RR, RL, load_next_key, __movq); round3(5, RL, RR, load_next_key, __movq); round3(6, RR, RL, load_next_key, __movq); round3(7, RL, RR, load_next_key, __movq); round3(8, RR, RL, load_next_key, __movq); round3(9, RL, RR, load_next_key, __movq); round3(10, RR, RL, load_next_key, __movq); round3(11, RL, RR, load_next_key, __movq); round3(12, RR, RL, load_next_key, __movq); round3(13, RL, RR, load_next_key, __movq); round3(14, RR, RL, load_next_key, __movq); round3(15, RL, RR, load_next_key, __movq); round3(16+0, RL, RR, load_next_key, __movq); round3(16+1, RR, RL, load_next_key, __movq); round3(16+2, RL, RR, load_next_key, __movq); round3(16+3, RR, RL, load_next_key, __movq); round3(16+4, RL, RR, load_next_key, __movq); round3(16+5, RR, RL, load_next_key, __movq); round3(16+6, RL, RR, load_next_key, __movq); round3(16+7, RR, RL, load_next_key, __movq); round3(16+8, RL, RR, load_next_key, __movq); round3(16+9, RR, RL, load_next_key, __movq); round3(16+10, RL, RR, load_next_key, __movq); round3(16+11, RR, RL, load_next_key, __movq); round3(16+12, RL, RR, load_next_key, __movq); round3(16+13, RR, RL, load_next_key, __movq); round3(16+14, RL, RR, load_next_key, __movq); round3(16+15, RR, RL, load_next_key, __movq); round3(32+0, RR, RL, load_next_key, __movq); round3(32+1, RL, RR, load_next_key, __movq); round3(32+2, RR, RL, load_next_key, __movq); round3(32+3, RL, RR, load_next_key, __movq); round3(32+4, RR, RL, load_next_key, __movq); round3(32+5, RL, RR, load_next_key, __movq); round3(32+6, RR, RL, load_next_key, __movq); round3(32+7, RL, RR, load_next_key, __movq); round3(32+8, RR, RL, load_next_key, __movq); round3(32+9, RL, RR, load_next_key, __movq); round3(32+10, RR, RL, load_next_key, __movq); round3(32+11, RL, RR, load_next_key, __movq); round3(32+12, RR, RL, load_next_key, __movq); round3(32+13, RL, RR, load_next_key, __movq); round3(32+14, RR, RL, load_next_key, __movq); round3(32+15, RL, RR, dummy2, dummy2); final_permutation3(RR, RL); ret; ELF(.size _gcry_3des_amd64_crypt_blk3,.-_gcry_3des_amd64_crypt_blk3;) .align 8 .globl _gcry_3des_amd64_cbc_dec ELF(.type _gcry_3des_amd64_cbc_dec,@function;) _gcry_3des_amd64_cbc_dec: /* input: * %rdi: ctx, CTX * %rsi: dst (3 blocks) * %rdx: src (3 blocks) * %rcx: iv (64bit) */ + ENTER_SYSV_FUNC_PARAMS_0_4 pushq %rbp; pushq %rbx; pushq %r12; pushq %r13; pushq %r14; pushq %r15; pushq %rsi; /*dst*/ pushq %rdx; /*src*/ pushq %rcx; /*iv*/ /* load input */ movl 0 * 4(%rdx), RL0d; movl 1 * 4(%rdx), RR0d; movl 2 * 4(%rdx), RL1d; movl 3 * 4(%rdx), RR1d; movl 4 * 4(%rdx), RL2d; movl 5 * 4(%rdx), RR2d; bswapl RL0d; bswapl RR0d; bswapl RL1d; bswapl RR1d; bswapl RL2d; bswapl RR2d; call 
_gcry_3des_amd64_crypt_blk3; popq %rcx; /*iv*/ popq %rdx; /*src*/ popq %rsi; /*dst*/ bswapl RR0d; bswapl RL0d; bswapl RR1d; bswapl RL1d; bswapl RR2d; bswapl RL2d; movq 2 * 8(%rdx), RT0; xorl 0 * 4(%rcx), RR0d; xorl 1 * 4(%rcx), RL0d; xorl 0 * 4(%rdx), RR1d; xorl 1 * 4(%rdx), RL1d; xorl 2 * 4(%rdx), RR2d; xorl 3 * 4(%rdx), RL2d; movq RT0, (%rcx); /* store new IV */ movl RR0d, 0 * 4(%rsi); movl RL0d, 1 * 4(%rsi); movl RR1d, 2 * 4(%rsi); movl RL1d, 3 * 4(%rsi); movl RR2d, 4 * 4(%rsi); movl RL2d, 5 * 4(%rsi); popq %r15; popq %r14; popq %r13; popq %r12; popq %rbx; popq %rbp; + EXIT_SYSV_FUNC ret; ELF(.size _gcry_3des_amd64_cbc_dec,.-_gcry_3des_amd64_cbc_dec;) .align 8 .globl _gcry_3des_amd64_ctr_enc ELF(.type _gcry_3des_amd64_ctr_enc,@function;) _gcry_3des_amd64_ctr_enc: /* input: * %rdi: ctx, CTX * %rsi: dst (3 blocks) * %rdx: src (3 blocks) * %rcx: iv (64bit) */ + ENTER_SYSV_FUNC_PARAMS_0_4 pushq %rbp; pushq %rbx; pushq %r12; pushq %r13; pushq %r14; pushq %r15; pushq %rsi; /*dst*/ pushq %rdx; /*src*/ movq %rcx, RW2; /* load IV and byteswap */ movq (RW2), RT0; bswapq RT0; movq RT0, RR0; /* construct IVs */ leaq 1(RT0), RR1; leaq 2(RT0), RR2; leaq 3(RT0), RT0; movq RR0, RL0; movq RR1, RL1; movq RR2, RL2; bswapq RT0; shrq $32, RL0; shrq $32, RL1; shrq $32, RL2; /* store new IV */ movq RT0, (RW2); call _gcry_3des_amd64_crypt_blk3; popq %rdx; /*src*/ popq %rsi; /*dst*/ bswapl RR0d; bswapl RL0d; bswapl RR1d; bswapl RL1d; bswapl RR2d; bswapl RL2d; xorl 0 * 4(%rdx), RR0d; xorl 1 * 4(%rdx), RL0d; xorl 2 * 4(%rdx), RR1d; xorl 3 * 4(%rdx), RL1d; xorl 4 * 4(%rdx), RR2d; xorl 5 * 4(%rdx), RL2d; movl RR0d, 0 * 4(%rsi); movl RL0d, 1 * 4(%rsi); movl RR1d, 2 * 4(%rsi); movl RL1d, 3 * 4(%rsi); movl RR2d, 4 * 4(%rsi); movl RL2d, 5 * 4(%rsi); popq %r15; popq %r14; popq %r13; popq %r12; popq %rbx; popq %rbp; + EXIT_SYSV_FUNC ret; ELF(.size _gcry_3des_amd64_cbc_dec,.-_gcry_3des_amd64_cbc_dec;) .align 8 .globl _gcry_3des_amd64_cfb_dec ELF(.type _gcry_3des_amd64_cfb_dec,@function;) _gcry_3des_amd64_cfb_dec: /* input: * %rdi: ctx, CTX * %rsi: dst (3 blocks) * %rdx: src (3 blocks) * %rcx: iv (64bit) */ + ENTER_SYSV_FUNC_PARAMS_0_4 + pushq %rbp; pushq %rbx; pushq %r12; pushq %r13; pushq %r14; pushq %r15; pushq %rsi; /*dst*/ pushq %rdx; /*src*/ movq %rcx, RW2; /* Load input */ movl 0 * 4(RW2), RL0d; movl 1 * 4(RW2), RR0d; movl 0 * 4(%rdx), RL1d; movl 1 * 4(%rdx), RR1d; movl 2 * 4(%rdx), RL2d; movl 3 * 4(%rdx), RR2d; bswapl RL0d; bswapl RR0d; bswapl RL1d; bswapl RR1d; bswapl RL2d; bswapl RR2d; /* Update IV */ movq 4 * 4(%rdx), RW0; movq RW0, (RW2); call _gcry_3des_amd64_crypt_blk3; popq %rdx; /*src*/ popq %rsi; /*dst*/ bswapl RR0d; bswapl RL0d; bswapl RR1d; bswapl RL1d; bswapl RR2d; bswapl RL2d; xorl 0 * 4(%rdx), RR0d; xorl 1 * 4(%rdx), RL0d; xorl 2 * 4(%rdx), RR1d; xorl 3 * 4(%rdx), RL1d; xorl 4 * 4(%rdx), RR2d; xorl 5 * 4(%rdx), RL2d; movl RR0d, 0 * 4(%rsi); movl RL0d, 1 * 4(%rsi); movl RR1d, 2 * 4(%rsi); movl RL1d, 3 * 4(%rsi); movl RR2d, 4 * 4(%rsi); movl RL2d, 5 * 4(%rsi); popq %r15; popq %r14; popq %r13; popq %r12; popq %rbx; popq %rbp; + + EXIT_SYSV_FUNC ret; ELF(.size _gcry_3des_amd64_cfb_dec,.-_gcry_3des_amd64_cfb_dec;) .align 16 .L_s1: .quad 0x0010100001010400, 0x0000000000000000 .quad 0x0000100000010000, 0x0010100001010404 .quad 0x0010100001010004, 0x0000100000010404 .quad 0x0000000000000004, 0x0000100000010000 .quad 0x0000000000000400, 0x0010100001010400 .quad 0x0010100001010404, 0x0000000000000400 .quad 0x0010000001000404, 0x0010100001010004 .quad 0x0010000001000000, 0x0000000000000004 .quad 
0x0000000000000404, 0x0010000001000400 .quad 0x0010000001000400, 0x0000100000010400 .quad 0x0000100000010400, 0x0010100001010000 .quad 0x0010100001010000, 0x0010000001000404 .quad 0x0000100000010004, 0x0010000001000004 .quad 0x0010000001000004, 0x0000100000010004 .quad 0x0000000000000000, 0x0000000000000404 .quad 0x0000100000010404, 0x0010000001000000 .quad 0x0000100000010000, 0x0010100001010404 .quad 0x0000000000000004, 0x0010100001010000 .quad 0x0010100001010400, 0x0010000001000000 .quad 0x0010000001000000, 0x0000000000000400 .quad 0x0010100001010004, 0x0000100000010000 .quad 0x0000100000010400, 0x0010000001000004 .quad 0x0000000000000400, 0x0000000000000004 .quad 0x0010000001000404, 0x0000100000010404 .quad 0x0010100001010404, 0x0000100000010004 .quad 0x0010100001010000, 0x0010000001000404 .quad 0x0010000001000004, 0x0000000000000404 .quad 0x0000100000010404, 0x0010100001010400 .quad 0x0000000000000404, 0x0010000001000400 .quad 0x0010000001000400, 0x0000000000000000 .quad 0x0000100000010004, 0x0000100000010400 .quad 0x0000000000000000, 0x0010100001010004 .L_s2: .quad 0x0801080200100020, 0x0800080000000000 .quad 0x0000080000000000, 0x0001080200100020 .quad 0x0001000000100000, 0x0000000200000020 .quad 0x0801000200100020, 0x0800080200000020 .quad 0x0800000200000020, 0x0801080200100020 .quad 0x0801080000100000, 0x0800000000000000 .quad 0x0800080000000000, 0x0001000000100000 .quad 0x0000000200000020, 0x0801000200100020 .quad 0x0001080000100000, 0x0001000200100020 .quad 0x0800080200000020, 0x0000000000000000 .quad 0x0800000000000000, 0x0000080000000000 .quad 0x0001080200100020, 0x0801000000100000 .quad 0x0001000200100020, 0x0800000200000020 .quad 0x0000000000000000, 0x0001080000100000 .quad 0x0000080200000020, 0x0801080000100000 .quad 0x0801000000100000, 0x0000080200000020 .quad 0x0000000000000000, 0x0001080200100020 .quad 0x0801000200100020, 0x0001000000100000 .quad 0x0800080200000020, 0x0801000000100000 .quad 0x0801080000100000, 0x0000080000000000 .quad 0x0801000000100000, 0x0800080000000000 .quad 0x0000000200000020, 0x0801080200100020 .quad 0x0001080200100020, 0x0000000200000020 .quad 0x0000080000000000, 0x0800000000000000 .quad 0x0000080200000020, 0x0801080000100000 .quad 0x0001000000100000, 0x0800000200000020 .quad 0x0001000200100020, 0x0800080200000020 .quad 0x0800000200000020, 0x0001000200100020 .quad 0x0001080000100000, 0x0000000000000000 .quad 0x0800080000000000, 0x0000080200000020 .quad 0x0800000000000000, 0x0801000200100020 .quad 0x0801080200100020, 0x0001080000100000 .L_s3: .quad 0x0000002000000208, 0x0000202008020200 .quad 0x0000000000000000, 0x0000200008020008 .quad 0x0000002008000200, 0x0000000000000000 .quad 0x0000202000020208, 0x0000002008000200 .quad 0x0000200000020008, 0x0000000008000008 .quad 0x0000000008000008, 0x0000200000020000 .quad 0x0000202008020208, 0x0000200000020008 .quad 0x0000200008020000, 0x0000002000000208 .quad 0x0000000008000000, 0x0000000000000008 .quad 0x0000202008020200, 0x0000002000000200 .quad 0x0000202000020200, 0x0000200008020000 .quad 0x0000200008020008, 0x0000202000020208 .quad 0x0000002008000208, 0x0000202000020200 .quad 0x0000200000020000, 0x0000002008000208 .quad 0x0000000000000008, 0x0000202008020208 .quad 0x0000002000000200, 0x0000000008000000 .quad 0x0000202008020200, 0x0000000008000000 .quad 0x0000200000020008, 0x0000002000000208 .quad 0x0000200000020000, 0x0000202008020200 .quad 0x0000002008000200, 0x0000000000000000 .quad 0x0000002000000200, 0x0000200000020008 .quad 0x0000202008020208, 0x0000002008000200 .quad 0x0000000008000008, 
0x0000002000000200 .quad 0x0000000000000000, 0x0000200008020008 .quad 0x0000002008000208, 0x0000200000020000 .quad 0x0000000008000000, 0x0000202008020208 .quad 0x0000000000000008, 0x0000202000020208 .quad 0x0000202000020200, 0x0000000008000008 .quad 0x0000200008020000, 0x0000002008000208 .quad 0x0000002000000208, 0x0000200008020000 .quad 0x0000202000020208, 0x0000000000000008 .quad 0x0000200008020008, 0x0000202000020200 .L_s4: .quad 0x1008020000002001, 0x1000020800002001 .quad 0x1000020800002001, 0x0000000800000000 .quad 0x0008020800002000, 0x1008000800000001 .quad 0x1008000000000001, 0x1000020000002001 .quad 0x0000000000000000, 0x0008020000002000 .quad 0x0008020000002000, 0x1008020800002001 .quad 0x1000000800000001, 0x0000000000000000 .quad 0x0008000800000000, 0x1008000000000001 .quad 0x1000000000000001, 0x0000020000002000 .quad 0x0008000000000000, 0x1008020000002001 .quad 0x0000000800000000, 0x0008000000000000 .quad 0x1000020000002001, 0x0000020800002000 .quad 0x1008000800000001, 0x1000000000000001 .quad 0x0000020800002000, 0x0008000800000000 .quad 0x0000020000002000, 0x0008020800002000 .quad 0x1008020800002001, 0x1000000800000001 .quad 0x0008000800000000, 0x1008000000000001 .quad 0x0008020000002000, 0x1008020800002001 .quad 0x1000000800000001, 0x0000000000000000 .quad 0x0000000000000000, 0x0008020000002000 .quad 0x0000020800002000, 0x0008000800000000 .quad 0x1008000800000001, 0x1000000000000001 .quad 0x1008020000002001, 0x1000020800002001 .quad 0x1000020800002001, 0x0000000800000000 .quad 0x1008020800002001, 0x1000000800000001 .quad 0x1000000000000001, 0x0000020000002000 .quad 0x1008000000000001, 0x1000020000002001 .quad 0x0008020800002000, 0x1008000800000001 .quad 0x1000020000002001, 0x0000020800002000 .quad 0x0008000000000000, 0x1008020000002001 .quad 0x0000000800000000, 0x0008000000000000 .quad 0x0000020000002000, 0x0008020800002000 .L_s5: .quad 0x0000001000000100, 0x0020001002080100 .quad 0x0020000002080000, 0x0420001002000100 .quad 0x0000000000080000, 0x0000001000000100 .quad 0x0400000000000000, 0x0020000002080000 .quad 0x0400001000080100, 0x0000000000080000 .quad 0x0020001002000100, 0x0400001000080100 .quad 0x0420001002000100, 0x0420000002080000 .quad 0x0000001000080100, 0x0400000000000000 .quad 0x0020000002000000, 0x0400000000080000 .quad 0x0400000000080000, 0x0000000000000000 .quad 0x0400001000000100, 0x0420001002080100 .quad 0x0420001002080100, 0x0020001002000100 .quad 0x0420000002080000, 0x0400001000000100 .quad 0x0000000000000000, 0x0420000002000000 .quad 0x0020001002080100, 0x0020000002000000 .quad 0x0420000002000000, 0x0000001000080100 .quad 0x0000000000080000, 0x0420001002000100 .quad 0x0000001000000100, 0x0020000002000000 .quad 0x0400000000000000, 0x0020000002080000 .quad 0x0420001002000100, 0x0400001000080100 .quad 0x0020001002000100, 0x0400000000000000 .quad 0x0420000002080000, 0x0020001002080100 .quad 0x0400001000080100, 0x0000001000000100 .quad 0x0020000002000000, 0x0420000002080000 .quad 0x0420001002080100, 0x0000001000080100 .quad 0x0420000002000000, 0x0420001002080100 .quad 0x0020000002080000, 0x0000000000000000 .quad 0x0400000000080000, 0x0420000002000000 .quad 0x0000001000080100, 0x0020001002000100 .quad 0x0400001000000100, 0x0000000000080000 .quad 0x0000000000000000, 0x0400000000080000 .quad 0x0020001002080100, 0x0400001000000100 .L_s6: .quad 0x0200000120000010, 0x0204000020000000 .quad 0x0000040000000000, 0x0204040120000010 .quad 0x0204000020000000, 0x0000000100000010 .quad 0x0204040120000010, 0x0004000000000000 .quad 0x0200040020000000, 0x0004040100000010 
.quad 0x0004000000000000, 0x0200000120000010 .quad 0x0004000100000010, 0x0200040020000000 .quad 0x0200000020000000, 0x0000040100000010 .quad 0x0000000000000000, 0x0004000100000010 .quad 0x0200040120000010, 0x0000040000000000 .quad 0x0004040000000000, 0x0200040120000010 .quad 0x0000000100000010, 0x0204000120000010 .quad 0x0204000120000010, 0x0000000000000000 .quad 0x0004040100000010, 0x0204040020000000 .quad 0x0000040100000010, 0x0004040000000000 .quad 0x0204040020000000, 0x0200000020000000 .quad 0x0200040020000000, 0x0000000100000010 .quad 0x0204000120000010, 0x0004040000000000 .quad 0x0204040120000010, 0x0004000000000000 .quad 0x0000040100000010, 0x0200000120000010 .quad 0x0004000000000000, 0x0200040020000000 .quad 0x0200000020000000, 0x0000040100000010 .quad 0x0200000120000010, 0x0204040120000010 .quad 0x0004040000000000, 0x0204000020000000 .quad 0x0004040100000010, 0x0204040020000000 .quad 0x0000000000000000, 0x0204000120000010 .quad 0x0000000100000010, 0x0000040000000000 .quad 0x0204000020000000, 0x0004040100000010 .quad 0x0000040000000000, 0x0004000100000010 .quad 0x0200040120000010, 0x0000000000000000 .quad 0x0204040020000000, 0x0200000020000000 .quad 0x0004000100000010, 0x0200040120000010 .L_s7: .quad 0x0002000000200000, 0x2002000004200002 .quad 0x2000000004000802, 0x0000000000000000 .quad 0x0000000000000800, 0x2000000004000802 .quad 0x2002000000200802, 0x0002000004200800 .quad 0x2002000004200802, 0x0002000000200000 .quad 0x0000000000000000, 0x2000000004000002 .quad 0x2000000000000002, 0x0000000004000000 .quad 0x2002000004200002, 0x2000000000000802 .quad 0x0000000004000800, 0x2002000000200802 .quad 0x2002000000200002, 0x0000000004000800 .quad 0x2000000004000002, 0x0002000004200000 .quad 0x0002000004200800, 0x2002000000200002 .quad 0x0002000004200000, 0x0000000000000800 .quad 0x2000000000000802, 0x2002000004200802 .quad 0x0002000000200800, 0x2000000000000002 .quad 0x0000000004000000, 0x0002000000200800 .quad 0x0000000004000000, 0x0002000000200800 .quad 0x0002000000200000, 0x2000000004000802 .quad 0x2000000004000802, 0x2002000004200002 .quad 0x2002000004200002, 0x2000000000000002 .quad 0x2002000000200002, 0x0000000004000000 .quad 0x0000000004000800, 0x0002000000200000 .quad 0x0002000004200800, 0x2000000000000802 .quad 0x2002000000200802, 0x0002000004200800 .quad 0x2000000000000802, 0x2000000004000002 .quad 0x2002000004200802, 0x0002000004200000 .quad 0x0002000000200800, 0x0000000000000000 .quad 0x2000000000000002, 0x2002000004200802 .quad 0x0000000000000000, 0x2002000000200802 .quad 0x0002000004200000, 0x0000000000000800 .quad 0x2000000004000002, 0x0000000004000800 .quad 0x0000000000000800, 0x2002000000200002 .L_s8: .quad 0x0100010410001000, 0x0000010000001000 .quad 0x0000000000040000, 0x0100010410041000 .quad 0x0100000010000000, 0x0100010410001000 .quad 0x0000000400000000, 0x0100000010000000 .quad 0x0000000400040000, 0x0100000010040000 .quad 0x0100010410041000, 0x0000010000041000 .quad 0x0100010010041000, 0x0000010400041000 .quad 0x0000010000001000, 0x0000000400000000 .quad 0x0100000010040000, 0x0100000410000000 .quad 0x0100010010001000, 0x0000010400001000 .quad 0x0000010000041000, 0x0000000400040000 .quad 0x0100000410040000, 0x0100010010041000 .quad 0x0000010400001000, 0x0000000000000000 .quad 0x0000000000000000, 0x0100000410040000 .quad 0x0100000410000000, 0x0100010010001000 .quad 0x0000010400041000, 0x0000000000040000 .quad 0x0000010400041000, 0x0000000000040000 .quad 0x0100010010041000, 0x0000010000001000 .quad 0x0000000400000000, 0x0100000410040000 .quad 0x0000010000001000, 
0x0000010400041000
	.quad 0x0100010010001000, 0x0000000400000000
	.quad 0x0100000410000000, 0x0100000010040000
	.quad 0x0100000410040000, 0x0100000010000000
	.quad 0x0000000000040000, 0x0100010410001000
	.quad 0x0000000000000000, 0x0100010410041000
	.quad 0x0000000400040000, 0x0100000410000000
	.quad 0x0100000010040000, 0x0100010010001000
	.quad 0x0100010410001000, 0x0000000000000000
	.quad 0x0100010410041000, 0x0000010000041000
	.quad 0x0000010000041000, 0x0000010400001000
	.quad 0x0000010400001000, 0x0000000400040000
	.quad 0x0100000010000000, 0x0100010010041000

#endif
#endif
diff --git a/cipher/des.c b/cipher/des.c
index 5c99f50d..7801b08f 100644
--- a/cipher/des.c
+++ b/cipher/des.c
@@ -1,1536 +1,1503 @@
/* des.c - DES and Triple-DES encryption/decryption Algorithm
 * Copyright (C) 1998, 1999, 2001, 2002, 2003,
 *               2008 Free Software Foundation, Inc.
 *
 * This file is part of Libgcrypt.
 *
 * Libgcrypt is free software; you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as
 * published by the Free Software Foundation; either version 2.1 of
 * the License, or (at your option) any later version.
 *
 * Libgcrypt is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
 *
 * For a description of triple encryption, see:
 *   Bruce Schneier: Applied Cryptography. Second Edition.
 *   John Wiley & Sons, 1996. ISBN 0-471-12845-7. Pages 358 ff.
 * This implementation is according to the definition of DES in FIPS
 * PUB 46-2 from December 1993.
 */

/*
 * Written by Michael Roth, September 1998
 */

/*
 * U S A G E
 * ===========
 *
 * For DES or Triple-DES encryption/decryption you must initialize a proper
 * encryption context with a key.
 *
 * A DES key is 64bit wide but only 56bits of the key are used. The remaining
 * bits are parity bits and they will _not_ be checked in this implementation,
 * but simply ignored.
 *
 * For Triple-DES you could use either two 64bit keys or three 64bit keys.
 * The parity bits will _not_ be checked either.
 *
 * After initializing a context with a key you could use this context to
 * encrypt or decrypt data in 64bit blocks in Electronic Codebook Mode.
 *
 * (In the examples below the slashes at the beginning and ending of comments
 * are omitted.)
 *
 * DES Example
 * -----------
 *     unsigned char key[8];
 *     unsigned char plaintext[8];
 *     unsigned char ciphertext[8];
 *     unsigned char recovered[8];
 *     des_ctx context;
 *
 *     * Fill 'key' and 'plaintext' with some data *
 *     ....
 *
 *     * Set up the DES encryption context *
 *     des_setkey(context, key);
 *
 *     * Encrypt the plaintext *
 *     des_ecb_encrypt(context, plaintext, ciphertext);
 *
 *     * To recover the original plaintext from ciphertext use: *
 *     des_ecb_decrypt(context, ciphertext, recovered);
 *
 *
 * Triple-DES Example
 * ------------------
 *     unsigned char key1[8];
 *     unsigned char key2[8];
 *     unsigned char key3[8];
 *     unsigned char plaintext[8];
 *     unsigned char ciphertext[8];
 *     unsigned char recovered[8];
 *     tripledes_ctx context;
 *
 *     * If you would like to use two 64bit keys, fill 'key1' and 'key2'
 *       then set up the encryption context: *
 *     tripledes_set2keys(context, key1, key2);
 *
 *     * To use three 64bit keys with Triple-DES use: *
 *     tripledes_set3keys(context, key1, key2, key3);
 *
 *     * Encrypting plaintext with Triple-DES *
 *     tripledes_ecb_encrypt(context, plaintext, ciphertext);
 *
 *     * Decrypting ciphertext to recover the plaintext with Triple-DES *
 *     tripledes_ecb_decrypt(context, ciphertext, recovered);
 *
 *
 * Selftest
 * --------
 *     char *error_msg;
 *
 *     * To perform a selftest of this DES/Triple-DES implementation use the
 *       function selftest(). It will return an error string if there are
 *       some problems with this library. *
 *
 *     if ( (error_msg = selftest()) )
 *       {
 *         fprintf(stderr, "An error in the DES/Triple-DES implementation occurred: %s\n", error_msg);
 *         abort();
 *       }
 */

#include <config.h>
#include <stdio.h>
#include <string.h>	/* memcpy, memcmp */
#include "types.h"	/* for byte and u32 typedefs */
#include "g10lib.h"
#include "cipher.h"
#include "bufhelp.h"
#include "cipher-selftest.h"

#define DES_BLOCKSIZE 8

/* USE_AMD64_ASM indicates whether to use AMD64 assembly code. */
#undef USE_AMD64_ASM
#if defined(__x86_64__) && (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
    defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS))
# define USE_AMD64_ASM 1
#endif

/* Helper macro to force alignment to 16 bytes. */
#ifdef HAVE_GCC_ATTRIBUTE_ALIGNED
# define ATTR_ALIGNED_16  __attribute__ ((aligned (16)))
#else
# define ATTR_ALIGNED_16
#endif

#if defined(__GNUC__) && defined(__GNU_LIBRARY__)
# define working_memcmp memcmp
#else
/*
 * According to the SunOS man page, memcmp returns indeterminate sign
 * depending on whether characters are signed or not.
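 * is_weak_key() further below relies on this function for a binary search
 * over the sorted weak_keys table, so the comparison must order bytes as
 * unsigned values consistently on every platform; hence this fallback.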
*/ static int working_memcmp( const void *_a, const void *_b, size_t n ) { const char *a = _a; const char *b = _b; for( ; n; n--, a++, b++ ) if( *a != *b ) return (int)(*(byte*)a) - (int)(*(byte*)b); return 0; } #endif /* * Encryption/Decryption context of DES */ typedef struct _des_ctx { u32 encrypt_subkeys[32]; u32 decrypt_subkeys[32]; } des_ctx[1]; /* * Encryption/Decryption context of Triple-DES */ typedef struct _tripledes_ctx { u32 encrypt_subkeys[96]; u32 decrypt_subkeys[96]; struct { int no_weak_key; } flags; } tripledes_ctx[1]; static void des_key_schedule (const byte *, u32 *); static int des_setkey (struct _des_ctx *, const byte *); static int des_ecb_crypt (struct _des_ctx *, const byte *, byte *, int); static int tripledes_set2keys (struct _tripledes_ctx *, const byte *, const byte *); static int tripledes_set3keys (struct _tripledes_ctx *, const byte *, const byte *, const byte *); static int tripledes_ecb_crypt (struct _tripledes_ctx *, const byte *, byte *, int); static int is_weak_key ( const byte *key ); static const char *selftest (void); static unsigned int do_tripledes_encrypt(void *context, byte *outbuf, const byte *inbuf ); static unsigned int do_tripledes_decrypt(void *context, byte *outbuf, const byte *inbuf ); static gcry_err_code_t do_tripledes_setkey(void *context, const byte *key, unsigned keylen); static int initialized; /* * The s-box values are permuted according to the 'primitive function P' * and are rotated one bit to the left. */ static u32 sbox1[64] = { 0x01010400, 0x00000000, 0x00010000, 0x01010404, 0x01010004, 0x00010404, 0x00000004, 0x00010000, 0x00000400, 0x01010400, 0x01010404, 0x00000400, 0x01000404, 0x01010004, 0x01000000, 0x00000004, 0x00000404, 0x01000400, 0x01000400, 0x00010400, 0x00010400, 0x01010000, 0x01010000, 0x01000404, 0x00010004, 0x01000004, 0x01000004, 0x00010004, 0x00000000, 0x00000404, 0x00010404, 0x01000000, 0x00010000, 0x01010404, 0x00000004, 0x01010000, 0x01010400, 0x01000000, 0x01000000, 0x00000400, 0x01010004, 0x00010000, 0x00010400, 0x01000004, 0x00000400, 0x00000004, 0x01000404, 0x00010404, 0x01010404, 0x00010004, 0x01010000, 0x01000404, 0x01000004, 0x00000404, 0x00010404, 0x01010400, 0x00000404, 0x01000400, 0x01000400, 0x00000000, 0x00010004, 0x00010400, 0x00000000, 0x01010004 }; static u32 sbox2[64] = { 0x80108020, 0x80008000, 0x00008000, 0x00108020, 0x00100000, 0x00000020, 0x80100020, 0x80008020, 0x80000020, 0x80108020, 0x80108000, 0x80000000, 0x80008000, 0x00100000, 0x00000020, 0x80100020, 0x00108000, 0x00100020, 0x80008020, 0x00000000, 0x80000000, 0x00008000, 0x00108020, 0x80100000, 0x00100020, 0x80000020, 0x00000000, 0x00108000, 0x00008020, 0x80108000, 0x80100000, 0x00008020, 0x00000000, 0x00108020, 0x80100020, 0x00100000, 0x80008020, 0x80100000, 0x80108000, 0x00008000, 0x80100000, 0x80008000, 0x00000020, 0x80108020, 0x00108020, 0x00000020, 0x00008000, 0x80000000, 0x00008020, 0x80108000, 0x00100000, 0x80000020, 0x00100020, 0x80008020, 0x80000020, 0x00100020, 0x00108000, 0x00000000, 0x80008000, 0x00008020, 0x80000000, 0x80100020, 0x80108020, 0x00108000 }; static u32 sbox3[64] = { 0x00000208, 0x08020200, 0x00000000, 0x08020008, 0x08000200, 0x00000000, 0x00020208, 0x08000200, 0x00020008, 0x08000008, 0x08000008, 0x00020000, 0x08020208, 0x00020008, 0x08020000, 0x00000208, 0x08000000, 0x00000008, 0x08020200, 0x00000200, 0x00020200, 0x08020000, 0x08020008, 0x00020208, 0x08000208, 0x00020200, 0x00020000, 0x08000208, 0x00000008, 0x08020208, 0x00000200, 0x08000000, 0x08020200, 0x08000000, 0x00020008, 0x00000208, 0x00020000, 
0x08020200, 0x08000200, 0x00000000, 0x00000200, 0x00020008, 0x08020208, 0x08000200, 0x08000008, 0x00000200, 0x00000000, 0x08020008, 0x08000208, 0x00020000, 0x08000000, 0x08020208, 0x00000008, 0x00020208, 0x00020200, 0x08000008, 0x08020000, 0x08000208, 0x00000208, 0x08020000, 0x00020208, 0x00000008, 0x08020008, 0x00020200 }; static u32 sbox4[64] = { 0x00802001, 0x00002081, 0x00002081, 0x00000080, 0x00802080, 0x00800081, 0x00800001, 0x00002001, 0x00000000, 0x00802000, 0x00802000, 0x00802081, 0x00000081, 0x00000000, 0x00800080, 0x00800001, 0x00000001, 0x00002000, 0x00800000, 0x00802001, 0x00000080, 0x00800000, 0x00002001, 0x00002080, 0x00800081, 0x00000001, 0x00002080, 0x00800080, 0x00002000, 0x00802080, 0x00802081, 0x00000081, 0x00800080, 0x00800001, 0x00802000, 0x00802081, 0x00000081, 0x00000000, 0x00000000, 0x00802000, 0x00002080, 0x00800080, 0x00800081, 0x00000001, 0x00802001, 0x00002081, 0x00002081, 0x00000080, 0x00802081, 0x00000081, 0x00000001, 0x00002000, 0x00800001, 0x00002001, 0x00802080, 0x00800081, 0x00002001, 0x00002080, 0x00800000, 0x00802001, 0x00000080, 0x00800000, 0x00002000, 0x00802080 }; static u32 sbox5[64] = { 0x00000100, 0x02080100, 0x02080000, 0x42000100, 0x00080000, 0x00000100, 0x40000000, 0x02080000, 0x40080100, 0x00080000, 0x02000100, 0x40080100, 0x42000100, 0x42080000, 0x00080100, 0x40000000, 0x02000000, 0x40080000, 0x40080000, 0x00000000, 0x40000100, 0x42080100, 0x42080100, 0x02000100, 0x42080000, 0x40000100, 0x00000000, 0x42000000, 0x02080100, 0x02000000, 0x42000000, 0x00080100, 0x00080000, 0x42000100, 0x00000100, 0x02000000, 0x40000000, 0x02080000, 0x42000100, 0x40080100, 0x02000100, 0x40000000, 0x42080000, 0x02080100, 0x40080100, 0x00000100, 0x02000000, 0x42080000, 0x42080100, 0x00080100, 0x42000000, 0x42080100, 0x02080000, 0x00000000, 0x40080000, 0x42000000, 0x00080100, 0x02000100, 0x40000100, 0x00080000, 0x00000000, 0x40080000, 0x02080100, 0x40000100 }; static u32 sbox6[64] = { 0x20000010, 0x20400000, 0x00004000, 0x20404010, 0x20400000, 0x00000010, 0x20404010, 0x00400000, 0x20004000, 0x00404010, 0x00400000, 0x20000010, 0x00400010, 0x20004000, 0x20000000, 0x00004010, 0x00000000, 0x00400010, 0x20004010, 0x00004000, 0x00404000, 0x20004010, 0x00000010, 0x20400010, 0x20400010, 0x00000000, 0x00404010, 0x20404000, 0x00004010, 0x00404000, 0x20404000, 0x20000000, 0x20004000, 0x00000010, 0x20400010, 0x00404000, 0x20404010, 0x00400000, 0x00004010, 0x20000010, 0x00400000, 0x20004000, 0x20000000, 0x00004010, 0x20000010, 0x20404010, 0x00404000, 0x20400000, 0x00404010, 0x20404000, 0x00000000, 0x20400010, 0x00000010, 0x00004000, 0x20400000, 0x00404010, 0x00004000, 0x00400010, 0x20004010, 0x00000000, 0x20404000, 0x20000000, 0x00400010, 0x20004010 }; static u32 sbox7[64] = { 0x00200000, 0x04200002, 0x04000802, 0x00000000, 0x00000800, 0x04000802, 0x00200802, 0x04200800, 0x04200802, 0x00200000, 0x00000000, 0x04000002, 0x00000002, 0x04000000, 0x04200002, 0x00000802, 0x04000800, 0x00200802, 0x00200002, 0x04000800, 0x04000002, 0x04200000, 0x04200800, 0x00200002, 0x04200000, 0x00000800, 0x00000802, 0x04200802, 0x00200800, 0x00000002, 0x04000000, 0x00200800, 0x04000000, 0x00200800, 0x00200000, 0x04000802, 0x04000802, 0x04200002, 0x04200002, 0x00000002, 0x00200002, 0x04000000, 0x04000800, 0x00200000, 0x04200800, 0x00000802, 0x00200802, 0x04200800, 0x00000802, 0x04000002, 0x04200802, 0x04200000, 0x00200800, 0x00000000, 0x00000002, 0x04200802, 0x00000000, 0x00200802, 0x04200000, 0x00000800, 0x04000002, 0x04000800, 0x00000800, 0x00200002 }; static u32 sbox8[64] = { 0x10001040, 0x00001000, 
0x00040000, 0x10041040,
  0x10000000, 0x10001040, 0x00000040, 0x10000000,
  0x00040040, 0x10040000, 0x10041040, 0x00041000,
  0x10041000, 0x00041040, 0x00001000, 0x00000040,
  0x10040000, 0x10000040, 0x10001000, 0x00001040,
  0x00041000, 0x00040040, 0x10040040, 0x10041000,
  0x00001040, 0x00000000, 0x00000000, 0x10040040,
  0x10000040, 0x10001000, 0x00041040, 0x00040000,
  0x00041040, 0x00040000, 0x10041000, 0x00001000,
  0x00000040, 0x10040040, 0x00001000, 0x00041040,
  0x10001000, 0x00000040, 0x10000040, 0x10040000,
  0x10040040, 0x10000000, 0x00040000, 0x10001040,
  0x00000000, 0x10041040, 0x00040040, 0x10000040,
  0x10040000, 0x10001000, 0x10001040, 0x00000000,
  0x10041040, 0x00041000, 0x00041000, 0x00001040,
  0x00001040, 0x00040040, 0x10000000, 0x10041000
};


/*
 * These two tables are part of the 'permuted choice 1' function.
 * In this implementation several speed improvements are done.
 */
static u32 leftkey_swap[16] =
{
  0x00000000, 0x00000001, 0x00000100, 0x00000101,
  0x00010000, 0x00010001, 0x00010100, 0x00010101,
  0x01000000, 0x01000001, 0x01000100, 0x01000101,
  0x01010000, 0x01010001, 0x01010100, 0x01010101
};

static u32 rightkey_swap[16] =
{
  0x00000000, 0x01000000, 0x00010000, 0x01010000,
  0x00000100, 0x01000100, 0x00010100, 0x01010100,
  0x00000001, 0x01000001, 0x00010001, 0x01010001,
  0x00000101, 0x01000101, 0x00010101, 0x01010101,
};


/*
 * Numbers of left shifts per round for encryption subkeys.
 * To calculate the decryption subkeys we just reverse the
 * ordering of the calculated encryption subkeys, so there
 * is no need for a decryption rotate table.
 */
static byte encrypt_rotate_tab[16] =
{
  1, 1, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 1
};


/*
 * Table with weak DES keys sorted in ascending order.
 * In DES there are 64 known keys which are weak. They are weak
 * because they produce only one, two or four different
 * subkeys in the subkey scheduling process.
 * The keys in this table have all their parity bits cleared.
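 *
 * As an illustration only (a sketch using the des_* helpers defined in
 * this file, not part of this table's definition): with one of the four
 * truly weak keys, marked w below, encryption equals decryption, so
 * encrypting a block twice returns the original block:
 *
 *     des_ctx wk;
 *     byte blk[8] = { 0 };
 *     des_setkey( wk, weak_keys[0] );     * all-zero weak key *
 *     des_ecb_encrypt( wk, blk, blk );
 *     des_ecb_encrypt( wk, blk, blk );    * blk is all zero again *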
*/ static byte weak_keys[64][8] = { { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, /*w*/ { 0x00, 0x00, 0x1e, 0x1e, 0x00, 0x00, 0x0e, 0x0e }, { 0x00, 0x00, 0xe0, 0xe0, 0x00, 0x00, 0xf0, 0xf0 }, { 0x00, 0x00, 0xfe, 0xfe, 0x00, 0x00, 0xfe, 0xfe }, { 0x00, 0x1e, 0x00, 0x1e, 0x00, 0x0e, 0x00, 0x0e }, /*sw*/ { 0x00, 0x1e, 0x1e, 0x00, 0x00, 0x0e, 0x0e, 0x00 }, { 0x00, 0x1e, 0xe0, 0xfe, 0x00, 0x0e, 0xf0, 0xfe }, { 0x00, 0x1e, 0xfe, 0xe0, 0x00, 0x0e, 0xfe, 0xf0 }, { 0x00, 0xe0, 0x00, 0xe0, 0x00, 0xf0, 0x00, 0xf0 }, /*sw*/ { 0x00, 0xe0, 0x1e, 0xfe, 0x00, 0xf0, 0x0e, 0xfe }, { 0x00, 0xe0, 0xe0, 0x00, 0x00, 0xf0, 0xf0, 0x00 }, { 0x00, 0xe0, 0xfe, 0x1e, 0x00, 0xf0, 0xfe, 0x0e }, { 0x00, 0xfe, 0x00, 0xfe, 0x00, 0xfe, 0x00, 0xfe }, /*sw*/ { 0x00, 0xfe, 0x1e, 0xe0, 0x00, 0xfe, 0x0e, 0xf0 }, { 0x00, 0xfe, 0xe0, 0x1e, 0x00, 0xfe, 0xf0, 0x0e }, { 0x00, 0xfe, 0xfe, 0x00, 0x00, 0xfe, 0xfe, 0x00 }, { 0x1e, 0x00, 0x00, 0x1e, 0x0e, 0x00, 0x00, 0x0e }, { 0x1e, 0x00, 0x1e, 0x00, 0x0e, 0x00, 0x0e, 0x00 }, /*sw*/ { 0x1e, 0x00, 0xe0, 0xfe, 0x0e, 0x00, 0xf0, 0xfe }, { 0x1e, 0x00, 0xfe, 0xe0, 0x0e, 0x00, 0xfe, 0xf0 }, { 0x1e, 0x1e, 0x00, 0x00, 0x0e, 0x0e, 0x00, 0x00 }, { 0x1e, 0x1e, 0x1e, 0x1e, 0x0e, 0x0e, 0x0e, 0x0e }, /*w*/ { 0x1e, 0x1e, 0xe0, 0xe0, 0x0e, 0x0e, 0xf0, 0xf0 }, { 0x1e, 0x1e, 0xfe, 0xfe, 0x0e, 0x0e, 0xfe, 0xfe }, { 0x1e, 0xe0, 0x00, 0xfe, 0x0e, 0xf0, 0x00, 0xfe }, { 0x1e, 0xe0, 0x1e, 0xe0, 0x0e, 0xf0, 0x0e, 0xf0 }, /*sw*/ { 0x1e, 0xe0, 0xe0, 0x1e, 0x0e, 0xf0, 0xf0, 0x0e }, { 0x1e, 0xe0, 0xfe, 0x00, 0x0e, 0xf0, 0xfe, 0x00 }, { 0x1e, 0xfe, 0x00, 0xe0, 0x0e, 0xfe, 0x00, 0xf0 }, { 0x1e, 0xfe, 0x1e, 0xfe, 0x0e, 0xfe, 0x0e, 0xfe }, /*sw*/ { 0x1e, 0xfe, 0xe0, 0x00, 0x0e, 0xfe, 0xf0, 0x00 }, { 0x1e, 0xfe, 0xfe, 0x1e, 0x0e, 0xfe, 0xfe, 0x0e }, { 0xe0, 0x00, 0x00, 0xe0, 0xf0, 0x00, 0x00, 0xf0 }, { 0xe0, 0x00, 0x1e, 0xfe, 0xf0, 0x00, 0x0e, 0xfe }, { 0xe0, 0x00, 0xe0, 0x00, 0xf0, 0x00, 0xf0, 0x00 }, /*sw*/ { 0xe0, 0x00, 0xfe, 0x1e, 0xf0, 0x00, 0xfe, 0x0e }, { 0xe0, 0x1e, 0x00, 0xfe, 0xf0, 0x0e, 0x00, 0xfe }, { 0xe0, 0x1e, 0x1e, 0xe0, 0xf0, 0x0e, 0x0e, 0xf0 }, { 0xe0, 0x1e, 0xe0, 0x1e, 0xf0, 0x0e, 0xf0, 0x0e }, /*sw*/ { 0xe0, 0x1e, 0xfe, 0x00, 0xf0, 0x0e, 0xfe, 0x00 }, { 0xe0, 0xe0, 0x00, 0x00, 0xf0, 0xf0, 0x00, 0x00 }, { 0xe0, 0xe0, 0x1e, 0x1e, 0xf0, 0xf0, 0x0e, 0x0e }, { 0xe0, 0xe0, 0xe0, 0xe0, 0xf0, 0xf0, 0xf0, 0xf0 }, /*w*/ { 0xe0, 0xe0, 0xfe, 0xfe, 0xf0, 0xf0, 0xfe, 0xfe }, { 0xe0, 0xfe, 0x00, 0x1e, 0xf0, 0xfe, 0x00, 0x0e }, { 0xe0, 0xfe, 0x1e, 0x00, 0xf0, 0xfe, 0x0e, 0x00 }, { 0xe0, 0xfe, 0xe0, 0xfe, 0xf0, 0xfe, 0xf0, 0xfe }, /*sw*/ { 0xe0, 0xfe, 0xfe, 0xe0, 0xf0, 0xfe, 0xfe, 0xf0 }, { 0xfe, 0x00, 0x00, 0xfe, 0xfe, 0x00, 0x00, 0xfe }, { 0xfe, 0x00, 0x1e, 0xe0, 0xfe, 0x00, 0x0e, 0xf0 }, { 0xfe, 0x00, 0xe0, 0x1e, 0xfe, 0x00, 0xf0, 0x0e }, { 0xfe, 0x00, 0xfe, 0x00, 0xfe, 0x00, 0xfe, 0x00 }, /*sw*/ { 0xfe, 0x1e, 0x00, 0xe0, 0xfe, 0x0e, 0x00, 0xf0 }, { 0xfe, 0x1e, 0x1e, 0xfe, 0xfe, 0x0e, 0x0e, 0xfe }, { 0xfe, 0x1e, 0xe0, 0x00, 0xfe, 0x0e, 0xf0, 0x00 }, { 0xfe, 0x1e, 0xfe, 0x1e, 0xfe, 0x0e, 0xfe, 0x0e }, /*sw*/ { 0xfe, 0xe0, 0x00, 0x1e, 0xfe, 0xf0, 0x00, 0x0e }, { 0xfe, 0xe0, 0x1e, 0x00, 0xfe, 0xf0, 0x0e, 0x00 }, { 0xfe, 0xe0, 0xe0, 0xfe, 0xfe, 0xf0, 0xf0, 0xfe }, { 0xfe, 0xe0, 0xfe, 0xe0, 0xfe, 0xf0, 0xfe, 0xf0 }, /*sw*/ { 0xfe, 0xfe, 0x00, 0x00, 0xfe, 0xfe, 0x00, 0x00 }, { 0xfe, 0xfe, 0x1e, 0x1e, 0xfe, 0xfe, 0x0e, 0x0e }, { 0xfe, 0xfe, 0xe0, 0xe0, 0xfe, 0xfe, 0xf0, 0xf0 }, { 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe } /*w*/ }; static unsigned char weak_keys_chksum[20] = { 0xD0, 0xCF, 0x07, 0x38, 0x93, 
0x70, 0x8A, 0x83, 0x7D, 0xD7, 0x8A, 0x36,
  0x65, 0x29, 0x6C, 0x1F, 0x7C, 0x3F, 0xD3, 0x41
};


/*
 * Macro to swap bits across two words.
 */
#define DO_PERMUTATION(a, temp, b, offset, mask)	\
    temp = ((a>>offset) ^ b) & mask;			\
    b ^= temp;						\
    a ^= temp<<offset;


/*
 * This performs the 'initial permutation' of the data to be encrypted
 * or decrypted. Additionally the resulting two words are rotated one bit
 * to the left.
 */
#define INITIAL_PERMUTATION(left, temp, right)		\
    DO_PERMUTATION(left, temp, right, 4, 0x0f0f0f0f)	\
    DO_PERMUTATION(left, temp, right, 16, 0x0000ffff)	\
    DO_PERMUTATION(right, temp, left, 2, 0x33333333)	\
    DO_PERMUTATION(right, temp, left, 8, 0x00ff00ff)	\
    right = (right << 1) | (right >> 31);		\
    temp = (left ^ right) & 0xaaaaaaaa;			\
    right ^= temp;					\
    left ^= temp;					\
    left = (left << 1) | (left >> 31);

/*
 * The 'inverse initial permutation'.
 */
#define FINAL_PERMUTATION(left, temp, right)		\
    left = (left << 31) | (left >> 1);			\
    temp = (left ^ right) & 0xaaaaaaaa;			\
    left ^= temp;					\
    right ^= temp;					\
    right = (right << 31) | (right >> 1);		\
    DO_PERMUTATION(right, temp, left, 8, 0x00ff00ff)	\
    DO_PERMUTATION(right, temp, left, 2, 0x33333333)	\
    DO_PERMUTATION(left, temp, right, 16, 0x0000ffff)	\
    DO_PERMUTATION(left, temp, right, 4, 0x0f0f0f0f)


/*
 * A full DES round including 'expansion function', 'sbox substitution'
 * and 'primitive function P' but without swapping the left and right word.
 * Please note: The data in 'from' and 'to' is already rotated one bit to
 * the left, done in the initial permutation.
 */
#define DES_ROUND(from, to, work, subkey)	\
    work = from ^ *subkey++;			\
    to ^= sbox8[  work      & 0x3f ];		\
    to ^= sbox6[ (work>>8)  & 0x3f ];		\
    to ^= sbox4[ (work>>16) & 0x3f ];		\
    to ^= sbox2[ (work>>24) & 0x3f ];		\
    work = ((from << 28) | (from >> 4)) ^ *subkey++;	\
    to ^= sbox7[  work      & 0x3f ];		\
    to ^= sbox5[ (work>>8)  & 0x3f ];		\
    to ^= sbox3[ (work>>16) & 0x3f ];		\
    to ^= sbox1[ (work>>24) & 0x3f ];


/*
 * Macros to convert 8 bytes from/to 32bit words.
 */
#define READ_64BIT_DATA(data, left, right)	\
    left = buf_get_be32(data + 0);		\
    right = buf_get_be32(data + 4);

#define WRITE_64BIT_DATA(data, left, right)	\
    buf_put_be32(data + 0, left);		\
    buf_put_be32(data + 4, right);

/*
 * Handy macros for encryption and decryption of data
 */
#define des_ecb_encrypt(ctx, from, to)        des_ecb_crypt(ctx, from, to, 0)
#define des_ecb_decrypt(ctx, from, to)        des_ecb_crypt(ctx, from, to, 1)
#define tripledes_ecb_encrypt(ctx, from, to)  tripledes_ecb_crypt(ctx,from,to,0)
#define tripledes_ecb_decrypt(ctx, from, to)  tripledes_ecb_crypt(ctx,from,to,1)


/*
 * des_key_schedule():  Calculate 16 subkey pairs (even/odd) for
 *                      16 encryption rounds.
 *                      To calculate subkeys for decryption the caller
 *                      has to reorder the generated subkeys.
 *
 *    rawkey:  8 Bytes of key data
 *    subkey:  Array of at least 32 u32s. Will be filled
 *             with calculated subkeys.
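 *
 *    A usage sketch (this mirrors what des_setkey() below does): the
 *    decryption subkeys are simply the encryption subkey pairs taken in
 *    reverse order:
 *
 *        u32 ek[32], dk[32];
 *        int i;
 *        des_key_schedule(rawkey, ek);
 *        for (i = 0; i < 32; i += 2)
 *          {
 *            dk[i]   = ek[30-i];
 *            dk[i+1] = ek[31-i];
 *          }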
* */ static void des_key_schedule (const byte * rawkey, u32 * subkey) { u32 left, right, work; int round; READ_64BIT_DATA (rawkey, left, right) DO_PERMUTATION (right, work, left, 4, 0x0f0f0f0f) DO_PERMUTATION (right, work, left, 0, 0x10101010) left = ((leftkey_swap[(left >> 0) & 0xf] << 3) | (leftkey_swap[(left >> 8) & 0xf] << 2) | (leftkey_swap[(left >> 16) & 0xf] << 1) | (leftkey_swap[(left >> 24) & 0xf]) | (leftkey_swap[(left >> 5) & 0xf] << 7) | (leftkey_swap[(left >> 13) & 0xf] << 6) | (leftkey_swap[(left >> 21) & 0xf] << 5) | (leftkey_swap[(left >> 29) & 0xf] << 4)); left &= 0x0fffffff; right = ((rightkey_swap[(right >> 1) & 0xf] << 3) | (rightkey_swap[(right >> 9) & 0xf] << 2) | (rightkey_swap[(right >> 17) & 0xf] << 1) | (rightkey_swap[(right >> 25) & 0xf]) | (rightkey_swap[(right >> 4) & 0xf] << 7) | (rightkey_swap[(right >> 12) & 0xf] << 6) | (rightkey_swap[(right >> 20) & 0xf] << 5) | (rightkey_swap[(right >> 28) & 0xf] << 4)); right &= 0x0fffffff; for (round = 0; round < 16; ++round) { left = ((left << encrypt_rotate_tab[round]) | (left >> (28 - encrypt_rotate_tab[round]))) & 0x0fffffff; right = ((right << encrypt_rotate_tab[round]) | (right >> (28 - encrypt_rotate_tab[round]))) & 0x0fffffff; *subkey++ = (((left << 4) & 0x24000000) | ((left << 28) & 0x10000000) | ((left << 14) & 0x08000000) | ((left << 18) & 0x02080000) | ((left << 6) & 0x01000000) | ((left << 9) & 0x00200000) | ((left >> 1) & 0x00100000) | ((left << 10) & 0x00040000) | ((left << 2) & 0x00020000) | ((left >> 10) & 0x00010000) | ((right >> 13) & 0x00002000) | ((right >> 4) & 0x00001000) | ((right << 6) & 0x00000800) | ((right >> 1) & 0x00000400) | ((right >> 14) & 0x00000200) | (right & 0x00000100) | ((right >> 5) & 0x00000020) | ((right >> 10) & 0x00000010) | ((right >> 3) & 0x00000008) | ((right >> 18) & 0x00000004) | ((right >> 26) & 0x00000002) | ((right >> 24) & 0x00000001)); *subkey++ = (((left << 15) & 0x20000000) | ((left << 17) & 0x10000000) | ((left << 10) & 0x08000000) | ((left << 22) & 0x04000000) | ((left >> 2) & 0x02000000) | ((left << 1) & 0x01000000) | ((left << 16) & 0x00200000) | ((left << 11) & 0x00100000) | ((left << 3) & 0x00080000) | ((left >> 6) & 0x00040000) | ((left << 15) & 0x00020000) | ((left >> 4) & 0x00010000) | ((right >> 2) & 0x00002000) | ((right << 8) & 0x00001000) | ((right >> 14) & 0x00000808) | ((right >> 9) & 0x00000400) | ((right) & 0x00000200) | ((right << 7) & 0x00000100) | ((right >> 7) & 0x00000020) | ((right >> 3) & 0x00000011) | ((right << 2) & 0x00000004) | ((right >> 21) & 0x00000002)); } } /* * Fill a DES context with subkeys calculated from a 64bit key. * Does not check parity bits, but simply ignore them. * Does not check for weak keys. */ static int des_setkey (struct _des_ctx *ctx, const byte * key) { static const char *selftest_failed; int i; if (!fips_mode () && !initialized) { initialized = 1; selftest_failed = selftest (); if (selftest_failed) log_error ("%s\n", selftest_failed); } if (selftest_failed) return GPG_ERR_SELFTEST_FAILED; des_key_schedule (key, ctx->encrypt_subkeys); _gcry_burn_stack (32); for(i=0; i<32; i+=2) { ctx->decrypt_subkeys[i] = ctx->encrypt_subkeys[30-i]; ctx->decrypt_subkeys[i+1] = ctx->encrypt_subkeys[31-i]; } return 0; } /* * Electronic Codebook Mode DES encryption/decryption of data according * to 'mode'. */ static int des_ecb_crypt (struct _des_ctx *ctx, const byte * from, byte * to, int mode) { u32 left, right, work; u32 *keys; keys = mode ? 
ctx->decrypt_subkeys : ctx->encrypt_subkeys; READ_64BIT_DATA (from, left, right) INITIAL_PERMUTATION (left, work, right) DES_ROUND (right, left, work, keys) DES_ROUND (left, right, work, keys) DES_ROUND (right, left, work, keys) DES_ROUND (left, right, work, keys) DES_ROUND (right, left, work, keys) DES_ROUND (left, right, work, keys) DES_ROUND (right, left, work, keys) DES_ROUND (left, right, work, keys) DES_ROUND (right, left, work, keys) DES_ROUND (left, right, work, keys) DES_ROUND (right, left, work, keys) DES_ROUND (left, right, work, keys) DES_ROUND (right, left, work, keys) DES_ROUND (left, right, work, keys) DES_ROUND (right, left, work, keys) DES_ROUND (left, right, work, keys) FINAL_PERMUTATION (right, work, left) WRITE_64BIT_DATA (to, right, left) return 0; } /* * Fill a Triple-DES context with subkeys calculated from two 64bit keys. * Does not check the parity bits of the keys, but simply ignore them. * Does not check for weak keys. */ static int tripledes_set2keys (struct _tripledes_ctx *ctx, const byte * key1, const byte * key2) { int i; des_key_schedule (key1, ctx->encrypt_subkeys); des_key_schedule (key2, &(ctx->decrypt_subkeys[32])); _gcry_burn_stack (32); for(i=0; i<32; i+=2) { ctx->decrypt_subkeys[i] = ctx->encrypt_subkeys[30-i]; ctx->decrypt_subkeys[i+1] = ctx->encrypt_subkeys[31-i]; ctx->encrypt_subkeys[i+32] = ctx->decrypt_subkeys[62-i]; ctx->encrypt_subkeys[i+33] = ctx->decrypt_subkeys[63-i]; ctx->encrypt_subkeys[i+64] = ctx->encrypt_subkeys[i]; ctx->encrypt_subkeys[i+65] = ctx->encrypt_subkeys[i+1]; ctx->decrypt_subkeys[i+64] = ctx->decrypt_subkeys[i]; ctx->decrypt_subkeys[i+65] = ctx->decrypt_subkeys[i+1]; } return 0; } /* * Fill a Triple-DES context with subkeys calculated from three 64bit keys. * Does not check the parity bits of the keys, but simply ignore them. * Does not check for weak keys. */ static int tripledes_set3keys (struct _tripledes_ctx *ctx, const byte * key1, const byte * key2, const byte * key3) { static const char *selftest_failed; int i; if (!fips_mode () && !initialized) { initialized = 1; selftest_failed = selftest (); if (selftest_failed) log_error ("%s\n", selftest_failed); } if (selftest_failed) return GPG_ERR_SELFTEST_FAILED; des_key_schedule (key1, ctx->encrypt_subkeys); des_key_schedule (key2, &(ctx->decrypt_subkeys[32])); des_key_schedule (key3, &(ctx->encrypt_subkeys[64])); _gcry_burn_stack (32); for(i=0; i<32; i+=2) { ctx->decrypt_subkeys[i] = ctx->encrypt_subkeys[94-i]; ctx->decrypt_subkeys[i+1] = ctx->encrypt_subkeys[95-i]; ctx->encrypt_subkeys[i+32] = ctx->decrypt_subkeys[62-i]; ctx->encrypt_subkeys[i+33] = ctx->decrypt_subkeys[63-i]; ctx->decrypt_subkeys[i+64] = ctx->encrypt_subkeys[30-i]; ctx->decrypt_subkeys[i+65] = ctx->encrypt_subkeys[31-i]; } return 0; } #ifdef USE_AMD64_ASM /* Assembly implementation of triple-DES. */ extern void _gcry_3des_amd64_crypt_block(const void *keys, byte *out, const byte *in); /* These assembly implementations process three blocks in parallel. 
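 * The bulk helpers further below therefore consume the input in 3-block
 * (24 byte) chunks and leave the tail to the scalar code; the chunking
 * pattern, taken from _gcry_3des_ctr_enc():
 *
 *     while (nblocks >= 3)
 *       {
 *         tripledes_amd64_ctr_enc(ctx->encrypt_subkeys, outbuf, inbuf, ctr);
 *         nblocks -= 3;
 *         outbuf += 3 * DES_BLOCKSIZE;
 *         inbuf  += 3 * DES_BLOCKSIZE;
 *       }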
*/ extern void _gcry_3des_amd64_ctr_enc(const void *keys, byte *out, const byte *in, byte *ctr); extern void _gcry_3des_amd64_cbc_dec(const void *keys, byte *out, const byte *in, byte *iv); extern void _gcry_3des_amd64_cfb_dec(const void *keys, byte *out, const byte *in, byte *iv); #define TRIPLEDES_ECB_BURN_STACK (8 * sizeof(void *)) -#ifdef HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS -static inline void -call_sysv_fn (const void *fn, const void *arg1, const void *arg2, - const void *arg3, const void *arg4) -{ - /* Call SystemV ABI function without storing non-volatile XMM registers, - * as target function does not use vector instruction sets. */ - asm volatile ("callq *%0\n\t" - : "+a" (fn), - "+D" (arg1), - "+S" (arg2), - "+d" (arg3), - "+c" (arg4) - : - : "cc", "memory", "r8", "r9", "r10", "r11"); -} -#endif /* * Electronic Codebook Mode Triple-DES encryption/decryption of data * according to 'mode'. Sometimes this mode is named 'EDE' mode * (Encryption-Decryption-Encryption). */ static inline int tripledes_ecb_crypt (struct _tripledes_ctx *ctx, const byte * from, byte * to, int mode) { u32 *keys; keys = mode ? ctx->decrypt_subkeys : ctx->encrypt_subkeys; -#ifdef HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS - call_sysv_fn (_gcry_3des_amd64_crypt_block, keys, to, from, NULL); -#else _gcry_3des_amd64_crypt_block(keys, to, from); -#endif return 0; } static inline void tripledes_amd64_ctr_enc(const void *keys, byte *out, const byte *in, byte *ctr) { -#ifdef HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS - call_sysv_fn (_gcry_3des_amd64_ctr_enc, keys, out, in, ctr); -#else _gcry_3des_amd64_ctr_enc(keys, out, in, ctr); -#endif } static inline void tripledes_amd64_cbc_dec(const void *keys, byte *out, const byte *in, byte *iv) { -#ifdef HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS - call_sysv_fn (_gcry_3des_amd64_cbc_dec, keys, out, in, iv); -#else _gcry_3des_amd64_cbc_dec(keys, out, in, iv); -#endif } static inline void tripledes_amd64_cfb_dec(const void *keys, byte *out, const byte *in, byte *iv) { -#ifdef HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS - call_sysv_fn (_gcry_3des_amd64_cfb_dec, keys, out, in, iv); -#else _gcry_3des_amd64_cfb_dec(keys, out, in, iv); -#endif } #else /*USE_AMD64_ASM*/ #define TRIPLEDES_ECB_BURN_STACK 32 /* * Electronic Codebook Mode Triple-DES encryption/decryption of data * according to 'mode'. Sometimes this mode is named 'EDE' mode * (Encryption-Decryption-Encryption). */ static int tripledes_ecb_crypt (struct _tripledes_ctx *ctx, const byte * from, byte * to, int mode) { u32 left, right, work; u32 *keys; keys = mode ? 
ctx->decrypt_subkeys : ctx->encrypt_subkeys; READ_64BIT_DATA (from, left, right) INITIAL_PERMUTATION (left, work, right) DES_ROUND (right, left, work, keys) DES_ROUND (left, right, work, keys) DES_ROUND (right, left, work, keys) DES_ROUND (left, right, work, keys) DES_ROUND (right, left, work, keys) DES_ROUND (left, right, work, keys) DES_ROUND (right, left, work, keys) DES_ROUND (left, right, work, keys) DES_ROUND (right, left, work, keys) DES_ROUND (left, right, work, keys) DES_ROUND (right, left, work, keys) DES_ROUND (left, right, work, keys) DES_ROUND (right, left, work, keys) DES_ROUND (left, right, work, keys) DES_ROUND (right, left, work, keys) DES_ROUND (left, right, work, keys) DES_ROUND (left, right, work, keys) DES_ROUND (right, left, work, keys) DES_ROUND (left, right, work, keys) DES_ROUND (right, left, work, keys) DES_ROUND (left, right, work, keys) DES_ROUND (right, left, work, keys) DES_ROUND (left, right, work, keys) DES_ROUND (right, left, work, keys) DES_ROUND (left, right, work, keys) DES_ROUND (right, left, work, keys) DES_ROUND (left, right, work, keys) DES_ROUND (right, left, work, keys) DES_ROUND (left, right, work, keys) DES_ROUND (right, left, work, keys) DES_ROUND (left, right, work, keys) DES_ROUND (right, left, work, keys) DES_ROUND (right, left, work, keys) DES_ROUND (left, right, work, keys) DES_ROUND (right, left, work, keys) DES_ROUND (left, right, work, keys) DES_ROUND (right, left, work, keys) DES_ROUND (left, right, work, keys) DES_ROUND (right, left, work, keys) DES_ROUND (left, right, work, keys) DES_ROUND (right, left, work, keys) DES_ROUND (left, right, work, keys) DES_ROUND (right, left, work, keys) DES_ROUND (left, right, work, keys) DES_ROUND (right, left, work, keys) DES_ROUND (left, right, work, keys) DES_ROUND (right, left, work, keys) DES_ROUND (left, right, work, keys) FINAL_PERMUTATION (right, work, left) WRITE_64BIT_DATA (to, right, left) return 0; } #endif /*!USE_AMD64_ASM*/ /* Bulk encryption of complete blocks in CTR mode. This function is only intended for the bulk encryption feature of cipher.c. CTR is expected to be of size DES_BLOCKSIZE. */ void _gcry_3des_ctr_enc(void *context, unsigned char *ctr, void *outbuf_arg, const void *inbuf_arg, size_t nblocks) { struct _tripledes_ctx *ctx = context; unsigned char *outbuf = outbuf_arg; const unsigned char *inbuf = inbuf_arg; unsigned char tmpbuf[DES_BLOCKSIZE]; int burn_stack_depth = TRIPLEDES_ECB_BURN_STACK; int i; #ifdef USE_AMD64_ASM { int asm_burn_depth = 9 * sizeof(void *); if (nblocks >= 3 && burn_stack_depth < asm_burn_depth) burn_stack_depth = asm_burn_depth; /* Process data in 3 block chunks. */ while (nblocks >= 3) { tripledes_amd64_ctr_enc(ctx->encrypt_subkeys, outbuf, inbuf, ctr); nblocks -= 3; outbuf += 3 * DES_BLOCKSIZE; inbuf += 3 * DES_BLOCKSIZE; } /* Use generic code to handle smaller chunks... */ } #endif for ( ;nblocks; nblocks-- ) { /* Encrypt the counter. */ tripledes_ecb_encrypt (ctx, ctr, tmpbuf); /* XOR the input with the encrypted counter and store in output. */ buf_xor(outbuf, tmpbuf, inbuf, DES_BLOCKSIZE); outbuf += DES_BLOCKSIZE; inbuf += DES_BLOCKSIZE; /* Increment the counter. */ for (i = DES_BLOCKSIZE; i > 0; i--) { ctr[i-1]++; if (ctr[i-1]) break; } } wipememory(tmpbuf, sizeof(tmpbuf)); _gcry_burn_stack(burn_stack_depth); } /* Bulk decryption of complete blocks in CBC mode. This function is only intended for the bulk encryption feature of cipher.c. 
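   CBC decryption computes P_i = D(K, C_i) XOR C_{i-1} with C_0 = IV,
   so each output depends only on ciphertext that is already available,
   which is what permits the 3-block parallel path.  One reference step
   is, schematically (sketch with hypothetical buffer names; the real
   loop below uses buf_xor_n_copy_2 so that INBUF and OUTBUF may
   overlap):

     tripledes_ecb_decrypt (ctx, c_i, p_i);       // p_i = D(K, c_i)
     buf_xor (p_i, p_i, c_prev, DES_BLOCKSIZE);   // p_i ^= C_{i-1}
     c_prev = c_i;                                // next chaining block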
*/ void _gcry_3des_cbc_dec(void *context, unsigned char *iv, void *outbuf_arg, const void *inbuf_arg, size_t nblocks) { struct _tripledes_ctx *ctx = context; unsigned char *outbuf = outbuf_arg; const unsigned char *inbuf = inbuf_arg; unsigned char savebuf[DES_BLOCKSIZE]; int burn_stack_depth = TRIPLEDES_ECB_BURN_STACK; #ifdef USE_AMD64_ASM { int asm_burn_depth = 10 * sizeof(void *); if (nblocks >= 3 && burn_stack_depth < asm_burn_depth) burn_stack_depth = asm_burn_depth; /* Process data in 3 block chunks. */ while (nblocks >= 3) { tripledes_amd64_cbc_dec(ctx->decrypt_subkeys, outbuf, inbuf, iv); nblocks -= 3; outbuf += 3 * DES_BLOCKSIZE; inbuf += 3 * DES_BLOCKSIZE; } /* Use generic code to handle smaller chunks... */ } #endif for ( ;nblocks; nblocks-- ) { /* INBUF is needed later and it may be identical to OUTBUF, so store the intermediate result to SAVEBUF. */ tripledes_ecb_decrypt (ctx, inbuf, savebuf); buf_xor_n_copy_2(outbuf, savebuf, iv, inbuf, DES_BLOCKSIZE); inbuf += DES_BLOCKSIZE; outbuf += DES_BLOCKSIZE; } wipememory(savebuf, sizeof(savebuf)); _gcry_burn_stack(burn_stack_depth); } /* Bulk decryption of complete blocks in CFB mode. This function is only intended for the bulk encryption feature of cipher.c. */ void _gcry_3des_cfb_dec(void *context, unsigned char *iv, void *outbuf_arg, const void *inbuf_arg, size_t nblocks) { struct _tripledes_ctx *ctx = context; unsigned char *outbuf = outbuf_arg; const unsigned char *inbuf = inbuf_arg; int burn_stack_depth = TRIPLEDES_ECB_BURN_STACK; #ifdef USE_AMD64_ASM { int asm_burn_depth = 9 * sizeof(void *); if (nblocks >= 3 && burn_stack_depth < asm_burn_depth) burn_stack_depth = asm_burn_depth; /* Process data in 3 block chunks. */ while (nblocks >= 3) { tripledes_amd64_cfb_dec(ctx->encrypt_subkeys, outbuf, inbuf, iv); nblocks -= 3; outbuf += 3 * DES_BLOCKSIZE; inbuf += 3 * DES_BLOCKSIZE; } /* Use generic code to handle smaller chunks... */ } #endif for ( ;nblocks; nblocks-- ) { tripledes_ecb_encrypt (ctx, iv, iv); buf_xor_n_copy(outbuf, iv, inbuf, DES_BLOCKSIZE); outbuf += DES_BLOCKSIZE; inbuf += DES_BLOCKSIZE; } _gcry_burn_stack(burn_stack_depth); } /* * Check whether the 8-byte key is weak. * Does not check the parity bits of the key but simply ignores them. */ static int is_weak_key ( const byte *key ) { byte work[8]; int i, left, right, middle, cmp_result; /* clear parity bits */ for(i=0; i<8; ++i) work[i] = key[i] & 0xfe; /* binary search in the weak key table */ left = 0; right = 63; while(left <= right) { middle = (left + right) / 2; if ( !(cmp_result=working_memcmp(work, weak_keys[middle], 8)) ) return -1; if ( cmp_result > 0 ) left = middle + 1; else right = middle - 1; } return 0; } /* Alternative setkey for selftests; needs a larger key than the default. */ static gcry_err_code_t bulk_selftest_setkey (void *context, const byte *__key, unsigned __keylen) { static const unsigned char key[24] ATTR_ALIGNED_16 = { 0x66,0x9A,0x00,0x7F,0xC7,0x6A,0x45,0x9F, 0x98,0xBA,0xF9,0x17,0xFE,0xDF,0x95,0x22, 0x18,0x2A,0x39,0x47,0x5E,0x6F,0x75,0x82 }; (void)__key; (void)__keylen; return do_tripledes_setkey(context, key, sizeof(key)); } /* Run the self-tests for DES-CTR, tests IV increment of bulk CTR encryption. Returns NULL on success.
*/ static const char * selftest_ctr (void) { const int nblocks = 3+1; const int blocksize = DES_BLOCKSIZE; const int context_size = sizeof(struct _tripledes_ctx); return _gcry_selftest_helper_ctr("3DES", &bulk_selftest_setkey, &do_tripledes_encrypt, &_gcry_3des_ctr_enc, nblocks, blocksize, context_size); } /* Run the self-tests for DES-CBC, tests bulk CBC decryption. Returns NULL on success. */ static const char * selftest_cbc (void) { const int nblocks = 3+2; const int blocksize = DES_BLOCKSIZE; const int context_size = sizeof(struct _tripledes_ctx); return _gcry_selftest_helper_cbc("3DES", &bulk_selftest_setkey, &do_tripledes_encrypt, &_gcry_3des_cbc_dec, nblocks, blocksize, context_size); } /* Run the self-tests for DES-CFB, tests bulk CFB decryption. Returns NULL on success. */ static const char * selftest_cfb (void) { const int nblocks = 3+2; const int blocksize = DES_BLOCKSIZE; const int context_size = sizeof(struct _tripledes_ctx); return _gcry_selftest_helper_cfb("3DES", &bulk_selftest_setkey, &do_tripledes_encrypt, &_gcry_3des_cfb_dec, nblocks, blocksize, context_size); } /* * Performs a selftest of this DES/Triple-DES implementation. * Returns a string with the error text on failure. * Returns NULL if all is ok. */ static const char * selftest (void) { const char *r; /* * Check if 'u32' is really 32 bits wide. This DES / 3DES implementation * needs this. */ if (sizeof (u32) != 4) return "Wrong word size for DES configured."; /* * DES Maintenance Test */ { int i; byte key[8] = {0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55}; byte input[8] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}; byte result[8] = {0x24, 0x6e, 0x9d, 0xb9, 0xc5, 0x50, 0x38, 0x1a}; byte temp1[8], temp2[8], temp3[8]; des_ctx des; for (i = 0; i < 64; ++i) { des_setkey (des, key); des_ecb_encrypt (des, input, temp1); des_ecb_encrypt (des, temp1, temp2); des_setkey (des, temp2); des_ecb_decrypt (des, temp1, temp3); memcpy (key, temp3, 8); memcpy (input, temp1, 8); } if (memcmp (temp3, result, 8)) return "DES maintenance test failed."; } /* * Self-made Triple-DES test (Does somebody know an official test?) */ { int i; byte input[8] = {0xfe, 0xdc, 0xba, 0x98, 0x76, 0x54, 0x32, 0x10}; byte key1[8] = {0x12, 0x34, 0x56, 0x78, 0x9a, 0xbc, 0xde, 0xf0}; byte key2[8] = {0x11, 0x22, 0x33, 0x44, 0xff, 0xaa, 0xcc, 0xdd}; byte result[8] = {0x7b, 0x38, 0x3b, 0x23, 0xa2, 0x7d, 0x26, 0xd3}; tripledes_ctx des3; for (i = 0; i < 16; ++i) { tripledes_set2keys (des3, key1, key2); tripledes_ecb_encrypt (des3, input, key1); tripledes_ecb_decrypt (des3, input, key2); tripledes_set3keys (des3, key1, input, key2); tripledes_ecb_encrypt (des3, input, input); } if (memcmp (input, result, 8)) return "Triple-DES test failed."; } /* * More Triple-DES tests. These are test vectors as used by SSLeay, * thanks to Jeroen C. van Gelderen.
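   Each entry below is a known-answer test: a 24-byte key (three single
   DES keys), one 8-byte plaintext block and the expected 8-byte
   ciphertext.  The loop after the table feeds every entry through
   tripledes_set3keys, then checks tripledes_ecb_encrypt against
   'cipher' and tripledes_ecb_decrypt against 'plain'.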
*/ { static const struct { byte key[24]; byte plain[8]; byte cipher[8]; } testdata[] = { { { 0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01, 0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01, 0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01 }, { 0x95,0xF8,0xA5,0xE5,0xDD,0x31,0xD9,0x00 }, { 0x80,0x00,0x00,0x00,0x00,0x00,0x00,0x00 } }, { { 0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01, 0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01, 0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01 }, { 0x9D,0x64,0x55,0x5A,0x9A,0x10,0xB8,0x52 }, { 0x00,0x00,0x00,0x10,0x00,0x00,0x00,0x00 } }, { { 0x38,0x49,0x67,0x4C,0x26,0x02,0x31,0x9E, 0x38,0x49,0x67,0x4C,0x26,0x02,0x31,0x9E, 0x38,0x49,0x67,0x4C,0x26,0x02,0x31,0x9E }, { 0x51,0x45,0x4B,0x58,0x2D,0xDF,0x44,0x0A }, { 0x71,0x78,0x87,0x6E,0x01,0xF1,0x9B,0x2A } }, { { 0x04,0xB9,0x15,0xBA,0x43,0xFE,0xB5,0xB6, 0x04,0xB9,0x15,0xBA,0x43,0xFE,0xB5,0xB6, 0x04,0xB9,0x15,0xBA,0x43,0xFE,0xB5,0xB6 }, { 0x42,0xFD,0x44,0x30,0x59,0x57,0x7F,0xA2 }, { 0xAF,0x37,0xFB,0x42,0x1F,0x8C,0x40,0x95 } }, { { 0x01,0x23,0x45,0x67,0x89,0xAB,0xCD,0xEF, 0x01,0x23,0x45,0x67,0x89,0xAB,0xCD,0xEF, 0x01,0x23,0x45,0x67,0x89,0xAB,0xCD,0xEF }, { 0x73,0x6F,0x6D,0x65,0x64,0x61,0x74,0x61 }, { 0x3D,0x12,0x4F,0xE2,0x19,0x8B,0xA3,0x18 } }, { { 0x01,0x23,0x45,0x67,0x89,0xAB,0xCD,0xEF, 0x55,0x55,0x55,0x55,0x55,0x55,0x55,0x55, 0x01,0x23,0x45,0x67,0x89,0xAB,0xCD,0xEF }, { 0x73,0x6F,0x6D,0x65,0x64,0x61,0x74,0x61 }, { 0xFB,0xAB,0xA1,0xFF,0x9D,0x05,0xE9,0xB1 } }, { { 0x01,0x23,0x45,0x67,0x89,0xAB,0xCD,0xEF, 0x55,0x55,0x55,0x55,0x55,0x55,0x55,0x55, 0xFE,0xDC,0xBA,0x98,0x76,0x54,0x32,0x10 }, { 0x73,0x6F,0x6D,0x65,0x64,0x61,0x74,0x61 }, { 0x18,0xd7,0x48,0xe5,0x63,0x62,0x05,0x72 } }, { { 0x03,0x52,0x02,0x07,0x67,0x20,0x82,0x17, 0x86,0x02,0x87,0x66,0x59,0x08,0x21,0x98, 0x64,0x05,0x6A,0xBD,0xFE,0xA9,0x34,0x57 }, { 0x73,0x71,0x75,0x69,0x67,0x67,0x6C,0x65 }, { 0xc0,0x7d,0x2a,0x0f,0xa5,0x66,0xfa,0x30 } }, { { 0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01, 0x80,0x01,0x01,0x01,0x01,0x01,0x01,0x01, 0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x02 }, { 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00 }, { 0xe6,0xe6,0xdd,0x5b,0x7e,0x72,0x29,0x74 } }, { { 0x10,0x46,0x10,0x34,0x89,0x98,0x80,0x20, 0x91,0x07,0xD0,0x15,0x89,0x19,0x01,0x01, 0x19,0x07,0x92,0x10,0x98,0x1A,0x01,0x01 }, { 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00 }, { 0xe1,0xef,0x62,0xc3,0x32,0xfe,0x82,0x5b } } }; byte result[8]; int i; tripledes_ctx des3; for (i = 0; i < DIM (testdata); ++i) { tripledes_set3keys (des3, testdata[i].key, testdata[i].key + 8, testdata[i].key + 16); tripledes_ecb_encrypt (des3, testdata[i].plain, result); if (memcmp (testdata[i].cipher, result, 8)) return "Triple-DES SSLeay test failed on encryption."; tripledes_ecb_decrypt (des3, testdata[i].cipher, result); if (memcmp (testdata[i].plain, result, 8)) return "Triple-DES SSLeay test failed on decryption."; } } if ( (r = selftest_cbc ()) ) return r; if ( (r = selftest_cfb ()) ) return r; if ( (r = selftest_ctr ()) ) return r; return 0; } static gcry_err_code_t do_tripledes_setkey ( void *context, const byte *key, unsigned keylen ) { struct _tripledes_ctx *ctx = (struct _tripledes_ctx *) context; if( keylen != 24 ) return GPG_ERR_INV_KEYLEN; tripledes_set3keys ( ctx, key, key+8, key+16); if (ctx->flags.no_weak_key) ; /* Detection has been disabled.
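   For background: the four classic DES weak keys yield sixteen
   identical round subkeys, so encrypting twice is the identity,
   E(K, E(K, x)) == x; the 64-entry weak_keys[] table also covers the
   semi-weak and possibly-weak keys.  Illustration only (hypothetical
   variable name):

     static const byte k_weak[8] =
       { 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01 };
     // is_weak_key (k_weak) returns nonzero, so this setkey returns
     // GPG_ERR_WEAK_KEY unless the check was disabled via
     // CIPHER_INFO_NO_WEAK_KEY.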
*/ else if (is_weak_key (key) || is_weak_key (key+8) || is_weak_key (key+16)) { _gcry_burn_stack (64); return GPG_ERR_WEAK_KEY; } _gcry_burn_stack (64); return GPG_ERR_NO_ERROR; } static gcry_err_code_t do_tripledes_set_extra_info (void *context, int what, const void *buffer, size_t buflen) { struct _tripledes_ctx *ctx = (struct _tripledes_ctx *)context; gpg_err_code_t ec = 0; (void)buffer; (void)buflen; switch (what) { case CIPHER_INFO_NO_WEAK_KEY: ctx->flags.no_weak_key = 1; break; default: ec = GPG_ERR_INV_OP; break; } return ec; } static unsigned int do_tripledes_encrypt( void *context, byte *outbuf, const byte *inbuf ) { struct _tripledes_ctx *ctx = (struct _tripledes_ctx *) context; tripledes_ecb_encrypt ( ctx, inbuf, outbuf ); return /*burn_stack*/ TRIPLEDES_ECB_BURN_STACK; } static unsigned int do_tripledes_decrypt( void *context, byte *outbuf, const byte *inbuf ) { struct _tripledes_ctx *ctx = (struct _tripledes_ctx *) context; tripledes_ecb_decrypt ( ctx, inbuf, outbuf ); return /*burn_stack*/ TRIPLEDES_ECB_BURN_STACK; } static gcry_err_code_t do_des_setkey (void *context, const byte *key, unsigned keylen) { struct _des_ctx *ctx = (struct _des_ctx *) context; if (keylen != 8) return GPG_ERR_INV_KEYLEN; des_setkey (ctx, key); if (is_weak_key (key)) { _gcry_burn_stack (64); return GPG_ERR_WEAK_KEY; } _gcry_burn_stack (64); return GPG_ERR_NO_ERROR; } static unsigned int do_des_encrypt( void *context, byte *outbuf, const byte *inbuf ) { struct _des_ctx *ctx = (struct _des_ctx *) context; des_ecb_encrypt ( ctx, inbuf, outbuf ); return /*burn_stack*/ (32); } static unsigned int do_des_decrypt( void *context, byte *outbuf, const byte *inbuf ) { struct _des_ctx *ctx = (struct _des_ctx *) context; des_ecb_decrypt ( ctx, inbuf, outbuf ); return /*burn_stack*/ (32); } /* Self-test section. */ /* Selftest for TripleDES. */ static gpg_err_code_t selftest_fips (int extended, selftest_report_func_t report) { const char *what; const char *errtxt; (void)extended; /* No extended tests available. */ what = "low-level"; errtxt = selftest (); if (errtxt) goto failed; /* The low-level self-tests are quite extensive and thus we can do without high level tests. This is also justified because we have no custom block code implementation for 3des but always use the standard high level block code. */ return 0; /* Succeeded. */ failed: if (report) report ("cipher", GCRY_CIPHER_3DES, what, errtxt); return GPG_ERR_SELFTEST_FAILED; } /* Run a full self-test for ALGO and return 0 on success. */ static gpg_err_code_t run_selftests (int algo, int extended, selftest_report_func_t report) { gpg_err_code_t ec; switch (algo) { case GCRY_CIPHER_3DES: ec = selftest_fips (extended, report); break; default: ec = GPG_ERR_CIPHER_ALGO; break; } return ec; } gcry_cipher_spec_t _gcry_cipher_spec_des = { GCRY_CIPHER_DES, {0, 0}, "DES", NULL, NULL, 8, 64, sizeof (struct _des_ctx), do_des_setkey, do_des_encrypt, do_des_decrypt }; static gcry_cipher_oid_spec_t oids_tripledes[] = { { "1.2.840.113549.3.7", GCRY_CIPHER_MODE_CBC }, /* Teletrust specific OID for 3DES. 
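   (An application can map these OIDs back to the cipher mode through
   the public API; sketch, assuming the usual libgcrypt interface:

     int mode = gcry_cipher_mode_from_oid ("1.2.840.113549.3.7");
     // expected: mode == GCRY_CIPHER_MODE_CBC for des-EDE3-CBC

   while gcry_cipher_map_name () resolves the OID to GCRY_CIPHER_3DES.)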
*/ { "1.3.36.3.1.3.2.1", GCRY_CIPHER_MODE_CBC }, /* pbeWithSHAAnd3_KeyTripleDES_CBC */ { "1.2.840.113549.1.12.1.3", GCRY_CIPHER_MODE_CBC }, { NULL } }; gcry_cipher_spec_t _gcry_cipher_spec_tripledes = { GCRY_CIPHER_3DES, {0, 1}, "3DES", NULL, oids_tripledes, 8, 192, sizeof (struct _tripledes_ctx), do_tripledes_setkey, do_tripledes_encrypt, do_tripledes_decrypt, NULL, NULL, run_selftests, do_tripledes_set_extra_info }; diff --git a/cipher/rijndael-amd64.S b/cipher/rijndael-amd64.S index b149e948..798ff51a 100644 --- a/cipher/rijndael-amd64.S +++ b/cipher/rijndael-amd64.S @@ -1,453 +1,451 @@ /* rinjdael-amd64.S - AMD64 assembly implementation of AES cipher * * Copyright (C) 2013 Jussi Kivilinna * * This file is part of Libgcrypt. * * Libgcrypt is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as * published by the Free Software Foundation; either version 2.1 of * the License, or (at your option) any later version. * * Libgcrypt is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this program; if not, see . */ #ifdef __x86_64 #include #if (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \ defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS)) && defined(USE_AES) -#ifdef __PIC__ -# define RIP (%rip) -#else -# define RIP -#endif - -#ifdef HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS -# define ELF(...) __VA_ARGS__ -#else -# define ELF(...) /*_*/ -#endif +#include "asm-common-amd64.h" .text /* table macros */ #define E0 (0) #define Es0 (1) #define Esize 4 #define Essize 4 #define D0 (0) #define Ds0 (4 * 256) #define Dsize 4 #define Dssize 1 /* register macros */ #define CTX %rdi #define RTAB %r12 #define RA %rax #define RB %rbx #define RC %rcx #define RD %rdx #define RAd %eax #define RBd %ebx #define RCd %ecx #define RDd %edx #define RAbl %al #define RBbl %bl #define RCbl %cl #define RDbl %dl #define RAbh %ah #define RBbh %bh #define RCbh %ch #define RDbh %dh #define RNA %r8 #define RNB %r9 #define RNC %r10 #define RND %r11 #define RNAd %r8d #define RNBd %r9d #define RNCd %r10d #define RNDd %r11d #define RT0 %rbp #define RT1 %rsi #define RT0d %ebp #define RT1d %esi /* helper macros */ #define do16bit(op, source, tablemul, table1, dest1, table2, dest2, t0, t1) \ movzbl source ## bl, t0 ## d; \ movzbl source ## bh, t1 ## d; \ op ## l table1(RTAB,t0,tablemul), dest1 ## d; \ op ## l table2(RTAB,t1,tablemul), dest2 ## d; #define do16bit_shr(shf, op, source, tablemul, table1, dest1, table2, dest2, t0, t1) \ movzbl source ## bl, t0 ## d; \ movzbl source ## bh, t1 ## d; \ shrl $(shf), source ## d; \ op ## l table1(RTAB,t0,tablemul), dest1 ## d; \ op ## l table2(RTAB,t1,tablemul), dest2 ## d; #define last_do16bit(op, source, tablemul, table1, dest1, table2, dest2, t0, t1) \ movzbl source ## bl, t0 ## d; \ movzbl source ## bh, t1 ## d; \ movzbl table1(RTAB,t0,tablemul), t0 ## d; \ movzbl table2(RTAB,t1,tablemul), t1 ## d; \ op ## l t0 ## d, dest1 ## d; \ op ## l t1 ## d, dest2 ## d; #define last_do16bit_shr(shf, op, source, tablemul, table1, dest1, table2, dest2, t0, t1) \ movzbl source ## bl, t0 ## d; \ movzbl source ## bh, t1 ## d; \ shrl $(shf), source ## d; \ movzbl table1(RTAB,t0,tablemul), t0 ## d; \ movzbl table2(RTAB,t1,tablemul), t1 ## d; \ op ## l t0 ## d, dest1 ## d; \ op ## 
l t1 ## d, dest2 ## d; /*********************************************************************** * AMD64 assembly implementation of the AES cipher ***********************************************************************/ #define addroundkey(round, ra, rb, rc, rd) \ xorl (((round) * 16) + 0 * 4)(CTX), ra ## d; \ xorl (((round) * 16) + 1 * 4)(CTX), rb ## d; \ xorl (((round) * 16) + 2 * 4)(CTX), rc ## d; \ xorl (((round) * 16) + 3 * 4)(CTX), rd ## d; #define do_encround(next_r) \ do16bit_shr(16, mov, RA, Esize, E0, RNA, E0, RND, RT0, RT1); \ do16bit( mov, RA, Esize, E0, RNC, E0, RNB, RT0, RT1); \ movl (((next_r) * 16) + 0 * 4)(CTX), RAd; \ roll $8, RNDd; \ xorl RNAd, RAd; \ roll $8, RNCd; \ roll $8, RNBd; \ roll $8, RAd; \ \ do16bit_shr(16, xor, RD, Esize, E0, RND, E0, RNC, RT0, RT1); \ do16bit( xor, RD, Esize, E0, RNB, E0, RA, RT0, RT1); \ movl (((next_r) * 16) + 3 * 4)(CTX), RDd; \ roll $8, RNCd; \ xorl RNDd, RDd; \ roll $8, RNBd; \ roll $8, RAd; \ roll $8, RDd; \ \ do16bit_shr(16, xor, RC, Esize, E0, RNC, E0, RNB, RT0, RT1); \ do16bit( xor, RC, Esize, E0, RA, E0, RD, RT0, RT1); \ movl (((next_r) * 16) + 2 * 4)(CTX), RCd; \ roll $8, RNBd; \ xorl RNCd, RCd; \ roll $8, RAd; \ roll $8, RDd; \ roll $8, RCd; \ \ do16bit_shr(16, xor, RB, Esize, E0, RNB, E0, RA, RT0, RT1); \ do16bit( xor, RB, Esize, E0, RD, E0, RC, RT0, RT1); \ movl (((next_r) * 16) + 1 * 4)(CTX), RBd; \ roll $8, RAd; \ xorl RNBd, RBd; \ roll $16, RDd; \ roll $24, RCd; #define do_lastencround(next_r) \ do16bit_shr(16, movzb, RA, Essize, Es0, RNA, Es0, RND, RT0, RT1); \ do16bit( movzb, RA, Essize, Es0, RNC, Es0, RNB, RT0, RT1); \ movl (((next_r) * 16) + 0 * 4)(CTX), RAd; \ roll $8, RNDd; \ xorl RNAd, RAd; \ roll $8, RNCd; \ roll $8, RNBd; \ roll $8, RAd; \ \ last_do16bit_shr(16, xor, RD, Essize, Es0, RND, Es0, RNC, RT0, RT1); \ last_do16bit( xor, RD, Essize, Es0, RNB, Es0, RA, RT0, RT1); \ movl (((next_r) * 16) + 3 * 4)(CTX), RDd; \ roll $8, RNCd; \ xorl RNDd, RDd; \ roll $8, RNBd; \ roll $8, RAd; \ roll $8, RDd; \ \ last_do16bit_shr(16, xor, RC, Essize, Es0, RNC, Es0, RNB, RT0, RT1); \ last_do16bit( xor, RC, Essize, Es0, RA, Es0, RD, RT0, RT1); \ movl (((next_r) * 16) + 2 * 4)(CTX), RCd; \ roll $8, RNBd; \ xorl RNCd, RCd; \ roll $8, RAd; \ roll $8, RDd; \ roll $8, RCd; \ \ last_do16bit_shr(16, xor, RB, Essize, Es0, RNB, Es0, RA, RT0, RT1); \ last_do16bit( xor, RB, Essize, Es0, RD, Es0, RC, RT0, RT1); \ movl (((next_r) * 16) + 1 * 4)(CTX), RBd; \ roll $8, RAd; \ xorl RNBd, RBd; \ roll $16, RDd; \ roll $24, RCd; #define firstencround(round) \ addroundkey(round, RA, RB, RC, RD); \ do_encround((round) + 1); #define encround(round) \ do_encround((round) + 1); #define lastencround(round) \ do_lastencround((round) + 1); .align 8 .globl _gcry_aes_amd64_encrypt_block ELF(.type _gcry_aes_amd64_encrypt_block,@function;) _gcry_aes_amd64_encrypt_block: /* input: * %rdi: keysched, CTX * %rsi: dst * %rdx: src * %ecx: number of rounds.. 
10, 12 or 14 * %r8: encryption tables */ + ENTER_SYSV_FUNC_PARAMS_5 + subq $(5 * 8), %rsp; movq %rsi, (0 * 8)(%rsp); movl %ecx, (1 * 8)(%rsp); movq %rbp, (2 * 8)(%rsp); movq %rbx, (3 * 8)(%rsp); movq %r12, (4 * 8)(%rsp); leaq (%r8), RTAB; /* read input block */ movl 0 * 4(%rdx), RAd; movl 1 * 4(%rdx), RBd; movl 2 * 4(%rdx), RCd; movl 3 * 4(%rdx), RDd; firstencround(0); encround(1); encround(2); encround(3); encround(4); encround(5); encround(6); encround(7); encround(8); cmpl $12, (1 * 8)(%rsp); jnb .Lenc_not_128; lastencround(9); .align 4 .Lenc_done: /* write output block */ movq (0 * 8)(%rsp), %rsi; movl RAd, 0 * 4(%rsi); movl RBd, 1 * 4(%rsi); movl RCd, 2 * 4(%rsi); movl RDd, 3 * 4(%rsi); movq (4 * 8)(%rsp), %r12; movq (3 * 8)(%rsp), %rbx; movq (2 * 8)(%rsp), %rbp; addq $(5 * 8), %rsp; movl $(6 * 8), %eax; + + EXIT_SYSV_FUNC ret; .align 4 .Lenc_not_128: je .Lenc_192 encround(9); encround(10); encround(11); encround(12); lastencround(13); jmp .Lenc_done; .align 4 .Lenc_192: encround(9); encround(10); lastencround(11); jmp .Lenc_done; ELF(.size _gcry_aes_amd64_encrypt_block,.-_gcry_aes_amd64_encrypt_block;) #define do_decround(next_r) \ do16bit_shr(16, mov, RA, Dsize, D0, RNA, D0, RNB, RT0, RT1); \ do16bit( mov, RA, Dsize, D0, RNC, D0, RND, RT0, RT1); \ movl (((next_r) * 16) + 0 * 4)(CTX), RAd; \ roll $8, RNBd; \ xorl RNAd, RAd; \ roll $8, RNCd; \ roll $8, RNDd; \ roll $8, RAd; \ \ do16bit_shr(16, xor, RB, Dsize, D0, RNB, D0, RNC, RT0, RT1); \ do16bit( xor, RB, Dsize, D0, RND, D0, RA, RT0, RT1); \ movl (((next_r) * 16) + 1 * 4)(CTX), RBd; \ roll $8, RNCd; \ xorl RNBd, RBd; \ roll $8, RNDd; \ roll $8, RAd; \ roll $8, RBd; \ \ do16bit_shr(16, xor, RC, Dsize, D0, RNC, D0, RND, RT0, RT1); \ do16bit( xor, RC, Dsize, D0, RA, D0, RB, RT0, RT1); \ movl (((next_r) * 16) + 2 * 4)(CTX), RCd; \ roll $8, RNDd; \ xorl RNCd, RCd; \ roll $8, RAd; \ roll $8, RBd; \ roll $8, RCd; \ \ do16bit_shr(16, xor, RD, Dsize, D0, RND, D0, RA, RT0, RT1); \ do16bit( xor, RD, Dsize, D0, RB, D0, RC, RT0, RT1); \ movl (((next_r) * 16) + 3 * 4)(CTX), RDd; \ roll $8, RAd; \ xorl RNDd, RDd; \ roll $16, RBd; \ roll $24, RCd; #define do_lastdecround(next_r) \ do16bit_shr(16, movzb, RA, Dssize, Ds0, RNA, Ds0, RNB, RT0, RT1); \ do16bit( movzb, RA, Dssize, Ds0, RNC, Ds0, RND, RT0, RT1); \ movl (((next_r) * 16) + 0 * 4)(CTX), RAd; \ roll $8, RNBd; \ xorl RNAd, RAd; \ roll $8, RNCd; \ roll $8, RNDd; \ roll $8, RAd; \ \ last_do16bit_shr(16, xor, RB, Dssize, Ds0, RNB, Ds0, RNC, RT0, RT1); \ last_do16bit( xor, RB, Dssize, Ds0, RND, Ds0, RA, RT0, RT1); \ movl (((next_r) * 16) + 1 * 4)(CTX), RBd; \ roll $8, RNCd; \ xorl RNBd, RBd; \ roll $8, RNDd; \ roll $8, RAd; \ roll $8, RBd; \ \ last_do16bit_shr(16, xor, RC, Dssize, Ds0, RNC, Ds0, RND, RT0, RT1); \ last_do16bit( xor, RC, Dssize, Ds0, RA, Ds0, RB, RT0, RT1); \ movl (((next_r) * 16) + 2 * 4)(CTX), RCd; \ roll $8, RNDd; \ xorl RNCd, RCd; \ roll $8, RAd; \ roll $8, RBd; \ roll $8, RCd; \ \ last_do16bit_shr(16, xor, RD, Dssize, Ds0, RND, Ds0, RA, RT0, RT1); \ last_do16bit( xor, RD, Dssize, Ds0, RB, Ds0, RC, RT0, RT1); \ movl (((next_r) * 16) + 3 * 4)(CTX), RDd; \ roll $8, RAd; \ xorl RNDd, RDd; \ roll $16, RBd; \ roll $24, RCd; #define firstdecround(round) \ addroundkey((round + 1), RA, RB, RC, RD); \ do_decround(round); #define decround(round) \ do_decround(round); #define lastdecround(round) \ do_lastdecround(round); .align 8 .globl _gcry_aes_amd64_decrypt_block ELF(.type _gcry_aes_amd64_decrypt_block,@function;) _gcry_aes_amd64_decrypt_block: /* input: * %rdi: keysched, CTX * %rsi: 
dst * %rdx: src * %ecx: number of rounds.. 10, 12 or 14 * %r8: decryption tables */ + ENTER_SYSV_FUNC_PARAMS_5 + subq $(5 * 8), %rsp; movq %rsi, (0 * 8)(%rsp); movl %ecx, (1 * 8)(%rsp); movq %rbp, (2 * 8)(%rsp); movq %rbx, (3 * 8)(%rsp); movq %r12, (4 * 8)(%rsp); leaq (%r8), RTAB; /* read input block */ movl 0 * 4(%rdx), RAd; movl 1 * 4(%rdx), RBd; movl 2 * 4(%rdx), RCd; movl 3 * 4(%rdx), RDd; cmpl $12, (1 * 8)(%rsp); jnb .Ldec_256; firstdecround(9); .align 4 .Ldec_tail: decround(8); decround(7); decround(6); decround(5); decround(4); decround(3); decround(2); decround(1); lastdecround(0); /* write output block */ movq (0 * 8)(%rsp), %rsi; movl RAd, 0 * 4(%rsi); movl RBd, 1 * 4(%rsi); movl RCd, 2 * 4(%rsi); movl RDd, 3 * 4(%rsi); movq (4 * 8)(%rsp), %r12; movq (3 * 8)(%rsp), %rbx; movq (2 * 8)(%rsp), %rbp; addq $(5 * 8), %rsp; movl $(6 * 8), %eax; + + EXIT_SYSV_FUNC ret; .align 4 .Ldec_256: je .Ldec_192; firstdecround(13); decround(12); decround(11); decround(10); decround(9); jmp .Ldec_tail; .align 4 .Ldec_192: firstdecround(11); decround(10); decround(9); jmp .Ldec_tail; ELF(.size _gcry_aes_amd64_decrypt_block,.-_gcry_aes_amd64_decrypt_block;) #endif /*USE_AES*/ #endif /*__x86_64*/ diff --git a/cipher/rijndael.c b/cipher/rijndael.c index 548bfa09..df1363f2 100644 --- a/cipher/rijndael.c +++ b/cipher/rijndael.c @@ -1,2106 +1,2068 @@ /* Rijndael (AES) for GnuPG * Copyright (C) 2000, 2001, 2002, 2003, 2007, * 2008, 2011, 2012 Free Software Foundation, Inc. * * This file is part of Libgcrypt. * * Libgcrypt is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as * published by the Free Software Foundation; either version 2.1 of * the License, or (at your option) any later version. * * Libgcrypt is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this program; if not, see . ******************************************************************* * The code here is based on the optimized implementation taken from * http://www.esat.kuleuven.ac.be/~rijmen/rijndael/ on Oct 2, 2000, * which carries this notice: *------------------------------------------ * rijndael-alg-fst.c v2.3 April '2000 * * Optimised ANSI C code * * authors: v1.0: Antoon Bosselaers * v2.0: Vincent Rijmen * v2.3: Paulo Barreto * * This code is placed in the public domain. 
*------------------------------------------ * * The SP800-38a document is available at: * http://csrc.nist.gov/publications/nistpubs/800-38a/sp800-38a.pdf * */ #include #include #include #include /* for memcmp() */ #include "types.h" /* for byte and u32 typedefs */ #include "g10lib.h" #include "cipher.h" #include "bufhelp.h" #include "cipher-selftest.h" #include "rijndael-internal.h" #include "./cipher-internal.h" #ifdef USE_AMD64_ASM /* AMD64 assembly implementations of AES */ extern unsigned int _gcry_aes_amd64_encrypt_block(const void *keysched_enc, unsigned char *out, const unsigned char *in, int rounds, const void *encT); extern unsigned int _gcry_aes_amd64_decrypt_block(const void *keysched_dec, unsigned char *out, const unsigned char *in, int rounds, const void *decT); #endif /*USE_AMD64_ASM*/ #ifdef USE_AESNI /* AES-NI (AMD64 & i386) accelerated implementations of AES */ extern void _gcry_aes_aesni_do_setkey(RIJNDAEL_context *ctx, const byte *key); extern void _gcry_aes_aesni_prepare_decryption(RIJNDAEL_context *ctx); extern unsigned int _gcry_aes_aesni_encrypt (const RIJNDAEL_context *ctx, unsigned char *dst, const unsigned char *src); extern unsigned int _gcry_aes_aesni_decrypt (const RIJNDAEL_context *ctx, unsigned char *dst, const unsigned char *src); extern void _gcry_aes_aesni_cfb_enc (RIJNDAEL_context *ctx, unsigned char *outbuf, const unsigned char *inbuf, unsigned char *iv, size_t nblocks); extern void _gcry_aes_aesni_cbc_enc (RIJNDAEL_context *ctx, unsigned char *outbuf, const unsigned char *inbuf, unsigned char *iv, size_t nblocks, int cbc_mac); extern void _gcry_aes_aesni_ctr_enc (RIJNDAEL_context *ctx, unsigned char *outbuf, const unsigned char *inbuf, unsigned char *ctr, size_t nblocks); extern void _gcry_aes_aesni_cfb_dec (RIJNDAEL_context *ctx, unsigned char *outbuf, const unsigned char *inbuf, unsigned char *iv, size_t nblocks); extern void _gcry_aes_aesni_cbc_dec (RIJNDAEL_context *ctx, unsigned char *outbuf, const unsigned char *inbuf, unsigned char *iv, size_t nblocks); extern void _gcry_aes_aesni_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg, const void *inbuf_arg, size_t nblocks, int encrypt); extern void _gcry_aes_aesni_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg, size_t nblocks); extern void _gcry_aes_aesni_xts_crypt (RIJNDAEL_context *ctx, unsigned char *tweak, unsigned char *outbuf, const unsigned char *inbuf, size_t nblocks, int encrypt); #endif #ifdef USE_SSSE3 /* SSSE3 (AMD64) vector permutation implementation of AES */ extern void _gcry_aes_ssse3_do_setkey(RIJNDAEL_context *ctx, const byte *key); extern void _gcry_aes_ssse3_prepare_decryption(RIJNDAEL_context *ctx); extern unsigned int _gcry_aes_ssse3_encrypt (const RIJNDAEL_context *ctx, unsigned char *dst, const unsigned char *src); extern unsigned int _gcry_aes_ssse3_decrypt (const RIJNDAEL_context *ctx, unsigned char *dst, const unsigned char *src); extern void _gcry_aes_ssse3_cfb_enc (RIJNDAEL_context *ctx, unsigned char *outbuf, const unsigned char *inbuf, unsigned char *iv, size_t nblocks); extern void _gcry_aes_ssse3_cbc_enc (RIJNDAEL_context *ctx, unsigned char *outbuf, const unsigned char *inbuf, unsigned char *iv, size_t nblocks, int cbc_mac); extern void _gcry_aes_ssse3_ctr_enc (RIJNDAEL_context *ctx, unsigned char *outbuf, const unsigned char *inbuf, unsigned char *ctr, size_t nblocks); extern void _gcry_aes_ssse3_cfb_dec (RIJNDAEL_context *ctx, unsigned char *outbuf, const unsigned char *inbuf, unsigned char *iv, size_t nblocks); extern void _gcry_aes_ssse3_cbc_dec 
(RIJNDAEL_context *ctx, unsigned char *outbuf, const unsigned char *inbuf, unsigned char *iv, size_t nblocks); extern void _gcry_aes_ssse3_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg, const void *inbuf_arg, size_t nblocks, int encrypt); extern void _gcry_aes_ssse3_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg, size_t nblocks); #endif #ifdef USE_PADLOCK extern unsigned int _gcry_aes_padlock_encrypt (const RIJNDAEL_context *ctx, unsigned char *bx, const unsigned char *ax); extern unsigned int _gcry_aes_padlock_decrypt (const RIJNDAEL_context *ctx, unsigned char *bx, const unsigned char *ax); #endif #ifdef USE_ARM_ASM /* ARM assembly implementations of AES */ extern unsigned int _gcry_aes_arm_encrypt_block(const void *keysched_enc, unsigned char *out, const unsigned char *in, int rounds, const void *encT); extern unsigned int _gcry_aes_arm_decrypt_block(const void *keysched_dec, unsigned char *out, const unsigned char *in, int rounds, const void *decT); #endif /*USE_ARM_ASM*/ #ifdef USE_ARM_CE /* ARMv8 Crypto Extension implementations of AES */ extern void _gcry_aes_armv8_ce_setkey(RIJNDAEL_context *ctx, const byte *key); extern void _gcry_aes_armv8_ce_prepare_decryption(RIJNDAEL_context *ctx); extern unsigned int _gcry_aes_armv8_ce_encrypt(const RIJNDAEL_context *ctx, unsigned char *dst, const unsigned char *src); extern unsigned int _gcry_aes_armv8_ce_decrypt(const RIJNDAEL_context *ctx, unsigned char *dst, const unsigned char *src); extern void _gcry_aes_armv8_ce_cfb_enc (RIJNDAEL_context *ctx, unsigned char *outbuf, const unsigned char *inbuf, unsigned char *iv, size_t nblocks); extern void _gcry_aes_armv8_ce_cbc_enc (RIJNDAEL_context *ctx, unsigned char *outbuf, const unsigned char *inbuf, unsigned char *iv, size_t nblocks, int cbc_mac); extern void _gcry_aes_armv8_ce_ctr_enc (RIJNDAEL_context *ctx, unsigned char *outbuf, const unsigned char *inbuf, unsigned char *ctr, size_t nblocks); extern void _gcry_aes_armv8_ce_cfb_dec (RIJNDAEL_context *ctx, unsigned char *outbuf, const unsigned char *inbuf, unsigned char *iv, size_t nblocks); extern void _gcry_aes_armv8_ce_cbc_dec (RIJNDAEL_context *ctx, unsigned char *outbuf, const unsigned char *inbuf, unsigned char *iv, size_t nblocks); extern void _gcry_aes_armv8_ce_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg, const void *inbuf_arg, size_t nblocks, int encrypt); extern void _gcry_aes_armv8_ce_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg, size_t nblocks); #endif /*USE_ARM_ASM*/ static unsigned int do_encrypt (const RIJNDAEL_context *ctx, unsigned char *bx, const unsigned char *ax); static unsigned int do_decrypt (const RIJNDAEL_context *ctx, unsigned char *bx, const unsigned char *ax); /* All the numbers. */ #include "rijndael-tables.h" /* Function prototypes. */ static const char *selftest(void); /* Prefetching for encryption/decryption tables. */ static void prefetch_table(const volatile byte *tab, size_t len) { size_t i; for (i = 0; i < len; i += 8 * 32) { (void)tab[i + 0 * 32]; (void)tab[i + 1 * 32]; (void)tab[i + 2 * 32]; (void)tab[i + 3 * 32]; (void)tab[i + 4 * 32]; (void)tab[i + 5 * 32]; (void)tab[i + 6 * 32]; (void)tab[i + 7 * 32]; } (void)tab[len - 1]; } static void prefetch_enc(void) { prefetch_table((const void *)encT, sizeof(encT)); } static void prefetch_dec(void) { prefetch_table((const void *)&dec_tables, sizeof(dec_tables)); } /* Perform the key setup. 
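   The key length alone selects the AES variant, per FIPS-197
   (illustrative summary of the mapping enforced by do_setkey below):

     // keylen 16 bytes -> rounds = 10, KC = 4   (AES-128)
     // keylen 24 bytes -> rounds = 12, KC = 6   (AES-192)
     // keylen 32 bytes -> rounds = 14, KC = 8   (AES-256)
     // any other length -> GPG_ERR_INV_KEYLEN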
*/ static gcry_err_code_t do_setkey (RIJNDAEL_context *ctx, const byte *key, const unsigned keylen) { static int initialized = 0; static const char *selftest_failed = 0; int rounds; int i,j, r, t, rconpointer = 0; int KC; #if defined(USE_AESNI) || defined(USE_PADLOCK) || defined(USE_SSSE3) \ || defined(USE_ARM_CE) unsigned int hwfeatures; #endif /* The on-the-fly self tests are only run in non-fips mode. In fips mode explicit self-tests are required. Actually the on-the-fly self-tests are not fully thread-safe and it might happen that a failed self-test won't get noticed in another thread. FIXME: We might want to have a central registry of succeeded self-tests. */ if (!fips_mode () && !initialized) { initialized = 1; selftest_failed = selftest (); if (selftest_failed) log_error ("%s\n", selftest_failed ); } if (selftest_failed) return GPG_ERR_SELFTEST_FAILED; if( keylen == 128/8 ) { rounds = 10; KC = 4; } else if ( keylen == 192/8 ) { rounds = 12; KC = 6; } else if ( keylen == 256/8 ) { rounds = 14; KC = 8; } else return GPG_ERR_INV_KEYLEN; ctx->rounds = rounds; #if defined(USE_AESNI) || defined(USE_PADLOCK) || defined(USE_SSSE3) \ || defined(USE_ARM_CE) hwfeatures = _gcry_get_hw_features (); #endif ctx->decryption_prepared = 0; #ifdef USE_PADLOCK ctx->use_padlock = 0; #endif #ifdef USE_AESNI ctx->use_aesni = 0; #endif #ifdef USE_SSSE3 ctx->use_ssse3 = 0; #endif #ifdef USE_ARM_CE ctx->use_arm_ce = 0; #endif if (0) { ; } #ifdef USE_AESNI else if (hwfeatures & HWF_INTEL_AESNI) { ctx->encrypt_fn = _gcry_aes_aesni_encrypt; ctx->decrypt_fn = _gcry_aes_aesni_decrypt; ctx->prefetch_enc_fn = NULL; ctx->prefetch_dec_fn = NULL; ctx->use_aesni = 1; } #endif #ifdef USE_PADLOCK else if (hwfeatures & HWF_PADLOCK_AES && keylen == 128/8) { ctx->encrypt_fn = _gcry_aes_padlock_encrypt; ctx->decrypt_fn = _gcry_aes_padlock_decrypt; ctx->prefetch_enc_fn = NULL; ctx->prefetch_dec_fn = NULL; ctx->use_padlock = 1; memcpy (ctx->padlockkey, key, keylen); } #endif #ifdef USE_SSSE3 else if (hwfeatures & HWF_INTEL_SSSE3) { ctx->encrypt_fn = _gcry_aes_ssse3_encrypt; ctx->decrypt_fn = _gcry_aes_ssse3_decrypt; ctx->prefetch_enc_fn = NULL; ctx->prefetch_dec_fn = NULL; ctx->use_ssse3 = 1; } #endif #ifdef USE_ARM_CE else if (hwfeatures & HWF_ARM_AES) { ctx->encrypt_fn = _gcry_aes_armv8_ce_encrypt; ctx->decrypt_fn = _gcry_aes_armv8_ce_decrypt; ctx->prefetch_enc_fn = NULL; ctx->prefetch_dec_fn = NULL; ctx->use_arm_ce = 1; } #endif else { ctx->encrypt_fn = do_encrypt; ctx->decrypt_fn = do_decrypt; ctx->prefetch_enc_fn = prefetch_enc; ctx->prefetch_dec_fn = prefetch_dec; } /* NB: We don't yet support Padlock hardware key generation. */ if (0) { ; } #ifdef USE_AESNI else if (ctx->use_aesni) _gcry_aes_aesni_do_setkey (ctx, key); #endif #ifdef USE_SSSE3 else if (ctx->use_ssse3) _gcry_aes_ssse3_do_setkey (ctx, key); #endif #ifdef USE_ARM_CE else if (ctx->use_arm_ce) _gcry_aes_armv8_ce_setkey (ctx, key); #endif else { const byte *sbox = ((const byte *)encT) + 1; union { PROPERLY_ALIGNED_TYPE dummy; byte data[MAXKC][4]; u32 data32[MAXKC]; } tkk[2]; #define k tkk[0].data #define k_u32 tkk[0].data32 #define tk tkk[1].data #define tk_u32 tkk[1].data32 #define W (ctx->keyschenc) #define W_u32 (ctx->keyschenc32) prefetch_enc(); for (i = 0; i < keylen; i++) { k[i >> 2][i & 3] = key[i]; } for (j = KC-1; j >= 0; j--) { tk_u32[j] = k_u32[j]; } r = 0; t = 0; /* Copy values into round key array. 
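   The first loop below only copies the user key words into W; the
   while-loop that follows derives each further group of KC words with
   the FIPS-197 schedule.  One expansion step in pseudo-C (sketch;
   SubWord/RotWord and Nk = KC as in the standard, not identifiers used
   by this file):

     temp = W[i-1];
     if (i % Nk == 0)
       temp = SubWord (RotWord (temp)) ^ rcon[i/Nk - 1];
     else if (Nk == 8 && i % Nk == 4)
       temp = SubWord (temp);          // extra step for 256-bit keys
     W[i] = W[i-Nk] ^ temp;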
*/ for (j = 0; (j < KC) && (r < rounds + 1); ) { for (; (j < KC) && (t < 4); j++, t++) { W_u32[r][t] = le_bswap32(tk_u32[j]); } if (t == 4) { r++; t = 0; } } while (r < rounds + 1) { /* While not enough round key material calculated calculate new values. */ tk[0][0] ^= sbox[tk[KC-1][1] * 4]; tk[0][1] ^= sbox[tk[KC-1][2] * 4]; tk[0][2] ^= sbox[tk[KC-1][3] * 4]; tk[0][3] ^= sbox[tk[KC-1][0] * 4]; tk[0][0] ^= rcon[rconpointer++]; if (KC != 8) { for (j = 1; j < KC; j++) { tk_u32[j] ^= tk_u32[j-1]; } } else { for (j = 1; j < KC/2; j++) { tk_u32[j] ^= tk_u32[j-1]; } tk[KC/2][0] ^= sbox[tk[KC/2 - 1][0] * 4]; tk[KC/2][1] ^= sbox[tk[KC/2 - 1][1] * 4]; tk[KC/2][2] ^= sbox[tk[KC/2 - 1][2] * 4]; tk[KC/2][3] ^= sbox[tk[KC/2 - 1][3] * 4]; for (j = KC/2 + 1; j < KC; j++) { tk_u32[j] ^= tk_u32[j-1]; } } /* Copy values into round key array. */ for (j = 0; (j < KC) && (r < rounds + 1); ) { for (; (j < KC) && (t < 4); j++, t++) { W_u32[r][t] = le_bswap32(tk_u32[j]); } if (t == 4) { r++; t = 0; } } } #undef W #undef tk #undef k #undef W_u32 #undef tk_u32 #undef k_u32 wipememory(&tkk, sizeof(tkk)); } return 0; } static gcry_err_code_t rijndael_setkey (void *context, const byte *key, const unsigned keylen) { RIJNDAEL_context *ctx = context; return do_setkey (ctx, key, keylen); } /* Make a decryption key from an encryption key. */ static void prepare_decryption( RIJNDAEL_context *ctx ) { int r; if (0) ; #ifdef USE_AESNI else if (ctx->use_aesni) { _gcry_aes_aesni_prepare_decryption (ctx); } #endif /*USE_AESNI*/ #ifdef USE_SSSE3 else if (ctx->use_ssse3) { _gcry_aes_ssse3_prepare_decryption (ctx); } #endif /*USE_SSSE3*/ #ifdef USE_ARM_CE else if (ctx->use_arm_ce) { _gcry_aes_armv8_ce_prepare_decryption (ctx); } #endif /*USE_SSSE3*/ #ifdef USE_PADLOCK else if (ctx->use_padlock) { /* Padlock does not need decryption subkeys. */ } #endif /*USE_PADLOCK*/ else { const byte *sbox = ((const byte *)encT) + 1; prefetch_enc(); prefetch_dec(); ctx->keyschdec32[0][0] = ctx->keyschenc32[0][0]; ctx->keyschdec32[0][1] = ctx->keyschenc32[0][1]; ctx->keyschdec32[0][2] = ctx->keyschenc32[0][2]; ctx->keyschdec32[0][3] = ctx->keyschenc32[0][3]; for (r = 1; r < ctx->rounds; r++) { u32 *wi = ctx->keyschenc32[r]; u32 *wo = ctx->keyschdec32[r]; u32 wt; wt = wi[0]; wo[0] = rol(decT[sbox[(byte)(wt >> 0) * 4]], 8 * 0) ^ rol(decT[sbox[(byte)(wt >> 8) * 4]], 8 * 1) ^ rol(decT[sbox[(byte)(wt >> 16) * 4]], 8 * 2) ^ rol(decT[sbox[(byte)(wt >> 24) * 4]], 8 * 3); wt = wi[1]; wo[1] = rol(decT[sbox[(byte)(wt >> 0) * 4]], 8 * 0) ^ rol(decT[sbox[(byte)(wt >> 8) * 4]], 8 * 1) ^ rol(decT[sbox[(byte)(wt >> 16) * 4]], 8 * 2) ^ rol(decT[sbox[(byte)(wt >> 24) * 4]], 8 * 3); wt = wi[2]; wo[2] = rol(decT[sbox[(byte)(wt >> 0) * 4]], 8 * 0) ^ rol(decT[sbox[(byte)(wt >> 8) * 4]], 8 * 1) ^ rol(decT[sbox[(byte)(wt >> 16) * 4]], 8 * 2) ^ rol(decT[sbox[(byte)(wt >> 24) * 4]], 8 * 3); wt = wi[3]; wo[3] = rol(decT[sbox[(byte)(wt >> 0) * 4]], 8 * 0) ^ rol(decT[sbox[(byte)(wt >> 8) * 4]], 8 * 1) ^ rol(decT[sbox[(byte)(wt >> 16) * 4]], 8 * 2) ^ rol(decT[sbox[(byte)(wt >> 24) * 4]], 8 * 3); } ctx->keyschdec32[r][0] = ctx->keyschenc32[r][0]; ctx->keyschdec32[r][1] = ctx->keyschenc32[r][1]; ctx->keyschdec32[r][2] = ctx->keyschenc32[r][2]; ctx->keyschdec32[r][3] = ctx->keyschenc32[r][3]; } } #if !defined(USE_ARM_ASM) && !defined(USE_AMD64_ASM) /* Encrypt one block. A and B may be the same. 
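   What follows is the classic 32-bit T-table formulation: SubBytes,
   ShiftRows and MixColumns are folded into the precomputed encT table,
   so one round costs sixteen byte-indexed lookups plus rotations and
   XORs.  Each output column is, schematically (sketch; s0..s3 are the
   state bytes that ShiftRows selects for column c):

     out[c] = rol (encT[s0],  0) ^ rol (encT[s1],  8)
            ^ rol (encT[s2], 16) ^ rol (encT[s3], 24)
            ^ rk[r][c];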
*/ static unsigned int do_encrypt_fn (const RIJNDAEL_context *ctx, unsigned char *b, const unsigned char *a) { #define rk (ctx->keyschenc32) const byte *sbox = ((const byte *)encT) + 1; int rounds = ctx->rounds; int r; u32 sa[4]; u32 sb[4]; sb[0] = buf_get_le32(a + 0); sb[1] = buf_get_le32(a + 4); sb[2] = buf_get_le32(a + 8); sb[3] = buf_get_le32(a + 12); sa[0] = sb[0] ^ rk[0][0]; sa[1] = sb[1] ^ rk[0][1]; sa[2] = sb[2] ^ rk[0][2]; sa[3] = sb[3] ^ rk[0][3]; sb[0] = rol(encT[(byte)(sa[0] >> (0 * 8))], (0 * 8)); sb[3] = rol(encT[(byte)(sa[0] >> (1 * 8))], (1 * 8)); sb[2] = rol(encT[(byte)(sa[0] >> (2 * 8))], (2 * 8)); sb[1] = rol(encT[(byte)(sa[0] >> (3 * 8))], (3 * 8)); sa[0] = rk[1][0] ^ sb[0]; sb[1] ^= rol(encT[(byte)(sa[1] >> (0 * 8))], (0 * 8)); sa[0] ^= rol(encT[(byte)(sa[1] >> (1 * 8))], (1 * 8)); sb[3] ^= rol(encT[(byte)(sa[1] >> (2 * 8))], (2 * 8)); sb[2] ^= rol(encT[(byte)(sa[1] >> (3 * 8))], (3 * 8)); sa[1] = rk[1][1] ^ sb[1]; sb[2] ^= rol(encT[(byte)(sa[2] >> (0 * 8))], (0 * 8)); sa[1] ^= rol(encT[(byte)(sa[2] >> (1 * 8))], (1 * 8)); sa[0] ^= rol(encT[(byte)(sa[2] >> (2 * 8))], (2 * 8)); sb[3] ^= rol(encT[(byte)(sa[2] >> (3 * 8))], (3 * 8)); sa[2] = rk[1][2] ^ sb[2]; sb[3] ^= rol(encT[(byte)(sa[3] >> (0 * 8))], (0 * 8)); sa[2] ^= rol(encT[(byte)(sa[3] >> (1 * 8))], (1 * 8)); sa[1] ^= rol(encT[(byte)(sa[3] >> (2 * 8))], (2 * 8)); sa[0] ^= rol(encT[(byte)(sa[3] >> (3 * 8))], (3 * 8)); sa[3] = rk[1][3] ^ sb[3]; for (r = 2; r < rounds; r++) { sb[0] = rol(encT[(byte)(sa[0] >> (0 * 8))], (0 * 8)); sb[3] = rol(encT[(byte)(sa[0] >> (1 * 8))], (1 * 8)); sb[2] = rol(encT[(byte)(sa[0] >> (2 * 8))], (2 * 8)); sb[1] = rol(encT[(byte)(sa[0] >> (3 * 8))], (3 * 8)); sa[0] = rk[r][0] ^ sb[0]; sb[1] ^= rol(encT[(byte)(sa[1] >> (0 * 8))], (0 * 8)); sa[0] ^= rol(encT[(byte)(sa[1] >> (1 * 8))], (1 * 8)); sb[3] ^= rol(encT[(byte)(sa[1] >> (2 * 8))], (2 * 8)); sb[2] ^= rol(encT[(byte)(sa[1] >> (3 * 8))], (3 * 8)); sa[1] = rk[r][1] ^ sb[1]; sb[2] ^= rol(encT[(byte)(sa[2] >> (0 * 8))], (0 * 8)); sa[1] ^= rol(encT[(byte)(sa[2] >> (1 * 8))], (1 * 8)); sa[0] ^= rol(encT[(byte)(sa[2] >> (2 * 8))], (2 * 8)); sb[3] ^= rol(encT[(byte)(sa[2] >> (3 * 8))], (3 * 8)); sa[2] = rk[r][2] ^ sb[2]; sb[3] ^= rol(encT[(byte)(sa[3] >> (0 * 8))], (0 * 8)); sa[2] ^= rol(encT[(byte)(sa[3] >> (1 * 8))], (1 * 8)); sa[1] ^= rol(encT[(byte)(sa[3] >> (2 * 8))], (2 * 8)); sa[0] ^= rol(encT[(byte)(sa[3] >> (3 * 8))], (3 * 8)); sa[3] = rk[r][3] ^ sb[3]; r++; sb[0] = rol(encT[(byte)(sa[0] >> (0 * 8))], (0 * 8)); sb[3] = rol(encT[(byte)(sa[0] >> (1 * 8))], (1 * 8)); sb[2] = rol(encT[(byte)(sa[0] >> (2 * 8))], (2 * 8)); sb[1] = rol(encT[(byte)(sa[0] >> (3 * 8))], (3 * 8)); sa[0] = rk[r][0] ^ sb[0]; sb[1] ^= rol(encT[(byte)(sa[1] >> (0 * 8))], (0 * 8)); sa[0] ^= rol(encT[(byte)(sa[1] >> (1 * 8))], (1 * 8)); sb[3] ^= rol(encT[(byte)(sa[1] >> (2 * 8))], (2 * 8)); sb[2] ^= rol(encT[(byte)(sa[1] >> (3 * 8))], (3 * 8)); sa[1] = rk[r][1] ^ sb[1]; sb[2] ^= rol(encT[(byte)(sa[2] >> (0 * 8))], (0 * 8)); sa[1] ^= rol(encT[(byte)(sa[2] >> (1 * 8))], (1 * 8)); sa[0] ^= rol(encT[(byte)(sa[2] >> (2 * 8))], (2 * 8)); sb[3] ^= rol(encT[(byte)(sa[2] >> (3 * 8))], (3 * 8)); sa[2] = rk[r][2] ^ sb[2]; sb[3] ^= rol(encT[(byte)(sa[3] >> (0 * 8))], (0 * 8)); sa[2] ^= rol(encT[(byte)(sa[3] >> (1 * 8))], (1 * 8)); sa[1] ^= rol(encT[(byte)(sa[3] >> (2 * 8))], (2 * 8)); sa[0] ^= rol(encT[(byte)(sa[3] >> (3 * 8))], (3 * 8)); sa[3] = rk[r][3] ^ sb[3]; } /* Last round is special. 
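   ("Special" because the final AES round omits MixColumns, so the
   combined encT entries cannot be used; the code below instead picks
   the plain S-box bytes, which are interleaved in encT with a stride
   of 4, hence the '* 4' indexing.)  Schematically, for column c with
   input bytes b0..b3:

     out[c] = S[b0] ^ (S[b1] << 8) ^ (S[b2] << 16) ^ (S[b3] << 24),
     followed by out[c] ^= rk[rounds][c]   // S = AES S-box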
*/ sb[0] = (sbox[(byte)(sa[0] >> (0 * 8)) * 4]) << (0 * 8); sb[3] = (sbox[(byte)(sa[0] >> (1 * 8)) * 4]) << (1 * 8); sb[2] = (sbox[(byte)(sa[0] >> (2 * 8)) * 4]) << (2 * 8); sb[1] = (sbox[(byte)(sa[0] >> (3 * 8)) * 4]) << (3 * 8); sa[0] = rk[r][0] ^ sb[0]; sb[1] ^= (sbox[(byte)(sa[1] >> (0 * 8)) * 4]) << (0 * 8); sa[0] ^= (sbox[(byte)(sa[1] >> (1 * 8)) * 4]) << (1 * 8); sb[3] ^= (sbox[(byte)(sa[1] >> (2 * 8)) * 4]) << (2 * 8); sb[2] ^= (sbox[(byte)(sa[1] >> (3 * 8)) * 4]) << (3 * 8); sa[1] = rk[r][1] ^ sb[1]; sb[2] ^= (sbox[(byte)(sa[2] >> (0 * 8)) * 4]) << (0 * 8); sa[1] ^= (sbox[(byte)(sa[2] >> (1 * 8)) * 4]) << (1 * 8); sa[0] ^= (sbox[(byte)(sa[2] >> (2 * 8)) * 4]) << (2 * 8); sb[3] ^= (sbox[(byte)(sa[2] >> (3 * 8)) * 4]) << (3 * 8); sa[2] = rk[r][2] ^ sb[2]; sb[3] ^= (sbox[(byte)(sa[3] >> (0 * 8)) * 4]) << (0 * 8); sa[2] ^= (sbox[(byte)(sa[3] >> (1 * 8)) * 4]) << (1 * 8); sa[1] ^= (sbox[(byte)(sa[3] >> (2 * 8)) * 4]) << (2 * 8); sa[0] ^= (sbox[(byte)(sa[3] >> (3 * 8)) * 4]) << (3 * 8); sa[3] = rk[r][3] ^ sb[3]; buf_put_le32(b + 0, sa[0]); buf_put_le32(b + 4, sa[1]); buf_put_le32(b + 8, sa[2]); buf_put_le32(b + 12, sa[3]); #undef rk return (56 + 2*sizeof(int)); } #endif /*!USE_ARM_ASM && !USE_AMD64_ASM*/ static unsigned int do_encrypt (const RIJNDAEL_context *ctx, unsigned char *bx, const unsigned char *ax) { #ifdef USE_AMD64_ASM -# ifdef HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS return _gcry_aes_amd64_encrypt_block(ctx->keyschenc, bx, ax, ctx->rounds, encT); -# else - /* Call SystemV ABI function without storing non-volatile XMM registers, - * as target function does not use vector instruction sets. */ - const void *key = ctx->keyschenc; - uintptr_t rounds = ctx->rounds; - uintptr_t ret; - asm volatile ("movq %[encT], %%r8\n\t" - "callq *%[ret]\n\t" - : [ret] "=a" (ret), - "+D" (key), - "+S" (bx), - "+d" (ax), - "+c" (rounds) - : "0" (_gcry_aes_amd64_encrypt_block), - [encT] "r" (encT) - : "cc", "memory", "r8", "r9", "r10", "r11"); - return ret; -# endif /* HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS */ #elif defined(USE_ARM_ASM) return _gcry_aes_arm_encrypt_block(ctx->keyschenc, bx, ax, ctx->rounds, encT); #else return do_encrypt_fn (ctx, bx, ax); #endif /* !USE_ARM_ASM && !USE_AMD64_ASM*/ } static unsigned int rijndael_encrypt (void *context, byte *b, const byte *a) { RIJNDAEL_context *ctx = context; if (ctx->prefetch_enc_fn) ctx->prefetch_enc_fn(); return ctx->encrypt_fn (ctx, b, a); } /* Bulk encryption of complete blocks in CFB mode. Caller needs to make sure that IV is aligned on an unsigned long boundary. This function is only intended for the bulk encryption feature of cipher.c. */ void _gcry_aes_cfb_enc (void *context, unsigned char *iv, void *outbuf_arg, const void *inbuf_arg, size_t nblocks) { RIJNDAEL_context *ctx = context; unsigned char *outbuf = outbuf_arg; const unsigned char *inbuf = inbuf_arg; unsigned int burn_depth = 0; if (ctx->prefetch_enc_fn) ctx->prefetch_enc_fn(); if (0) ; #ifdef USE_AESNI else if (ctx->use_aesni) { _gcry_aes_aesni_cfb_enc (ctx, outbuf, inbuf, iv, nblocks); burn_depth = 0; } #endif /*USE_AESNI*/ #ifdef USE_SSSE3 else if (ctx->use_ssse3) { _gcry_aes_ssse3_cfb_enc (ctx, outbuf, inbuf, iv, nblocks); burn_depth = 0; } #endif /*USE_SSSE3*/ #ifdef USE_ARM_CE else if (ctx->use_arm_ce) { _gcry_aes_armv8_ce_cfb_enc (ctx, outbuf, inbuf, iv, nblocks); burn_depth = 0; } #endif /*USE_ARM_CE*/ else { rijndael_cryptfn_t encrypt_fn = ctx->encrypt_fn; for ( ;nblocks; nblocks-- ) { /* Encrypt the IV. 
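   CFB encryption is C_i = P_i XOR E(K, C_{i-1}) with C_0 = IV; only
   the cipher's forward direction is ever used.  The two statements
   below perform one step in place, equivalent to (sketch):

     encrypt_fn (ctx, iv, iv);                    // iv = E(K, iv)
     buf_xor_2dst (outbuf, iv, inbuf, BLOCKSIZE); // out = iv ^= P_i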
*/ burn_depth = encrypt_fn (ctx, iv, iv); /* XOR the input with the IV and store input into IV. */ buf_xor_2dst(outbuf, iv, inbuf, BLOCKSIZE); outbuf += BLOCKSIZE; inbuf += BLOCKSIZE; } } if (burn_depth) _gcry_burn_stack (burn_depth + 4 * sizeof(void *)); } /* Bulk encryption of complete blocks in CBC mode. Caller needs to make sure that IV is aligned on an unsigned long boundary. This function is only intended for the bulk encryption feature of cipher.c. */ void _gcry_aes_cbc_enc (void *context, unsigned char *iv, void *outbuf_arg, const void *inbuf_arg, size_t nblocks, int cbc_mac) { RIJNDAEL_context *ctx = context; unsigned char *outbuf = outbuf_arg; const unsigned char *inbuf = inbuf_arg; unsigned char *last_iv; unsigned int burn_depth = 0; if (ctx->prefetch_enc_fn) ctx->prefetch_enc_fn(); if (0) ; #ifdef USE_AESNI else if (ctx->use_aesni) { _gcry_aes_aesni_cbc_enc (ctx, outbuf, inbuf, iv, nblocks, cbc_mac); burn_depth = 0; } #endif /*USE_AESNI*/ #ifdef USE_SSSE3 else if (ctx->use_ssse3) { _gcry_aes_ssse3_cbc_enc (ctx, outbuf, inbuf, iv, nblocks, cbc_mac); burn_depth = 0; } #endif /*USE_SSSE3*/ #ifdef USE_ARM_CE else if (ctx->use_arm_ce) { _gcry_aes_armv8_ce_cbc_enc (ctx, outbuf, inbuf, iv, nblocks, cbc_mac); burn_depth = 0; } #endif /*USE_ARM_CE*/ else { rijndael_cryptfn_t encrypt_fn = ctx->encrypt_fn; last_iv = iv; for ( ;nblocks; nblocks-- ) { buf_xor(outbuf, inbuf, last_iv, BLOCKSIZE); burn_depth = encrypt_fn (ctx, outbuf, outbuf); last_iv = outbuf; inbuf += BLOCKSIZE; if (!cbc_mac) outbuf += BLOCKSIZE; } if (last_iv != iv) buf_cpy (iv, last_iv, BLOCKSIZE); } if (burn_depth) _gcry_burn_stack (burn_depth + 4 * sizeof(void *)); } /* Bulk encryption of complete blocks in CTR mode. Caller needs to make sure that CTR is aligned on a 16 byte boundary if AESNI; the minimum alignment is for an u32. This function is only intended for the bulk encryption feature of cipher.c. CTR is expected to be of size BLOCKSIZE. */ void _gcry_aes_ctr_enc (void *context, unsigned char *ctr, void *outbuf_arg, const void *inbuf_arg, size_t nblocks) { RIJNDAEL_context *ctx = context; unsigned char *outbuf = outbuf_arg; const unsigned char *inbuf = inbuf_arg; unsigned int burn_depth = 0; int i; if (ctx->prefetch_enc_fn) ctx->prefetch_enc_fn(); if (0) ; #ifdef USE_AESNI else if (ctx->use_aesni) { _gcry_aes_aesni_ctr_enc (ctx, outbuf, inbuf, ctr, nblocks); burn_depth = 0; } #endif /*USE_AESNI*/ #ifdef USE_SSSE3 else if (ctx->use_ssse3) { _gcry_aes_ssse3_ctr_enc (ctx, outbuf, inbuf, ctr, nblocks); burn_depth = 0; } #endif /*USE_SSSE3*/ #ifdef USE_ARM_CE else if (ctx->use_arm_ce) { _gcry_aes_armv8_ce_ctr_enc (ctx, outbuf, inbuf, ctr, nblocks); burn_depth = 0; } #endif /*USE_ARM_CE*/ else { union { unsigned char x1[16] ATTR_ALIGNED_16; u32 x32[4]; } tmp; rijndael_cryptfn_t encrypt_fn = ctx->encrypt_fn; for ( ;nblocks; nblocks-- ) { /* Encrypt the counter. */ burn_depth = encrypt_fn (ctx, tmp.x1, ctr); /* XOR the input with the encrypted counter and store in output. */ buf_xor(outbuf, tmp.x1, inbuf, BLOCKSIZE); outbuf += BLOCKSIZE; inbuf += BLOCKSIZE; /* Increment the counter. */ for (i = BLOCKSIZE; i > 0; i--) { ctr[i-1]++; if (ctr[i-1]) break; } } wipememory(&tmp, sizeof(tmp)); } if (burn_depth) _gcry_burn_stack (burn_depth + 4 * sizeof(void *)); } #if !defined(USE_ARM_ASM) && !defined(USE_AMD64_ASM) /* Decrypt one block. A and B may be the same. 
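   This is the "equivalent inverse cipher" of FIPS-197: the rounds are
   traversed with the inverse T-table (decT) and the decryption key
   schedule that prepare_decryption() derived above, schematically
   (the first and last round keys are copied through unchanged):

     keyschdec[r] = InvMixColumns (keyschenc[r]),   0 < r < rounds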
*/ static unsigned int do_decrypt_fn (const RIJNDAEL_context *ctx, unsigned char *b, const unsigned char *a) { #define rk (ctx->keyschdec32) int rounds = ctx->rounds; int r; u32 sa[4]; u32 sb[4]; sb[0] = buf_get_le32(a + 0); sb[1] = buf_get_le32(a + 4); sb[2] = buf_get_le32(a + 8); sb[3] = buf_get_le32(a + 12); sa[0] = sb[0] ^ rk[rounds][0]; sa[1] = sb[1] ^ rk[rounds][1]; sa[2] = sb[2] ^ rk[rounds][2]; sa[3] = sb[3] ^ rk[rounds][3]; for (r = rounds - 1; r > 1; r--) { sb[0] = rol(decT[(byte)(sa[0] >> (0 * 8))], (0 * 8)); sb[1] = rol(decT[(byte)(sa[0] >> (1 * 8))], (1 * 8)); sb[2] = rol(decT[(byte)(sa[0] >> (2 * 8))], (2 * 8)); sb[3] = rol(decT[(byte)(sa[0] >> (3 * 8))], (3 * 8)); sa[0] = rk[r][0] ^ sb[0]; sb[1] ^= rol(decT[(byte)(sa[1] >> (0 * 8))], (0 * 8)); sb[2] ^= rol(decT[(byte)(sa[1] >> (1 * 8))], (1 * 8)); sb[3] ^= rol(decT[(byte)(sa[1] >> (2 * 8))], (2 * 8)); sa[0] ^= rol(decT[(byte)(sa[1] >> (3 * 8))], (3 * 8)); sa[1] = rk[r][1] ^ sb[1]; sb[2] ^= rol(decT[(byte)(sa[2] >> (0 * 8))], (0 * 8)); sb[3] ^= rol(decT[(byte)(sa[2] >> (1 * 8))], (1 * 8)); sa[0] ^= rol(decT[(byte)(sa[2] >> (2 * 8))], (2 * 8)); sa[1] ^= rol(decT[(byte)(sa[2] >> (3 * 8))], (3 * 8)); sa[2] = rk[r][2] ^ sb[2]; sb[3] ^= rol(decT[(byte)(sa[3] >> (0 * 8))], (0 * 8)); sa[0] ^= rol(decT[(byte)(sa[3] >> (1 * 8))], (1 * 8)); sa[1] ^= rol(decT[(byte)(sa[3] >> (2 * 8))], (2 * 8)); sa[2] ^= rol(decT[(byte)(sa[3] >> (3 * 8))], (3 * 8)); sa[3] = rk[r][3] ^ sb[3]; r--; sb[0] = rol(decT[(byte)(sa[0] >> (0 * 8))], (0 * 8)); sb[1] = rol(decT[(byte)(sa[0] >> (1 * 8))], (1 * 8)); sb[2] = rol(decT[(byte)(sa[0] >> (2 * 8))], (2 * 8)); sb[3] = rol(decT[(byte)(sa[0] >> (3 * 8))], (3 * 8)); sa[0] = rk[r][0] ^ sb[0]; sb[1] ^= rol(decT[(byte)(sa[1] >> (0 * 8))], (0 * 8)); sb[2] ^= rol(decT[(byte)(sa[1] >> (1 * 8))], (1 * 8)); sb[3] ^= rol(decT[(byte)(sa[1] >> (2 * 8))], (2 * 8)); sa[0] ^= rol(decT[(byte)(sa[1] >> (3 * 8))], (3 * 8)); sa[1] = rk[r][1] ^ sb[1]; sb[2] ^= rol(decT[(byte)(sa[2] >> (0 * 8))], (0 * 8)); sb[3] ^= rol(decT[(byte)(sa[2] >> (1 * 8))], (1 * 8)); sa[0] ^= rol(decT[(byte)(sa[2] >> (2 * 8))], (2 * 8)); sa[1] ^= rol(decT[(byte)(sa[2] >> (3 * 8))], (3 * 8)); sa[2] = rk[r][2] ^ sb[2]; sb[3] ^= rol(decT[(byte)(sa[3] >> (0 * 8))], (0 * 8)); sa[0] ^= rol(decT[(byte)(sa[3] >> (1 * 8))], (1 * 8)); sa[1] ^= rol(decT[(byte)(sa[3] >> (2 * 8))], (2 * 8)); sa[2] ^= rol(decT[(byte)(sa[3] >> (3 * 8))], (3 * 8)); sa[3] = rk[r][3] ^ sb[3]; } sb[0] = rol(decT[(byte)(sa[0] >> (0 * 8))], (0 * 8)); sb[1] = rol(decT[(byte)(sa[0] >> (1 * 8))], (1 * 8)); sb[2] = rol(decT[(byte)(sa[0] >> (2 * 8))], (2 * 8)); sb[3] = rol(decT[(byte)(sa[0] >> (3 * 8))], (3 * 8)); sa[0] = rk[1][0] ^ sb[0]; sb[1] ^= rol(decT[(byte)(sa[1] >> (0 * 8))], (0 * 8)); sb[2] ^= rol(decT[(byte)(sa[1] >> (1 * 8))], (1 * 8)); sb[3] ^= rol(decT[(byte)(sa[1] >> (2 * 8))], (2 * 8)); sa[0] ^= rol(decT[(byte)(sa[1] >> (3 * 8))], (3 * 8)); sa[1] = rk[1][1] ^ sb[1]; sb[2] ^= rol(decT[(byte)(sa[2] >> (0 * 8))], (0 * 8)); sb[3] ^= rol(decT[(byte)(sa[2] >> (1 * 8))], (1 * 8)); sa[0] ^= rol(decT[(byte)(sa[2] >> (2 * 8))], (2 * 8)); sa[1] ^= rol(decT[(byte)(sa[2] >> (3 * 8))], (3 * 8)); sa[2] = rk[1][2] ^ sb[2]; sb[3] ^= rol(decT[(byte)(sa[3] >> (0 * 8))], (0 * 8)); sa[0] ^= rol(decT[(byte)(sa[3] >> (1 * 8))], (1 * 8)); sa[1] ^= rol(decT[(byte)(sa[3] >> (2 * 8))], (2 * 8)); sa[2] ^= rol(decT[(byte)(sa[3] >> (3 * 8))], (3 * 8)); sa[3] = rk[1][3] ^ sb[3]; /* Last round is special. 
*/ sb[0] = inv_sbox[(byte)(sa[0] >> (0 * 8))] << (0 * 8); sb[1] = inv_sbox[(byte)(sa[0] >> (1 * 8))] << (1 * 8); sb[2] = inv_sbox[(byte)(sa[0] >> (2 * 8))] << (2 * 8); sb[3] = inv_sbox[(byte)(sa[0] >> (3 * 8))] << (3 * 8); sa[0] = sb[0] ^ rk[0][0]; sb[1] ^= inv_sbox[(byte)(sa[1] >> (0 * 8))] << (0 * 8); sb[2] ^= inv_sbox[(byte)(sa[1] >> (1 * 8))] << (1 * 8); sb[3] ^= inv_sbox[(byte)(sa[1] >> (2 * 8))] << (2 * 8); sa[0] ^= inv_sbox[(byte)(sa[1] >> (3 * 8))] << (3 * 8); sa[1] = sb[1] ^ rk[0][1]; sb[2] ^= inv_sbox[(byte)(sa[2] >> (0 * 8))] << (0 * 8); sb[3] ^= inv_sbox[(byte)(sa[2] >> (1 * 8))] << (1 * 8); sa[0] ^= inv_sbox[(byte)(sa[2] >> (2 * 8))] << (2 * 8); sa[1] ^= inv_sbox[(byte)(sa[2] >> (3 * 8))] << (3 * 8); sa[2] = sb[2] ^ rk[0][2]; sb[3] ^= inv_sbox[(byte)(sa[3] >> (0 * 8))] << (0 * 8); sa[0] ^= inv_sbox[(byte)(sa[3] >> (1 * 8))] << (1 * 8); sa[1] ^= inv_sbox[(byte)(sa[3] >> (2 * 8))] << (2 * 8); sa[2] ^= inv_sbox[(byte)(sa[3] >> (3 * 8))] << (3 * 8); sa[3] = sb[3] ^ rk[0][3]; buf_put_le32(b + 0, sa[0]); buf_put_le32(b + 4, sa[1]); buf_put_le32(b + 8, sa[2]); buf_put_le32(b + 12, sa[3]); #undef rk return (56+2*sizeof(int)); } #endif /*!USE_ARM_ASM && !USE_AMD64_ASM*/ /* Decrypt one block. AX and BX may be the same. */ static unsigned int do_decrypt (const RIJNDAEL_context *ctx, unsigned char *bx, const unsigned char *ax) { #ifdef USE_AMD64_ASM -# ifdef HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS return _gcry_aes_amd64_decrypt_block(ctx->keyschdec, bx, ax, ctx->rounds, &dec_tables); -# else - /* Call SystemV ABI function without storing non-volatile XMM registers, - * as target function does not use vector instruction sets. */ - const void *key = ctx->keyschdec; - uintptr_t rounds = ctx->rounds; - uintptr_t ret; - asm volatile ("movq %[dectabs], %%r8\n\t" - "callq *%[ret]\n\t" - : [ret] "=a" (ret), - "+D" (key), - "+S" (bx), - "+d" (ax), - "+c" (rounds) - : "0" (_gcry_aes_amd64_decrypt_block), - [dectabs] "r" (&dec_tables) - : "cc", "memory", "r8", "r9", "r10", "r11"); - return ret; -# endif /* HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS */ #elif defined(USE_ARM_ASM) return _gcry_aes_arm_decrypt_block(ctx->keyschdec, bx, ax, ctx->rounds, &dec_tables); #else return do_decrypt_fn (ctx, bx, ax); #endif /*!USE_ARM_ASM && !USE_AMD64_ASM*/ } static inline void check_decryption_preparation (RIJNDAEL_context *ctx) { if ( !ctx->decryption_prepared ) { prepare_decryption ( ctx ); ctx->decryption_prepared = 1; } } static unsigned int rijndael_decrypt (void *context, byte *b, const byte *a) { RIJNDAEL_context *ctx = context; check_decryption_preparation (ctx); if (ctx->prefetch_dec_fn) ctx->prefetch_dec_fn(); return ctx->decrypt_fn (ctx, b, a); } /* Bulk decryption of complete blocks in CFB mode. Caller needs to make sure that IV is aligned on an unsigned long boundary. This function is only intended for the bulk encryption feature of cipher.c. 
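   CFB decryption is P_i = C_i XOR E(K, C_{i-1}); as in CTR mode only
   the encryption direction is needed, and since every block cipher
   input is ciphertext already at hand, the accelerated paths can
   process whole chunks in parallel.  The generic one-block step below
   is equivalent to (sketch):

     encrypt_fn (ctx, iv, iv);                      // iv = E(K, C_{i-1})
     buf_xor_n_copy (outbuf, iv, inbuf, BLOCKSIZE); // out = iv ^ C_i, iv = C_i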
*/ void _gcry_aes_cfb_dec (void *context, unsigned char *iv, void *outbuf_arg, const void *inbuf_arg, size_t nblocks) { RIJNDAEL_context *ctx = context; unsigned char *outbuf = outbuf_arg; const unsigned char *inbuf = inbuf_arg; unsigned int burn_depth = 0; if (ctx->prefetch_enc_fn) ctx->prefetch_enc_fn(); if (0) ; #ifdef USE_AESNI else if (ctx->use_aesni) { _gcry_aes_aesni_cfb_dec (ctx, outbuf, inbuf, iv, nblocks); burn_depth = 0; } #endif /*USE_AESNI*/ #ifdef USE_SSSE3 else if (ctx->use_ssse3) { _gcry_aes_ssse3_cfb_dec (ctx, outbuf, inbuf, iv, nblocks); burn_depth = 0; } #endif /*USE_SSSE3*/ #ifdef USE_ARM_CE else if (ctx->use_arm_ce) { _gcry_aes_armv8_ce_cfb_dec (ctx, outbuf, inbuf, iv, nblocks); burn_depth = 0; } #endif /*USE_ARM_CE*/ else { rijndael_cryptfn_t encrypt_fn = ctx->encrypt_fn; for ( ;nblocks; nblocks-- ) { burn_depth = encrypt_fn (ctx, iv, iv); buf_xor_n_copy(outbuf, iv, inbuf, BLOCKSIZE); outbuf += BLOCKSIZE; inbuf += BLOCKSIZE; } } if (burn_depth) _gcry_burn_stack (burn_depth + 4 * sizeof(void *)); } /* Bulk decryption of complete blocks in CBC mode. Caller needs to make sure that IV is aligned on an unsigned long boundary. This function is only intended for the bulk encryption feature of cipher.c. */ void _gcry_aes_cbc_dec (void *context, unsigned char *iv, void *outbuf_arg, const void *inbuf_arg, size_t nblocks) { RIJNDAEL_context *ctx = context; unsigned char *outbuf = outbuf_arg; const unsigned char *inbuf = inbuf_arg; unsigned int burn_depth = 0; check_decryption_preparation (ctx); if (ctx->prefetch_dec_fn) ctx->prefetch_dec_fn(); if (0) ; #ifdef USE_AESNI else if (ctx->use_aesni) { _gcry_aes_aesni_cbc_dec (ctx, outbuf, inbuf, iv, nblocks); burn_depth = 0; } #endif /*USE_AESNI*/ #ifdef USE_SSSE3 else if (ctx->use_ssse3) { _gcry_aes_ssse3_cbc_dec (ctx, outbuf, inbuf, iv, nblocks); burn_depth = 0; } #endif /*USE_SSSE3*/ #ifdef USE_ARM_CE else if (ctx->use_arm_ce) { _gcry_aes_armv8_ce_cbc_dec (ctx, outbuf, inbuf, iv, nblocks); burn_depth = 0; } #endif /*USE_ARM_CE*/ else { unsigned char savebuf[BLOCKSIZE] ATTR_ALIGNED_16; rijndael_cryptfn_t decrypt_fn = ctx->decrypt_fn; for ( ;nblocks; nblocks-- ) { /* INBUF is needed later and it may be identical to OUTBUF, so store the intermediate result to SAVEBUF. */ burn_depth = decrypt_fn (ctx, savebuf, inbuf); buf_xor_n_copy_2(outbuf, savebuf, iv, inbuf, BLOCKSIZE); inbuf += BLOCKSIZE; outbuf += BLOCKSIZE; } wipememory(savebuf, sizeof(savebuf)); } if (burn_depth) _gcry_burn_stack (burn_depth + 4 * sizeof(void *)); } /* Bulk encryption/decryption of complete blocks in OCB mode. 
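The generic fallback loops above lean on buf_xor_n_copy and buf_xor_n_copy_2 to stay correct when input and output alias: in CFB decryption the next IV is the current ciphertext block, so the helper XORs the keystream into the output while saving the ciphertext as the new IV in one pass. A plain-C sketch of the same CFB step, with a hypothetical aes_encrypt_block standing in for ctx->encrypt_fn:

    #include <stddef.h>
    #include <string.h>

    #define BLOCKSIZE 16

    /* Hypothetical single-block encryptor standing in for ctx->encrypt_fn. */
    void aes_encrypt_block (void *key, unsigned char *out, const unsigned char *in);

    void cfb_dec_sketch (void *key, unsigned char iv[BLOCKSIZE],
                         unsigned char *out, const unsigned char *in,
                         size_t nblocks)
    {
      unsigned char ks[BLOCKSIZE];
      while (nblocks--)
        {
          aes_encrypt_block (key, ks, iv);   /* keystream = E_K(IV) */
          memcpy (iv, in, BLOCKSIZE);        /* next IV = C_i, saved first so
                                                that in == out stays safe */
          for (int i = 0; i < BLOCKSIZE; i++)
            out[i] = ks[i] ^ in[i];          /* P_i = C_i ^ E_K(IV) */
          in += BLOCKSIZE;
          out += BLOCKSIZE;
        }
    }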
*/ size_t _gcry_aes_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg, const void *inbuf_arg, size_t nblocks, int encrypt) { RIJNDAEL_context *ctx = (void *)&c->context.c; unsigned char *outbuf = outbuf_arg; const unsigned char *inbuf = inbuf_arg; unsigned int burn_depth = 0; if (encrypt) { if (ctx->prefetch_enc_fn) ctx->prefetch_enc_fn(); } else { check_decryption_preparation (ctx); if (ctx->prefetch_dec_fn) ctx->prefetch_dec_fn(); } if (0) ; #ifdef USE_AESNI else if (ctx->use_aesni) { _gcry_aes_aesni_ocb_crypt (c, outbuf, inbuf, nblocks, encrypt); burn_depth = 0; } #endif /*USE_AESNI*/ #ifdef USE_SSSE3 else if (ctx->use_ssse3) { _gcry_aes_ssse3_ocb_crypt (c, outbuf, inbuf, nblocks, encrypt); burn_depth = 0; } #endif /*USE_SSSE3*/ #ifdef USE_ARM_CE else if (ctx->use_arm_ce) { _gcry_aes_armv8_ce_ocb_crypt (c, outbuf, inbuf, nblocks, encrypt); burn_depth = 0; } #endif /*USE_ARM_CE*/ else if (encrypt) { union { unsigned char x1[16] ATTR_ALIGNED_16; u32 x32[4]; } l_tmp; rijndael_cryptfn_t encrypt_fn = ctx->encrypt_fn; for ( ;nblocks; nblocks-- ) { u64 i = ++c->u_mode.ocb.data_nblocks; const unsigned char *l = ocb_get_l(c, i); /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ buf_xor_1 (c->u_iv.iv, l, BLOCKSIZE); buf_cpy (l_tmp.x1, inbuf, BLOCKSIZE); /* Checksum_i = Checksum_{i-1} xor P_i */ buf_xor_1 (c->u_ctr.ctr, l_tmp.x1, BLOCKSIZE); /* C_i = Offset_i xor ENCIPHER(K, P_i xor Offset_i) */ buf_xor_1 (l_tmp.x1, c->u_iv.iv, BLOCKSIZE); burn_depth = encrypt_fn (ctx, l_tmp.x1, l_tmp.x1); buf_xor_1 (l_tmp.x1, c->u_iv.iv, BLOCKSIZE); buf_cpy (outbuf, l_tmp.x1, BLOCKSIZE); inbuf += BLOCKSIZE; outbuf += BLOCKSIZE; } } else { union { unsigned char x1[16] ATTR_ALIGNED_16; u32 x32[4]; } l_tmp; rijndael_cryptfn_t decrypt_fn = ctx->decrypt_fn; for ( ;nblocks; nblocks-- ) { u64 i = ++c->u_mode.ocb.data_nblocks; const unsigned char *l = ocb_get_l(c, i); /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ buf_xor_1 (c->u_iv.iv, l, BLOCKSIZE); buf_cpy (l_tmp.x1, inbuf, BLOCKSIZE); /* P_i = Offset_i xor DECIPHER(K, C_i xor Offset_i) */ buf_xor_1 (l_tmp.x1, c->u_iv.iv, BLOCKSIZE); burn_depth = decrypt_fn (ctx, l_tmp.x1, l_tmp.x1); buf_xor_1 (l_tmp.x1, c->u_iv.iv, BLOCKSIZE); /* Checksum_i = Checksum_{i-1} xor P_i */ buf_xor_1 (c->u_ctr.ctr, l_tmp.x1, BLOCKSIZE); buf_cpy (outbuf, l_tmp.x1, BLOCKSIZE); inbuf += BLOCKSIZE; outbuf += BLOCKSIZE; } } if (burn_depth) _gcry_burn_stack (burn_depth + 4 * sizeof(void *)); return 0; } /* Bulk authentication of complete blocks in OCB mode.
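The comments in the fallback path spell out the OCB per-block recurrence. For reference, one encryption step in isolation looks like the sketch below; aes_encrypt_block, xor_block, and ocb_enc_step are hypothetical names, and the offset chain, the checksum update, and the xor-encrypt-xor core correspond exactly to the three buf_xor_1 groups above.

    #include <string.h>

    #define BLOCKSIZE 16

    /* Hypothetical single-block encryptor standing in for ctx->encrypt_fn. */
    void aes_encrypt_block (void *key, unsigned char *out, const unsigned char *in);

    static void xor_block (unsigned char *dst, const unsigned char *src)
    {
      for (int i = 0; i < BLOCKSIZE; i++)
        dst[i] ^= src[i];
    }

    /* One OCB encryption step; OFFSET and CHECKSUM carry across blocks,
       L is L_{ntz(i)} for the current block number i. */
    static void ocb_enc_step (void *key,
                              unsigned char offset[BLOCKSIZE],
                              unsigned char checksum[BLOCKSIZE],
                              const unsigned char l[BLOCKSIZE],
                              unsigned char *c, const unsigned char *p)
    {
      unsigned char tmp[BLOCKSIZE];

      xor_block (offset, l);        /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
      memcpy (tmp, p, BLOCKSIZE);
      xor_block (checksum, tmp);    /* Checksum_i = Checksum_{i-1} xor P_i */
      xor_block (tmp, offset);
      aes_encrypt_block (key, tmp, tmp);
      xor_block (tmp, offset);      /* C_i = Offset_i xor ENCIPHER(K, P_i xor Offset_i) */
      memcpy (c, tmp, BLOCKSIZE);
    }

Decryption is the mirror image: undo the outer XOR, decipher, undo the inner XOR, and fold the recovered plaintext into the checksum afterwards, as in the second loop above.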
*/ size_t _gcry_aes_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg, size_t nblocks) { RIJNDAEL_context *ctx = (void *)&c->context.c; const unsigned char *abuf = abuf_arg; unsigned int burn_depth = 0; if (ctx->prefetch_enc_fn) ctx->prefetch_enc_fn(); if (0) ; #ifdef USE_AESNI else if (ctx->use_aesni) { _gcry_aes_aesni_ocb_auth (c, abuf, nblocks); burn_depth = 0; } #endif /*USE_AESNI*/ #ifdef USE_SSSE3 else if (ctx->use_ssse3) { _gcry_aes_ssse3_ocb_auth (c, abuf, nblocks); burn_depth = 0; } #endif /*USE_SSSE3*/ #ifdef USE_ARM_CE else if (ctx->use_arm_ce) { _gcry_aes_armv8_ce_ocb_auth (c, abuf, nblocks); burn_depth = 0; } #endif /*USE_ARM_CE*/ else { union { unsigned char x1[16] ATTR_ALIGNED_16; u32 x32[4]; } l_tmp; rijndael_cryptfn_t encrypt_fn = ctx->encrypt_fn; for ( ;nblocks; nblocks-- ) { u64 i = ++c->u_mode.ocb.aad_nblocks; const unsigned char *l = ocb_get_l(c, i); /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ buf_xor_1 (c->u_mode.ocb.aad_offset, l, BLOCKSIZE); /* Sum_i = Sum_{i-1} xor ENCIPHER(K, A_i xor Offset_i) */ buf_xor (l_tmp.x1, c->u_mode.ocb.aad_offset, abuf, BLOCKSIZE); burn_depth = encrypt_fn (ctx, l_tmp.x1, l_tmp.x1); buf_xor_1 (c->u_mode.ocb.aad_sum, l_tmp.x1, BLOCKSIZE); abuf += BLOCKSIZE; } wipememory(&l_tmp, sizeof(l_tmp)); } if (burn_depth) _gcry_burn_stack (burn_depth + 4 * sizeof(void *)); return 0; } /* Bulk encryption/decryption of complete blocks in XTS mode. */ void _gcry_aes_xts_crypt (void *context, unsigned char *tweak, void *outbuf_arg, const void *inbuf_arg, size_t nblocks, int encrypt) { RIJNDAEL_context *ctx = context; unsigned char *outbuf = outbuf_arg; const unsigned char *inbuf = inbuf_arg; unsigned int burn_depth = 0; rijndael_cryptfn_t crypt_fn; u64 tweak_lo, tweak_hi, tweak_next_lo, tweak_next_hi, tmp_lo, tmp_hi, carry; if (encrypt) { if (ctx->prefetch_enc_fn) ctx->prefetch_enc_fn(); crypt_fn = ctx->encrypt_fn; } else { check_decryption_preparation (ctx); if (ctx->prefetch_dec_fn) ctx->prefetch_dec_fn(); crypt_fn = ctx->decrypt_fn; } if (0) ; #ifdef USE_AESNI else if (ctx->use_aesni) { _gcry_aes_aesni_xts_crypt (ctx, tweak, outbuf, inbuf, nblocks, encrypt); burn_depth = 0; } #endif /*USE_AESNI*/ else { tweak_next_lo = buf_get_le64 (tweak + 0); tweak_next_hi = buf_get_le64 (tweak + 8); while (nblocks) { tweak_lo = tweak_next_lo; tweak_hi = tweak_next_hi; /* Xor-Encrypt/Decrypt-Xor block. */ tmp_lo = buf_get_le64 (inbuf + 0) ^ tweak_lo; tmp_hi = buf_get_le64 (inbuf + 8) ^ tweak_hi; buf_put_le64 (outbuf + 0, tmp_lo); buf_put_le64 (outbuf + 8, tmp_hi); /* Generate next tweak. */ carry = -(tweak_next_hi >> 63) & 0x87; tweak_next_hi = (tweak_next_hi << 1) + (tweak_next_lo >> 63); tweak_next_lo = (tweak_next_lo << 1) ^ carry; burn_depth = crypt_fn (ctx, outbuf, outbuf); buf_put_le64 (outbuf + 0, buf_get_le64 (outbuf + 0) ^ tweak_lo); buf_put_le64 (outbuf + 8, buf_get_le64 (outbuf + 8) ^ tweak_hi); outbuf += GCRY_XTS_BLOCK_LEN; inbuf += GCRY_XTS_BLOCK_LEN; nblocks--; } buf_put_le64 (tweak + 0, tweak_next_lo); buf_put_le64 (tweak + 8, tweak_next_hi); } if (burn_depth) _gcry_burn_stack (burn_depth + 5 * sizeof(void *)); } /* Run the self-tests for AES 128. Returns NULL on success. 
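The tweak update in the generic XTS path is a multiplication by x in GF(2^128) performed on two 64-bit little-endian halves; the branch-free expression -(hi >> 63) & 0x87 turns the outgoing top bit into the reduction constant. The same operation in isolation (xts_mul2 is a hypothetical name):

    #include <stdint.h>

    /* Multiply a 128-bit XTS tweak, held as little-endian 64-bit halves,
       by x in GF(2^128), reducing by x^128 + x^7 + x^2 + x + 1 (0x87). */
    static void xts_mul2 (uint64_t *lo, uint64_t *hi)
    {
      uint64_t carry = -(*hi >> 63) & 0x87;  /* 0x87 iff the top bit was set */
      *hi = (*hi << 1) | (*lo >> 63);
      *lo = (*lo << 1) ^ carry;
    }

Computing the next tweak before the block cipher call, as the loop above does, lets the multiply overlap with the encryption work.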
*/ static const char* selftest_basic_128 (void) { RIJNDAEL_context *ctx; unsigned char *ctxmem; unsigned char scratch[16]; /* The test vectors are from the AES supplied ones; more or less randomly taken from ecb_tbl.txt (I=42,81,14) */ #if 1 static const unsigned char plaintext_128[16] = { 0x01,0x4B,0xAF,0x22,0x78,0xA6,0x9D,0x33, 0x1D,0x51,0x80,0x10,0x36,0x43,0xE9,0x9A }; static const unsigned char key_128[16] = { 0xE8,0xE9,0xEA,0xEB,0xED,0xEE,0xEF,0xF0, 0xF2,0xF3,0xF4,0xF5,0xF7,0xF8,0xF9,0xFA }; static const unsigned char ciphertext_128[16] = { 0x67,0x43,0xC3,0xD1,0x51,0x9A,0xB4,0xF2, 0xCD,0x9A,0x78,0xAB,0x09,0xA5,0x11,0xBD }; #else /* Test vectors from fips-197, appendix C. */ # warning debug test vectors in use static const unsigned char plaintext_128[16] = { 0x00,0x11,0x22,0x33,0x44,0x55,0x66,0x77, 0x88,0x99,0xaa,0xbb,0xcc,0xdd,0xee,0xff }; static const unsigned char key_128[16] = { 0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07, 0x08,0x09,0x0a,0x0b,0x0c,0x0d,0x0e,0x0f /* 0x2b, 0x7e, 0x15, 0x16, 0x28, 0xae, 0xd2, 0xa6, */ /* 0xab, 0xf7, 0x15, 0x88, 0x09, 0xcf, 0x4f, 0x3c */ }; static const unsigned char ciphertext_128[16] = { 0x69,0xc4,0xe0,0xd8,0x6a,0x7b,0x04,0x30, 0xd8,0xcd,0xb7,0x80,0x70,0xb4,0xc5,0x5a }; #endif /* Because gcc/ld can only align the CTX struct on 8 bytes on the stack, we need to allocate that context on the heap. */ ctx = _gcry_cipher_selftest_alloc_ctx (sizeof *ctx, &ctxmem); if (!ctx) return "failed to allocate memory"; rijndael_setkey (ctx, key_128, sizeof (key_128)); rijndael_encrypt (ctx, scratch, plaintext_128); if (memcmp (scratch, ciphertext_128, sizeof (ciphertext_128))) { xfree (ctxmem); return "AES-128 test encryption failed."; } rijndael_decrypt (ctx, scratch, scratch); xfree (ctxmem); if (memcmp (scratch, plaintext_128, sizeof (plaintext_128))) return "AES-128 test decryption failed."; return NULL; } /* Run the self-tests for AES 192. Returns NULL on success. */ static const char* selftest_basic_192 (void) { RIJNDAEL_context *ctx; unsigned char *ctxmem; unsigned char scratch[16]; static unsigned char plaintext_192[16] = { 0x76,0x77,0x74,0x75,0xF1,0xF2,0xF3,0xF4, 0xF8,0xF9,0xE6,0xE7,0x77,0x70,0x71,0x72 }; static unsigned char key_192[24] = { 0x04,0x05,0x06,0x07,0x09,0x0A,0x0B,0x0C, 0x0E,0x0F,0x10,0x11,0x13,0x14,0x15,0x16, 0x18,0x19,0x1A,0x1B,0x1D,0x1E,0x1F,0x20 }; static const unsigned char ciphertext_192[16] = { 0x5D,0x1E,0xF2,0x0D,0xCE,0xD6,0xBC,0xBC, 0x12,0x13,0x1A,0xC7,0xC5,0x47,0x88,0xAA }; ctx = _gcry_cipher_selftest_alloc_ctx (sizeof *ctx, &ctxmem); if (!ctx) return "failed to allocate memory"; rijndael_setkey (ctx, key_192, sizeof(key_192)); rijndael_encrypt (ctx, scratch, plaintext_192); if (memcmp (scratch, ciphertext_192, sizeof (ciphertext_192))) { xfree (ctxmem); return "AES-192 test encryption failed."; } rijndael_decrypt (ctx, scratch, scratch); xfree (ctxmem); if (memcmp (scratch, plaintext_192, sizeof (plaintext_192))) return "AES-192 test decryption failed."; return NULL; } /* Run the self-tests for AES 256. Returns NULL on success. 
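The heap-allocation comment above is worth a concrete illustration: since the stack cannot guarantee 16-byte alignment for the context here, the selftest helper hands back an aligned pointer inside an over-allocated buffer. A minimal sketch of that pattern, assuming a 16-byte requirement; alloc_aligned_ctx is a hypothetical name and the real logic lives in _gcry_cipher_selftest_alloc_ctx:

    #include <stdint.h>
    #include <stdlib.h>

    /* Return a 16-byte-aligned context pointer; *mem_out receives the raw
       allocation, which is what the caller must eventually free. */
    static void *alloc_aligned_ctx (size_t size, unsigned char **mem_out)
    {
      unsigned char *mem = malloc (size + 15);
      if (!mem)
        return NULL;
      *mem_out = mem;
      return (void *)(((uintptr_t)mem + 15) & ~(uintptr_t)15);
    }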
*/ static const char* selftest_basic_256 (void) { RIJNDAEL_context *ctx; unsigned char *ctxmem; unsigned char scratch[16]; static unsigned char plaintext_256[16] = { 0x06,0x9A,0x00,0x7F,0xC7,0x6A,0x45,0x9F, 0x98,0xBA,0xF9,0x17,0xFE,0xDF,0x95,0x21 }; static unsigned char key_256[32] = { 0x08,0x09,0x0A,0x0B,0x0D,0x0E,0x0F,0x10, 0x12,0x13,0x14,0x15,0x17,0x18,0x19,0x1A, 0x1C,0x1D,0x1E,0x1F,0x21,0x22,0x23,0x24, 0x26,0x27,0x28,0x29,0x2B,0x2C,0x2D,0x2E }; static const unsigned char ciphertext_256[16] = { 0x08,0x0E,0x95,0x17,0xEB,0x16,0x77,0x71, 0x9A,0xCF,0x72,0x80,0x86,0x04,0x0A,0xE3 }; ctx = _gcry_cipher_selftest_alloc_ctx (sizeof *ctx, &ctxmem); if (!ctx) return "failed to allocate memory"; rijndael_setkey (ctx, key_256, sizeof(key_256)); rijndael_encrypt (ctx, scratch, plaintext_256); if (memcmp (scratch, ciphertext_256, sizeof (ciphertext_256))) { xfree (ctxmem); return "AES-256 test encryption failed."; } rijndael_decrypt (ctx, scratch, scratch); xfree (ctxmem); if (memcmp (scratch, plaintext_256, sizeof (plaintext_256))) return "AES-256 test decryption failed."; return NULL; } /* Run the self-tests for AES-CTR-128, tests IV increment of bulk CTR encryption. Returns NULL on success. */ static const char* selftest_ctr_128 (void) { const int nblocks = 8+1; const int blocksize = BLOCKSIZE; const int context_size = sizeof(RIJNDAEL_context); return _gcry_selftest_helper_ctr("AES", &rijndael_setkey, &rijndael_encrypt, &_gcry_aes_ctr_enc, nblocks, blocksize, context_size); } /* Run the self-tests for AES-CBC-128, tests bulk CBC decryption. Returns NULL on success. */ static const char* selftest_cbc_128 (void) { const int nblocks = 8+2; const int blocksize = BLOCKSIZE; const int context_size = sizeof(RIJNDAEL_context); return _gcry_selftest_helper_cbc("AES", &rijndael_setkey, &rijndael_encrypt, &_gcry_aes_cbc_dec, nblocks, blocksize, context_size); } /* Run the self-tests for AES-CFB-128, tests bulk CFB decryption. Returns NULL on success. */ static const char* selftest_cfb_128 (void) { const int nblocks = 8+2; const int blocksize = BLOCKSIZE; const int context_size = sizeof(RIJNDAEL_context); return _gcry_selftest_helper_cfb("AES", &rijndael_setkey, &rijndael_encrypt, &_gcry_aes_cfb_dec, nblocks, blocksize, context_size); } /* Run all the self-tests and return NULL on success. This function is used for the on-the-fly self-tests. */ static const char * selftest (void) { const char *r; if ( (r = selftest_basic_128 ()) || (r = selftest_basic_192 ()) || (r = selftest_basic_256 ()) ) return r; if ( (r = selftest_ctr_128 ()) ) return r; if ( (r = selftest_cbc_128 ()) ) return r; if ( (r = selftest_cfb_128 ()) ) return r; return r; } /* SP800-38a.pdf for AES-128. 
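As its comment says, selftest_ctr_128 exists to exercise the IV increment of the bulk CTR path: carry propagation across byte (and 64-bit half) boundaries is exactly where optimized implementations go wrong. The reference behaviour being tested is a plain big-endian 128-bit increment, sketched here with a hypothetical ctr_inc_be:

    #include <stdint.h>

    #define BLOCKSIZE 16

    /* Increment a big-endian 128-bit counter block by one; the loop stops
       at the first byte that does not wrap around to zero. */
    static void ctr_inc_be (uint8_t ctr[BLOCKSIZE])
    {
      for (int i = BLOCKSIZE - 1; i >= 0; i--)
        if (++ctr[i])
          break;
    }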
*/ static const char * selftest_fips_128_38a (int requested_mode) { static const struct tv { int mode; const unsigned char key[16]; const unsigned char iv[16]; struct { const unsigned char input[16]; const unsigned char output[16]; } data[4]; } tv[2] = { { GCRY_CIPHER_MODE_CFB, /* F.3.13, CFB128-AES128 */ { 0x2b, 0x7e, 0x15, 0x16, 0x28, 0xae, 0xd2, 0xa6, 0xab, 0xf7, 0x15, 0x88, 0x09, 0xcf, 0x4f, 0x3c }, { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f }, { { { 0x6b, 0xc1, 0xbe, 0xe2, 0x2e, 0x40, 0x9f, 0x96, 0xe9, 0x3d, 0x7e, 0x11, 0x73, 0x93, 0x17, 0x2a }, { 0x3b, 0x3f, 0xd9, 0x2e, 0xb7, 0x2d, 0xad, 0x20, 0x33, 0x34, 0x49, 0xf8, 0xe8, 0x3c, 0xfb, 0x4a } }, { { 0xae, 0x2d, 0x8a, 0x57, 0x1e, 0x03, 0xac, 0x9c, 0x9e, 0xb7, 0x6f, 0xac, 0x45, 0xaf, 0x8e, 0x51 }, { 0xc8, 0xa6, 0x45, 0x37, 0xa0, 0xb3, 0xa9, 0x3f, 0xcd, 0xe3, 0xcd, 0xad, 0x9f, 0x1c, 0xe5, 0x8b } }, { { 0x30, 0xc8, 0x1c, 0x46, 0xa3, 0x5c, 0xe4, 0x11, 0xe5, 0xfb, 0xc1, 0x19, 0x1a, 0x0a, 0x52, 0xef }, { 0x26, 0x75, 0x1f, 0x67, 0xa3, 0xcb, 0xb1, 0x40, 0xb1, 0x80, 0x8c, 0xf1, 0x87, 0xa4, 0xf4, 0xdf } }, { { 0xf6, 0x9f, 0x24, 0x45, 0xdf, 0x4f, 0x9b, 0x17, 0xad, 0x2b, 0x41, 0x7b, 0xe6, 0x6c, 0x37, 0x10 }, { 0xc0, 0x4b, 0x05, 0x35, 0x7c, 0x5d, 0x1c, 0x0e, 0xea, 0xc4, 0xc6, 0x6f, 0x9f, 0xf7, 0xf2, 0xe6 } } } }, { GCRY_CIPHER_MODE_OFB, { 0x2b, 0x7e, 0x15, 0x16, 0x28, 0xae, 0xd2, 0xa6, 0xab, 0xf7, 0x15, 0x88, 0x09, 0xcf, 0x4f, 0x3c }, { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f }, { { { 0x6b, 0xc1, 0xbe, 0xe2, 0x2e, 0x40, 0x9f, 0x96, 0xe9, 0x3d, 0x7e, 0x11, 0x73, 0x93, 0x17, 0x2a }, { 0x3b, 0x3f, 0xd9, 0x2e, 0xb7, 0x2d, 0xad, 0x20, 0x33, 0x34, 0x49, 0xf8, 0xe8, 0x3c, 0xfb, 0x4a } }, { { 0xae, 0x2d, 0x8a, 0x57, 0x1e, 0x03, 0xac, 0x9c, 0x9e, 0xb7, 0x6f, 0xac, 0x45, 0xaf, 0x8e, 0x51 }, { 0x77, 0x89, 0x50, 0x8d, 0x16, 0x91, 0x8f, 0x03, 0xf5, 0x3c, 0x52, 0xda, 0xc5, 0x4e, 0xd8, 0x25 } }, { { 0x30, 0xc8, 0x1c, 0x46, 0xa3, 0x5c, 0xe4, 0x11, 0xe5, 0xfb, 0xc1, 0x19, 0x1a, 0x0a, 0x52, 0xef }, { 0x97, 0x40, 0x05, 0x1e, 0x9c, 0x5f, 0xec, 0xf6, 0x43, 0x44, 0xf7, 0xa8, 0x22, 0x60, 0xed, 0xcc } }, { { 0xf6, 0x9f, 0x24, 0x45, 0xdf, 0x4f, 0x9b, 0x17, 0xad, 0x2b, 0x41, 0x7b, 0xe6, 0x6c, 0x37, 0x10 }, { 0x30, 0x4c, 0x65, 0x28, 0xf6, 0x59, 0xc7, 0x78, 0x66, 0xa5, 0x10, 0xd9, 0xc1, 0xd6, 0xae, 0x5e } }, } } }; unsigned char scratch[16]; gpg_error_t err; int tvi, idx; gcry_cipher_hd_t hdenc = NULL; gcry_cipher_hd_t hddec = NULL; #define Fail(a) do { \ _gcry_cipher_close (hdenc); \ _gcry_cipher_close (hddec); \ return a; \ } while (0) gcry_assert (sizeof tv[0].data[0].input == sizeof scratch); gcry_assert (sizeof tv[0].data[0].output == sizeof scratch); for (tvi=0; tvi < DIM (tv); tvi++) if (tv[tvi].mode == requested_mode) break; if (tvi == DIM (tv)) Fail ("no test data for this mode"); err = _gcry_cipher_open (&hdenc, GCRY_CIPHER_AES, tv[tvi].mode, 0); if (err) Fail ("open"); err = _gcry_cipher_open (&hddec, GCRY_CIPHER_AES, tv[tvi].mode, 0); if (err) Fail ("open"); err = _gcry_cipher_setkey (hdenc, tv[tvi].key, sizeof tv[tvi].key); if (!err) err = _gcry_cipher_setkey (hddec, tv[tvi].key, sizeof tv[tvi].key); if (err) Fail ("set key"); err = _gcry_cipher_setiv (hdenc, tv[tvi].iv, sizeof tv[tvi].iv); if (!err) err = _gcry_cipher_setiv (hddec, tv[tvi].iv, sizeof tv[tvi].iv); if (err) Fail ("set IV"); for (idx=0; idx < DIM (tv[tvi].data); idx++) { err = _gcry_cipher_encrypt (hdenc, scratch, sizeof scratch, tv[tvi].data[idx].input, sizeof tv[tvi].data[idx].input); if 
(err) Fail ("encrypt command"); if (memcmp (scratch, tv[tvi].data[idx].output, sizeof scratch)) Fail ("encrypt mismatch"); err = _gcry_cipher_decrypt (hddec, scratch, sizeof scratch, tv[tvi].data[idx].output, sizeof tv[tvi].data[idx].output); if (err) Fail ("decrypt command"); if (memcmp (scratch, tv[tvi].data[idx].input, sizeof scratch)) Fail ("decrypt mismatch"); } #undef Fail _gcry_cipher_close (hdenc); _gcry_cipher_close (hddec); return NULL; } /* Complete selftest for AES-128 with all modes and driver code. */ static gpg_err_code_t selftest_fips_128 (int extended, selftest_report_func_t report) { const char *what; const char *errtxt; what = "low-level"; errtxt = selftest_basic_128 (); if (errtxt) goto failed; if (extended) { what = "cfb"; errtxt = selftest_fips_128_38a (GCRY_CIPHER_MODE_CFB); if (errtxt) goto failed; what = "ofb"; errtxt = selftest_fips_128_38a (GCRY_CIPHER_MODE_OFB); if (errtxt) goto failed; } return 0; /* Succeeded. */ failed: if (report) report ("cipher", GCRY_CIPHER_AES128, what, errtxt); return GPG_ERR_SELFTEST_FAILED; } /* Complete selftest for AES-192. */ static gpg_err_code_t selftest_fips_192 (int extended, selftest_report_func_t report) { const char *what; const char *errtxt; (void)extended; /* No extended tests available. */ what = "low-level"; errtxt = selftest_basic_192 (); if (errtxt) goto failed; return 0; /* Succeeded. */ failed: if (report) report ("cipher", GCRY_CIPHER_AES192, what, errtxt); return GPG_ERR_SELFTEST_FAILED; } /* Complete selftest for AES-256. */ static gpg_err_code_t selftest_fips_256 (int extended, selftest_report_func_t report) { const char *what; const char *errtxt; (void)extended; /* No extended tests available. */ what = "low-level"; errtxt = selftest_basic_256 (); if (errtxt) goto failed; return 0; /* Succeeded. */ failed: if (report) report ("cipher", GCRY_CIPHER_AES256, what, errtxt); return GPG_ERR_SELFTEST_FAILED; } /* Run a full self-test for ALGO and return 0 on success. 
*/ static gpg_err_code_t run_selftests (int algo, int extended, selftest_report_func_t report) { gpg_err_code_t ec; switch (algo) { case GCRY_CIPHER_AES128: ec = selftest_fips_128 (extended, report); break; case GCRY_CIPHER_AES192: ec = selftest_fips_192 (extended, report); break; case GCRY_CIPHER_AES256: ec = selftest_fips_256 (extended, report); break; default: ec = GPG_ERR_CIPHER_ALGO; break; } return ec; } static const char *rijndael_names[] = { "RIJNDAEL", "AES128", "AES-128", NULL }; static gcry_cipher_oid_spec_t rijndael_oids[] = { { "2.16.840.1.101.3.4.1.1", GCRY_CIPHER_MODE_ECB }, { "2.16.840.1.101.3.4.1.2", GCRY_CIPHER_MODE_CBC }, { "2.16.840.1.101.3.4.1.3", GCRY_CIPHER_MODE_OFB }, { "2.16.840.1.101.3.4.1.4", GCRY_CIPHER_MODE_CFB }, { NULL } }; gcry_cipher_spec_t _gcry_cipher_spec_aes = { GCRY_CIPHER_AES, {0, 1}, "AES", rijndael_names, rijndael_oids, 16, 128, sizeof (RIJNDAEL_context), rijndael_setkey, rijndael_encrypt, rijndael_decrypt, NULL, NULL, run_selftests }; static const char *rijndael192_names[] = { "RIJNDAEL192", "AES-192", NULL }; static gcry_cipher_oid_spec_t rijndael192_oids[] = { { "2.16.840.1.101.3.4.1.21", GCRY_CIPHER_MODE_ECB }, { "2.16.840.1.101.3.4.1.22", GCRY_CIPHER_MODE_CBC }, { "2.16.840.1.101.3.4.1.23", GCRY_CIPHER_MODE_OFB }, { "2.16.840.1.101.3.4.1.24", GCRY_CIPHER_MODE_CFB }, { NULL } }; gcry_cipher_spec_t _gcry_cipher_spec_aes192 = { GCRY_CIPHER_AES192, {0, 1}, "AES192", rijndael192_names, rijndael192_oids, 16, 192, sizeof (RIJNDAEL_context), rijndael_setkey, rijndael_encrypt, rijndael_decrypt, NULL, NULL, run_selftests }; static const char *rijndael256_names[] = { "RIJNDAEL256", "AES-256", NULL }; static gcry_cipher_oid_spec_t rijndael256_oids[] = { { "2.16.840.1.101.3.4.1.41", GCRY_CIPHER_MODE_ECB }, { "2.16.840.1.101.3.4.1.42", GCRY_CIPHER_MODE_CBC }, { "2.16.840.1.101.3.4.1.43", GCRY_CIPHER_MODE_OFB }, { "2.16.840.1.101.3.4.1.44", GCRY_CIPHER_MODE_CFB }, { NULL } }; gcry_cipher_spec_t _gcry_cipher_spec_aes256 = { GCRY_CIPHER_AES256, {0, 1}, "AES256", rijndael256_names, rijndael256_oids, 16, 256, sizeof (RIJNDAEL_context), rijndael_setkey, rijndael_encrypt, rijndael_decrypt, NULL, NULL, run_selftests }; diff --git a/cipher/twofish-amd64.S b/cipher/twofish-amd64.S index aa964e03..7a836463 100644 --- a/cipher/twofish-amd64.S +++ b/cipher/twofish-amd64.S @@ -1,1046 +1,1060 @@ /* twofish-amd64.S - AMD64 assembly implementation of Twofish cipher * * Copyright (C) 2013-2015 Jussi Kivilinna * * This file is part of Libgcrypt. * * Libgcrypt is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as * published by the Free Software Foundation; either version 2.1 of * the License, or (at your option) any later version. * * Libgcrypt is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this program; if not, see . */ #ifdef __x86_64 #include #if (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \ defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS)) && defined(USE_TWOFISH) -#ifdef HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS -# define ELF(...) __VA_ARGS__ -#else -# define ELF(...) 
/*_*/ -#endif - -#ifdef __PIC__ -# define RIP %rip -#else -# define RIP -#endif +#include "asm-common-amd64.h" .text /* structure of TWOFISH_context: */ #define s0 0 #define s1 ((s0) + 4 * 256) #define s2 ((s1) + 4 * 256) #define s3 ((s2) + 4 * 256) #define w ((s3) + 4 * 256) #define k ((w) + 4 * 8) /* register macros */ #define CTX %rdi #define RA %rax #define RB %rbx #define RC %rcx #define RD %rdx #define RAd %eax #define RBd %ebx #define RCd %ecx #define RDd %edx #define RAbl %al #define RBbl %bl #define RCbl %cl #define RDbl %dl #define RAbh %ah #define RBbh %bh #define RCbh %ch #define RDbh %dh #define RX %r8 #define RY %r9 #define RXd %r8d #define RYd %r9d #define RT0 %rsi #define RT1 %rbp #define RT2 %r10 #define RT3 %r11 #define RT0d %esi #define RT1d %ebp #define RT2d %r10d #define RT3d %r11d /*********************************************************************** * AMD64 assembly implementation of the Twofish cipher ***********************************************************************/ #define enc_g1_2(a, b, x, y) \ movzbl b ## bl, RT3d; \ movzbl b ## bh, RT1d; \ movzbl a ## bl, RT2d; \ movzbl a ## bh, RT0d; \ rorl $16, b ## d; \ rorl $16, a ## d; \ movl s1(CTX, RT3, 4), RYd; \ movzbl b ## bl, RT3d; \ movl s0(CTX, RT2, 4), RXd; \ movzbl a ## bl, RT2d; \ xorl s2(CTX, RT1, 4), RYd; \ movzbl b ## bh, RT1d; \ xorl s1(CTX, RT0, 4), RXd; \ movzbl a ## bh, RT0d; \ rorl $16, b ## d; \ rorl $16, a ## d; \ xorl s3(CTX, RT3, 4), RYd; \ xorl s2(CTX, RT2, 4), RXd; \ xorl s0(CTX, RT1, 4), RYd; \ xorl s3(CTX, RT0, 4), RXd; #define dec_g1_2(a, b, x, y) \ movzbl a ## bl, RT2d; \ movzbl a ## bh, RT0d; \ movzbl b ## bl, RT3d; \ movzbl b ## bh, RT1d; \ rorl $16, a ## d; \ rorl $16, b ## d; \ movl s0(CTX, RT2, 4), RXd; \ movzbl a ## bl, RT2d; \ movl s1(CTX, RT3, 4), RYd; \ movzbl b ## bl, RT3d; \ xorl s1(CTX, RT0, 4), RXd; \ movzbl a ## bh, RT0d; \ xorl s2(CTX, RT1, 4), RYd; \ movzbl b ## bh, RT1d; \ rorl $16, a ## d; \ rorl $16, b ## d; \ xorl s2(CTX, RT2, 4), RXd; \ xorl s3(CTX, RT3, 4), RYd; \ xorl s3(CTX, RT0, 4), RXd; \ xorl s0(CTX, RT1, 4), RYd; #define encrypt_round(ra, rb, rc, rd, n) \ enc_g1_2(##ra, ##rb, RX, RY); \ \ leal (RXd, RYd, 2), RT0d; \ addl RYd, RXd; \ addl (k + 8 * (n) + 4)(CTX), RT0d; \ roll $1, rd ## d; \ addl (k + 8 * (n))(CTX), RXd; \ xorl RT0d, rd ## d; \ xorl RXd, rc ## d; \ rorl $1, rc ## d; #define decrypt_round(ra, rb, rc, rd, n) \ dec_g1_2(##ra, ##rb, RX, RY); \ \ leal (RXd, RYd, 2), RT0d; \ addl RYd, RXd; \ addl (k + 8 * (n) + 4)(CTX), RT0d; \ roll $1, rc ## d; \ addl (k + 8 * (n))(CTX), RXd; \ xorl RXd, rc ## d; \ xorl RT0d, rd ## d; \ rorl $1, rd ## d; #define encrypt_cycle(a, b, c, d, nc) \ encrypt_round(##a, ##b, ##c, ##d, (nc) * 2); \ encrypt_round(##c, ##d, ##a, ##b, (nc) * 2 + 1); #define decrypt_cycle(a, b, c, d, nc) \ decrypt_round(##c, ##d, ##a, ##b, (nc) * 2 + 1); \ decrypt_round(##a, ##b, ##c, ##d, (nc) * 2); #define inpack(in, n, x, m) \ movl (4 * (n))(in), x; \ xorl (w + 4 * (m))(CTX), x; #define outunpack(out, n, x, m) \ xorl (w + 4 * (m))(CTX), x; \ movl x, (4 * (n))(out); .align 8 .globl _gcry_twofish_amd64_encrypt_block ELF(.type _gcry_twofish_amd64_encrypt_block,@function;) _gcry_twofish_amd64_encrypt_block: /* input: * %rdi: context, CTX * %rsi: dst * %rdx: src */ + ENTER_SYSV_FUNC_PARAMS_0_4 + subq $(3 * 8), %rsp; movq %rsi, (0 * 8)(%rsp); movq %rbp, (1 * 8)(%rsp); movq %rbx, (2 * 8)(%rsp); movq %rdx, RX; inpack(RX, 0, RAd, 0); inpack(RX, 1, RBd, 1); inpack(RX, 2, RCd, 2); inpack(RX, 3, RDd, 3); encrypt_cycle(RA, RB, RC, RD, 0); encrypt_cycle(RA, 
RB, RC, RD, 1); encrypt_cycle(RA, RB, RC, RD, 2); encrypt_cycle(RA, RB, RC, RD, 3); encrypt_cycle(RA, RB, RC, RD, 4); encrypt_cycle(RA, RB, RC, RD, 5); encrypt_cycle(RA, RB, RC, RD, 6); encrypt_cycle(RA, RB, RC, RD, 7); movq (0 * 8)(%rsp), RX; /*dst*/ outunpack(RX, 0, RCd, 4); outunpack(RX, 1, RDd, 5); outunpack(RX, 2, RAd, 6); outunpack(RX, 3, RBd, 7); movq (2 * 8)(%rsp), %rbx; movq (1 * 8)(%rsp), %rbp; addq $(3 * 8), %rsp; + EXIT_SYSV_FUNC ret; ELF(.size _gcry_twofish_amd64_encrypt_block,.-_gcry_twofish_amd64_encrypt_block;) .align 8 .globl _gcry_twofish_amd64_decrypt_block ELF(.type _gcry_twofish_amd64_decrypt_block,@function;) _gcry_twofish_amd64_decrypt_block: /* input: * %rdi: context, CTX * %rsi: dst * %rdx: src */ + ENTER_SYSV_FUNC_PARAMS_0_4 + subq $(3 * 8), %rsp; movq %rsi, (0 * 8)(%rsp); movq %rbp, (1 * 8)(%rsp); movq %rbx, (2 * 8)(%rsp); movq %rdx, RX; inpack(RX, 0, RCd, 4); inpack(RX, 1, RDd, 5); inpack(RX, 2, RAd, 6); inpack(RX, 3, RBd, 7); decrypt_cycle(RA, RB, RC, RD, 7); decrypt_cycle(RA, RB, RC, RD, 6); decrypt_cycle(RA, RB, RC, RD, 5); decrypt_cycle(RA, RB, RC, RD, 4); decrypt_cycle(RA, RB, RC, RD, 3); decrypt_cycle(RA, RB, RC, RD, 2); decrypt_cycle(RA, RB, RC, RD, 1); decrypt_cycle(RA, RB, RC, RD, 0); movq (0 * 8)(%rsp), RX; /*dst*/ outunpack(RX, 0, RAd, 0); outunpack(RX, 1, RBd, 1); outunpack(RX, 2, RCd, 2); outunpack(RX, 3, RDd, 3); movq (2 * 8)(%rsp), %rbx; movq (1 * 8)(%rsp), %rbp; addq $(3 * 8), %rsp; + EXIT_SYSV_FUNC ret; ELF(.size _gcry_twofish_amd64_decrypt_block,.-_gcry_twofish_amd64_decrypt_block;) #undef CTX #undef RA #undef RB #undef RC #undef RD #undef RAd #undef RBd #undef RCd #undef RDd #undef RAbl #undef RBbl #undef RCbl #undef RDbl #undef RAbh #undef RBbh #undef RCbh #undef RDbh #undef RX #undef RY #undef RXd #undef RYd #undef RT0 #undef RT1 #undef RT2 #undef RT3 #undef RT0d #undef RT1d #undef RT2d #undef RT3d /*********************************************************************** * AMD64 assembly implementation of the Twofish cipher, 3-way parallel ***********************************************************************/ #define CTX %rdi #define RIO %rdx #define RAB0 %rax #define RAB1 %rbx #define RAB2 %rcx #define RAB0d %eax #define RAB1d %ebx #define RAB2d %ecx #define RAB0bh %ah #define RAB1bh %bh #define RAB2bh %ch #define RAB0bl %al #define RAB1bl %bl #define RAB2bl %cl #define RCD0 %r8 #define RCD1 %r9 #define RCD2 %r10 #define RCD0d %r8d #define RCD1d %r9d #define RCD2d %r10d #define RX0 %rbp #define RX1 %r11 #define RX2 %r12 #define RX0d %ebp #define RX1d %r11d #define RX2d %r12d #define RY0 %r13 #define RY1 %r14 #define RY2 %r15 #define RY0d %r13d #define RY1d %r14d #define RY2d %r15d #define RT0 %rdx #define RT1 %rsi #define RT0d %edx #define RT1d %esi #define do16bit_ror(rot, op1, op2, T0, T1, tmp1, tmp2, ab, dst) \ movzbl ab ## bl, tmp2 ## d; \ movzbl ab ## bh, tmp1 ## d; \ rorq $(rot), ab; \ op1##l T0(CTX, tmp2, 4), dst ## d; \ op2##l T1(CTX, tmp1, 4), dst ## d; /* * Combined G1 & G2 function. Reordered with help of rotates to have moves * at beginning.
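For orientation, enc_g1_2 and encrypt_round implement the textbook Twofish round: the g function is four key-dependent s-box lookups XORed together, followed by the pseudo-Hadamard transform, subkey addition, and the one-bit rotates. A plain-C rendition over the same s0..s3/k layout, hedged as a reader's aid rather than the exact glue code (twofish_round is a hypothetical name; per the context comment in twofish.c, k[i] holds what the paper calls K[i+8]):

    #include <stdint.h>

    static uint32_t rol32 (uint32_t x, int n) { return (x << n) | (x >> (32 - n)); }
    static uint32_t ror32 (uint32_t x, int n) { return (x >> n) | (x << (32 - n)); }

    /* g(X): four key-dependent s-box lookups, one per input byte. */
    static uint32_t g (const uint32_t s[4][256], uint32_t x)
    {
      return s[0][x & 0xff] ^ s[1][(x >> 8) & 0xff]
           ^ s[2][(x >> 16) & 0xff] ^ s[3][x >> 24];
    }

    /* One Twofish round n acting on (a, b, c, d). */
    static void twofish_round (const uint32_t s[4][256], const uint32_t *k,
                               uint32_t a, uint32_t b,
                               uint32_t *c, uint32_t *d, int n)
    {
      uint32_t x = g (s, a);
      uint32_t y = g (s, rol32 (b, 8));
      x += y;                                 /* PHT: x = T0 + T1   */
      y += x;                                 /*      y = T0 + 2*T1 */
      *c = ror32 (*c ^ (x + k[2 * n]), 1);
      *d = rol32 (*d, 1) ^ (y + k[2 * n + 1]);
    }

The "leal (RXd, RYd, 2)" in encrypt_round computes the T0 + 2*T1 half of the PHT in a single instruction, which is the reordering the comment above refers to.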
*/ #define g1g2_3(ab, cd, Tx0, Tx1, Tx2, Tx3, Ty0, Ty1, Ty2, Ty3, x, y) \ /* G1,1 && G2,1 */ \ do16bit_ror(32, mov, xor, Tx0, Tx1, RT0, x ## 0, ab ## 0, x ## 0); \ do16bit_ror(48, mov, xor, Ty1, Ty2, RT0, y ## 0, ab ## 0, y ## 0); \ \ do16bit_ror(32, mov, xor, Tx0, Tx1, RT0, x ## 1, ab ## 1, x ## 1); \ do16bit_ror(48, mov, xor, Ty1, Ty2, RT0, y ## 1, ab ## 1, y ## 1); \ \ do16bit_ror(32, mov, xor, Tx0, Tx1, RT0, x ## 2, ab ## 2, x ## 2); \ do16bit_ror(48, mov, xor, Ty1, Ty2, RT0, y ## 2, ab ## 2, y ## 2); \ \ /* G1,2 && G2,2 */ \ do16bit_ror(32, xor, xor, Tx2, Tx3, RT0, RT1, ab ## 0, x ## 0); \ do16bit_ror(16, xor, xor, Ty3, Ty0, RT0, RT1, ab ## 0, y ## 0); \ xchgq cd ## 0, ab ## 0; \ \ do16bit_ror(32, xor, xor, Tx2, Tx3, RT0, RT1, ab ## 1, x ## 1); \ do16bit_ror(16, xor, xor, Ty3, Ty0, RT0, RT1, ab ## 1, y ## 1); \ xchgq cd ## 1, ab ## 1; \ \ do16bit_ror(32, xor, xor, Tx2, Tx3, RT0, RT1, ab ## 2, x ## 2); \ do16bit_ror(16, xor, xor, Ty3, Ty0, RT0, RT1, ab ## 2, y ## 2); \ xchgq cd ## 2, ab ## 2; #define enc_round_end(ab, x, y, n) \ addl y ## d, x ## d; \ addl x ## d, y ## d; \ addl k+4*(2*(n))(CTX), x ## d; \ xorl ab ## d, x ## d; \ addl k+4*(2*(n)+1)(CTX), y ## d; \ shrq $32, ab; \ roll $1, ab ## d; \ xorl y ## d, ab ## d; \ shlq $32, ab; \ rorl $1, x ## d; \ orq x, ab; #define dec_round_end(ba, x, y, n) \ addl y ## d, x ## d; \ addl x ## d, y ## d; \ addl k+4*(2*(n))(CTX), x ## d; \ addl k+4*(2*(n)+1)(CTX), y ## d; \ xorl ba ## d, y ## d; \ shrq $32, ba; \ roll $1, ba ## d; \ xorl x ## d, ba ## d; \ shlq $32, ba; \ rorl $1, y ## d; \ orq y, ba; #define encrypt_round3(ab, cd, n) \ g1g2_3(ab, cd, s0, s1, s2, s3, s0, s1, s2, s3, RX, RY); \ \ enc_round_end(ab ## 0, RX0, RY0, n); \ enc_round_end(ab ## 1, RX1, RY1, n); \ enc_round_end(ab ## 2, RX2, RY2, n); #define decrypt_round3(ba, dc, n) \ g1g2_3(ba, dc, s1, s2, s3, s0, s3, s0, s1, s2, RY, RX); \ \ dec_round_end(ba ## 0, RX0, RY0, n); \ dec_round_end(ba ## 1, RX1, RY1, n); \ dec_round_end(ba ## 2, RX2, RY2, n); #define encrypt_cycle3(ab, cd, n) \ encrypt_round3(ab, cd, n*2); \ encrypt_round3(ab, cd, (n*2)+1); #define decrypt_cycle3(ba, dc, n) \ decrypt_round3(ba, dc, (n*2)+1); \ decrypt_round3(ba, dc, (n*2)); #define inpack3(xy, m) \ xorq w+4*m(CTX), xy ## 0; \ xorq w+4*m(CTX), xy ## 1; \ xorq w+4*m(CTX), xy ## 2; #define outunpack3(xy, m) \ xorq w+4*m(CTX), xy ## 0; \ xorq w+4*m(CTX), xy ## 1; \ xorq w+4*m(CTX), xy ## 2; #define inpack_enc3() \ inpack3(RAB, 0); \ inpack3(RCD, 2); #define outunpack_enc3() \ outunpack3(RAB, 6); \ outunpack3(RCD, 4); #define inpack_dec3() \ inpack3(RAB, 4); \ rorq $32, RAB0; \ rorq $32, RAB1; \ rorq $32, RAB2; \ inpack3(RCD, 6); \ rorq $32, RCD0; \ rorq $32, RCD1; \ rorq $32, RCD2; #define outunpack_dec3() \ rorq $32, RCD0; \ rorq $32, RCD1; \ rorq $32, RCD2; \ outunpack3(RCD, 0); \ rorq $32, RAB0; \ rorq $32, RAB1; \ rorq $32, RAB2; \ outunpack3(RAB, 2); .align 8 ELF(.type __twofish_enc_blk3,@function;) __twofish_enc_blk3: /* input: * %rdi: ctx, CTX * RAB0,RCD0,RAB1,RCD1,RAB2,RCD2: three plaintext blocks * output: * RCD0,RAB0,RCD1,RAB1,RCD2,RAB2: three ciphertext blocks */ inpack_enc3(); encrypt_cycle3(RAB, RCD, 0); encrypt_cycle3(RAB, RCD, 1); encrypt_cycle3(RAB, RCD, 2); encrypt_cycle3(RAB, RCD, 3); encrypt_cycle3(RAB, RCD, 4); encrypt_cycle3(RAB, RCD, 5); encrypt_cycle3(RAB, RCD, 6); encrypt_cycle3(RAB, RCD, 7); outunpack_enc3(); ret; ELF(.size __twofish_enc_blk3,.-__twofish_enc_blk3;) .align 8 ELF(.type __twofish_dec_blk3,@function;) __twofish_dec_blk3: /* input: * %rdi: ctx, CTX * 
RAB0,RCD0,RAB1,RCD1,RAB2,RCD2: three ciphertext blocks * output: * RCD0,RAB0,RCD1,RAB1,RCD2,RAB2: three plaintext blocks */ inpack_dec3(); decrypt_cycle3(RAB, RCD, 7); decrypt_cycle3(RAB, RCD, 6); decrypt_cycle3(RAB, RCD, 5); decrypt_cycle3(RAB, RCD, 4); decrypt_cycle3(RAB, RCD, 3); decrypt_cycle3(RAB, RCD, 2); decrypt_cycle3(RAB, RCD, 1); decrypt_cycle3(RAB, RCD, 0); outunpack_dec3(); ret; ELF(.size __twofish_dec_blk3,.-__twofish_dec_blk3;) .align 8 .globl _gcry_twofish_amd64_ctr_enc ELF(.type _gcry_twofish_amd64_ctr_enc,@function;) _gcry_twofish_amd64_ctr_enc: /* input: * %rdi: ctx, CTX * %rsi: dst (3 blocks) * %rdx: src (3 blocks) * %rcx: iv (big endian, 128bit) */ + ENTER_SYSV_FUNC_PARAMS_0_4 + subq $(8 * 8), %rsp; movq %rbp, (0 * 8)(%rsp); movq %rbx, (1 * 8)(%rsp); movq %r12, (2 * 8)(%rsp); movq %r13, (3 * 8)(%rsp); movq %r14, (4 * 8)(%rsp); movq %r15, (5 * 8)(%rsp); movq %rsi, (6 * 8)(%rsp); movq %rdx, (7 * 8)(%rsp); movq %rcx, RX0; /* load IV and byteswap */ movq 8(RX0), RT0; movq 0(RX0), RT1; movq RT0, RCD0; movq RT1, RAB0; bswapq RT0; bswapq RT1; /* construct IVs */ movq RT0, RCD1; movq RT1, RAB1; movq RT0, RCD2; movq RT1, RAB2; addq $1, RCD1; adcq $0, RAB1; bswapq RCD1; bswapq RAB1; addq $2, RCD2; adcq $0, RAB2; bswapq RCD2; bswapq RAB2; addq $3, RT0; adcq $0, RT1; bswapq RT0; bswapq RT1; /* store new IV */ movq RT0, 8(RX0); movq RT1, 0(RX0); call __twofish_enc_blk3; movq (7 * 8)(%rsp), RX0; /*src*/ movq (6 * 8)(%rsp), RX1; /*dst*/ /* XOR key-stream with plaintext */ xorq (0 * 8)(RX0), RCD0; xorq (1 * 8)(RX0), RAB0; xorq (2 * 8)(RX0), RCD1; xorq (3 * 8)(RX0), RAB1; xorq (4 * 8)(RX0), RCD2; xorq (5 * 8)(RX0), RAB2; movq RCD0, (0 * 8)(RX1); movq RAB0, (1 * 8)(RX1); movq RCD1, (2 * 8)(RX1); movq RAB1, (3 * 8)(RX1); movq RCD2, (4 * 8)(RX1); movq RAB2, (5 * 8)(RX1); movq (0 * 8)(%rsp), %rbp; movq (1 * 8)(%rsp), %rbx; movq (2 * 8)(%rsp), %r12; movq (3 * 8)(%rsp), %r13; movq (4 * 8)(%rsp), %r14; movq (5 * 8)(%rsp), %r15; addq $(8 * 8), %rsp; + EXIT_SYSV_FUNC ret; ELF(.size _gcry_twofish_amd64_ctr_enc,.-_gcry_twofish_amd64_ctr_enc;) .align 8 .globl _gcry_twofish_amd64_cbc_dec ELF(.type _gcry_twofish_amd64_cbc_dec,@function;) _gcry_twofish_amd64_cbc_dec: /* input: * %rdi: ctx, CTX * %rsi: dst (3 blocks) * %rdx: src (3 blocks) * %rcx: iv (128bit) */ + ENTER_SYSV_FUNC_PARAMS_0_4 + subq $(9 * 8), %rsp; movq %rbp, (0 * 8)(%rsp); movq %rbx, (1 * 8)(%rsp); movq %r12, (2 * 8)(%rsp); movq %r13, (3 * 8)(%rsp); movq %r14, (4 * 8)(%rsp); movq %r15, (5 * 8)(%rsp); movq %rsi, (6 * 8)(%rsp); movq %rdx, (7 * 8)(%rsp); movq %rcx, (8 * 8)(%rsp); movq %rdx, RX0; /* load input */ movq (0 * 8)(RX0), RAB0; movq (1 * 8)(RX0), RCD0; movq (2 * 8)(RX0), RAB1; movq (3 * 8)(RX0), RCD1; movq (4 * 8)(RX0), RAB2; movq (5 * 8)(RX0), RCD2; call __twofish_dec_blk3; movq (8 * 8)(%rsp), RT0; /*iv*/ movq (7 * 8)(%rsp), RX0; /*src*/ movq (6 * 8)(%rsp), RX1; /*dst*/ movq (4 * 8)(RX0), RY0; movq (5 * 8)(RX0), RY1; xorq (0 * 8)(RT0), RCD0; xorq (1 * 8)(RT0), RAB0; xorq (0 * 8)(RX0), RCD1; xorq (1 * 8)(RX0), RAB1; xorq (2 * 8)(RX0), RCD2; xorq (3 * 8)(RX0), RAB2; movq RY0, (0 * 8)(RT0); movq RY1, (1 * 8)(RT0); movq RCD0, (0 * 8)(RX1); movq RAB0, (1 * 8)(RX1); movq RCD1, (2 * 8)(RX1); movq RAB1, (3 * 8)(RX1); movq RCD2, (4 * 8)(RX1); movq RAB2, (5 * 8)(RX1); movq (0 * 8)(%rsp), %rbp; movq (1 * 8)(%rsp), %rbx; movq (2 * 8)(%rsp), %r12; movq (3 * 8)(%rsp), %r13; movq (4 * 8)(%rsp), %r14; movq (5 * 8)(%rsp), %r15; addq $(9 * 8), %rsp; + EXIT_SYSV_FUNC ret; ELF(.size _gcry_twofish_amd64_cbc_dec,.-_gcry_twofish_amd64_cbc_dec;) .align 
8 .globl _gcry_twofish_amd64_cfb_dec ELF(.type _gcry_twofish_amd64_cfb_dec,@function;) _gcry_twofish_amd64_cfb_dec: /* input: * %rdi: ctx, CTX * %rsi: dst (3 blocks) * %rdx: src (3 blocks) * %rcx: iv (128bit) */ + ENTER_SYSV_FUNC_PARAMS_0_4 + subq $(8 * 8), %rsp; movq %rbp, (0 * 8)(%rsp); movq %rbx, (1 * 8)(%rsp); movq %r12, (2 * 8)(%rsp); movq %r13, (3 * 8)(%rsp); movq %r14, (4 * 8)(%rsp); movq %r15, (5 * 8)(%rsp); movq %rsi, (6 * 8)(%rsp); movq %rdx, (7 * 8)(%rsp); movq %rdx, RX0; movq %rcx, RX1; /* load input */ movq (0 * 8)(RX1), RAB0; movq (1 * 8)(RX1), RCD0; movq (0 * 8)(RX0), RAB1; movq (1 * 8)(RX0), RCD1; movq (2 * 8)(RX0), RAB2; movq (3 * 8)(RX0), RCD2; /* Update IV */ movq (4 * 8)(RX0), RY0; movq (5 * 8)(RX0), RY1; movq RY0, (0 * 8)(RX1); movq RY1, (1 * 8)(RX1); call __twofish_enc_blk3; movq (7 * 8)(%rsp), RX0; /*src*/ movq (6 * 8)(%rsp), RX1; /*dst*/ xorq (0 * 8)(RX0), RCD0; xorq (1 * 8)(RX0), RAB0; xorq (2 * 8)(RX0), RCD1; xorq (3 * 8)(RX0), RAB1; xorq (4 * 8)(RX0), RCD2; xorq (5 * 8)(RX0), RAB2; movq RCD0, (0 * 8)(RX1); movq RAB0, (1 * 8)(RX1); movq RCD1, (2 * 8)(RX1); movq RAB1, (3 * 8)(RX1); movq RCD2, (4 * 8)(RX1); movq RAB2, (5 * 8)(RX1); movq (0 * 8)(%rsp), %rbp; movq (1 * 8)(%rsp), %rbx; movq (2 * 8)(%rsp), %r12; movq (3 * 8)(%rsp), %r13; movq (4 * 8)(%rsp), %r14; movq (5 * 8)(%rsp), %r15; addq $(8 * 8), %rsp; + EXIT_SYSV_FUNC ret; ELF(.size _gcry_twofish_amd64_cfb_dec,.-_gcry_twofish_amd64_cfb_dec;) .align 8 .globl _gcry_twofish_amd64_ocb_enc ELF(.type _gcry_twofish_amd64_ocb_enc,@function;) _gcry_twofish_amd64_ocb_enc: /* input: * %rdi: ctx, CTX * %rsi: dst (3 blocks) * %rdx: src (3 blocks) * %rcx: offset * %r8 : checksum * %r9 : L pointers (void *L[3]) */ + ENTER_SYSV_FUNC_PARAMS_6 + subq $(8 * 8), %rsp; movq %rbp, (0 * 8)(%rsp); movq %rbx, (1 * 8)(%rsp); movq %r12, (2 * 8)(%rsp); movq %r13, (3 * 8)(%rsp); movq %r14, (4 * 8)(%rsp); movq %r15, (5 * 8)(%rsp); movq %rsi, (6 * 8)(%rsp); movq %rdx, RX0; movq %rcx, RX1; movq %r8, RX2; movq %r9, RY0; movq %rsi, RY1; /* Load offset */ movq (0 * 8)(RX1), RT0; movq (1 * 8)(RX1), RT1; /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ movq (RY0), RY2; xorq (0 * 8)(RY2), RT0; xorq (1 * 8)(RY2), RT1; movq (0 * 8)(RX0), RAB0; movq (1 * 8)(RX0), RCD0; /* Store Offset_i */ movq RT0, (0 * 8)(RY1); movq RT1, (1 * 8)(RY1); /* Checksum_i = Checksum_{i-1} xor P_i */ xor RAB0, (0 * 8)(RX2); xor RCD0, (1 * 8)(RX2); /* PX_i = P_i xor Offset_i */ xorq RT0, RAB0; xorq RT1, RCD0; /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ movq 8(RY0), RY2; xorq (0 * 8)(RY2), RT0; xorq (1 * 8)(RY2), RT1; movq (2 * 8)(RX0), RAB1; movq (3 * 8)(RX0), RCD1; /* Store Offset_i */ movq RT0, (2 * 8)(RY1); movq RT1, (3 * 8)(RY1); /* Checksum_i = Checksum_{i-1} xor P_i */ xor RAB1, (0 * 8)(RX2); xor RCD1, (1 * 8)(RX2); /* PX_i = P_i xor Offset_i */ xorq RT0, RAB1; xorq RT1, RCD1; /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ movq 16(RY0), RY2; xorq (0 * 8)(RY2), RT0; xorq (1 * 8)(RY2), RT1; movq (4 * 8)(RX0), RAB2; movq (5 * 8)(RX0), RCD2; /* Store Offset_i */ movq RT0, (4 * 8)(RY1); movq RT1, (5 * 8)(RY1); /* Checksum_i = Checksum_{i-1} xor P_i */ xor RAB2, (0 * 8)(RX2); xor RCD2, (1 * 8)(RX2); /* PX_i = P_i xor Offset_i */ xorq RT0, RAB2; xorq RT1, RCD2; /* Store offset */ movq RT0, (0 * 8)(RX1); movq RT1, (1 * 8)(RX1); /* CX_i = ENCIPHER(K, PX_i) */ call __twofish_enc_blk3; movq (6 * 8)(%rsp), RX1; /*dst*/ /* C_i = CX_i xor Offset_i */ xorq RCD0, (0 * 8)(RX1); xorq RAB0, (1 * 8)(RX1); xorq RCD1, (2 * 8)(RX1); xorq RAB1, (3 * 8)(RX1); xorq RCD2, (4 * 8)(RX1); xorq RAB2, (5 * 
8)(RX1); movq (0 * 8)(%rsp), %rbp; movq (1 * 8)(%rsp), %rbx; movq (2 * 8)(%rsp), %r12; movq (3 * 8)(%rsp), %r13; movq (4 * 8)(%rsp), %r14; movq (5 * 8)(%rsp), %r15; addq $(8 * 8), %rsp; + EXIT_SYSV_FUNC ret; ELF(.size _gcry_twofish_amd64_ocb_enc,.-_gcry_twofish_amd64_ocb_enc;) .align 8 .globl _gcry_twofish_amd64_ocb_dec ELF(.type _gcry_twofish_amd64_ocb_dec,@function;) _gcry_twofish_amd64_ocb_dec: /* input: * %rdi: ctx, CTX * %rsi: dst (3 blocks) * %rdx: src (3 blocks) * %rcx: offset * %r8 : checksum * %r9 : L pointers (void *L[3]) */ + ENTER_SYSV_FUNC_PARAMS_6 + subq $(8 * 8), %rsp; movq %rbp, (0 * 8)(%rsp); movq %rbx, (1 * 8)(%rsp); movq %r12, (2 * 8)(%rsp); movq %r13, (3 * 8)(%rsp); movq %r14, (4 * 8)(%rsp); movq %r15, (5 * 8)(%rsp); movq %rsi, (6 * 8)(%rsp); movq %r8, (7 * 8)(%rsp); movq %rdx, RX0; movq %rcx, RX1; movq %r9, RY0; movq %rsi, RY1; /* Load offset */ movq (0 * 8)(RX1), RT0; movq (1 * 8)(RX1), RT1; /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ movq (RY0), RY2; xorq (0 * 8)(RY2), RT0; xorq (1 * 8)(RY2), RT1; movq (0 * 8)(RX0), RAB0; movq (1 * 8)(RX0), RCD0; /* Store Offset_i */ movq RT0, (0 * 8)(RY1); movq RT1, (1 * 8)(RY1); /* CX_i = C_i xor Offset_i */ xorq RT0, RAB0; xorq RT1, RCD0; /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ movq 8(RY0), RY2; xorq (0 * 8)(RY2), RT0; xorq (1 * 8)(RY2), RT1; movq (2 * 8)(RX0), RAB1; movq (3 * 8)(RX0), RCD1; /* Store Offset_i */ movq RT0, (2 * 8)(RY1); movq RT1, (3 * 8)(RY1); /* PX_i = P_i xor Offset_i */ xorq RT0, RAB1; xorq RT1, RCD1; /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ movq 16(RY0), RY2; xorq (0 * 8)(RY2), RT0; xorq (1 * 8)(RY2), RT1; movq (4 * 8)(RX0), RAB2; movq (5 * 8)(RX0), RCD2; /* Store Offset_i */ movq RT0, (4 * 8)(RY1); movq RT1, (5 * 8)(RY1); /* PX_i = P_i xor Offset_i */ xorq RT0, RAB2; xorq RT1, RCD2; /* Store offset */ movq RT0, (0 * 8)(RX1); movq RT1, (1 * 8)(RX1); /* PX_i = DECIPHER(K, CX_i) */ call __twofish_dec_blk3; movq (7 * 8)(%rsp), RX2; /*checksum*/ movq (6 * 8)(%rsp), RX1; /*dst*/ /* Load checksum */ movq (0 * 8)(RX2), RT0; movq (1 * 8)(RX2), RT1; /* P_i = PX_i xor Offset_i */ xorq RCD0, (0 * 8)(RX1); xorq RAB0, (1 * 8)(RX1); xorq RCD1, (2 * 8)(RX1); xorq RAB1, (3 * 8)(RX1); xorq RCD2, (4 * 8)(RX1); xorq RAB2, (5 * 8)(RX1); /* Checksum_i = Checksum_{i-1} xor P_i */ xorq (0 * 8)(RX1), RT0; xorq (1 * 8)(RX1), RT1; xorq (2 * 8)(RX1), RT0; xorq (3 * 8)(RX1), RT1; xorq (4 * 8)(RX1), RT0; xorq (5 * 8)(RX1), RT1; /* Store checksum */ movq RT0, (0 * 8)(RX2); movq RT1, (1 * 8)(RX2); movq (0 * 8)(%rsp), %rbp; movq (1 * 8)(%rsp), %rbx; movq (2 * 8)(%rsp), %r12; movq (3 * 8)(%rsp), %r13; movq (4 * 8)(%rsp), %r14; movq (5 * 8)(%rsp), %r15; addq $(8 * 8), %rsp; + EXIT_SYSV_FUNC ret; ELF(.size _gcry_twofish_amd64_ocb_dec,.-_gcry_twofish_amd64_ocb_dec;) .align 8 .globl _gcry_twofish_amd64_ocb_auth ELF(.type _gcry_twofish_amd64_ocb_auth,@function;) _gcry_twofish_amd64_ocb_auth: /* input: * %rdi: ctx, CTX * %rsi: abuf (3 blocks) * %rdx: offset * %rcx: checksum * %r8 : L pointers (void *L[3]) */ + ENTER_SYSV_FUNC_PARAMS_5 + subq $(8 * 8), %rsp; movq %rbp, (0 * 8)(%rsp); movq %rbx, (1 * 8)(%rsp); movq %r12, (2 * 8)(%rsp); movq %r13, (3 * 8)(%rsp); movq %r14, (4 * 8)(%rsp); movq %r15, (5 * 8)(%rsp); movq %rcx, (6 * 8)(%rsp); movq %rsi, RX0; movq %rdx, RX1; movq %r8, RY0; /* Load offset */ movq (0 * 8)(RX1), RT0; movq (1 * 8)(RX1), RT1; /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ movq (RY0), RY2; xorq (0 * 8)(RY2), RT0; xorq (1 * 8)(RY2), RT1; movq (0 * 8)(RX0), RAB0; movq (1 * 8)(RX0), RCD0; /* PX_i = P_i xor Offset_i */ xorq 
RT0, RAB0; xorq RT1, RCD0; /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ movq 8(RY0), RY2; xorq (0 * 8)(RY2), RT0; xorq (1 * 8)(RY2), RT1; movq (2 * 8)(RX0), RAB1; movq (3 * 8)(RX0), RCD1; /* PX_i = P_i xor Offset_i */ xorq RT0, RAB1; xorq RT1, RCD1; /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ movq 16(RY0), RY2; xorq (0 * 8)(RY2), RT0; xorq (1 * 8)(RY2), RT1; movq (4 * 8)(RX0), RAB2; movq (5 * 8)(RX0), RCD2; /* PX_i = P_i xor Offset_i */ xorq RT0, RAB2; xorq RT1, RCD2; /* Store offset */ movq RT0, (0 * 8)(RX1); movq RT1, (1 * 8)(RX1); /* C_i = ENCIPHER(K, PX_i) */ call __twofish_enc_blk3; movq (6 * 8)(%rsp), RX1; /*checksum*/ /* Checksum_i = C_i xor Checksum_i */ xorq RCD0, RCD1; xorq RAB0, RAB1; xorq RCD1, RCD2; xorq RAB1, RAB2; xorq RCD2, (0 * 8)(RX1); xorq RAB2, (1 * 8)(RX1); movq (0 * 8)(%rsp), %rbp; movq (1 * 8)(%rsp), %rbx; movq (2 * 8)(%rsp), %r12; movq (3 * 8)(%rsp), %r13; movq (4 * 8)(%rsp), %r14; movq (5 * 8)(%rsp), %r15; addq $(8 * 8), %rsp; + EXIT_SYSV_FUNC ret; ELF(.size _gcry_twofish_amd64_ocb_auth,.-_gcry_twofish_amd64_ocb_auth;) #endif /*USE_TWOFISH*/ #endif /*__x86_64*/ diff --git a/cipher/twofish.c b/cipher/twofish.c index 942e8d42..48feaae9 100644 --- a/cipher/twofish.c +++ b/cipher/twofish.c @@ -1,1860 +1,1773 @@ /* Twofish for GPG * Copyright (C) 1998, 2002, 2003 Free Software Foundation, Inc. * Written by Matthew Skala , July 26, 1998 * 256-bit key length added March 20, 1999 * Some modifications to reduce the text size by Werner Koch, April, 1998 * * This file is part of Libgcrypt. * * Libgcrypt is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as * published by the Free Software Foundation; either version 2.1 of * the License, or (at your option) any later version. * * Libgcrypt is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA ******************************************************************** * * This code is a "clean room" implementation, written from the paper * _Twofish: A 128-Bit Block Cipher_ by Bruce Schneier, John Kelsey, * Doug Whiting, David Wagner, Chris Hall, and Niels Ferguson, available * through http://www.counterpane.com/twofish.html * * For background information on multiplication in finite fields, used for * the matrix operations in the key schedule, see the book _Contemporary * Abstract Algebra_ by Joseph A. Gallian, especially chapter 22 in the * Third Edition. * * Only the 128- and 256-bit key sizes are supported. This code is intended * for GNU C on a 32-bit system, but it should work almost anywhere. Loops * are unrolled, precomputation tables are used, etc., for maximum speed at * some cost in memory consumption. */ #include #include #include #include /* for memcmp() */ #include "types.h" /* for byte and u32 typedefs */ #include "g10lib.h" #include "cipher.h" #include "bufhelp.h" #include "cipher-internal.h" #include "cipher-selftest.h" #define TWOFISH_BLOCKSIZE 16 /* USE_AMD64_ASM indicates whether to use AMD64 assembly code. 
*/ #undef USE_AMD64_ASM #if defined(__x86_64__) && (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \ defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS)) # define USE_AMD64_ASM 1 #endif /* USE_ARM_ASM indicates whether to use ARM assembly code. */ #undef USE_ARM_ASM #if defined(__ARMEL__) # if defined(HAVE_COMPATIBLE_GCC_ARM_PLATFORM_AS) # define USE_ARM_ASM 1 # endif #endif # if defined(__AARCH64EL__) # ifdef HAVE_COMPATIBLE_GCC_AARCH64_PLATFORM_AS # define USE_ARM_ASM 1 # endif # endif /* USE_AVX2 indicates whether to compile with AMD64 AVX2 code. */ #undef USE_AVX2 #if defined(__x86_64__) && (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \ defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS)) # if defined(ENABLE_AVX2_SUPPORT) # define USE_AVX2 1 # endif #endif /* Prototype for the self-test function. */ static const char *selftest(void); /* Structure for an expanded Twofish key. s contains the key-dependent * S-boxes composed with the MDS matrix; w contains the eight "whitening" * subkeys, K[0] through K[7]. k holds the remaining, "round" subkeys. Note * that k[i] corresponds to what the Twofish paper calls K[i+8]. */ typedef struct { u32 s[4][256], w[8], k[32]; #ifdef USE_AVX2 int use_avx2; #endif } TWOFISH_context; /* Assembly implementations use SystemV ABI, ABI conversion and additional * stack to store XMM6-XMM15 needed on Win64. */ #undef ASM_FUNC_ABI #if defined(USE_AVX2) # ifdef HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS # define ASM_FUNC_ABI __attribute__((sysv_abi)) # else # define ASM_FUNC_ABI # endif #endif /* These two tables are the q0 and q1 permutations, exactly as described in * the Twofish paper. */ static const byte q0[256] = { 0xA9, 0x67, 0xB3, 0xE8, 0x04, 0xFD, 0xA3, 0x76, 0x9A, 0x92, 0x80, 0x78, 0xE4, 0xDD, 0xD1, 0x38, 0x0D, 0xC6, 0x35, 0x98, 0x18, 0xF7, 0xEC, 0x6C, 0x43, 0x75, 0x37, 0x26, 0xFA, 0x13, 0x94, 0x48, 0xF2, 0xD0, 0x8B, 0x30, 0x84, 0x54, 0xDF, 0x23, 0x19, 0x5B, 0x3D, 0x59, 0xF3, 0xAE, 0xA2, 0x82, 0x63, 0x01, 0x83, 0x2E, 0xD9, 0x51, 0x9B, 0x7C, 0xA6, 0xEB, 0xA5, 0xBE, 0x16, 0x0C, 0xE3, 0x61, 0xC0, 0x8C, 0x3A, 0xF5, 0x73, 0x2C, 0x25, 0x0B, 0xBB, 0x4E, 0x89, 0x6B, 0x53, 0x6A, 0xB4, 0xF1, 0xE1, 0xE6, 0xBD, 0x45, 0xE2, 0xF4, 0xB6, 0x66, 0xCC, 0x95, 0x03, 0x56, 0xD4, 0x1C, 0x1E, 0xD7, 0xFB, 0xC3, 0x8E, 0xB5, 0xE9, 0xCF, 0xBF, 0xBA, 0xEA, 0x77, 0x39, 0xAF, 0x33, 0xC9, 0x62, 0x71, 0x81, 0x79, 0x09, 0xAD, 0x24, 0xCD, 0xF9, 0xD8, 0xE5, 0xC5, 0xB9, 0x4D, 0x44, 0x08, 0x86, 0xE7, 0xA1, 0x1D, 0xAA, 0xED, 0x06, 0x70, 0xB2, 0xD2, 0x41, 0x7B, 0xA0, 0x11, 0x31, 0xC2, 0x27, 0x90, 0x20, 0xF6, 0x60, 0xFF, 0x96, 0x5C, 0xB1, 0xAB, 0x9E, 0x9C, 0x52, 0x1B, 0x5F, 0x93, 0x0A, 0xEF, 0x91, 0x85, 0x49, 0xEE, 0x2D, 0x4F, 0x8F, 0x3B, 0x47, 0x87, 0x6D, 0x46, 0xD6, 0x3E, 0x69, 0x64, 0x2A, 0xCE, 0xCB, 0x2F, 0xFC, 0x97, 0x05, 0x7A, 0xAC, 0x7F, 0xD5, 0x1A, 0x4B, 0x0E, 0xA7, 0x5A, 0x28, 0x14, 0x3F, 0x29, 0x88, 0x3C, 0x4C, 0x02, 0xB8, 0xDA, 0xB0, 0x17, 0x55, 0x1F, 0x8A, 0x7D, 0x57, 0xC7, 0x8D, 0x74, 0xB7, 0xC4, 0x9F, 0x72, 0x7E, 0x15, 0x22, 0x12, 0x58, 0x07, 0x99, 0x34, 0x6E, 0x50, 0xDE, 0x68, 0x65, 0xBC, 0xDB, 0xF8, 0xC8, 0xA8, 0x2B, 0x40, 0xDC, 0xFE, 0x32, 0xA4, 0xCA, 0x10, 0x21, 0xF0, 0xD3, 0x5D, 0x0F, 0x00, 0x6F, 0x9D, 0x36, 0x42, 0x4A, 0x5E, 0xC1, 0xE0 }; static const byte q1[256] = { 0x75, 0xF3, 0xC6, 0xF4, 0xDB, 0x7B, 0xFB, 0xC8, 0x4A, 0xD3, 0xE6, 0x6B, 0x45, 0x7D, 0xE8, 0x4B, 0xD6, 0x32, 0xD8, 0xFD, 0x37, 0x71, 0xF1, 0xE1, 0x30, 0x0F, 0xF8, 0x1B, 0x87, 0xFA, 0x06, 0x3F, 0x5E, 0xBA, 0xAE, 0x5B, 0x8A, 0x00, 0xBC, 0x9D, 0x6D, 0xC1, 0xB1, 0x0E, 0x80, 0x5D, 0xD2, 0xD5, 0xA0, 0x84, 0x07, 0x14, 0xB5, 
0x90, 0x2C, 0xA3, 0xB2, 0x73, 0x4C, 0x54, 0x92, 0x74, 0x36, 0x51, 0x38, 0xB0, 0xBD, 0x5A, 0xFC, 0x60, 0x62, 0x96, 0x6C, 0x42, 0xF7, 0x10, 0x7C, 0x28, 0x27, 0x8C, 0x13, 0x95, 0x9C, 0xC7, 0x24, 0x46, 0x3B, 0x70, 0xCA, 0xE3, 0x85, 0xCB, 0x11, 0xD0, 0x93, 0xB8, 0xA6, 0x83, 0x20, 0xFF, 0x9F, 0x77, 0xC3, 0xCC, 0x03, 0x6F, 0x08, 0xBF, 0x40, 0xE7, 0x2B, 0xE2, 0x79, 0x0C, 0xAA, 0x82, 0x41, 0x3A, 0xEA, 0xB9, 0xE4, 0x9A, 0xA4, 0x97, 0x7E, 0xDA, 0x7A, 0x17, 0x66, 0x94, 0xA1, 0x1D, 0x3D, 0xF0, 0xDE, 0xB3, 0x0B, 0x72, 0xA7, 0x1C, 0xEF, 0xD1, 0x53, 0x3E, 0x8F, 0x33, 0x26, 0x5F, 0xEC, 0x76, 0x2A, 0x49, 0x81, 0x88, 0xEE, 0x21, 0xC4, 0x1A, 0xEB, 0xD9, 0xC5, 0x39, 0x99, 0xCD, 0xAD, 0x31, 0x8B, 0x01, 0x18, 0x23, 0xDD, 0x1F, 0x4E, 0x2D, 0xF9, 0x48, 0x4F, 0xF2, 0x65, 0x8E, 0x78, 0x5C, 0x58, 0x19, 0x8D, 0xE5, 0x98, 0x57, 0x67, 0x7F, 0x05, 0x64, 0xAF, 0x63, 0xB6, 0xFE, 0xF5, 0xB7, 0x3C, 0xA5, 0xCE, 0xE9, 0x68, 0x44, 0xE0, 0x4D, 0x43, 0x69, 0x29, 0x2E, 0xAC, 0x15, 0x59, 0xA8, 0x0A, 0x9E, 0x6E, 0x47, 0xDF, 0x34, 0x35, 0x6A, 0xCF, 0xDC, 0x22, 0xC9, 0xC0, 0x9B, 0x89, 0xD4, 0xED, 0xAB, 0x12, 0xA2, 0x0D, 0x52, 0xBB, 0x02, 0x2F, 0xA9, 0xD7, 0x61, 0x1E, 0xB4, 0x50, 0x04, 0xF6, 0xC2, 0x16, 0x25, 0x86, 0x56, 0x55, 0x09, 0xBE, 0x91 }; /* These MDS tables are actually tables of MDS composed with q0 and q1, * because it is only ever used that way and we can save some time by * precomputing. Of course the main saving comes from precomputing the * GF(2^8) multiplication involved in the MDS matrix multiply; by looking * things up in these tables we reduce the matrix multiply to four lookups * and three XORs. Semi-formally, the definition of these tables is: * mds[0][i] = MDS (q1[i] 0 0 0)^T mds[1][i] = MDS (0 q0[i] 0 0)^T * mds[2][i] = MDS (0 0 q1[i] 0)^T mds[3][i] = MDS (0 0 0 q0[i])^T * where ^T means "transpose", the matrix multiply is performed in GF(2^8) * represented as GF(2)[x]/v(x) where v(x)=x^8+x^6+x^5+x^3+1 as described * by Schneier et al, and I'm casually glossing over the byte/word * conversion issues. 
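The semi-formal definition above reduces to: pick the MDS column that matches the byte position, multiply each of its entries by the permuted byte in GF(2^8) mod v(x), and pack the four products into a word. A sketch under those definitions, with the MDS entries 0x01/0x5B/0xEF from the Twofish paper; gf169_mul and mds_entry are hypothetical names, and the byte packing shown is an assumption for illustration (the shipped tables fix the exact order):

    #include <stdint.h>

    /* GF(2^8) multiply modulo v(x) = x^8 + x^6 + x^5 + x^3 + 1 (0x169). */
    static uint8_t gf169_mul (uint8_t a, uint8_t b)
    {
      uint8_t r = 0;
      while (b)
        {
          if (b & 1)
            r ^= a;
          a = (uint8_t)((a << 1) ^ ((a & 0x80) ? 0x69 : 0));
          b >>= 1;
        }
      return r;
    }

    /* The MDS matrix from the Twofish paper. */
    static const uint8_t MDS[4][4] = {
      { 0x01, 0xEF, 0x5B, 0x5B },
      { 0x5B, 0xEF, 0xEF, 0x01 },
      { 0xEF, 0x5B, 0x01, 0xEF },
      { 0xEF, 0x01, 0xEF, 0x5B },
    };

    /* mds[col][i]: MDS applied to a vector whose only nonzero entry,
       q[i], sits in position `col`. */
    static uint32_t mds_entry (int col, uint8_t qi)
    {
      uint32_t w = 0;
      for (int row = 0; row < 4; row++)
        w |= (uint32_t)gf169_mul (MDS[row][col], qi) << (8 * row);
      return w;
    }

Per the column assignments in the comment, mds[0][i] would then be mds_entry (0, q1[i]), mds[1][i] would be mds_entry (1, q0[i]), and so on.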
*/ static const u32 mds[4][256] = { {0xBCBC3275, 0xECEC21F3, 0x202043C6, 0xB3B3C9F4, 0xDADA03DB, 0x02028B7B, 0xE2E22BFB, 0x9E9EFAC8, 0xC9C9EC4A, 0xD4D409D3, 0x18186BE6, 0x1E1E9F6B, 0x98980E45, 0xB2B2387D, 0xA6A6D2E8, 0x2626B74B, 0x3C3C57D6, 0x93938A32, 0x8282EED8, 0x525298FD, 0x7B7BD437, 0xBBBB3771, 0x5B5B97F1, 0x474783E1, 0x24243C30, 0x5151E20F, 0xBABAC6F8, 0x4A4AF31B, 0xBFBF4887, 0x0D0D70FA, 0xB0B0B306, 0x7575DE3F, 0xD2D2FD5E, 0x7D7D20BA, 0x666631AE, 0x3A3AA35B, 0x59591C8A, 0x00000000, 0xCDCD93BC, 0x1A1AE09D, 0xAEAE2C6D, 0x7F7FABC1, 0x2B2BC7B1, 0xBEBEB90E, 0xE0E0A080, 0x8A8A105D, 0x3B3B52D2, 0x6464BAD5, 0xD8D888A0, 0xE7E7A584, 0x5F5FE807, 0x1B1B1114, 0x2C2CC2B5, 0xFCFCB490, 0x3131272C, 0x808065A3, 0x73732AB2, 0x0C0C8173, 0x79795F4C, 0x6B6B4154, 0x4B4B0292, 0x53536974, 0x94948F36, 0x83831F51, 0x2A2A3638, 0xC4C49CB0, 0x2222C8BD, 0xD5D5F85A, 0xBDBDC3FC, 0x48487860, 0xFFFFCE62, 0x4C4C0796, 0x4141776C, 0xC7C7E642, 0xEBEB24F7, 0x1C1C1410, 0x5D5D637C, 0x36362228, 0x6767C027, 0xE9E9AF8C, 0x4444F913, 0x1414EA95, 0xF5F5BB9C, 0xCFCF18C7, 0x3F3F2D24, 0xC0C0E346, 0x7272DB3B, 0x54546C70, 0x29294CCA, 0xF0F035E3, 0x0808FE85, 0xC6C617CB, 0xF3F34F11, 0x8C8CE4D0, 0xA4A45993, 0xCACA96B8, 0x68683BA6, 0xB8B84D83, 0x38382820, 0xE5E52EFF, 0xADAD569F, 0x0B0B8477, 0xC8C81DC3, 0x9999FFCC, 0x5858ED03, 0x19199A6F, 0x0E0E0A08, 0x95957EBF, 0x70705040, 0xF7F730E7, 0x6E6ECF2B, 0x1F1F6EE2, 0xB5B53D79, 0x09090F0C, 0x616134AA, 0x57571682, 0x9F9F0B41, 0x9D9D803A, 0x111164EA, 0x2525CDB9, 0xAFAFDDE4, 0x4545089A, 0xDFDF8DA4, 0xA3A35C97, 0xEAEAD57E, 0x353558DA, 0xEDEDD07A, 0x4343FC17, 0xF8F8CB66, 0xFBFBB194, 0x3737D3A1, 0xFAFA401D, 0xC2C2683D, 0xB4B4CCF0, 0x32325DDE, 0x9C9C71B3, 0x5656E70B, 0xE3E3DA72, 0x878760A7, 0x15151B1C, 0xF9F93AEF, 0x6363BFD1, 0x3434A953, 0x9A9A853E, 0xB1B1428F, 0x7C7CD133, 0x88889B26, 0x3D3DA65F, 0xA1A1D7EC, 0xE4E4DF76, 0x8181942A, 0x91910149, 0x0F0FFB81, 0xEEEEAA88, 0x161661EE, 0xD7D77321, 0x9797F5C4, 0xA5A5A81A, 0xFEFE3FEB, 0x6D6DB5D9, 0x7878AEC5, 0xC5C56D39, 0x1D1DE599, 0x7676A4CD, 0x3E3EDCAD, 0xCBCB6731, 0xB6B6478B, 0xEFEF5B01, 0x12121E18, 0x6060C523, 0x6A6AB0DD, 0x4D4DF61F, 0xCECEE94E, 0xDEDE7C2D, 0x55559DF9, 0x7E7E5A48, 0x2121B24F, 0x03037AF2, 0xA0A02665, 0x5E5E198E, 0x5A5A6678, 0x65654B5C, 0x62624E58, 0xFDFD4519, 0x0606F48D, 0x404086E5, 0xF2F2BE98, 0x3333AC57, 0x17179067, 0x05058E7F, 0xE8E85E05, 0x4F4F7D64, 0x89896AAF, 0x10109563, 0x74742FB6, 0x0A0A75FE, 0x5C5C92F5, 0x9B9B74B7, 0x2D2D333C, 0x3030D6A5, 0x2E2E49CE, 0x494989E9, 0x46467268, 0x77775544, 0xA8A8D8E0, 0x9696044D, 0x2828BD43, 0xA9A92969, 0xD9D97929, 0x8686912E, 0xD1D187AC, 0xF4F44A15, 0x8D8D1559, 0xD6D682A8, 0xB9B9BC0A, 0x42420D9E, 0xF6F6C16E, 0x2F2FB847, 0xDDDD06DF, 0x23233934, 0xCCCC6235, 0xF1F1C46A, 0xC1C112CF, 0x8585EBDC, 0x8F8F9E22, 0x7171A1C9, 0x9090F0C0, 0xAAAA539B, 0x0101F189, 0x8B8BE1D4, 0x4E4E8CED, 0x8E8E6FAB, 0xABABA212, 0x6F6F3EA2, 0xE6E6540D, 0xDBDBF252, 0x92927BBB, 0xB7B7B602, 0x6969CA2F, 0x3939D9A9, 0xD3D30CD7, 0xA7A72361, 0xA2A2AD1E, 0xC3C399B4, 0x6C6C4450, 0x07070504, 0x04047FF6, 0x272746C2, 0xACACA716, 0xD0D07625, 0x50501386, 0xDCDCF756, 0x84841A55, 0xE1E15109, 0x7A7A25BE, 0x1313EF91}, {0xA9D93939, 0x67901717, 0xB3719C9C, 0xE8D2A6A6, 0x04050707, 0xFD985252, 0xA3658080, 0x76DFE4E4, 0x9A084545, 0x92024B4B, 0x80A0E0E0, 0x78665A5A, 0xE4DDAFAF, 0xDDB06A6A, 0xD1BF6363, 0x38362A2A, 0x0D54E6E6, 0xC6432020, 0x3562CCCC, 0x98BEF2F2, 0x181E1212, 0xF724EBEB, 0xECD7A1A1, 0x6C774141, 0x43BD2828, 0x7532BCBC, 0x37D47B7B, 0x269B8888, 0xFA700D0D, 0x13F94444, 0x94B1FBFB, 0x485A7E7E, 0xF27A0303, 0xD0E48C8C, 0x8B47B6B6, 0x303C2424, 0x84A5E7E7, 
0x54416B6B, 0xDF06DDDD, 0x23C56060, 0x1945FDFD, 0x5BA33A3A, 0x3D68C2C2, 0x59158D8D, 0xF321ECEC, 0xAE316666, 0xA23E6F6F, 0x82165757, 0x63951010, 0x015BEFEF, 0x834DB8B8, 0x2E918686, 0xD9B56D6D, 0x511F8383, 0x9B53AAAA, 0x7C635D5D, 0xA63B6868, 0xEB3FFEFE, 0xA5D63030, 0xBE257A7A, 0x16A7ACAC, 0x0C0F0909, 0xE335F0F0, 0x6123A7A7, 0xC0F09090, 0x8CAFE9E9, 0x3A809D9D, 0xF5925C5C, 0x73810C0C, 0x2C273131, 0x2576D0D0, 0x0BE75656, 0xBB7B9292, 0x4EE9CECE, 0x89F10101, 0x6B9F1E1E, 0x53A93434, 0x6AC4F1F1, 0xB499C3C3, 0xF1975B5B, 0xE1834747, 0xE66B1818, 0xBDC82222, 0x450E9898, 0xE26E1F1F, 0xF4C9B3B3, 0xB62F7474, 0x66CBF8F8, 0xCCFF9999, 0x95EA1414, 0x03ED5858, 0x56F7DCDC, 0xD4E18B8B, 0x1C1B1515, 0x1EADA2A2, 0xD70CD3D3, 0xFB2BE2E2, 0xC31DC8C8, 0x8E195E5E, 0xB5C22C2C, 0xE9894949, 0xCF12C1C1, 0xBF7E9595, 0xBA207D7D, 0xEA641111, 0x77840B0B, 0x396DC5C5, 0xAF6A8989, 0x33D17C7C, 0xC9A17171, 0x62CEFFFF, 0x7137BBBB, 0x81FB0F0F, 0x793DB5B5, 0x0951E1E1, 0xADDC3E3E, 0x242D3F3F, 0xCDA47676, 0xF99D5555, 0xD8EE8282, 0xE5864040, 0xC5AE7878, 0xB9CD2525, 0x4D049696, 0x44557777, 0x080A0E0E, 0x86135050, 0xE730F7F7, 0xA1D33737, 0x1D40FAFA, 0xAA346161, 0xED8C4E4E, 0x06B3B0B0, 0x706C5454, 0xB22A7373, 0xD2523B3B, 0x410B9F9F, 0x7B8B0202, 0xA088D8D8, 0x114FF3F3, 0x3167CBCB, 0xC2462727, 0x27C06767, 0x90B4FCFC, 0x20283838, 0xF67F0404, 0x60784848, 0xFF2EE5E5, 0x96074C4C, 0x5C4B6565, 0xB1C72B2B, 0xAB6F8E8E, 0x9E0D4242, 0x9CBBF5F5, 0x52F2DBDB, 0x1BF34A4A, 0x5FA63D3D, 0x9359A4A4, 0x0ABCB9B9, 0xEF3AF9F9, 0x91EF1313, 0x85FE0808, 0x49019191, 0xEE611616, 0x2D7CDEDE, 0x4FB22121, 0x8F42B1B1, 0x3BDB7272, 0x47B82F2F, 0x8748BFBF, 0x6D2CAEAE, 0x46E3C0C0, 0xD6573C3C, 0x3E859A9A, 0x6929A9A9, 0x647D4F4F, 0x2A948181, 0xCE492E2E, 0xCB17C6C6, 0x2FCA6969, 0xFCC3BDBD, 0x975CA3A3, 0x055EE8E8, 0x7AD0EDED, 0xAC87D1D1, 0x7F8E0505, 0xD5BA6464, 0x1AA8A5A5, 0x4BB72626, 0x0EB9BEBE, 0xA7608787, 0x5AF8D5D5, 0x28223636, 0x14111B1B, 0x3FDE7575, 0x2979D9D9, 0x88AAEEEE, 0x3C332D2D, 0x4C5F7979, 0x02B6B7B7, 0xB896CACA, 0xDA583535, 0xB09CC4C4, 0x17FC4343, 0x551A8484, 0x1FF64D4D, 0x8A1C5959, 0x7D38B2B2, 0x57AC3333, 0xC718CFCF, 0x8DF40606, 0x74695353, 0xB7749B9B, 0xC4F59797, 0x9F56ADAD, 0x72DAE3E3, 0x7ED5EAEA, 0x154AF4F4, 0x229E8F8F, 0x12A2ABAB, 0x584E6262, 0x07E85F5F, 0x99E51D1D, 0x34392323, 0x6EC1F6F6, 0x50446C6C, 0xDE5D3232, 0x68724646, 0x6526A0A0, 0xBC93CDCD, 0xDB03DADA, 0xF8C6BABA, 0xC8FA9E9E, 0xA882D6D6, 0x2BCF6E6E, 0x40507070, 0xDCEB8585, 0xFE750A0A, 0x328A9393, 0xA48DDFDF, 0xCA4C2929, 0x10141C1C, 0x2173D7D7, 0xF0CCB4B4, 0xD309D4D4, 0x5D108A8A, 0x0FE25151, 0x00000000, 0x6F9A1919, 0x9DE01A1A, 0x368F9494, 0x42E6C7C7, 0x4AECC9C9, 0x5EFDD2D2, 0xC1AB7F7F, 0xE0D8A8A8}, {0xBC75BC32, 0xECF3EC21, 0x20C62043, 0xB3F4B3C9, 0xDADBDA03, 0x027B028B, 0xE2FBE22B, 0x9EC89EFA, 0xC94AC9EC, 0xD4D3D409, 0x18E6186B, 0x1E6B1E9F, 0x9845980E, 0xB27DB238, 0xA6E8A6D2, 0x264B26B7, 0x3CD63C57, 0x9332938A, 0x82D882EE, 0x52FD5298, 0x7B377BD4, 0xBB71BB37, 0x5BF15B97, 0x47E14783, 0x2430243C, 0x510F51E2, 0xBAF8BAC6, 0x4A1B4AF3, 0xBF87BF48, 0x0DFA0D70, 0xB006B0B3, 0x753F75DE, 0xD25ED2FD, 0x7DBA7D20, 0x66AE6631, 0x3A5B3AA3, 0x598A591C, 0x00000000, 0xCDBCCD93, 0x1A9D1AE0, 0xAE6DAE2C, 0x7FC17FAB, 0x2BB12BC7, 0xBE0EBEB9, 0xE080E0A0, 0x8A5D8A10, 0x3BD23B52, 0x64D564BA, 0xD8A0D888, 0xE784E7A5, 0x5F075FE8, 0x1B141B11, 0x2CB52CC2, 0xFC90FCB4, 0x312C3127, 0x80A38065, 0x73B2732A, 0x0C730C81, 0x794C795F, 0x6B546B41, 0x4B924B02, 0x53745369, 0x9436948F, 0x8351831F, 0x2A382A36, 0xC4B0C49C, 0x22BD22C8, 0xD55AD5F8, 0xBDFCBDC3, 0x48604878, 0xFF62FFCE, 0x4C964C07, 0x416C4177, 0xC742C7E6, 0xEBF7EB24, 0x1C101C14, 0x5D7C5D63, 
0x36283622, 0x672767C0, 0xE98CE9AF, 0x441344F9, 0x149514EA, 0xF59CF5BB, 0xCFC7CF18, 0x3F243F2D, 0xC046C0E3, 0x723B72DB, 0x5470546C, 0x29CA294C, 0xF0E3F035, 0x088508FE, 0xC6CBC617, 0xF311F34F, 0x8CD08CE4, 0xA493A459, 0xCAB8CA96, 0x68A6683B, 0xB883B84D, 0x38203828, 0xE5FFE52E, 0xAD9FAD56, 0x0B770B84, 0xC8C3C81D, 0x99CC99FF, 0x580358ED, 0x196F199A, 0x0E080E0A, 0x95BF957E, 0x70407050, 0xF7E7F730, 0x6E2B6ECF, 0x1FE21F6E, 0xB579B53D, 0x090C090F, 0x61AA6134, 0x57825716, 0x9F419F0B, 0x9D3A9D80, 0x11EA1164, 0x25B925CD, 0xAFE4AFDD, 0x459A4508, 0xDFA4DF8D, 0xA397A35C, 0xEA7EEAD5, 0x35DA3558, 0xED7AEDD0, 0x431743FC, 0xF866F8CB, 0xFB94FBB1, 0x37A137D3, 0xFA1DFA40, 0xC23DC268, 0xB4F0B4CC, 0x32DE325D, 0x9CB39C71, 0x560B56E7, 0xE372E3DA, 0x87A78760, 0x151C151B, 0xF9EFF93A, 0x63D163BF, 0x345334A9, 0x9A3E9A85, 0xB18FB142, 0x7C337CD1, 0x8826889B, 0x3D5F3DA6, 0xA1ECA1D7, 0xE476E4DF, 0x812A8194, 0x91499101, 0x0F810FFB, 0xEE88EEAA, 0x16EE1661, 0xD721D773, 0x97C497F5, 0xA51AA5A8, 0xFEEBFE3F, 0x6DD96DB5, 0x78C578AE, 0xC539C56D, 0x1D991DE5, 0x76CD76A4, 0x3EAD3EDC, 0xCB31CB67, 0xB68BB647, 0xEF01EF5B, 0x1218121E, 0x602360C5, 0x6ADD6AB0, 0x4D1F4DF6, 0xCE4ECEE9, 0xDE2DDE7C, 0x55F9559D, 0x7E487E5A, 0x214F21B2, 0x03F2037A, 0xA065A026, 0x5E8E5E19, 0x5A785A66, 0x655C654B, 0x6258624E, 0xFD19FD45, 0x068D06F4, 0x40E54086, 0xF298F2BE, 0x335733AC, 0x17671790, 0x057F058E, 0xE805E85E, 0x4F644F7D, 0x89AF896A, 0x10631095, 0x74B6742F, 0x0AFE0A75, 0x5CF55C92, 0x9BB79B74, 0x2D3C2D33, 0x30A530D6, 0x2ECE2E49, 0x49E94989, 0x46684672, 0x77447755, 0xA8E0A8D8, 0x964D9604, 0x284328BD, 0xA969A929, 0xD929D979, 0x862E8691, 0xD1ACD187, 0xF415F44A, 0x8D598D15, 0xD6A8D682, 0xB90AB9BC, 0x429E420D, 0xF66EF6C1, 0x2F472FB8, 0xDDDFDD06, 0x23342339, 0xCC35CC62, 0xF16AF1C4, 0xC1CFC112, 0x85DC85EB, 0x8F228F9E, 0x71C971A1, 0x90C090F0, 0xAA9BAA53, 0x018901F1, 0x8BD48BE1, 0x4EED4E8C, 0x8EAB8E6F, 0xAB12ABA2, 0x6FA26F3E, 0xE60DE654, 0xDB52DBF2, 0x92BB927B, 0xB702B7B6, 0x692F69CA, 0x39A939D9, 0xD3D7D30C, 0xA761A723, 0xA21EA2AD, 0xC3B4C399, 0x6C506C44, 0x07040705, 0x04F6047F, 0x27C22746, 0xAC16ACA7, 0xD025D076, 0x50865013, 0xDC56DCF7, 0x8455841A, 0xE109E151, 0x7ABE7A25, 0x139113EF}, {0xD939A9D9, 0x90176790, 0x719CB371, 0xD2A6E8D2, 0x05070405, 0x9852FD98, 0x6580A365, 0xDFE476DF, 0x08459A08, 0x024B9202, 0xA0E080A0, 0x665A7866, 0xDDAFE4DD, 0xB06ADDB0, 0xBF63D1BF, 0x362A3836, 0x54E60D54, 0x4320C643, 0x62CC3562, 0xBEF298BE, 0x1E12181E, 0x24EBF724, 0xD7A1ECD7, 0x77416C77, 0xBD2843BD, 0x32BC7532, 0xD47B37D4, 0x9B88269B, 0x700DFA70, 0xF94413F9, 0xB1FB94B1, 0x5A7E485A, 0x7A03F27A, 0xE48CD0E4, 0x47B68B47, 0x3C24303C, 0xA5E784A5, 0x416B5441, 0x06DDDF06, 0xC56023C5, 0x45FD1945, 0xA33A5BA3, 0x68C23D68, 0x158D5915, 0x21ECF321, 0x3166AE31, 0x3E6FA23E, 0x16578216, 0x95106395, 0x5BEF015B, 0x4DB8834D, 0x91862E91, 0xB56DD9B5, 0x1F83511F, 0x53AA9B53, 0x635D7C63, 0x3B68A63B, 0x3FFEEB3F, 0xD630A5D6, 0x257ABE25, 0xA7AC16A7, 0x0F090C0F, 0x35F0E335, 0x23A76123, 0xF090C0F0, 0xAFE98CAF, 0x809D3A80, 0x925CF592, 0x810C7381, 0x27312C27, 0x76D02576, 0xE7560BE7, 0x7B92BB7B, 0xE9CE4EE9, 0xF10189F1, 0x9F1E6B9F, 0xA93453A9, 0xC4F16AC4, 0x99C3B499, 0x975BF197, 0x8347E183, 0x6B18E66B, 0xC822BDC8, 0x0E98450E, 0x6E1FE26E, 0xC9B3F4C9, 0x2F74B62F, 0xCBF866CB, 0xFF99CCFF, 0xEA1495EA, 0xED5803ED, 0xF7DC56F7, 0xE18BD4E1, 0x1B151C1B, 0xADA21EAD, 0x0CD3D70C, 0x2BE2FB2B, 0x1DC8C31D, 0x195E8E19, 0xC22CB5C2, 0x8949E989, 0x12C1CF12, 0x7E95BF7E, 0x207DBA20, 0x6411EA64, 0x840B7784, 0x6DC5396D, 0x6A89AF6A, 0xD17C33D1, 0xA171C9A1, 0xCEFF62CE, 0x37BB7137, 0xFB0F81FB, 0x3DB5793D, 0x51E10951, 0xDC3EADDC, 0x2D3F242D, 
0xA476CDA4, 0x9D55F99D, 0xEE82D8EE, 0x8640E586, 0xAE78C5AE, 0xCD25B9CD, 0x04964D04, 0x55774455, 0x0A0E080A, 0x13508613, 0x30F7E730, 0xD337A1D3, 0x40FA1D40, 0x3461AA34, 0x8C4EED8C, 0xB3B006B3, 0x6C54706C, 0x2A73B22A, 0x523BD252, 0x0B9F410B, 0x8B027B8B, 0x88D8A088, 0x4FF3114F, 0x67CB3167, 0x4627C246, 0xC06727C0, 0xB4FC90B4, 0x28382028, 0x7F04F67F, 0x78486078, 0x2EE5FF2E, 0x074C9607, 0x4B655C4B, 0xC72BB1C7, 0x6F8EAB6F, 0x0D429E0D, 0xBBF59CBB, 0xF2DB52F2, 0xF34A1BF3, 0xA63D5FA6, 0x59A49359, 0xBCB90ABC, 0x3AF9EF3A, 0xEF1391EF, 0xFE0885FE, 0x01914901, 0x6116EE61, 0x7CDE2D7C, 0xB2214FB2, 0x42B18F42, 0xDB723BDB, 0xB82F47B8, 0x48BF8748, 0x2CAE6D2C, 0xE3C046E3, 0x573CD657, 0x859A3E85, 0x29A96929, 0x7D4F647D, 0x94812A94, 0x492ECE49, 0x17C6CB17, 0xCA692FCA, 0xC3BDFCC3, 0x5CA3975C, 0x5EE8055E, 0xD0ED7AD0, 0x87D1AC87, 0x8E057F8E, 0xBA64D5BA, 0xA8A51AA8, 0xB7264BB7, 0xB9BE0EB9, 0x6087A760, 0xF8D55AF8, 0x22362822, 0x111B1411, 0xDE753FDE, 0x79D92979, 0xAAEE88AA, 0x332D3C33, 0x5F794C5F, 0xB6B702B6, 0x96CAB896, 0x5835DA58, 0x9CC4B09C, 0xFC4317FC, 0x1A84551A, 0xF64D1FF6, 0x1C598A1C, 0x38B27D38, 0xAC3357AC, 0x18CFC718, 0xF4068DF4, 0x69537469, 0x749BB774, 0xF597C4F5, 0x56AD9F56, 0xDAE372DA, 0xD5EA7ED5, 0x4AF4154A, 0x9E8F229E, 0xA2AB12A2, 0x4E62584E, 0xE85F07E8, 0xE51D99E5, 0x39233439, 0xC1F66EC1, 0x446C5044, 0x5D32DE5D, 0x72466872, 0x26A06526, 0x93CDBC93, 0x03DADB03, 0xC6BAF8C6, 0xFA9EC8FA, 0x82D6A882, 0xCF6E2BCF, 0x50704050, 0xEB85DCEB, 0x750AFE75, 0x8A93328A, 0x8DDFA48D, 0x4C29CA4C, 0x141C1014, 0x73D72173, 0xCCB4F0CC, 0x09D4D309, 0x108A5D10, 0xE2510FE2, 0x00000000, 0x9A196F9A, 0xE01A9DE0, 0x8F94368F, 0xE6C742E6, 0xECC94AEC, 0xFDD25EFD, 0xAB7FC1AB, 0xD8A8E0D8} }; /* The exp_to_poly and poly_to_exp tables are used to perform efficient * operations in GF(2^8) represented as GF(2)[x]/w(x) where * w(x)=x^8+x^6+x^3+x^2+1. We care about doing that because it's part of the * definition of the RS matrix in the key schedule. Elements of that field * are polynomials of degree not greater than 7 and all coefficients 0 or 1, * which can be represented naturally by bytes (just substitute x=2). In that * form, GF(2^8) addition is the same as bitwise XOR, but GF(2^8) * multiplication is inefficient without hardware support. To multiply * faster, I make use of the fact x is a generator for the nonzero elements, * so that every element p of GF(2)[x]/w(x) is either 0 or equal to (x)^n for * some n in 0..254. Note that that caret is exponentiation in GF(2^8), * *not* polynomial notation. So if I want to compute pq where p and q are * in GF(2^8), I can just say: * 1. if p=0 or q=0 then pq=0 * 2. otherwise, find m and n such that p=x^m and q=x^n * 3. pq=(x^m)(x^n)=x^(m+n), so add m and n and find pq * The translations in steps 2 and 3 are looked up in the tables * poly_to_exp (for step 2) and exp_to_poly (for step 3). To see this * in action, look at the CALC_S macro. As additional wrinkles, note that * one of my operands is always a constant, so the poly_to_exp lookup on it * is done in advance; I included the original values in the comments so * readers can have some chance of recognizing that this *is* the RS matrix * from the Twofish paper. I've only included the table entries I actually * need; I never do a lookup on a variable input of zero and the biggest * exponents I'll ever see are 254 (variable) and 237 (constant), so they'll * never sum to more than 491. I'm repeating part of the exp_to_poly table * so that I don't have to do mod-255 reduction in the exponent arithmetic. 
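 *
 * To make the table scheme concrete, here is a sketch (illustrative only,
 * not part of this file) of a general multiply in this representation;
 * the real code never needs the zero test on the constant side and
 * inlines everything through CALC_S below:
 *
 *   static byte gf_mul (byte p, byte q)
 *   {
 *     if (p == 0 || q == 0)
 *       return 0;
 *     return exp_to_poly[poly_to_exp[p] + poly_to_exp[q]];
 *   }
 *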
* Since I know my constant operands are never zero, I only have to worry * about zero values in the variable operand, and I do it with a simple * conditional branch. I know conditionals are expensive, but I couldn't * see a non-horrible way of avoiding them, and I did manage to group the * statements so that each if covers four group multiplications. */ static const u16 poly_to_exp[256] = { 492, 0x00, 0x01, 0x17, 0x02, 0x2E, 0x18, 0x53, 0x03, 0x6A, 0x2F, 0x93, 0x19, 0x34, 0x54, 0x45, 0x04, 0x5C, 0x6B, 0xB6, 0x30, 0xA6, 0x94, 0x4B, 0x1A, 0x8C, 0x35, 0x81, 0x55, 0xAA, 0x46, 0x0D, 0x05, 0x24, 0x5D, 0x87, 0x6C, 0x9B, 0xB7, 0xC1, 0x31, 0x2B, 0xA7, 0xA3, 0x95, 0x98, 0x4C, 0xCA, 0x1B, 0xE6, 0x8D, 0x73, 0x36, 0xCD, 0x82, 0x12, 0x56, 0x62, 0xAB, 0xF0, 0x47, 0x4F, 0x0E, 0xBD, 0x06, 0xD4, 0x25, 0xD2, 0x5E, 0x27, 0x88, 0x66, 0x6D, 0xD6, 0x9C, 0x79, 0xB8, 0x08, 0xC2, 0xDF, 0x32, 0x68, 0x2C, 0xFD, 0xA8, 0x8A, 0xA4, 0x5A, 0x96, 0x29, 0x99, 0x22, 0x4D, 0x60, 0xCB, 0xE4, 0x1C, 0x7B, 0xE7, 0x3B, 0x8E, 0x9E, 0x74, 0xF4, 0x37, 0xD8, 0xCE, 0xF9, 0x83, 0x6F, 0x13, 0xB2, 0x57, 0xE1, 0x63, 0xDC, 0xAC, 0xC4, 0xF1, 0xAF, 0x48, 0x0A, 0x50, 0x42, 0x0F, 0xBA, 0xBE, 0xC7, 0x07, 0xDE, 0xD5, 0x78, 0x26, 0x65, 0xD3, 0xD1, 0x5F, 0xE3, 0x28, 0x21, 0x89, 0x59, 0x67, 0xFC, 0x6E, 0xB1, 0xD7, 0xF8, 0x9D, 0xF3, 0x7A, 0x3A, 0xB9, 0xC6, 0x09, 0x41, 0xC3, 0xAE, 0xE0, 0xDB, 0x33, 0x44, 0x69, 0x92, 0x2D, 0x52, 0xFE, 0x16, 0xA9, 0x0C, 0x8B, 0x80, 0xA5, 0x4A, 0x5B, 0xB5, 0x97, 0xC9, 0x2A, 0xA2, 0x9A, 0xC0, 0x23, 0x86, 0x4E, 0xBC, 0x61, 0xEF, 0xCC, 0x11, 0xE5, 0x72, 0x1D, 0x3D, 0x7C, 0xEB, 0xE8, 0xE9, 0x3C, 0xEA, 0x8F, 0x7D, 0x9F, 0xEC, 0x75, 0x1E, 0xF5, 0x3E, 0x38, 0xF6, 0xD9, 0x3F, 0xCF, 0x76, 0xFA, 0x1F, 0x84, 0xA0, 0x70, 0xED, 0x14, 0x90, 0xB3, 0x7E, 0x58, 0xFB, 0xE2, 0x20, 0x64, 0xD0, 0xDD, 0x77, 0xAD, 0xDA, 0xC5, 0x40, 0xF2, 0x39, 0xB0, 0xF7, 0x49, 0xB4, 0x0B, 0x7F, 0x51, 0x15, 0x43, 0x91, 0x10, 0x71, 0xBB, 0xEE, 0xBF, 0x85, 0xC8, 0xA1 }; static const byte exp_to_poly[492 + 256] = { 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x4D, 0x9A, 0x79, 0xF2, 0xA9, 0x1F, 0x3E, 0x7C, 0xF8, 0xBD, 0x37, 0x6E, 0xDC, 0xF5, 0xA7, 0x03, 0x06, 0x0C, 0x18, 0x30, 0x60, 0xC0, 0xCD, 0xD7, 0xE3, 0x8B, 0x5B, 0xB6, 0x21, 0x42, 0x84, 0x45, 0x8A, 0x59, 0xB2, 0x29, 0x52, 0xA4, 0x05, 0x0A, 0x14, 0x28, 0x50, 0xA0, 0x0D, 0x1A, 0x34, 0x68, 0xD0, 0xED, 0x97, 0x63, 0xC6, 0xC1, 0xCF, 0xD3, 0xEB, 0x9B, 0x7B, 0xF6, 0xA1, 0x0F, 0x1E, 0x3C, 0x78, 0xF0, 0xAD, 0x17, 0x2E, 0x5C, 0xB8, 0x3D, 0x7A, 0xF4, 0xA5, 0x07, 0x0E, 0x1C, 0x38, 0x70, 0xE0, 0x8D, 0x57, 0xAE, 0x11, 0x22, 0x44, 0x88, 0x5D, 0xBA, 0x39, 0x72, 0xE4, 0x85, 0x47, 0x8E, 0x51, 0xA2, 0x09, 0x12, 0x24, 0x48, 0x90, 0x6D, 0xDA, 0xF9, 0xBF, 0x33, 0x66, 0xCC, 0xD5, 0xE7, 0x83, 0x4B, 0x96, 0x61, 0xC2, 0xC9, 0xDF, 0xF3, 0xAB, 0x1B, 0x36, 0x6C, 0xD8, 0xFD, 0xB7, 0x23, 0x46, 0x8C, 0x55, 0xAA, 0x19, 0x32, 0x64, 0xC8, 0xDD, 0xF7, 0xA3, 0x0B, 0x16, 0x2C, 0x58, 0xB0, 0x2D, 0x5A, 0xB4, 0x25, 0x4A, 0x94, 0x65, 0xCA, 0xD9, 0xFF, 0xB3, 0x2B, 0x56, 0xAC, 0x15, 0x2A, 0x54, 0xA8, 0x1D, 0x3A, 0x74, 0xE8, 0x9D, 0x77, 0xEE, 0x91, 0x6F, 0xDE, 0xF1, 0xAF, 0x13, 0x26, 0x4C, 0x98, 0x7D, 0xFA, 0xB9, 0x3F, 0x7E, 0xFC, 0xB5, 0x27, 0x4E, 0x9C, 0x75, 0xEA, 0x99, 0x7F, 0xFE, 0xB1, 0x2F, 0x5E, 0xBC, 0x35, 0x6A, 0xD4, 0xE5, 0x87, 0x43, 0x86, 0x41, 0x82, 0x49, 0x92, 0x69, 0xD2, 0xE9, 0x9F, 0x73, 0xE6, 0x81, 0x4F, 0x9E, 0x71, 0xE2, 0x89, 0x5F, 0xBE, 0x31, 0x62, 0xC4, 0xC5, 0xC7, 0xC3, 0xCB, 0xDB, 0xFB, 0xBB, 0x3B, 0x76, 0xEC, 0x95, 0x67, 0xCE, 0xD1, 0xEF, 0x93, 0x6B, 0xD6, 0xE1, 0x8F, 0x53, 0xA6, 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 
0x4D, 0x9A, 0x79, 0xF2, 0xA9, 0x1F, 0x3E, 0x7C, 0xF8, 0xBD, 0x37, 0x6E, 0xDC, 0xF5, 0xA7, 0x03, 0x06, 0x0C, 0x18, 0x30, 0x60, 0xC0, 0xCD, 0xD7, 0xE3, 0x8B, 0x5B, 0xB6, 0x21, 0x42, 0x84, 0x45, 0x8A, 0x59, 0xB2, 0x29, 0x52, 0xA4, 0x05, 0x0A, 0x14, 0x28, 0x50, 0xA0, 0x0D, 0x1A, 0x34, 0x68, 0xD0, 0xED, 0x97, 0x63, 0xC6, 0xC1, 0xCF, 0xD3, 0xEB, 0x9B, 0x7B, 0xF6, 0xA1, 0x0F, 0x1E, 0x3C, 0x78, 0xF0, 0xAD, 0x17, 0x2E, 0x5C, 0xB8, 0x3D, 0x7A, 0xF4, 0xA5, 0x07, 0x0E, 0x1C, 0x38, 0x70, 0xE0, 0x8D, 0x57, 0xAE, 0x11, 0x22, 0x44, 0x88, 0x5D, 0xBA, 0x39, 0x72, 0xE4, 0x85, 0x47, 0x8E, 0x51, 0xA2, 0x09, 0x12, 0x24, 0x48, 0x90, 0x6D, 0xDA, 0xF9, 0xBF, 0x33, 0x66, 0xCC, 0xD5, 0xE7, 0x83, 0x4B, 0x96, 0x61, 0xC2, 0xC9, 0xDF, 0xF3, 0xAB, 0x1B, 0x36, 0x6C, 0xD8, 0xFD, 0xB7, 0x23, 0x46, 0x8C, 0x55, 0xAA, 0x19, 0x32, 0x64, 0xC8, 0xDD, 0xF7, 0xA3, 0x0B, 0x16, 0x2C, 0x58, 0xB0, 0x2D, 0x5A, 0xB4, 0x25, 0x4A, 0x94, 0x65, 0xCA, 0xD9, 0xFF, 0xB3, 0x2B, 0x56, 0xAC, 0x15, 0x2A, 0x54, 0xA8, 0x1D, 0x3A, 0x74, 0xE8, 0x9D, 0x77, 0xEE, 0x91, 0x6F, 0xDE, 0xF1, 0xAF, 0x13, 0x26, 0x4C, 0x98, 0x7D, 0xFA, 0xB9, 0x3F, 0x7E, 0xFC, 0xB5, 0x27, 0x4E, 0x9C, 0x75, 0xEA, 0x99, 0x7F, 0xFE, 0xB1, 0x2F, 0x5E, 0xBC, 0x35, 0x6A, 0xD4, 0xE5, 0x87, 0x43, 0x86, 0x41, 0x82, 0x49, 0x92, 0x69, 0xD2, 0xE9, 0x9F, 0x73, 0xE6, 0x81, 0x4F, 0x9E, 0x71, 0xE2, 0x89, 0x5F, 0xBE, 0x31, 0x62, 0xC4, 0xC5, 0xC7, 0xC3, 0xCB, }; /* The table constants are indices of * S-box entries, preprocessed through q0 and q1. */ static byte calc_sb_tbl[512] = { 0xA9, 0x75, 0x67, 0xF3, 0xB3, 0xC6, 0xE8, 0xF4, 0x04, 0xDB, 0xFD, 0x7B, 0xA3, 0xFB, 0x76, 0xC8, 0x9A, 0x4A, 0x92, 0xD3, 0x80, 0xE6, 0x78, 0x6B, 0xE4, 0x45, 0xDD, 0x7D, 0xD1, 0xE8, 0x38, 0x4B, 0x0D, 0xD6, 0xC6, 0x32, 0x35, 0xD8, 0x98, 0xFD, 0x18, 0x37, 0xF7, 0x71, 0xEC, 0xF1, 0x6C, 0xE1, 0x43, 0x30, 0x75, 0x0F, 0x37, 0xF8, 0x26, 0x1B, 0xFA, 0x87, 0x13, 0xFA, 0x94, 0x06, 0x48, 0x3F, 0xF2, 0x5E, 0xD0, 0xBA, 0x8B, 0xAE, 0x30, 0x5B, 0x84, 0x8A, 0x54, 0x00, 0xDF, 0xBC, 0x23, 0x9D, 0x19, 0x6D, 0x5B, 0xC1, 0x3D, 0xB1, 0x59, 0x0E, 0xF3, 0x80, 0xAE, 0x5D, 0xA2, 0xD2, 0x82, 0xD5, 0x63, 0xA0, 0x01, 0x84, 0x83, 0x07, 0x2E, 0x14, 0xD9, 0xB5, 0x51, 0x90, 0x9B, 0x2C, 0x7C, 0xA3, 0xA6, 0xB2, 0xEB, 0x73, 0xA5, 0x4C, 0xBE, 0x54, 0x16, 0x92, 0x0C, 0x74, 0xE3, 0x36, 0x61, 0x51, 0xC0, 0x38, 0x8C, 0xB0, 0x3A, 0xBD, 0xF5, 0x5A, 0x73, 0xFC, 0x2C, 0x60, 0x25, 0x62, 0x0B, 0x96, 0xBB, 0x6C, 0x4E, 0x42, 0x89, 0xF7, 0x6B, 0x10, 0x53, 0x7C, 0x6A, 0x28, 0xB4, 0x27, 0xF1, 0x8C, 0xE1, 0x13, 0xE6, 0x95, 0xBD, 0x9C, 0x45, 0xC7, 0xE2, 0x24, 0xF4, 0x46, 0xB6, 0x3B, 0x66, 0x70, 0xCC, 0xCA, 0x95, 0xE3, 0x03, 0x85, 0x56, 0xCB, 0xD4, 0x11, 0x1C, 0xD0, 0x1E, 0x93, 0xD7, 0xB8, 0xFB, 0xA6, 0xC3, 0x83, 0x8E, 0x20, 0xB5, 0xFF, 0xE9, 0x9F, 0xCF, 0x77, 0xBF, 0xC3, 0xBA, 0xCC, 0xEA, 0x03, 0x77, 0x6F, 0x39, 0x08, 0xAF, 0xBF, 0x33, 0x40, 0xC9, 0xE7, 0x62, 0x2B, 0x71, 0xE2, 0x81, 0x79, 0x79, 0x0C, 0x09, 0xAA, 0xAD, 0x82, 0x24, 0x41, 0xCD, 0x3A, 0xF9, 0xEA, 0xD8, 0xB9, 0xE5, 0xE4, 0xC5, 0x9A, 0xB9, 0xA4, 0x4D, 0x97, 0x44, 0x7E, 0x08, 0xDA, 0x86, 0x7A, 0xE7, 0x17, 0xA1, 0x66, 0x1D, 0x94, 0xAA, 0xA1, 0xED, 0x1D, 0x06, 0x3D, 0x70, 0xF0, 0xB2, 0xDE, 0xD2, 0xB3, 0x41, 0x0B, 0x7B, 0x72, 0xA0, 0xA7, 0x11, 0x1C, 0x31, 0xEF, 0xC2, 0xD1, 0x27, 0x53, 0x90, 0x3E, 0x20, 0x8F, 0xF6, 0x33, 0x60, 0x26, 0xFF, 0x5F, 0x96, 0xEC, 0x5C, 0x76, 0xB1, 0x2A, 0xAB, 0x49, 0x9E, 0x81, 0x9C, 0x88, 0x52, 0xEE, 0x1B, 0x21, 0x5F, 0xC4, 0x93, 0x1A, 0x0A, 0xEB, 0xEF, 0xD9, 0x91, 0xC5, 0x85, 0x39, 0x49, 0x99, 0xEE, 0xCD, 0x2D, 0xAD, 0x4F, 0x31, 0x8F, 0x8B, 0x3B, 0x01, 0x47, 0x18, 0x87, 0x23, 0x6D, 0xDD, 
0x46, 0x1F, 0xD6, 0x4E, 0x3E, 0x2D, 0x69, 0xF9, 0x64, 0x48, 0x2A, 0x4F, 0xCE, 0xF2, 0xCB, 0x65, 0x2F, 0x8E, 0xFC, 0x78, 0x97, 0x5C, 0x05, 0x58, 0x7A, 0x19, 0xAC, 0x8D, 0x7F, 0xE5, 0xD5, 0x98, 0x1A, 0x57, 0x4B, 0x67, 0x0E, 0x7F, 0xA7, 0x05, 0x5A, 0x64, 0x28, 0xAF, 0x14, 0x63, 0x3F, 0xB6, 0x29, 0xFE, 0x88, 0xF5, 0x3C, 0xB7, 0x4C, 0x3C, 0x02, 0xA5, 0xB8, 0xCE, 0xDA, 0xE9, 0xB0, 0x68, 0x17, 0x44, 0x55, 0xE0, 0x1F, 0x4D, 0x8A, 0x43, 0x7D, 0x69, 0x57, 0x29, 0xC7, 0x2E, 0x8D, 0xAC, 0x74, 0x15, 0xB7, 0x59, 0xC4, 0xA8, 0x9F, 0x0A, 0x72, 0x9E, 0x7E, 0x6E, 0x15, 0x47, 0x22, 0xDF, 0x12, 0x34, 0x58, 0x35, 0x07, 0x6A, 0x99, 0xCF, 0x34, 0xDC, 0x6E, 0x22, 0x50, 0xC9, 0xDE, 0xC0, 0x68, 0x9B, 0x65, 0x89, 0xBC, 0xD4, 0xDB, 0xED, 0xF8, 0xAB, 0xC8, 0x12, 0xA8, 0xA2, 0x2B, 0x0D, 0x40, 0x52, 0xDC, 0xBB, 0xFE, 0x02, 0x32, 0x2F, 0xA4, 0xA9, 0xCA, 0xD7, 0x10, 0x61, 0x21, 0x1E, 0xF0, 0xB4, 0xD3, 0x50, 0x5D, 0x04, 0x0F, 0xF6, 0x00, 0xC2, 0x6F, 0x16, 0x9D, 0x25, 0x36, 0x86, 0x42, 0x56, 0x4A, 0x55, 0x5E, 0x09, 0xC1, 0xBE, 0xE0, 0x91 }; /* Macro to perform one column of the RS matrix multiplication. The * parameters a, b, c, and d are the four bytes of output; i is the index * of the key bytes, and w, x, y, and z, are the column of constants from * the RS matrix, preprocessed through the poly_to_exp table. */ #define CALC_S(a, b, c, d, i, w, x, y, z) \ { \ tmp = poly_to_exp[key[i]]; \ (a) ^= exp_to_poly[tmp + (w)]; \ (b) ^= exp_to_poly[tmp + (x)]; \ (c) ^= exp_to_poly[tmp + (y)]; \ (d) ^= exp_to_poly[tmp + (z)]; \ } /* Macros to calculate the key-dependent S-boxes for a 128-bit key using * the S vector from CALC_S. CALC_SB_2 computes a single entry in all * four S-boxes, where i is the index of the entry to compute, and a and b * are the index numbers preprocessed through the q0 and q1 tables * respectively. CALC_SB is simply a convenience to make the code shorter; * it calls CALC_SB_2 four times with consecutive indices from i to i+3, * using the remaining parameters two by two. */ #define CALC_SB_2(i, a, b) \ ctx->s[0][i] = mds[0][q0[(a) ^ sa] ^ se]; \ ctx->s[1][i] = mds[1][q0[(b) ^ sb] ^ sf]; \ ctx->s[2][i] = mds[2][q1[(a) ^ sc] ^ sg]; \ ctx->s[3][i] = mds[3][q1[(b) ^ sd] ^ sh] #define CALC_SB(i, a, b, c, d, e, f, g, h) \ CALC_SB_2 (i, a, b); CALC_SB_2 ((i)+1, c, d); \ CALC_SB_2 ((i)+2, e, f); CALC_SB_2 ((i)+3, g, h) /* Macros exactly like CALC_SB and CALC_SB_2, but for 256-bit keys. */ #define CALC_SB256_2(i, a, b) \ ctx->s[0][i] = mds[0][q0[q0[q1[(b) ^ sa] ^ se] ^ si] ^ sm]; \ ctx->s[1][i] = mds[1][q0[q1[q1[(a) ^ sb] ^ sf] ^ sj] ^ sn]; \ ctx->s[2][i] = mds[2][q1[q0[q0[(a) ^ sc] ^ sg] ^ sk] ^ so]; \ ctx->s[3][i] = mds[3][q1[q1[q0[(b) ^ sd] ^ sh] ^ sl] ^ sp]; #define CALC_SB256(i, a, b, c, d, e, f, g, h) \ CALC_SB256_2 (i, a, b); CALC_SB256_2 ((i)+1, c, d); \ CALC_SB256_2 ((i)+2, e, f); CALC_SB256_2 ((i)+3, g, h) /* Macros to calculate the whitening and round subkeys. CALC_K_2 computes the * last two stages of the h() function for a given index (either 2i or 2i+1). * a, b, c, and d are the four bytes going into the last two stages. For * 128-bit keys, this is the entire h() function and a and c are the index * preprocessed through q0 and q1 respectively; for longer keys they are the * output of previous stages. j is the index of the first key byte to use. * CALC_K computes a pair of subkeys for 128-bit Twofish, by calling CALC_K_2 * twice, doing the Pseudo-Hadamard Transform, and doing the necessary * rotations. 
Its parameters are: a, the array to write the results into, * j, the index of the first output entry, k and l, the preprocessed indices * for index 2i, and m and n, the preprocessed indices for index 2i+1. * CALC_K256_2 expands CALC_K_2 to handle 256-bit keys, by doing two * additional lookup-and-XOR stages. The parameters a and b are the index * preprocessed through q0 and q1 respectively; j is the index of the first * key byte to use. CALC_K256 is identical to CALC_K but for using the * CALC_K256_2 macro instead of CALC_K_2. */ #define CALC_K_2(a, b, c, d, j) \ mds[0][q0[a ^ key[(j) + 8]] ^ key[j]] \ ^ mds[1][q0[b ^ key[(j) + 9]] ^ key[(j) + 1]] \ ^ mds[2][q1[c ^ key[(j) + 10]] ^ key[(j) + 2]] \ ^ mds[3][q1[d ^ key[(j) + 11]] ^ key[(j) + 3]] #define CALC_K(a, j, k, l, m, n) \ x = CALC_K_2 (k, l, k, l, 0); \ y = CALC_K_2 (m, n, m, n, 4); \ y = (y << 8) + (y >> 24); \ x += y; y += x; ctx->a[j] = x; \ ctx->a[(j) + 1] = (y << 9) + (y >> 23) #define CALC_K256_2(a, b, j) \ CALC_K_2 (q0[q1[b ^ key[(j) + 24]] ^ key[(j) + 16]], \ q1[q1[a ^ key[(j) + 25]] ^ key[(j) + 17]], \ q0[q0[a ^ key[(j) + 26]] ^ key[(j) + 18]], \ q1[q0[b ^ key[(j) + 27]] ^ key[(j) + 19]], j) #define CALC_K256(a, j, k, l, m, n) \ x = CALC_K256_2 (k, l, 0); \ y = CALC_K256_2 (m, n, 4); \ y = (y << 8) + (y >> 24); \ x += y; y += x; ctx->a[j] = x; \ ctx->a[(j) + 1] = (y << 9) + (y >> 23) /* Perform the key setup. Note that this works only with 128- and 256-bit * keys, despite the API that looks like it might support other sizes. */ static gcry_err_code_t do_twofish_setkey (TWOFISH_context *ctx, const byte *key, const unsigned keylen) { int i, j, k; /* Temporaries for CALC_K. */ u32 x, y; /* The S vector used to key the S-boxes, split up into individual bytes. * 128-bit keys use only sa through sh; 256-bit use all of them. */ byte sa = 0, sb = 0, sc = 0, sd = 0, se = 0, sf = 0, sg = 0, sh = 0; byte si = 0, sj = 0, sk = 0, sl = 0, sm = 0, sn = 0, so = 0, sp = 0; /* Temporary for CALC_S. */ unsigned int tmp; /* Flags for self-test. */ static int initialized = 0; static const char *selftest_failed=0; /* Check key length. */ if( ( ( keylen - 16 ) | 16 ) != 16 ) return GPG_ERR_INV_KEYLEN; /* Do self-test if necessary. */ if (!initialized) { initialized = 1; selftest_failed = selftest (); if( selftest_failed ) log_error("%s\n", selftest_failed ); } if( selftest_failed ) return GPG_ERR_SELFTEST_FAILED; /* Compute the first two words of the S vector. The magic numbers are * the entries of the RS matrix, preprocessed through poly_to_exp. The * numbers in the comments are the original (polynomial form) matrix * entries. 
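 * Each CALC_S call below consumes one key byte: it multiplies that byte
 * by the four preprocessed constants of one RS matrix column and
 * XOR-accumulates the products into the four bytes of the S-vector word,
 * so every run of eight calls computes one complete 32-bit word of S.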
*/ CALC_S (sa, sb, sc, sd, 0, 0x00, 0x2D, 0x01, 0x2D); /* 01 A4 02 A4 */ CALC_S (sa, sb, sc, sd, 1, 0x2D, 0xA4, 0x44, 0x8A); /* A4 56 A1 55 */ CALC_S (sa, sb, sc, sd, 2, 0x8A, 0xD5, 0xBF, 0xD1); /* 55 82 FC 87 */ CALC_S (sa, sb, sc, sd, 3, 0xD1, 0x7F, 0x3D, 0x99); /* 87 F3 C1 5A */ CALC_S (sa, sb, sc, sd, 4, 0x99, 0x46, 0x66, 0x96); /* 5A 1E 47 58 */ CALC_S (sa, sb, sc, sd, 5, 0x96, 0x3C, 0x5B, 0xED); /* 58 C6 AE DB */ CALC_S (sa, sb, sc, sd, 6, 0xED, 0x37, 0x4F, 0xE0); /* DB 68 3D 9E */ CALC_S (sa, sb, sc, sd, 7, 0xE0, 0xD0, 0x8C, 0x17); /* 9E E5 19 03 */ CALC_S (se, sf, sg, sh, 8, 0x00, 0x2D, 0x01, 0x2D); /* 01 A4 02 A4 */ CALC_S (se, sf, sg, sh, 9, 0x2D, 0xA4, 0x44, 0x8A); /* A4 56 A1 55 */ CALC_S (se, sf, sg, sh, 10, 0x8A, 0xD5, 0xBF, 0xD1); /* 55 82 FC 87 */ CALC_S (se, sf, sg, sh, 11, 0xD1, 0x7F, 0x3D, 0x99); /* 87 F3 C1 5A */ CALC_S (se, sf, sg, sh, 12, 0x99, 0x46, 0x66, 0x96); /* 5A 1E 47 58 */ CALC_S (se, sf, sg, sh, 13, 0x96, 0x3C, 0x5B, 0xED); /* 58 C6 AE DB */ CALC_S (se, sf, sg, sh, 14, 0xED, 0x37, 0x4F, 0xE0); /* DB 68 3D 9E */ CALC_S (se, sf, sg, sh, 15, 0xE0, 0xD0, 0x8C, 0x17); /* 9E E5 19 03 */ if (keylen == 32) /* 256-bit key */ { /* Calculate the remaining two words of the S vector */ CALC_S (si, sj, sk, sl, 16, 0x00, 0x2D, 0x01, 0x2D); /* 01 A4 02 A4 */ CALC_S (si, sj, sk, sl, 17, 0x2D, 0xA4, 0x44, 0x8A); /* A4 56 A1 55 */ CALC_S (si, sj, sk, sl, 18, 0x8A, 0xD5, 0xBF, 0xD1); /* 55 82 FC 87 */ CALC_S (si, sj, sk, sl, 19, 0xD1, 0x7F, 0x3D, 0x99); /* 87 F3 C1 5A */ CALC_S (si, sj, sk, sl, 20, 0x99, 0x46, 0x66, 0x96); /* 5A 1E 47 58 */ CALC_S (si, sj, sk, sl, 21, 0x96, 0x3C, 0x5B, 0xED); /* 58 C6 AE DB */ CALC_S (si, sj, sk, sl, 22, 0xED, 0x37, 0x4F, 0xE0); /* DB 68 3D 9E */ CALC_S (si, sj, sk, sl, 23, 0xE0, 0xD0, 0x8C, 0x17); /* 9E E5 19 03 */ CALC_S (sm, sn, so, sp, 24, 0x00, 0x2D, 0x01, 0x2D); /* 01 A4 02 A4 */ CALC_S (sm, sn, so, sp, 25, 0x2D, 0xA4, 0x44, 0x8A); /* A4 56 A1 55 */ CALC_S (sm, sn, so, sp, 26, 0x8A, 0xD5, 0xBF, 0xD1); /* 55 82 FC 87 */ CALC_S (sm, sn, so, sp, 27, 0xD1, 0x7F, 0x3D, 0x99); /* 87 F3 C1 5A */ CALC_S (sm, sn, so, sp, 28, 0x99, 0x46, 0x66, 0x96); /* 5A 1E 47 58 */ CALC_S (sm, sn, so, sp, 29, 0x96, 0x3C, 0x5B, 0xED); /* 58 C6 AE DB */ CALC_S (sm, sn, so, sp, 30, 0xED, 0x37, 0x4F, 0xE0); /* DB 68 3D 9E */ CALC_S (sm, sn, so, sp, 31, 0xE0, 0xD0, 0x8C, 0x17); /* 9E E5 19 03 */ /* Compute the S-boxes. */ for(i=j=0,k=1; i < 256; i++, j += 2, k += 2 ) { CALC_SB256_2( i, calc_sb_tbl[j], calc_sb_tbl[k] ); } /* Calculate whitening and round subkeys. */ for (i = 0; i < 8; i += 2) { CALC_K256 ( w, i, q0[i], q1[i], q0[i + 1], q1[i + 1] ); } for (j = 0; j < 32; j += 2, i += 2) { CALC_K256 ( k, j, q0[i], q1[i], q0[i + 1], q1[i + 1] ); } } else { /* Compute the S-boxes. */ for(i=j=0,k=1; i < 256; i++, j += 2, k += 2 ) { CALC_SB_2( i, calc_sb_tbl[j], calc_sb_tbl[k] ); } /* Calculate whitening and round subkeys. 
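 * Subkey word n is h() applied to the byte n replicated four times; the
 * q0[i]/q1[i] arguments below are those index bytes already pushed
 * through the first q-box stage.  The first loop fills the eight
 * whitening words w[0..7], the second the 32 round-key words k[0..31];
 * CALC_K evaluates h() for the even and odd index, rotates the odd
 * result left by 8 bits, applies the PHT (x += y; y += x), and stores
 * the odd subkey rotated left by a further 9 bits.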
*/ for (i = 0; i < 8; i += 2) { CALC_K ( w, i, q0[i], q1[i], q0[i + 1], q1[i + 1] ); } for (j = 0; j < 32; j += 2, i += 2) { CALC_K ( k, j, q0[i], q1[i], q0[i + 1], q1[i + 1] ); } } return 0; } static gcry_err_code_t twofish_setkey (void *context, const byte *key, unsigned int keylen) { TWOFISH_context *ctx = context; unsigned int hwfeatures = _gcry_get_hw_features (); int rc; rc = do_twofish_setkey (ctx, key, keylen); #ifdef USE_AVX2 ctx->use_avx2 = 0; if ((hwfeatures & HWF_INTEL_AVX2) && (hwfeatures & HWF_INTEL_FAST_VPGATHER)) { ctx->use_avx2 = 1; } #endif (void)hwfeatures; _gcry_burn_stack (23+6*sizeof(void*)); return rc; } #ifdef USE_AVX2 /* Assembly implementations of Twofish using AVX2.  Process 16 blocks in parallel. */ extern void _gcry_twofish_avx2_ctr_enc(const TWOFISH_context *ctx, unsigned char *out, const unsigned char *in, unsigned char *ctr) ASM_FUNC_ABI; extern void _gcry_twofish_avx2_cbc_dec(const TWOFISH_context *ctx, unsigned char *out, const unsigned char *in, unsigned char *iv) ASM_FUNC_ABI; extern void _gcry_twofish_avx2_cfb_dec(const TWOFISH_context *ctx, unsigned char *out, const unsigned char *in, unsigned char *iv) ASM_FUNC_ABI; extern void _gcry_twofish_avx2_ocb_enc(const TWOFISH_context *ctx, unsigned char *out, const unsigned char *in, unsigned char *offset, unsigned char *checksum, const u64 Ls[16]) ASM_FUNC_ABI; extern void _gcry_twofish_avx2_ocb_dec(const TWOFISH_context *ctx, unsigned char *out, const unsigned char *in, unsigned char *offset, unsigned char *checksum, const u64 Ls[16]) ASM_FUNC_ABI; extern void _gcry_twofish_avx2_ocb_auth(const TWOFISH_context *ctx, const unsigned char *abuf, unsigned char *offset, unsigned char *checksum, const u64 Ls[16]) ASM_FUNC_ABI; #endif #ifdef USE_AMD64_ASM /* Assembly implementations of Twofish. */ extern void _gcry_twofish_amd64_encrypt_block(const TWOFISH_context *c, byte *out, const byte *in); extern void _gcry_twofish_amd64_decrypt_block(const TWOFISH_context *c, byte *out, const byte *in); /* These assembly implementations process three blocks in parallel. */ extern void _gcry_twofish_amd64_ctr_enc(const TWOFISH_context *c, byte *out, const byte *in, byte *ctr); extern void _gcry_twofish_amd64_cbc_dec(const TWOFISH_context *c, byte *out, const byte *in, byte *iv); extern void _gcry_twofish_amd64_cfb_dec(const TWOFISH_context *c, byte *out, const byte *in, byte *iv); extern void _gcry_twofish_amd64_ocb_enc(const TWOFISH_context *ctx, byte *out, const byte *in, byte *offset, byte *checksum, const u64 Ls[3]); extern void _gcry_twofish_amd64_ocb_dec(const TWOFISH_context *ctx, byte *out, const byte *in, byte *offset, byte *checksum, const u64 Ls[3]); extern void _gcry_twofish_amd64_ocb_auth(const TWOFISH_context *ctx, const byte *abuf, byte *offset, byte *checksum, const u64 Ls[3]); -#ifdef HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS -static inline void -call_sysv_fn (const void *fn, const void *arg1, const void *arg2, - const void *arg3, const void *arg4) -{ - /* Call SystemV ABI function without storing non-volatile XMM registers, * as target function does not use vector instruction sets.
*/ - asm volatile ("callq *%0\n\t" - : "+a" (fn), - "+D" (arg1), - "+S" (arg2), - "+d" (arg3), - "+c" (arg4) - : - : "cc", "memory", "r8", "r9", "r10", "r11"); -} - -static inline void -call_sysv_fn5 (const void *fn, const void *arg1, const void *arg2, - const void *arg3, const void *arg4, const void *arg5) -{ - /* Call SystemV ABI function without storing non-volatile XMM registers, - * as target function does not use vector instruction sets. */ - asm volatile ("movq %[arg5], %%r8\n\t" - "callq *%0\n\t" - : "+a" (fn), - "+D" (arg1), - "+S" (arg2), - "+d" (arg3), - "+c" (arg4) - : [arg5] "g" (arg5) - : "cc", "memory", "r8", "r9", "r10", "r11"); -} - -static inline void -call_sysv_fn6 (const void *fn, const void *arg1, const void *arg2, - const void *arg3, const void *arg4, const void *arg5, - const void *arg6) -{ - /* Call SystemV ABI function without storing non-volatile XMM registers, - * as target function does not use vector instruction sets. */ - asm volatile ("movq %[arg5], %%r8\n\t" - "movq %[arg6], %%r9\n\t" - "callq *%0\n\t" - : "+a" (fn), - "+D" (arg1), - "+S" (arg2), - "+d" (arg3), - "+c" (arg4) - : [arg5] "g" (arg5), - [arg6] "g" (arg6) - : "cc", "memory", "r8", "r9", "r10", "r11"); -} -#endif - static inline void twofish_amd64_encrypt_block(const TWOFISH_context *c, byte *out, const byte *in) { -#ifdef HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS - call_sysv_fn(_gcry_twofish_amd64_encrypt_block, c, out, in, NULL); -#else _gcry_twofish_amd64_encrypt_block(c, out, in); -#endif } static inline void twofish_amd64_decrypt_block(const TWOFISH_context *c, byte *out, const byte *in) { -#ifdef HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS - call_sysv_fn(_gcry_twofish_amd64_decrypt_block, c, out, in, NULL); -#else _gcry_twofish_amd64_decrypt_block(c, out, in); -#endif } static inline void twofish_amd64_ctr_enc(const TWOFISH_context *c, byte *out, const byte *in, byte *ctr) { -#ifdef HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS - call_sysv_fn(_gcry_twofish_amd64_ctr_enc, c, out, in, ctr); -#else _gcry_twofish_amd64_ctr_enc(c, out, in, ctr); -#endif } static inline void twofish_amd64_cbc_dec(const TWOFISH_context *c, byte *out, const byte *in, byte *iv) { -#ifdef HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS - call_sysv_fn(_gcry_twofish_amd64_cbc_dec, c, out, in, iv); -#else _gcry_twofish_amd64_cbc_dec(c, out, in, iv); -#endif } static inline void twofish_amd64_cfb_dec(const TWOFISH_context *c, byte *out, const byte *in, byte *iv) { -#ifdef HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS - call_sysv_fn(_gcry_twofish_amd64_cfb_dec, c, out, in, iv); -#else _gcry_twofish_amd64_cfb_dec(c, out, in, iv); -#endif } static inline void twofish_amd64_ocb_enc(const TWOFISH_context *ctx, byte *out, const byte *in, byte *offset, byte *checksum, const u64 Ls[3]) { -#ifdef HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS - call_sysv_fn6(_gcry_twofish_amd64_ocb_enc, ctx, out, in, offset, checksum, Ls); -#else _gcry_twofish_amd64_ocb_enc(ctx, out, in, offset, checksum, Ls); -#endif } static inline void twofish_amd64_ocb_dec(const TWOFISH_context *ctx, byte *out, const byte *in, byte *offset, byte *checksum, const u64 Ls[3]) { -#ifdef HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS - call_sysv_fn6(_gcry_twofish_amd64_ocb_dec, ctx, out, in, offset, checksum, Ls); -#else _gcry_twofish_amd64_ocb_dec(ctx, out, in, offset, checksum, Ls); -#endif } static inline void twofish_amd64_ocb_auth(const TWOFISH_context *ctx, const byte *abuf, byte *offset, byte *checksum, const u64 Ls[3]) { -#ifdef HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS - 
call_sysv_fn5(_gcry_twofish_amd64_ocb_auth, ctx, abuf, offset, checksum, Ls); -#else _gcry_twofish_amd64_ocb_auth(ctx, abuf, offset, checksum, Ls); -#endif } #elif defined(USE_ARM_ASM) /* Assembly implementations of Twofish. */ extern void _gcry_twofish_arm_encrypt_block(const TWOFISH_context *c, byte *out, const byte *in); extern void _gcry_twofish_arm_decrypt_block(const TWOFISH_context *c, byte *out, const byte *in); #else /*!USE_AMD64_ASM && !USE_ARM_ASM*/ /* Macros to compute the g() function in the encryption and decryption * rounds. G1 is the straight g() function; G2 includes the 8-bit * rotation for the high 32-bit word. */ #define G1(a) \ (ctx->s[0][(a) & 0xFF]) ^ (ctx->s[1][((a) >> 8) & 0xFF]) \ ^ (ctx->s[2][((a) >> 16) & 0xFF]) ^ (ctx->s[3][(a) >> 24]) #define G2(b) \ (ctx->s[1][(b) & 0xFF]) ^ (ctx->s[2][((b) >> 8) & 0xFF]) \ ^ (ctx->s[3][((b) >> 16) & 0xFF]) ^ (ctx->s[0][(b) >> 24]) /* Encryption and decryption Feistel rounds. Each one calls the two g() * macros, does the PHT, and performs the XOR and the appropriate bit * rotations. The parameters are the round number (used to select subkeys), * and the four 32-bit chunks of the text. */ #define ENCROUND(n, a, b, c, d) \ x = G1 (a); y = G2 (b); \ x += y; y += x + ctx->k[2 * (n) + 1]; \ (c) ^= x + ctx->k[2 * (n)]; \ (c) = ((c) >> 1) + ((c) << 31); \ (d) = (((d) << 1)+((d) >> 31)) ^ y #define DECROUND(n, a, b, c, d) \ x = G1 (a); y = G2 (b); \ x += y; y += x; \ (d) ^= y + ctx->k[2 * (n) + 1]; \ (d) = ((d) >> 1) + ((d) << 31); \ (c) = (((c) << 1)+((c) >> 31)); \ (c) ^= (x + ctx->k[2 * (n)]) /* Encryption and decryption cycles; each one is simply two Feistel rounds * with the 32-bit chunks re-ordered to simulate the "swap" */ #define ENCCYCLE(n) \ ENCROUND (2 * (n), a, b, c, d); \ ENCROUND (2 * (n) + 1, c, d, a, b) #define DECCYCLE(n) \ DECROUND (2 * (n) + 1, c, d, a, b); \ DECROUND (2 * (n), a, b, c, d) /* Macros to convert the input and output bytes into 32-bit words, * and simultaneously perform the whitening step. INPACK packs word * number n into the variable named by x, using whitening subkey number m. * OUTUNPACK unpacks word number n from the variable named by x, using * whitening subkey number m. */ #define INPACK(n, x, m) \ x = buf_get_le32(in + (n) * 4); \ x ^= ctx->w[m] #define OUTUNPACK(n, x, m) \ x ^= ctx->w[m]; \ buf_put_le32(out + (n) * 4, x) #endif /*!USE_AMD64_ASM*/ /* Encrypt one block. in and out may be the same. */ #ifdef USE_AMD64_ASM static unsigned int twofish_encrypt (void *context, byte *out, const byte *in) { TWOFISH_context *ctx = context; twofish_amd64_encrypt_block(ctx, out, in); return /*burn_stack*/ (4*sizeof (void*)); } #elif defined(USE_ARM_ASM) static unsigned int twofish_encrypt (void *context, byte *out, const byte *in) { TWOFISH_context *ctx = context; _gcry_twofish_arm_encrypt_block(ctx, out, in); return /*burn_stack*/ (4*sizeof (void*)); } #else /*!USE_AMD64_ASM && !USE_ARM_ASM*/ static void do_twofish_encrypt (const TWOFISH_context *ctx, byte *out, const byte *in) { /* The four 32-bit chunks of the text. */ u32 a, b, c, d; /* Temporaries used by the round function. */ u32 x, y; /* Input whitening and packing. */ INPACK (0, a, 0); INPACK (1, b, 1); INPACK (2, c, 2); INPACK (3, d, 3); /* Encryption Feistel cycles. */ ENCCYCLE (0); ENCCYCLE (1); ENCCYCLE (2); ENCCYCLE (3); ENCCYCLE (4); ENCCYCLE (5); ENCCYCLE (6); ENCCYCLE (7); /* Output whitening and unpacking. 
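 * The final ENCCYCLE leaves the two halves swapped, so the words come
 * back out in c, d, a, b order, whitened with subkeys w[4]..w[7].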
*/ OUTUNPACK (0, c, 4); OUTUNPACK (1, d, 5); OUTUNPACK (2, a, 6); OUTUNPACK (3, b, 7); } static unsigned int twofish_encrypt (void *context, byte *out, const byte *in) { TWOFISH_context *ctx = context; do_twofish_encrypt (ctx, out, in); return /*burn_stack*/ (24+3*sizeof (void*)); } #endif /*!USE_AMD64_ASM && !USE_ARM_ASM*/ /* Decrypt one block. in and out may be the same. */ #ifdef USE_AMD64_ASM static unsigned int twofish_decrypt (void *context, byte *out, const byte *in) { TWOFISH_context *ctx = context; twofish_amd64_decrypt_block(ctx, out, in); return /*burn_stack*/ (4*sizeof (void*)); } #elif defined(USE_ARM_ASM) static unsigned int twofish_decrypt (void *context, byte *out, const byte *in) { TWOFISH_context *ctx = context; _gcry_twofish_arm_decrypt_block(ctx, out, in); return /*burn_stack*/ (4*sizeof (void*)); } #else /*!USE_AMD64_ASM && !USE_ARM_ASM*/ static void do_twofish_decrypt (const TWOFISH_context *ctx, byte *out, const byte *in) { /* The four 32-bit chunks of the text. */ u32 a, b, c, d; /* Temporaries used by the round function. */ u32 x, y; /* Input whitening and packing. */ INPACK (0, c, 4); INPACK (1, d, 5); INPACK (2, a, 6); INPACK (3, b, 7); /* Decryption Feistel cycles. */ DECCYCLE (7); DECCYCLE (6); DECCYCLE (5); DECCYCLE (4); DECCYCLE (3); DECCYCLE (2); DECCYCLE (1); DECCYCLE (0); /* Output whitening and unpacking. */ OUTUNPACK (0, a, 0); OUTUNPACK (1, b, 1); OUTUNPACK (2, c, 2); OUTUNPACK (3, d, 3); } static unsigned int twofish_decrypt (void *context, byte *out, const byte *in) { TWOFISH_context *ctx = context; do_twofish_decrypt (ctx, out, in); return /*burn_stack*/ (24+3*sizeof (void*)); } #endif /*!USE_AMD64_ASM && !USE_ARM_ASM*/ /* Bulk encryption of complete blocks in CTR mode. This function is only intended for the bulk encryption feature of cipher.c. CTR is expected to be of size TWOFISH_BLOCKSIZE. */ void _gcry_twofish_ctr_enc(void *context, unsigned char *ctr, void *outbuf_arg, const void *inbuf_arg, size_t nblocks) { TWOFISH_context *ctx = context; unsigned char *outbuf = outbuf_arg; const unsigned char *inbuf = inbuf_arg; unsigned char tmpbuf[TWOFISH_BLOCKSIZE]; unsigned int burn, burn_stack_depth = 0; int i; #ifdef USE_AVX2 if (ctx->use_avx2) { int did_use_avx2 = 0; /* Process data in 16 block chunks. */ while (nblocks >= 16) { _gcry_twofish_avx2_ctr_enc(ctx, outbuf, inbuf, ctr); nblocks -= 16; outbuf += 16 * TWOFISH_BLOCKSIZE; inbuf += 16 * TWOFISH_BLOCKSIZE; did_use_avx2 = 1; } if (did_use_avx2) { /* twofish-avx2 assembly code does not use stack */ if (nblocks == 0) burn_stack_depth = 0; } } #endif #ifdef USE_AMD64_ASM { /* Process data in 3 block chunks. */ while (nblocks >= 3) { twofish_amd64_ctr_enc(ctx, outbuf, inbuf, ctr); nblocks -= 3; outbuf += 3 * TWOFISH_BLOCKSIZE; inbuf += 3 * TWOFISH_BLOCKSIZE; burn = 8 * sizeof(void*); if (burn > burn_stack_depth) burn_stack_depth = burn; } /* Use generic code to handle smaller chunks... */ /* TODO: use caching instead? */ } #endif for ( ;nblocks; nblocks-- ) { /* Encrypt the counter. */ burn = twofish_encrypt(ctx, tmpbuf, ctr); if (burn > burn_stack_depth) burn_stack_depth = burn; /* XOR the input with the encrypted counter and store in output. */ buf_xor(outbuf, tmpbuf, inbuf, TWOFISH_BLOCKSIZE); outbuf += TWOFISH_BLOCKSIZE; inbuf += TWOFISH_BLOCKSIZE; /* Increment the counter. */ for (i = TWOFISH_BLOCKSIZE; i > 0; i--) { ctr[i-1]++; if (ctr[i-1]) break; } } wipememory(tmpbuf, sizeof(tmpbuf)); _gcry_burn_stack(burn_stack_depth); } /* Bulk decryption of complete blocks in CBC mode.
This function is only intended for the bulk encryption feature of cipher.c. */ void _gcry_twofish_cbc_dec(void *context, unsigned char *iv, void *outbuf_arg, const void *inbuf_arg, size_t nblocks) { TWOFISH_context *ctx = context; unsigned char *outbuf = outbuf_arg; const unsigned char *inbuf = inbuf_arg; unsigned char savebuf[TWOFISH_BLOCKSIZE]; unsigned int burn, burn_stack_depth = 0; #ifdef USE_AVX2 if (ctx->use_avx2) { int did_use_avx2 = 0; /* Process data in 16 block chunks. */ while (nblocks >= 16) { _gcry_twofish_avx2_cbc_dec(ctx, outbuf, inbuf, iv); nblocks -= 16; outbuf += 16 * TWOFISH_BLOCKSIZE; inbuf += 16 * TWOFISH_BLOCKSIZE; did_use_avx2 = 1; } if (did_use_avx2) { /* twofish-avx2 assembly code does not use stack */ if (nblocks == 0) burn_stack_depth = 0; } } #endif #ifdef USE_AMD64_ASM { /* Process data in 3 block chunks. */ while (nblocks >= 3) { twofish_amd64_cbc_dec(ctx, outbuf, inbuf, iv); nblocks -= 3; outbuf += 3 * TWOFISH_BLOCKSIZE; inbuf += 3 * TWOFISH_BLOCKSIZE; burn = 9 * sizeof(void*); if (burn > burn_stack_depth) burn_stack_depth = burn; } /* Use generic code to handle smaller chunks... */ } #endif for ( ;nblocks; nblocks-- ) { /* INBUF is needed later and it may be identical to OUTBUF, so store the intermediate result to SAVEBUF. */ burn = twofish_decrypt (ctx, savebuf, inbuf); if (burn > burn_stack_depth) burn_stack_depth = burn; buf_xor_n_copy_2(outbuf, savebuf, iv, inbuf, TWOFISH_BLOCKSIZE); inbuf += TWOFISH_BLOCKSIZE; outbuf += TWOFISH_BLOCKSIZE; } wipememory(savebuf, sizeof(savebuf)); _gcry_burn_stack(burn_stack_depth); } /* Bulk decryption of complete blocks in CFB mode. This function is only intended for the bulk encryption feature of cipher.c. */ void _gcry_twofish_cfb_dec(void *context, unsigned char *iv, void *outbuf_arg, const void *inbuf_arg, size_t nblocks) { TWOFISH_context *ctx = context; unsigned char *outbuf = outbuf_arg; const unsigned char *inbuf = inbuf_arg; unsigned int burn, burn_stack_depth = 0; #ifdef USE_AVX2 if (ctx->use_avx2) { int did_use_avx2 = 0; /* Process data in 16 block chunks. */ while (nblocks >= 16) { _gcry_twofish_avx2_cfb_dec(ctx, outbuf, inbuf, iv); nblocks -= 16; outbuf += 16 * TWOFISH_BLOCKSIZE; inbuf += 16 * TWOFISH_BLOCKSIZE; did_use_avx2 = 1; } if (did_use_avx2) { /* twofish-avx2 assembly code does not use stack */ if (nblocks == 0) burn_stack_depth = 0; } } #endif #ifdef USE_AMD64_ASM { /* Process data in 3 block chunks. */ while (nblocks >= 3) { twofish_amd64_cfb_dec(ctx, outbuf, inbuf, iv); nblocks -= 3; outbuf += 3 * TWOFISH_BLOCKSIZE; inbuf += 3 * TWOFISH_BLOCKSIZE; burn = 8 * sizeof(void*); if (burn > burn_stack_depth) burn_stack_depth = burn; } /* Use generic code to handle smaller chunks... */ } #endif for ( ;nblocks; nblocks-- ) { burn = twofish_encrypt(ctx, iv, iv); if (burn > burn_stack_depth) burn_stack_depth = burn; buf_xor_n_copy(outbuf, iv, inbuf, TWOFISH_BLOCKSIZE); outbuf += TWOFISH_BLOCKSIZE; inbuf += TWOFISH_BLOCKSIZE; } _gcry_burn_stack(burn_stack_depth); } /* Bulk encryption/decryption of complete blocks in OCB mode. 
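 * OCB whitens block i with a per-block offset, Offset_i = Offset_{i-1}
 * xor L[ntz(i)], on both sides of the block cipher and accumulates a
 * checksum of the plaintext.  The Ls[] tables built below hand the
 * assembly those L values up front: within a 16-block chunk the
 * L[ntz(i)] sequence is fixed except for the one position where ntz is
 * 4 or more, which is patched per chunk through the pointer l.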
*/ size_t _gcry_twofish_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg, const void *inbuf_arg, size_t nblocks, int encrypt) { #ifdef USE_AMD64_ASM TWOFISH_context *ctx = (void *)&c->context.c; unsigned char *outbuf = outbuf_arg; const unsigned char *inbuf = inbuf_arg; unsigned int burn, burn_stack_depth = 0; u64 blkn = c->u_mode.ocb.data_nblocks; #ifdef USE_AVX2 if (ctx->use_avx2) { int did_use_avx2 = 0; u64 Ls[16]; unsigned int n = 16 - (blkn % 16); u64 *l; int i; if (nblocks >= 16) { for (i = 0; i < 16; i += 8) { /* Use u64 to store pointers for x32 support (assembly function * assumes 64-bit pointers). */ Ls[(i + 0 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[0]; Ls[(i + 1 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[1]; Ls[(i + 2 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[0]; Ls[(i + 3 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[2]; Ls[(i + 4 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[0]; Ls[(i + 5 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[1]; Ls[(i + 6 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[0]; } Ls[(7 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[3]; l = &Ls[(15 + n) % 16]; /* Process data in 16 block chunks. */ while (nblocks >= 16) { blkn += 16; *l = (uintptr_t)(void *)ocb_get_l(c, blkn - blkn % 16); if (encrypt) _gcry_twofish_avx2_ocb_enc(ctx, outbuf, inbuf, c->u_iv.iv, c->u_ctr.ctr, Ls); else _gcry_twofish_avx2_ocb_dec(ctx, outbuf, inbuf, c->u_iv.iv, c->u_ctr.ctr, Ls); nblocks -= 16; outbuf += 16 * TWOFISH_BLOCKSIZE; inbuf += 16 * TWOFISH_BLOCKSIZE; did_use_avx2 = 1; } } if (did_use_avx2) { /* twofish-avx2 assembly code does not use stack */ if (nblocks == 0) burn_stack_depth = 0; } } #endif { /* Use u64 to store pointers for x32 support (assembly function * assumes 64-bit pointers). */ u64 Ls[3]; /* Process data in 3 block chunks. */ while (nblocks >= 3) { Ls[0] = (uintptr_t)(const void *)ocb_get_l(c, blkn + 1); Ls[1] = (uintptr_t)(const void *)ocb_get_l(c, blkn + 2); Ls[2] = (uintptr_t)(const void *)ocb_get_l(c, blkn + 3); blkn += 3; if (encrypt) twofish_amd64_ocb_enc(ctx, outbuf, inbuf, c->u_iv.iv, c->u_ctr.ctr, Ls); else twofish_amd64_ocb_dec(ctx, outbuf, inbuf, c->u_iv.iv, c->u_ctr.ctr, Ls); nblocks -= 3; outbuf += 3 * TWOFISH_BLOCKSIZE; inbuf += 3 * TWOFISH_BLOCKSIZE; burn = 8 * sizeof(void*); if (burn > burn_stack_depth) burn_stack_depth = burn; } /* Use generic code to handle smaller chunks... */ } c->u_mode.ocb.data_nblocks = blkn; if (burn_stack_depth) _gcry_burn_stack (burn_stack_depth + 4 * sizeof(void *)); #else (void)c; (void)outbuf_arg; (void)inbuf_arg; (void)encrypt; #endif return nblocks; } /* Bulk authentication of complete blocks in OCB mode. */ size_t _gcry_twofish_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg, size_t nblocks) { #ifdef USE_AMD64_ASM TWOFISH_context *ctx = (void *)&c->context.c; const unsigned char *abuf = abuf_arg; unsigned int burn, burn_stack_depth = 0; u64 blkn = c->u_mode.ocb.aad_nblocks; #ifdef USE_AVX2 if (ctx->use_avx2) { int did_use_avx2 = 0; u64 Ls[16]; unsigned int n = 16 - (blkn % 16); u64 *l; int i; if (nblocks >= 16) { for (i = 0; i < 16; i += 8) { /* Use u64 to store pointers for x32 support (assembly function * assumes 64-bit pointers). 
*/ Ls[(i + 0 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[0]; Ls[(i + 1 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[1]; Ls[(i + 2 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[0]; Ls[(i + 3 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[2]; Ls[(i + 4 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[0]; Ls[(i + 5 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[1]; Ls[(i + 6 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[0]; } Ls[(7 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[3]; l = &Ls[(15 + n) % 16]; /* Process data in 16 block chunks. */ while (nblocks >= 16) { blkn += 16; *l = (uintptr_t)(void *)ocb_get_l(c, blkn - blkn % 16); _gcry_twofish_avx2_ocb_auth(ctx, abuf, c->u_mode.ocb.aad_offset, c->u_mode.ocb.aad_sum, Ls); nblocks -= 16; abuf += 16 * TWOFISH_BLOCKSIZE; did_use_avx2 = 1; } } if (did_use_avx2) { /* twofish-avx2 assembly code does not use stack */ if (nblocks == 0) burn_stack_depth = 0; } /* Use generic code to handle smaller chunks... */ } #endif { /* Use u64 to store pointers for x32 support (assembly function * assumes 64-bit pointers). */ u64 Ls[3]; /* Process data in 3 block chunks. */ while (nblocks >= 3) { Ls[0] = (uintptr_t)(const void *)ocb_get_l(c, blkn + 1); Ls[1] = (uintptr_t)(const void *)ocb_get_l(c, blkn + 2); Ls[2] = (uintptr_t)(const void *)ocb_get_l(c, blkn + 3); blkn += 3; twofish_amd64_ocb_auth(ctx, abuf, c->u_mode.ocb.aad_offset, c->u_mode.ocb.aad_sum, Ls); nblocks -= 3; abuf += 3 * TWOFISH_BLOCKSIZE; burn = 8 * sizeof(void*); if (burn > burn_stack_depth) burn_stack_depth = burn; } /* Use generic code to handle smaller chunks... */ } c->u_mode.ocb.aad_nblocks = blkn; if (burn_stack_depth) _gcry_burn_stack (burn_stack_depth + 4 * sizeof(void *)); #else (void)c; (void)abuf_arg; #endif return nblocks; } /* Run the self-tests for TWOFISH-CTR, tests IV increment of bulk CTR encryption. Returns NULL on success. */ static const char * selftest_ctr (void) { const int nblocks = 16+1; const int blocksize = TWOFISH_BLOCKSIZE; const int context_size = sizeof(TWOFISH_context); return _gcry_selftest_helper_ctr("TWOFISH", &twofish_setkey, &twofish_encrypt, &_gcry_twofish_ctr_enc, nblocks, blocksize, context_size); } /* Run the self-tests for TWOFISH-CBC, tests bulk CBC decryption. Returns NULL on success. */ static const char * selftest_cbc (void) { const int nblocks = 16+2; const int blocksize = TWOFISH_BLOCKSIZE; const int context_size = sizeof(TWOFISH_context); return _gcry_selftest_helper_cbc("TWOFISH", &twofish_setkey, &twofish_encrypt, &_gcry_twofish_cbc_dec, nblocks, blocksize, context_size); } /* Run the self-tests for TWOFISH-CFB, tests bulk CFB decryption. Returns NULL on success. */ static const char * selftest_cfb (void) { const int nblocks = 16+2; const int blocksize = TWOFISH_BLOCKSIZE; const int context_size = sizeof(TWOFISH_context); return _gcry_selftest_helper_cfb("TWOFISH", &twofish_setkey, &twofish_encrypt, &_gcry_twofish_cfb_dec, nblocks, blocksize, context_size); } /* Test a single encryption and decryption with each key size. */ static const char* selftest (void) { TWOFISH_context ctx; /* Expanded key. */ byte scratch[16]; /* Encryption/decryption result buffer. */ const char *r; /* Test vectors for single encryption/decryption. Note that I am using * the vectors from the Twofish paper's "known answer test", I=3 for * 128-bit and I=4 for 256-bit, instead of the all-0 vectors from the * "intermediate value test", because an all-0 key would trigger all the * special cases in the RS matrix multiply, leaving the math untested.
*/ static byte plaintext[16] = { 0xD4, 0x91, 0xDB, 0x16, 0xE7, 0xB1, 0xC3, 0x9E, 0x86, 0xCB, 0x08, 0x6B, 0x78, 0x9F, 0x54, 0x19 }; static byte key[16] = { 0x9F, 0x58, 0x9F, 0x5C, 0xF6, 0x12, 0x2C, 0x32, 0xB6, 0xBF, 0xEC, 0x2F, 0x2A, 0xE8, 0xC3, 0x5A }; static const byte ciphertext[16] = { 0x01, 0x9F, 0x98, 0x09, 0xDE, 0x17, 0x11, 0x85, 0x8F, 0xAA, 0xC3, 0xA3, 0xBA, 0x20, 0xFB, 0xC3 }; static byte plaintext_256[16] = { 0x90, 0xAF, 0xE9, 0x1B, 0xB2, 0x88, 0x54, 0x4F, 0x2C, 0x32, 0xDC, 0x23, 0x9B, 0x26, 0x35, 0xE6 }; static byte key_256[32] = { 0xD4, 0x3B, 0xB7, 0x55, 0x6E, 0xA3, 0x2E, 0x46, 0xF2, 0xA2, 0x82, 0xB7, 0xD4, 0x5B, 0x4E, 0x0D, 0x57, 0xFF, 0x73, 0x9D, 0x4D, 0xC9, 0x2C, 0x1B, 0xD7, 0xFC, 0x01, 0x70, 0x0C, 0xC8, 0x21, 0x6F }; static const byte ciphertext_256[16] = { 0x6C, 0xB4, 0x56, 0x1C, 0x40, 0xBF, 0x0A, 0x97, 0x05, 0x93, 0x1C, 0xB6, 0xD4, 0x08, 0xE7, 0xFA }; twofish_setkey (&ctx, key, sizeof(key)); twofish_encrypt (&ctx, scratch, plaintext); if (memcmp (scratch, ciphertext, sizeof (ciphertext))) return "Twofish-128 test encryption failed."; twofish_decrypt (&ctx, scratch, scratch); if (memcmp (scratch, plaintext, sizeof (plaintext))) return "Twofish-128 test decryption failed."; twofish_setkey (&ctx, key_256, sizeof(key_256)); twofish_encrypt (&ctx, scratch, plaintext_256); if (memcmp (scratch, ciphertext_256, sizeof (ciphertext_256))) return "Twofish-256 test encryption failed."; twofish_decrypt (&ctx, scratch, scratch); if (memcmp (scratch, plaintext_256, sizeof (plaintext_256))) return "Twofish-256 test decryption failed."; if ((r = selftest_ctr()) != NULL) return r; if ((r = selftest_cbc()) != NULL) return r; if ((r = selftest_cfb()) != NULL) return r; return NULL; } /* More complete test program. This does 1000 encryptions and decryptions * with each of 250 128-bit keys and 2000 encryptions and decryptions with * each of 125 256-bit keys, using a feedback scheme similar to a Feistel * cipher, so as to be sure of testing all the table entries pretty * thoroughly. We keep changing the keys so as to get a more meaningful * performance number, since the key setup is non-trivial for Twofish. */ #ifdef TEST #include #include #include int main() { TWOFISH_context ctx; /* Expanded key. */ int i, j; /* Loop counters. */ const char *encrypt_msg; /* Message to print regarding encryption test; * the printf is done outside the loop to avoid * stuffing up the timing. */ clock_t timer; /* For computing elapsed time. */ /* Test buffer. 
*/ byte buffer[4][16] = { {0x00, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77, 0x88, 0x99, 0xAA, 0xBB, 0xCC, 0xDD, 0xEE, 0xFF}, {0x0F, 0x1E, 0x2D, 0x3C, 0x4B, 0x5A, 0x69, 0x78, 0x87, 0x96, 0xA5, 0xB4, 0xC3, 0xD2 ,0xE1, 0xF0}, {0x01, 0x23, 0x45, 0x67, 0x89, 0xAB, 0xCD, 0xEF, 0xFE, 0xDC, 0xBA, 0x98, 0x76, 0x54 ,0x32, 0x10}, {0x01, 0x23, 0x45, 0x67, 0x76, 0x54 ,0x32, 0x10, 0x89, 0xAB, 0xCD, 0xEF, 0xFE, 0xDC, 0xBA, 0x98} }; /* Expected outputs for the million-operation test */ static const byte test_encrypt[4][16] = { {0xC8, 0x23, 0xB8, 0xB7, 0x6B, 0xFE, 0x91, 0x13, 0x2F, 0xA7, 0x5E, 0xE6, 0x94, 0x77, 0x6F, 0x6B}, {0x90, 0x36, 0xD8, 0x29, 0xD5, 0x96, 0xC2, 0x8E, 0xE4, 0xFF, 0x76, 0xBC, 0xE5, 0x77, 0x88, 0x27}, {0xB8, 0x78, 0x69, 0xAF, 0x42, 0x8B, 0x48, 0x64, 0xF7, 0xE9, 0xF3, 0x9C, 0x42, 0x18, 0x7B, 0x73}, {0x7A, 0x88, 0xFB, 0xEB, 0x90, 0xA4, 0xB4, 0xA8, 0x43, 0xA3, 0x1D, 0xF1, 0x26, 0xC4, 0x53, 0x57} }; static const byte test_decrypt[4][16] = { {0x00, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77, 0x88, 0x99, 0xAA, 0xBB, 0xCC, 0xDD, 0xEE, 0xFF}, {0x0F, 0x1E, 0x2D, 0x3C, 0x4B, 0x5A, 0x69, 0x78, 0x87, 0x96, 0xA5, 0xB4, 0xC3, 0xD2 ,0xE1, 0xF0}, {0x01, 0x23, 0x45, 0x67, 0x89, 0xAB, 0xCD, 0xEF, 0xFE, 0xDC, 0xBA, 0x98, 0x76, 0x54 ,0x32, 0x10}, {0x01, 0x23, 0x45, 0x67, 0x76, 0x54 ,0x32, 0x10, 0x89, 0xAB, 0xCD, 0xEF, 0xFE, 0xDC, 0xBA, 0x98} }; /* Start the timer ticking. */ timer = clock (); /* Encryption test. */ for (i = 0; i < 125; i++) { twofish_setkey (&ctx, buffer[0], sizeof (buffer[0])); for (j = 0; j < 1000; j++) twofish_encrypt (&ctx, buffer[2], buffer[2]); twofish_setkey (&ctx, buffer[1], sizeof (buffer[1])); for (j = 0; j < 1000; j++) twofish_encrypt (&ctx, buffer[3], buffer[3]); twofish_setkey (&ctx, buffer[2], sizeof (buffer[2])*2); for (j = 0; j < 1000; j++) { twofish_encrypt (&ctx, buffer[0], buffer[0]); twofish_encrypt (&ctx, buffer[1], buffer[1]); } } encrypt_msg = memcmp (buffer, test_encrypt, sizeof (test_encrypt)) ? "encryption failure!\n" : "encryption OK!\n"; /* Decryption test. */ for (i = 0; i < 125; i++) { twofish_setkey (&ctx, buffer[2], sizeof (buffer[2])*2); for (j = 0; j < 1000; j++) { twofish_decrypt (&ctx, buffer[0], buffer[0]); twofish_decrypt (&ctx, buffer[1], buffer[1]); } twofish_setkey (&ctx, buffer[1], sizeof (buffer[1])); for (j = 0; j < 1000; j++) twofish_decrypt (&ctx, buffer[3], buffer[3]); twofish_setkey (&ctx, buffer[0], sizeof (buffer[0])); for (j = 0; j < 1000; j++) twofish_decrypt (&ctx, buffer[2], buffer[2]); } /* Stop the timer, and print results. */ timer = clock () - timer; printf (encrypt_msg); printf (memcmp (buffer, test_decrypt, sizeof (test_decrypt)) ? "decryption failure!\n" : "decryption OK!\n"); printf ("elapsed time: %.1f s.\n", (float) timer / CLOCKS_PER_SEC); return 0; } #endif /* TEST */ gcry_cipher_spec_t _gcry_cipher_spec_twofish = { GCRY_CIPHER_TWOFISH, {0, 0}, "TWOFISH", NULL, NULL, 16, 256, sizeof (TWOFISH_context), twofish_setkey, twofish_encrypt, twofish_decrypt }; gcry_cipher_spec_t _gcry_cipher_spec_twofish128 = { GCRY_CIPHER_TWOFISH128, {0, 0}, "TWOFISH128", NULL, NULL, 16, 128, sizeof (TWOFISH_context), twofish_setkey, twofish_encrypt, twofish_decrypt };
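/* Illustrative only (not part of this patch): a minimal sketch of how the
 * bulk-capable CTR path registered above is reached through libgcrypt's
 * public API.  Error checking is omitted; real code must check each
 * gcry_error_t return value.
 *
 *   gcry_cipher_hd_t hd;
 *   byte key[32], ctr[16], buf[64];
 *
 *   gcry_cipher_open (&hd, GCRY_CIPHER_TWOFISH, GCRY_CIPHER_MODE_CTR, 0);
 *   gcry_cipher_setkey (hd, key, sizeof key);
 *   gcry_cipher_setctr (hd, ctr, sizeof ctr);
 *   gcry_cipher_encrypt (hd, buf, sizeof buf, NULL, 0);   (in-place)
 *   gcry_cipher_close (hd);
 */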